Modify nltk, langdetect, etc. in order to convert from python2 to python3
refs: 09/243009/1 submit/tizen/20200902.110547
author    jay.ho.park <jay.ho.park@samsung.com>
Thu, 3 Sep 2020 05:04:01 +0000 (14:04 +0900)
committer jay.ho.park <jay.ho.park@samsung.com>
Thu, 3 Sep 2020 05:04:07 +0000 (14:04 +0900)
This reverts commit 9b8ff8c695419b470c517b5d4fa640dd3f7e37d7.

Change-Id: I94ff2dbfa26ad526a8c003afa93d6b374e201a11
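
Note on the file list: converting from python2 to python3 replaces the old in-place *.pyc bytecode files with per-interpreter caches under __pycache__/ (PEP 3147), which is why nearly every directory below deletes a .pyc and adds a cpython-37 cache file. A minimal sketch of that mapping using the standard importlib API follows; the example source path is illustrative, and the cpython-37 tag assumes the build ran under Python 3.7:

    import importlib.util

    # cache_from_source() maps a source file to its PEP 3147 cache path,
    # i.e. the __pycache__ entries added throughout this commit.
    src = 'nlp_resource_data/nltk/data.py'
    print(importlib.util.cache_from_source(src))
    # -> nlp_resource_data/nltk/__pycache__/data.cpython-37.pyc
    #    (the 'cpython-37' tag varies with the running interpreter)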

1076 files changed:
nlp_resource_data/CMakeLists.txt
nlp_resource_data/langdetect/__init__.py [changed mode: 0755->0644]
nlp_resource_data/langdetect/__init__.pyc [deleted file]
nlp_resource_data/langdetect/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/langdetect/__pycache__/detector.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/langdetect/__pycache__/detector_factory.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/langdetect/__pycache__/lang_detect_exception.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/langdetect/__pycache__/language.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/langdetect/detector.py [changed mode: 0755->0644]
nlp_resource_data/langdetect/detector.pyc [deleted file]
nlp_resource_data/langdetect/detector_factory.py [changed mode: 0755->0644]
nlp_resource_data/langdetect/detector_factory.pyc [deleted file]
nlp_resource_data/langdetect/lang_detect_exception.py [changed mode: 0755->0644]
nlp_resource_data/langdetect/lang_detect_exception.pyc [deleted file]
nlp_resource_data/langdetect/language.py [changed mode: 0755->0644]
nlp_resource_data/langdetect/language.pyc [deleted file]
nlp_resource_data/langdetect/profiles/af [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/ar [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/bg [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/bn [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/ca [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/cs [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/cy [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/da [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/de [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/el [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/es [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/et [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/fa [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/fi [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/fr [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/gu [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/he [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/hi [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/hr [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/hu [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/id [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/it [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/ja [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/kn [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/ko [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/lt [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/lv [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/mk [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/ml [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/mr [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/ne [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/nl [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/no [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/pa [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/pl [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/pt [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/ro [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/ru [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/sk [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/sl [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/so [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/sq [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/sv [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/sw [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/ta [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/te [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/th [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/tl [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/tr [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/uk [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/ur [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/vi [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/zh-cn [new file with mode: 0755]
nlp_resource_data/langdetect/profiles/zh-tw [new file with mode: 0755]
nlp_resource_data/langdetect/utils/__init__.py [changed mode: 0755->0644]
nlp_resource_data/langdetect/utils/__init__.pyc [deleted file]
nlp_resource_data/langdetect/utils/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/langdetect/utils/__pycache__/lang_profile.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/langdetect/utils/__pycache__/messages.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/langdetect/utils/__pycache__/ngram.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/langdetect/utils/__pycache__/unicode_block.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/langdetect/utils/lang_profile.py [changed mode: 0755->0644]
nlp_resource_data/langdetect/utils/lang_profile.pyc [deleted file]
nlp_resource_data/langdetect/utils/messages.py [changed mode: 0755->0644]
nlp_resource_data/langdetect/utils/messages.pyc [deleted file]
nlp_resource_data/langdetect/utils/ngram.py [changed mode: 0755->0644]
nlp_resource_data/langdetect/utils/ngram.pyc [deleted file]
nlp_resource_data/langdetect/utils/unicode_block.py [changed mode: 0755->0644]
nlp_resource_data/langdetect/utils/unicode_block.pyc [deleted file]
nlp_resource_data/nltk/VERSION [changed mode: 0755->0644]
nlp_resource_data/nltk/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/__init__.pyc [deleted file]
nlp_resource_data/nltk/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/book.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/cli.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/collections.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/collocations.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/compat.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/data.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/decorators.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/downloader.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/featstruct.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/grammar.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/help.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/internals.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/jsontags.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/lazyimport.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/probability.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/text.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/tgrep.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/toolbox.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/tree.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/treeprettyprinter.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/treetransforms.cpython-37.pyc [moved from nlp_resource_data/nltk/treetransforms.pyc with 57% similarity, mode: 0644]
nlp_resource_data/nltk/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/__pycache__/wsd.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/app/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/app/__init__.pyc [deleted file]
nlp_resource_data/nltk/app/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/app/__pycache__/chartparser_app.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/app/__pycache__/chunkparser_app.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/app/__pycache__/collocations_app.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/app/__pycache__/concordance_app.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/app/__pycache__/nemo_app.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/app/__pycache__/rdparser_app.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/app/__pycache__/srparser_app.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/app/__pycache__/wordfreq_app.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/app/__pycache__/wordnet_app.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/app/chartparser_app.py [changed mode: 0755->0644]
nlp_resource_data/nltk/app/chartparser_app.pyc [deleted file]
nlp_resource_data/nltk/app/chunkparser_app.py [changed mode: 0755->0644]
nlp_resource_data/nltk/app/chunkparser_app.pyc [deleted file]
nlp_resource_data/nltk/app/collocations_app.py [changed mode: 0755->0644]
nlp_resource_data/nltk/app/collocations_app.pyc [deleted file]
nlp_resource_data/nltk/app/concordance_app.py [changed mode: 0755->0644]
nlp_resource_data/nltk/app/concordance_app.pyc [deleted file]
nlp_resource_data/nltk/app/nemo_app.py [changed mode: 0755->0644]
nlp_resource_data/nltk/app/nemo_app.pyc [deleted file]
nlp_resource_data/nltk/app/rdparser_app.py [changed mode: 0755->0644]
nlp_resource_data/nltk/app/rdparser_app.pyc [deleted file]
nlp_resource_data/nltk/app/srparser_app.py [changed mode: 0755->0644]
nlp_resource_data/nltk/app/srparser_app.pyc [deleted file]
nlp_resource_data/nltk/app/wordfreq_app.py [changed mode: 0755->0644]
nlp_resource_data/nltk/app/wordfreq_app.pyc [deleted file]
nlp_resource_data/nltk/app/wordnet_app.py [changed mode: 0755->0644]
nlp_resource_data/nltk/app/wordnet_app.pyc [deleted file]
nlp_resource_data/nltk/book.py [changed mode: 0755->0644]
nlp_resource_data/nltk/book.pyc [deleted file]
nlp_resource_data/nltk/ccg/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/ccg/__init__.pyc [deleted file]
nlp_resource_data/nltk/ccg/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/ccg/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/ccg/__pycache__/chart.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/ccg/__pycache__/combinator.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/ccg/__pycache__/lexicon.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/ccg/__pycache__/logic.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/ccg/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/ccg/api.pyc [deleted file]
nlp_resource_data/nltk/ccg/chart.py [changed mode: 0755->0644]
nlp_resource_data/nltk/ccg/chart.pyc [deleted file]
nlp_resource_data/nltk/ccg/combinator.py [changed mode: 0755->0644]
nlp_resource_data/nltk/ccg/combinator.pyc [deleted file]
nlp_resource_data/nltk/ccg/lexicon.py [changed mode: 0755->0644]
nlp_resource_data/nltk/ccg/lexicon.pyc [deleted file]
nlp_resource_data/nltk/ccg/logic.py [changed mode: 0755->0644]
nlp_resource_data/nltk/ccg/logic.pyc [deleted file]
nlp_resource_data/nltk/chat/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chat/__init__.pyc [deleted file]
nlp_resource_data/nltk/chat/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/chat/__pycache__/eliza.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/chat/__pycache__/iesha.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/chat/__pycache__/rude.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/chat/__pycache__/suntsu.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/chat/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/chat/__pycache__/zen.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/chat/eliza.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chat/eliza.pyc [deleted file]
nlp_resource_data/nltk/chat/iesha.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chat/iesha.pyc [deleted file]
nlp_resource_data/nltk/chat/rude.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chat/rude.pyc [deleted file]
nlp_resource_data/nltk/chat/suntsu.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chat/suntsu.pyc [deleted file]
nlp_resource_data/nltk/chat/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chat/util.pyc [deleted file]
nlp_resource_data/nltk/chat/zen.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chat/zen.pyc [deleted file]
nlp_resource_data/nltk/chunk/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chunk/__pycache__/__init__.cpython-37.pyc [moved from nlp_resource_data/nltk/chunk/__init__.pyc with 80% similarity, mode: 0644]
nlp_resource_data/nltk/chunk/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/chunk/__pycache__/named_entity.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/chunk/__pycache__/regexp.cpython-37.pyc [moved from nlp_resource_data/nltk/chunk/regexp.pyc with 56% similarity, mode: 0644]
nlp_resource_data/nltk/chunk/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/chunk/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chunk/api.pyc [deleted file]
nlp_resource_data/nltk/chunk/named_entity.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chunk/named_entity.pyc [deleted file]
nlp_resource_data/nltk/chunk/regexp.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chunk/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/chunk/util.pyc [deleted file]
nlp_resource_data/nltk/classify/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/__pycache__/__init__.cpython-37.pyc [moved from nlp_resource_data/nltk/classify/__init__.pyc with 68% similarity, mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/decisiontree.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/maxent.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/megam.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/naivebayes.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/positivenaivebayes.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/rte_classify.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/scikitlearn.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/senna.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/svm.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/tadm.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/textcat.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/__pycache__/weka.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/classify/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/api.pyc [deleted file]
nlp_resource_data/nltk/classify/decisiontree.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/decisiontree.pyc [deleted file]
nlp_resource_data/nltk/classify/maxent.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/maxent.pyc [deleted file]
nlp_resource_data/nltk/classify/megam.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/megam.pyc [deleted file]
nlp_resource_data/nltk/classify/naivebayes.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/naivebayes.pyc [deleted file]
nlp_resource_data/nltk/classify/positivenaivebayes.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/positivenaivebayes.pyc [deleted file]
nlp_resource_data/nltk/classify/rte_classify.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/rte_classify.pyc [deleted file]
nlp_resource_data/nltk/classify/scikitlearn.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/scikitlearn.pyc [deleted file]
nlp_resource_data/nltk/classify/senna.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/senna.pyc [deleted file]
nlp_resource_data/nltk/classify/svm.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/svm.pyc [deleted file]
nlp_resource_data/nltk/classify/tadm.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/tadm.pyc [deleted file]
nlp_resource_data/nltk/classify/textcat.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/textcat.pyc [deleted file]
nlp_resource_data/nltk/classify/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/util.pyc [deleted file]
nlp_resource_data/nltk/classify/weka.py [changed mode: 0755->0644]
nlp_resource_data/nltk/classify/weka.pyc [deleted file]
nlp_resource_data/nltk/cli.py [new file with mode: 0644]
nlp_resource_data/nltk/cluster/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/cluster/__pycache__/__init__.cpython-37.pyc [moved from nlp_resource_data/nltk/cluster/__init__.pyc with 87% similarity, mode: 0644]
nlp_resource_data/nltk/cluster/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/cluster/__pycache__/em.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/cluster/__pycache__/gaac.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/cluster/__pycache__/kmeans.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/cluster/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/cluster/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/cluster/api.pyc [deleted file]
nlp_resource_data/nltk/cluster/em.py [changed mode: 0755->0644]
nlp_resource_data/nltk/cluster/em.pyc [deleted file]
nlp_resource_data/nltk/cluster/gaac.py [changed mode: 0755->0644]
nlp_resource_data/nltk/cluster/gaac.pyc [deleted file]
nlp_resource_data/nltk/cluster/kmeans.py [changed mode: 0755->0644]
nlp_resource_data/nltk/cluster/kmeans.pyc [deleted file]
nlp_resource_data/nltk/cluster/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/cluster/util.pyc [deleted file]
nlp_resource_data/nltk/collections.py [changed mode: 0755->0644]
nlp_resource_data/nltk/collections.pyc [deleted file]
nlp_resource_data/nltk/collocations.py [changed mode: 0755->0644]
nlp_resource_data/nltk/collocations.pyc [deleted file]
nlp_resource_data/nltk/compat.py [changed mode: 0755->0644]
nlp_resource_data/nltk/compat.pyc [deleted file]
nlp_resource_data/nltk/corpus/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/__init__.pyc [deleted file]
nlp_resource_data/nltk/corpus/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/__pycache__/europarl_raw.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/europarl_raw.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/europarl_raw.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/__init__.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/aligned.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/bnc.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/bracket_parse.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/categorized_sents.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/chasen.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/childes.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/chunked.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/cmudict.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/comparative_sents.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/conll.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/crubadan.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/dependency.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/framenet.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/ieer.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/indian.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/ipipan.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/knbc.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/lin.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/mte.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/nkjp.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/nombank.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/nps_chat.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/opinion_lexicon.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/panlex_lite.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/panlex_swadesh.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/pl196x.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/plaintext.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/ppattach.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/propbank.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/pros_cons.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/reviews.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/rte.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/semcor.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/senseval.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/sentiwordnet.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/sinica_treebank.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/string_category.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/switchboard.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/tagged.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/timit.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/toolbox.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/twitter.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/udhr.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/verbnet.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/wordlist.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/wordnet.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/xmldocs.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/__pycache__/ycoe.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/aligned.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/aligned.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/api.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/bnc.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/bnc.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/bracket_parse.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/bracket_parse.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/categorized_sents.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/categorized_sents.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/chasen.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/chasen.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/childes.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/childes.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/chunked.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/chunked.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/cmudict.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/cmudict.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/comparative_sents.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/comparative_sents.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/conll.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/conll.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/crubadan.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/crubadan.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/dependency.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/dependency.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/framenet.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/framenet.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/ieer.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/ieer.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/indian.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/indian.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/ipipan.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/ipipan.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/knbc.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/knbc.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/lin.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/lin.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/mte.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/mte.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/nkjp.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/nkjp.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/nombank.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/nombank.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/nps_chat.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/nps_chat.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/opinion_lexicon.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/opinion_lexicon.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/panlex_lite.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/panlex_lite.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/panlex_swadesh.py [new file with mode: 0644]
nlp_resource_data/nltk/corpus/reader/pl196x.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/pl196x.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/plaintext.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/plaintext.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/ppattach.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/ppattach.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/propbank.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/propbank.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/pros_cons.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/pros_cons.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/reviews.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/reviews.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/rte.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/rte.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/semcor.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/semcor.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/senseval.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/senseval.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/sentiwordnet.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/sentiwordnet.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/sinica_treebank.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/sinica_treebank.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/string_category.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/string_category.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/switchboard.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/switchboard.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/tagged.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/tagged.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/timit.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/timit.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/toolbox.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/toolbox.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/twitter.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/twitter.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/udhr.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/udhr.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/util.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/verbnet.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/verbnet.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/wordlist.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/wordlist.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/wordnet.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/wordnet.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/xmldocs.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/xmldocs.pyc [deleted file]
nlp_resource_data/nltk/corpus/reader/ycoe.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/reader/ycoe.pyc [deleted file]
nlp_resource_data/nltk/corpus/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/corpus/util.pyc [deleted file]
nlp_resource_data/nltk/data.py [changed mode: 0755->0644]
nlp_resource_data/nltk/data.pyc [deleted file]
nlp_resource_data/nltk/decorators.py [changed mode: 0755->0644]
nlp_resource_data/nltk/decorators.pyc [deleted file]
nlp_resource_data/nltk/downloader.py [changed mode: 0755->0644]
nlp_resource_data/nltk/downloader.pyc [deleted file]
nlp_resource_data/nltk/draw/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/draw/__init__.pyc [deleted file]
nlp_resource_data/nltk/draw/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/draw/__pycache__/cfg.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/draw/__pycache__/dispersion.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/draw/__pycache__/table.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/draw/__pycache__/tree.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/draw/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/draw/cfg.py [changed mode: 0755->0644]
nlp_resource_data/nltk/draw/cfg.pyc [deleted file]
nlp_resource_data/nltk/draw/dispersion.py [changed mode: 0755->0644]
nlp_resource_data/nltk/draw/dispersion.pyc [deleted file]
nlp_resource_data/nltk/draw/table.py [changed mode: 0755->0644]
nlp_resource_data/nltk/draw/table.pyc [deleted file]
nlp_resource_data/nltk/draw/tree.py [changed mode: 0755->0644]
nlp_resource_data/nltk/draw/tree.pyc [deleted file]
nlp_resource_data/nltk/draw/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/draw/util.pyc [deleted file]
nlp_resource_data/nltk/featstruct.py [changed mode: 0755->0644]
nlp_resource_data/nltk/featstruct.pyc [deleted file]
nlp_resource_data/nltk/grammar.py [changed mode: 0755->0644]
nlp_resource_data/nltk/grammar.pyc [deleted file]
nlp_resource_data/nltk/help.py [changed mode: 0755->0644]
nlp_resource_data/nltk/help.pyc [deleted file]
nlp_resource_data/nltk/inference/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/inference/__init__.pyc [deleted file]
nlp_resource_data/nltk/inference/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/inference/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/inference/__pycache__/discourse.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/inference/__pycache__/mace.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/inference/__pycache__/nonmonotonic.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/inference/__pycache__/prover9.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/inference/__pycache__/resolution.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/inference/__pycache__/tableau.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/inference/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/inference/api.pyc [deleted file]
nlp_resource_data/nltk/inference/discourse.py [changed mode: 0755->0644]
nlp_resource_data/nltk/inference/discourse.pyc [deleted file]
nlp_resource_data/nltk/inference/mace.py [changed mode: 0755->0644]
nlp_resource_data/nltk/inference/mace.pyc [deleted file]
nlp_resource_data/nltk/inference/nonmonotonic.py [changed mode: 0755->0644]
nlp_resource_data/nltk/inference/nonmonotonic.pyc [deleted file]
nlp_resource_data/nltk/inference/prover9.py [changed mode: 0755->0644]
nlp_resource_data/nltk/inference/prover9.pyc [deleted file]
nlp_resource_data/nltk/inference/resolution.py [changed mode: 0755->0644]
nlp_resource_data/nltk/inference/resolution.pyc [deleted file]
nlp_resource_data/nltk/inference/tableau.py [changed mode: 0755->0644]
nlp_resource_data/nltk/inference/tableau.pyc [deleted file]
nlp_resource_data/nltk/internals.py [changed mode: 0755->0644]
nlp_resource_data/nltk/internals.pyc [deleted file]
nlp_resource_data/nltk/jsontags.py [changed mode: 0755->0644]
nlp_resource_data/nltk/jsontags.pyc [deleted file]
nlp_resource_data/nltk/lazyimport.py [changed mode: 0755->0644]
nlp_resource_data/nltk/lazyimport.pyc [deleted file]
nlp_resource_data/nltk/lm/__init__.py [new file with mode: 0644]
nlp_resource_data/nltk/lm/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/lm/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/lm/__pycache__/counter.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/lm/__pycache__/models.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/lm/__pycache__/preprocessing.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/lm/__pycache__/smoothing.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/lm/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/lm/__pycache__/vocabulary.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/lm/api.py [new file with mode: 0644]
nlp_resource_data/nltk/lm/counter.py [new file with mode: 0644]
nlp_resource_data/nltk/lm/models.py [new file with mode: 0644]
nlp_resource_data/nltk/lm/preprocessing.py [new file with mode: 0644]
nlp_resource_data/nltk/lm/smoothing.py [new file with mode: 0644]
nlp_resource_data/nltk/lm/util.py [new file with mode: 0644]
nlp_resource_data/nltk/lm/vocabulary.py [new file with mode: 0644]
nlp_resource_data/nltk/metrics/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/metrics/__init__.pyc [deleted file]
nlp_resource_data/nltk/metrics/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/metrics/__pycache__/agreement.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/metrics/__pycache__/aline.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/metrics/__pycache__/association.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/metrics/__pycache__/confusionmatrix.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/metrics/__pycache__/distance.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/metrics/__pycache__/paice.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/metrics/__pycache__/scores.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/metrics/__pycache__/segmentation.cpython-37.pyc [moved from nlp_resource_data/nltk/metrics/segmentation.pyc with 52% similarity, mode: 0644]
nlp_resource_data/nltk/metrics/__pycache__/spearman.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/metrics/agreement.py [changed mode: 0755->0644]
nlp_resource_data/nltk/metrics/agreement.pyc [deleted file]
nlp_resource_data/nltk/metrics/aline.py [changed mode: 0755->0644]
nlp_resource_data/nltk/metrics/aline.pyc [deleted file]
nlp_resource_data/nltk/metrics/association.py [changed mode: 0755->0644]
nlp_resource_data/nltk/metrics/association.pyc [deleted file]
nlp_resource_data/nltk/metrics/confusionmatrix.py [changed mode: 0755->0644]
nlp_resource_data/nltk/metrics/confusionmatrix.pyc [deleted file]
nlp_resource_data/nltk/metrics/distance.py [changed mode: 0755->0644]
nlp_resource_data/nltk/metrics/distance.pyc [deleted file]
nlp_resource_data/nltk/metrics/paice.py [changed mode: 0755->0644]
nlp_resource_data/nltk/metrics/paice.pyc [deleted file]
nlp_resource_data/nltk/metrics/scores.py [changed mode: 0755->0644]
nlp_resource_data/nltk/metrics/scores.pyc [deleted file]
nlp_resource_data/nltk/metrics/segmentation.py [changed mode: 0755->0644]
nlp_resource_data/nltk/metrics/spearman.py [changed mode: 0755->0644]
nlp_resource_data/nltk/metrics/spearman.pyc [deleted file]
nlp_resource_data/nltk/misc/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/misc/__init__.pyc [deleted file]
nlp_resource_data/nltk/misc/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/misc/__pycache__/babelfish.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/misc/__pycache__/chomsky.cpython-37.pyc [moved from nlp_resource_data/nltk/misc/chomsky.pyc with 73% similarity, mode: 0644]
nlp_resource_data/nltk/misc/__pycache__/minimalset.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/misc/__pycache__/sort.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/misc/__pycache__/wordfinder.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/misc/babelfish.py [changed mode: 0755->0644]
nlp_resource_data/nltk/misc/babelfish.pyc [deleted file]
nlp_resource_data/nltk/misc/chomsky.py [changed mode: 0755->0644]
nlp_resource_data/nltk/misc/minimalset.py [changed mode: 0755->0644]
nlp_resource_data/nltk/misc/minimalset.pyc [deleted file]
nlp_resource_data/nltk/misc/sort.py [changed mode: 0755->0644]
nlp_resource_data/nltk/misc/sort.pyc [deleted file]
nlp_resource_data/nltk/misc/wordfinder.py [changed mode: 0755->0644]
nlp_resource_data/nltk/misc/wordfinder.pyc [deleted file]
nlp_resource_data/nltk/parse/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/__init__.pyc [deleted file]
nlp_resource_data/nltk/parse/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/bllip.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/chart.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/corenlp.cpython-37.pyc [moved from nlp_resource_data/nltk/parse/corenlp.pyc with 50% similarity, mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/dependencygraph.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/earleychart.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/evaluate.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/featurechart.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/generate.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/malt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/nonprojectivedependencyparser.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/pchart.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/projectivedependencyparser.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/recursivedescent.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/shiftreduce.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/stanford.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/transitionparser.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/parse/__pycache__/viterbi.cpython-37.pyc [moved from nlp_resource_data/nltk/parse/viterbi.pyc with 51% similarity, mode: 0644]
nlp_resource_data/nltk/parse/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/api.pyc [deleted file]
nlp_resource_data/nltk/parse/bllip.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/bllip.pyc [deleted file]
nlp_resource_data/nltk/parse/chart.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/chart.pyc [deleted file]
nlp_resource_data/nltk/parse/corenlp.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/dependencygraph.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/dependencygraph.pyc [deleted file]
nlp_resource_data/nltk/parse/earleychart.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/earleychart.pyc [deleted file]
nlp_resource_data/nltk/parse/evaluate.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/evaluate.pyc [deleted file]
nlp_resource_data/nltk/parse/featurechart.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/featurechart.pyc [deleted file]
nlp_resource_data/nltk/parse/generate.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/generate.pyc [deleted file]
nlp_resource_data/nltk/parse/malt.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/malt.pyc [deleted file]
nlp_resource_data/nltk/parse/nonprojectivedependencyparser.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/nonprojectivedependencyparser.pyc [deleted file]
nlp_resource_data/nltk/parse/pchart.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/pchart.pyc [deleted file]
nlp_resource_data/nltk/parse/projectivedependencyparser.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/projectivedependencyparser.pyc [deleted file]
nlp_resource_data/nltk/parse/recursivedescent.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/recursivedescent.pyc [deleted file]
nlp_resource_data/nltk/parse/shiftreduce.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/shiftreduce.pyc [deleted file]
nlp_resource_data/nltk/parse/stanford.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/stanford.pyc [deleted file]
nlp_resource_data/nltk/parse/transitionparser.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/transitionparser.pyc [deleted file]
nlp_resource_data/nltk/parse/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/parse/util.pyc [deleted file]
nlp_resource_data/nltk/parse/viterbi.py [changed mode: 0755->0644]
nlp_resource_data/nltk/probability.py [changed mode: 0755->0644]
nlp_resource_data/nltk/probability.pyc [deleted file]
nlp_resource_data/nltk/sem/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/__pycache__/__init__.cpython-37.pyc [moved from nlp_resource_data/nltk/sem/__init__.pyc with 52% similarity, mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/boxer.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/chat80.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/cooper_storage.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/drt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/drt_glue_demo.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/evaluate.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/glue.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/hole.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/lfg.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/linearlogic.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/logic.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/relextract.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/skolemize.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sem/boxer.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/boxer.pyc [deleted file]
nlp_resource_data/nltk/sem/chat80.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/chat80.pyc [deleted file]
nlp_resource_data/nltk/sem/cooper_storage.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/cooper_storage.pyc [deleted file]
nlp_resource_data/nltk/sem/drt.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/drt.pyc [deleted file]
nlp_resource_data/nltk/sem/drt_glue_demo.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/drt_glue_demo.pyc [deleted file]
nlp_resource_data/nltk/sem/evaluate.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/evaluate.pyc [deleted file]
nlp_resource_data/nltk/sem/glue.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/glue.pyc [deleted file]
nlp_resource_data/nltk/sem/hole.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/hole.pyc [deleted file]
nlp_resource_data/nltk/sem/lfg.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/lfg.pyc [deleted file]
nlp_resource_data/nltk/sem/linearlogic.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/linearlogic.pyc [deleted file]
nlp_resource_data/nltk/sem/logic.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/logic.pyc [deleted file]
nlp_resource_data/nltk/sem/relextract.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/relextract.pyc [deleted file]
nlp_resource_data/nltk/sem/skolemize.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/skolemize.pyc [deleted file]
nlp_resource_data/nltk/sem/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sem/util.pyc [deleted file]
nlp_resource_data/nltk/sentiment/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sentiment/__init__.pyc [deleted file]
nlp_resource_data/nltk/sentiment/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sentiment/__pycache__/sentiment_analyzer.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sentiment/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sentiment/__pycache__/vader.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/sentiment/sentiment_analyzer.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sentiment/sentiment_analyzer.pyc [deleted file]
nlp_resource_data/nltk/sentiment/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sentiment/util.pyc [deleted file]
nlp_resource_data/nltk/sentiment/vader.py [changed mode: 0755->0644]
nlp_resource_data/nltk/sentiment/vader.pyc [deleted file]
nlp_resource_data/nltk/stem/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/stem/__init__.pyc [deleted file]
nlp_resource_data/nltk/stem/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/__pycache__/arlstem.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/__pycache__/cistem.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/__pycache__/isri.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/__pycache__/lancaster.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/__pycache__/porter.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/__pycache__/regexp.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/__pycache__/rslp.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/__pycache__/snowball.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/__pycache__/wordnet.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/stem/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/stem/api.pyc [deleted file]
nlp_resource_data/nltk/stem/arlstem.py [changed mode: 0755->0644]
nlp_resource_data/nltk/stem/arlstem.pyc [deleted file]
nlp_resource_data/nltk/stem/cistem.py [new file with mode: 0644]
nlp_resource_data/nltk/stem/isri.py [changed mode: 0755->0644]
nlp_resource_data/nltk/stem/isri.pyc [deleted file]
nlp_resource_data/nltk/stem/lancaster.py [changed mode: 0755->0644]
nlp_resource_data/nltk/stem/lancaster.pyc [deleted file]
nlp_resource_data/nltk/stem/porter.py [changed mode: 0755->0644]
nlp_resource_data/nltk/stem/porter.pyc [deleted file]
nlp_resource_data/nltk/stem/regexp.py [changed mode: 0755->0644]
nlp_resource_data/nltk/stem/regexp.pyc [deleted file]
nlp_resource_data/nltk/stem/rslp.py [changed mode: 0755->0644]
nlp_resource_data/nltk/stem/rslp.pyc [deleted file]
nlp_resource_data/nltk/stem/snowball.py [changed mode: 0755->0644]
nlp_resource_data/nltk/stem/snowball.pyc [deleted file]
nlp_resource_data/nltk/stem/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/stem/util.pyc [deleted file]
nlp_resource_data/nltk/stem/wordnet.py [changed mode: 0755->0644]
nlp_resource_data/nltk/stem/wordnet.pyc [deleted file]
nlp_resource_data/nltk/tag/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/__pycache__/__init__.cpython-37.pyc [moved from nlp_resource_data/nltk/tag/__init__.pyc with 54% similarity, mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/brill.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/brill_trainer.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/crf.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/hmm.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/hunpos.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/mapping.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/perceptron.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/senna.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/sequential.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/stanford.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/tnt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tag/__pycache__/util.cpython-37.pyc [moved from nlp_resource_data/nltk/tag/util.pyc with 58% similarity, mode: 0644]
nlp_resource_data/nltk/tag/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/api.pyc [deleted file]
nlp_resource_data/nltk/tag/brill.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/brill.pyc [deleted file]
nlp_resource_data/nltk/tag/brill_trainer.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/brill_trainer.pyc [deleted file]
nlp_resource_data/nltk/tag/crf.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/crf.pyc [deleted file]
nlp_resource_data/nltk/tag/hmm.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/hmm.pyc [deleted file]
nlp_resource_data/nltk/tag/hunpos.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/hunpos.pyc [deleted file]
nlp_resource_data/nltk/tag/mapping.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/mapping.pyc [deleted file]
nlp_resource_data/nltk/tag/perceptron.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/perceptron.pyc [deleted file]
nlp_resource_data/nltk/tag/senna.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/senna.pyc [deleted file]
nlp_resource_data/nltk/tag/sequential.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/sequential.pyc [deleted file]
nlp_resource_data/nltk/tag/stanford.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/stanford.pyc [deleted file]
nlp_resource_data/nltk/tag/tnt.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tag/tnt.pyc [deleted file]
nlp_resource_data/nltk/tag/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tbl/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tbl/__init__.pyc [deleted file]
nlp_resource_data/nltk/tbl/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tbl/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tbl/__pycache__/demo.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tbl/__pycache__/erroranalysis.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tbl/__pycache__/feature.cpython-37.pyc [moved from nlp_resource_data/nltk/tbl/feature.pyc with 55% similarity, mode: 0644]
nlp_resource_data/nltk/tbl/__pycache__/rule.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tbl/__pycache__/template.cpython-37.pyc [moved from nlp_resource_data/nltk/tbl/template.pyc with 51% similarity, mode: 0644]
nlp_resource_data/nltk/tbl/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tbl/api.pyc [deleted file]
nlp_resource_data/nltk/tbl/demo.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tbl/demo.pyc [deleted file]
nlp_resource_data/nltk/tbl/erroranalysis.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tbl/erroranalysis.pyc [deleted file]
nlp_resource_data/nltk/tbl/feature.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tbl/rule.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tbl/rule.pyc [deleted file]
nlp_resource_data/nltk/tbl/template.py [changed mode: 0755->0644]
nlp_resource_data/nltk/test/__init__.py [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/all.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/childes_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/classify_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/discourse_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/gensim_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/gluesemantics_malt_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/inference_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/nonmonotonic_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/probability_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/runtests.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/semantics_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/translate_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/__pycache__/wordnet_fixt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/all.py [new file with mode: 0644]
nlp_resource_data/nltk/test/bleu.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/bnc.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/ccg.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/ccg_semantics.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/chat80.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/childes.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/childes_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/chunk.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/classify.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/classify_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/collections.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/collocations.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/concordance.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/corpus.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/corpus_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/crubadan.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/data.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/dependency.doctest [new file with mode: 0755]
nlp_resource_data/nltk/test/discourse.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/discourse_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/drt.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/featgram.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/featstruct.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/framenet.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/generate.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/gensim.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/gensim_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/gluesemantics.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/gluesemantics_malt.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/gluesemantics_malt_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/grammar.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/grammartestsuites.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/index.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/inference.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/inference_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/internals.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/japanese.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/lm.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/logic.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/meteor.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/metrics.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/misc.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/nonmonotonic.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/nonmonotonic_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/paice.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/parse.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/portuguese_en.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/portuguese_en_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/probability.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/probability_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/propbank.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/relextract.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/resolution.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/runtests.py [new file with mode: 0644]
nlp_resource_data/nltk/test/segmentation_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/semantics.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/semantics_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/sentiment.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/sentiwordnet.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/simple.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/stem.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/tag.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/tokenize.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/toolbox.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/translate.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/translate_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/tree.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/treeprettyprinter.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/treetransforms.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__init__.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_aline.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_brill.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_cfd_mutation.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_cfg2chomsky.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_chunk.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_classify.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_collocations.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_concordance.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_corenlp.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_corpora.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_corpus_views.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_data.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_disagreement.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_freqdist.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_hmm.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_json2csv_corpus.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_json_serialization.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_naivebayes.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_nombank.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_pl196x.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_pos_tag.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_rte_classify.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_seekable_unicode_stream_reader.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_senna.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_stem.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_tag.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_tgrep.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_tokenize.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_twitter_auth.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/test_wordnet.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/__pycache__/utils.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/lm/__init__.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/lm/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/lm/__pycache__/test_counter.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/lm/__pycache__/test_models.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/lm/__pycache__/test_preprocessing.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/lm/__pycache__/test_vocabulary.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/lm/test_counter.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/lm/test_models.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/lm/test_preprocessing.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/lm/test_vocabulary.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_aline.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_brill.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_cfd_mutation.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_cfg2chomsky.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_chunk.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_classify.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_collocations.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_concordance.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_corenlp.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_corpora.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_corpus_views.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_data.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_disagreement.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_freqdist.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_hmm.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_json2csv_corpus.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_json_serialization.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_naivebayes.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_nombank.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_pl196x.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_pos_tag.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_rte_classify.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_seekable_unicode_stream_reader.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_senna.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_stem.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_tag.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_tgrep.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_tokenize.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_twitter_auth.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/test_wordnet.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__init__.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_bleu.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm4.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm5.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_nist.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_stack_decoder.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/test_bleu.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/test_gdfa.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/test_ibm1.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/test_ibm2.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/test_ibm3.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/test_ibm4.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/test_ibm5.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/test_ibm_model.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/test_nist.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/translate/test_stack_decoder.py [new file with mode: 0644]
nlp_resource_data/nltk/test/unit/utils.py [new file with mode: 0644]
nlp_resource_data/nltk/test/util.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/wordnet.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/wordnet_fixt.py [new file with mode: 0644]
nlp_resource_data/nltk/test/wordnet_lch.doctest [new file with mode: 0644]
nlp_resource_data/nltk/test/wsd.doctest [new file with mode: 0644]
nlp_resource_data/nltk/text.py [changed mode: 0755->0644]
nlp_resource_data/nltk/text.pyc [deleted file]
nlp_resource_data/nltk/tgrep.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tgrep.pyc [deleted file]
nlp_resource_data/nltk/tokenize/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/__pycache__/__init__.cpython-37.pyc [moved from nlp_resource_data/nltk/tokenize/__init__.pyc with 51% similarity, mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/casual.cpython-37.pyc [moved from nlp_resource_data/nltk/tokenize/casual.pyc with 51% similarity, mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/destructive.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/mwe.cpython-37.pyc [moved from nlp_resource_data/nltk/tokenize/mwe.pyc with 52% similarity, mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/nist.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/punkt.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/regexp.cpython-37.pyc [moved from nlp_resource_data/nltk/tokenize/regexp.pyc with 51% similarity, mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/repp.cpython-37.pyc [moved from nlp_resource_data/nltk/tokenize/repp.pyc with 50% similarity, mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/sexpr.cpython-37.pyc [moved from nlp_resource_data/nltk/tokenize/sexpr.pyc with 63% similarity, mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/simple.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/sonority_sequencing.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/stanford.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/stanford_segmenter.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/texttiling.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/toktok.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/treebank.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/__pycache__/util.cpython-37.pyc [moved from nlp_resource_data/nltk/tokenize/util.pyc with 57% similarity, mode: 0644]
nlp_resource_data/nltk/tokenize/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/api.pyc [deleted file]
nlp_resource_data/nltk/tokenize/casual.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/destructive.py [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/moses.py [deleted file]
nlp_resource_data/nltk/tokenize/moses.pyc [deleted file]
nlp_resource_data/nltk/tokenize/mwe.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/nist.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/nist.pyc [deleted file]
nlp_resource_data/nltk/tokenize/punkt.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/punkt.pyc [deleted file]
nlp_resource_data/nltk/tokenize/regexp.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/repp.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/sexpr.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/simple.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/simple.pyc [deleted file]
nlp_resource_data/nltk/tokenize/sonority_sequencing.py [new file with mode: 0644]
nlp_resource_data/nltk/tokenize/stanford.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/stanford.pyc [deleted file]
nlp_resource_data/nltk/tokenize/stanford_segmenter.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/stanford_segmenter.pyc [deleted file]
nlp_resource_data/nltk/tokenize/texttiling.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/texttiling.pyc [deleted file]
nlp_resource_data/nltk/tokenize/toktok.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/toktok.pyc [deleted file]
nlp_resource_data/nltk/tokenize/treebank.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tokenize/treebank.pyc [deleted file]
nlp_resource_data/nltk/tokenize/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/toolbox.py [changed mode: 0755->0644]
nlp_resource_data/nltk/toolbox.pyc [deleted file]
nlp_resource_data/nltk/translate/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/__init__.pyc [deleted file]
nlp_resource_data/nltk/translate/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/bleu_score.cpython-37.pyc [moved from nlp_resource_data/nltk/translate/bleu_score.pyc with 59% similarity, mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/chrf_score.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/gale_church.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/gdfa.cpython-37.pyc [moved from nlp_resource_data/nltk/translate/gdfa.pyc with 53% similarity, mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/gleu_score.cpython-37.pyc [moved from nlp_resource_data/nltk/translate/gleu_score.pyc with 76% similarity, mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/ibm1.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/ibm2.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/ibm3.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/ibm4.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/ibm5.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/ibm_model.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/meteor_score.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/metrics.cpython-37.pyc [moved from nlp_resource_data/nltk/translate/metrics.pyc with 55% similarity, mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/nist_score.cpython-37.pyc [moved from nlp_resource_data/nltk/translate/nist_score.pyc with 51% similarity, mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/phrase_based.cpython-37.pyc [moved from nlp_resource_data/nltk/translate/phrase_based.pyc with 62% similarity, mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/ribes_score.cpython-37.pyc [moved from nlp_resource_data/nltk/translate/ribes_score.pyc with 56% similarity, mode: 0644]
nlp_resource_data/nltk/translate/__pycache__/stack_decoder.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/translate/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/api.pyc [deleted file]
nlp_resource_data/nltk/translate/bleu_score.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/chrf_score.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/chrf_score.pyc [deleted file]
nlp_resource_data/nltk/translate/gale_church.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/gale_church.pyc [deleted file]
nlp_resource_data/nltk/translate/gdfa.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/gleu_score.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/ibm1.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/ibm1.pyc [deleted file]
nlp_resource_data/nltk/translate/ibm2.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/ibm2.pyc [deleted file]
nlp_resource_data/nltk/translate/ibm3.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/ibm3.pyc [deleted file]
nlp_resource_data/nltk/translate/ibm4.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/ibm4.pyc [deleted file]
nlp_resource_data/nltk/translate/ibm5.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/ibm5.pyc [deleted file]
nlp_resource_data/nltk/translate/ibm_model.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/ibm_model.pyc [deleted file]
nlp_resource_data/nltk/translate/meteor_score.py [new file with mode: 0644]
nlp_resource_data/nltk/translate/metrics.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/nist_score.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/phrase_based.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/ribes_score.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/stack_decoder.py [changed mode: 0755->0644]
nlp_resource_data/nltk/translate/stack_decoder.pyc [deleted file]
nlp_resource_data/nltk/tree.py [changed mode: 0755->0644]
nlp_resource_data/nltk/tree.pyc [deleted file]
nlp_resource_data/nltk/treeprettyprinter.py [changed mode: 0755->0644]
nlp_resource_data/nltk/treeprettyprinter.pyc [deleted file]
nlp_resource_data/nltk/treetransforms.py [changed mode: 0755->0644]
nlp_resource_data/nltk/twitter/__init__.py [changed mode: 0755->0644]
nlp_resource_data/nltk/twitter/__init__.pyc [deleted file]
nlp_resource_data/nltk/twitter/__pycache__/__init__.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/twitter/__pycache__/api.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/twitter/__pycache__/common.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/twitter/__pycache__/twitter_demo.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/twitter/__pycache__/twitterclient.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/twitter/__pycache__/util.cpython-37.pyc [new file with mode: 0644]
nlp_resource_data/nltk/twitter/api.py [changed mode: 0755->0644]
nlp_resource_data/nltk/twitter/api.pyc [deleted file]
nlp_resource_data/nltk/twitter/common.py [changed mode: 0755->0644]
nlp_resource_data/nltk/twitter/common.pyc [deleted file]
nlp_resource_data/nltk/twitter/twitter_demo.py [changed mode: 0755->0644]
nlp_resource_data/nltk/twitter/twitter_demo.pyc [deleted file]
nlp_resource_data/nltk/twitter/twitterclient.py [changed mode: 0755->0644]
nlp_resource_data/nltk/twitter/twitterclient.pyc [deleted file]
nlp_resource_data/nltk/twitter/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/twitter/util.pyc [deleted file]
nlp_resource_data/nltk/util.py [changed mode: 0755->0644]
nlp_resource_data/nltk/util.pyc [deleted file]
nlp_resource_data/nltk/wsd.py [changed mode: 0755->0644]
nlp_resource_data/nltk/wsd.pyc [deleted file]
packaging/nlp.spec

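The file list above repeats the same two moves for nearly every module: the Python 2 bytecode file next to the source (for example nlp_resource_data/langdetect/detector.pyc) is deleted, and an interpreter-tagged replacement appears under __pycache__/ (detector.cpython-37.pyc), because Python 3 caches bytecode per PEP 3147 instead of writing sibling .pyc files. A minimal sketch, standard library only, of where CPython 3 derives those paths (the module path is illustrative):

    import importlib.util
    import sys

    # The tag embedded in cached file names; 'cpython-37' on the
    # Python 3.7 interpreter this commit targets.
    print(sys.implementation.cache_tag)

    # Where Python 3 writes and looks up the bytecode cache for a source file.
    print(importlib.util.cache_from_source("nlp_resource_data/langdetect/detector.py"))
    # -> nlp_resource_data/langdetect/__pycache__/detector.cpython-37.pyc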
diff --git a/nlp_resource_data/CMakeLists.txt b/nlp_resource_data/CMakeLists.txt
index 9ad16ba..fe943f6 100755 (executable)
--- a/nlp_resource_data/CMakeLists.txt
+++ b/nlp_resource_data/CMakeLists.txt
@@ -6,6 +6,6 @@ PROJECT(${fw_name} C)
 
 INCLUDE(FindPkgConfig)
 
-INSTALL(DIRECTORY nltk DESTINATION ${LIBDIR}/python2.7/site-packages)
-INSTALL(DIRECTORY langdetect DESTINATION ${LIBDIR}/python2.7/site-packages)
+INSTALL(DIRECTORY nltk DESTINATION ${LIBDIR}/python3.7/site-packages)
+INSTALL(DIRECTORY langdetect DESTINATION ${LIBDIR}/python3.7/site-packages)
 INSTALL(DIRECTORY nltk_data DESTINATION /usr/local/lib/)
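The only textual change in nlp_resource_data/CMakeLists.txt retargets the two INSTALL destinations from the python2.7 to the python3.7 site-packages directory; the interpreter version stays hardcoded. As a hedged aside (not what this commit does), the per-interpreter path could instead be queried at build time, so a later interpreter bump would not need another edit; a minimal Python sketch of that query:

    # Hypothetical helper, not part of this commit: print the running
    # interpreter's site-packages directory ("purelib"), which CMake could
    # capture via execute_process() instead of hardcoding python3.7.
    import sysconfig

    print(sysconfig.get_path("purelib"))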
diff --git a/nlp_resource_data/langdetect/__init__.py b/nlp_resource_data/langdetect/__init__.py
old mode 100755 (executable)
new mode 100644 (file)
diff --git a/nlp_resource_data/langdetect/__init__.pyc b/nlp_resource_data/langdetect/__init__.pyc
deleted file mode 100755 (executable)
index 1ff21ed..0000000
Binary files a/nlp_resource_data/langdetect/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/langdetect/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/langdetect/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..c6e6688
Binary files /dev/null and b/nlp_resource_data/langdetect/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/langdetect/__pycache__/detector.cpython-37.pyc b/nlp_resource_data/langdetect/__pycache__/detector.cpython-37.pyc
new file mode 100644 (file)
index 0000000..90f1c5a
Binary files /dev/null and b/nlp_resource_data/langdetect/__pycache__/detector.cpython-37.pyc differ
diff --git a/nlp_resource_data/langdetect/__pycache__/detector_factory.cpython-37.pyc b/nlp_resource_data/langdetect/__pycache__/detector_factory.cpython-37.pyc
new file mode 100644 (file)
index 0000000..09e32dd
Binary files /dev/null and b/nlp_resource_data/langdetect/__pycache__/detector_factory.cpython-37.pyc differ
diff --git a/nlp_resource_data/langdetect/__pycache__/lang_detect_exception.cpython-37.pyc b/nlp_resource_data/langdetect/__pycache__/lang_detect_exception.cpython-37.pyc
new file mode 100644 (file)
index 0000000..72bb726
Binary files /dev/null and b/nlp_resource_data/langdetect/__pycache__/lang_detect_exception.cpython-37.pyc differ
diff --git a/nlp_resource_data/langdetect/__pycache__/language.cpython-37.pyc b/nlp_resource_data/langdetect/__pycache__/language.cpython-37.pyc
new file mode 100644 (file)
index 0000000..638d804
Binary files /dev/null and b/nlp_resource_data/langdetect/__pycache__/language.cpython-37.pyc differ
diff --git a/nlp_resource_data/langdetect/detector.py b/nlp_resource_data/langdetect/detector.py
old mode 100755 (executable)
new mode 100644 (file)
diff --git a/nlp_resource_data/langdetect/detector.pyc b/nlp_resource_data/langdetect/detector.pyc
deleted file mode 100755 (executable)
index 1db3897..0000000
Binary files a/nlp_resource_data/langdetect/detector.pyc and /dev/null differ
diff --git a/nlp_resource_data/langdetect/detector_factory.pyc b/nlp_resource_data/langdetect/detector_factory.pyc
deleted file mode 100755 (executable)
index 50fd9da..0000000
Binary files a/nlp_resource_data/langdetect/detector_factory.pyc and /dev/null differ
diff --git a/nlp_resource_data/langdetect/lang_detect_exception.pyc b/nlp_resource_data/langdetect/lang_detect_exception.pyc
deleted file mode 100755 (executable)
index c39b6be..0000000
Binary files a/nlp_resource_data/langdetect/lang_detect_exception.pyc and /dev/null differ
diff --git a/nlp_resource_data/langdetect/language.py b/nlp_resource_data/langdetect/language.py
old mode 100755 (executable)
new mode 100644 (file)
diff --git a/nlp_resource_data/langdetect/language.pyc b/nlp_resource_data/langdetect/language.pyc
deleted file mode 100755 (executable)
index 8b6ae44..0000000
Binary files a/nlp_resource_data/langdetect/language.pyc and /dev/null differ
diff --git a/nlp_resource_data/langdetect/profiles/af b/nlp_resource_data/langdetect/profiles/af
new file mode 100755 (executable)
index 0000000..be8b172
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/af
@@ -0,0 +1 @@
+{"freq":{"D":9246,"E":2445,"F":2510,"G":3299,"A":6930,"B":3706,"C":2451,"L":2519,"M":3951,"N":3334,"O":2514,"H":3034,"I":2837,"J":2196,"K":3663,"U":687,"T":2336,"W":2258,"V":2714,"Q":182,"P":3097,"S":8234,"R":3039,"Y":252,"X":214,"Z":422,"f":13583,"g":42805,"d":77385,"Feb":207,"e":240974,"b":21626,"c":4896,"a":128566,"n":127153,"o":86673,"l":57433,"m":31352,"j":4048,"k":45378,"h":17527,"i":140621,"w":24930,"v":32618,"u":35166,"t":82606,"s":102389,"r":98861,"q":199,"p":23331,"z":1187,"y":11757,"x":1123,"ï":264,"ë":2903,"ê":1053,"é":765,"á":212,"ü":233,"ö":184,"ó":216,"Eur":318,"Eng":637," l":3565," m":7731," n":16000," o":12065," h":7358," i":23795," j":1325," k":6363," d":33601," e":13358," f":1200," g":11018,"р":242,"с":306," a":8747,"т":161," b":8379," c":434," u":1931," t":8537," w":13128," v":24617," p":4859," s":15482," r":3617," J":2155," K":3559," H":2961," I":2185," N":3120," O":2318," L":2396," M":3803," B":3554," C":2109," A":6365," F":2371," G":3138," D":8986," E":2271,"л":219,"к":266," Z":368," Y":241,"и":371,"о":333,"н":199," S":7708,"Ger":200," R":2881,"в":199," Q":162," P":2912,"а":481," W":2205," V":2322," U":571,"е":266," T":2130,"Fra":1006,"A ":345,"Da":804,"Co":478,"Ch":621,"Du":1025,"Do":201,"De":763,"Di":5828,"Fe":367,"Eu":354,"En":721,"El":212,"Ge":659,"Ga":319,"I ":452,"Fr":1217,"Fo":165,"Fi":216,"II ":246,"C ":278,"Au":486,"Ar":425,"At":187,"As":201,"D ":158,"Ba":648,"Af":2087,"Am":566,"An":491,"Ap":353,"Al":628,"Bu":243,"Br":778,"Ca":399,"Bi":180,"Be":880,"Bo":481,"Bl":161,"Kr":224,"Ko":657,"Le":490,"Li":504,"La":658,"Lu":245,"Lo":347,"Me":800,"Mi":548,"Ma":1360,"Mu":186,"Mo":627,"Ni":257,"Ne":763,"Na":666,"No":1092,"Ok":339,"Ol":206,"Her":157,"Gr":1326,"Go":356,"Ha":534,"He":680,"II":369,"Hi":301,"Ho":503,"Hu":294,"Hy":550,"In":919,"Is":158,"It":218,"Ja":713,"Je":157,"Jo":565,"Ju":623,"Ka":1489,"Ki":194,"Ke":447,"Un":253,"Tu":248,"Tr":236,"To":272,"Th":313,"Te":262,"Ta":276,"V ":280,"Sw":402,"Sy":292,"St":964,"Su":1701,"Wo":181,"Wi":534,"Wa":412,"We":720,"Vo":315,"Vr":251,"Vi":374,"Va":314,"Ve":689,"Pr":551,"S ":157,"Pe":310,"Pa":727,"Po":681,"Pi":230,"Os":236,"Oo":423,"Or":191,"Se":814,"Sc":197,"Si":387,"Sl":222,"Sk":201,"Sp":443,"So":680,"Ru":645,"Ry":194,"Sa":728,"Re":621,"Ri":222,"Ro":746,"SA":233,"Ra":223,"Gre":501,"Gri":383,"Gra":158,"b ":1179,"Gro":254,"a ":7054,"i ":2513,"gd":570,"ge":16432,"ga":1621,"gb":319,"fk":224,"fl":183,"fg":323,"ff":351,"fi":1111,"fh":169,"fs":1224,"fr":2334,"fu":174,"ft":300,"fo":725,"Int":180,"he":6229,"ha":2610,"gn":360,"gl":334,"gi":2135,"gh":921,"gg":418,"gu":592,"gt":1512,"gs":1974,"gr":3459,"go":1385,"dt":211,"du":998,"dw":506,"g ":10256,"ea":936,"eb":3497,"ec":406,"ed":5721,"de":18394,"dd":606,"dg":161,"di":29432,"dh":249,"dj":173,"dm":299,"do":2521,"ds":2062,"dr":1453,"ew":3034,"eu":3603,"ev":2016,"ey":309,"fa":570,"h ":864,"Ind":251,"fd":469,"fe":948,"eh":993,"eg":3187,"ef":995,"ee":12296,"el":15653,"ek":7920,"ei":5726,"ep":2393,"eo":692,"en":27638,"em":4686,"et":10282,"es":15156,"er":33393,"ca":479,"e ":78745,"by":1025,"br":1953,"bu":1057,"bo":2123,"bl":1117,"bi":1966,"bb":156,"be":8513,"db":222,"In ":319,"da":3617,"f ":4067,"ct":207,"co":446,"ck":502,"ci":340,"ch":1526,"ce":547,"c ":311,"az":190,"ay":279,"ba":2057,"d 
":15502,"at":11369,"as":9342,"ar":11432,"aw":597,"av":407,"au":883,"ak":2797,"al":9554,"ai":1291,"aj":155,"ap":2087,"am":3989,"an":36357,"ac":615,"ad":4564,"aa":18307,"ab":1064,"ag":2729,"ah":292,"ae":907,"af":1901,"nu":917,"nt":6760,"ns":9243,"nr":212,"no":2885,"nn":1621,"ny":191,"nw":666,"nv":455,"oe":6026,"of":3797,"oc":387,"od":1636,"oa":178,"ob":729,"om":5480,"on":10533,"ok":2525,"ol":5346,"oi":587,"og":2271,"oh":382,"ot":3827,"os":3306,"ov":1152,"ou":2993,"op":4558,"oo":12667,"or":14221,"r ":19504,"ow":1144,"pe":3683,"pg":229,"pa":2371,"pl":1195,"lê":351,"po":1932,"ph":223,"pi":1008,"lo":3369,"lm":315,"ll":2990,"ls":2634,"lp":392,"lw":311,"lv":239,"lu":1548,"lt":993,"ly":716,"o ":2083,"md":261,"ma":3853,"mb":2182,"mg":224,"me":9151,"mi":2940,"mm":802,"mp":1223,"mo":1485,"ië":1437,"mt":249,"ms":966,"mu":1085,"p ":4720,"na":6444,"nb":510,"nc":507,"nd":12581,"ne":5737,"nf":203,"ng":9804,"nh":460,"ni":6127,"nj":300,"nk":2057,"nl":616,"nm":203,"jo":532,"ki":2683,"kh":210,"kg":239,"ke":8584,"ka":6722,"m ":5913,"kw":457,"ky":282,"ks":2318,"kt":2084,"ku":1443,"ko":3908,"kr":2375,"kk":1579,"kl":2200,"km":469,"li":9515,"lh":279,"lk":1158,"lj":705,"le":10290,"ld":1944,"lg":1526,"lf":717,"la":8341,"lb":446,"n ":58065,"hr":313,"ht":702,"hu":1684,"hi":1067,"ho":3048,"dé":160,"id":5034,"ic":1058,"ib":451,"ia":2568,"ig":5540,"if":581,"ie":47836,"hy":348,"k ":9212,"ir":2359,"is":17403,"it":9361,"iu":405,"iv":1008,"iw":219,"ik":8953,"il":3774,"im":1386,"in":25004,"io":1984,"eë":1032,"ip":899,"je":609,"ji":572,"iz":156,"l ":8172,"ja":1960,"wy":994,"z ":242,"wi":1800,"wo":4179,"vy":166,"y ":4684,"wa":9856,"we":6959,"vl":1196,"vi":4040,"vu":178,"vr":662,"vo":4078,"uw":282,"uu":992,"ve":5906,"va":16173,"x ":845,"ui":7822,"uk":678,"ul":2052,"ue":905,"ug":1045,"ur":5410,"us":5098,"ut":907,"um":1711,"un":2596,"up":170,"ty":1434,"tu":2643,"tt":1277,"tw":1177,"tv":217,"ub":1182,"ua":728,"ud":950,"uc":160,"w ":232,"to":5433,"tm":201,"tl":667,"ts":3814,"tr":4026,"tg":532,"te":20430,"tk":279,"tj":177,"ti":5658,"th":1701,"tb":213,"ta":9118,"su":1177,"sv":424,"ss":2799,"st":17122,"sy":1309,"sw":531,"sl":1811,"sk":5006,"sn":242,"sm":693,"sp":2566,"oë":412,"so":3731,"sr":312,"sd":385,"sc":448,"sf":208,"se":15556,"sh":473,"sg":396,"sj":338,"si":8436,"u ":1834,"sa":2367,"sb":577,"rr":652,"rs":6262,"rt":4139,"ru":2543,"rv":1198,"rw":1199,"ry":2450,"rp":1265,"ro":8165,"rn":1586,"rm":2087,"rl":1734,"rk":2996,"ri":11752,"rh":614,"rg":2653,"rf":378,"re":10923,"rd":7372,"rc":234,"rb":955,"ra":7710,"t ":22731,"qu":168,"s ":35284,"px":614,"Hy ":529,"py":231,"pt":765,"pu":844,"pp":1058,"pr":3258,"ps":659,"wê":320,"zi":170,"ze":169,"za":209,"yg":162,"ye":406,"yf":643,"yd":927,"yw":439,"ys":1141,"yn":1041,"yl":288,"yk":1145,"Apr":247,"Aug":272,"Afr":2048,"Ame":464,"Ber":218,"Bel":171,"Bre":163,"Bra":191,"Bri":282,"Des":273,"Daa":460,"Chr":224,"Cha":171,"ër":307,"ël":325,"êr":697,"ë ":1979,"ê ":310,"é ":228,"Dit":1028,"Die":4537,"Dui":918,"Ned":417,"Nas":187,"Nov":238,"Noo":595,"Okt":256,"Oli":158,"Oos":361,"Par":313,"Pro":177,"Pre":186,"SA ":161,"Ita":207,"Jan":348,"Joh":290,"Jul":297,"Jun":245,"Kaa":543,"Kan":220,"Kat":191,"Kar":171,"Ker":270,"Kon":276,"Lat":181,"Lit":162,"Mei":281,"Mar":370,"Maa":286,"Mon":210,"Mid":157,"Wil":165,"Wes":439,"Vry":192,"Vol":161,"êre":674,"Swe":193,"Sy 
":252,"Sui":1515,"Sta":443,"Ste":208,"Sep":228,"Spa":253,"Rus":560,"Sch":162,"Rep":214,"Rom":176,"Ver":555,"Uni":236,"The":196,"Tur":159,"bin":400,"blo":205,"bli":525,"bla":215,"boe":246,"boo":276,"bor":587,"bou":330,"ban":283,"bal":289,"bai":191,"baa":372,"bas":270,"bar":272,"beh":366,"beg":372,"bee":325,"bed":285,"ber":1916,"bel":540,"bek":1148,"bew":349,"bev":630,"bes":1308,"bet":510,"bie":1052,"ce ":276,"bri":159,"bro":237,"bra":211,"bre":258,"bru":1062,"bur":584,"by ":693,"am ":1182,"ake":292,"al ":2759,"ain":204,"ak ":856,"aie":241,"agt":446,"anu":467,"ann":632,"ant":1705,"ans":3841,"ane":404,"ang":1856,"ani":742,"anj":191,"ank":961,"ap ":635,"ana":788,"anc":195,"and":5528,"amm":186,"amp":480,"ami":512,"ame":657,"amb":236,"ama":204,"alt":231,"als":160,"all":667,"alk":171,"alg":320,"ali":1276,"ald":217,"ale":2352,"alf":209,"ala":367,"an ":18298,"aks":261,"akt":740,"akl":166,"abe":229,"abi":201,"aby":216,"ae ":624,"aag":175,"aad":172,"aak":679,"aai":350,"aan":6190,"aal":1515,"aam":1083,"aas":579,"aar":5293,"aap":567,"aat":1563,"ad ":2565,"afg":266,"ai ":311,"age":184,"afd":268,"adm":206,"adi":436,"ade":539,"ag ":1304,"ads":176,"ach":166,"ada":249,"af ":494,"at ":6755,"arg":256,"are":965,"ard":1124,"ara":390,"aro":332,"arn":185,"arm":157,"arl":301,"ark":397,"ari":1177,"arv":249,"ars":463,"art":1494,"ary":171,"asi":1669,"ase":210,"aso":169,"ar ":3216,"apa":189,"app":418,"aps":269,"as ":5230,"awe":308,"awi":169,"ata":346,"ast":673,"ass":518,"ato":426,"ate":1382,"ati":871,"ats":404,"atu":409,"aty":167,"aus":156,"jaa":1087,"jar":470,"je ":175,"joe":306,"jin":161,"jie":306,"ito":170,"itt":191,"its":1623,"isk":182,"ism":266,"iss":374,"ist":1582,"ita":608,"ite":1331,"itg":386,"iti":469,"ius":176,"ium":203,"ivi":590,"ive":294,"is ":12546,"ion":1252,"eër":158,"ipa":265,"ir ":1648,"isi":1018,"ise":601,"isa":220,"ire":181,"it ":3772,"kil":644,"kie":536,"kin":914,"km ":266,"kgr":173,"kee":210,"kei":339,"kel":962,"ken":2090,"kep":166,"ker":1342,"ke ":3014,"kra":345,"kse":472,"kry":1085,"kri":662,"kou":249,"kor":369,"kop":214,"koo":391,"kon":866,"kom":903,"kol":246,"koe":157,"ks ":710,"kke":1272,"kki":178,"klu":430,"kle":511,"kla":387,"kli":749,"kat":157,"kar":183,"kas":204,"kap":818,"kan":1256,"kal":611,"kaa":1596,"ka ":1388," Ga":319," Ge":658," Fo":161," Fr":1217," Fi":213," Ha":534," He":680," Go":354," Gr":1318," Hy":549," Hu":294," Ho":502," II":202," Hi":301," Ja":710," Is":157," It":218," In":916,"han":779," Ka":1486,"hal":311," Ke":447,"haw":164," Ki":192,"har":356," Jo":563," Ju":622,"haa":238,"had":164," La":657," Le":488," Li":502," Ko":657," Kr":224," Ma":1348," Mi":547," Me":799,"he ":399," Lo":346," Lu":244," Ne":762," Na":662," Ni":257," Mo":624," Mu":186,"hel":273,"hei":994,"hee":465,"hed":169,"het":2911,"her":350,"hem":255," Ap":349," Am":563," An":491," Al":626," Af":2082," Ba":645," Au":486," At":187," As":200," Ar":422," Be":877,"hie":290," Bi":179," Bl":161," Bo":479," Br":777," Bu":243,"his":173," Ca":384," Ch":612," Co":473," Da":803," Di":5802," De":761," Do":196," Du":1024," El":212," En":720," Eu":354," Fe":367," Wo":179," Wi":530," We":720," Wa":412,"god":193,"gs ":887,"gor":522,"gro":2150,"gra":537,"gri":320,"gre":401," Os":236," Or":191," Oo":422," Po":674," Pi":229," Pe":309," Pa":725,"gst":406," No":1092," Ol":205," Ok":339,"gte":962,"gti":391," Ra":221," Ro":743," Re":620," Ri":222," Pr":547,"gus":284," Sy":292," Sw":400," Su":1700," St":953," Ta":273," Th":307," Te":261," Tr":236," To":270," Ry":194," Ru":645," Sa":724," Si":385," Sc":196," Se":811," So":678," 
Sp":441," Sk":201," Sl":222," Va":313," Ve":669," Vi":371," Vo":314," Vr":251," Tu":243," Un":253," ja":1102,"ial":357,"ian":256," in":12303,"iaa":736," is":11238," ka":1533," ki":531," ke":481,"id ":2425," ha":612," he":3438," gr":2075," go":365,"ia ":794," hy":292," hi":477," ho":1750," hu":727,"iet":320,"ieu":180,"iew":413," ni":722,"iel":277," ne":437,"ien":998," na":2339,"ier":2228,"ies":4471,"ied":1248,"ief":177,"iek":2103," mu":691,"ig ":1346," mo":667," om":1497," on":2106," of":1952,"ifi":218," no":1205," le":910," li":598," n ":10980," la":1290," ku":387,"ich":258,"ie ":34696," km":407," kl":879,"ica":209," kr":319," ko":1672," me":4100," mi":830,"ids":257," ma":1329," lu":186,"idi":291,"ide":993,"idd":457,"ida":156," lo":197," af":820," aa":2320," ad":269," am":322," an":759," ak":286," al":829," ar":263," at":229," as":2284," ba":599,"il ":459," bi":320," be":5430," bo":565," bl":263," by":612," bu":213," br":340,"ika":2950,"igd":381,"ige":1604,"igh":698,"igi":270,"igg":185,"igt":498,"igs":156,"ik ":2305," en":9738,"imp":231," ei":517," el":502,"ime":187," ek":223," ee":1730,"ind":1030,"ina":506," fa":191,"inn":302," fo":227,"int":638,"ins":1349,"ine":545,"ing":6095," fi":368,"ini":615,"ink":417," ge":8191," ga":169,"inw":455,"ikk":629," ch":185,"ike":1814,"ila":498," da":1923,"in ":12178,"iku":209,"iks":287," do":1111,"ilo":514,"ill":662," dr":523," de":3947,"ilj":228,"ili":684,"ild":294," di":25510,"imb":245,"eë ":693,"io ":196," du":309," wê":298,"hom":166,"hou":360,"hoo":1325,"hoe":410," wy":201,"hul":552,"hui":260,"hri":224,"ht ":578," ru":233," sa":888," se":2315," si":590," sl":329," sk":1250," sp":887," so":2211," ra":237," re":1576," ri":825," ro":614," pr":1589," s ":207," px":614,"hy ":302," ou":447,"hum":674," oo":2639," op":2809," or":325," pe":402," pa":556," pl":641," po":737," lê":242," wa":7840," we":1395," wo":2888," wi":454," va":14670," ve":4043," vo":2359," vr":575," vi":2068," vl":594," ty":439," tw":582," tu":692," ui":1746," ta":895," sw":227," sy":1183," st":4293," su":859," tr":387," to":1857," th":729," ti":190," te":2715,"ffe":165,"fer":157,"fel":155,"fha":158,"fge":290,"fam":176,"fde":429,"eta":359,"ete":1299,"eti":372,"esp":358,"eso":210,"est":2951,"ess":405,"eun":234,"eto":320,"etr":438,"ets":217,"ett":493,"eve":456,"eva":262,"evo":907,"evi":274,"eur":2292,"eus":242,"ewi":337,"ewe":1704,"ewo":449,"ey ":181,"ewa":222,"epe":254,"er ":10617,"epa":228,"eor":221,"es ":4626,"ept":277,"epu":400,"epr":184,"erk":2067,"erl":875,"eri":1765,"erg":1022,"erh":416,"ere":1861,"erf":286,"erd":1514,"era":1470,"erb":529,"et ":6083,"esk":1018,"esl":228,"esi":976,"ese":3607,"eu ":338,"erv":860,"erw":949,"err":349,"ert":1101,"ers":4583,"ern":1142,"erm":861,"erp":342,"ero":382,"ekg":155,"ekk":206,"eko":474,"eks":950,"ekt":701,"en ":13492,"ela":904,"eld":1199,"elf":322,"ele":2593,"eli":1906,"elj":427,"elg":226,"elk":209,"ell":778,"elo":234,"els":1983,"elt":333,"ely":255,"emb":839,"ema":484,"eme":1266,"emo":181,"emi":456,"ep ":699,"ene":1142,"enh":254,"eng":314,"enb":269,"ena":610,"end":3112,"eno":500,"enn":400,"enk":275,"eni":1151,"ens":2864,"ent":2318,"ego":497,"ege":690,"egi":516,"eha":370,"egr":238,"egs":217,"egt":193,"eho":266,"ehe":259,"ek ":1799,"eis":330,"eil":544,"ein":1010,"eie":633,"eid":1307,"el ":3516,"eit":680,"eke":2739,"eka":220,"em ":967,"gin":784,"gie":714,"ght":548,"gep":249,"gen":1564,"get":297,"ger":1248,"ges":2014,"gev":788,"gew":944,"gee":448,"ged":475,"geb":2499,"geh":356,"geg":181,"gem":756,"gel":1995,"gek":350,"gde":427,"ge 
":1916,"gaa":266,"gan":539,"ga ":157,"fst":852,"fri":2089,"for":371,"fie":369,"fil":208,"fin":174,"fis":177,"da ":327,"de ":6409,"daa":645,"dag":700,"dae":480,"dat":659,"dan":233,"dam":165,"dde":490,"ch ":316,"cha":160,"ck ":233,"che":490,"ed ":1090,"eba":159,"ebe":354,"ebi":752,"ebo":768,"ebr":1168,"ei ":821,"ega":168,"eek":631,"een":2520,"eel":2072,"eem":410,"eed":587,"ees":884,"eer":3295,"eeu":449,"eet":195,"edi":638,"ede":2561,"eda":161,"eg ":316,"eds":321,"edr":340,"ee ":892,"ef ":280,"dwe":310,"dus":171,"dor":875,"doo":416,"don":160,"dom":227,"ds ":353,"dmi":211,"doe":283,"dst":428,"dui":309,"dri":421,"dra":423,"dry":204,"dsk":181,"dse":527,"dia":294,"der":4829,"des":476,"deu":1676,"dee":1279,"del":1695,"dek":186,"den":1206,"do ":172,"din":875,"dio":177,"dis":425,"dit":656,"die":24964,"dig":1168,"dik":198,"rhe":301,"rga":496,"rgi":335,"rge":595,"ret":312,"res":944,"rg ":777,"rea":245,"ree":1091,"ref":257,"red":294,"rei":545,"reg":1039,"ren":1300,"rek":765,"rel":674,"rep":191,"rf ":180,"rdo":215,"rdi":841,"rde":1873,"re ":2607,"rd ":3667,"ras":532,"rat":587,"rbi":190,"rba":160,"rbe":287,"rag":291,"ran":2011,"ram":317,"ral":832,"rak":247,"raa":1046,"raf":284,"rad":331,"rs ":1922,"ros":273,"rot":330,"rom":305,"ron":1072,"roo":1778,"rop":575,"rou":212,"rov":708,"rod":199,"rol":315,"roe":1277,"rog":195,"rno":196,"rp ":728,"rna":508,"rne":469,"rmo":164,"rma":539,"rme":324,"rmi":175,"rlo":320,"rli":409,"rle":270,"rla":508,"rks":184,"rko":248,"rki":199,"rkl":203,"rke":440,"rka":271,"rm ":692,"rio":174,"rit":493,"ris":571,"riv":501,"rig":863,"ril":278,"rik":3384,"rin":1384,"ria":924,"ric":236,"rie":2029,"rk ":1040,"rwe":410,"rwy":498,"ryf":393,"rui":1143,"rug":256,"rum":244,"ruk":231,"rus":225,"rva":502,"rvl":353,"rvo":192,"rwa":171,"ry ":383,"rsk":872,"rsi":432,"rso":249,"rsp":591,"rsa":225,"rse":478,"rta":186,"rst":1083,"rtk":160,"rto":274,"rte":620,"rti":334,"rua":209,"rty":351,"rt ":1413,"rre":272,"saa":540,"sal":170,"sam":303,"san":408,"sas":204,"sa ":155,"ryw":338,"rys":282,"ryk":576,"sge":305,"sie":4039,"sid":185,"sia":299,"sit":436,"sis":296,"sip":279,"sin":541,"sio":799,"sil":194,"sim":173,"sik":231,"sif":160,"sig":289,"sbu":231,"se ":9840,"sch":268,"ser":501,"ses":400,"set":250,"seu":239,"sea":162,"see":618,"sed":264,"sen":1323,"sem":298,"sel":1093,"sek":186,"spo":405,"spr":756,"spe":934,"spa":260,"sow":508,"som":247,"son":545,"soo":954,"soe":195,"sok":377,"st ":267,"sli":202,"slu":297,"sky":183,"sla":1006,"sle":205,"ski":804,"sko":594,"skr":1152,"sku":244,"ska":1212,"ske":665,"sië":283,"sma":173,"sme":382,"sse":1275,"ssa":198,"ssi":922,"ste":6829,"sta":5065,"sto":805,"sti":1396,"stu":693,"str":1673,"sty":226,"sui":596,"sve":167,"sy ":1199,"swa":313,"tal":1301,"taa":2499,"tad":2323,"tau":165,"tat":456,"tas":164,"tan":1021,"te ":8469,"ta ":339,"pa ":202,"pe ":459,"par":608,"pas":176,"paa":333,"pal":324,"pan":428,"pge":207,"pen":295,"per":1379,"pes":438,"pee":201,"pel":568,"pla":660,"lê ":268,"pli":169,"ple":241,"pie":480,"por":394,"poo":160,"pos":197,"pol":518,"ppy":184,"ppe":636,"pst":229,"pub":435,"pte":575,"pra":251,"pri":484,"pre":726,"pro":1677,"pun":246,"px ":614,"py ":166,"ra ":424,"ngo":161,"ngr":289,"ngs":1292,"nge":2327,"nhe":276,"nel":314,"nen":189,"nem":225,"ner":1014,"net":468,"nes":533,"ng ":4906,"nee":762,"nce":206,"ne ":1530,"ndr":216,"nds":657,"ndo":326,"ndi":878,"nde":5081,"nda":453,"nal":790,"nam":291,"nad":316,"naf":372,"nab":229,"naa":1198,"nd ":4245,"nat":282,"nas":677,"na 
":1572,"nwo":542,"nus":209,"nua":266,"ntw":393,"nto":201,"nts":300,"ntr":543,"nti":571,"ntl":164,"nta":457,"nte":1815,"nst":787,"nse":3345,"nsi":1079,"nsl":207,"nsk":498,"nt ":1757,"ns ":2476,"nog":456,"noe":477,"noo":659,"nom":368,"nne":904,"nni":442,"nië":246,"nli":373,"nke":345,"nkl":391,"nks":179,"nkr":453,"nje":156,"nig":640,"nie":1831,"nk ":274,"niv":210,"nis":1512,"nin":804,"ogr":272,"ogi":423,"oi ":216,"oha":228,"oeë":178,"ok ":1432,"ol ":554,"ock":164,"oe ":303,"ode":551,"odi":176,"ods":177,"of ":2323,"oek":499,"oel":276,"oem":563,"oeg":231,"oei":336,"oer":752,"oes":295,"oet":302,"oen":602,"oep":714,"odu":188,"oed":477,"og ":895,"ofs":803,"oew":261,"od ":254,"obe":382,"oud":510,"oue":197,"ote":350,"ott":175,"ots":913,"oto":266,"ost":637,"osi":266,"ose":346,"oss":176,"oso":190,"owa":484,"owe":208,"ovi":678,"ove":370,"ous":302,"our":167,"out":306,"opo":205,"opp":449,"ope":438,"opg":213,"opa":195,"os ":1171,"oon":731,"ool":561,"oom":393,"ook":1376,"ooi":288,"oof":1146,"oog":389,"ood":288,"or ":1152,"oot":1351,"oos":958,"oor":4776,"oop":341,"ork":260,"orl":386,"orm":964,"orp":858,"ord":4583,"ore":773,"org":587,"ori":1212,"ou ":999,"ort":1219,"ors":871,"orw":195,"ot ":1528,"orb":186,"ora":235,"ola":171,"on ":1522,"oli":772,"oll":288,"olk":702,"ole":263,"olg":904,"ols":270,"olo":636,"om ":1870,"okk":553,"ona":980,"ond":1915,"one":1178,"ong":620,"oni":1012,"onl":220,"onk":232,"onn":184,"ono":391,"ons":511,"ont":1339,"oma":425,"ome":845,"omi":324,"omm":454,"omp":297,"oms":595,"op ":2264,"la ":334,"le ":3834,"lf ":175,"lde":601,"laa":982,"lad":180,"lag":434,"lak":490,"lan":4154,"lar":155,"lat":361,"las":433,"ld ":695,"kus":410,"kun":548,"kul":242,"kwe":204,"kwa":191,"kte":822,"kst":257,"ksi":463,"ktr":342,"ktu":210,"kti":247,"kto":369,"ls ":1008,"lon":293,"lom":430,"loo":382,"loe":423,"log":655,"los":274,"lië":349,"lti":157,"lub":411,"lug":221,"lst":643,"lte":252,"lse":623,"lge":754,"lew":250,"leu":193,"les":329,"let":347,"ler":415,"lem":358,"len":1056,"lek":605,"lei":1010,"leg":257,"lee":477,"led":218,"lg ":483,"lo ":169,"lla":325,"lle":1578,"lli":615,"lke":200,"lki":447,"ljo":223,"ll ":176,"lja":430,"lit":831,"lis":504,"leë":449,"lin":1208,"lim":201,"lid":165,"lia":364,"lik":2917,"lig":818,"lie":1618,"ma ":226,"mb ":655,"maa":1244,"mag":221,"mar":331,"mas":207,"mal":270,"man":726,"mat":394,"mba":172,"mbi":179,"mbe":814,"mbo":161,"me ":936,"mde":163,"med":223,"mee":1533,"met":2981,"mes":247,"mer":991,"mel":330,"men":1550,"lui":390,"lus":194,"lwe":213,"lyk":221,"lyn":187,"mpi":220,"mpe":208,"mpo":176,"mpt":267,"ms ":488,"moe":196,"mod":233,"mon":329,"mst":248,"mus":488,"mun":417,"ër ":180,"mge":191,"min":806,"mil":465,"mit":231,"mig":184,"mie":523,"mid":310,"ië ":1136,"mme":353,"wêr":319,"yst":183,"ys ":680,"ywe":370,"ye ":306,"yf ":380,"yde":281,"yds":165,"yd ":230,"yn ":461,"yns":175,"yk ":810,"wys":531,"wor":2620,"woo":760,"won":526,"we ":1260,"wes":799,"wer":1583,"wet":305,"wen":427,"wel":545,"weg":270,"wee":1257,"wis":166,"wit":342,"wie":194,"win":417,"wil":177,"wik":231,"wan":300,"wat":5174,"war":532,"was":2236,"waa":1031,"vry":194,"vro":313,"vir":1570,"vin":921,"vie":880,"vis":289,"vla":709,"vlo":280,"voe":444,"vol":1592,"voo":1083,"vor":625,"ver":4566,"ven":170,"vem":236,"vel":250,"vee":302,"val":319,"van":14723,"vat":155,"vaa":414,"uwe":229,"uur":863,"usl":180,"usi":606,"use":380,"ust":585,"uss":1129,"ute":176,"uto":171,"us ":1998,"ure":395,"urg":669,"uri":191,"urk":167,"uro":352,"urs":211,"urt":189,"ur ":2547,"umb":689,"ume":172,"unt":325,"uns":289,"uni":820,"und":530,"um 
":614,"ult":270,"ull":459,"uli":358,"un ":219,"uid":2285,"uik":850,"uim":162,"uis":508,"uk ":200,"uit":3378,"ul ":272,"ugb":161,"ugu":278,"ude":184,"udi":240,"ue ":322,"ug ":159,"ub ":406,"uar":522,"ubl":464,"ud ":181,"tyn":228,"ty ":384,"tur":232,"tus":988,"tuu":617,"tui":232,"tud":171,"tyd":628,"twi":269,"twe":751,"ts ":533,"tre":1022,"tra":1128,"tri":607,"tru":366,"tro":780,"tse":746,"tsk":298,"tsl":425,"tst":993,"tte":641,"tti":226,"to ":272,"tof":244,"toe":713,"tob":268,"tot":1108,"tom":182,"ton":586,"tol":317,"tor":808,"too":280,"til":187,"tik":334,"tie":1846,"tig":1053,"tis":241,"tin":826,"tio":267,"thu":695,"tkl":165,"tli":191,"tla":301,"tem":732,"ten":1059,"tei":844,"tek":528,"tel":2135,"tee":779,"teg":166,"ted":237,"th ":270,"teu":212,"tes":357,"ter":4231,"tge":442,"the":380},"n_words":[1541130,1808182,1328687],"name":"af"}
\ No newline at end of file
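Each file added under nlp_resource_data/langdetect/profiles/ is one line of JSON with three fields: "freq" (occurrence counts of 1- to 3-character substrings from the training corpus), "n_words" (total substring counts per n-gram length), and "name" (the language code). A minimal reading sketch, standard library only; the values in the comments are quoted from the Afrikaans profile above:

    import json

    # Profiles are plain single-line JSON, so json.load() is all that is needed.
    with open("nlp_resource_data/langdetect/profiles/af", encoding="utf-8") as f:
        profile = json.load(f)

    print(profile["name"])          # "af"
    print(profile["n_words"])       # [1541130, 1808182, 1328687] - totals per n-gram length
    print(profile["freq"]["die"])   # 24964 - occurrences of the trigram "die"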
diff --git a/nlp_resource_data/langdetect/profiles/ar b/nlp_resource_data/langdetect/profiles/ar
new file mode 100755 (executable)
index 0000000..a7b40cc
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/ar
@@ -0,0 +1 @@
+{"freq":{"و":674395,"ى":83925,"ي":1050070,"ً":13534,"َ":5694,"ُ":4812,"ـ":6044,"ف":291773,"ق":234289,"ك":238048,"ل":1258387,"م":769173,"ن":600182,"ه":275861,"ّ":6098,"ِ":3286,"ْ":2044,"خ":81779,"د":374335,"ج":159763,"ح":207504,"ت":462068,"ث":62775,"ب":456733,"ة":436596,"ئ":43113,"ا":1630465,"ؤ":8533,"إ":87017,"آ":11409,"أ":206598,"ء":28935,"غ":62643,"ع":367154,"ظ":26879,"ط":114141,"ض":60872,"ص":101344,"ش":119185,"س":320648,"ز":83586,"ر":577132,"ذ":48426,"،":89553,"؛":1398," ،":20335," ن":38705," ه":63492," ل":88748," م":255388," ق":40529," ك":61242," ف":145932," ي":66494," و":214375," ص":12486," ش":28169," ط":11510," ض":8567," ر":24040," ذ":6579," س":49888," ز":6199," ع":110158," ظ":1667," غ":11703," إ":56701," ا":619492," آ":6461," أ":122164," ج":33981," ح":46703," خ":15646," د":26005," ب":145150," ت":96353," ث":8150,"ア":1366,"ا، ":7320,"ئة ":2129,"ؤسس":2077,"إن ":1277,"أيض":3023,"أور":2981,"أهم":1974,"أهل":1229,"أول":9618,"أنو":1568,"أنه":3955,"أمي":2439,"ألم":3407,"أما":1795,"ألف":1545,"أمر":6318,"أكب":2802,"أكت":1724,"أكث":2574,"أفر":1800,"أعل":1292,"أعم":1551,"أغس":1326,"أصل":1634,"اث ":2044,"ئي ":3037,"ب، ":1611,"ات ":57401,"إضا":1407,"اح ":3081,"اج ":4358,"إسب":3960,"ئل ":2536,"اة ":4792,"إسل":3234,"اب ":9098,"إسر":1395,"إدا":2059,"اء ":22733,"إحد":6124,"إذا":1265,"ئر ":2350,"ة، ":15452,"اي ":2212,"بت ":1435,"بب ":1489,"اه ":2688,"بة ":12759,"ان ":40858,"با ":4030,"بد ":6780,"بح ":1410,"اً ":10489,"اف ":3597,"إيط":2527,"إير":1277,"ام ":31875,"ال ":23428,"ئرة":2003,"اك ":2756,"اق ":4807,"اع ":5640,"إلي":1606,"إلى":18043,"إما":1946,"اض ":1731,"إقل":1614,"اط ":2017,"إنج":4004,"إنت":2754,"إنس":1518,"ار ":15484,"اد ":13875,"اص ":1586,"از ":3651,"اس ":4999,"بي ":11247,"ت، ":2665,"اضي":4506,"اصر":1618,"بو ":3109,"اصم":3245,"اعب":4589,"اعة":3428,"اعت":1731,"اعد":2039,"اعر":1432,"اطع":4959,"اطق":1642,"اطي":1824,"اعي":4248,"ادا":1408,"اخل":1728,"احي":1991,"اخت":2071,"احل":1721,"احت":2360,"احة":4653,"احد":2974,"ارب":3471,"ارة":7735,"ارا":5132,"ادي":11501,"ادة":5295,"بق ":1932,"ادر":1555,"ازي":2304,"است":9208,"اسة":1966,"اسا":1699,"اري":13333,"اره":1487,"ارو":1690,"ارك":3428,"ارع":1412,"ارس":4342,"بل ":7375,"ارد":1984,"ارت":2585,"ارج":1460,"اصة":1686,"اشم":2112,"به ":4466,"اسم":7414,"اشت":1547,"اسي":6201,"اسع":1320,"بن ":14722,"ئلة":1510,"بع ":5351,"ائد":1719,"ائر":5727,"ائز":1678,"ائم":1524,"ائل":4622,"ائي":8565,"ابا":4420,"ابت":1320,"ابة":2266,"ابر":1286,"ابع":5010,"ابي":3084,"ات،":1833,"ابل":1965,"ابق":3015,"ابن":2545,"اتح":2012,"اتب":1961,"ئيس":4458,"اتي":3444,"اته":3712,"ئية":4071,"اجت":1267,"ئيل":1637,"بر ":15051,"بط ":1447,"آخر":1604,"أن ":6708,"آن ":1439,"أس ":2019,"أبر":1929,"أبي":2141,"أبو":3598,"أحد":5818,"أحم":2690,"أخر":2780,"أحي":1348,"أرا":1757,"أرب":1317,"أرض":1852,"أرد":4510,"أست":1490,"أسا":2106,"أسس":2383,"أشه":1227,"أصب":1379,"أو ":17977,"أي ":2409,"جزي":2701,"جدي":2169,"جزء":1922,"حل ":2135,"جرا":1466,"جري":2220,"جزا":2275,"جتم":2280,"ثير":2870,"جات":1361,"ثنا":1548,"جار":2741,"جال":2447,"جان":2465,"جبا":1237,"جام":4767,"جبل":1832,"جلي":4317,"جما":2589,"جلس":2643,"حاف":7429,"حال":5921,"حاك":1299,"جهة":1426,"جنو":6846,"جها":2454,"جمي":2428,"حاد":3278,"حار":1507,"جنس":1263,"حاس":1289,"جمع":2692,"جمه":2396,"جمو":3506,"خط ":1642,"خر ":2126,"حي ":2379,"د، ":2106,"دث ":1521,"حصل":1369,"دة ":22913,"دت ":1894,"حسب":1473,"دا ":3896,"حسي":1367,"حسن":1628,"حرك":2813,"حرا":2117,"حرب":2616,"خل ":1874,"حزب":1786,"حري":2754,"دأ 
":1419,"حدث":1823,"حدة":6151,"حدا":1890,"حدي":3344,"حدو":1601,"حدى":6092,"حتى":2850,"حتو":1235,"جية":1498,"جيا":1555,"جيد":1284,"جين":1406,"جود":2059,"ختص":1274,"حول":2075,"حوض":3972,"حوا":3753,"خاص":2779,"خار":1757,"حمل":1584,"خاب":1378,"حلي":1951,"حمد":7071,"حما":1348,"حكو":1587,"حكم":2998,"حقي":1342,"دس ":1564,"در ":3643,"دد ":5353,"ذا ":6858,"دن ":3532,"ده ":2855,"دو ":1586,"دى ":7927,"خصي":1891,"ر، ":3918,"دي ":16032,"حيا":3483,"ختل":2576,"حيث":5743,"حية":2840,"حيو":1435,"خدا":1713,"خدم":4681,"خرا":1406,"دل ":1527,"خرج":2035,"خرى":2288,"دم ":12380,"خلي":2944,"خلا":4049,"دان":2670,"دام":2603,"دال":1357,"داي":1553,"داء":1232,"دائ":2836,"دار":5259,"دات":2179,"داخ":1695,"داد":3415,"ذي ":7019,"رج ":3503,"رة ":42156,"رت ":3799,"رد ":2816,"را ":5024,"رب ":12795,"ذه ":5431,"درس":2590,"درج":3597,"درا":3796,"دري":2750,"دول":7090,"دون":2547,"دوا":2452,"دود":1701,"دور":4858,"دها":2546,"دني":4787,"ذات":1939,"رف ":5073,"زء ":1646,"ديو":1641,"ديم":4297,"دين":17630,"ديا":3615,"دية":10224,"ديس":2111,"ديد":5637,"دير":2378,"ديث":1831,"رس ":4802,"رز ":1453,"رع ":2500,"دما":2598,"رض ":4034,"اقي":2703,"اقت":1926,"اقة":1778,"افي":2956,"اقع":1373,"الق":27005,"الف":20852,"الل":14610,"الك":19511,"الي":32249,"ان،":1862,"امت":1485,"امج":2075,"اما":2969,"الن":19094,"الم":106832,"امة":2785,"الو":15207,"اله":10510,"امر":2004,"امع":4584,"الأ":53765,"الآ":3087,"الج":24268,"اكي":1240,"الث":10222,"الت":44136,"الة":4500,"الب":27575,"الا":23279,"اكم":1385,"الإ":23108,"الس":25474,"الش":23985,"الر":18584,"الز":4628,"الد":22592,"الذ":9992,"الح":28530,"الخ":10845,"الع":45326,"الغ":7597,"الط":9353,"الص":10951,"الض":2239,"افة":3763,"افظ":6821,"اين":1399,"ايو":2112,"بتم":1591,"ايا":4981,"اية":7953,"اير":3864,"بدأ":2040,"، ":87650,"بحي":1333,"بحر":3662,"اهر":2224,"اني":27995,"انه":3927,"انو":4021,"بان":8783,"بال":24841,"باد":1374,"اند":2371,"بار":7325,"انس":1452,"باس":3805,"انا":3741,"باب":2616,"انب":1315,"انة":1432,"بات":4730,"انت":10216,"امي":6789,"باح":1289,"امل":2591,"بائ":1535,"اوي":2785,"اول":1939,"اهي":1800,"تا ":1767,"تب ":3208,"بشك":1596,"بدا":2243,"برا":7053,"برت":1316,"برل":1347,"برو":1928,"برن":1554,"بري":6364,"تر ":3568,"بطو":2790,"بعد":8590,"بعة":3652,"بغد":1205,"بعض":3329,"بني":2271,"بها":4540,"بنا":4249,"ئ ":1205,"ا ":148281,"بلي":1501,"بلا":3530,"ب ":70841,"بلغ":5588,"بلد":6651,"ء ":25878,"بقا":1527,"أ ":2761,"ح ":16042,"خ ":7746,"د ":111902,"ذ ":7044,"بين":10021,"ة ":417779,"بيل":2585,"بيض":1538,"بيع":2501,"ت ":98281,"بيا":2983,"بية":13845,"بير":5823,"بيت":1659,"بون":1363,"ث ":14734,"بول":3156,"بور":2404,"ج ":16315,"بوا":1540,"تي ":19035,"ثة ":2508,"تو ":1236,"تى ":3327,"ته ":7416,"تل ":1533,"تم ":5813,"ثر ":3741,"؛ ":1372,"تأس":2342,"تبر":4383,"تبع":2953,"تال":2664,"تان":2849,"تبا":1684,"تبة":1215,"تار":5301,"تاب":6518,"تاج":2425,"تبل":2737,"تجا":3016,"تخد":5000,"تخب":1329,"تخا":1370,"تحد":6290,"تحر":1572,"تحا":3110,"تحت":3198,"تدا":1332,"ثم ":2995,"تري":1455,"ترو":2478,"ثل ":4130,"ترا":5683,"ترة":2099,"ترك":2885,"تشا":2599,"تسم":2030,"تست":1924,"تصا":3295,"تشر":1532,"جة ":6594,"تشي":1720,"تضم":1238,"تطو":1569,"تعا":1531,"تعت":2311,"تعم":2414,"تعل":2476,"تعر":2059,"تعد":2614,"جد ":3856,"جر ":1340,"تفا":2566,"تقا":2028,"تقد":2088,"تقر":1287,"تقس":2440,"تقع":8338,"تقل":2844,"تهر":1315,"ثال":2031,"تها":6374,"ثان":4499,"تنظ":2616,"تمي":2924,"تنا":1728,"تمد":1486,"تمر":1589,"تلف":3439,"تلك":1400,"تما":2848,"تمب":1576,"تكو":3404,"تلا":1846,"تين":4590,"تية":1561,"تيا":2052,"تون":4528,"توي":1644,"توف":2858,"تول":1447,"تور":2920,"توس":1688,"توب":2148,"توا":1906,"تهم":1291,"جه ":1478,"جم ":2117,"جل ":2059,"جي ":1843,"حة 
":7424,"حت ":2989,"حر ":2424,"حد ":6903,"ثلا":2751,"ثما":1889,"ثقا":1374,"وي":38534,"ي،":8021,"وو":3027,"وى":2435,"ية":149201,"يب":15166,"يا":76978,"يئ":2097,"يض":5937,"يط":9332,"يع":16206,"يز":13642,"يس":24541,"يش":6638,"يص":2709,"يخ":8388,"يد":33231,"ير":50995,"يت":21919,"يث":8777,"يج":6934,"يح":6752,"يه":17510,"ين":82989,"ًا":1622,"يو":34779,"يك":18257,"يق":23953,"يم":33457,"يل":36829,"يف":12030,"يغ":2332,"يي":7762,"فع":3459,"فض":3349,"فص":2010,"فظ":7666,"فز":1405,"فر":20396,"فس":4079,"فة":10560,"فت":8107,"ق،":1414,"فا":19253,"فب":1563,"قف":1276,"قع":16814,"قط":5495,"قض":1727,"قص":4820,"قش":1443,"قس":4834,"قر":18930,"قد":24585,"فى":1203,"قت":5868,"في":137714,"فه":4197,"قب":10150,"قة":17223,"فو":7133,"فم":1876,"فن":5345,"قا":33043,"فك":2212,"فل":7982,"فق":4801,"لأ":58367,"لآ":3323,"كف":1496,"كس":5921,"قي":21786,"كث":5852,"ل،":2207,"كذ":1298,"كر":19872,"كز":7004,"كأ":1420,"قل":10832,"قم":3276,"كا":41128,"قن":3553,"كب":9528,"قه":2295,"كة":14080,"قو":10571,"كت":14016,"قى":1403,"لك":43877,"مؤ":5145,"لق":35996,"لف":31832,"لط":12463,"لض":2442,"لص":14130,"لش":25578,"لغ":17940,"لع":56838,"لظ":1228,"لخ":11729,"لد":39289,"لج":27745,"لح":34548,"لز":5989,"لس":38336,"لذ":10795,"لر":20100,"كم":13188,"لا":89050,"كن":10747,"كل":17531,"لإ":25479,"لت":54464,"م،":3972,"كي":22561,"لث":10985,"لب":37291,"كه":2654,"كو":22542,"لة":26342,"مع":31975,"مغ":4324,"مص":12239,"مض":1909,"مط":2885,"مف":2713,"مق":12733,"مك":8220,"مل":28998,"مت":25250,"لى":49564,"مة":21124,"لو":37077,"مج":15409,"ن،":5643,"لي":91040,"مث":7965,"لم":137485,"لل":37112,"مب":12595,"له":28033,"ما":84509,"لن":25730,"مز":2658,"مر":34445,"مش":6760,"مس":26092,"مخ":5223,"مح":22151,"مذ":1262,"مد":31562,"نغ":3178,"نظ":9512,"نع":2681,"نط":11265,"نل":1207,"نم":4711,"نق":5444,"نك":3963,"نف":6969,"نح":3012,"نج":9690,"مي":49166,"ه،":2071,"مى":3142,"نت":26741,"نة":26561,"مو":33352,"مه":13330,"نب":6671,"نا":51288,"من":110993,"مم":11520,"نص":4888,"نش":6378,"نس":20514,"نز":3094,"نر":1454,"نذ":4480,"ند":18475,"هـ":2446,"وأ":7843,"وإ":2408,"هل":3624,"هم":13508,"هن":5865,"وا":96512,"هب":2043,"نه":22166,"ها":63427,"هت":1254,"نى":2312,"نو":28243,"هة":2492,"هج":2434,"و،":1267,"ني":68053,"هذ":11164,"هد":6895,"هز":1306,"هر":13839,"وغ":3162,"وظ":1284,"وع":15945,"وق":15305,"وك":13977,"وف":17742,"ون":47917,"وه":18574,"ول":64949,"وم":36461,"هي":33444,"وث":2062,"وت":24045,"هو":37979,"وة":2112,"وب":25979,"ود":18341,"وخ":2267,"وح":8074,"وج":13010,"وس":25063,"وز":7609,"ور":46060,"وذ":2568,"وط":5229,"وض":6816,"وص":4846,"وش":3688,"دة":23918,"خو":3104,"دت":2861,"دا":34518,"دب":3013,"دخ":1822,"دث":2213,"خي":4889,"خل":11579,"خم":2109,"دأ":2148,"خط":4248,"خر":10258,"خد":6707,"خص":5211,"ده":6608,"دو":24057,"دى":7961,"دي":71508,"ر،":3948,"دف":3789,"دق":1886,"دك":1306,"دل":4421,"دم":20032,"ذا":11430,"دن":9851,"دع":2356,"دد":6837,"در":20942,"دس":4645,"حث":1327,"جي":13871,"حج":2300,"حة":7643,"جو":9984,"حت":11398,"جن":11985,"حا":28730,"حب":2668,"جه":7398,"جل":12168,"جم":17274,"جع":2397,"جس":2540,"جز":8412,"جر":7944,"جد":8657,"خت":6160,"حي":21851,"د،":2117,"حم":14977,"حن":1639,"خا":10036,"خب":2705,"حو":13676,"حف":3106,"حق":4632,"حك":5593,"حل":8199,"حض":1296,"حص":3737,"حز":2507,"حر":15992,"حس":5733,"حد":30065,"تغ":2721,"تف":6519,"تم":21550,"تل":11923,"تك":7075,"تق":23507,"تى":3340,"ثة":2642,"تو":27372,"ته":18459,"ثا":9771,"تن":10701,"تج":7596,"تح":21120,"تت":7159,"تر":25584,"تخ":11500,"تد":5897,"تش":11506,"تص":9178,"تز":2287,"تس":9386,"تع":17161,"تض":2140,"تط":3895,"ثق":2026,"ثل":9196,"ثن":2113,"جا":22121,"ثم":5275,"جة":6738,"ثو":2447,"جب":5048,"ثي":5910,"جت":3434,"تي":34720,"ثر":5818,"بغ":20
25,"بع":25200,"به":11285,"بن":26227,"بم":5940,"بل":28735,"بك":4252,"بق":7310,"بد":14809,"بج":2196,"اً":11293,"بح":10218,"بت":8320,"اي":29388,"ة،":15567,"بب":2982,"اه":11841,"او":11068,"بة":13305,"بط":8037,"بش":3786,"بص":1566,"بس":4381,"بر":41886,"تا":29084,"تب":19287,"تأ":5378,"بو":18997,"ت،":2681,"بي":60818,"ئد":1726,"ئر":5978,"ئز":1688,"إن":13110,"ئا":1293,"إم":3605,"ئة":2198,"ا،":7371,"إي":6297,"اء":25293,"ئل":4704,"ائ":31094,"ئم":1653,"از":9640,"ار":69575,"اذ":2153,"اد":41536,"اض":8516,"اص":11589,"اش":9130,"اس":39158,"ات":77501,"اة":4919,"ئه":1298,"اب":38588,"اخ":6254,"اح":20482,"اج":12583,"اث":6287,"ئي":14445,"ب،":1623,"اف":23866,"بأ":2984,"اق":16552,"ام":64051,"با":69767,"ان":106270,"اك":11934,"ال":726452,"بإ":1475,"اع":27489,"اغ":2741,"اط":14678,"ؤس":2217,"أو":36459,"أه":4128,"أي":8325,"أل":9391,"أك":9086,"أن":19914,"أم":14875,"إس":11365,"إر":1668,"إذ":1752,"إد":2895,"إخ":1619,"إح":7179,"إب":1901,"إل":22468,"إق":2002,"إع":1925,"إض":1479,"آخ":1667,"آل":2561,"أت":2542,"آن":1925,"أب":10779,"أح":11512,"أخ":6156,"أث":2807,"أج":3225,"أر":12567,"أد":4137,"أص":5345,"أش":3739,"أس":15627,"أغ":3162,"أع":5434,"أط":1989,"أق":3836,"أف":5069,"غي":8633,"غو":4024,"غن":2949,"غل":3357,"عي":17935,"غد":1582,"غر":13200,"غس":1789,"غز":1267,"عق":2845,"عل":49520,"عم":19388,"غا":8614,"عن":19968,"عه":6016,"غة":3737,"عو":7644,"ظي":3474,"عث":1594,"عت":10284,"عة":23777,"عد":27555,"عش":4458,"عس":1709,"عز":2669,"عر":25424,"عظ":2315,"عط":1205,"عض":5918,"عص":2498,"عا":50253,"ظه":2422,"عب":25388,"ظم":4301,"ظا":4120,"طن":4817,"طل":7662,"طق":11023,"طف":1950,"طع":6327,"ظر":2587,"طي":9898,"طو":11014,"ظة":6299,"ضم":8353,"ضل":2264,"ضع":1853,"طس":1527,"طر":9793,"ضي":7171,"طح":1428,"طا":16584,"طب":7274,"طة":4986,"ضو":3284,"صل":10145,"صف":5067,"صط":2224,"صص":1416,"صغ":2398,"ضر":2826,"ضة":1642,"صو":8178,"صم":4802,"صن":4515,"ضا":11319,"صي":8440,"شف":1822,"شك":5750,"شق":1763,"شع":5311,"شغ":1513,"صح":4260,"صد":4269,"صر":13415,"شم":11602,"صا":13513,"صب":5260,"شه":5994,"صة":3495,"شو":3361,"شي":12032,"سع":7475,"سط":12291,"سس":4815,"سف":3591,"شأ":1378,"سي":47561,"شت":5440,"سو":18175,"سه":3635,"شب":4456,"شا":16192,"سن":12898,"سم":24549,"سل":21272,"سك":11128,"شر":23735,"شد":1209,"شخ":3092,"زع":1204,"سب":17052,"زه":1918,"زن":1607,"سا":35091,"ست":30958,"سة":7853,"زو":4517,"زم":3202,"زل":2051,"سد":1627,"سر":8485,"سج":2382,"زي":20894,"رس":13875,"رش":2594,"رز":2717,"رط":1498,"رض":6362,"رع":5610,"رغ":2804,"زء":1924,"رل":3156,"رك":24686,"رق":13754,"رف":9661,"رو":32705,"زة":4009,"زب":2789,"ره":9008,"زا":10186,"رن":13036,"رم":7236,"س،":1548,"ري":92759,"رى":6117,"زر":4095,"ذر":1628,"ذك":2116,"رأ":2244,"رئ":5538,"ذل":5170,"رب":36257,"ذه":6871,"را":67798,"رت":12252,"رة":43634,"ذو":1454,"رج":14223,"ذي":9511,"رخ":1356,"رح":4450,"رد":11552,"ف ":28941,"ـ ":3661,"ع ":53161,"غ ":7968,"ص ":6276,"ض ":14917,"ط ":15243,"ظ ":1554,"ر ":119691,"ز ":20518,"س ":42343,"ش ":6531,"ً ":11009,"ي ":305236,"ه ":68493,"ن ":236663,"ى ":82765,"و ":86344,"ك ":22299,"ق ":30798,"م ":131428,"ل ":111126," ، ":18980," و ":12423," م ":5345," جن":4512," حا":7431," جه":1218," جم":4339," جي":2378," جو":3451," حت":2592," جد":1981," جز":3197," جر":1610," ثل":1523," ثم":3265," جا":6114," جب":2225," تي":1533," خل":4736," دا":4684," خط":2109," حق":1280," حك":2014," حل":1546," حي":8173," حم":2037," خا":3436," حو":8126," حر":3258," حس":2733," حد":2242," بك":1952," بق":1818," به":5365," بن":17676," بم":5621," بل":8899," بغ":1392," بع":9614," بس":2145," بر":8567," بط":3259," بش":2647," بت":3125," بد":4624," بج":1734," بح":3333," بأ":2828," ال":581886," بإ":1446," با":31729," ان":6245," 
ام":2207," اع":1488," ار":1569," اس":7954," اب":3029," ات":1745," اح":1418," اخ":1766," تو":6684," ثا":1511," تن":5058," تم":5949," تل":2911," تك":2690," تق":13534," تع":7393," تط":1471," تش":4009," تص":2525," تس":4420," تر":4248," تخ":1540," تد":1553," تج":1794," تح":6360," تت":4407," تا":4988," تب":4366," تأ":3604," بو":6771," بي":13768," أل":4956," أك":6683," أن":14293," أم":6327," أو":27007," أه":2795," أي":6316," آل":1841," أب":8228," أخ":2816," أح":8158," أج":2000," أث":1423," أر":3743," أد":2035," أص":3105," أس":5597," أش":2245," أع":3015," أغ":2285," أف":3376," أق":2564," إي":4306," إن":5007," إم":1413," إب":1312," إس":5794," إر":1268," إذ":1294," إد":1530," إح":6465," إل":21224," إق":1208," طو":2165," عش":2714," عر":3955," عد":6773," عا":21951," عب":8841," عي":1567," غر":3853," عل":33952," عم":7142," عن":15313," غا":2188," غي":2774," سع":2415," سي":7136," شب":1886," سو":5683," سم":1931," سن":7850," شا":3992," سك":3794," سل":4129," شر":6157," شخ":1437," شع":1651," شك":1235," شي":2028," شم":4082," صا":1583," شه":2018," صح":1413," ضم":5470," طا":1844," طب":1958," طر":3408," در":2995," دي":6094," دو":6652," ذا":1885," رئ":2370," ذل":2035," را":2673," رس":1784," ري":3022," رق":1403," رو":5273," زي":1372," سب":3177," سا":6895," ست":1885," لك":4551," مؤ":2595," لق":1950," لع":2571," لغ":1253," لص":2532," لج":1545," لح":1350," لد":1501," لب":2260," كو":5523," لت":3577," كي":3645," كل":6741," لإ":1374," كم":7185," كن":1321," لا":10830," مل":4355," مك":2868," مق":7894," مغ":1598," مع":14545," مص":6997," مس":11150," مش":2698," مر":11437," مد":14412," مح":14231," مخ":2723," لي":4897," مث":3231," مج":7634," لو":4068," مت":7729," لن":2133," ما":17956," مب":2224," له":5386," لل":21688," لم":6816," نف":1935," نق":1569," نظ":2491," نس":4267," نش":1472," مم":3100," نا":8059," من":85288," مه":1676," مو":12716," مي":4669," نج":1432," وأ":7664," هـ":2402," هن":1965," وا":50314," وإ":2381," هذ":9093," نو":6152," ها":3011," نه":2387," ني":2637," فر":6896," فب":1504," فا":3547," فت":2193," قص":1901," قط":1391," فق":1797," فل":2626," فن":1697," قا":5741," فو":2197," فه":1391," قب":5253," في":115948," قد":7793," قر":6492," لأ":2928," قل":1628," كت":3116," قو":2125," كب":2590," كا":17231," قي":2400," كث":1288," كر":7439," ود":1531," وخ":1367," وح":3870," وج":3378," هي":20610," وت":16290," هو":22238," وب":6445," وص":1974," وش":2083," وس":5388," وز":1947," ور":2782," وذ":1466," وغ":1624," وع":5254," ون":2492," وه":15877," ول":15982," وم":13412," وق":7666," وك":6757," وف":4804," وو":1985," وي":13320," يا":1484," يب":2139," يح":3014," يج":1805," يت":5470," ير":1696," يد":1787," يص":1234," يش":1764," يس":4063," يع":6186," يل":3428," يم":2984," يق":8446," يك":1977," يو":8148," ين":4302,"فس ":1508,"فر ":2554,"فع ":1454,"فا ":1471,"ق، ":1401,"فة ":10112,"قع ":14708,"فار":1789,"فات":2286,"فال":1824,"فاع":2545,"فان":1282,"فبر":1355,"فتر":2588,"قب ":1566,"قا ":1491,"قت ":1589,"قة ":16768,"في ":111805,"قد 
":7827,"عظم":1719,"عضو":1448,"عشر":3482,"عسك":1369,"عزي":1296,"عري":1226,"عرو":2224,"عرف":3788,"عرض":1945,"عرب":8060,"عرا":4113,"عدة":2689,"عدا":2204,"عدد":5430,"عدي":2807,"عتم":1554,"ظيم":2842,"ظهر":1451,"عبر":1705,"عبد":6249,"عبي":2224,"عتب":4439,"عات":3583,"عائ":1805,"عاب":1251,"عال":9007,"عام":18844,"عبا":2776,"عاص":3770,"عار":1586,"عاد":3689,"ظمة":1530,"غسط":1434,"غدا":1272,"غرا":1470,"غرب":8757,"عية":4635,"عيد":1672,"عين":3620,"عمل":7530,"عمو":1379,"عمر":2346,"عها":1818,"عني":1581,"غال":2120,"عند":3134,"عهد":2118,"عود":4342,"علا":3567,"علي":8023,"على":25792,"علو":2893,"عما":5586,"علم":6076,"غني":1336,"غير":5788,"شعر":1240,"شعب":2093,"شما":7339,"شكل":4349,"صال":3820,"شهر":2525,"صبح":1804,"صاد":2357,"صار":2022,"شمي":2083,"صدر":2012,"شير":1659,"شيخ":2626,"طة ":4836,"ضي ":2799,"صطل":1544,"صري":4147,"ضم ":1876,"طب ":1314,"طس ":1466,"صغي":1584,"صول":1823,"صور":2575,"ضاف":1670,"ضاء":2485,"صنا":1592,"صمة":2695,"طق ":1799,"صية":1437,"صين":1293,"طي ":1477,"ظة ":6257,"طان":3004,"طال":3847,"طاق":1625,"طار":1511,"طائ":1561,"ضمن":5751,"طبي":3127,"ضية":1599,"طري":4168,"ظم ":1728,"عب ":10724,"عة ":23265,"طعة":4116,"عد ":10757,"عر ":1910,"طلح":1521,"طلق":2117,"طلا":1304,"عض ":2738,"طقة":8653,"طول":4093,"طوي":1533,"طور":2637,"ظام":2445,"طني":2154,"طين":2907,"عل ":1213,"غة ":3658,"عن ":10475,"عه ":1714,"عي ":3208,"س، ":1536,"ري ":14515,"رن ":3048,"زب ":1762,"ره ":3780,"زة ":3835,"رو ":2228,"رى ":5751,"رق ":5370,"رك ":3561,"رجي":1396,"ردن":4580,"ردي":1386,"رتب":1460,"ربع":2964,"ربي":13069,"رجة":3401,"رجا":1686,"ذين":1312,"رتف":1376,"رته":1217,"رتي":1328,"ذلك":5018,"ران":5308,"ربا":2177,"راه":1718,"راي":2650,"رة،":1284,"راً":1437,"راط":1969,"راض":2661,"راع":2044,"راف":2275,"راك":1677,"راق":4338,"رام":2614,"رال":1755,"راب":2845,"رائ":2827,"راج":1725,"رات":7689,"رئي":4628,"رار":2322,"راد":1959,"راز":1251,"راس":3033,"راء":3735,"ذكر":1364,"رأس":1309,"سس ":1682,"سر ":1409,"زي ":2367,"سة ":7637,"ست ":2491,"رسة":1580,"رسا":1400,"سا ":2325,"رسو":1660,"رسم":1498,"رسي":1211,"سب ":2371,"سم ":8379,"زرا":1870,"سل ":2019,"ريو":1361,"ريم":1723,"رين":5069,"ريك":7429,"ريل":2022,"ريف":1889,"ريق":6975,"ريا":9368,"ريب":2334,"ريخ":4312,"ريد":2596,"ريت":1569,"رية":19287,"ريط":2009,"رير":1548,"ريس":2387,"روي":1263,"روف":2873,"رون":3533,"روم":2673,"روع":1294,"روس":3818,"روت":1414,"روا":3408,"روب":3276,"رها":3942,"رنس":4822,"زار":1503,"رنا":1951,"زائ":1892,"سع ":1228,"ركي":2730,"ركز":6519,"ركا":1845,"رقم":1561,"ركة":6574,"سط ":4117,"رقي":3306,"رفي":1260,"رقة":1238,"شر ":3663,"سن ":1880,"سي ":9223,"سري":1260,"سرا":2022,"صب ":1600,"ساع":1424,"سام":1889,"سال":1558,"سبب":1717,"سان":4418,"سبا":5421,"سبت":1759,"سبة":1761,"ساح":5795,"ساس":2185,"سائ":1433,"سات":2066,"ساب":4056,"ستخ":5235,"ستا":3297,"ستر":2386,"زوج":1305,"زيو":1415,"ستع":2199,"ستق":2335,"زيا":2152,"ستي":2146,"زية":3984,"ستو":3126,"زيز":1328,"زير":3447,"صر ":6441,"سلة":1569,"سكن":1586,"سلا":5866,"سكر":1892,"سلي":1241,"سمب":1534,"سمة":2358,"سلم":2588,"سما":2312,"سلط":1809,"سلس":3040,"سكا":4595,"سطس":1421,"سطي":2518,"صة ":3360,"سعو":3150,"شرك":4217,"شرق":6331,"شرو":1260,"شري":3368,"صل ":5101,"ضة ":1538,"ضا ":2191,"سوف":1368,"سون":1558,"شتا":1442,"سوي":1779,"سوا":1477,"سود":1542,"سور":3979,"شاع":1427,"سمى":2128,"سنة":6767,"سمه":2089,"شار":4714,"سمي":3653,"شخص":2650,"صف 
":1872,"سين":3825,"سيم":3305,"سيق":1533,"سيس":1471,"سير":1927,"سيد":1957,"سية":7660,"سيا":7442,"شته":1322,"يين":4613,"يقو":1288,"يقي":3960,"يقع":4212,"يكا":2890,"يكي":7123,"يلع":3028,"يلة":3508,"يكو":2936,"يلا":3946,"يلي":4907,"ين،":2158,"يلم":2210,"يما":3516,"يمة":2518,"يلو":2309,"يمت":1285,"ينا":6385,"يمن":1503,"يمك":1768,"ينت":2412,"يمي":5784,"ينة":14065,"ينو":1382,"يني":7771,"ينم":1347,"ينه":1443,"يها":5753,"يوس":2044,"يور":1429,"يوج":1408,"يوا":1512,"يون":8407,"يوي":1456,"يوم":3633,"يول":2410,"يقا":3504,"يقة":2474,"يفي":1837,"يفة":1491,"يضا":3644,"يسي":3184,"يسم":2811,"يزي":6603,"يست":3754,"يسا":1431,"يره":2025,"يرو":2802,"يري":3325,"يعي":2030,"يعر":1330,"يعة":1447,"يعت":2839,"يطا":4786,"يجي":2143,"يتا":1503,"يتو":1517,"يتي":2848,"يتم":2214,"يته":1496,"يدي":4565,"يرا":5132,"يرة":7852,"يحي":1325,"يدة":3171,"يدا":2277,"يال":1789,"يبا":1460,"يان":5624,"يام":1663,"ية،":7809,"ياً":2041,"يبل":2267,"يبي":2606,"يئة":1233,"يا،":2810,"ياس":4902,"يار":3005,"ياد":1919,"ياض":3099,"ياء":2370,"يات":14201,"ياب":2680,"يائ":1532,"وز ":1740,"ور ":10877,"ود ":7936,"وض ":4335,"نفس":2468,"وس ":4788,"هاد":1315,"هاج":1381,"هاز":1295,"هار":1592,"هاش":2115,"نما":2137,"ها،":1369,"وع ":4025,"نقل":1354,"وط ":1225,"نيس":1670,"نيا":8848,"نية":21320,"نوي":1623,"نون":2797,"نور":1366,"نوب":7417,"نهم":1521,"نوا":4198,"نوف":1932,"نوع":3104,"هاي":1376,"هام":1659,"نها":10954,"نهر":1868,"وف ":3389,"هذا":5637,"وق ":2861,"نين":2937,"نيو":3670,"وم ":10050,"ون ":24085,"هرة":1975,"هذه":5272,"هرب":1206,"وك ":1613,"ول ":15598,"وي ":5541,"ي، ":7950,"وى ":2336,"مغر":2269,"معي":2389,"معل":1517,"معه":1532,"معر":3470,"معت":1206,"معة":3514,"معا":4652,"هد ":3199,"هر ":6421,"مقا":7017,"مقر":1875,"مقد":1322,"ملة":1472,"ملا":2140,"مكن":2428,"مكا":1474,"مكت":1317,"منا":3879,"ممل":5220,"نائ":1835,"ناء":3405,"ناد":6151,"منذ":4141,"منت":3699,"نات":4769,"منظ":1521,"منط":9032,"ناط":2245,"ناص":1498,"منص":1619,"ناس":1610,"ناع":1893,"ملك":8686,"مما":1241,"ملي":3442,"ممث":1944,"مهو":2239,"موا":8484,"نبي":1323,"موج":1832,"مور":1727,"مود":1263,"موس":3401,"موع":3576,"موق":1778,"ناك":1264,"نام":2198,"نان":5318,"نبا":1540,"مها":3875,"منه":3913,"ناي":2308,"مني":1297,"نتق":1216,"نتش":1566,"نتخ":2464,"نتج":1837,"نتر":1240,"نتا":3081,"مون":2835,"موم":1267,"مول":1330,"ميل":3548,"ميع":1518,"ميد":1868,"ميز":2165,"مير":3381,"نتم":1247,"ميا":3833,"ميت":1549,"مية":10968,"نتي":2558,"نجل":4522,"مين":4370,"هـ ":2122,"ندا":2054,"ندم":1329,"ندو":1295,"ندر":1700,"ندس":1559,"ندي":3768,"هل ":1310,"هم ":9416,"نسا":3625,"نسب":2400,"وا ":3570,"نسم":2218,"نشا":1687,"نسي":6788,"وب ":7482,"هو ":28172,"وة ":2028,"وت ":2509,"هي ":25821,"نطق":9203,"نظا":2438,"نظر":1914,"نظم":2245,"نظي":2652,"ومن":4650,"ونا":4025,"ومي":3551,"ونس":2979,"وما":5525,"وله":2128,"ولي":9104,"ومة":1640,"ولو":3124,"ولى":4041,"ومت":1425,"ولك":1651,"يع ":3808,"ولة":5953,"وكي":1778,"ولا":11130,"ولد":6407,"وكا":4860,"يط ":1772,"وقا":1242,"وفم":1445,"وفي":6567,"يض ":1235,"وقد":4338,"وقع":2237,"وفا":1769,"يش ":2562,"يس ":7953,"يق ":5733,"يف ":4553,"وين":2166,"ويو":1325,"ويق":1310,"ويل":2767,"ويس":2024,"ويع":1988,"وية":5123,"ويت":3787,"وير":2162,"ويد":1280,"وهي":6413,"وهو":7384,"وني":7306,"يه ":8938,"يو ":7129,"يم ":12246,"ين ":40630,"ًا ":1445,"يك ":2151,"يل 
":12694,"وتو":2723,"وتق":1399,"هير":1949,"وتع":2163,"هول":1332,"وتر":1472,"وتت":1213,"وجه":1246,"وجي":2191,"وجو":2155,"وحد":1804,"هيم":1268,"وجد":2346,"واع":2005,"واس":3039,"وار":3337,"هند":2766,"واد":3067,"واح":3498,"واج":1496,"وات":4032,"واب":1388,"هنا":1458,"وائ":3044,"واء":1341,"هما":1800,"هور":3656,"وبي":4664,"وبل":1243,"وبر":3000,"واي":2060,"واق":2063,"وال":49239,"وان":5702,"وبا":3324,"وري":12364,"وسا":1602,"وزي":2144,"يا ":24320,"وسط":5112,"وسي":5438,"يب ":4787,"ية ":139658,"ودا":1655,"ودة":1389,"ودي":4370,"ورا":3830,"ورد":1319,"ورت":1259,"ورة":4176,"وزا":1263,"ورو":3091,"يد ":18420,"ير ":22585,"يز ":3857,"يث ":7060,"يت ":4897,"يج ":1671,"وطن":2615,"يح ":1259,"وعة":3516,"وعا":1992,"يخ ":5401,"وعي":1203,"لد ":7149,"لس ":3640,"لة ":25605,"كو ":1397,"لت ":2572,"لا ":8651,"كن ":4742,"قسم":1469,"لب ":3341,"قسي":2428,"لح ":4377,"لث ":1339,"م، ":3933,"كي ":5663,"لق ":2574,"لف ":3842,"كثي":2170,"له ":10668,"كسي":1295,"ما ":21645,"كرو":1414,"لم ":13383,"كري":3468,"لك ":9653,"كرا":1233,"كرة":8153,"كات":3795,"قنا":1376,"كار":2777,"لغ ":5250,"قلي":3507,"قيم":1916,"قيق":2120,"كثر":3184,"قية":4744,"كتو":3421,"قيا":3458,"كتب":2651,"كتا":3440,"قوم":2204,"قوا":2182,"كبي":3060,"كبر":3750,"كان":20675,"كام":2151,"كال":2865,"قني":1349,"كر ":2225,"كز ":6044,"كس ":1605,"قل ":2905,"فرا":2274,"قم ":1363,"فري":3990,"فرن":4775,"فرق":1262,"كا ":2472,"كة ":13804,"قى ":1356,"ل، ":2187,"قي ":5360,"فضل":1264,"فظة":6038,"فير":1961,"فيز":1412,"فية":2824,"فيد":1310,"فيل":3788,"فيه":4815,"فين":1921,"قتص":1591,"فيا":1360,"قدر":1289,"قدم":9632,"قدي":3437,"قرن":2599,"كم ":5038,"قرى":1679,"قري":5990,"كل ":7728,"قرا":2563,"قرب":1568,"فلس":2794,"فلا":1746,"قاط":5379,"قاف":1502,"قال":3148,"قائ":2080,"قاب":1505,"فنا":1210,"قات":2176,"قاد":1517,"قار":3777,"فمب":1413,"فور":1725,"قبل":4422,"قان":1522,"قام":2893,"لمع":7155,"لمغ":2396,"ماء":2699,"لمر":5941,"لمس":9884,"لمش":2559,"لمص":4216,"لمط":1236,"لمت":9057,"للو":1687,"لمة":1580,"لمج":4154,"للي":1280,"لمخ":2131,"لمح":5998,"لمد":5967,"لمؤ":2072,"للم":3559,"لمب":2156,"لله":4901,"لما":10778,"لنف":1218,"لنس":1662,"ماس":1244,"لنظ":1668,"ماع":3269,"لمي":7408,"مات":4917,"مار":7604,"ماد":2511,"لند":3683,"لمل":4323,"لمك":1995,"لمق":3111,"لمو":6905,"لمه":2311,"لنب":1670,"لنا":3789,"لمن":8707,"لمم":6914,"مائ":1529,"مؤس":2067,"لكر":4170,"لقي":1879,"لكب":1845,"لكا":2896,"لكت":3059,"لكة":5304,"لقو":2652,"للغ":3104,"للع":2547,"للح":1276,"لكي":2900,"للت":1711,"لكو":3675,"للب":1703,"لكه":1307,"للا":2747,"لكن":3420,"لكل":2395,"للأ":1511,"لفر":5137,"لفا":3058,"لفة":1423,"لفت":1915,"لقر":6914,"لقد":5675,"لقص":1290,"لقط":1312,"لفل":2420,"لقب":2460,"لقا":4848,"لفن":2512,"لفي":3665,"نس ":2245,"ند ":3871,"نذ ":4066,"لعب":7815,"لعا":12423,"لعد":3105,"لعش":1359,"لعز":1344,"لعر":10614,"لعص":1325,"لعل":5280,"لغا":2170,"لعم":4185,"لغة":3148,"لغر":4050,"لطب":2889,"لطا":3030,"لطر":1829,"ه، ":2068,"مي ":10289,"لطي":1270,"و، ":1265,"ني ":19921,"نى ":2242,"مصر":6502,"مصط":1846,"نو ":1687,"هة ":2433,"مصا":1239,"مسل":3430,"نه ":6502,"مسي":2261,"مشا":1795,"ها 
":46135,"مست":5627,"مسا":7688,"مرك":7198,"مري":7170,"مرو":1205,"مرا":3794,"مرة":1768,"مرب":1566,"مرت":1375,"مدي":14448,"مدر":2755,"مدن":2068,"مخت":2389,"مدا":1536,"محل":1510,"محم":5603,"ليو":6519,"ليه":5228,"مجا":2341,"لين":3186,"مجم":3939,"محا":8346,"مجل":3322,"ليا":10564,"ليب":2394,"لية":13788,"متو":1712,"ليز":4850,"ليس":2021,"ليد":6582,"ليم":5806,"ليل":2383,"مثل":6304,"ليف":2446,"لوي":1597,"لون":3102,"متا":1234,"لول":3949,"لوم":4251,"متر":3819,"متد":1293,"متح":5418,"متع":1688,"لهن":1850,"لوا":3317,"لهو":1322,"مبي":1451,"لوج":1868,"لوح":1460,"لور":2028,"لوس":2838,"لوط":2044,"لوك":1356,"مال":13395,"مام":2587,"لنق":1670,"لنو":1865,"مان":13056,"مبا":2715,"لها":8071,"ماي":3281,"لهج":1315,"مبر":6074,"لأص":1592,"لأس":4356,"لأر":8270,"لأد":1635,"لأع":1940,"لأخ":2355,"لأح":2438,"لأب":1984,"لأف":1387,"لأل":2944,"لأك":1560,"لأن":3516,"لأم":7460,"لأو":8534,"لأي":1221,"لإس":5236,"كلا":1532,"مر ":4953,"مس ":1733,"مد ":7895,"مة ":20092,"لو ":1775,"مت ":2546,"لى ":49394,"لي ":18230,"ن، ":5582,"مج ":2119,"لسف":1264,"لشا":2194,"لسن":1963,"لسك":1528,"لسل":6280,"لسي":5235,"لشب":1219,"لسو":4213,"نب ":1230,"مه ":3791,"لسع":2105,"لسط":2488,"لسا":4332,"من ":76224,"نا ":5184,"لصو":1503,"مى ":3080,"نت ":7260,"لصي":1707,"لشم":4027,"لصا":2983,"لشه":1224,"لصح":2065,"نة ":25901,"لشي":3442,"لشر":6885,"لشع":2442,"لدي":8697,"لدو":6858,"لدر":3930,"لخل":2460,"لدا":2727,"لدة":1345,"لحي":2699,"لري":2867,"لرو":3390,"لرس":1728,"مل ":8293,"لذي":7896,"لرئ":2291,"لرا":2519,"لته":1222,"كيا":1952,"لتن":1720,"لثا":5157,"كية":5479,"لتو":2940,"لتي":15294,"كيل":2229,"كيم":1808,"لجا":2810,"لتا":4876,"كون":7678,"كوم":2797,"لتح":2710,"لتج":1719,"كوي":2650,"لتر":3418,"لتص":1218,"لتش":1260,"لتع":2665,"لتل":1255,"لتق":2650,"لحد":2696,"لجو":1720,"لجي":2140,"لحر":4857,"لحس":1519,"لحم":1993,"لخا":2598,"لحق":1482,"لحك":2052,"لجد":1516,"لجب":1382,"لجز":3847,"لحا":4240,"لجن":5221,"لجه":1784,"لجم":3177,"لاث":2511,"لاج":1537,"لاح":3135,"لاد":4259,"كند":1265,"لار":1311,"لاب":1932,"لات":8075,"لاق":3425,"لاف":2118,"لاس":6196,"لاع":5890,"كلي":3267,"لإي":1693,"مع ":8709,"كلم":2454,"لإم":2003,"لإن":6523,"كما":4559,"كور":1677,"لبو":1874,"لبي":3816,"لبل":3617,"لبن":3363,"كول":1392,"لبر":5552,"كهر":1331,"لبح":3898,"لاي":9010,"كنه":1287,"لام":9049,"لان":6278,"لبا":3771,"لال":6356,"لبط":1368},"n_words":[11749565,13990834,9440598],"name":"ar"}
\ No newline at end of file
diff --git a/nlp_resource_data/langdetect/profiles/bg b/nlp_resource_data/langdetect/profiles/bg
new file mode 100755 (executable)
index 0000000..201663a
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/bg
@@ -0,0 +1 @@
+{"freq":{"D":2636,"E":1936,"F":2232,"G":2334,"A":4351,"B":3121,"C":4592,"L":2480,"M":3950,"N":1781,"O":1368,"H":1934,"I":6368,"J":1261,"K":1010,"T":2986,"W":1604,"V":1965,"P":3771,"S":5211,"R":2316,"X":1029,"f":3146,"g":5865,"d":9193,"e":32549,"b":4459,"c":9844,"a":30637,"n":21831,"o":21963,"l":16413,"m":12336,"k":7480,"h":8702,"i":28615,"w":2350,"v":3143,"u":15394,"t":17966,"s":19762,"r":22456,"p":5894,"z":1439,"y":4095,"x":1681,"²":3527,"̀":1150,"μ":1054,"ν":2280,"ο":2756,"ι":1833,"κ":1014,"λ":1144,"ε":1298,"α":2356,"ί":816,"σ":1479,"ς":1823,"ρ":1221,"τ":1963,"ь":4950,"ю":16520," o":1042,"я":119927,"ш":24527,"щ":37947,"ъ":118638," k":4461," d":1024,"ф":35061,"х":26992," e":2141,"ц":57885,"ч":69969,"р":442208,"с":363493," a":954,"т":513431,"у":110117,"ѝ":1026," t":1519," p":1297," s":806,"Й":1921,"И":10722,"Л":9341,"К":22064,"Н":13530,"М":19622,"П":22329,"О":13337,"Б":18556,"А":21177,"Г":11598,"В":16284,"Е":7594,"Д":15403,"З":5627,"Ж":1534," J":1196," K":898,"Ш":3131," H":1769,"Щ":2903," I":3444," N":1367,"Ю":3236,"Я":1596," O":956," L":2204," M":3517," B":2694,"Т":16259,"У":4389," C":3884,"Р":16110," A":3726,"С":33637," F":2034,"Ц":2988," G":2045,"Ч":3270,"Ф":8267," D":2259," E":1564,"Х":7715,"л":282008,"к":273267,"й":56719,"и":734943,"п":177542,"о":637033,"н":589099,"м":161532,"г":129585," S":4264," R":2038,"в":294348,"б":87024," P":3303,"а":881417," W":1432,"з":132692,"ж":46599," V":1365,"е":647345,"д":212987," T":2518," А":17337," Б":18206," В":15997," Г":11364," Д":15029," Е":7181," Ж":1503," З":5448," И":10217," Й":1919," К":20624," Л":9135," М":19107," Н":12710," О":12505," П":21699,"Co":972,"I ":2499," б":25135," а":29615," г":41568," в":81473," е":87884," д":46722," з":26978," ж":5999," и":93394," л":8242," к":53137," н":137556," м":36472," п":102853," о":88041," Р":15097," С":31643," Т":15680," У":4153," Ф":7812," Х":7558," Ц":2895," Ч":3246," Ш":3096," Ю":3200," Я":1567," т":27331," у":9960," р":36272," с":111437," ц":6298," ч":13239," ф":16514," х":6966," ш":3296," щ":6810," ю":3698," я":2381,"Ca":930,"Ma":1469,"II":1888,"Th":962,"b ":891,"a ":4950,"i ":1873,"ge":970,"he":2112,"ha":1204,"g ":1075,"ea":1397,"ec":826,"ed":916,"de":1775,"di":1045,"do":824,"h ":1014,"el":2062,"en":3274,"et":1200,"es":2494,"er":5287,"ca":1035,"e ":9458,"da":1108,"f ":974,"co":1092,"ci":1241,"ch":1482,"ce":1332,"d ":2383,"at":2587,"as":1457,"ar":3743,"al":2739,"ai":928,"am":1115,"an":4726,"ac":1396,"ad":830,"ae":1227,"nu":977,"nt":2097,"ns":1121,"no":824,"of":927,"om":1215,"on":3740,"ol":1525,"m²":3381,"ot":815,"os":1234,"ou":906,"or":3049,"r ":3015,"pe":805,"lo":1036,"ll":2061,"o ":2327,"ma":1173,"mb":856,"me":1262,"mi":900,"na":1825,"nd":1832,"ne":1694,"ng":1442,"ni":2114,"m ":2526,"km":4286,"li":2736,"le":2314,"la":2281,"n ":4895,"hu":824,"hi":896,"id":1203,"ic":2468,"ia":2261,"ig":1082,"ie":1280,"k ":904,"ir":844,"is":2366,"it":1525,"iu":2316,"il":1887,"in":4071,"io":1821,"l ":2342,"y ":2050,"vi":1029,"ve":1027,"x ":1094,"ul":1045,"ur":1295,"us":5560,"um":1527,"un":918,"tu":1122,"to":1372,"tr":962,"te":2234,"ti":2705,"th":2095,"ta":1616,"ss":891,"st":2061,"se":1041,"si":1173,"rt":1054,"ro":1958,"rn":867,"ri":3533,"re":2281,"rd":988,"ra":2624,"t ":3334,"s ":10393,"² ":3527,"ς ":1819,"ν ":1021,"К ":979,"В 
":2290,"юг":1081,"юз":1052,"юж":980,"юл":1213,"юн":1123,"юр":1010,"ют":1002,"юц":3295,"юч":2002,"яд":1101,"яг":923,"яв":7999,"ян":5481,"ям":2556,"ял":2107,"як":3070,"ях":803,"яс":1766,"ят":23106,"яр":1490,"ящ":989,"щи":5926,"ще":7866,"ща":13149,"щт":2297,"що":3015,"щн":816,"ъв":5386,"ъг":8535,"ъд":2915,"ъе":2628,"ъж":1821,"ъз":5682,"ъб":2128,"ът":9515,"ъч":1319,"ъц":2116,"ъщ":3958,"ъл":18163,"ък":3158,"ън":6470,"ъм":3577,"ъп":2106,"ъо":998,"ъс":7274,"ър":28553,"ьо":4687,"хе":1611,"хи":3810,"хн":1728,"хо":7132,"хр":1667,"ху":1707,"ха":4667,"ци":32071,"цк":2149,"ца":7811,"це":9248,"чл":917,"чн":11748,"чо":1988,"чи":7731,"чк":2263,"чу":949,"чр":804,"цъ":803,"че":23255,"ча":14633,"чв":2213,"шн":3596,"шк":2046,"ши":6517,"ше":4003,"шв":1228,"ша":3103,"ск":74214,"см":3598,"сл":11817,"со":8891,"сн":7769,"ср":4324,"сп":8021,"св":7943,"се":41908,"си":23052,"рш":1570,"ръ":16028,"ря":3303,"са":17003,"рс":15501,"рт":9867,"ру":13758,"рх":2778,"рц":2298,"тн":10904,"тл":2176,"тк":3861,"тс":3598,"тр":27985,"то":91021,"те":76446,"тд":804,"тв":20249,"ти":47326,"сь":1346,"съ":18957,"ся":1659,"та":97852,"тб":3928,"су":2617,"ст":96170,"сц":1590,"ур":10561,"уп":6474,"ут":4765,"ус":9980,"ум":6401,"ул":6590,"ун":6478,"уи":1236,"уз":3299,"ук":5239,"уд":4037,"уг":3988,"уж":1616,"уе":1334,"уа":3124,"тя":2319,"уб":6162,"ув":3514,"тъ":10928,"тт":3628,"ту":8218,"фу":2995,"фс":1030,"фр":3456,"фо":5112,"фи":11161,"фе":4259,"фа":3044,"уч":6066,"уш":4842,"ух":1233,"уц":1227,"Щ ":2591," I ":819," II":1042," Ma":1457,"а ":358572,"С ":937," Ca":925," Co":962,"Ис":1007,"Им":977,"Ин":1158,"к ":21804,"Из":2286,"Ив":1372,"й ":18686,"Ле":1710,"Ли":1795,"Ла":1638,"Ку":1413,"Кл":1286,"Ко":5445,"м ":14788,"Кр":2175,"Ке":836,"Ки":1609,"Ка":6073,"л ":25621,"Йо":1544,"На":5278,"Не":1875,"Ни":2119,"Мо":2461,"о ":136170,"Ма":9260,"Ми":2960,"Ме":2505,"Ло":1497,"н ":69303,"Лу":1452,"Па":3246,"Пе":2744,"Пи":1272,"Пл":3225,"с ":29451,"По":4526,"Оп":1050,"р ":30654,"Ос":2133,"Ор":1247,"От":1087,"Об":2207,"Ок":2446,"Но":2049,"п ":2647,"в ":63211,"Ам":1040,"Ан":3455,"Ал":2956,"Ав":1300,"Ба":2769,"Ат":831,"Ар":1719,"б ":1391,"АЩ":2595,"Во":969,"д ":24331,"Ве":2399,"Ви":2290,"Въ":2366,"Га":1581,"Бо":2717,"г ":21217,"Бр":1925,"Бе":2785,"Би":1506,"Бъ":3741,"Ва":2968,"Бу":1134,"Ди":2316,"Дж":3197,"Де":2501,"Др":1035,"До":1935,"ж ":1730," Th":951,"Ед":825,"Ев":1551,"Ге":2579,"Гр":2290,"е ":164103,"Го":1786,"Гъ":1180,"Да":1394,"и ":202644,"За":3607,"Ел":966,"з ":12874,"Ша":824,"Юж":1162,"ю ":929," km":4266,"я ":66327,"Ст":4040,"Та":1626,"Съ":4599,"Ти":1166,"Те":2600,"ф ":1914,"То":5455,"Тр":1786,"Ту":994,"Тя":1180," e ":1798,"х ":2350,"Пр":4433,"Пъ":1179,"СА":2694,"Ра":3306,"Ре":3655,"Ри":2528,"т ":98842,"Ро":3022,"Ру":2132,"СС":822,"Са":2940,"Св":1625,"Си":2245,"Се":4020,"Сл":1252,"Ск":946,"Ср":881,"Сп":1322,"Со":2387,"у ":10195,"Це":1510,"ш ":1936,"Че":1438,"щ ":4203,"ц ":3623,"Фо":984,"Фр":2381,"Фе":819,"Фи":1127,"Фл":885,"Ха":2310,"Хр":971,"Хо":1081,"ч 
":2829,"Хе":1295,"лю":6411,"мб":1834,"ля":9658,"ма":34017,"мв":3536,"ме":34130,"ми":25035,"лм":3442,"лн":16796,"ло":33848,"лс":4866,"лт":3196,"лу":5370,"лъ":2856,"къ":5642,"лв":914,"лб":2941,"ла":34165,"лж":1120,"ле":40750,"лд":1625,"лг":12737,"лк":4727,"ли":64840,"лз":2343,"км":1179,"кн":2476,"кл":6420,"кр":18141,"кс":6288,"ко":52495,"кт":11901,"ку":5862,"кц":1433,"ка":68697,"ки":58354,"кв":3364,"ке":7674,"йн":7255,"йо":1892,"йк":1995,"йл":1354,"йм":901,"йс":13136,"йт":4652,"ия":73064,"ищ":2902,"иш":2457,"йд":1453,"йв":1615,"ио":15275,"ип":3934,"им":23532,"ин":58356,"ик":36656,"ил":29439,"ии":4086,"ий":12560,"иц":13361,"ич":23215,"иф":2446,"их":3360,"ит":63592,"ир":17875,"ис":34276,"ри":74729,"рк":5043,"рл":2965,"рм":7553,"рн":12220,"ро":46663,"рп":941,"ра":98873,"рб":2434,"рв":7403,"рг":7600,"рд":5085,"ре":68474,"рж":3084,"рз":1336,"пъ":7097,"пр":53247,"пт":2156,"пс":2576,"пу":5784,"пи":12418,"по":49161,"пл":7108,"оя":7068,"па":18259,"пе":14376,"ощ":6422,"ош":1648,"оч":6409,"оц":2874,"ос":31400,"ор":52184,"оп":12711,"оо":918,"ох":1705,"оф":5618,"оу":1691,"от":71852,"ок":20081,"ол":46187,"ом":18413,"он":40545,"ож":8402,"оз":8385,"ои":10610,"ой":13778,"ов":49786,"ог":14314,"од":35825,"ое":7078,"оа":1786,"ня":5762,"об":22311,"нъ":1330,"нц":6022,"нч":1314,"нт":20098,"нс":29338,"нф":1197,"ну":2379,"но":61747,"нн":7922,"нр":827,"нк":4229,"нз":932,"ни":106631,"не":37699,"нг":9593,"нд":12141,"мя":2035,"на":206302,"мъ":3046,"му":8961,"мс":4731,"мп":7095,"мо":14552,"мн":4561,"ге":6356,"ги":12185,"гн":1340,"го":24434,"гл":8226,"гр":23857,"гу":2534,"гъ":1359,"дв":5622,"да":30493,"вг":1251,"вд":1142,"ве":44922,"ви":31125,"вк":2396,"вл":5218,"вн":10484,"во":35579,"вр":11007,"вс":5682,"ву":1966,"вт":3431,"вш":985,"въ":12004,"га":25651,"вя":3485,"би":11046,"бе":7947,"бр":9346,"бн":1672,"бо":12574,"бл":10831,"бу":4434,"бс":2154,"бя":950,"ва":58204,"бъ":10139,"бщ":5858,"ад":29187,"ае":3072,"аж":2734,"аз":20177,"аб":6725,"ав":32263,"аг":6621,"ам":19866,"ан":92124,"ап":10532,"аи":1848,"ай":16768,"ак":18833,"ал":43351,"ах":2622,"аф":3137,"ач":7356,"ац":9711,"ас":30917,"ар":52528,"ау":3593,"ат":95702,"ая":1940,"ба":6691,"ащ":6812,"аш":2543,"зт":3682,"зс":1637,"зр":2676,"зп":8092,"зх":1144,"зу":1891,"зк":2737,"зи":16355,"зо":4852,"зн":6991,"зм":2937,"зл":3437,"ив":14438,"иг":8987,"иа":9057,"иб":2301,"иж":2121,"из":36345,"ид":10074,"ие":26794,"зъ":1412,"жо":1971,"жу":937,"жи":10091,"жк":1307,"жн":4179,"за":35424,"зб":1209,"зв":12292,"зг":1227,"зд":6172,"зе":4307,"еф":2109,"ет":45213,"ес":32066,"ер":49860,"еп":8513,"ео":5548,"ен":105327,"ем":23126,"ел":47225,"ек":18670,"ей":11615,"еи":962,"ез":20414,"еж":9844,"ее":2077,"жд":7425,"же":11581,"жа":5863,"ея":1281,"ещ":3642,"еч":4774,"еш":4262,"ех":2366,"ец":5302,"дс":7551,"др":9809,"ду":10303,"дн":14843,"дм":2251,"дп":805,"до":23013,"ди":35808,"дл":987,"дк":815,"де":31362,"дж":3808,"еб":3744,"ев":22477,"ег":6758,"ед":43311,"еа":2059,"дя":1235," th":1043,"дъ":7215," ар":2430," ас":835," ба":2316," ав":2928," ад":1436," ал":2238," ак":2281," ан":5186," ам":5117," ап":1113," бу":1045," ва":1142," бъ":8445," би":3599," бе":1826," бр":3218," бо":2578," бл":1708," вт":1082," въ":7192," ви":5408," ве":5179," во":6415," вс":1918," вр":3197," вл":1868," вк":1271," дв":3064," да":7239," го":11993," гл":1931," гр":14818," ге":2801," ев":904," ед":7019," дъ":3603," дн":1385," до":9466," др":4782," ду":4688," де":7755," ди":3107," же":950," еп":830," ел":1747," ек":959," ез":2706," зе":1022," за":22793," зв":852," жи":3991," зн":1109," иг":2469," ид":823," из":17689," ил":7883," 
ин":3652," им":7956," ит":1193," ис":3053," ка":12665," ки":1971," кр":5742," ко":21829," кн":1333," км":1031," кл":2272," ку":1841," ла":1472," къ":3355," ли":3157," ле":2135," ме":7169," ми":3625," ма":9333," мо":4895," мн":1906," му":6862," ни":1121," не":9125," на":119378," но":5035," ок":9717," оз":981," од":1340," об":13021," ня":1648," ощ":1554," оф":1095," от":45416," ор":4392," ос":5533," оп":3022," по":33250," пл":5058," пи":3055," пе":4542," па":4088," Ре":3648," Ра":3274," Ро":3019," Ри":2525," Пр":4415," СА":2616," Пъ":1174," Пе":2739," Па":3238," с ":12955," По":4518," Пл":3222," Пи":1267," От":1080," Ос":2127," Ор":1245," Оп":1048," Те":2595," Ти":1162," То":5392," Тр":1782," Ст":4014," Съ":4598," Та":1620," Св":1624," Си":2198," Се":4013," Сл":1251," Ск":944," Сп":1315," Ср":879," Со":2383," Ру":2131," Са":2935," Фр":2376," Фо":984," Фи":1126," Фл":885," Фе":819," Тя":1180," Ту":993," Це":1509," Хр":970," Хо":1079," Хе":1290," Ха":2307," Ша":822," Че":1435," Юж":1162," Ба":2734," Ат":830," Ар":1714," в ":45152," Ан":3448," Ам":1038," Ал":2947," Ав":1297," Ва":2965," Бъ":3733," Бу":1133," Бо":2714," г ":8207," Бр":1924," Бе":2781," Би":1492," а ":4115," Ед":825," Ев":1550," Ди":2315," Дж":3188," Де":2497," Др":1035," До":1933," Ел":962," Въ":2363," Га":1572," Ве":2397," Ви":2282," Во":966," Гъ":1180," Да":1391," Ге":2574," е ":70638," Го":1784," Гр":2287," Ис":1003," Ин":1152," Им":970," Йо":1543," Ки":1607," Ке":833," Ка":6065," и ":47012," За":3604," Ив":1368," Из":2283," Мо":2455," На":5243," Не":1867," Ни":2110," Но":2047," Об":2206," Ок":2446," Кл":1282," Ко":5424," Кр":2158," Ку":1412," Ла":1629," Ле":1707," Ли":1793," Ло":1496," н ":881," Лу":1447," Ма":9223," Ме":2502," Ми":2950," В ":2161,"Зап":1336,"Ива":1277,"II ":1339,"Кар":1717,"Кал":1241,"Кон":1216,"Кол":892," ра":14338," ре":11569," ри":1504," ро":5884," пр":44389," пс":1741," пу":1035," пъ":4915," св":5747," си":8451," се":28117," сл":4935," см":1742," ск":1388," сп":4517," ср":3911," со":1964," ру":1874," са":8475," ти":1471," те":8827," то":4709," тр":4417," сц":1244," ст":9556," су":971," та":2605," съ":15734," ус":1789," уп":1428," ун":1177," ту":1481," тя":1501," тъ":1070," фо":2263," фр":2272," фу":2793," фе":1675," фи":5383," фа":1634," уч":2779," хр":1022," хо":1370," ху":840," хи":1498," ха":1277," це":3524," чо":1060," чл":866," чи":1314," че":4520," ча":4234,"Мак":3324,"Мар":2452," ша":881," ща":6448," юж":900," юг":1007,"Южн":1054,"Нам":821,"Нас":1183,"Ник":1456,"ад ":8166,"ав ":839,"ам ":1105,"ан ":12026,"ак ":1047,"ал ":5489,"ай ":7880,"Окр":2063,"авт":1723,"ага":1670,"авя":1374,"агр":1076,"аго":1396,"ада":3119,"ади":3451,"аде":5585,"адо":1154,"адм":1400,"адн":2853,"аем":863,"би ":1015,"ажд":973,"Опе":874,"аба":833,"або":2518,"ава":11168,"авн":2861,"авл":2542,"аво":1409,"аве":2231,"ави":4818,"алс":906,"алн":8729,"ало":2628,"алб":1650,"ала":2998,"алк":1807,"али":10040,"але":5565,"амо":2012,"амп":965,"ама":2784,"ами":3947,"аме":7196,"анн":912,"ано":4066,"анс":13953,"ант":4813,"анц":1702,"ана":8635,"анд":5902,"анг":4584,"ани":19618,"ане":11702,"анк":1502,"азр":1036,"азп":4124,"азо":828,"азн":1081,"азл":1632,"ази":2620,"азв":2060,"аза":1484,"айс":963,"айк":1056,"айо":1501,"айн":2031,"акт":4848,"ако":1531,"акс":811,"аке":4784,"аки":1127,"ака":2207,"Пар":949,"ас ":2329,"ар ":3643,"ат ":10948,"Пло":2474,"ба ":1349,"ая ":1362,"ащ ":1410,"Пет":1186,"Пър":1058,"САЩ":2595,"Пол":1310,"При":1038,"Пре":1768,"Про":1122,"Рим":1524,"АЩ 
":2589,"Род":1271,"Раз":1500,"Реп":2237,"Але":1070,"Сан":921,"Ант":866,"Рус":1292,"Соф":1291,"Сев":1817,"Све":1167,"Бра":846,"Съе":2317,"Тя ":1123,"Ста":1292,"Сто":900,"Бел":1301,"Тов":901,"Той":2267,"Вел":1059,"Бъл":3319,"Гра":1071,"Вът":1318,"Фра":1531,"Гер":981,"Гео":1043,"Дим":1088,"Хри":815,"Гър":1058,"Джо":1453,"Цен":1121,"Евр":1146,"лбу":1567,"лам":805,"лан":5659,"лас":5904,"лат":3266,"лги":963,"ме ":3219,"лга":11038,"ма ":9888,"ля ":1353,"лав":4235,"лаг":1602,"лад":2109,"къс":900,"към":1928,"кус":995,"кул":1639,"кци":1405,"коя":2700,"кре":815,"кра":5743,"кри":2482,"кръ":8385,"кса":1220,"кси":1227,"кте":804,"кти":2426,"кто":3328,"ктр":1676,"кла":1490,"ло ":7485,"кло":1049,"клю":1993,"кни":998,"ког":1332,"ков":4810,"ком":4727,"кон":4363,"коп":1296,"кор":1875,"кос":1575,"кот":3860,"кое":2107,"кои":3390,"кой":3594,"кол":6405,"кин":1233,"кия":7501,"лм ":1747,"кит":4721,"ле ":963,"кед":4374,"ли ":17289,"ква":2572,"кат":20763,"кар":2222,"кан":8113,"как":1566,"кал":2533,"каз":1398,"ла ":6130,"йто":3630,"йск":8916,"йст":3543,"кт ":1337,"йна":2642,"йно":1113,"йни":1595,"кс ":1165,"йон":1165,"ко ":8475,"км ":856,"ки ":42277,"йво":1025,"ият":14767,"од ":4250,"нац":1700,"нау":1528,"нач":4474,"ог ":1478,"нан":1145,"нам":2383,"нал":6581,"мят":820,"нат":17022,"нас":8028,"нар":6159,"нап":1914,"над":1993,"наг":991,"най":5108,"наз":881,"нде":899,"нда":1986,"нгл":4272,"нга":993,"нем":1875,"нен":7192,"ои ":1291,"нер":3755,"нес":1561,"нет":4998,"нег":1689,"нев":1009,"нди":2598,"ндо":877,"ндс":1234,"ндр":1171,"нив":1213,"низ":3481,"ник":6697,"ниг":977,"ние":16641,"ок ":2212,"ой ":4351,"ня ":922,"ов ":8975,"нав":1281,"нт ":3764,"мпи":1340,"мпе":2702,"мпа":913,"мот":885,"мск":3992,"мун":847,"муз":1721,"мик":1065,"мил":1270,"мич":1772,"мин":5435,"мис":1421,"мир":3990,"мит":3385,"мия":2187,"но ":26769,"мна":831,"мно":2749,"мод":977,"мов":951,"мож":1224,"мон":1361,"мол":819,"мос":1285,"мор":2165,"нс ":993,"нд ":1174,"мац":839,"мал":2220,"мак":2180,"май":1485,"лят":839,"мат":5742,"мас":872,"мар":1751,"ляр":941,"нг ":1106,"ман":6016,"лян":866,"лям":2082,"люц":3280,"люч":1984,"ляв":2485,"маг":843,"мес":1879,"мет":5373,"мен":8396,"ни ":29732,"мер":7582,"мей":1790,"меж":2783,"мед":1422,"мвр":3060,"не ":9731,"на ":142528,"лощ":4100,"му ":4914,"лни":5607,"лно":5548,"лна":4870,"лог":4324,"лож":4131,"лор":940,"лос":1248,"лот":2604,"лом":832,"лон":1406,"лов":4045,"луч":1122,"лст":1405,"лск":2670,"лта":884,"лзв":2068,"лиа":1575,"лиг":889,"лив":1397,"лиз":3791,"лим":1095,"лий":4336,"лик":5518,"леж":946,"лев":1784,"лед":5662,"лер":878,"ми ":2984,"лен":16713,"лем":2731,"лек":3978,"лет":2054,"мо ":1651,"лищ":1562,"лиц":2347,"лич":3301,"лис":3220,"лит":6583,"лиф":806,"лин":3617,"лия":3676,"лко":1751,"лка":1568,"оят":3638,"пат":1787,"ояв":887,"пад":4016,"рг ":1045,"оян":1222,"пан":2587,"пар":3257,"рд ":1208,"ре ":1433,"ра ":12349,"пит":1282,"пис":5195,"пла":2637,"пле":1081,"ро ":1581,"пло":2643,"ри ":13979,"пер":6644,"пет":985,"пей":864,"пен":871,"пец":914,"рк ":1201,"ори":9957,"орд":1685,"оре":5253,"орг":4291,"орс":1471,"оро":2299,"орм":3070,"орн":2306,"опу":959,"ора":5790,"опе":1825,"опи":2343,"опо":1901,"опр":1742,"опа":1705,"оте":1711,"отк":1219,"отл":842,"оти":2542,"ото":15284,"отн":1748,"отр":1056,"отв":1153,"отб":1366,"ота":1480,"осе":1009,"оси":1734,"осл":2267,"осм":1054,"осн":3443,"осо":2118,"ост":14287,"орт":2090,"оръ":1155,"осв":865,"оми":2906,"оме":3032,"оля":2830,"ома":3616,"олю":3412,"олу":2069,"олс":1164,"олн":1271,"по 
":9531,"оло":11644,"олк":1549,"оле":5004,"оли":8455,"олз":2123,"ола":2641,"окр":6747,"окт":1051,"око":5161,"оня":990,"онс":3193,"онт":1752,"они":6360,"оно":4114,"онн":3311,"она":6297,"онд":803,"оне":3282,"омо":1980,"омп":2499,"оше":916,"очи":922,"очн":3093,"още":1644,"ощт":2295,"офе":923,"офи":2989,"оце":1219,"оци":1428,"няк":1962,"няв":1202,"ова":9654,"общ":5055,"обр":3456,"обо":1542,"обн":833,"обл":3221,"оби":2313,"обе":2463,"па ":3482,"оит":3156,"ойв":1029,"ойт":3479,"ойс":1115,"ойн":2834,"ока":2144,"ожн":1229,"ози":2118,"оза":2042,"озн":2520,"оиз":4705,"одн":3018,"оди":10451,"оду":932,"одр":1677,"одс":1049,"одо":2573,"оем":891,"оен":1806,"оет":2833,"оже":5313,"ове":9903,"ови":7840,"ово":5297,"овн":3952,"овс":1386,"ога":2254,"оги":3492,"ого":3019,"огр":2846,"ода":3875,"оде":5129,"от ":40884,"нот":4382,"нос":7389,"нош":880,"нор":1012,"ос ":1874,"ном":2595,"ное":844,"ног":2289,"нов":10487,"ор ":8928,"нно":1572,"нни":2936,"нна":3374,"нко":832,"он ":7439,"нка":817,"ом ":1452,"ния":18624,"нир":1151,"нис":3663,"нит":11345,"ним":1508,"нин":2416,"ол ":1852,"нич":2172,"ниц":3898,"нце":866,"нци":4501,"ощ ":1985,"нтъ":2337,"нуа":804,"нта":3339,"нте":1868,"нти":4317,"нто":1031,"нтр":2358,"нск":22095,"нст":3816,"сам":1827,"сан":2444,"сат":1871,"сва":1135,"сво":1889,"те ":30876,"све":3239,"свъ":838,"сев":2490,"сед":1178,"сел":9974,"сек":1165,"сеп":870,"ти ":11022,"сен":2491,"сем":2208,"сет":920,"сер":1205,"сия":1303,"сич":1328,"сис":1833,"сит":2033,"сих":1495,"син":1858,"сил":2220,"сим":1511,"ски":43899,"ска":20500,"сле":5451,"сла":2258,"ско":7743,"сми":843,"слу":1333,"то ":53757,"сло":1460,"сна":1239,"сни":1224,"соб":1154,"сов":1181,"сок":1395,"сно":4759,"спе":2031,"спа":1513,"спи":822,"соф":1092,"соц":1039,"сре":3295,"спо":2745,"роц":854,"рот":1758,"роф":1212,"роп":2362,"рос":2878,"ст ":15103,"роя":1120,"рта":1478,"рти":2728,"рск":11358,"рси":1583,"руг":2898,"руп":3391,"рус":2190,"рум":975,"рци":1577,"рхи":885,"ръц":1917,"ръс":815,"ръг":7752,"ръж":849,"ръб":829,"та ":68838,"рад":11688,"рае":904,"раж":1539,"раз":12192,"раб":3055,"рав":5786,"рам":2735,"ран":13900,"рай":2815,"рак":2465,"рал":6906,"раф":2143,"рац":1472,"рас":2637,"рат":10837,"рая":1251,"ращ":1083,"рби":1070,"рва":2274,"рди":900,"рдж":839,"реб":1455,"рев":6384,"рег":1970,"ред":14324,"рет":1976,"рес":2290,"реп":1732,"си ":5183,"рен":6538,"рем":3987,"рел":1209,"рек":2832,"рей":892,"рез":11468,"реж":2438,"ржа":2590,"рещ":1470,"реч":1092,"реш":2153,"се ":16807,"рво":979,"рве":1684,"рви":2226,"рга":3761,"рги":1489,"рда":805,"рия":9828,"рио":1833,"рим":2306,"рин":4333,"рик":7682,"рил":2477,"рий":2243,"рич":4087,"рит":6765,"рир":851,"рис":4939,"рка":815,"риа":2344,"риг":1450,"рив":950,"рие":1786,"рид":1515,"риз":1180,"рни":3773,"рна":4116,"рок":1893,"рол":1507,"ром":2389,"рон":2340,"роз":1160,"рои":4466,"рой":1470,"ров":7381,"рог":1151,"род":7855,"рое":938,"рно":2617,"рла":1189,"рми":2339,"рма":3665,"пра":5058,"при":9598,"пре":21619,"про":14907,"поп":1206,"пор":4301,"пос":3943,"пот":1094,"поч":1394,"рт ":2255,"пое":904,"под":4233,"пов":2082,"пон":1889,"пом":919,"пол":13153,"пок":1115,"поз":1787,"пуб":3689,"пул":1018,"пте":935,"пси":1481,"са ":8477,"пър":3460,"път":1769,"пъл":1566,"вар":1920,"ват":9299,"ващ":3095,"ван":13950,"вал":2194,"га 
":4136,"бщи":2449,"бъд":803,"бър":1419,"бъл":7641,"бща":1581,"бще":1011,"бум":1554,"бук":823,"бск":1045,"вто":2970,"вси":896,"вск":2447,"вст":1194,"вре":3504,"ври":3322,"вро":1874,"връ":1065,"вол":4504,"вой":3658,"вое":1398,"вод":4759,"вот":3890,"вор":2694,"вни":3836,"вна":3375,"вно":2879,"вля":1602,"вле":1219,"вли":1041,"вла":1152,"го ":2882,"вкл":1196,"виц":847,"вич":1491,"вия":2762,"виз":1513,"виж":1141,"вил":1146,"вин":3278,"вис":2864,"вит":3903,"вид":3578,"вие":1422,"веч":1347,"вес":4369,"вет":7499,"вер":5871,"вен":9985,"ги ":2787,"вел":1221,"век":3636,"веж":1619,"вед":2219,"вгу":856,"ва ":24595,"бан":1118,"ачи":1132,"аши":924,"аща":2195,"ащи":2202,"ащо":823,"афс":811,"афи":915,"ача":3148,"аче":1418,"аци":9279,"апр":1718,"апа":4649,"апо":1282,"апи":1535,"арх":1193,"арс":9548,"арт":4677,"аса":1041,"аре":2062,"ард":1683,"ара":4896,"арн":1173,"арм":1084,"аро":3343,"ари":12961,"арл":1170,"арк":1896,"аст":14401,"ася":976,"ата":47391,"аси":2434,"асе":6138,"асо":1029,"ату":1909,"ате":8423,"ати":9387,"ато":12657,"атр":1084,"бол":3321,"бор":2442,"бот":2082,"бро":1543,"бри":1900,"бре":1262,"бра":3810,"бла":3919,"бли":5668,"во ":9926,"ви ":4285,"бен":998,"бер":1182,"без":1026,"бед":1013,"бел":1391,"бек":854,"бит":1770,"бил":1742,"бик":912,"бив":1415,"ве ":3400,"дан":2923,"дар":1829,"дат":3453,"дви":1371,"два":2719,"две":833,"ед ":5333,"дал":1095,"дад":3266,"дав":2214,"ев ":3676,"дее":1168,"дек":992,"дей":1639,"дем":1659,"дел":3837,"ден":12663,"дер":1762,"джи":1277,"ей ":1537,"дес":1272,"дет":1508,"ез ":10672,"дст":4125,"дск":2604,"дро":826,"дру":2364,"дре":1806,"дри":2347,"дра":1827,"ет ":4265,"душ":3708,"ец ":2918,"ен ":33180,"дия":1914,"диц":1640,"ем ":1311,"див":1005,"дим":1128,"дин":15737,"дио":936,"ел ":8445,"дис":974,"дит":1804,"дие":842,"ек ":3487,"доб":1818,"дов":2389,"ес ":1992,"дос":1220,"дор":1353,"док":903,"дон":5500,"дна":5206,"дни":3855,"дне":986,"ер ":6034,"дно":4679,"дми":1636,"да ":13229,"вяв":1024,"гал":815,"вят":853,"гат":2134,"ган":4672,"гар":12015,"де ":1425,"and":809,"an ":963,"във":2376,"въз":2174,"вър":4758,"гол":3089,"гор":2288,"гов":3764,"год":7938,"гру":3152,"ду ":2227,"гръ":1949,"гра":16257,"гри":1115,"гус":909,"ген":2209,"гер":1710,"ди ":4057,"гио":1272,"гия":3508,"гич":1145,"ati":807,"гле":897,"гла":2323,"до ":6155,"гли":4051,"жан":924,"жав":2301,"за ":15694,"еще":814,"жит":2528,"жив":1957,"жис":1541,"жес":1221,"жду":3122,"зи ":3101,"жен":7044,"жда":3366,"жно":1097,"жни":1433,"жна":1550,"ея ":816,"жа ":1065,"дъл":1733,"дър":3663,"еец":1133,"ежи":2367,"ежд":5041,"едс":4153,"жи ":1063,"еза":1907,"езо":914,"ези":4043,"ева":1249,"еви":2758,"еве":5028,"его":2858,"едв":1395,"еда":2616,"еде":4183,"еди":11058,"едо":5545,"едн":6376,"евн":1170,"ево":4583,"же 
":1283,"евр":1969,"ега":870,"еги":1678,"ент":10211,"енс":4280,"енц":1053,"ени":27128,"ено":7268,"енн":2947,"ена":10075,"ене":4173,"енд":1760,"еор":1928,"епт":1100,"епу":3069,"епо":1006,"ерс":2306,"ерт":1057,"ерм":2802,"ерн":4552,"еро":3866,"ери":14051,"ерг":986,"ере":1937,"ера":7049,"ерв":1379,"ейн":1799,"ейс":4529,"еке":826,"еки":1118,"еко":1156,"ект":4178,"екс":3530,"ека":2557,"елн":4078,"ели":9768,"елс":1847,"ело":4047,"еле":11585,"елг":908,"ела":1732,"емо":966,"еми":3799,"емс":1747,"еме":6592,"еля":2163,"ема":3994,"емв":2337,"ехн":1011,"еци":1425,"ече":2969,"ешн":2173,"еща":1146,"еса":875,"есе":2292,"еси":1554,"еск":7153,"есн":1000,"ест":15331,"ета":4044,"ети":4225,"ете":3470,"етр":1876,"ето":20630,"етн":1476,"етс":1612,"етъ":1538,"иве":2646,"иви":1509,"ива":2727,"иал":3813,"иан":3297,"иен":1203,"иет":8077,"иже":812,"иев":1368,"игр":2806,"иго":824,"ида":1273,"иди":1105,"иде":2406,"иво":2091,"ивн":2071,"ивш":976,"ига":2136,"иги":1169,"икн":877,"ико":3994,"йн ":1098,"ики":1147,"ика":16650,"ийс":6452,"иит":965,"изъ":865,"изх":1032,"изс":1113,"изт":2743,"изп":3253,"изм":1954,"изл":1205,"изо":1731,"изн":1188,"изи":3858,"изк":1474,"изд":1669,"иза":5067,"изв":5752,"ион":10338,"инц":2399,"иня":874,"иод":1232,"ине":4216,"ини":7734,"ино":2511,"инс":5959,"инт":1527,"ина":17378,"инд":1369,"инг":2575,"ими":3098,"име":5126,"имс":2034,"имо":1892,"имп":3052,"имн":924,"има":3880,"или":12587,"иле":1355,"илм":2481,"илн":880,"ило":3073,"ила":2122,"иси":1489,"иса":3563,"ист":15576,"исп":938,"исо":1673,"исл":1753,"иск":1625,"ити":5468,"ите":38859,"ита":6132,"исъ":1087,"ись":1263,"иту":932,"ито":6371,"итн":995,"ира":10008,"ири":1911,"иро":1628,"ихо":1462,"ице":1072,"ица":5886,"ици":6007,"ифо":843,"ище":1958,"ичи":1220,"ичк":1339,"ичн":6798,"ича":3197,"иче":8842,"ка ":28086,"ив ":1131,"зав":1648,"зае":830,"ид ":2167,"зви":1188,"зве":5171,"зва":3840,"зац":2538,"зат":1310,"зар":1064,"зап":3505,"зан":2665,"защ":954,"зда":4154,"зво":872,"ие ":14021,"ий ":4437,"ии ":2933,"зем":1420,"зик":4455,"зир":1742,"ил ":4244,"ик ":10314,"ин ":8393,"им ":2180,"зия":1529,"зит":1037,"ип ":824,"зма":848,"ио ":855,"зли":2352,"зна":4787,"зни":1000,"зно":822,"ир ":1662,"зов":1006,"ис ":2255,"зон":1141,"зпо":5089,"ит ":1689,"зпр":1401,"зра":1726,"зпъ":815,"зсл":944,"зто":3205,"ич ":1609,"зхо":1090,"ия ":57202,"ius":2045,"is ":934,"ion":1052,"ьор":2769,"km ":885,"южн":872,"km²":3379,"ъцк":1859,"ътр":1736,"ъще":1730,"ъщо":1266,"he ":1146,"ъде":1407,"ъед":2623,"ъве":1335,"ъзд":2638,"ълн":1684,"ълг":11342,"ълж":1010,"ърн":1183,"ърк":1004,"ърж":2771,"ърз":1179,"ърх":1303,"ърт":1540,"ърс":856,"ърш":1020,"ърц":1189,"ъст":3892,"ъпр":1168,"ърв":4886,"ърд":852,"ia ":930,"er ":1669,"es ":1352,"яко":2322,"яло":937,"ява":6515,"юци":3278,"ючв":950,"ят ":9602,"яне":821,"яни":1186,"яма":1227,"ята":8869,"ято":2957,"уци":861,"уча":2759,"учи":956,"уче":1377,"уши":3537,"фес":938,"фер":834,"фан":1085,"фин":933,"физ":1170,"фил":3622,"фия":1796,"фиц":1324,"фут":2220,"фре":1952,"фор":3324,"фон":806,"ца ":4811,"ци 
":3879,"хан":805,"хар":1263,"хол":1095,"хор":974,"хов":933,"ход":1906,"хим":863,"стн":3691,"сто":10615,"стр":11666,"ств":16841,"сте":8055,"сти":11129,"ста":11874,"стъ":2246,"сту":903,"стт":2922,"сце":1326,"съд":995,"съв":1667,"съз":2361,"със":4442,"сък":852,"сън":863,"същ":2787,"сьо":1331,"тав":5484,"так":1485,"тал":3945,"тан":6731,"тай":879,"тат":3225,"тар":3238,"тбо":3512,"твъ":860,"тво":9227,"тви":1621,"тве":3786,"тва":3607,"тех":952,"тем":3541,"тел":16601,"тео":1008,"тен":6993,"тер":6816,"тет":1731,"тес":853,"тез":1494,"тек":1131,"тив":4139,"тие":1898,"тка":1702,"ум ":1519,"тич":5203,"тиц":909,"тия":3555,"тин":3775,"тик":4630,"тил":1307,"тир":2210,"тис":902,"тит":2981,"тла":1006,"тно":3117,"ток":1533,"тол":2748,"той":1268,"тов":5201,"тни":5121,"тна":2467,"тре":4236,"тра":10350,"три":4952,"тор":11467,"тот":821,"том":2089,"тон":2179,"ус ":1180,"топ":1014,"точ":3383,"тоя":1817,"тта":3294,"тро":5709,"тру":1929,"тск":2626,"тур":4751,"тър":5898,"тът":942,"тън":1062,"ува":2742,"уги":1696,"уга":1360,"уар":1716,"убл":3878,"узи":1937,"уди":922,"удо":1145,"уме":1194,"ума":1453,"улт":1584,"ули":1004,"ула":1122,"укт":810,"ука":1012,"упр":1211,"ура":2417,"ург":1080,"ури":1177,"упа":3307,"уна":1368,"уни":1783,"уст":2872,"утб":2290,"урс":982,"урн":1905,"уск":2598,"уси":1038,"що ":2387,"шни":1037,"шна":1940,"шин":804,"ще ":3484,"шен":1940,"щи ":1591,"ъв ":2817,"ът ":6352,"ъс ":1987,"ър ":7548,"ън ":3472,"ъм ":3109,"щит":1320,"щин":2379,"ъл ":2252,"ък ":1412,"щен":865,"щес":2508,"ъг ":7573,"щат":7215,"m² ":3381,"on ":1530,"щта":2294,"че ":2916,"цен":4575,"чи ":992,"цел":1258,"цес":947,"цер":828,"циа":2597,"ции":1846,"цио":6596,"цит":1550,"ция":11555,"ча ":1350,"цар":1123,"цат":1708,"цки":1353,"чев":1376,"чен":5933,"чес":8800,"чер":977,"ши ":3970,"чет":1965,"чле":872,"чки":1198,"чин":1845,"чит":1626,"ша ":1093,"чва":2149,"час":5618,"чал":1786,"чан":1529,"чав":2550,"ща ":4581,"чре":802,"чна":4150,"чов":1436,"чни":4370,"чно":3174,"us ":4874,"ter":901},"n_words":[7994134,9177756,6462334],"name":"bg"}
\ No newline at end of file
diff --git a/nlp_resource_data/langdetect/profiles/bn b/nlp_resource_data/langdetect/profiles/bn
new file mode 100755 (executable)
index 0000000..6908336
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/bn
@@ -0,0 +1 @@
+{"freq":{"D":455,"E":376,"F":388,"G":399,"A":894,"B":684,"C":848,"L":430,"M":683,"N":429,"O":309,"H":360,"I":507,"J":241,"K":244,"T":598,"W":240,"V":199,"P":651,"S":947,"R":531,"f":1089,"g":1979,"d":2581,"e":8201,"b":1267,"c":2589,"a":8669,"n":6047,"o":5561,"l":3930,"m":2503,"j":208,"k":699,"h":3040,"i":6685,"w":672,"v":711,"u":2926,"t":5635,"s":4238,"r":5985,"p":1752,"z":245,"y":1310,"x":420,"এলা":514,"ক। ":614,"ঐতি":234," l":233," m":235," o":526," i":296," d":312," f":245," a":616," b":228," c":315," t":765," p":493," s":393," r":292," J":235," K":227," H":321," I":408," N":324," O":231," L":375," M":610," B":602," C":719," A":760," F":332," G":372," D":404," E":296," S":771," R":448," P":577," W":226," T":506,"ا":336,"Co":219,"উ":9536,"এ":27004,"ঃ":514,"ং":15654,"ঁ":2141,"ই":18078,"আ":10399,"অ":13627,"চ":16928,"ছ":9717,"ঘ":1893,"ঙ":3088,"ঞ":3794,"ট":30397,"জ":27267,"ঝ":790,"ও":9899,"ঐ":344,"খ":8927,"গ":20205,"ঔ":204,"ক":78264,"ন":86702,"ফ":6705,"প":46274,"ভ":18017,"ব":70605,"য":69717,"ম":48748,"ড":11071,"ঠ":3258,"ণ":10122,"ঢ":578,"থ":13519,"ত":69776,"ধ":12543,"দ":33053,"স":56165,"হ":25168,"়":39420,"া":175719,"ি":114763,"র":156970,"ল":52543,"Ma":241,"শ":24901,"ষ":17272,"ৎ":1686,"্":145506,"ৌ":1675,"ো":19195,"ৈ":1879,"ে":113569,"ৃ":4705,"ূ":5615,"ু":27604,"ী":22483,"৭":2295,"৬":2181,"৯":5541,"৮":2706,"৩":2127,"২":3895,"৫":2215,"৪":1978,"১":8858,"০":4887,"৷":254,"In":199,"।":25409,"Th":253,"b ":285,"a ":1054,"i ":350,"ge":332,"ga":252,"he":773,"ha":472,"gh":243,"g ":316,"ea":343,"ec":205,"ed":350,"de":506,"di":332,"এছা":284,"h ":358,"el":402,"en":818,"em":278,"et":317,"es":737,"er":1291,"ca":315,"e ":2258,"be":225,"da":210,"f ":445,"ct":266,"co":263,"ci":201,"ch":362,"ce":347,"c ":220,"এটি":2063,"d ":875,"at":923,"as":401,"ar":1026,"al":912,"ai":231,"am":360,"an":1536,"ac":322,"ad":251,"ag":317," ১":7243,"nt":573,"ns":267,"no":211," ৯":250," ৮":298," ৭":311," ৬":364," ৫":417," ৪":387," ৩":666," ২":2429,"of":407,"om":400,"on":1111,"ol":393,"ot":205,"os":230,"ou":252,"or":795,"r ":1035,"pe":220,"lo":359," ঢ":514," ড":1933,"ll":307," ঠ":230," ধ":2352," দ":9809," থ":3157," ত":9984," ফ":3863," প":28296," ন":11037," য":6674," ম":15133," ভ":9746," ব":28530," ল":4344," র":8416,"o ":382," শ":7392,"ma":464,"mb":342," স":27697,"me":396," হ":13181,"mi":232,"একট":5136,"একজ":1493,"na":524,"একক":204,"nc":255,"nd":698,"ne":429,"ng":599,"ni":414,"একা":295," ।":1901,"m ":412," এ":26382," উ":6430,"এখা":461," অ":13571," আ":10126," ই":6407,"li":503," জ":9765," ট":1754,"le":571," ঘ":968," ছ":3198," চ":4584," ক":22576,"la":589," ঔ":203," গ":7129," খ":2591," ঐ":343," ও":5820,"n ":1470,"ht":261,"hu":305,"hi":274,"ho":238,"id":213,"ic":689,"ia":484,"ig":326,"ie":307,"ir":212,"is":631,"it":565,"il":352,"in":1040,"io":639,"l ":743,"এবং":4187,"এপ্":233,"y ":803,"ve":326,"x ":253,"ul":217,"ur":460,"us":340,"ut":207,"um":443,"un":241,"to":352,"tr":334,"te":708,"ti":997,"th":909,"ta":488,"ss":271,"st":614,"se":325,"এমন":239,"si":361,"rt":257,"ry":236,"ro":438,"ri":872,"re":667,"rd":212,"ra":839,"t ":920,"s ":1577,"এদে":277,"এই ":3233," �":274,"এক ":911,"এর ":2530,"উনি":291,"উন্":360,"উদ্":622,"উপা":289,"উপর":460,"উপন":490,"উরো":279,"উল্":354,"উৎস":239,"উৎপ":278,"উৎ":563,"উর":543,"উপ":2213,"উস":347,"উল":530,"উক":210,"উজ":216,"উচ":390,"উট":503,"উত":973,"উন":827,"উদ":753,"ইন":1626,"ইব":205,"ইম":223,"ইয":591,"ইর":537,"ইল":683,"ইস":741,"উই":375,"এছ":287,"এখ":589,"এক":9005,"এম":465,"এব":4214,"এপ":275,"এন":320,"এদ":306,"এট":2154,"এই":3366,"ছর 
":262,"ংশ":1413,"ংস":1069,"ংল":2600,"ংর":2237,"ংব":422,"ংঘ":236,"ংগ":523,"ংখ":753,"ংক":757,"ঁর":727,"ৎ ":620,"ইত":723,"ইট":668,"ইড":241,"ইজ":233,"আস":454,"আল":1088,"ইক":452,"আয":323,"অ্":840,"আম":516,"আর":1419,"আফ":296,"আব":708,"আধ":289,"আদ":422,"আন":1004,"ইউ":806,"ইং":2317,"আছ":551,"অস":568,"আগ":507,"আক":520,"অল":207,"অর":1154,"অভ":838,"অব":2125,"অফ":360,"অপ":433,"অন":3326,"অধ":754,"আই":743,"অথ":198,"অঞ":597,"অত":316,"অক":381,"অঙ":287,"অং":592,"ঙা":410,"চট":225,"চত":213,"চন":791,"ঙ্":2470,"৩ ":950,"চর":577,"চল":2320,"চী":679,"চু":374,"চা":2514,"চি":4076,"চে":985,"চ্":2421,"ছব":324,"৪ ":965,"ছর":393,"জগ":223,"ঘট":513,"গা":2158,"গস":249,"গি":590,"গী":642,"গু":2497,"গো":907,"গ্":3546,"গে":1783,"১ ":1256,"চক":208,"২ ":1015,"ঝা":497,"গোল":259,"৭ ":899,"টক":320,"টপ":202,"টন":552,"ঞ্":2042,"টব":433,"ঞা":1608,"টা":3413,"৮ ":899,"টর":259,"জন":5856,"ছে":4099,"জধ":330,"ছি":2738,"ছু":520,"গ্র":2862,"ছা":810,"গ্য":399,"জর":278,"৫ ":1051,"জল":300,"ছো":231,"জম":242,"জয":281,"জো":267,"জে":2691,"জু":830,"জী":975,"জি":3650,"জা":4328,"৬ ":1035,"জ্":3683,"গের":766,"ওয":1866,"ক।":744,"ঐত":235,"এশ":223,"এল":685,"এর":2874,"এস":513,"ক্":11379,"খন":635,"কে":8760,"কো":2299,"কী":339,"কি":3416,"কা":12553,"কৃ":1298,"কু":1325,"কস":263,"গর":1257,"০ ":1923,"গল":479,"খে":1085,"গন":254,"গব":429,"খ্":2313,"গঠ":636,"খি":334,"খা":2637,"খু":253,"গড":267,"গত":723,"গণ":853,"কল":1752,"খক":249,"কভ":207,"কম":1122,"কয":216,"কর":9263,"কব":584,"কথ":578,"কদ":234,"কন":446,"কত":422,"কট":5691,"কজ":1571,"কক":371,"পক":777,"নো":1529,"নৈ":281,"ন্":18037,"পথ":251,"পত":1140,"পন":1278,"পদ":1221,"পড":271,"পট":230,"পঞ":305,"নর":378,"৷ ":217,"নন":368,"ধে":496,"নপ":524,"নব":764,"নভ":385,"ধ্":3005,"নম":449,"নয":630,"নু":2067,"নী":3272,"নে":8228,"নস":741,"নি":10383,"না":9095,"বচ":430,"বছ":386,"বঙ":528,"বক":640,"ফল":533,"ফর":691,"পৌ":262,"প্":14673,"পো":493,"বপ":409,"বন":1731,"ফে":647,"বদ":516,"বত":795,"ম।":432,"ফু":801,"বড":289,"ফি":694,"ফা":803,"পশ":1040,"পস":313,"পল":236,"পম":204,"পর":6275,"পে":1955,"পৃ":469,"পূ":1624,"বই":313,"পি":1741,"পা":6024,"পু":2560,"পী":458,"বং":4443,"ভর":238,"ব্":5813,"মক":698,"মগ":375,"ভি":3000,"ভা":10166,"মত":941,"র।":1267,"ভু":429,"ভূ":887,"মণ":247,"ভে":813,"মন":1866,"মদ":356,"মধ":1657,"মব":565,"ভো":225,"মপ":208,"বব":837,"ফো":381,"বয":356,"ফ্":917,"বর":3916,"বল":2788,"বশ":202,"বহ":1569,"বস":2742,"বা":13175,"বি":13020,"বী":1634,"বু":748,"য।":373,"বৃ":690,"বে":6807,"বৈ":512,"বো":1000,"ভব":322,"রগ":376,"রক":3370,"রও":363,"রট":549,"যা":13478,"য়":35819,"রজ":517,"রচ":1226,"যস":235,"রঙ":226,"রদ":1297,"রধ":882,"রত":4811,"রথ":1065,"ল।":866,"যু":2179,"রণ":2957,"যি":521,"রম":1030,"রয":914,"যো":1322,"রব":1734,"যৌ":231,"রভ":428,"রপ":736,"রন":1614,"যে":3967,"মল":265,"যক":743,"ময":969,"ভ্":389,"মর":648,"মহ":1061,"মস":593,"মূ":1527,"মৃ":459,"যত":845,"যদ":240,"ঙাল":366,"মা":10507,"মি":3934,"মী":801,"মু":1962,"মো":1026,"যব":1634,"মৌ":259,"যম":725,"ম্":4843,"যন":902,"মে":4592,"ড়":3388,"ডা":1047,"ডি":1739,"ঠি":877,"ঠা":1161,"৯ 
":835,"টে":2184,"ঠন":362,"ট্":2862,"টো":693,"টু":322,"টি":14501,"ণী":616,"ণি":777,"দ।":351,"ণা":1071,"তক":615,"ণত":463,"ঢা":467,"ড্":286,"ডো":228,"ডে":1416,"ত।":1976,"থা":4072,"থি":1849,"ন।":3725,"থন":275,"তে":6636,"তৈ":476,"থব":514,"তো":511,"ত্":11770,"থম":1047,"দক":1036,"তা":9664,"তি":11706,"তৃ":563,"তী":2327,"তু":1609,"তথ":334,"তত":465,"তন":1191,"ণে":1383,"ণ্":713,"তম":1948,"তব":730,"তল":215,"তর":2978,"নট":466,"ধা":3367,"নত":805,"নদ":832,"ধি":1235,"ধী":706,"ধু":525,"দো":593,"দ্":6813,"ধন":270,"দে":7678,"দৈ":238,"নক":807,"নগ":632,"ধর":1424,"নও":200,"দস":274,"দশ":433,"দূ":217,"ধত":234,"দৃ":215,"দী":1553,"দু":1592,"দা":3394,"দি":3189,"থে":3273,"দন":421,"ছে ":1536,"জন ":2036,"দল":773,"দর":900,"থ্":402,"হন":525,"সে":5003,"সী":651,"সু":1284,"সূ":479,"হণ":341,"হত":965,"সৃ":307,"সা":9422,"সি":3847,"সহ":471,"হচ":543,"হল":1575,"স্":12917,"হম":242,"হর":1722,"হয":5736,"সো":556,"হে":854,"হৃ":361,"়।":3879,"হু":683,"হি":3582,"হী":535,"হা":4314,"হ্":705,"হো":256,"়ক":477,"়ত":462,"়ন":1084,"া।":1709,"়ম":211,"়র":208,"ি।":908,"়ু":308,"়ী":503,"াং":3360,"ছু ":408,"াঁ":1482,"়ি":1178,"়া":8455,"়ো":436,"াই":3512,"়ে":7915,"াউ":581,"াও":962,"াক":5532,"়্":205,"াচ":1101,"াছ":570,"াজ":4489,"াখ":950,"াগ":2341,"াঙ":862,"াথ":1203,"াত":6156,"াণ":1382,"ী।":871,"াড":1549,"িং":1140,"াঠ":366,"াট":1770,"াঞ":296,"াভ":908,"াব":4643,"াফ":353,"াপ":2739,"ান":18665,"িউ":1255,"াধ":2334,"াদ":5045,"িক":11087,"িখ":922,"াল":10227,"িও":631,"ার":30756,"িএ":231,"াম":7273,"ায":8979,"িছ":516,"িজ":2552,"াস":5883,"িচ":1754,"াহ":2942,"িগ":546,"াশ":1723,"াষ":6041,"িট":1865,"িড":639,"িত":12239,"িণ":858,"িদ":2453,"িন":8978,"িধ":463,"িপ":1269,"িব":2698,"িফ":329,"িম":2790,"িভ":1825,"াৎ":271,"িয":6762,"ির":6023,"িল":5504,"ীক":817,"িশ":3794,"ীগ":209,"িষ":2506,"িস":4278,"িহ":941,"ুই":664,"ীদ":382,"ীত":1782,"ীপ":505,"ীন":1798,"ীয":4089,"িৎ":317,"ীম":347,"ীব":842,"ীল":338,"ীর":2436,"ুগ":541,"ুক":2294,"ুখ":360,"ুজ":362,"ুট":930,"র্":19499,"রো":2284,"রে":15121,"রূ":464,"রু":2561,"রী":3578,"রা":17630,"রি":11429,"রস":1549,"রহ":932,"রশ":407,"রল":454,"লয":931,"লম":504,"লব":520,"লন":1184,"লত":1046,"লট":257,"লচ":904,"লগ":240,"লক":1697,"ল্":3376,"লো":3053,"লে":9966,"লী":1486,"লু":671,"লা":9534,"লি":6096,"লস":278,"শক":649,"শব":592,"শন":1507,"ষক":239,"শর":329,"স।":308,"শত":558,"শট":242,"শে":3570,"ষম":327,"শ্":3936,"শো":350,"ষয":380,"সক":584,"শহ":1521,"শু":848,"শী":865,"সং":3293,"শি":3267,"শা":2252,"ষণ":732,"সন":1003,"ষে":1161,"সফ":273,"সব":1306,"সভ":427,"সম":3977,"ষ্":5176,"সর":1993,"সল":739,"হক":226,"সঙ":453,"ষা":5502,"ষি":1250,"সত":301,"সদ":563,"ৎস":482,"ৎপ":325,"্শ":719,"্স":2033,"্ষ":4401,"্ম":4357,"্ভ":1012,"্র":30653,"্য":25184,"্ল":2305,"্ত":14406,"্ণ":1498,"্ড":2937,"্ঠ":1626,"্ট":7869,"্ঞ":1750,"্ব":10276,"্প":3453,"্ন":2407,"্ধ":2336,"্দ":4738,"্থ":5602,"ৌল":231,"ৌর":302,"্ছ":784,"্জ":1742,"্চ":4084,"্গ":2822,"্ঘ":304,"্ক":3885,"োয":771,"োর":1773,"োল":1204,"োব":621,"োভ":229,"োম":910,"োষ":455,"োস":598,"োহ":395,"োঝ":365,"োজ":493,"োড":367,"োট":842,"োদ":303,"োত":491,"োপ":763,"োন":1801,"োধ":294,"োক":1031,"োচ":456,"োগ":1362,"গঠি":273,"খান":1047,"গঠন":344,"ৈর":614,"েশ":5180,"েল":4085,"ের":25063,"েয":1289,"েম":1903,"েভ":263,"েব":2130,"েপ":618,"েন":8060,"ৈত":343,"েহ":255,"েষ":1422,"েস":1309,"েও":994,"েখ":1729,"েক":4154,"েই":1429,"েউ":248,"েট":1435,"েড":786,"েত":1719,"েদ":505,"েগ":411,"েছ":3105,"েজ":2937,"ে।":4052,"ৃথ":340,"ৃত":2338,"ৃহ":465,"ৃষ":756,"ূত":331,"ূপ":414,"ূম":517,"ূর":2094,"ূল":1187,"ূহ":378,"ুত":1038,"ুড":349,"ুন":1462,"ুদ":1447,"ুব":643,"ুপ":599,"ুয":1183,"ুর":4508,"ুভ":198,"ুম":876,"ুল":3399,"ুস":1262,"ুষ":874,"ুশ":279,"কেট":226,"কেন":73
8,"কের":985,"কোন":942,"ক্ত":2888,"ক্ট":455,"ক্ষ":3987,"ক্ল":430,"ক্র":1781,"ক্য":806,"ক্স":679,"কিছ":467,"কাহ":230,"কিন":953,"কাব":216,"কায":382,"কার":4553,"কাল":1045,"কাশ":812,"কিল":233,"কিস":331,"কুল":267,"কুর":331,"কুম":261,"কৃত":1025,"কৃষ":259,"কে।":511,"কেই":224,"৮০":213,"৭১":230,"৯৯":610,"৯০":387,"৯৫":459,"৯৬":482,"৯৭":685,"৯৮":501,"৯১":321,"৯২":343,"৯৩":359,"৯৪":434,"৮৯":204,"৮৮":200,"গুর":367,"গুল":1495,"৩০":296,"২৪":220,"গীত":413,"৫০":212,"গান":542,"গার":291,"গায":230,"১৬":326,"১৫":332,"১৮":996,"১৭":415,"১২":292,"১১":238,"১৪":244,"১৩":256,"১৯":3860,"২০":1142,"০০":1455,"১০":452,"গস্":210,"্র।":452,"্যক":735,"্মা":901,"্মী":308,"্মি":327,"্যন":687,"্মে":324,"্যত":807,"্যম":709,"্যব":1629,"্রচ":617,"্যস":234,"্রক":1414,"্যু":488,"্যি":244,"্যা":10276,"্রজ":405,"গবে":354,"্বত":419,"্বন":491,"্বপ":359,"্বব":695,"্বর":1021,"্বা":1809,"্য।":373,"্বি":841,"্বী":559,"্বে":1025,"্বো":213,"্মগ":199,"্ভা":239,"্লা":616,"্লি":367,"গরে":240,"্লে":524,"্রব":578,"্যো":301,"্রম":577,"্রভ":304,"্রয":464,"্রত":2154,"্রণ":361,"্রদ":1113,"্রথ":1019,"্রন":758,"্যে":2625,"্রধ":869,"্রপ":383,"্রা":4952,"্রি":3967,"্রী":1344,"্রু":685,"খ্য":1786,"্রশ":293,"খ্র":509,"্রস":463,"্রহ":735,"্রো":780,"্রে":2630,"্ষণ":271,"গণি":276,"্ষি":1061,"্ষা":679,"্ষে":819,"্ষম":296,"গড়":257,"্শন":406,"খেল":564,"্সে":379,"্সি":438,"গত ":349,"খা ":665,"কজন":1518,"খে ":204,"। ":19077,"কলে":496,"কল্":282,"করত":378,"করণ":527,"করে":4422,"করা":2433,"কলক":407,"কর্":871,"কম্":406,"কাজ":532,"কাছ":278,"কাত":521,"কান":552,"গে ":596,"গর ":415,"কটি":5240,"ঃ ":340,"ং ":5055,"কবি":409,"ই ":6830,"উ ":312,"কথা":468,"ও ":6690,"এ ":847,"চ ":769,"খ ":354,"গ ":1629,"ক ":11586,"ত ":12607,"ণ ":3613,"ঠ ":296,"ড ":1706,"ট ":3372,"জ ":1903,"ব ":3202,"ফ ":611,"প ":1284,"ন ":20715,"ধ ":808,"দ ":2233,"থ ":1007,"কা ":1480,"ল ":8450,"ওয়":1861,"ভ ":587,"ম ":6723,"কি ":282,"য ":4809,"র ":54334,"হ ":1019,"় ":12855,"শ ":3191,"খন ":279,"কে ":5302,"ষ ":1259,"স ":4530,"া ":26160,"ী ":7142,"ু ":2897,"ি ":25805,"ো ":2835,"্ ":1910,"কল ":227,"ে ":37314,"তর ":887,"ডার":368,"ডিস":334,"ডিয":319,"তন ":462,"ণে ":593,"তম ":977,"ড়া":1089,"ড়ি":682,"ড়ে":451,"১৯০":231,"১৯১":267,"তু ":433,"১৯৪":366,"১৯৫":361,"১৯২":273,"১৯৩":285,"১৯৮":393,"১৯৯":498,"১৯৬":398,"১৯৭":613,"তী ":298,"তে ":4456,"ডের":406,"ডেন":243,"তি ":2372,"তা ":2207,"ঠান":465,"ঠিত":686,"২০০":842,"দ। ":304,"ণা ":499,"টাব":261,"টান":241,"টিক":566,"টার":1384,"টাল":237,"টিত":421,"টিন":246,"টিশ":274,"টির":990,"টাই":256,"ডে ":223,"ণত ":291,"টোব":204,"ট্র":2092,"ট্য":377,"ট্ট":258,"টেম":345,"টের":443,"টেল":237,"ঞান":1488,"০০০":220,"ত। ":1457,"ডি ":251,"ঞ্চ":1271,"ঞ্জ":764,"টবল":369,"ড় ":618,"টি ":10700,"জ্ঞ":1748,"টা ":567,"জ্য":1590,"টে ":252,"ঠন ":205,"ঝায":280,"ঠা ":199,"ছোট":215,"জয়":273,"টন ":232,"জান":513,"জাত":1408,"জার":1034,"জিক":202,"জিত":234,"জীব":714,"জুল":281,"জুন":236,"জেন":272,"জেল":1183,"জের":493,"চ্চ":1435,"চ্ছ":780,"ছবি":323,"ছিল":2594,"ছাড":436,"জধা":329,"ছেন":778,"জনপ":400,"জনী":266,"জন্":2129,"ছে।":1579,"জি ":2005,"৭১ ":202,"ঙ্গ":2125,"ঙ্ক":292,"জা ":285,"চনা":453,"চন্":198,"জে ":350,"চলি":385,"চলে":500,"চলচ":862,"চরি":276,"চিত":2401,"চাল":768,"চার":933,"চিম":924,"The":204,"চেয":454,"চীন":632,"পৃথ":340,"মক ":218,"পূর":1563,"পুর":1770,"পুত":228,"বংশ":207,"পের":502,"পেন":312,"পশ্":957,"পাশ":229,"পিত":257,"পাক":348,"পান":707,"পিউ":331,"পাত":414,"পাদ":424,"পাধ":205,"পার":1335,"পাল":505,"পায":279,"পাও":244,"পরে":327,"পর্":1509,"বী ":353,"পরি":2733,"য। ":281,"পরব":233,"বে ":2767,"পন্":745,"পদা":548,"পদ্":310,"বা ":2607,"বি ":661,"পঞ্":305,"বর ":1036,"পড়":268,"বল ":484,"পত্":595,"পতি":403,"বন 
":272,"ন্ত":3819,"ন্ড":1774,"ন্ট":1141,"ন্স":716,"ন্থ":421,"ন্দ":2863,"ন্ধ":523,"ন্ন":1373,"ন্ম":1138,"ন্য":3823,"ম। ":348,"নেক":580,"নেত":451,"নের":3172,"নৈত":249,"নোব":216,"নুয":400,"নুষ":645,"নুস":424,"নিস":305,"নীত":622,"নীয":704,"নীর":259,"নিত":220,"নিজ":306,"নির":1511,"নিয":1391,"নিব":223,"নিম":242,"নাথ":204,"নী।":283,"নাট":333,"নিক":1359,"নাল":376,"নার":884,"নায":377,"নাম":2830,"নিউ":358,"নান":283,"নাই":230,"না।":209,"পে ":392,"বং ":4190,"নয়":574,"ধ্য":2527,"পি ":200,"নভে":235,"ধ্ব":397,"নপ্":410,"নদী":606,"ধের":201,"নতা":305,"নটি":399,"al ":424,"পর ":878,"ধার":1388,"ধিক":529,"ধান":1581,"ধুন":236,"ধীন":509,"and":351,"an ":333,"নগর":339,"ধর্":622,"নকা":301,"দ্দ":285,"দ্র":1656,"দ্য":1705,"দ্ব":1242,"দ্ভ":351,"দ্ধ":1443,"ধরণ":208,"ধরন":300,"দোল":241,"ধতি":214,"দেখ":386,"দেশ":3340,"দেব":447,"দের":2400,"দেয":262,"দীর":527,"দুই":388,"দুর":229,"পক ":215,"ati":407,"দর্":464,"০০ ":448,"দলে":293,"নী ":1022,"দশক":219,"নে ":2592,"নো ":750,"দস্":252,"দায":342,"দান":683,"দিক":556,"দার":1068,"দিয":527,"দিন":460,"ন্ ":258,"থিত":1239,"থিব":279,"থান":907,"থার":216,"থাপ":342,"থের":228,"থেক":1914,"থ্য":318,"নি ":3145,"না ":2078,"ত্ত":2681,"ত্ব":1563,"ত্র":5371,"ত্য":1749,"ত্ম":200,"১০ ":255,"ধি ":218,"দক্":833,"ধে ":246,"থাক":949,"তাঁ":801,"তি।":212,"তাক":433,"তা।":273,"তিত":496,"তিন":2758,"তিব":469,"তিয":323,"তির":977,"তিহ":514,"তিষ":915,"তিস":234,"তাদ":379,"তান":831,"তাব":456,"তায":251,"তার":2316,"তাল":484,"তিক":1444,"তুল":199,"তুর":359,"তীর":252,"তীয":1406,"তৃত":307,"তের":1547,"তৈর":454,"থবি":319,"তর্":666,"দী ":536,"তরা":670,"তরে":281,"দু ":322,"ণ্ড":418,"দা ":406,"তমা":738,"দি ":552,"ণ্য":221,"তবে":327,"দে ":324,"ণিত":430,"দর ":218,"তন্":360,"ণের":721,"তথ্":200,"তত্":385,"থা ":838,"ন। ":2670,"থে ":916,"তকে":220,"ঢাক":444,"থম ":866,"রকা":1796,"লত ":375,"রচন":240,"রকে":273,"রক্":515,"লন ":417,"য়ক":403,"য়ন":1056,"য়ত":420,"রজা":344,"য়র":205,"য়ম":204,"য়ো":399,"য়ে":7441,"য়ু":262,"য়ী":440,"য়ি":464,"য়া":7334,"রচল":394,"রচি":290,"য়।":3790,"যুগ":221,"যুক":999,"যুদ":441,"যাক":626," । ":1068,"যাট":233,"যাত":1108,"যাদ":399,"যান":2143,"যাপ":621,"যাব":244,"যাম":399,"যাল":1187,"যার":1249,"যায":1288,"যিক":236,"যাস":933,"রটি":492,"যিন":243,"রথম":978,"রত্":227,"রতে":1478,"রতি":1816,"রতী":622,"রণে":668,"রণা":328,"রণত":263,"রপত":250,"ion":522,"রন্":434,"রনা":231,"রনে":444,"যেম":243,"যের":1196,"রধা":866,"রদে":667,"রদা":349,"রভা":262,"লি ":1224,"রবা":369,"রবি":326,"লা ":2174,"রবর":362,"যোগ":846,"রমা":242,"লী ":505,"রয়":679,"লে ":3437,"রহণ":319,"রস্":785,"লো ":880,"ল্ ":250,"রাই":307,"রাক":460,"রাচ":539,"রাজ":2647,"রাখ":212,"রাণ":570,"রাপ":301,"রাব":230,"রান":1015,"রিক":1905,"রার":655,"রাম":984,"রায":1265,"রিজ":258,"রাস":948,"রিচ":1336,"রাহ":202,"রাশ":201,"রাষ":1115,"রিত":670,"রিট":394,"রিম":358,"রিব":807,"রিন":213,"রিস":680,"রিয":1449,"রিল":310,"রীক":256,"রীয":402,"রীর":289,"রুয":235,"রুত":425,"রূপ":403,"রে।":1025,"রেন":1720,"রের":3004,"রেল":310,"রেশ":198,"রেছ":854,"রেজ":2190,"রেট":233,"রেক":207,"রেস":215,"রোম":235,"রোপ":366,"রোগ":263,"র্ণ":1236,"র্ত":1912,"র্ড":513,"র্ট":567,"র্ব":2473,"র্ধ":342,"র্ন":427,"র্থ":1669,"র্দ":480,"র্ল":198,"র্য":1584,"র্ম":2023,"র্ভ":416,"র্স":595,"র্ষ":411,"র্শ":716,"র্চ":340,"র্জ":782,"র্ক":1521,"র্ঘ":284,"র্গ":673,"লকা":488,"লচ্":855,"লক্":395," Ma":238,"he ":468,"লতে":397," Co":216,"লটি":222,"লনা":313," Th":233,"লয়":917,"লম্":237,"লাই":481,"লাক":628,"লান":256,"লাদ":1657,"লাস":227,"লিখ":198,"লিক":746,"লার":1528,"লাভ":487,"লায":309,"লাম":488,"লিপ":223,"লিন":254,"লিত":865," of":374," 
an":214,"লে।":338,"লেও":259,"লের":1832,"লেন":1829,"লেজ":413,"লেখ":829,"লেক":269,"লীন":213,"লিম":293,"লিয":577,"লির":379,"ing":263,"লীয":377,"in ":228,"ল্প":990,"ল্ড":206,"ল্য":834,"ল্ল":756,"স। ":246,"লোচ":236,"লোম":202,"লোর":312,"লোয":202,"লোক":706,"শন ":671,"hum":238," th":574,"মত ":200,"মন ":560,"ফরা":505,"র। ":1082,"প্ত":664,"প্য":216,"প্র":13060,"প্ল":304,"প্ট":290,"পৌর":203,"ভা ":303,"বনি":284,"er ":443,"বনে":288,"বন্":515,"es ":375,"ফেব":217,"বচে":366,"বঙ্":528,"ent":254,"বছর":385,"ফুল":203,"বড়":273,"ফুট":478,"ফার":283,"বলে":833,"মে ":1672,"বস্":2112,"বহু":351,"বহৃ":337,"বহা":619,"ফোর":229,"ববি":644,"বপূ":262,"মি ":517,"ফ্র":706,"মা ":428,"বয়":292,"মী ":371,"বলা":524,"বলত":367,"বর্":2305,"বৃহ":359,"বেক":203,"বেল":404,"বের":931,"বেশ":937,"বেষ":386,"বোঝ":365,"য় ":12199,"বাই":313,"বাং":2410,"বান":279,"বাদ":861,"বাধ":440,"বার":1699,"বাম":198,"বায":216,"বাঙ":403,"বাজ":214,"বাচ":199,"বিত":730,"বিদ":1525,"বিধ":272,"বিন":362,"বিপ":215,"বিব":278,"বিভ":1099,"বিয":248,"বির":389,"বাল":212,"বিখ":512,"বিক":671,"বাহ":872,"বিচ":203,"বাস":937,"বিজ":1409,"রও ":300,"বীর":327,"বীপ":377,"বিষ":640,"বিস":375,"বিশ":2344,"ভাই":208,"রম ":217,"মগ্":268,"রন ":224,"যে ":1841,"রত ":384,"রণ ":1177,"ল। ":672,"ব্র":1160,"ব্য":3166,"ব্দ":1107,"যা ":1724,"রা ":4296,"রি ":1578,"ভ্য":238,"মবঙ":362,"মন্":664,"ভেম":223,"মধ্":1555,"মতা":307,"ভূম":496,"ভুক":240,"ভাগ":718,"ভার":2311,"ভাব":1222,"ভাষ":4828,"ভিয":254,"ভিন":1250,"ভিত":372,"ed ":213,"মাই":303,"মিন":199,"মিত":518,"মিট":419,"মাস":327,"মিক":501,"মাল":710,"মার":1752,"মান":3429,"মাত":575,"মাধ":574,"মাণ":369,"মাজ":400,"রো ":310,"মহা":801,"মস্":221,"রে ":4008,"যক্":405,"রু ":494,"রী ":1730,"ময়":958,"মৌল":207,"ম্র":226,"ম্য":700,"ম্ভ":243,"ম্ম":363,"ম্ব":1398,"ম্প":1633,"যমে":422,"যন্":880,"যবস":455,"যবহ":926,"মেন":347,"মের":1210,"মিশ":280,"মিল":414,"মীয":198,"মুখ":293,"মুক":494,"মুদ":244,"মুস":261,"মূল":1049,"লক ":453,"মূহ":371,"মৃত":392,"যতম":572,"সলা":347,"re ":199,"হচ্":536,"সাং":239,"সাম":709,"সায":256,"সার":942,"সাল":2557,"সিক":500,"সাধ":708,"সান":242,"সাব":458,"সিত":294,"সাহ":727,"সাই":326,"সাথ":746,"সাগ":439,"সীম":247,"সিন":266,"সির":308,"়ক ":242,"সূর":236,"সত্":211,"সদস":228,"ষেত":374,"সন্":338,"ষের":340,"সবচ":368,"সবা":223,"ষ্ক":275,"ষ্ণ":260,"ষ্ট":2671,"সভা":316,"ষ্ঠ":1554,"সময":711,"ষ্য":236,"সমা":613,"সমূ":375,"সম্":1299,"সরক":840,"সর্":566,"়ন ":433,"হিন":860,"হিস":1250,"হাদ":208,"হান":202,"হাম":272,"হায":221,"হার":1136,"হাস":659,"হিত":775,"হাজ":205,"হৃত":340,"সেই":206,"সেম":294,"সেব":1065,"সেপ":226,"সেন":564,"হতে":419,"সৃষ":262,"হত্":332,"সের":1064,"হয়":5615,"া। ":1439,"স্ক":1611,"স্ট":2276,"স্ত":2428,"স্থ":3469,"স্প":563,"স্ব":1109,"স্য":561,"হলে":402,"হলো":289,"হরে":416,"াগ ":429,"াও ":315,"াক ":329,"়া ":2568,"়ি ":218,"হ্য":294,"হের":399,"াই ":889,"়ে ":3374,"়ী ":420,"ি। ":726,"াল ":1724,"িও ":348,"ার ":13003,"াম ":1773,"াভ ":401," ১০":339,"াব ":352," ৩০":204," ১২":242,"়নে":202," ১৩":220," ১৪":201," ১৫":282," ১৬":286," ১৭":371," ১৮":940," ১৯":3783,"াহ ":308," ২০":1046,"াস ":1335,"াশ ":330,"িক ":5573,"িং ":561,"ী। ":724,"াট ":241,"াজ ":536,"াপ ":211,"ান ":5410,"াদ ":431,"াণ ":268,"াত ":1005," এই":3270," উৎ":560,"ng ":235,"শে ":909," উদ":684," উন":258," উত":959," উচ":373," উল":389," উপ":2191," ইত":669," আস":453," আয":322," অ্":835," আম":509," আর":1370," আল":1083," আধ":289," আদ":422," আন":905," ইউ":775," আফ":294," আব":706," উই":304," ইস":476," ইল":209,"nd ":283," ইর":274," ইয":287," ইন":731," অঙ":286," অক":379," অত":316," অঞ":596," অপ":429," অফ":358," অব":2124," অথ":198," আই":665," অধ":751," অন":3315," অল":206," আক":517," অভ":831," অর":1146," 
আছ":551," আগ":504," অস":564," ইং":2299,"শি ":343," অং":591,"শী ":336,"ষণ ":226," জ্":458," জা":2134," জি":334," জী":551," জু":646," জে":1308," জো":204," জল":229," ছো":231," ছি":1929," ছা":399," জন":3064," ছব":300," চা":848," চি":755," চে":275," চল":1081," চর":388," ২ ":205," ১ ":302," গ্":1853," গো":546," গে":282," গু":745," গি":203," ঘট":372," গা":986," গব":363," খ্":684," খে":607," গণ":589," গঠ":432," খা":596," খু":226," কা":3277," কি":1369," কু":537," কৃ":271," কে":1060," কো":1627," ক্":2305," কন":254," কথ":522," কব":408," কর":8000," কম":804," কল":1072," ওয":520," এস":383," এশ":223," এর":2835," এল":645," ঐত":234," এছ":286," এখ":588," এক":8988," এম":399," এব":4212," এপ":273," এন":256," এদ":302," এট":2135," ফে":435," বন":336," ফা":463," বড":264," ফি":389," ফু":675," বছ":364," পো":225," পৌ":257," প্":11714," ফর":609," ফল":347," বে":1555," বো":669," বৈ":440," বি":7573," বা":7403," বৃ":550," বু":368," বস":531," বহ":441," ফ্":592," বল":1749," বর":1400," মধ":1533," মন":645," ভে":259," ভূ":442,"শক্":282," মত":465," ভা":7371," ভি":710,"শকে":227," ব্":3507," ম্":464," মৌ":257," মো":583," মে":1083," মৃ":318," মূ":802," মু":1346," মি":1118," মা":4397,"সন ":262,"ষে ":226," মহ":923," মর":212," ধা":649," নদ":623," দে":2058," দৈ":202," দ্":1551," ধর":1182," নগ":253," না":3576," নি":3394," নী":245," নে":783," ধ্":292," নভ":274,"of ":368," পঞ":266," পড":228," পদ":913," পত":409," ন্":272," নো":287,"সব ":353," পু":1605," পা":3594," পি":495," পে":490," বই":234," পূ":997," পৃ":428," পর":4825," পশ":1001," তব":328," তত":253," তথ":332," তি":2653," তা":3867," তু":325," দক":818," ত্":257," তৈ":474,"ষা ":1614," তে":303," থা":1028," দল":654," দর":261," থে":1958," দি":1421," দা":849," দু":810," দশ":293," টা":272," টি":345," টে":546," ট্":234," ডি":734," ডা":459," ডে":331," ঢা":461," হয":5669," স্":3980," সো":334," হল":1487," সি":1221," সা":6870," সহ":408," হচ":537," সে":1681," হত":623," সৃ":267," সূ":402," সু":981," সী":216," সব":807," সম":3235," সন":329," সর":1532," সঙ":383," সত":207," সদ":329," হো":212," হ্":276," হা":1267," হি":1859," হে":312," শক":251," শা":1199," শি":1188," সং":2886," শু":645," শহ":1500," সক":314," শে":357," শ্":581," শত":509," শব":479,"on ":532," রি":295," রা":3902," রে":556," রু":295," রূ":298," রো":452," রক":302," যা":2826," রচ":547," যু":1137," যি":261," রয":403," রব":219," যো":242," যৌ":226," যে":1328," লি":670," লা":967," লে":811," লো":608," লক":345,"স্ ":266," ও ":4589,"শাখ":311,"শাস":697,"শিত":339,"হর ":728,"শাল":206,"শিক":625,"le ":226,"শহর":1422,"সে ":658,"হন ":263,"সী ":240," এ ":753,"হণ ":261,"সি ":877,"শব্":493,"সা ":275,"শনে":210,"শনা":303,"শতক":230,"শতা":251,"শটি":231,"mb ":231,"ষাত":229,"ষিণ":767,"ষায":1936,"ষার":498,"ষাব":214," স ":339,"ষা।":354,"সঙ্":452," র ":316,"়। ":2782,"ষয়":378,"হী ":223,"শ্র":601,"শ্য":362,"শ্ব":1645,"ষমত":242,"শ্চ":1032,"শেষ":751,"শের":1660,"ষণা":351,"শুর":445,"সংব":214,"হল ":715,"সংস":804,"শিয":443,"শিল":531,"সংক":417,"সংগ":424,"সংখ":731,"সংঘ":234,"শিষ":385,"ুসা":325,"ুষ্":372,"ুসল":207,"ূমি":444,"ূলক":257,"ূলত":394,"ে। ":2956,"ূর্":1748,"ুক্":1812,"ুটি":275,"ুড়":269,"ুটব":355,"ুপ্":205,"ুনি":328,"ুদ্":1024,"ুত্":534,"ুলা":360,"ুলি":957,"ুলো":677,"ুরস":488,"ুরা":502,"ুরু":902,"ুরে":214,"ুর্":475,"ুয়":999,"ুমা":378,"ৃহত":230,"েও ":751,"েক ":677,"েজ ":442,"েট ":476,"েড ":233,"েত ":298,"েন ":2383,"ৃতি":667,"ৃথি":275,"ৃত্":530,"ৃষ্":660,"েই ":990,"াচী":479,"াঙা":392,"াঙ্":430,"াজে":291,"াজ্":1187,"াজা":637,"াজি":350,"াজধ":330,"াজন":429,"াছে":290,"াখা":510,"াগর":531,"ীন 
":996,"াকা":1674,"াকৃ":214,"াকি":417,"াক্":624,"াকে":1517,"াগা":228,"াগু":251,"াগে":340,"ীত ":407,"াওয":538,"ুই ":270,"াপে":239,"াপি":216,"াপা":332,"াপ্":438,"াবল":223,"াব্":798,"াবে":1282,"াবি":607,"াবা":550,"ামক":357,"ার।":391,"াথে":795,"াদা":503,"াদি":433,"াদী":233,"াদে":2459,"াদ্":228,"ানক":226,"াধি":292,"াধী":407,"াধা":767,"িউট":438,"াধ্":701,"ানম":216,"ানব":285,"ানা":935,"ানি":1277,"ানী":1247,"ানু":818,"ানে":2964,"ান্":3257,"ানো":483,"াপক":268,"াপন":257,"াণী":233,"াণি":288,"াতী":444,"াতি":1024,"াতা":904,"াতন":242,"াণে":256,"ান।":554,"াত্":1070,"াতে":889,"ীর ":1512,"াটি":687,"াঞ্":296,"াট্":216,"াড়":1116,"িন ":1620,"িদ ":390,"িত ":5217,"িণ ":578,"িজ ":216,"়ের":988,"়েল":232,"াইল":290,"িস ":391,"়েন":224,"়েত":314,"়েছ":1789,"িশ ":479,"াইক":267,"াইন":398,"াইট":347,"়িত":393,"াঁর":695,"াংশ":369,"িল ":1310,"াংল":2405,"়াড":214,"াৎ ":225,"়াল":346,"়াম":294,"ির ":3233,"়ার":2357,"়ান":720,"়াত":233,"িম ":623,"িহা":558,"ংশ ":608,"িস্":1568,"িসা":395,"িসি":199,"িসে":1302,"ূল ":317,"িরি":292,"িরা":288,"ির্":1412,"িলা":304,"িলি":409,"িলে":1534,"িল্":792,"িলো":314,"িশন":236,"িষ্":1946,"িশি":298,"িশে":548,"িষয":343,"িশ্":1581,"ীবন":365,"ীমা":216,"ীয়":4081,"ীর্":354,"ীরে":207,"ৃত ":857,"umb":265,"ীতি":604,"ীতে":555,"ীদে":242,"ীনত":266,"াহা":469,"িচা":733,"াহী":229,"িচি":688,"াহি":1315,"াস্":708,"াসে":596,"াসি":1223,"াসা":419,"াসী":261,"াষ্":1171,"াসন":338,"াষা":4713,"াশি":537,"ুর ":1172,"িটি":780,"িটা":629,"িজ্":1437,"িজে":324,"িছু":475,"ালক":364,"ালয":874,"ালন":396,"ারী":1146,"ারি":1557,"ারা":1659,"ার্":5067,"ারে":2345,"ায়":8671,"ারক":300,"ারন":347,"ারণ":1235,"ারত":1942,"ামি":443,"ামা":748,"াম্":609,"ামে":1488,"ামী":317,"াশন":211,"িখ্":507,"ুন ":561,"িকা":1858,"িকি":198,"িকে":1068,"িক্":1074,"ালী":661,"ালা":739,"ালি":1410,"ালো":249,"ালে":2817,"াল্":297,"িপি":219,"িপ্":331,"িনে":565,"িনা":408,"িনি":2917,"িনী":642,"িনয":241,"িন্":1908,"িমি":290,"িম্":258,"িমে":259,"িমা":636,"িল।":473,"িয়":6354,"িবা":741,"িবী":293,"িবি":361,"িবে":347,"িবর":417,"িভি":889,"িভা":580,"িমব":382,"tio":423,"thu":236,"িদ।":233,"ুল ":613,"িডি":222,"িত।":1272,"িদ্":1210,"িদে":199,"িনট":209,"িধা":259,"ter":251,"িতী":319,"িতি":339,"িতা":574,"িতে":1209,"িত্":2907,"the":401,"ঁর ":674,"্র ":2468,"্য ":4783,"োঝা":363,"্ম ":1055,"্ব ":1443,"্প ":322,"োগ্":438,"্ন ":1119,"্স ":725,"্ষ ":348,"ংশে":259,"ংসদ":201,"্ট ":1640,"্জ ":260,"্চ ":547,"ংলা":2409,"ংরে":2127,"্দ ":444,"োকে":215,"্ধ ":572,"্ত ":2310,"ংবা":245,"্থ ":613,"্ণ ":661,"্ঠ ":206,"্ড ":1005,"্ক ":455,"্গ ":569,"ংক্":310,"ংখ্":741,"্ঞা":1605,"্জা":489,"্জি":208,"্জে":211,"্জন":340,"্ছে":599,"্ত।":316,"্ডি":303,"্ডা":322,"্ডে":556,"্ঠা":754,"্ঠি":421,"্টি":1218,"্টা":1173,"্টো":370,"্ট্":1738,"্টে":710,"্দ্":1049,"আর ":224,"্দো":445,"্দা":237,"্দি":541,"্দী":307,"্দে":669,"্দু":407,"্ধত":232,"্দর":217,"্থা":2220,"্থি":1299,"্থে":279,"্থন":231,"্তে":276,"্থব":329,"্ত্":1963,"্তা":1329,"্তি":2212,"্তী":445,"্তু":740,"্তৃ":252,"্তন":456,"্তম":911,"্তর":2524,"্বক":355,"্প্":259,"্পে":443,"্পা":452,"্পী":215,"্পি":552,"্পর":340,"্পন":198,"অংশ":574,"্না":360,"্নি":236,"্ধে":424,"্ধি":256,"্ধা":305,"অক্":342,"ইন ":402,"অঙ্":286,"্কি":934,"্কা":944,"্কৃ":335,"্কে":282,"্গত":198,"্চা":486,"্চি":1888,"্চল":770,"্গে":523,"্গী":329,"্গা":318,"অফ 
":315,"ংস্":691,"োনো":215,"োনা":252,"োপা":229,"োমা":248,"োমি":242,"োয়":741,"োরি":210,"োর্":375,"োলা":267,"োলন":236,"েছি":696,"েছে":2368,"েজি":1969,"ইংর":2136,"েক্":594,"আমে":217,"েকে":2070,"েখক":248,"অ্য":839,"আয়":311,"েখা":876,"আবি":225,"েকট":292,"ইউন":276,"আধু":208,"ইউর":313,"�":1457,"আন্":621,"আলো":342,"আর্":422,"আরব":337,"ইটি":293,"অঞ্":595,"অবস":1435,"অভি":728,"আইন":296,"অধি":383,"অনু":1003,"অধ্":210,"অন্":1608,"অনে":547,"আকা":241,"েশ ":1172,"েস ":337,"অর্":1060,"েষ ":526,"েল ":879,"আছে":543,"ের ":22964,"অস্":383,"োর ":595,"োন ":788,"োট ":334,"ৈরী":206,"ৈরি":250,"উচ্":373,"ৈতি":259,"োগ ":311,"উটা":283,"োক ":321,"উত্":954,"েস্":373,"ইত্":216,"ইতি":273,"ইন্":608,"েরা":505,"েরি":683,"েলে":417,"েলি":409,"েলা":1478,"েলো":239,"ইয়":586,"েশন":465,"েশী":418,"েশি":582,"েষণ":370,"ইরা":327,"েশে":1749,"েষ্":229,"েন্":1439,"েনে":324,"েনি":277,"েনী":276,"েনা":409,"েপ্":238,"েবে":991,"েব্":240,"েয়":1277,"েম্":732,"েমি":222,"েমন":290,"ইসল":353,"েটি":246,"েডি":240,"েতা":376,"েত্":488,"েন।":2598},"n_words":[1969690,2210879,1502429],"name":"bn"}
\ No newline at end of file
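[Editor's note] The blob above is the complete langdetect profile for Bengali: one JSON line whose three fields are visible in the data itself — "freq" (a map from 1-, 2- and 3-character n-grams to raw corpus counts), "n_words" (three totals, apparently one per n-gram length), and "name" (the language code). A minimal sketch of reading such a file under Python 3, using only the standard library rather than langdetect's own loader; the path assumes the script runs from the repository root, and the sample values are taken from this diff:

    import json

    def load_profile(path):
        # Profiles are plain UTF-8 JSON on a single line:
        # {"freq": {...}, "n_words": [...], "name": "..."}
        # Python 3's open() takes an explicit encoding argument, which is
        # the main change from Python 2's bytes-oriented file reading.
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    profile = load_profile("nlp_resource_data/langdetect/profiles/bn")
    print(profile["name"])        # "bn"
    print(profile["n_words"])     # [1969690, 2210879, 1502429]
    print(profile["freq"]["টি"])  # 14501 -- raw count for one n-gram seen above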
diff --git a/nlp_resource_data/langdetect/profiles/ca b/nlp_resource_data/langdetect/profiles/ca
new file mode 100755 (executable)
index 0000000..5ed9b5a
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/ca
@@ -0,0 +1 @@
+{"freq":{"A":167967,"B":105769,"C":184193,"D":56742,"E":163017,"F":65433,"G":73588,"H":42421,"I":60824,"J":44362,"K":25608,"L":169492,"M":139288,"N":48908,"O":35569,"P":128397,"Q":7502,"R":71953,"S":138553,"T":80712,"U":35329,"V":59949,"W":15160,"X":18166,"Y":7184,"Z":7346,"a":5069786,"b":518425,"c":1582005,"d":1999798,"e":4809590,"f":422965,"g":584353,"h":244304,"i":3245856,"j":85607,"k":66957,"l":2733912,"m":1163655,"n":2818805,"o":1980421,"p":975294,"q":256584,"r":2749326,"s":2849210,"t":2490288,"u":1690205,"v":411610,"w":27879,"x":144397,"y":182310,"z":79156,"²":1429,"·":30306,"À":7240,"Á":636,"É":12780,"Í":3623,"Ò":1001,"à":198286,"á":7019,"â":1478,"ã":604,"ä":1134,"ç":38339,"è":199825,"é":298805,"ê":577,"ë":644,"í":174684,"ï":18462,"ñ":3283,"ò":99853,"ó":228041,"ô":729,"ö":1733,"ø":541,"ú":37007,"ü":11730,"ā":2431,"č":430,"ī":1261,"ı":619,"ō":1024,"ş":566,"š":763,"ū":1145,"ʿ":732,"́":819,"Δ":527,"Κ":607,"Μ":435,"ά":1060,"έ":590,"ί":1375,"α":3941,"γ":904,"δ":940,"ε":1681,"η":1323,"θ":467,"ι":2936,"κ":1516,"λ":2022,"μ":1369,"ν":2904,"ο":4163,"π":1084,"ρ":2637,"ς":3296,"σ":1399,"τ":2098,"υ":1042,"φ":460,"ω":718,"ό":1021,"ύ":457," A":166692,"С":472," B":105179," C":176036," D":56284," E":162233," F":64928," G":73223," H":42126," I":60593," J":44254," K":25473," L":168992," M":138313," N":48671," O":35253," P":127947,"а":4416," Q":7462,"б":612," R":71632,"в":1837," S":137876,"г":573," T":73676,"д":937," U":35184,"е":2590," V":59769," W":15003," X":18048,"и":2992," Y":7170,"й":818," Z":7293,"к":1874,"л":1832,"м":805,"н":2553,"о":3273,"п":464,"р":2416," a":725198,"с":2063," b":68648,"т":1583," c":407309,"у":846," d":1356539," e":639012," f":270157," g":94272,"ч":604," h":91458," i":370987," j":31625," k":11008,"ы":477," l":598095,"ь":661," m":263831," n":110413," o":166206,"я":694," p":438961," q":158543," r":159534," s":351560," t":204462," u":330955," v":140137," w":3345," x":7050," y":2383," z":6624," À":7220," Á":633," É":12740," Í":3619," Ò":1001," à":11900," è":4562," é":191856," í":1138," ò":3052," ú":6203,"ו":509,"י":657,"ا":3045,"ب":1225,"ة":477,"ت":440,"ح":495,"د":933,"ر":1451,"س":713,"ع":505,"ل":1984,"م":1193,"ن":1380,"و":1076,"ي":1784," ʿ":526," Δ":517," Κ":595," Μ":434,"A ":9255," С":470,"B ":2075,"C ":13226,"Ab":6627,"Ac":4787,"Ad":3585,"Ae":827,"Af":1295,"Ag":3813,"Ah":707,"Ai":4269,"Aj":795,"Ak":629,"Al":39224,"Am":9397,"An":20902,"Ap":2822,"Aq":9084,"Ar":25400,"As":6532,"At":5083,"Au":9222,"Av":2488,"Ay":587,"Az":1176,"D ":2763,"Ba":39223,"Be":14720,"Bh":637,"Bi":6584,"Bl":4174,"Bo":13076,"Br":13262,"Bu":6221,"E ":3270,"Ca":68472,"Ce":8421,"Ch":13953,"Ci":7812,"Cl":6317,"Co":48282,"Cr":7825,"Cu":5445,"Cy":693,"F ":1643,"Da":7941,"De":14385,"Dh":531,"Di":12144,"Dj":766,"Do":7923,"Dr":2842,"Du":4335,"G ":1233,"Ea":693,"Eb":905,"Ec":954,"Ed":3150,"Eg":2041,"Ei":1183,"El":69098,"Em":4361,"En":13208,"Ep":1014,"Eq":1096,"Er":7104,"Es":39860,"Et":1325,"Eu":9201,"Ev":823,"Ex":2356,"H ":1588,"Fa":5893,"Bà":594,"Fe":9420,"Fi":7653,"Fl":3848,"Bè":916,"Bé":1704,"Fo":13036,"Fr":15905,"Fu":3564,"Bò":470,"I ":21603,"Ga":15801,"Cà":785,"Ge":10838,"Gh":1076,"Gi":6837,"Gl":1477,"Cè":637,"Go":6345,"Gr":13805,"Gu":12143,"Cò":1744,"J ":861,"Ha":12389,"He":8029,"Hi":6527,"Dé":1253,"Ho":7750,"Hu":3320,"Hy":937,"K ":914,"Ib":1566,"Ic":431,"Id":511,"Ie":544,"Ig":1125,"Il":3787,"Im":2928,"In":18160,"Ir":2918,"Is":4771,"It":3174,"Iv":698,"L ":45795,"Ja":9385,"Je":4192,"Ji":1098,"Jo":19183,"Ju":8013,"Fó":1755,"M 
":2715,"Ka":7690,"Gà":486,"Ke":2606,"Kh":3464,"Ki":2315,"Kl":511,"Ko":2713,"Kr":1035,"Ku":2506,"N ":2827,"La":56334,"Le":16809,"Li":11134,"Ll":15746,"Lo":13957,"Lu":6126,"Ly":567,"O ":2384,"Ma":59683,"Mc":443,"Me":15371,"Mi":17134,"Mo":23355,"Mu":11733,"My":826,"P ":2536,"Na":13517,"Ne":6267,"Ni":5864,"No":16913,"Nu":1421,"Ob":1520,"Oc":3465,"Od":657,"Oe":644,"Of":675,"Oi":1219,"Ol":5522,"Om":527,"On":1132,"Op":1151,"Or":10390,"Os":2218,"Ot":1074,"Ou":1055,"Ov":557," ا":1151," ب":472,"R ":1759,"Pa":41526,"Pe":16857,"Ph":2138,"Pi":12456,"Pl":6395,"Po":15956,"Lí":769,"Pr":19211,"Ps":950,"Pt":835,"Pu":5518,"S ":6346,"Qa":686,"Mà":1235,"Mè":2592,"Mé":951,"Qu":5784,"Mó":959,"T ":1872,"Mú":915,"Ra":9945,"Nà":738,"Re":23843,"Rh":761,"Ri":9307,"Ro":18718,"Ru":3720,"U ":929,"Sa":46292,"Sc":3755,"Se":22758,"Sh":4186,"Si":10935,"Sk":504,"Sl":456,"Sm":598,"So":13749,"Sp":2135,"St":6579,"Su":12982,"Sy":1208,"V ":6128,"Ta":12185,"Te":13493,"Th":7720,"Ti":4800,"Pè":909,"Pé":634,"To":18580,"Tr":10282,"Tu":4976,"Tx":977,"W ":470,"Uc":601,"Ul":841,"Un":23842,"Ur":3201,"Us":529,"Ut":952,"X ":4850,"Va":25736,"Ve":9006,"Vi":16767,"Vo":2501,"Rú":1355,"Wa":4037,"Sà":468,"We":2882,"Wh":595,"Wi":3723,"Sè":828,"Wo":1818,"Sí":750,"Só":1116,"Xa":1968,"Tà":555,"Xe":604,"Xi":3780,"Té":2128,"Tí":779,"Ya":1516,"Ye":462,"Yo":2907,"Yu":831,"Yv":453,"Za":2055,"Ze":1518,"Zh":746,"Zi":749,"Zo":625,"Ví":574,"Zu":662,"a ":1907139,"b ":69361,"aC":7284,"aM":438,"aT":1147,"c ":117335,"aa":2507,"ab":81857,"ac":168114,"ad":175744,"ae":19041,"af":16941,"ag":47208,"ah":9943,"ai":64967,"aj":11232,"ak":9722,"al":526126,"am":241207,"an":572272,"ao":5807,"ap":55491,"aq":20654,"ar":448864,"as":120225,"at":401723,"au":52031,"av":49622,"aw":4860,"ax":4714,"ay":11377,"az":7229,"d ":243396,"ba":77121,"bb":1707,"bc":1189,"bd":3624,"be":38173,"bf":959,"bg":430,"bh":519,"bi":65502,"bj":4126,"bl":57656,"bm":511,"bn":4600,"bo":28803,"br":91356,"bs":8723,"cT":763,"bt":3126,"bu":24674,"by":1055,"e ":1214321,"ca":278570,"cc":39000,"ce":91107,"ch":30090,"ci":397490,"ck":6861,"cl":40148,"cm":1337,"cn":3405,"co":253884,"cq":789,"cr":50505,"cs":28362,"ct":98830,"cu":58326,"cy":1648,"f ":11433,"da":170876,"db":629,"dd":2022,"de":1171054,"dg":1108,"dh":2156,"di":184633,"dj":2031,"dl":818,"dm":5146,"dn":868,"do":78971,"dq":613,"dr":40396,"ds":14629,"dt":962,"du":43475,"dv":2047,"dw":1267,"dy":1275,"g ":36541,"ea":46323,"eb":28701,"ec":98348,"ed":54037,"ee":9382,"ef":17938,"eg":148516,"eh":3371,"ei":74170,"ej":4770,"aç":6243,"ek":2937,"el":585626,"aè":664,"em":113998,"en":690324,"eo":20271,"ep":77495,"eq":8133,"aí":5793,"er":588374,"es":721405,"aï":5031,"et":119799,"eu":70196,"añ":1235,"ev":44586,"ew":2926,"aó":868,"ex":36681,"ey":6489,"ez":7065,"h ":20052,"fa":45069,"bà":4314,"fe":54681,"ff":2949,"fg":617,"fi":79892,"fl":9361,"bè":1987,"bé":22369,"fo":106827,"bí":1070,"fr":72660,"fs":1040,"ft":1561,"fu":22275,"bò":958,"bó":1179,"i ":486758,"bú":1399,"ga":75189,"gb":771,"gc":471,"cà":10275,"gd":4247,"ge":73143,"gg":1424,"gh":4754,"gi":106632,"gl":34226,"cè":55455,"cé":2934,"gm":2392,"gn":21080,"go":46155,"cí":5094,"gr":62392,"gs":3173,"gt":930,"gu":81931,"cò":5199,"có":1705,"gy":1048,"j ":1114,"cú":854,"ha":86053,"dà":6395,"he":31934,"hi":40755,"dè":10213,"hl":2013,"dé":2052,"hm":2373,"hn":2732,"ho":22152,"dí":3197,"hr":5173,"hs":1334,"ht":3853,"hu":11760,"dò":2833,"dó":2693,"hw":1082,"hy":3032,"k 
":14568,"dú":1183,"ia":328149,"ib":41073,"ic":335049,"id":100152,"eà":1569,"ie":89153,"if":32414,"ig":77952,"ih":1878,"ii":2809,"ij":2329,"ik":6237,"eç":1708,"il":121450,"im":89358,"in":289495,"io":108785,"ip":102606,"iq":16936,"ir":97698,"is":214334,"eï":2440,"it":310090,"iu":65412,"eñ":500,"iv":56195,"eò":2925,"eó":1719,"iw":641,"ix":62728,"iy":2272,"iz":5848,"l ":882089,"ja":27282,"fà":1429,"je":8661,"ji":2284,"fè":1129,"jo":14068,"jp":497,"fí":6560,"ju":27368,"fò":2736,"fó":925,"m ":104114,"ka":9761,"gà":2624,"ke":6167,"kh":4960,"ki":7623,"kk":730,"kl":1393,"gè":11938,"km":6156,"ko":4420,"gí":1320,"kr":1224,"ks":2046,"kt":721,"ku":2884,"gò":1533,"gó":2256,"ky":900,"n ":578690,"gú":437,"la":530288,"gü":5534,"lb":8378,"lc":8376,"ld":12030,"hà":2116,"le":272073,"lf":6593,"hâ":823,"lg":12357,"lh":3062,"li":214695,"lk":1865,"ll":191289,"hè":710,"lm":29585,"hé":819,"ln":1295,"lo":99596,"lp":5594,"hí":443,"lq":539,"lr":541,"ls":184444,"lt":65042,"lu":50394,"lv":8536,"lw":659,"ly":3656,"lz":1155,"o ":143542,"ma":183950,"mb":120660,"ià":20729,"me":272481,"mf":2067,"mi":98713,"iç":485,"iè":6430,"ml":501,"mm":10261,"ié":606,"mn":3648,"mo":72058,"mp":89678,"mr":646,"ms":9139,"nT":869,"mt":7221,"mu":85902,"iñ":548,"iò":4221,"ió":161911,"my":1339,"p ":39705,"na":375478,"nb":2542,"nc":178780,"nd":100189,"jà":1447,"ne":166059,"nf":15519,"ng":69078,"nh":3574,"ni":268135,"nj":8272,"nk":3882,"nl":3285,"nm":1655,"nn":16989,"no":122300,"np":434,"nq":5665,"nr":4421,"ns":164301,"nt":494268,"nu":21373,"nv":20735,"nw":559,"nx":2047,"l·":30104,"ny":118633,"nz":6004,"q ":1464,"oa":11667,"ob":72497,"oc":96788,"od":44343,"oe":16400,"of":21715,"og":30375,"oh":5627,"oi":19729,"oj":5019,"ok":3546,"ol":160835,"om":204290,"on":355923,"oo":6465,"op":56175,"oq":4318,"or":346034,"os":142907,"ot":69675,"m²":1392,"ou":85807,"ov":56462,"ow":4060,"ox":4774,"oy":3450,"oz":2678,"r ":408018,"pa":172744,"pc":3590,"là":25527,"pe":221857,"lá":716,"ph":7073,"pi":107129,"lç":1604,"pl":39811,"lè":17093,"lé":5762,"po":126025,"pp":3431,"lí":35679,"pr":140727,"ps":16209,"pt":25645,"pu":33640,"lò":13377,"ló":5245,"py":430,"s ":1473639,"lú":1530,"qa":697,"mà":21678,"mè":8997,"mé":27182,"mí":23527,"qu":250822,"mò":4162,"mó":5285,"t ":625683,"mú":5977,"ra":441040,"rb":17931,"rc":58344,"rd":67527,"nà":7041,"re":497571,"ná":739,"rf":7074,"rg":40352,"rh":1988,"ri":359092,"rj":900,"nç":18933,"rk":6010,"nè":12361,"rl":16633,"rm":73740,"né":4208,"rn":44429,"ro":178134,"rp":7200,"rq":17579,"ní":11412,"rr":85564,"rs":79726,"sT":1915,"rt":160127,"ru":44855,"nò":4723,"rv":15280,"nó":1643,"rw":832,"rx":4521,"ry":6748,"rz":2127,"u ":147534,"nú":2164,"sa":108844,"sb":5605,"sc":71599,"sd":3875,"oà":1370,"se":241029,"sf":4238,"sg":5660,"sh":11747,"si":198022,"sk":5043,"oç":438,"sl":7672,"sm":18866,"sn":2982,"so":88253,"sp":70884,"sq":6816,"sr":1520,"ss":87061,"tT":542,"oï":1001,"st":335577,"su":57136,"sv":1065,"sw":922,"sy":1227,"sz":489,"v ":3284,"ta":510626,"tb":6398,"tc":2726,"pà":3263,"te":302092,"tf":766,"tg":14220,"th":18383,"ti":231505,"tj":8240,"pç":566,"tk":428,"tl":7081,"pè":15203,"tm":4208,"tn":2119,"to":113740,"tp":2252,"pí":2210,"tr":213293,"ts":128142,"tt":11395,"tu":133104,"pò":3840,"pó":2030,"tw":750,"tx":5039,"ty":3045,"tz":30195,"w ":3595,"pú":7209,"ua":125142,"ub":32903,"uc":40135,"ud":47596,"ue":222808,"uf":3537,"ug":17601,"uh":2436,"ui":65611,"uj":2631,"uk":2801,"ul":82163,"um":35757,"un":458542,"uo":1697,"up":31594,"uq":1182,"ur":127636,"us":102797,"ut":89357,"uu":506,"uv":3604,"uw":547,"ux":5117,"uy":1209,"uz":2643,"x 
":40539,"va":160676,"qü":2554,"rà":23801,"ve":106866,"rá":563,"vi":85084,"rç":7866,"rè":14693,"ré":11337,"vo":29266,"rí":22061,"vr":1975,"vs":490,"vu":4528,"rò":15984,"ró":4652,"vy":458,"y ":80251,"rú":2239,"wa":9037,"sà":4294,"we":4065,"wi":3435,"sè":5971,"wl":546,"sé":2104,"wn":954,"wo":1249,"sí":4229,"ws":941,"sò":2669,"só":14955,"ww":1633,"z ":11513,"sú":925,"xa":17709,"xc":2574,"tà":33305,"xe":24074,"xf":784,"xi":24181,"tè":9568,"té":9288,"xo":5611,"xp":7246,"tí":21499,"xt":10062,"xu":1571,"tò":24328,"tó":12504,"tú":1377,"ya":40218,"yb":542,"yc":1775,"uà":2374,"yd":1562,"ye":9657,"yg":491,"yi":3467,"uè":8877,"yl":2725,"ym":2095,"ué":3485,"yn":2835,"yo":12462,"yp":1251,"uí":8337,"yr":2338,"ys":12606,"uï":9663,"yt":1373,"yu":1178,"uñ":433,"yy":1005,"za":33059,"zb":548,"và":1420,"ze":8191,"zh":878,"zi":6215,"vè":1754,"vé":3525,"zo":9277,"ví":10528,"zu":1263,"zy":458,"zz":1582,"xà":605,"xè":1753,"xí":3147,"xò":1895,"yà":1020,"zà":1087,"zá":498,"² ":1423,"·l":30185,"Àf":1855,"Àn":592,"Àr":795,"Às":1563,"Àu":1032,"És":11168,"Ín":3390,"Òl":650,"à ":79768,"á ":764,"àb":2832,"àc":12425,"àd":1403,"àf":4242,"àg":2016,"ài":670,"àl":10998,"àm":6161,"àn":28201,"àp":2170,"àq":1193,"àr":18957,"às":9461,"àt":12150,"àu":1000,"àv":961,"àx":2054,"ál":686,"án":2546,"ár":550,"ás":500,"ât":940,"ão":548,"ç ":8144,"è ":9699,"é ":40616,"ça":24720,"ço":2465,"çu":715,"èc":20010,"èd":3296,"èf":490,"èg":1496,"èi":2236,"èl":5777,"èm":3022,"èn":34822,"èp":2640,"èr":19285,"ès":83075,"èt":7978,"èu":817,"èv":1486,"èx":3115,"éc":527,"éd":596,"ée":1019,"ég":451,"éi":2349,"él":845,"ém":860,"én":4525,"ér":2460,"és":238308,"ét":1364,"éu":1948,"év":574,"í ":26502,"çà":803,"çó":1080,"ía":3855,"íb":1797,"íc":13966,"íd":3192,"íf":5625,"íg":2541,"íl":22042,"ím":15141,"ín":22592,"ío":4311,"íp":2416,"íq":1061,"ír":1956,"ís":23333,"ít":21665,"ív":1488,"ïc":443,"ïd":3760,"ïl":889,"ïn":3033,"ïs":4007,"ït":4826,"ò ":9454,"ó ":192574,"ña":1358,"ño":1045,"òb":1306,"òc":3252,"òd":1983,"òf":2506,"òg":6120,"òl":7780,"òm":7356,"òn":22930,"òp":6668,"òq":909,"òr":18203,"òs":7050,"òt":2457,"òv":580,"òx":778,"ón":23445,"óp":508,"ór":3217,"ós":6022,"ú ":4222,"úb":7182,"úl":2870,"úm":2316,"ún":4012,"úr":2758,"ús":11106,"út":602,"üe":5188,"üi":499,"ül":636,"ür":890,"üè":1316,"üí":1085,"ān":472,"ī ":511,"ō ":457,"あ":716,"ア":1421,"丁":1198,"三":1779,"丘":601,"並":683,"之":1430," 丁":432," 三":613," 之":527,"α ":985,"ν ":764,"ς ":3290,"ης":436,"ικ":453,"ιο":521,"ος":1578," A ":4408," B ":735," C ":1205," Ab":6311," Ac":4685," Ad":3550," Ae":816," Af":1280," Ag":3774," Ah":704," Ai":4234," Aj":780," Ak":625," Al":39006," Am":9331," An":20811," Ap":2783," Aq":9010," Ar":25239," As":6402," At":5045," Au":9186," Av":2457," Ay":583," Az":1157," D ":1164," Ba":38919," Be":14672," Bh":636," Bi":6545," Bl":4160," Bo":13028," Br":13227," Bu":6184," E ":775," Ca":68255," Ce":8404," Ch":13909," Ci":7767," Cl":6222," Co":48100," Cr":7791," Cu":5356," Cy":692," F ":454," Da":7891," De":14315," Dh":530," Di":12054," Dj":766," Do":7766," Dr":2838," Du":4321," Ea":682," Eb":904," Ec":948," Ed":3109," Eg":2035," Ei":1171," El":68877," Em":4324," En":13065," Ep":1006," Eq":1092," Er":7077," Es":39661," Et":1311," Eu":9149," Ev":812," Ex":2310," H ":700," Fa":5864," Bà":594," Fe":9397," Fi":7475," Fl":3835," Bè":916," Bé":1703," Fo":12863," Fr":15864," Fu":3553," Bò":470," I ":5983," Ga":15731," Cà":784," Ge":10789," Gh":1074," Gi":6808," Gl":1472," Cè":637," Go":6328," Gr":13726," Gu":12098," Cò":1742," J ":694," Ha":12355," He":7998," Hi":6493," Dé":1253," Ho":7677," 
Hu":3311," Hy":932," Ib":1560," Ic":427," Id":507," Ie":528," Ig":1119," Il":3765," Im":2905," In":18025," Ir":2911," Is":4748," It":3163," Iv":695," L ":43996," Ja":9364," Je":4175," Ji":1097," Jo":19131," Ju":8002," Fó":1755," M ":726," Ka":7658," Gà":486," Ke":2565," Kh":3462," Ki":2288," Kl":509," Ko":2706," Kr":1034," Ku":2506," N ":658," La":56149," Le":16748," Li":11048," Ll":15690," Lo":13931," Lu":6114," Ly":567," O ":760," Ma":59175," Mc":441," Me":15319," Mi":17084," Mo":23321," Mu":11454," My":824,"а ":1052," Na":13479," Ne":6202," Ni":5851," No":16843," Nu":1416," Ob":1491," Oc":3449," Od":648," Oe":640," Of":637," Oi":1216," Ol":5499," Om":522," On":1113," Op":1132," Or":10300," Os":2203," Ot":1069," Ou":1046," Ov":553," R ":654," Pa":41429," Pe":16814," Ph":2094," Pi":12404," Pl":6359," Po":15893," Lí":769," Pr":19157," Ps":927," Pt":835," Pu":5503," S ":3252," Qa":686," Mà":1235," Mè":2592," Mé":948," Qu":5748," Mó":958," Mú":914," Ra":9904," Nà":737," Re":23771," Rh":759," Ri":9299," Ro":18670," Ru":3714," Sa":46252," Sc":3705," Se":22671," Sh":4149," Si":10895," Sk":493," Sl":452," Sm":594," So":13713," Sp":2112," St":6344," Su":12961," Sy":1201," V ":1559," Ta":12137," Te":13393," Th":7441," Ti":4779," Pè":909," Pé":634," To":12436," Tr":10239," Tu":4923," Tx":972," Uc":597," Ul":830," Un":23813," Ur":3195," Us":518," Ut":947," X ":990," Va":25713," Ve":8965," Vi":16711," Vo":2488,"й ":603," Rú":1355," Wa":4019," Sà":468," We":2833," Wh":591," Wi":3680," Sè":828," Wo":1786," Sí":750," Só":1115," Xa":1951," Tà":551," Xe":597," Xi":3773," Té":2126," Tí":512," Ya":1513," Ye":461," Yo":2905," Yu":830," Yv":453," Za":2050," Ze":1516," Zh":744," Zi":744," Zo":622," Ví":573," Zu":634," a ":231352," aC":7206," c ":430," ab":12838," ac":25191," ad":11985," ae":1083," af":4594," ag":9996," ai":8072," aj":1499," al":160919," am":67850," an":89094," ap":11748," aq":16353," ar":28721," as":11985," at":6592," au":11616," av":4952," d ":162001," ba":23834," be":5533," bi":7646," bl":3360," bo":8059," br":9864," bu":2523," e ":1417," ca":98241," ce":19284," ch":1175," ci":34378," cl":10711," cm":668," co":200977," cr":18454," cu":13751," da":10184," de":1049103," di":85650," do":21151," dr":5214," du":18527," ec":3573," ed":10010," ef":1766," eg":916," ei":1300," el":207472," em":12650," en":176225," ep":1274," eq":4231," er":13002," es":169872," et":3945," eu":3020," ev":1852," ex":25956," fa":33351," bà":1738," fe":21683," fi":36564," fl":4625," bé":4485," fo":88068," fr":62189," fu":18369," i ":271040," ga":8265," cà":3140," ge":23086," gi":1550," cè":1581," gl":2065," go":8124," cí":770," gr":33196," gu":9031," cò":2765," ha":48412," he":5899," hi":20727," dè":1324," dé":790," ho":9258," ht":889," hu":4769," dó":1035," ib":4538," id":3775," ig":1019," il":7669," im":11832," in":61206," ir":1802," is":2015," it":4507," l ":149189," ja":7585," fà":748," je":550," jo":3465," fí":2217," ju":19392," fò":830," fó":669," m ":1255," ka":1313," kh":929," ki":720," gè":7765," km":5787," ku":747," n ":2490," la":304087," hà":645," le":69759," li":12244," ll":44363," lo":9050," lu":1492," o ":58017,"я ":506," ma":59149," me":33740," mi":24930," mo":39073," mu":67009," p ":665," na":17801," ne":9561," ni":4279," no":66588," nu":3940," ob":15205," oc":25051," oe":7522," of":8508," ol":2632," om":602," on":8518," op":3920," or":31833," os":1160," ot":673," ox":679,"ан":476," pa":61499," là":888," pe":161014," pi":11638," pl":16017," lè":787," po":63767," lí":4842," pr":107250," ps":1873," 
pu":9224," lò":503," s ":17098," mà":3346," mè":1643," mé":23474," mí":929," qu":157968," mò":673," mó":3234," mú":4524," ra":7726," re":118428," ri":10476," né":2548," ro":14342," ru":5495," nò":512," nú":1717," sa":13342," se":146872," sh":793," si":74594," so":35644," sp":523," st":706," su":40122," ta":28792," pà":1118," te":75775," th":3081," ti":9692," pè":733," to":18091," tr":50136," tu":4078," tx":561," pú":2386," ub":1521," uc":432," ul":1092," un":314756," ur":2881," us":3429," ut":5889," x ":546," va":79470," rà":1366," ve":23288," vi":23682," rè":848," vo":10050," vu":1584," y ":1832," wa":660," sà":457," we":1115," sè":3098," sí":1633," sò":1297," só":13852," ww":774," xa":1796," tà":547," xi":3734," tè":2248," té":5319," tí":4871,"ка":474," za":471," zo":5183," ví":900,"на":519,"ов":707,"ра":455,"ск":629,"ст":468," Àf":1850," Àn":588," Àr":787," Às":1561," Àu":1032," És":11156," Ín":3384," Òl":650," àc":959," àf":666," àl":1282," àm":1512," àn":818," àr":5183," àt":602," è ":614," èp":2053," èt":908," èx":653," és":191686," ín":757," òp":1627," òr":1127," úl":1460," ún":2954," ús":1545,"アアア":811,"가":632,"ة ":476,"ن ":754,"ي ":540,"ال":1136,"AC ":427,"BA ":592,"Abb":439,"Aba":835,"Abd":1681,"Abe":471,"Abi":451,"Abu":1629,"Aca":1206,"Acc":847,"Act":1576,"Ada":599,"Ade":460,"Adm":436,"Ado":605,"Adr":590,"Afr":441,"Aga":446,"Agr":922,"Agu":921,"Aig":490,"Ain":596,"Ais":995,"Air":926,"Aix":700,"Al ":2273,"Aju":580,"Alb":3276,"Ala":1970,"Alc":1635,"Alf":1418,"Ale":5749,"Alg":1324,"Ali":1700,"All":1516,"Alm":895,"Alp":2816,"Alt":9210,"Als":2588,"Alv":529,"Ama":1446,"Amb":1366,"Ame":1021,"Ami":781,"Amp":487,"Ana":1238,"And":4325,"Ang":3614,"Ani":441,"Ann":1359,"Ano":667,"Ant":6768,"Al·":437,"Apa":481,"App":480,"Apo":915,"Amè":2404,"Aqu":8976,"Arb":535,"Ara":3272,"Ard":1422,"Arc":1131,"Are":892,"Arg":2137,"Ari":1880,"Arn":611,"Arm":1606,"Arr":864,"Arq":5004,"Art":3155,"Arx":525,"Ass":2750,"Ast":1600,"Ate":1472,"Atl":2047,"Aub":950,"Aud":1123,"Aug":1230,"Aul":445,"Aus":2207,"Aur":757,"Aut":1669,"Ava":645,"Arà":478,"Ave":588,"Avi":816,"Bab":526,"Bad":1466,"Bac":609,"Bah":633,"Bag":1094,"Bai":4905,"Bal":3540,"Ban":2412,"Bar":16181,"Bat":1579,"Bas":2781,"Bav":652,"Bau":520,"Bay":640,"Bea":1289,"Bel":2629,"Ben":2746,"Ber":4569,"Bet":501,"Bes":801,"Bib":559,"Bil":822,"Bio":545,"Bir":867,"Bis":957,"Biz":433,"Bla":2977,"Blo":538,"Boi":680,"Bol":1542,"Bon":1514,"Bom":615,"Bor":2684,"Bot":563,"Bos":818,"Bou":1555,"Bra":3413,"Bre":3909,"Bri":2360,"Bro":1428,"Bru":1662,"Buc":460,"Bue":559,"Bul":739,"Bur":1549,"Bus":549,"Cab":1292,"Cae":457,"Cad":907,"Cai":1201,"Cam":7097,"Cal":8123,"Can":10091,"Cap":2678,"Cas":9757,"Car":8990,"Cau":681,"Cat":14977,"Cav":477,"Cel":753,"Cen":4020,"Cer":2044,"Cha":6699,"Che":1984,"Chi":1603,"Cho":571,"Chr":1309,"Chu":431,"Cic":741,"Cin":969,"Cir":1072,"Ciu":1559,"Cit":606,"Civ":835,"Cla":2546,"Châ":742,"Cle":767,"Cli":509,"Clo":595,"Clu":1412,"Ciè":520,"Cod":481,"Com":13254,"Col":5594,"Coo":476,"Con":12377,"Cop":1662,"Cos":4020,"Cor":6641,"Cou":1178,"Cra":772,"Cre":2168,"Cri":2170,"Cro":1381,"Cru":694,"Cub":578,"Cul":2134,"Cur":631,"EC ":776,"Dal":794,"Dan":1281,"Dam":954,"Dar":1085,"Dav":1039,"De ":1301,"Dec":620,"Def":481,"Del":1249,"Den":843,"Dem":2650,"Dep":1015,"Der":576,"Des":2834,"Dev":451,"Deu":783,"Dia":891,"Dic":847,"Die":793,"Din":2556,"Dip":625,"Dio":1016,"Dir":624,"Dis":1960,"Div":832,"Doc":553,"Dol":487,"Don":1274,"Dom":1970,"Dor":1286,"Dou":1102,"Dra":714,"Dre":955,"Dro":599,"Dub":501,"Duc":927,"Dur":1205,"FC 
":460,"Ebr":656,"Eco":537,"Edi":863,"Edu":769,"Egi":1637,"El ":53100,"En ":7629,"Ele":990,"Eli":1057,"Ell":470,"Els":12202,"Emi":805,"Emp":2669,"Enc":1212,"Eng":618,"Enr":1136,"Ent":1321,"Epi":667,"Es ":7626,"Equ":1084,"Era":4172,"Eri":725,"Ern":564,"Esc":3957,"Esg":1543,"Esl":432,"Esq":691,"Esp":7947,"Ess":459,"Est":16703,"Eti":592,"Eug":470,"Eus":540,"Eur":6546,"Exp":558,"Ext":492,"Exè":520,"Fab":658,"Fal":839,"Far":1147,"Fed":2158,"Fel":1485,"Fer":3479,"Fes":1048,"Fig":804,"Fil":2790,"Fin":1628,"Fir":475,"Fit":451,"Fis":548,"Fla":1182,"Bèl":724,"Flo":1914,"Bén":1073,"Fon":1799,"For":4330,"Fou":4771,"Fra":10976,"Fre":2000,"Fri":1067,"Fro":1171,"Fun":1040,"Fut":785,"Gab":852,"Gai":1316,"Gam":644,"Gal":3763,"Gan":792,"Gas":726,"Gar":5151,"Gau":545,"Gav":516,"Gel":633,"Geo":1768,"Gen":3908,"Ger":2784,"Gha":668,"Gil":834,"Gio":742,"Gin":447,"Gir":2867,"Gla":555,"Cès":490,"Gol":772,"Gon":924,"Gor":807,"Got":462,"Gov":915,"Gra":8029,"Gre":1705,"Gri":568,"Gro":823,"Gru":812,"Gua":2233,"Gue":3552,"Gui":3733,"Grè":1211,"Còr":1485,"Ha ":1586,"Hab":849,"Hai":525,"Hal":1026,"Han":1601,"Ham":1069,"Har":1914,"Has":482,"Hau":489,"Hei":662,"Hel":854,"Hen":1083,"Her":3308,"Hes":521,"II ":11551,"Hi ":667,"IL ":496,"Hil":810,"Him":457,"Hip":538,"His":1995,"IN ":545,"Déu":993,"Hol":1076,"Hon":1356,"Hom":721,"Hor":1537,"Hos":973,"Hou":427,"IV ":2284,"IX ":2178,"Hug":456,"Hum":647,"Ibè":454,"Igu":523,"Ill":2804,"Imp":2310,"Ind":3042,"Inc":1174,"Inf":899,"Int":5298,"Ins":1704,"Inv":4574,"Ira":1225,"Irl":972,"Isa":677,"Isl":998,"Isr":606,"Isè":610,"Ità":2371,"Jac":1249,"Jan":842,"Jam":1007,"Jap":1788,"Jar":512,"Jav":647,"Jau":1327,"Jea":1258,"Jer":1024,"Jes":979,"Joa":4658,"Joc":2904,"Joh":2200,"Jon":498,"Jor":1675,"Jos":5343,"Jov":701,"Jua":1133,"Jul":1845,"Jun":1207,"Jur":1022,"Jus":1158,"Fór":1750,"Kal":667,"Kan":1326,"Kas":646,"Kar":1823,"Kat":730,"Ken":1039,"Ker":512,"Kha":2066,"Kin":628,"Kir":437,"Kon":521,"Kur":796,"La ":44734,"Lab":732,"Lac":602,"Lag":454,"Lan":2502,"Lam":739,"Lar":952,"Lat":473,"Las":877,"Lau":991,"Le ":1701,"Lea":612,"Leg":479,"Lei":605,"Len":488,"Leo":1055,"Let":618,"Les":8179,"Lib":1275,"Lic":729,"Lie":545,"Lig":440,"Lin":1292,"Lim":1985,"Lit":1358,"Lis":662,"Liv":631,"Lla":1420,"Lle":5009,"Lli":3849,"Llo":2525,"Llu":2662,"Lo ":509,"Loc":935,"Loi":3900,"Lon":2521,"Lor":1079,"Los":1387,"Lou":1368,"Lud":436,"Luc":2154,"Lui":841,"Lux":520,"Mac":1977,"Mad":4850,"Mag":1918,"Mai":1400,"Mah":1236,"Maj":817,"Mal":5341,"Man":7072,"Map":489,"Mas":2898,"Mar":24205,"Mau":1281,"Mat":2199,"May":903,"Max":566,"Med":2321,"Meg":499,"Mem":552,"Mel":1235,"Men":2299,"Mes":1909,"Mer":2220,"Meu":791,"Met":1461,"Mic":1967,"Mig":3773,"Mik":435,"Mil":2264,"Min":2581,"Miq":1084,"Mis":977,"Mir":1383,"Mit":1306,"Mod":572,"Moi":445,"Mol":1866,"Mon":10135,"Mos":3258,"Mor":3228,"Mou":615,"Mot":612,"Mov":719,"Muh":1530,"Mul":514,"Mun":3699,"Mus":3011,"Mur":1125,"Nad":543,"Nac":3718,"Nag":474,"Nan":670,"Nam":469,"Nar":887,"Nat":1765,"Nas":943,"Nav":1541,"Neg":551,"Nep":465,"Neu":723,"New":1011,"Nic":1721,"Nig":501,"Nil":485,"Nin":583,"No ":949,"Nob":765,"Nog":546,"Nom":506,"Nor":7475,"Not":558,"Nov":3754,"Nou":976,"Obr":542,"Occ":1964,"Oce":633,"Oes":458,"Ois":1085,"Oli":1151,"Or 
":1144,"Ope":627,"Olí":2742,"Ord":1202,"Org":811,"Ori":3250,"Orl":432,"Orn":702,"Ort":537,"Osc":592,"Oso":563,"Pac":1255,"Pal":5917,"Pak":720,"Pan":1931,"Pam":450,"Pap":987,"Par":12424,"Pat":5920,"Pas":3293,"Pau":2314,"Ped":953,"Pel":908,"Pen":2445,"Per":8284,"Paí":2486,"Pet":1992,"Paï":1367,"Pha":593,"Phi":793,"Pic":997,"Pie":1705,"Pil":517,"Pin":1362,"Pir":5077,"Pit":507,"Pis":643,"Pla":4255,"Ple":573,"Pli":619,"Pob":891,"Pod":515,"Poi":546,"Pol":3110,"Pon":2233,"Pom":919,"Pop":1499,"Por":3188,"Pot":747,"Pos":637,"Pra":2096,"Pre":5132,"Pri":3663,"Pro":6882,"Psi":498,"Pto":509,"Pub":1052,"Pue":644,"Pui":963,"Prí":433,"Puè":630,"Màl":503," ال":969,"Mèx":2060,"Qua":1293,"Que":2037,"Qui":1884,"Món":857,"SA ":697,"Rad":711,"Raf":698,"Raj":585,"Rai":671,"Ral":542,"Ran":696,"Ram":2345,"Ras":506,"Nàp":561,"Rea":888,"Red":459,"Rec":1201,"Reg":5159,"Rei":2533,"Ren":1400,"Rep":4891,"Res":1790,"Rev":1903,"Reu":922,"Rib":1454,"Ric":1775,"Rie":437,"Rin":1512,"Rip":435,"Rio":773,"Riv":653,"Riu":648,"SO ":429,"Rob":1500,"Rod":1354,"Roc":1550,"Rog":460,"Roi":1301,"Ron":515,"Rom":4973,"Ros":3668,"Rou":702,"Roy":493,"SS ":576,"Rub":511,"Rus":893,"Sac":904,"Sab":1266,"Sad":437,"Sag":828,"Sai":6861,"Sam":1360,"Sal":4902,"Sao":1292,"San":18549,"Sar":3138,"Sau":1050,"Sat":669,"Sav":1601,"Sax":634,"Se ":1121,"Sca":430,"Sch":1869,"Sco":614,"Sec":816,"Seb":701,"Seg":4635,"Sei":460,"Sem":755,"Sel":1581,"Sen":4433,"Sep":474,"Ser":3924,"Set":597,"Sev":1041,"Sha":1754,"She":554,"Shi":788,"Sic":1112,"Sib":491,"Sie":912,"Sid":494,"Sig":466,"Sim":1293,"Sil":985,"Sin":1608,"Sis":732,"Sir":914,"Sit":741,"Soc":2681,"Sob":484,"Sof":504,"Som":1707,"Sol":2641,"Son":1522,"Sor":783,"Sou":790,"Sot":432,"Sov":997,"Spa":467,"Sta":2211,"Ste":1230,"Sto":592,"Str":1325,"Stu":700,"Sub":526,"Sud":4066,"Sum":592,"Sul":1193,"Sun":586,"Sup":1168,"Sur":766,"Suè":768,"Suï":870,"UA ":676,"Tai":859,"Tal":909,"Tan":1153,"Tam":2163,"Tar":2953,"Tau":512,"Tax":694,"Tea":1333,"Tel":1110,"Ten":1269,"Tem":675,"Teo":751,"Ter":5037,"Tes":748,"Tex":474,"Tha":531,"The":4710,"Thi":492,"Tho":1030,"Tib":574,"Tin":475,"Tim":819,"Tir":690,"Tit":744,"Pèr":751,"Pér":494,"Tol":1537,"Ton":536,"Tom":896,"Top":6603,"Tor":4551,"Tot":919,"Tos":566,"Tou":1405,"Tra":3043,"Tre":2743,"Tri":2234,"Tro":1124,"Tun":1064,"Tur":2311,"Txe":473,"Ucr":518,"VI ":1796,"Un ":5869,"Una":3414,"Uni":14119,"Urg":1121,"Uru":766,"Va ":8704,"Val":12827,"Van":1324,"Var":1091,"Veg":497,"Vel":1558,"Ven":2761,"Ver":2949,"Via":539,"Vid":677,"Vic":2194,"Vie":1915,"Vig":450,"Vil":5935,"Vin":1052,"Vir":789,"Vit":683,"Vis":651,"Viv":524,"Vol":1052,"Vos":641,"Rús":1325,"Wal":1101,"Was":531,"War":644,"Wei":623,"Wes":827,"XI ":700,"Wil":1831,"Win":668,"Wol":473,"Wor":825,"Sír":544,"XV ":828,"XX ":1330,"Són":982,"Xar":475,"Xil":666,"Xin":1974,"Té ":2031,"Tít":634,"Yon":610,"Yor":1648,"Zel":575,"丁 ":477,"三 ":646,"aC ":7161,"aTo":1030,"ab ":4227,"ac ":6540,"ad ":6641,"abb":488,"aba":10954,"abd":578,"abe":3580,"abi":32079,"abl":11206,"abo":3927,"abr":8859,"abs":2070,"acT":607,"abu":1194,"ae ":11093,"aca":8202,"acc":6873,"ace":6587,"aci":87894,"ach":4569,"ack":1414,"acl":1203,"aco":6238,"acq":477,"acs":2095,"acr":2622,"acu":2617,"act":26858,"af ":1940,"ada":73814,"add":677,"ade":24344,"adh":764,"adj":840,"adi":12618,"adm":4187,"ado":31930,"adr":7171,"adq":601,"adv":1601,"adu":4495,"ag ":1296,"ael":2215,"aen":718,"aes":793,"aer":1540,"aet":471,"ah ":2535,"afa":2091,"aff":697,"afe":1992,"afi":3879,"afl":795,"abè":473,"afo":1441,"afr":2220,"ai 
":7608,"aga":6999,"age":3272,"acà":746,"agd":566,"agi":3767,"agh":680,"agm":689,"acè":540,"agl":465,"ago":11990,"agn":4049,"ací":903,"agr":4590,"agu":4082,"aha":2319,"adà":1479,"ahi":1122,"adè":1215,"ahm":588,"aho":687,"ahr":545,"adí":1006,"ak ":1321,"aia":1969,"aic":955,"aid":1169,"aig":9622,"aim":583,"ail":1889,"aio":474,"ain":11965,"ais":4814,"air":4856,"aiu":570,"ait":690,"aix":15023,"al ":239788,"aja":1558,"aje":644,"ajo":5914,"aju":1645,"am ":6592,"aka":1683,"ake":1123,"aki":1448,"akh":1508,"agè":1166,"ako":697,"aku":618,"agó":1804,"an ":66807,"alb":1945,"ala":24734,"ald":4936,"alc":4171,"alf":1579,"ale":25255,"alh":840,"alg":6863,"ali":38592,"all":34050,"alk":512,"alm":23959,"alp":1373,"alo":6503,"alt":21186,"als":45950,"alv":3401,"alu":13202,"alz":450,"ao ":1801,"ama":13473,"amb":79002,"ame":83485,"amf":804,"ami":6845,"amm":2154,"amo":4971,"amn":458,"amp":13969,"ams":1153,"amu":1923,"ap ":11550,"ana":43743,"and":29111,"anc":71924,"ane":15291,"anh":850,"ang":20211,"anj":1451,"ani":23420,"anl":536,"ank":1770,"ann":5057,"anm":616,"ano":18530,"anr":592,"anq":2008,"ant":129567,"ans":27548,"anv":2326,"anu":5845,"anx":1525,"anz":1554,"al·":4737,"any":83908,"aq ":723,"aon":2010,"aor":467,"ar ":93910,"apa":10585,"alà":12459,"ape":4186,"aph":689,"api":8079,"apl":3094,"alè":3173,"alç":1033,"app":589,"apo":4636,"apr":3633,"alí":1466,"apt":2457,"aps":822,"alò":521,"apu":1311,"aló":481,"alú":527,"as ":20122,"amà":1509,"amè":932,"amí":21249,"aqu":19443,"amó":1134,"at ":205988,"arb":6120,"ara":30683,"ard":16741,"anà":1635,"arc":29113,"are":20898,"arh":634,"arg":7849,"ari":46452,"arl":9250,"anè":1527,"anç":11606,"ark":2032,"arn":6199,"arm":5748,"arp":1136,"aro":5216,"arr":31135,"arq":7113,"aní":596,"art":92161,"ars":9714,"arv":792,"anò":614,"aru":1141,"arx":3639,"arz":535,"ary":1313,"au ":10283,"asa":10565,"asc":12981,"ase":5138,"asi":8686,"ash":2159,"ask":721,"asm":919,"asl":572,"aso":1870,"asq":958,"asp":2366,"ass":25671,"asu":572,"ast":24258,"ata":40935,"ate":23772,"ath":2609,"atg":10074,"atj":940,"ati":22473,"atl":1905,"apç":564,"atm":559,"atp":518,"ato":7197,"atr":17757,"att":1995,"ats":36813,"atu":14371,"atx":730,"apó":1638,"atz":1039,"aw ":507,"auc":1243,"aud":3138,"aug":1634,"aum":2063,"aul":5438,"aun":1108,"aus":6631,"aur":6501,"aut":10121,"auv":582,"aux":1245,"ax ":1119,"ava":21112,"arà":3336,"ave":9872,"avi":9915,"arè":1555,"arç":4698,"avo":3437,"arí":6995,"avu":1833,"aró":868,"ay ":3903,"awa":2159,"az ":1002,"axa":436,"atà":637,"axi":822,"atè":1859,"axo":1134,"atí":4152,"atò":3244,"ató":484,"aya":1481,"aye":1242,"ayo":535,"ayn":542,"ays":433,"之 ":486,"aza":1604,"azi":1649,"avé":2003,"azo":649,"azz":706,"axò":585,"ba ":12235,"bab":2635,"bad":3682,"bac":2347,"bag":527,"bai":3872,"bal":9163,"ban":11239,"bam":526,"bar":12405,"bat":6461,"bas":9205,"bav":489,"bd ":1382,"be ":3383,"bcl":710,"bdi":878,"bea":692,"bec":772,"beg":545,"bei":604,"bel":4237,"ben":4316,"ber":17069,"bet":1760,"bes":2688,"bfa":896,"bi ":2973,"bib":722,"bia":4295,"bid":1305,"bic":2896,"bie":1762,"big":444,"bil":5829,"bin":3024,"bio":2560,"bir":1703,"bit":31890,"bis":3356,"biz":656,"bje":4022,"bn ":4422,"bla":13353,"ble":25765,"bli":16768,"blo":1020,"bo ":825,"boa":461,"boc":1058,"bol":8535,"bon":3318,"bom":502,"bor":5228,"bot":2150,"bos":3156,"bou":954,"bs ":2550,"bra":15655,"bre":57548,"bri":13610,"bro":2609,"bru":988,"bu ":3185,"bse":1373,"bso":827,"cTo":715,"bst":2632,"bte":1652,"bti":549,"bud":487,"buc":1582,"bue":692,"bui":1777,"bul":1896,"bun":1101,"bum":1003,"bur":5072,"but":2804,"bus":3278,"by 
":676,"buï":1006,"ca ":91703,"cac":9604,"cab":3071,"cad":13917,"cai":1639,"cam":11096,"cal":20680,"can":23151,"cap":21105,"cas":12109,"car":29335,"cau":2881,"cat":33246,"cav":2671,"ce ":4275,"cca":584,"cce":4115,"cci":32492,"cea":2192,"ceb":520,"ced":4416,"cei":1155,"caç":439,"cel":24876,"cen":17603,"cep":5727,"ces":14952,"cer":10622,"cet":1368,"ch ":5829,"ci ":10536,"ccé":569,"cha":4666,"che":6600,"chi":4008,"chl":697,"chn":490,"cho":1760,"chr":636,"cht":1042,"chs":468,"chu":1319,"chw":564,"chy":514,"ck ":3785,"cia":71294,"cic":3669,"cie":28020,"cid":8886,"ceà":1020,"cif":2343,"cim":1374,"cil":3459,"cio":42238,"cin":9175,"cip":70269,"cis":6308,"cir":3899,"ciu":18180,"cit":15001,"civ":1365,"cm ":663,"cke":1017,"cla":10996,"cle":7657,"cli":5636,"clo":9026,"clu":3084,"co ":5558,"cma":564,"cià":5744,"ciè":1687,"ció":93089,"cni":2204,"cno":1079,"coa":1145,"coc":1430,"cob":3849,"coe":611,"cod":1781,"cog":1054,"coi":889,"coh":497,"com":94267,"col":18694,"coo":976,"con":84469,"cop":3499,"cos":13573,"cor":15898,"cou":2376,"cot":1941,"cov":689,"clà":1541,"clò":1640,"cs ":28127,"cqu":601,"cra":3589,"cre":15748,"cri":20672,"cro":4644,"cru":742,"cta":11278,"cte":31381,"cti":13744,"cto":10968,"ctr":4907,"ctu":18407,"cua":740,"cuc":922,"cud":997,"cui":2876,"cum":3049,"cul":26044,"cun":1991,"cup":3866,"cus":3431,"cur":7121,"cut":6363,"crà":2375,"crí":1017,"crò":1272,"cy ":895,"ctà":449,"ctò":6507,"da ":106799,"dad":7741,"dac":2392,"dae":7362,"dag":1161,"dai":933,"dal":8337,"dan":7132,"dam":4466,"dap":1308,"dar":8439,"dat":8541,"das":1597,"dav":1950,"dau":578,"de ":731066,"ddi":693,"deb":713,"dea":1779,"ded":2420,"dec":4514,"def":4724,"dee":790,"deg":1083,"dei":4464,"del":223902,"den":29595,"dem":4208,"dep":44739,"deo":3384,"der":25618,"det":3669,"des":76584,"dev":3022,"deu":2190,"dex":508,"dez":729,"di ":11122,"dge":655,"dha":462,"dib":1304,"dia":26304,"did":2342,"dic":20315,"dif":12060,"die":5221,"dig":1614,"dil":1360,"din":14926,"dim":2318,"dip":2139,"dio":4697,"dir":15836,"diq":427,"dit":7813,"dis":34050,"div":14053,"diu":3152,"dja":992,"do ":6850,"dià":1566,"dmi":4472,"diò":459,"dob":2439,"doc":4823,"dol":3039,"don":12077,"dom":3069,"dop":1469,"dor":31450,"dot":1997,"dos":7697,"dov":779,"dou":475,"dox":667,"dow":451,"ds ":13269,"dqu":610,"dt ":624,"dra":5966,"dre":20197,"dri":8059,"dro":3178,"du ":1931,"dua":2037,"duc":9457,"due":6092,"dui":1768,"dul":1590,"dun":583,"dur":12174,"dut":440,"dus":2391,"drà":793,"dré":463,"dvo":1486,"drí":514,"dy ":834,"dwa":684,"dwi":444,"duï":2716,"ea ":11116,"eb ":1298,"ec ":10740,"eac":2447,"eae":1662,"ead":3245,"eag":611,"eal":7656,"ean":3211,"eas":681,"ear":4154,"eau":2217,"eat":7817,"ed ":3555,"eba":5649,"ebe":2525,"ebi":1137,"ebl":1185,"ebo":1010,"ebr":12552,"ebs":543,"ebu":1509,"ee ":1159,"eca":4354,"ecc":8576,"ece":4741,"eci":8492,"ech":1846,"eck":770,"ecl":2812,"eco":9587,"ecn":1045,"ecs":1149,"ecr":2148,"ecu":5052,"ect":34496,"ef ":453,"eda":7510,"ede":8641,"edi":20322,"edo":3166,"edr":3881,"edu":2561,"eg ":3638,"eed":448,"eei":505,"een":1291,"ees":2623,"eer":1377,"eet":440,"efa":810,"efe":8879,"efi":3643,"efl":540,"ebé":837,"efo":1656,"efr":499,"efu":502,"ei ":11603,"ega":18566,"ege":4080,"ecà":910,"egi":53966,"egl":11995,"ego":14924,"egn":5842,"ecí":1108,"egr":5884,"egu":26013,"ehi":1324,"edè":1034,"edò":1193,"aç ":1156,"ek ":1125,"eia":3285,"eic":714,"eie":670,"eid":2015,"eig":2063,"eim":1263,"eil":1187,"ein":4918,"eis":3439,"eir":2304,"eit":2121,"eix":36438,"el ":342397,"eja":3154,"ejo":757,"em ":5051,"aça":4285,"aço":547,"egí":471,"en 
":198166,"elb":478,"ela":12101,"egü":997,"eld":1138,"elf":710,"ele":21700,"elh":575,"elg":974,"eli":11221,"ell":53060,"aèl":445,"elm":1163,"elo":14812,"elt":1433,"els":102578,"elv":2210,"elu":847,"eo ":2000,"ema":20188,"emb":23594,"eme":18533,"emi":9954,"emm":981,"emo":6063,"emn":559,"emp":21619,"ems":837,"emu":739,"ep ":5633,"enb":1598,"ena":31909,"end":15928,"enc":25792,"enf":1661,"ene":32525,"enh":951,"eng":12438,"enj":1041,"eni":43865,"enl":1490,"enk":437,"enn":4327,"eno":9400,"enr":1452,"enq":679,"ent":250920,"ens":21295,"env":6112,"enu":1527,"enz":2223,"eny":10523,"el·":13441,"eoc":722,"eod":852,"eog":1412,"eoj":1466,"eom":1010,"eol":3046,"eon":1964,"eop":812,"eos":1173,"eor":4067,"eot":635,"er ":189445,"epa":44596,"epc":812,"epe":4667,"eph":1592,"epi":1711,"elè":1649,"epp":440,"epo":1017,"epr":5519,"elí":752,"ept":4227,"eps":459,"epu":1513,"es ":411788,"emà":2705,"aís":5446,"equ":6099,"emò":1396,"et ":25747,"erb":3837,"era":74760,"erd":5652,"enà":736,"erc":13759,"erf":4507,"ere":32246,"erg":6703,"eri":45981,"enè":1818,"erl":3041,"enç":6985,"erk":557,"ern":23953,"erm":22973,"erp":2962,"ero":10444,"err":35859,"erq":1670,"ení":2568,"ert":22614,"ers":42235,"esT":439,"erv":13334,"enò":458,"eru":2316,"erz":433,"ery":712,"eu ":35559,"esa":19441,"esc":32880,"esb":561,"ese":21436,"esd":2452,"esg":3019,"esf":926,"esi":12819,"esh":2249,"esm":4178,"esl":1048,"aïl":606,"eso":4807,"aïn":1010,"esn":735,"esq":2957,"esp":45950,"ess":23338,"aïs":2742,"esu":5094,"est":121230,"ev ":1270,"eta":26671,"etc":1311,"ete":15994,"eth":1185,"etg":1896,"eti":12720,"etl":540,"etn":724,"etm":769,"eto":4154,"etr":11973,"ett":2525,"ets":4387,"etu":846,"etx":432,"etz":908,"epú":4257,"ew ":1125,"aó ":858,"eua":1296,"aña":505,"euc":836,"eue":676,"eud":1880,"eug":783,"eui":585,"euj":610,"eum":762,"eul":512,"eun":1000,"eus":15496,"eur":6223,"eut":1400,"euv":478,"eux":1268,"ex ":2741,"eva":19578,"eqü":1946,"erà":2492,"eve":8878,"evi":8750,"erè":7517,"erç":777,"evo":5130,"erí":5636,"erò":8453,"eró":999,"erú":955,"ey ":4222,"ewa":613,"esè":852,"ez ":5223,"esú":770,"exa":3082,"età":1153,"exc":2327,"exf":551,"exe":4662,"exi":4985,"etè":606,"exp":6667,"ext":8657,"etò":822,"exu":806,"etó":2471,"exè":1087,"fa ":5715,"fab":2410,"fac":1990,"fae":522,"fal":2239,"fan":3597,"fam":21118,"far":2707,"fat":794,"fas":1245,"fav":856,"bà ":1286,"fe ":979,"ff ":607,"bàc":497,"bàn":471,"bàs":1680,"feb":4294,"fed":1684,"fec":3709,"feg":553,"fei":1130,"fel":956,"fen":4386,"fem":2144,"fer":21684,"fet":4235,"fes":7134,"feu":1380,"ffa":432,"ffe":606,"ffi":601,"fi ":1082,"fga":590,"fib":922,"fia":3829,"fid":612,"fic":28976,"fie":778,"fig":1714,"fil":14950,"fin":20246,"fir":1021,"fiq":1022,"fit":2051,"fis":1262,"fix":818,"bé ":21222,"fla":1357,"fle":1233,"fli":683,"flo":2541,"bèr":1217,"flu":3428,"bén":800,"bí ":554,"foc":841,"fol":1629,"fon":7008,"for":42476,"fot":1319,"fos":1763,"fou":50634,"fs ":758,"ft ":899,"fra":59098,"fre":3413,"fri":4659,"fro":3635,"fru":1196,"bó ":1017,"fug":662,"ful":1038,"fun":11935,"fur":649,"fut":4817,"fus":2525,"bú ":739,"ga ":19272,"gac":2467,"gad":8463,"gaf":564,"gai":1485,"gam":2445,"gal":6733,"gan":12273,"gas":3274,"gar":8028,"gau":743,"gat":6271,"gav":699,"cà ":4299,"gbi":427,"ge ":14223,"gda":596,"gdi":3267,"càl":452,"càn":1571,"càr":2496,"geg":571,"gei":1282,"gem":444,"gel":5071,"gaè":430,"geo":2538,"gen":27727,"ges":8582,"ger":8998,"geu":553,"get":1515,"gh ":1166,"gi 
":2290,"ggi":525,"gha":928,"ght":1061,"gia":10303,"gic":5745,"gie":1046,"gid":6384,"gim":1254,"gil":971,"gio":4566,"gin":10839,"giq":637,"gip":2151,"gis":5461,"gir":3350,"git":6862,"cè ":929,"gla":3610,"gle":14405,"gli":1448,"cèl":1465,"cèn":1040,"glo":1864,"cès":51222,"glu":480,"go ":4475,"gma":570,"gme":1573,"cés":2600,"gió":43533,"gna":5935,"gne":6704,"gni":3825,"gno":1760,"gny":1080,"goc":613,"gog":562,"gol":2625,"gon":15671,"gos":7535,"gor":5349,"got":978,"gov":6468,"glè":7577,"glé":4175,"gs ":2190,"cía":762,"cíc":646,"cíf":1693,"cíl":928,"gra":28539,"gre":13487,"gri":3099,"gnè":749,"gro":1831,"gru":9359,"gto":672,"có ":1079,"gua":16987,"gue":14100,"gud":7181,"gui":8806,"gum":532,"gul":3339,"gun":6066,"gus":2383,"gur":4581,"gut":13910,"grà":3970,"còc":650,"còl":466,"còn":1558,"còm":1025,"gré":941,"còp":598,"grí":718,"gy ":492,"gué":2153,"guè":693,"ha ":12835,"hab":29035,"had":849,"hac":947,"hae":966,"hah":490,"hag":1379,"hai":816,"hal":3284,"hak":573,"han":10626,"ham":3661,"hap":986,"har":8141,"hat":1256,"has":1344,"hav":4880,"hau":1739,"dà ":4550,"he ":9369,"dàr":741,"heb":937,"hea":599,"hec":618,"hei":1380,"hel":2633,"hen":2285,"hem":1268,"heo":584,"her":7245,"het":829,"hes":1479,"hi ":8328,"hib":968,"hia":1191,"hid":2139,"hic":1329,"hie":812,"hig":483,"hil":2191,"hik":471,"hin":3026,"him":1247,"hip":1365,"hio":574,"hir":1774,"hit":1226,"his":10997,"hiv":1397,"hn ":1390,"dèc":1241,"hle":576,"dèm":2035,"dèn":3518,"dès":2865,"ho ":1745,"hma":1072,"hme":479,"déu":772,"dí ":1171,"hod":538,"hoe":482,"hol":1392,"hon":3217,"hom":4461,"hop":470,"hor":4248,"hoq":607,"hot":684,"hos":1136,"hou":928,"dís":800,"ht ":1178,"hra":931,"hre":427,"hri":1581,"hro":903,"hry":475,"hu ":535,"htt":932,"dó ":1006,"hua":543,"hun":546,"hum":4995,"hur":1505,"hus":1570,"dòn":1756,"hy ":459,"hwa":576,"dón":1091,"dós":502,"hya":490,"hyl":446,"ia ":240700,"dús":667,"ib ":828,"ic ":66155,"iac":4872,"iab":1230,"iae":474,"iad":4471,"iag":1233,"iam":3348,"ial":34113,"ian":18657,"ias":1473,"iar":7010,"iat":8115,"eà ":1206,"id ":9736,"iba":5730,"ibe":5840,"ibi":2687,"ibl":5185,"ibn":4092,"ibo":751,"ibr":5119,"ibu":8678,"ie ":17464,"ica":99673,"icc":2791,"ice":5639,"ici":97310,"ich":5290,"ick":1279,"icl":5272,"ico":6687,"ics":16582,"icr":2098,"icu":3925,"ict":15934,"if ":436,"ida":28123,"ide":32628,"idg":508,"idi":6454,"ido":3414,"idr":1982,"ids":10903,"idu":2167,"ig ":10039,"ied":1379,"ieg":716,"iej":698,"iem":565,"iel":2750,"ien":17964,"ies":27602,"ier":8829,"ieu":1133,"iet":7033,"iev":1540,"ifa":1103,"ife":7155,"ifi":14910,"ibè":709,"ifo":4579,"ifr":661,"ifu":733,"ibú":572,"ii ":1495,"iga":11186,"ige":6095,"icà":3585,"igd":3266,"igi":17333,"igh":1474,"igm":515,"igl":1012,"igo":1893,"ign":8509,"icí":959,"igs":552,"igr":1725,"igu":11339,"icò":748,"iha":835,"idè":1574,"ik ":1476,"iid":759,"il ":18226,"ija":779,"ifò":1231,"im ":10274,"eça":920,"ika":1087,"ike":554,"iki":730,"ikh":444,"igè":690,"iko":916,"eçu":667,"in ":16856,"ilb":730,"ila":11336,"igü":807,"ild":1048,"ile":4850,"ilh":868,"ili":25409,"ill":37178,"ilm":879,"ilo":5127,"ilt":532,"ils":3166,"ilv":781,"ilu":637,"io ":7808,"ima":12326,"imb":1419,"ime":26750,"imf":554,"imi":7652,"imm":1734,"imo":8644,"imn":949,"imp":12072,"ims":3094,"imu":1172,"ip 
":3014,"ina":55244,"ind":13573,"inc":26929,"inf":7398,"ine":27327,"inh":911,"ing":18368,"ini":21752,"inl":745,"ink":797,"inn":1417,"ino":7464,"inq":1248,"int":40330,"ins":28968,"inv":3498,"inu":3882,"inz":527,"il·":2910,"iny":3648,"ioc":841,"iod":2266,"iog":1116,"iom":1458,"iol":8925,"ion":66529,"iop":546,"ios":3642,"ior":10193,"iot":1969,"iov":765,"ir ":38156,"ipa":13903,"ilà":1296,"ipc":1672,"ipe":1388,"ipi":56300,"ipl":2748,"ipp":1057,"ipo":1719,"ipr":1169,"ilí":1353,"ipt":7598,"ips":1386,"ilò":3165,"ipu":7068,"is ":48738,"imà":684,"iqu":16573,"it ":34854,"ira":12762,"inà":2459,"irc":3627,"ire":12121,"irg":707,"iri":14280,"irl":1148,"inè":2542,"irm":1510,"iro":5273,"irr":1094,"irt":776,"irs":803,"iru":1243,"inó":1010,"iu ":23871,"isa":3230,"isc":7357,"isb":2891,"ise":3763,"isd":800,"isf":544,"isi":12145,"ish":2116,"ism":11247,"isl":2580,"iso":2263,"eïn":1531,"isn":1192,"isq":801,"isp":8390,"iss":11198,"isu":888,"ist":91367,"ita":105273,"itc":439,"ite":18961,"ith":1434,"itg":494,"itj":6011,"iti":5407,"ipè":1346,"ito":13878,"itr":2127,"ipí":486,"itt":1867,"its":13586,"ipò":666,"itu":63810,"itx":1409,"itz":26586,"ity":1073,"eó ":1479,"iue":1082,"iud":581,"ium":2617,"ius":11241,"iur":6384,"iut":18717,"ix ":29909,"iva":16973,"irà":1120,"ive":21240,"ivi":15764,"eòl":734,"ivo":762,"eòr":820,"ivu":434,"iwa":449,"isò":490,"iz ":983,"ixa":7163,"ità":9523,"ixe":14602,"ixi":2409,"itè":770,"ixo":2617,"ixt":502,"itò":478,"iya":1218,"iyy":518,"iza":2122,"ize":674,"izi":565,"izo":617,"ixí":2870,"ixò":1238,"ja ":9811,"jab":521,"jad":1156,"jac":943,"jan":5594,"jam":597,"jap":2013,"jar":2907,"jat":1290,"jas":516,"jec":6374,"jer":640,"ji ":775,"fèr":741,"jo ":1478,"joc":3043,"jol":501,"jor":5738,"jos":562,"jov":1333,"jou":428,"fíc":3185,"fís":2284,"jud":1724,"jue":850,"jug":3096,"jul":4737,"jun":11838,"jur":2180,"jut":652,"jus":1031,"fòs":737,"fòr":1399,"fór":670,"ka ":3378,"kal":645,"kan":1255,"kar":1252,"kat":459,"ke ":1535,"gàn":1108,"gàr":525,"kel":464,"ken":917,"kes":438,"ker":1269,"ket":532,"kh ":635,"ki ":2286,"kha":2001,"kil":538,"kin":1440,"kis":1164,"km ":4428,"gèn":10476,"gèr":686,"ko ":893,"kon":459,"km²":1350,"kot":435,"kov":432,"ks ":761,"kra":491,"ku ":504,"gó ":1930,"kur":755,"ky ":488,"la ":366608,"lab":3695,"lad":10085,"lac":17768,"laf":1186,"güe":3838,"lae":632,"lah":1156,"lag":2894,"laj":467,"güi":457,"lai":5096,"lal":1564,"lan":35918,"lam":7327,"lap":841,"lar":27534,"lat":19752,"las":9334,"lav":5042,"lau":6114,"law":647,"lay":1456,"ld ":2920,"lba":2224,"lbe":2433,"lbi":585,"lbo":638,"lbu":1332,"le ":44349,"lca":2167,"lce":653,"lci":863,"lco":1607,"lcu":700,"lf ":1584,"lda":1491,"hàb":605,"lde":3739,"ldi":977,"hàm":864,"ldo":785,"ldr":572,"leb":5789,"lea":4091,"led":1685,"lec":11774,"lef":726,"leg":10347,"lej":477,"lei":5536,"lel":975,"laç":2967,"len":22888,"lem":19927,"lep":867,"leo":2123,"ler":12983,"güí":1062,"let":9344,"les":102946,"lev":3996,"leu":2726,"lex":3722,"lez":616,"ley":1273,"lfa":1785,"lfo":1309,"lfr":505,"hât":765,"li ":13660,"lga":2207,"lcà":614,"lge":809,"lgi":1101,"lgo":616,"lgr":600,"lgu":6098,"lha":1103,"lhe":555,"lho":784,"lib":7028,"lia":36207,"lid":6442,"lic":25988,"lif":3375,"lie":4063,"lig":7319,"lil":1149,"lik":909,"lin":14087,"lim":5858,"lip":2949,"lio":8726,"lir":2152,"liq":1148,"lit":39579,"lis":20036,"liv":1295,"leò":554,"liu":3960,"lix":612,"leó":1184,"liz":562,"ll ":39351,"lm ":1385,"lla":48510,"lle":42354,"lli":19175,"llo":15655,"lls":9306,"llu":4700,"lly":1261,"lo 
":5465,"lma":3091,"lià":4545,"lme":22342,"lmi":745,"lmo":863,"liò":1277,"lob":2604,"lod":773,"loc":12848,"loe":1134,"log":10645,"loi":800,"lol":567,"lon":19003,"lom":4084,"lop":2384,"lor":17064,"loq":527,"lot":4665,"los":9416,"lov":1246,"lou":3952,"lpa":437,"llà":2875,"lpi":844,"lph":454,"llé":435,"llè":1476,"lps":2560,"lló":3244,"ls ":175094,"lqu":497,"lt ":13361,"lu ":594,"lsa":2423,"lse":2148,"lsi":715,"lso":1025,"lst":785,"lta":12959,"lte":4554,"lti":6082,"lto":1778,"lts":3121,"ltr":12243,"ltu":9528,"lub":3271,"luc":4590,"lue":1692,"lug":599,"lui":2022,"lul":2008,"lun":11448,"lum":4460,"lup":4423,"lur":1356,"lut":2289,"lus":5945,"luv":436,"lux":666,"lva":4670,"lve":1388,"lvi":1149,"ly ":1812,"lwa":457,"lsà":1169,"ltà":475,"luè":981,"luí":1350,"lvè":457,"ma ":38293,"mb ":57615,"mac":7166,"mad":8615,"mag":4837,"mai":6164,"maj":4814,"mam":1722,"mal":9961,"man":37306,"map":527,"mas":7207,"mar":29309,"mau":502,"mat":23089,"mav":1042,"maz":573,"ià ":17352,"mba":4524,"mbd":669,"mbe":2096,"mbi":6355,"mbl":3299,"mbo":2954,"mbr":23851,"mbu":1912,"me ":21969,"iàc":438,"iàr":461,"iàt":728,"iàs":758,"mea":647,"mec":1479,"med":6233,"mei":2249,"mem":5597,"mel":3662,"men":163114,"mes":20454,"mer":28498,"meu":1125,"met":15043,"mex":577,"mfi":799,"mbé":16617,"mi ":4707,"mia":9327,"mic":14032,"mie":1315,"mid":3169,"mig":2506,"mil":12628,"mio":566,"min":22171,"miq":948,"mis":7181,"mir":3272,"mit":14514,"mm ":453,"ièn":2714,"ièr":1466,"ièt":1155,"ièv":447,"mo ":1578,"mma":3679,"mme":3188,"mmi":630,"mmo":1110,"mmu":872,"mp ":3415,"mna":954,"mne":950,"mni":680,"mno":552,"moc":3211,"mob":1740,"mod":6269,"mog":841,"mom":1280,"mol":10435,"mon":20326,"mos":4856,"mor":11845,"mou":1279,"mot":4140,"mov":2586,"mpa":7026,"mpe":12187,"mph":615,"mpi":8423,"mpl":13140,"mpo":20484,"mpr":9709,"mpt":6437,"mps":5011,"mpu":1876,"ms ":6835,"nTo":847,"mst":734,"mta":3202,"mte":3651,"ió ":158662,"mud":478,"mul":5494,"mun":70097,"mus":5896,"mur":1824,"mut":620,"iòd":522,"iòf":670,"iòp":1205,"my ":586,"ión":1227,"iós":1780,"na ":220961,"nc ":7524,"nab":616,"nad":17767,"nac":13835,"nae":935,"nag":1094,"nai":2426,"nal":38916,"nak":484,"nan":5493,"nam":5460,"nap":613,"nar":17101,"nat":30685,"nas":9688,"nav":3059,"nau":2292,"naz":561,"nay":822,"jà ":1387,"nd ":7816,"nbe":579,"nbu":1232,"ne ":22377,"nca":14539,"nce":16829,"nch":3145,"nci":64974,"ncl":9885,"nco":4144,"ncr":1949,"ncs":1443,"ncu":1505,"ncy":517,"nda":23869,"nde":16620,"ndi":19292,"ndo":6191,"nds":943,"ndr":11684,"ndu":3915,"ng ":11247,"neb":984,"nea":2648,"ned":2947,"nec":3698,"nef":504,"nee":1092,"neg":18439,"nei":6432,"nel":3597,"nen":12397,"nem":2849,"neo":1410,"ner":33907,"net":5899,"nes":34659,"nev":447,"neu":7424,"nex":705,"nez":789,"ney":938,"nfa":1307,"nfe":3228,"nfi":1231,"nfl":2773,"nfo":4729,"nfr":1766,"ni ":20144,"nga":4787,"nge":7038,"ngh":831,"ngi":4881,"ngl":12728,"ncè":50834,"ngo":3054,"ngr":3118,"ngt":766,"ngs":1374,"ngu":13250,"nha":1988,"nhe":494,"ndà":979,"ndé":554,"ndè":4617,"ndí":543,"nk ":1042,"ndú":819,"nib":664,"nia":53223,"nid":9692,"nic":86188,"nif":4650,"nie":3921,"nig":794,"nil":1584,"nik":481,"neç":754,"nin":4394,"nim":9359,"nip":464,"nio":2632,"nir":5941,"niq":1948,"nit":24686,"nis":21391,"niv":8065,"niu":1270,"nja":2333,"njo":868,"nju":4341,"nka":688,"nki":503,"nn ":1976,"ngü":3506,"nla":582,"nll":1177,"no 
":17934,"nma":725,"nià":880,"nme":466,"nió":3827,"nna":2687,"nne":6803,"nni":2838,"nno":1064,"nob":2525,"nod":871,"noc":1416,"nof":494,"nog":1211,"noi":1668,"nol":3970,"non":1881,"nom":44404,"nop":1021,"nor":21716,"not":4432,"nos":4014,"nov":10569,"nou":2451,"nlà":603,"ns ":93685,"nqu":5542,"nt ":218860,"nre":1636,"nri":1763,"nu ":885,"nsa":6315,"nsc":2889,"nsf":1150,"nse":13629,"nsh":524,"nsi":13645,"nsl":626,"nsk":598,"nsm":857,"nsp":2161,"nso":4459,"nst":17890,"nsu":4044,"nta":57284,"ntc":442,"ntb":438,"nte":45205,"ntg":483,"nti":34736,"nth":1782,"ntm":1132,"nto":12026,"ntp":552,"nts":52368,"ntr":44179,"ntu":4635,"nua":2471,"nuc":3125,"nue":1727,"nul":593,"nun":1667,"num":4207,"nur":444,"nut":1033,"nus":3661,"nva":693,"nve":10382,"nvi":4155,"nvo":5353,"ny ":54383,"nz ":509,"nxa":1082,"ntà":2307,"nté":1802,"ntè":783,"ntí":4716,"ntó":8247,"nya":32997,"nye":6811,"nyi":2590,"l·l":30097,"nyo":10078,"nys":9599,"nza":1097,"nze":1172,"nzi":1387,"nzo":563,"nyà":880,"oa ":1401,"ob ":526,"oc ":11621,"oac":453,"oad":649,"oal":1685,"oan":4480,"oaq":609,"oat":570,"od ":1121,"oba":13189,"obe":6752,"obj":3414,"obi":3723,"obl":18679,"obo":674,"obr":20373,"obt":2022,"obs":1595,"obu":764,"oe ":493,"oca":16830,"occ":10555,"oce":9769,"oci":14079,"och":2121,"ock":2140,"ocl":1591,"oco":3039,"ocs":5711,"ocr":3188,"ocu":5515,"oct":6615,"of ":4550,"oda":2665,"ode":17239,"odi":7067,"odo":3984,"odr":1784,"odu":8140,"og ":698,"oem":738,"oel":520,"oen":1465,"oes":8459,"oet":3024,"ofa":494,"off":580,"ofe":5675,"ofi":6735,"ofo":434,"ofr":707,"oft":679,"ofu":793,"oi ":1289,"oga":2429,"oge":2507,"ogi":7936,"océ":1787,"ocè":1333,"ogl":1025,"ogo":1358,"ogn":1130,"ogr":9571,"ogu":1863,"oha":1268,"ohe":725,"ohi":598,"ohn":1581,"oho":474,"ok ":646,"oia":2098,"oic":468,"oid":2341,"oig":744,"oin":2067,"ois":3399,"oir":4439,"oit":531,"oix":942,"ol ":30372,"oja":1081,"oje":1839,"ojo":1630,"om ":61645,"on ":37263,"ola":15384,"old":2685,"olc":987,"olf":1900,"ole":8236,"oli":14768,"oll":8132,"olk":744,"olm":858,"olo":16547,"olt":11680,"ols":5388,"olu":10825,"oly":514,"oma":23743,"omb":8927,"ome":25817,"omi":12024,"omm":3091,"omo":5351,"omp":30868,"oms":1766,"omu":8862,"omt":6651,"op ":5673,"ona":74755,"ond":10218,"onc":9281,"onf":5042,"one":36322,"ong":9559,"onj":5102,"oni":20927,"onn":3651,"ono":7822,"onr":882,"onq":1433,"ont":36749,"ons":68141,"onv":3868,"onu":4027,"onz":1423,"ol·":4740,"ony":2654,"ood":947,"ook":705,"ool":561,"oon":456,"oop":682,"oor":1244,"oot":626,"or ":70900,"opa":6157,"olà":508,"ope":9789,"oph":1988,"opi":6583,"olè":1083,"opl":744,"olç":548,"opo":12014,"opr":1132,"olí":15446,"opt":2194,"ops":1309,"olò":6420,"opu":4511,"olú":505,"os ":45596,"omà":6487,"omé":2932,"omè":1744,"oqu":4205,"omò":1267,"ot ":16595,"m² ":1388,"omú":1077,"orb":2892,"ora":25812,"ord":38310,"onà":1051,"orc":5879,"orf":1857,"ore":13865,"org":12794,"ori":40355,"onè":4899,"orl":959,"ork":2130,"orn":8044,"orm":37822,"orp":2115,"oro":4638,"orr":12139,"orq":2088,"oní":6153,"ort":34195,"ors":17718,"onò":2272,"oru":2747,"ory":799,"ou ":62701,"osa":16929,"osc":5383,"ose":10173,"osg":602,"osf":1074,"osi":11689,"osh":645,"osm":641,"osl":529,"oso":4882,"osp":1959,"oss":9976,"osu":464,"ost":28273,"ov ":962,"ota":15314,"ote":11949,"oth":1551,"otg":555,"oti":2918,"opè":600,"oto":6053,"otr":1076,"ott":1806,"ots":4720,"opò":2183,"otu":509,"otx":851,"otz":628,"ow ":986,"oua":574,"ouc":609,"oub":998,"oue":927,"oud":487,"oug":838,"oui":1328,"oul":1643,"oun":1441,"oup":535,"ous":2944,"our":6788,"out":1242,"ouv":831,"oux":592,"ox 
":683,"ova":10310,"orà":1216,"ove":22419,"ovi":10137,"orè":686,"orç":2260,"ovo":1045,"orí":1417,"oy ":974,"owe":632,"osé":1584,"own":771,"osí":520,"ows":544,"oz ":585,"oxa":515,"otà":1202,"oxi":2953,"otè":716,"otí":765,"oya":1044,"oza":526,"ovè":610,"ové":1028,"ozo":576,"oví":8946,"pa ":9739,"pad":1653,"pac":5079,"pag":1729,"pai":2828,"pal":13281,"pan":17440,"pam":2029,"pap":1804,"par":94459,"pat":8342,"pas":6344,"pav":979,"pau":650,"là ":15487,"pe ":1865,"pci":3487,"làb":466,"làm":772,"làn":3621,"làs":2212,"pea":2241,"ped":2950,"pec":9867,"pee":640,"pei":3947,"pel":25143,"paç":459,"pen":10267,"per":146616,"paí":2905,"pet":9548,"paï":1343,"pes":3904,"peu":2601,"pañ":529,"pez":544,"ph ":863,"pi ":50107,"pha":1431,"phe":861,"phi":1300,"pho":1016,"phy":655,"pia":4024,"pid":2120,"pic":8010,"pie":3141,"pig":652,"pil":2968,"peç":675,"pin":8423,"pio":3260,"pir":3302,"piq":784,"pit":8813,"pis":9463,"lé ":469,"lça":1180,"pla":15092,"lèc":1983,"ple":12246,"pli":6764,"plo":3236,"lèn":3270,"lès":10249,"plu":1347,"po ":810,"lés":4374,"pió":810,"lí ":2941,"pob":11436,"poa":620,"pod":6800,"poc":4020,"poe":3561,"pog":871,"pol":20332,"pon":13711,"pom":605,"pop":3253,"por":25781,"pot":6581,"pos":25283,"ppe":1145,"plà":837,"ppi":516,"ppo":488,"ps ":11507,"líd":807,"líc":6419,"lín":4234,"lím":5029,"líq":803,"lít":11654,"lís":1456,"lív":799,"pra":3353,"pre":34824,"pri":25834,"pro":63491,"poà":783,"pse":893,"psi":2653,"pta":8281,"pte":6450,"pti":2623,"pto":6121,"ptu":1270,"ló ":4588,"pub":5108,"pug":578,"pul":6310,"pun":3708,"pur":2968,"put":7580,"pus":5848,"prà":1039,"lòg":4388,"pré":7343,"lòm":2228,"prè":1483,"lòn":2075,"prí":1326,"lòs":3229,"prò":1685,"lúm":491,"lús":483,"mà ":8683,"màl":657,"màn":3002,"màr":618,"màq":700,"màt":5607,"màs":464,"màx":1377,"mèd":1307,"mèn":1100,"mèr":3763,"mèt":1992,"mès":692,"més":26164,"mí ":1247,"míf":815,"míl":19165,"mín":758,"mís":442,"mít":521,"mó ":601,"qua":27959,"que":179626,"qui":29966,"mòb":818,"mòc":660,"mòn":1162,"mòr":663,"món":3556,"mós":1051,"quà":681,"qué":942,"què":5556,"quí":5413,"mú ":1104,"ra ":117928,"múl":441,"mús":4126,"rb ":573,"rc ":4562,"rab":6001,"rad":25715,"rac":27295,"raf":5098,"rae":1650,"rah":1040,"rag":7813,"raj":1208,"rai":2449,"ral":36833,"rak":919,"ran":117404,"ram":13092,"rap":2196,"rao":812,"rar":11553,"raq":935,"rat":34383,"ras":9366,"rav":6126,"rau":4831,"rax":445,"raz":685,"ray":988,"rd ":28176,"nà ":1344,"rba":4175,"rbe":2172,"rbi":3110,"rbo":2571,"rbr":1395,"rbu":1357,"re ":127856,"rca":15228,"rce":16884,"rch":2208,"rci":9579,"rcl":628,"rco":1658,"rcs":525,"rcu":4447,"rf ":586,"rda":5771,"rde":7271,"rdi":7800,"nàm":532,"nàl":791,"rdo":4676,"nàs":563,"rds":1314,"rdr":6511,"nàr":2379,"rdu":1144,"rg ":8132,"reb":7840,"rea":19325,"red":6383,"rec":25629,"ref":7131,"ree":3093,"reh":637,"reg":59431,"rej":893,"rei":14923,"rel":14087,"raç":1275,"ren":38502,"nán":562,"rem":10413,"rep":9263,"reo":750,"rer":13581,"req":1335,"ret":28281,"res":90579,"raï":1274,"rev":5388,"reu":8404,"raó":605,"rez":1149,"rey":666,"rh ":437,"rfa":571,"rfe":986,"rbà":1165,"rfi":689,"rfo":609,"rbó":588,"ri ":38233,"rga":12320,"rge":7141,"rgi":4873,"rgo":1906,"rcí":934,"rgu":2380,"rha":441,"rdà":2437,"rdí":525,"rk ":3017,"nç ":1220,"rib":13766,"ria":54826,"rid":11934,"ric":41587,"rif":1883,"rie":19265,"rig":19048,"rii":559,"ril":7707,"rik":753,"rin":25425,"rim":24490,"rip":8142,"rio":15451,"rir":3990,"riq":1644,"rit":24233,"ris":22414,"riv":4321,"riu":12192,"rix":568,"riz":963,"rl ":840,"rja":462,"rfí":2906,"né ":642,"rm 
":573,"nça":14937,"rka":439,"rgà":1140,"rke":572,"rki":433,"rgè":459,"nço":1204,"rn ":10052,"rla":7357,"nèc":538,"rle":3694,"rld":647,"nèi":452,"rli":990,"rlo":1299,"nès":8870,"nèr":476,"nèt":1405,"ro ":9618,"rma":33523,"rià":2590,"rme":22120,"rmi":5557,"néi":2057,"riè":519,"rmo":1879,"nét":629,"nés":466,"riò":721,"rmu":2846,"rió":532,"rp ":429,"rna":16135,"rne":8134,"rni":4677,"rno":1290,"rns":1273,"rnu":487,"ní ":1501,"rob":14119,"roa":1438,"rod":10472,"nçà":569,"roc":11373,"rof":6472,"roe":762,"roh":627,"rog":6246,"roj":2089,"roi":2600,"rol":6688,"ron":19875,"rom":14247,"rop":23195,"roo":664,"ror":1191,"roq":1246,"rot":11015,"ros":8555,"rov":19098,"rou":2184,"rox":1912,"row":590,"nçó":919,"roy":445,"rpa":569,"rpe":904,"rpi":853,"rpo":1802,"rlí":799,"rpr":1467,"rs ":43553,"rmà":5015,"rmè":811,"nín":1769,"ním":5187,"nís":1535,"rqu":17402,"rt ":37622,"rra":26732,"rre":30205,"rná":562,"rri":15516,"rro":7545,"rní":499,"rru":892,"rry":820,"ru ":895,"rsa":4205,"rse":7640,"rsh":486,"rsi":8955,"rso":11182,"sTo":1591,"rst":783,"rsu":641,"rta":61936,"rte":6840,"rti":28399,"rth":2873,"rto":4054,"rts":5527,"rtr":804,"rtu":4238,"nó ":1101,"rub":519,"rud":719,"ruc":5530,"rue":1673,"rug":2246,"rui":2261,"rul":696,"run":2378,"rum":3191,"rup":10424,"rur":944,"rut":1660,"rus":7244,"ruz":521,"rva":5037,"rve":5065,"rrà":1253,"rvi":4488,"nòm":2605,"nòn":672,"rrò":1005,"ry ":4327,"rxa":1731,"rxe":465,"rtà":1217,"rxi":2115,"rtí":4136,"ruï":2409,"rza":620,"rze":469,"sa ":49499,"núm":1489,"sc ":5158,"sac":3907,"sab":2777,"sad":5098,"sag":1808,"sai":1232,"sam":2513,"sal":7035,"san":7869,"sap":1258,"sas":1691,"sar":9997,"sau":1965,"sat":8262,"sav":1021,"say":540,"sba":1107,"sbe":2109,"sbo":842,"sbu":1086,"se ":23004,"sca":10216,"sce":2797,"sci":2911,"sch":2851,"scl":1633,"sco":15125,"scr":15598,"scu":12352,"oàc":1284,"sde":2503,"sdi":860,"sea":449,"sec":4985,"see":524,"sed":628,"seg":26346,"sei":1192,"sem":9727,"sel":10402,"sen":30737,"seq":1206,"sep":5994,"ses":14361,"ser":50030,"seu":29396,"set":8719,"sev":20050,"sey":497,"sex":1181,"sh ":2994,"sfa":531,"sfe":1589,"sfo":1332,"si ":8535,"sge":646,"scà":439,"sgl":4161,"scó":611,"scò":901,"sha":1765,"she":777,"shi":2827,"sho":1034,"shu":449,"sk ":596,"sia":14705,"sic":17879,"sib":3043,"sie":1616,"sid":12721,"sig":8275,"sif":2038,"sim":6186,"sil":5083,"sio":8144,"sin":7331,"siq":657,"sis":13553,"sir":1327,"siu":1368,"sit":64187,"siv":1692,"ska":1514,"ske":504,"ski":1332,"sla":3540,"sle":675,"sli":610,"sll":456,"slo":1071,"so ":2581,"sma":1501,"sme":14571,"sià":1711,"smi":1069,"smo":829,"sió":16559,"sne":1493,"sni":622,"soc":7991,"sob":11796,"sod":622,"sof":3156,"som":1185,"sol":10431,"son":13880,"sop":1090,"sos":16303,"sor":9876,"sou":838,"sot":4932,"sov":2577,"spa":16778,"slà":1095,"spe":11857,"spi":4099,"spl":1010,"spo":10340,"spr":7848,"spu":5277,"ss ":1800,"squ":6763,"st ":39126,"sra":955,"su ":660,"ssa":20530,"sse":19728,"oïd":505,"ssi":25382,"sso":13917,"tTo":499,"ssu":1701,"sta":101622,"spà":903,"ste":42113,"sth":493,"sti":43697,"spè":11910,"sto":9599,"str":59818,"sts":2256,"stu":8027,"sua":2241,"suc":3183,"sub":8739,"sue":1081,"sud":11949,"suf":817,"sum":2161,"sul":6935,"sun":568,"sup":9176,"sus":1301,"sur":6523,"svi":496,"sy ":573,"swa":456,"ssà":1360,"ssí":918,"stà":12114,"stè":2045,"sté":688,"stí":1611,"stò":9473,"stó":429,"stú":635,"ta ":122424,"tc 
":1199,"tab":6420,"tad":11316,"tac":14485,"taf":1085,"tae":440,"tag":3020,"tai":1997,"tal":64634,"tak":604,"tan":69063,"tam":65119,"tap":1329,"tar":32566,"taq":429,"tat":102986,"tas":2466,"tav":5072,"tau":2716,"tax":624,"pà ":654,"tba":556,"tbo":5303,"te ":35517,"tch":930,"pàg":896,"pàn":607,"pàs":516,"teb":673,"tea":3306,"ted":1805,"tec":11537,"teg":9002,"tej":688,"tei":11170,"tel":17386,"ten":56121,"tem":24684,"tep":736,"teo":2790,"ter":78366,"tet":1273,"tes":39189,"tev":1479,"teu":1106,"tex":1880,"th ":3201,"ti ":4614,"tge":13266,"tha":2284,"the":5645,"thi":1651,"tho":1884,"thr":609,"thu":1372,"thy":461,"tib":1285,"tia":9331,"tid":5028,"tic":56853,"tif":3870,"tie":2340,"tig":8482,"til":13513,"tin":20966,"tim":7466,"tip":6726,"tio":6592,"tir":9312,"tiq":5351,"tit":28398,"tis":7207,"teï":834,"tiv":16038,"teò":848,"tiu":13608,"tja":6493,"pça":563,"tla":862,"pèc":11863,"tle":1839,"pèd":1199,"tli":456,"tll":1435,"pèl":878,"to ":6648,"tma":1238,"tià":1107,"tme":1776,"tmo":629,"tiò":607,"tió":1083,"tp ":925,"tna":665,"tjà":1175,"tni":849,"pí ":547,"tob":475,"tod":2373,"toc":2978,"tog":2089,"toi":886,"tol":9991,"ton":11361,"tom":4842,"top":2299,"tor":49989,"tot":12646,"tos":4059,"tov":523,"tou":627,"tpe":793,"tlà":1789,"ts ":123279,"pít":678,"tt ":1057,"tra":52849,"tre":76882,"tri":41278,"tro":24672,"tru":10105,"tu ":878,"tsa":465,"tse":1279,"tsi":484,"tsu":500,"tta":2267,"tte":2664,"tti":1561,"tto":1020,"ttp":937,"pó ":1893,"tub":5389,"tua":62128,"tud":8635,"tuc":2658,"tue":1514,"tug":2149,"tui":769,"tul":1604,"tun":4869,"tum":1676,"tur":31002,"tut":3095,"tus":3479,"tx ":864,"trà":2278,"pòn":1874,"trí":1078,"pòs":948,"tró":603,"trò":2082,"ty ":2018,"tz ":1159,"txa":1326,"txe":1510,"txi":632,"tuà":944,"tuï":1466,"tza":24199,"tze":2838,"ua ":10767,"púb":6544,"tzà":645,"ub ":3172,"uc ":2667,"uac":2232,"uad":11839,"uai":1953,"uam":682,"ual":28714,"uan":11822,"uas":561,"uar":7084,"uat":47989,"ud ":18502,"uba":1775,"ubd":594,"ubc":1131,"ubf":937,"ube":2183,"ubj":552,"ubi":3012,"ubl":6690,"ubm":494,"ubo":616,"ubr":5366,"ubt":888,"ubs":3721,"ubu":437,"ue ":126160,"uca":3754,"ucc":7080,"uce":759,"uci":9562,"uch":1395,"uck":584,"ucl":3075,"uco":864,"ucr":1695,"ucu":635,"uct":6590,"uf ":614,"uda":11060,"ude":3855,"udi":9915,"udo":1171,"uds":473,"uec":1219,"ueb":903,"ued":1823,"ueg":856,"uei":5164,"uel":7936,"ueo":992,"uen":7564,"ues":47764,"uer":15228,"ueu":754,"uet":3253,"uev":836,"uez":760,"uff":474,"ufi":449,"ufr":480,"ui ":5837,"uga":5261,"ugb":506,"uge":1945,"ugi":891,"ugh":677,"ugm":498,"ugo":826,"ugu":4941,"uha":903,"udò":503,"uk ":502,"uia":3917,"uic":515,"uie":813,"uid":2598,"uig":1442,"uim":1036,"uil":5429,"uio":761,"uin":7811,"uip":3162,"uis":4149,"uir":5469,"uit":17969,"uiv":1075,"uix":2053,"ul ":3717,"uja":1612,"um ":10025,"uka":443,"un ":192982,"ula":29038,"uhà":853,"ulc":505,"ulf":473,"ule":5627,"ulg":1229,"uli":10081,"ull":4486,"ulm":1227,"ulp":452,"ulo":1383,"ult":17665,"uls":2133,"ulu":974,"uma":4588,"umb":1839,"ume":9875,"umi":2916,"umo":867,"umn":844,"ump":546,"ums":1051,"umu":683,"up ":7583,"una":115810,"und":14739,"unc":5934,"une":6232,"ung":1717,"uni":75336,"unk":469,"unn":474,"uno":472,"unt":18187,"uns":8577,"uny":15412,"ul·":549,"ur ":10863,"upa":8358,"upe":8274,"upi":692,"upo":2679,"upr":678,"ulí":685,"ups":1372,"us ":57417,"umà":929,"uqu":973,"ut 
":22574,"urb":2707,"ura":45952,"urd":1647,"urc":2054,"ure":17552,"urg":6124,"uri":8027,"urk":531,"urn":1964,"uro":9811,"urr":1213,"urq":1132,"uní":880,"urt":4141,"urs":6415,"uru":1680,"ury":571,"usa":7168,"usc":1878,"use":5260,"usi":6699,"ush":829,"usk":567,"uso":806,"uss":4155,"usu":2167,"ust":14137,"utb":4938,"uta":29096,"ute":2409,"uth":1062,"uti":9123,"utl":486,"uto":7896,"utr":950,"utt":722,"uts":3313,"utu":1063,"utx":493,"ux ":3488,"uva":445,"urà":594,"uve":1383,"uvi":1205,"urí":1257,"uró":857,"uz ":716,"utà":1102,"uxe":777,"utò":2459,"uza":451,"va ":106023,"vad":4853,"vac":2535,"qüe":1179,"vag":486,"vai":1691,"val":13789,"van":14175,"vam":2174,"var":7823,"vat":3912,"vas":1634,"rà ":3933,"ve ":4089,"ràc":5104,"ràb":602,"ràd":560,"ràf":3237,"ràm":664,"ràl":1743,"ràn":2104,"ràp":855,"ràs":1116,"ràr":841,"ràt":2402,"ved":590,"vec":630,"veh":638,"veg":5362,"vei":3281,"vel":8498,"qüè":1315,"ven":21441,"vem":4525,"ver":38422,"vet":1227,"ves":14888,"veu":1565,"vi ":3211,"rç ":5332,"via":10597,"vid":9383,"vic":2175,"vie":3820,"vig":1092,"vil":8533,"vin":8553,"vim":3021,"vio":1545,"vir":3660,"vit":4613,"vis":14610,"veï":765,"viv":1644,"viu":4994,"ré ":1046,"rça":2105,"rèc":1336,"règ":503,"rèi":642,"rèn":5308,"rès":5304,"vo ":1039,"vià":560,"viè":1375,"rés":8781,"rí ":2419,"voc":3261,"voi":1301,"vol":16015,"von":1417,"vor":4142,"vot":1057,"ría":1669,"ríd":607,"ríc":752,"ríg":1189,"rín":2502,"río":3512,"rít":3404,"rís":5081,"vre":1338,"rò ":7556,"ró ":3651,"vui":3195,"vul":923,"ròn":2822,"ròq":622,"ròp":2137,"ròs":681,"rós":546,"rú ":1201,"wa ":1404,"rús":665,"wal":701,"wan":1387,"war":2529,"wat":497,"way":601,"sà ":1407,"we ":460,"sàc":1251,"sàn":515,"web":921,"wer":676,"wig":486,"win":579,"sè ":545,"sé ":1728,"wn ":649,"sèn":1110,"sèr":3720,"sí ":864,"ws ":526,"sím":1023,"síl":458,"sín":504,"sís":559,"só ":937,"ww ":778,"sòl":1221,"són":13869,"www":774,"xa ":8099,"sús":797,"xad":949,"xac":781,"xan":2894,"xam":598,"xar":2283,"xat":539,"xas":500,"tà ":13676,"xe ":713,"xce":1029,"xcl":664,"tàc":678,"tàl":3228,"tàn":10732,"tàr":2876,"tàt":701,"xeb":700,"xec":2213,"xel":999,"xen":5829,"xem":5052,"xer":5848,"xes":1826,"xfu":529,"xi ":673,"xia":768,"xid":1180,"xic":3203,"xig":486,"xil":1324,"xin":3485,"xim":3896,"xip":901,"xit":1472,"xis":3718,"xiu":786,"tè ":945,"té ":7771,"tèc":1878,"tèg":457,"tèl":670,"tèn":2186,"tès":574,"tèr":1721,"tèt":687,"tén":954,"xió":452,"tí ":7856,"xon":1279,"xos":2744,"xpa":461,"xpe":1742,"xpl":1855,"xpo":1220,"xpr":1563,"tíc":1597,"tíf":2180,"tín":1414,"típ":1255,"tít":4017,"tís":1777,"xt ":1229,"xte":2775,"xti":2038,"xto":453,"xtr":2818,"tó ":11716,"xua":820,"tòg":462,"tòm":658,"tòl":2436,"tòn":8389,"tòr":10789,"ya ":28447,"túr":886,"yad":2319,"yam":961,"yal":1710,"yan":1672,"yar":1667,"yat":1531,"uà ":578,"ye ":706,"uàn":829,"yen":2530,"yes":3123,"yer":2450,"yia":1497,"yin":895,"uè ":4716,"ué ":2510,"yn ":760,"yla":494,"uèc":777,"uèi":776,"yll":431,"uèn":1009,"uès":1293,"yo ":645,"ués":652,"uí ":2229,"yol":7324,"yon":647,"yos":583,"yor":2318,"ys ":10504,"uím":3096,"uín":428,"uís":1893,"yra":458,"uïd":2904,"uïs":1175,"uït":4233,"yst":691,"yya":579,"za ":5587,"zad":5095,"zac":6060,"zal":446,"zak":429,"zan":2462,"zam":487,"zar":4633,"zat":5982,"ze ":2627,"vàn":432,"zel":560,"zen":1838,"zem":672,"zer":1157,"zi ":986,"zia":485,"zil":1095,"zin":652,"zim":483,"vé ":1312,"vèn":473,"vèr":780,"zo ":1020,"vés":2039,"zon":6127,"víd":553,"vín":8895,"zz ":445,"xèr":1610,"xí ":2939,"xò ":1228,"xòn":572,"yà ":951,"zà ":697,"アア":1081,"ης ":436,"ος 
":1575,"·la":8064,"·le":5700,"·li":4957,"·lo":1202,"·lu":2994,"·lí":5910,"Àfr":1807,"Àsi":1439,"Àus":1017,"És ":10713,"Ésa":437,"Índ":3364,"Òlt":635,"àbi":1441,"àbr":1178,"àci":8619,"àct":3420,"àdi":1055,"àfi":3296,"àfr":638,"àgi":1334,"àlb":937,"àla":554,"àle":596,"àli":6485,"àmb":1406,"àme":878,"àmi":2273,"àmp":626,"àmm":855,"ànd":2746,"ànc":3868,"àni":16801,"ànt":2743,"àns":473,"àl·":933,"àpi":984,"àpo":807,"às ":2023,"àqu":1070,"àra":3012,"àre":2240,"àri":9009,"àrt":1073,"àrr":2164,"àsi":1729,"àsq":664,"àst":2034,"àss":2414,"àti":10227,"àto":823,"àvi":811,"àxi":1936,"án ":1347,"ánd":563,"âte":655,"ão ":540,"ça ":14258,"çad":1687,"çam":1930,"çal":758,"çan":2501,"çat":1213,"çar":1761,"çon":901,"ços":820,"çue":665,"èca":1286,"èci":14442,"ècn":2007,"ècu":627,"èct":1398,"èdi":3114,"èi ":602,"ègi":1097,"èix":1072,"èn ":1343,"èla":841,"èlg":684,"èle":454,"èli":1252,"èmi":2546,"ène":8766,"ènc":20961,"èni":2309,"ènt":776,"èl·":1637,"èpo":1872,"ès ":80199,"èra":876,"ère":1653,"èrc":1864,"èri":10984,"èrn":556,"èrt":486,"èrs":756,"èsa":561,"èsi":1364,"èst":608,"èti":5012,"èto":1124,"ètn":703,"ètr":730,"èvr":822,"èxi":2818,"ée ":805,"éix":2212,"én ":1549,"éns":1755,"és ":232119,"ét ":506,"éra":478,"ére":495,"éri":684,"éu ":1664,"ési":4228,"éss":1399,"çà ":741,"çó ":1068,"ía ":2053,"íac":872,"íad":461,"íbl":475,"íbi":489,"íci":5839,"ícl":678,"íco":662,"ícu":6130,"íct":496,"íde":1426,"ídi":1300,"ífe":1270,"ífi":4217,"íge":870,"ígi":569,"ígu":457,"ín ":2860,"íli":20926,"ímb":929,"íme":625,"ími":9376,"ímp":3941,"índ":1015,"ínc":10455,"íne":431,"íni":4688,"ínt":657,"íns":1631,"íod":3531,"ípi":1480,"ís ":12059,"íqu":1045,"íri":1722,"ísi":3436,"íst":7167,"íti":15581,"ítr":512,"íto":5091,"ívi":958,"ívo":458,"ïda":2603,"ïde":848,"ïll":554,"ïna":1371,"ïne":928,"ït ":3336,"ïso":2690,"ïss":1050,"ïta":563,"ïts":737,"ña ":779,"òbi":1083,"òci":1146,"òcr":1177,"òdi":1030,"òfi":1393,"òfo":870,"ògi":4468,"ògr":1140,"òl ":442,"òle":2548,"òli":3897,"òmb":968,"òme":2032,"òmi":3820,"òni":18460,"òno":2682,"òns":1135,"òpe":1178,"òpi":2718,"òpo":1001,"òpt":1029,"òps":647,"òs ":2570,"òqu":909,"òrg":1217,"òrd":960,"òri":12552,"òrn":1226,"òrs":1069,"òsi":753,"òso":1297,"òst":751,"òss":949,"òti":1705,"òvi":472,"òxi":771,"ón ":21967,"óna":1070,"ós ":5791,"órm":2361,"úbl":6781,"úli":431,"últ":1838,"úmb":502,"úme":1155,"úni":3359,"ús ":3526,"úri":1851,"úsc":672,"úsi":4110,"úst":870,"úss":1836,"üen":1610,"ües":3017,"üèn":1315,"üís":1030},"n_words":[42630400,51577449,41251739],"name":"ca"}
diff --git a/nlp_resource_data/langdetect/profiles/cs b/nlp_resource_data/langdetect/profiles/cs
new file mode 100755 (executable)
index 0000000..e46e24f
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"D":14883,"E":9868,"F":10603,"G":9505,"A":22558,"B":22848,"C":16455,"L":15978,"M":26754,"N":16476,"O":10801,"H":14139,"I":10554,"J":20213,"K":19386,"U":5533,"T":19206,"W":5350,"V":19456,"P":31985,"S":35760,"R":13848,"Y":1260,"Z":7679,"f":46799,"g":61772,"d":331353,"e":908926,"b":175631,"c":304749,"a":764805,"n":728035,"o":890229,"l":442609,"m":328621,"j":233650,"k":440118,"h":259891,"i":521717,"w":9425,"v":451146,"u":342714,"t":548551,"s":514164,"r":514188,"q":1288,"p":289793,"z":213126,"y":191798,"x":11788,"í":321621,"é":145384,"á":236804,"ý":129650,"ú":13969,"ó":5647,"ě":138011,"ď":2560,"Č":7702,"č":98194,"ř":105343,"Ř":1405,"ň":6555,"Ž":1649,"ž":81570,"ť":3583,"Š":3572,"š":60015,"ů":48717," l":32594," m":62986," n":107131," o":79037," h":32176," i":15946," j":127655," k":89036," d":65217," e":11656," f":19925," g":5629,"р":1443," a":90604,"с":1165," b":47368," c":20381," z":85758," u":20743," t":59923," w":1272," v":146123," p":164476," s":159790," r":52674," J":20168," K":19243," H":14014," I":10456," N":16390," O":10658," L":15894," M":26594," B":22644," C":16189," A":22443," F":10496," G":9381," D":14695," E":9809," Z":7643," Y":1252,"и":1928,"о":2213,"н":1523," S":35443," R":13762,"в":1172," P":31807,"а":2293," W":5251," V":19363," U":5519,"е":1554," T":19078," ú":11862," č":34240," Č":7692," ž":9995," Ž":1647," Š":3567," š":7349," ř":11127," Ř":1403,"A ":3554,"Da":2148,"Co":3211,"Ce":1274,"Ch":3956,"Do":2726,"De":2027,"Di":1704,"Ev":1845,"Ge":1462,"Ga":1606,"I ":2501,"Fr":2934,"Fo":1409,"Fi":1671,"C ":2405,"Au":1413,"Ar":2392,"As":1426,"D ":1568,"Ba":3474,"Am":1706,"An":3282,"Al":3092,"By":2085,"Bu":2103,"Br":3950,"Ca":2582,"Bi":1362,"Be":3232,"Bo":3007,"Ku":1175,"Kl":1430,"Kr":2544,"Ko":4086,"Le":3365,"Li":3491,"La":2847,"Lu":1426,"Lo":2548,"Me":3379,"Mi":4015,"O ":1633,"Ma":8187,"Mo":4597,"Ni":1492,"Ne":3393,"Na":4223,"P ":1532,"No":2609,"Ob":1767,"Gr":1729,"Ha":2931,"He":2201,"Ho":2992,"Hr":1212,"In":2920,"Ja":4326,"L ":1226,"Ji":2201,"Je":8576,"Jo":1796,"Ju":1244,"Ka":5031,"M ":1302,"Tu":1161,"Tr":2534,"To":2275,"Th":2120,"Ti":1361,"Te":3239,"Ně":1339,"Ta":2435,"V ":4585,"St":6172,"Sv":1697,"Su":1422,"Wi":1615,"Wa":1205,"Vy":1686,"Vo":1294,"Vi":2012,"Va":1654,"Ve":3321,"Pr":7552,"S ":3112,"Pe":2524,"Pa":5499,"Pl":1516,"Po":7183,"Pi":1236,"Os":1168,"Or":1234,"R ":1716,"Se":3431,"Sc":1219,"Si":1856,"Sl":2584,"Sk":1212,"Sp":2686,"So":3053,"Ru":1888,"Sa":3516,"Re":2582,"Ná":1404,"Ro":3655,"Ra":2507,"b ":5436,"Zá":1325,"a ":217661,"Př":2063,"Za":1646,"Ze":1335,"i ":90037,"fy":1326,"ge":8367,"ga":7746,"bý":4615,"fi":13067,"ač":14478,"fr":4738,"fu":2046,"ft":1555,"fo":9138,"bí":5183,"j ":5728,"gy":1560,"dá":9259,"he":13421,"ha":16982,"bě":8276,"gn":2020,"gl":5355,"gi":10280,"gh":1578,"gu":3569,"gr":6781,"cí":27801,"go":4942,"du":15000,"dv":6535,"dy":13097,"g ":5595,"ea":6165,"eb":20255,"ec":32017,"ed":50406,"de":36807,"dd":1592,"di":24806,"dh":1175,"dk":3086,"dm":3275,"dl":11857,"do":40054,"dn":44007,"dp":2177,"ds":6618,"dr":15992,"ew":1387,"ex":5658,"eu":3247,"ev":22848,"ey":1497,"ez":19185,"fa":3278,"h ":68060,"fe":4502,"bá":2585,"eh":9007,"eg":5751,"ef":3385,"ee":1918,"el":51949,"ek":22014,"ej":27806,"ei":3682,"ep":11390,"eo":6058,"en":103592,"em":54636,"et":33365,"es":44385,"er":89316,"ca":4919,"e ":246825,"bv":2259,"by":26507,"bs":4261,"br":9964,"bu":11101,"bn":7303,"bo":26250,"bj":2483,"bl":11541,"bi":8921,"bc":3044,"bd":1995,"be":13547,"dc":2442,"db":1918,"da":22450,"f 
":2962,"cy":1677,"cu":2117,"ct":4541,"cr":1140,"co":10175,"cn":2818,"ck":45489,"ci":24104,"ch":106800,"ce":56738,"c ":9636,"az":14206,"ay":1595,"ba":13431,"d ":32945,"at":54683,"as":30808,"ar":43773,"ax":1424,"av":34552,"au":9271,"ak":35472,"al":52089,"ai":4202,"aj":15958,"ap":12841,"am":28407,"an":76603,"ac":26092,"ad":38615,"ab":9081,"ag":6085,"ah":10143,"ae":4121,"af":3809,"nu":14720,"nt":27386,"ns":25831,"ič":7118,"jí":23302,"no":57801,"nn":8311,"nz":2252,"ny":17200,"oe":1227,"ká":21030,"of":6319,"oc":21110,"od":69409,"oa":1996,"ob":47136,"ké":50289,"om":27583,"on":44356,"ok":26216,"ol":47354,"oi":2071,"oj":17875,"og":10937,"oh":13386,"ot":27460,"os":56160,"ov":112940,"ou":73196,"op":21879,"oo":2643,"or":53147,"r ":22705,"ox":1211,"ow":2081,"oz":27164,"lá":15416,"pe":15026,"pa":25836,"ký":35391,"pc":1302,"pl":12514,"lé":7239,"pn":3046,"po":81579,"ph":1918,"pi":14831,"eň":2211,"lo":61865,"ln":20483,"hé":2618,"lm":4934,"ll":7520,"ls":8702,"dů":3418,"lu":15908,"lt":4743,"lz":1355,"ly":9717,"o ":138337,"ma":25523,"eř":2784,"mb":4786,"hý":1516,"me":40325,"iá":5235,"ml":2209,"eš":4400,"mi":28342,"mn":8112,"mm":1350,"mp":4986,"mo":29298,"mr":1370,"ií":2470,"ms":3506,"mu":15591,"my":5631,"p ":4419,"na":97771,"nc":16128,"nd":14047,"ne":57176,"já":1158,"nf":2490,"ež":8504,"ng":13163,"ni":45831,"nk":8297,"jv":3866,"ju":1525,"eč":10324,"js":11164,"jn":5134,"jo":3496,"jm":7494,"ki":3600,"ke":11246,"kd":5064,"kc":3108,"ka":39832,"m ":95402,"ců":2079,"ky":30906,"kt":39344,"ku":33102,"kv":3857,"ko":71233,"kr":19435,"kl":18948,"km":3641,"kn":3161,"li":57563,"lk":10732,"le":77448,"há":9650,"ld":2521,"lf":1159,"la":65008,"dř":2617,"lc":1721,"lb":2945,"n ":43523,"hr":15902,"dí":8010,"hv":1306,"ht":2875,"hu":12412,"hi":7867,"hn":5204,"ho":73846,"hl":13059,"hm":1256,"dé":2586,"id":15111,"ic":65339,"dý":1471,"ib":5266,"ař":4683,"ia":10547,"ih":5555,"ig":6211,"if":3335,"eá":1247,"ie":19796,"hy":6927,"k ":33844,"ir":8411,"is":36532,"it":38607,"iu":2679,"iv":18333,"aš":1639,"ii":6209,"ij":4185,"ik":23953,"il":27036,"im":9173,"in":70373,"io":12386,"ip":4762,"je":121519,"až":7470,"ji":17967,"iz":8625,"l ":42283,"bř":2940,"ja":17600,"dě":13132,"xi":2953,"té":13825,"tí":14980,"pů":6675,"xt":1600,"z ":24530,"př":32632,"ož":13354,"tá":14232,"nž":1154,"oš":2365,"sé":1859,"sí":5664,"rč":2879,"nů":2947,"vy":26292,"vz":6628,"y ":96008,"rý":11309,"oř":9947,"wa":2470,"sá":2703,"we":1453,"vl":7295,"ré":10396,"vk":3052,"nš":1672,"vi":26093,"mž":1362,"vu":5429,"vr":8123,"vs":6209,"vn":24780,"vo":37739,"uz":6709,"mů":3291,"uv":3489,"ve":47740,"rá":30974,"vc":2056,"va":51428,"pě":4347,"x ":2742,"ui":1687,"uj":20016,"uk":7056,"ul":12653,"ue":2392,"ug":1937,"uh":10244,"ur":17880,"us":20839,"ut":14862,"um":13455,"un":12543,"up":13452,"ty":13294,"tz":1768,"tu":25069,"tt":3192,"lů":2536,"tv":17643,"ub":11697,"ua":2086,"ud":12808,"uc":6671,"w ":1552,"to":60100,"tn":18581,"tm":1809,"tl":6035,"ts":8087,"oč":8726,"tr":38784,"pí":3163,"te":85042,"pá":2981,"tk":8430,"ti":58581,"lš":2281,"th":6153,"v ":66402,"tb":1873,"tc":1257,"ta":58747,"ně":45741,"su":6657,"sv":12247,"ss":3706,"st":145711,"sy":7700,"ků":5193,"sl":22654,"sk":92692,"sn":8865,"sm":8745,"sp":20097,"so":35354,"sr":1708,"nč":1796,"sc":4718,"se":52884,"sh":2118,"si":15634,"rz":2926,"u 
":121101,"mě":22506,"sa":15498,"kř":3224,"sb":1371,"ný":29623,"rr":2082,"rs":10884,"rt":11161,"ru":27381,"rv":11951,"ry":11066,"ní":119912,"rp":2592,"ro":116857,"rn":21334,"rm":10574,"né":26518,"rl":3217,"rk":6663,"ri":39119,"jš":5173,"rh":1365,"rg":7113,"iž":5869,"ná":41081,"re":36192,"rd":6788,"rc":6965,"mý":1831,"rb":3000,"ra":81349,"t ":42900,"mí":11983,"mé":6077,"iš":4295,"má":9760,"lý":4646,"s ":40054,"py":1779,"pt":3558,"pu":7668,"hů":2019,"lí":8097,"pr":57083,"ps":5342,"zý":1387,"zá":15336,"už":12787,"vš":5164,"zí":7043,"vů":2483,"uš":4102,"tš":4235,"vě":25419,"tř":15220,"tů":3818,"rž":1454,"vý":29752,"zh":2175,"zi":13719,"rš":2140,"zb":2038,"zd":9382,"ze":35435,"vá":31782,"tě":14576,"za":24684,"yz":2224,"rů":8094,"zv":6153,"zy":3702,"zs":3415,"uč":6510,"zr":1709,"zu":4066,"zt":1618,"zo":8118,"zn":27029,"ví":13118,"zp":5782,"zk":6049,"zm":2157,"vé":19370,"zl":2273,"yh":1425,"yc":3822,"yd":3200,"tý":6426,"yb":3071,"yv":3371,"yu":1291,"yt":7847,"ys":11111,"yr":3273,"yp":5097,"yn":5498,"ym":4086,"yl":23502,"yk":5878,"zš":1160,"yř":1210,"yš":3131,"ěžn":1182,"ám":8120,"án":28607,"áp":5503,"áj":1550,"ák":7074,"ál":25430,"áh":2531,"áb":3286,"ác":7677,"ád":11483,"áz":13195,"áv":12832,"ár":9720,"át":14731,"ás":12931,"á ":61028,"íč":1699,"ód":1198,"ón":1579,"éž":3751,"ív":7836,"íz":3776,"ín":7431,"ím":27488,"íp":2283,"ír":5728,"ít":5219,"ís":9896,"íh":10920,"ík":10708,"íl":6160,"íj":1652,"íc":36122,"íd":6139,"íb":1336,"í ":168172,"áž":2247,"él":1388,"áš":2438,"ém":16118,"én":5964,"ét":2091,"ér":4128,"ář":4474,"éd":1164,"éh":25602,"é ":80173,"áč":1328,"úč":1787,"ýc":33402,"ýz":2981,"ýv":5852,"ýs":1457,"ýt":1801,"ýr":2669,"ým":16378,"úz":3790,"ý ":58367,"ús":1640,"íš":1631,"íř":2525,"íž":4061,"ě ":45418,"ěh":2633,"ěj":8834,"ěd":3131,"ěc":1612,"ěz":2258,"ěs":10757,"ět":15295,"ěv":1529,"ěr":3724,"ěk":7417,"ěl":9917,"ěm":5852,"ěn":11047,"ěž":3582,"ěř":2184,"ýš":1701,"Če":5023,"či":11688,"čk":4324,"čl":2919,"če":26348,"ča":7771,"ď ":1420,"č ":2311,"čá":8501,"čn":16418,"čo":3182,"čt":3695,"ču":2648,"čí":6991,"ň ":2419,"š ":1248,"ří":28277,"řá":2554,"řn":1352,"ři":14008,"řs":2960,"ře":44896,"řa":3989,"ší":18419,"ť ":1589,"še":8882,"ša":2373,"šn":3181,"šk":3546,"šl":1812,"ši":4496,"št":7997,"ňu":1474,"ř ":4182,"žs":2453,"žn":8213,"žo":1725,"že":16587,"žd":1803,"žk":1495,"ži":11885,"ž ":17370,"ží":12326,"žá":1346,"ů ":27572,"ům":2630,"ůl":1327,"ůs":3997,"ův":4803,"ůz":2205,"šš":1500,"ůž":1940,"čás":7287,"čí ":1292,"čít":2030,"čís":1405,"čtv":1158,"čuj":2526,"ční":7738,"čné":1325,"čný":1279,"čně":1729,"ěji":2384,"ěko":1330,"ěkd":1747,"ělo":1511,"ěle":2558,"ěkt":1458,"ěme":3785,"ěls":1244,"ětš":3901,"ěta":1179,"ěto":2030,"ěný":1390,"ěst":9587,"ějš":4291,"ění":3741," Ga":1588," Ge":1446," Fo":1382," Fr":2926," Fi":1650," Ha":2917," He":2194," Gr":1717," Hr":1206," Ho":2982," Ji":2194," Je":8552," Ja":4310," In":2868," Ka":5019," Jo":1792," Ju":1242," La":2820," Le":3350," Li":3464," Kl":1383," Ko":4081," Kr":2542," Ku":1171," Ma":8133," Mi":3991," Me":3365," Lo":2542," Lu":1423," Ne":3375," Na":4212," Ni":1486," Mo":4565," Am":1697," An":3275," Al":3081," Ba":3436," Au":1408," As":1420," Ar":2370," Be":3213," Bi":1350," Bo":2981," Br":3935," Bu":2094," By":2085," Ca":2514," Ce":1267," Ch":3924," Co":3154," Da":2135," Di":1684," De":2001," Do":2665," Ev":1844," Př":2056," Wi":1593," Wa":1184," Vy":1684," Ze":1330," Za":1643," a ":53430," Zá":1320," Os":1166," Or":1230," Po":7138," Pl":1502," Pi":1231," Pe":2516," Pa":5464," No":2595," Ob":1764," Ra":2495," Ro":3640," Re":2567," Ná":1395," Pr":7523," 
Sv":1696," Su":1408," St":6100," Ně":1337," Ta":2426," V ":3746," Th":2108," Ti":1351," Te":3210," Tr":2526," To":2242," Ru":1883," Sa":3503," Si":1839," Sc":1168," Se":3406," So":3031," Sp":2664," Sk":1209," Sl":2578," Va":1650," Ve":3306," Vi":1986," Vo":1286," Tu":1143," ja":14129," dě":2397," bř":1481," ji":8392," až":2786," je":93301," in":6001," it":1175," ka":8683," m ":1247," kd":3115," ke":1672," jm":2892," js":6707," ha":1600," bě":2145," he":2509," dá":1402," cí":2459," gr":1666," k ":5918," hi":2201," hl":5156," ho":6862," dí":1705," hr":6552," hu":3075," ni":2426," ne":33309," na":43193," my":1417," mu":2661," mo":9457," mn":2424," ok":4623," oc":1666," od":14999," of":1957," ob":22345," no":3710," le":12637," li":8468," la":3675," dř":1310," kv":1829," ku":2340," kt":24717," kn":2205," km":3399," kl":3809," kr":8199," ko":16866," me":11211," mi":4779," ml":1339," o ":7191," ma":9419," dů":1272," lo":3118," ab":1328," am":3804," an":6347," ap":1198," ak":1935," al":5289," au":3694," ar":3304," at":1332," as":2951," ba":3819," bi":2222," be":2412," bo":3752," by":19260," bu":3054," br":3187," en":1401," el":2125," fa":1772," ex":2023," fu":1558," fr":2890," fo":4715," fi":6203," ge":1763," bý":3017," i ":5273," co":2133," ce":6243," ch":5478," da":4034," do":19631," dn":2609," dl":1361," dr":8732," de":7028," di":4948," dv":4297," du":1944," vý":12688," zk":2698," ví":2124," zp":3908," zn":6476," rů":1942," zv":2770," tě":2639," za":18239," zd":2577," vá":3060," ze":8914," tý":1167," té":5230," pů":4589," z ":20036," př":29127," vš":2884," už":1794," zá":11557," tř":3845," vě":4981," ru":2498," ry":1961," u ":3351," sa":3585," mě":10287," kř":2709," se":38618," sc":1843," si":3884," sn":1263," sm":2713," sl":8492," sk":7519," sr":1447," sp":14637," so":12794," mí":3137," ra":2856," ná":11985," re":10229," ro":31079," ní":1455," pr":44618," ps":1456," s ":10437," má":2572," os":6522," ot":1283," ov":1214," op":3355," or":4357," oz":5841," lá":1294," pe":2741," pa":10148," pl":5963," po":56929," vy":18421," vz":6214," sí":2537," pě":1240," va":1643," ve":20353," uv":1309," mů":1354," vo":5885," vr":1606," vi":2111," vl":5324," ty":2521," tv":3467," tu":1727," us":1203," ur":2603," um":2673," un":1389," ta":9553," ně":9405," v ":54233," sy":4875," st":28273," sv":11047," su":1768," pí":1932," tr":6193," to":5957," th":1725," ti":1444," pá":1635," te":10751," Če":5018," čí":2219," čá":5494," čt":2423," če":12226," čl":2635," či":5961," ča":2613,"ňuj":1462," ús":1372," úz":3683," úč":1533," šk":1284," řa":1815," ře":4682," ří":3390," ži":4198," že":3745,"Evr":1575,"Fra":1982,"šší":1462,"ším":3926,"šíc":3060,"Byl":1846,"šíř":1269,"ší ":8873,"Cha":1224,"Nov":1143,"Par":1206,"Pro":2287,"Pra":2828,"Pod":1248,"Pol":1195,"ůzn":1758,"ůso":2743,"ůvo":3624,"Jed":1974,"Jeh":1143,"Je ":2998,"ům ":1924,"Kar":1595,"Mar":2760,"řez":1271,"řev":1736,"řet":1507,"řes":3291,"řen":6131,"řel":1284,"ři ":4648,"řed":13088,"řej":1316,"řek":2349,"řeb":1351,"řec":3115,"ře ":3029,"řad":2628,"ého":25401,"ém ":11396,"řsk":2573,"áře":1219,"ému":2422,"éna":1816,"éno":1417,"éri":1326,"Sta":2172,"Spo":1540,"Slo":1556,"Vel":1392,"The":1321,"šec":1390,"šen":2332,"še ":1844,"šak":1283,"šti":2097,"šní":1582,"ško":1848,"šin":1751,"ště":3036,"átu":1328,"bje":2435,"áte":1666,"átk":2625,"átn":2002,"áto":1624,"bit":1438,"ást":9035,"bil":2518,"bo 
":10795,"ávi":1211,"bli":4783,"ávn":2176,"bla":3565,"áva":2824,"bod":1239,"bol":1903,"boj":1202,"blí":1257,"ázv":1186,"ává":2557,"áze":5164,"bor":3758,"bov":1176,"bou":1297,"álk":1854,"áln":9542,"álo":2573,"álu":1211,"ákl":2414,"bal":2672,"áko":1419,"án ":4623,"ále":4254,"bar":2156,"áli":1317,"áns":1949,"bdo":1660,"áno":2096,"ány":1316,"ánu":1360,"ámo":1148,"bce":1301,"ána":2621,"áni":1332,"bec":2884,"ber":2768,"ben":2163,"bez":1903,"ápa":4357,"ámě":1142,"ání":9623,"át ":2061,"áro":4387,"árn":3371,"ámý":1162,"áda":1737,"ách":3663,"áce":1349,"ádá":1824,"ábě":1152,"ca ":1459,"ák ":1153,"ál ":2023,"ce ":37517,"ám ":1274,"bri":1805,"bro":1589,"bra":3506,"bu ":2220,"bní":3217,"bsk":1200,"bsa":1785,"bur":1260,"bud":1860,"bvy":1413,"by ":4307,"byl":18860,"byv":1815,"am ":2742,"ake":1229,"aji":2410,"al ":7387,"adě":2234,"aje":2039,"ak ":5398,"ahu":2375,"ahr":2506,"aha":1562,"ací":4252,"aké":5289,"anu":1370,"any":1635,"ano":3773,"ann":1847,"ant":5679,"ans":3967,"ají":6907,"ane":2662,"ang":5036,"ani":8977,"ank":1763,"ana":4560,"anc":5148,"and":5394,"amu":1194,"amo":2812,"amn":1598,"ami":3729,"ame":8001,"ama":1900,"aly":1179,"als":1998,"alo":7874,"all":1742,"ali":6637,"ale":6714,"ala":3987,"alb":1604,"an ":5076,"aku":1247,"akt":3867,"ako":12804,"abe":1141,"abs":1339,"ae ":1943,"ad ":4944,"afi":1777,"age":1161,"ael":1206,"ado":1550,"adl":1340,"adn":6465,"adi":3651,"ade":3282,"ady":1580,"adu":2552,"aco":1432,"aci":3453,"ach":6559,"ace":7532,"ada":3897,"arš":1293,"azu":1350,"aze":2771,"azy":2655,"até":1146,"apř":2101,"ba ":3672,"azý":1373,"avě":1439,"atř":2572,"at ":6248,"aná":3560,"are":2437,"ard":3288,"arc":2227,"ara":4391,"aro":4126,"arn":1320,"arm":1290,"ané":4536,"arl":1695,"ark":1851,"ází":4141,"ari":4139,"ars":2390,"art":3122,"asa":1162,"amě":1185,"asi":3609,"aný":5688,"ase":1167,"aso":1587,"asn":2528,"ask":1257,"ar ":2605,"alá":1216,"ape":1349,"alé":1624,"apo":2941,"as ":2058,"alý":1783,"ava":3516,"aut":3726,"avs":1166,"avo":2741,"avn":5290,"avi":3981,"ave":3850,"avy":1957,"avu":1943,"av ":1604,"ata":3060,"aně":2278,"ast":13117,"atn":1969,"atk":1885,"atr":1563,"ato":5121,"ate":11112,"ati":10351,"alš":1631,"atu":2078,"aur":1970,"řád":2065,"ří ":6262,"řív":1184,"říz":2145,"říc":1240,"říp":1616,"řís":1609,"řím":2008,"řík":1716,"říd":3286,"říž":1421,"až ":2790,"jeh":3911,"jej":4795,"jed":12881,"jek":1841,"jem":2659,"jen":5483,"jev":2523,"ji ":4802,"ažd":1260,"bře":2487,"děl":3685,"jak":10723,"děn":1255,"děj":2198,"jaz":2568,"je ":83650,"jme":2592,"jno":1257,"jov":2720,"jin":3546,"jih":2344,"jic":2414,"ito":2582,"itu":2506,"its":1828,"ity":1661,"isk":2395,"ism":2509,"isl":2097,"iso":2022,"ist":15778,"ině":2331,"ita":3897,"ite":5726,"iti":3568,"ivo":2672,"ivn":3856,"ium":1381,"iva":2483,"ivi":1448,"ive":2466,"is ":3839,"ion":6938,"iro":1733,"irm":1303,"ise":1488,"iný":1514,"iná":3960,"it ":3805,"dě ":4195,"itý":2115,"ití":1214,"ité":1984,"itá":1643,"izo":2349,"iza":2404,"km ":2378,"kdy":3252,"kej":1210,"kem":3800,"ket":1376,"ke ":1607,"kce":1649,"kde":1621,"kra":5687,"kre":3334,"kt ":1234,"ku ":19613,"kro":2033,"kov":11417,"kou":9659,"kos":4285,"kor":1310,"kop":1321,"kon":10573,"kom":4572,"kol":7494,"úze":3480,"klá":2870,"kni":1628,"klu":1477,"ko ":14736,"kle":2073,"kla":8470,"klo":1299,"jso":6864,"jsk":2959,"ečn":6772,"jmé":3893,"eče":1504,"již":2514,"kaz":1148,"kat":3393,"kar":1707,"kap":1616,"kan":2047,"kal":2034,"kam":1509,"kac":1503,"ka ":20272,"jvě":1879,"ha ":4741,"bě ":2724,"han":1905,"hal":1464,"har":2922,"běh":2126,"he ":2188,"dá 
":2060,"dál":1753,"dáv":1525,"her":2617,"dán":2244,"hem":3497,"his":2398,"běž":1211,"hla":5571,"hle":2243,"hlo":1435,"ho ":41844,"gli":3394,"cí ":18956,"cíc":3683,"gra":4857,"cím":1211,"ial":1377,"ian":1754,"ic ":2965,"ibl":1394,"ia ":4182,"ien":1656,"aří":2027,"ier":1153,"ifi":1720,"ict":1675,"ick":33133,"ici":3981,"ich":6375,"ice":11279,"ie ":13367,"ica":1513,"ido":1643,"idl":1498,"ide":3474,"ida":1748,"il ":4856,"ika":5332,"ii ":5869,"igi":1176,"icí":1825,"iho":2925,"ik ":2696,"imo":1396,"ime":1223,"inc":4325,"ind":2184,"ina":8223,"inn":2167,"ino":5250,"int":2630,"ins":4625,"inf":1536,"ine":3495,"ing":4248,"ini":3264,"ink":1380,"iká":1715,"inu":3551,"iny":6607,"iko":2695,"ikl":2333,"ila":2431,"in ":6095,"iky":3097,"ikt":1203,"iku":2133,"ilo":3489,"ill":1963,"iln":1971,"ilm":2591,"ili":3988,"ile":1526,"hok":1282,"hol":1704,"hot":1459,"hou":1879,"hov":5549,"hor":3514,"dí ":3801,"hod":8706,"hni":1267,"huj":1855,"hud":2458,"hu ":4702,"hro":1617,"hrn":1403,"hra":7196,"díl":2411,"hyb":1243,"hož":1183,"hy ":3116,"hum":1365,"hrá":2941,"evš":1525,"ext":1409,"etí":2247,"exi":1437,"ezn":2836,"eze":2667,"ezi":7262,"eně":1924,"eta":2522,"ete":4466,"eti":3348,"etn":2128,"esp":1404,"esn":2468,"eso":1292,"est":7801,"ev ":3656,"eto":3093,"etr":4039,"erá":7485,"eve":6187,"evn":2175,"eré":8410,"evi":2322,"erý":10219,"er ":7795,"eor":1444,"es ":7550,"epu":2593,"elé":1267,"epr":1430,"eri":9740,"erg":1674,"ere":2706,"ená":5438,"era":5702,"et ":4584,"emí":4275,"esk":11348,"esi":1360,"ený":6913,"ese":3215,"emě":2087,"erz":1749,"erv":3468,"eru":1909,"ení":16758,"ert":1878,"ers":3684,"ern":7190,"erm":2474,"ené":4823,"ero":6299,"ekl":1253,"eko":2098,"ekt":6413,"en ":12357,"ela":2337,"ele":12125,"eli":4792,"elm":1412,"eln":3331,"elk":5262,"ell":1825,"elo":2426,"els":2269,"ema":1842,"eme":2383,"emn":1840,"emo":2184,"emi":4395,"ene":4580,"ena":4918,"end":1571,"enc":3468,"eno":6243,"enn":2162,"eni":2669,"ens":7642,"ent":12967,"ejí":5227,"eny":2102,"egi":1835,"ej ":1214,"edí":1331,"eho":5425,"ek ":7971,"ein":1192,"el ":8637,"ejs":2194,"ejn":3303,"ejm":2020,"eji":2735,"em ":31739,"ejv":3622,"gin":1170,"gio":1330,"gie":2007,"gic":2475,"gii":1170,"gen":3213,"býv":3044,"být":1189,"gan":3061,"ga ":1144,"íže":1335,"íž ":1472,"fun":1501,"fra":2633,"ače":3487,"ačo":2232,"ačn":3017,"aču":2194,"for":4925,"fot":2015,"bí ":2241,"fic":2581,"fil":3337,"fik":2015,"fin":1574,"fir":1363,"da ":6390,"de ":6350,"dal":3374,"daj":1518,"dat":3084,"dan":2182,"dce":1208,"ctv":1837,"cně":1190,"cko":3237,"chá":6943,"cky":4430,"ciá":2350,"co ":1224,"cká":6180,"cké":15214,"cov":2906,"cou":2423,"cký":12856,"ch ":62063,"ces":2551,"cen":5710,"cem":1899,"cel":4240,"ci ":10328,"cha":4867,"chu":1777,"chy":1469,"cia":1607,"cie":2249,"che":3452,"chl":1932,"chi":2422,"cho":11778,"chn":2940,"cht":1572,"chr":1838,"ed ":3026,"ebn":2564,"ebo":12455,"ec ":4754,"edl":1223,"edm":1145,"edn":17139,"edi":5930,"ede":6652,"eda":1810,"edy":1655,"eds":1915,"edo":3095,"eck":8996,"ech":12275,"eci":1513,"ecn":1350,"dy ":11421,"drá":1267,"dvo":3395,"dva":1139,"dor":1289,"dop":2114,"dom":1869,"dol":1600,"dok":1496,"dov":6030,"dou":1896,"dos":2695,"dpo":1574,"dna":2226,"dne":2707,"dni":1534,"dno":8163,"dob":7531,"dst":2606,"íře":1323,"dně":4952,"duc":1342,"dné":1547,"dra":2227,"dná":2916,"du ":8252,"dro":2166,"dní":18004,"dru":7228,"dsk":2773,"dic":1916,"dia":1263,"der":3512,"des":2009,"dev":1882,"deb":1947,"dec":1456,"del":3171,"den":6683,"dem":3354,"di ":2989,"dle":4381,"dla":1946,"do 
":8212,"dlo":2844,"div":1579,"din":4766,"dio":1325,"dis":2494,"die":1484,"rga":2761,"rgi":1143,"ižn":3600,"ret":1327,"res":5841,"nás":2254,"náv":1475,"rez":1829,"náz":3803,"rač":1242,"iž ":1205,"rea":1647,"nác":1291,"rec":1925,"reg":1788,"nám":4897,"rem":3700,"ren":2252,"rek":1273,"nál":3115,"nár":3570,"rep":3246,"rdi":1273,"ná ":14630,"re ":2546,"rch":3978,"rce":1159,"raz":3775,"rd ":1372,"ras":1475,"rat":6479,"rav":9308,"raj":4559,"rah":1855,"ran":13598,"ram":4289,"ral":1943,"rak":3296,"rab":1667,"raf":2398,"rad":5238,"rac":4788,"ros":9033,"rot":4473,"rom":4735,"ron":3499,"rop":5057,"roz":10708,"rou":3738,"rov":17134,"rob":3794,"rod":11155,"roc":8820,"ní ":79844,"roj":4450,"rol":2078,"rok":8745,"rof":1452,"rog":2059,"rno":1887,"rnu":1168,"rna":1957,"rež":1163,"rni":1331,"ném":2468,"rmo":1370,"jší":4959,"ro ":11368,"rma":3339,"riá":1833,"néh":5048,"né ":18438,"raž":1181,"rit":3911,"ris":3258,"rig":1476,"rik":1662,"rin":2410,"ria":2007,"ric":8579,"rid":1291,"rie":3806,"rk ":1359,"ryc":1171,"ruh":6961,"rum":1222,"ruk":1449,"rus":3302,"rva":1915,"rve":2525,"rvn":4272,"ry ":6263,"rsk":6183,"rně":2168,"rst":1339,"rto":1577,"rti":1692,"rub":1147,"rtu":1152,"ním":14655,"ník":6579,"níh":7211,"rmá":1574,"níc":9560,"rt ":1561,"rné":1326,"ru ":6692,"rní":7275,"sah":2489,"měl":2079,"sam":2091,"nýc":8901,"ným":4766,"měn":2340,"san":1523,"sau":1548,"měs":9181,"měr":2410,"sa ":1633,"mě ":2287,"ruž":1312,"ný ":15629,"si ":3518,"měř":1748,"sin":1775,"sil":2216,"se ":32266,"sch":2059,"sev":4239,"ser":1773,"sed":1877,"kří":1234,"sen":1348,"sem":1612,"sel":1714,"spo":10422,"spr":1930,"spe":2260,"spi":1842,"ský":19060,"sou":17153,"sov":4194,"ské":26627,"son":1432,"ská":8637,"sof":1375,"soc":1565,"sob":5198,"su ":2847,"sní":1788,"sné":1153,"st ":11395,"slo":9919,"slu":2567,"sky":9407,"sla":4078,"sle":3342,"skl":1707,"sko":9797,"sku":8770,"ska":4807,"ske":1405,"sni":1352,"smu":2054,"stí":6180,"sté":3269,"stá":6877,"syn":1327,"sys":2806,"stě":3272,"své":1798,"smě":1301,"ste":9093,"sně":1417,"sta":23517,"stn":5308,"sto":14050,"sti":18925,"stl":1904,"stv":6645,"stu":6506,"str":16919,"sts":1291,"ků ":4430,"sty":1890,"sva":1646,"svo":1261,"sy ":1657,"něj":4079,"tak":7734,"něk":4566,"něl":1306,"tal":4758,"tac":1310,"tad":1311,"tba":1700,"tav":8149,"tat":3631,"tas":1264,"tar":5738,"tan":5175,"něn":3988,"něm":4105,"te ":2317,"svě":5020,"stř":6161,"éž ":3711,"ně ":25254,"ta ":12315,"ký ":20031,"ouž":6121,"ozá":1627,"pa ":1199,"ově":6255,"lá ":1917,"kýc":10186,"kým":4999,"par":3389,"pat":3635,"pad":7309,"pak":1199,"pal":1582,"pam":1204,"pan":2500,"pec":1647,"lád":4346,"lán":1802,"pen":1946,"per":4148,"lát":1414,"pla":3524,"ple":1738,"plo":2559,"lé ":2839,"pic":1267,"pin":4585,"pis":3803,"poz":3018,"por":4291,"pop":2679,"pov":4058,"pou":6174,"pot":2565,"pos":5385,"poj":5321,"poh":2575,"pom":2458,"pon":2428,"pok":1746,"pol":12866,"pob":1330,"poc":1426,"pod":13049,"láš":1492,"po ":3633,"lí ":2188,"psk":1538,"hů ":1499,"pub":3056,"poč":3912,"pra":9290,"prv":4970,"psa":1291,"pu ":1292,"pri":1503,"pre":2679,"pro":30809,"prá":4755,"poř":1288,"py ":1312,"prů":1838,"lý ":2387,"má ":3426,"mát":1458,"mán":1640,"išt":1515,"mén":4741,"mí ":3982,"mís":2771,"mír":1517,"mín":2069,"ra ":9350,"eži":1875,"ngl":4299,"ni ":2504,"nge":1442,"nej":9246,"nek":1332,"nen":1766,"nem":4884,"nep":1631,"ner":3084,"net":2605,"nes":3430,"ež ":1798,"ng ":3190,"neb":12487,"nec":2031,"ned":1356,"nfo":1282,"nač":7033,"nez":1422,"nco":2679,"nci":5166,"nce":5874,"ne 
":3607,"ndo":1521,"ndi":1883,"nde":1639,"nda":1497,"nak":1732,"nal":3376,"nam":6213,"nan":1425,"nap":3429,"nar":1416,"nac":3724,"nad":3749,"nd ":2404,"nav":1415,"nat":3143,"nas":1417,"naz":1632,"na ":53204,"mys":1461,"mož":1630,"nož":1329,"ny ":15609,"nuj":1460,"nut":2319,"nto":3206,"ntu":1650,"ntr":2728,"nti":4061,"nta":3294,"nte":4199,"nst":3785,"nsk":18177,"nu ":7672,"ičn":2337,"nné":1576,"nt ":3215,"jím":1621,"jíc":10934,"noh":2321,"nol":1273,"nom":2363,"not":3595,"nos":13323,"nor":2008,"nov":9782,"nou":8746,"než":1270,"nno":1485,"nič":2090,"jí ":9261,"neš":1391,"no ":7023,"nka":1355,"nko":1306,"eží":3799,"nih":1649,"nie":1760,"nic":12884,"niz":2240,"niv":1387,"nis":3526,"nit":3056,"nin":2133,"nik":6308,"ogr":3240,"ogi":4525,"odí":1145,"ohl":1659,"oho":4168,"oha":1909,"obě":2552,"oj ":1676,"ok ":1263,"ohy":1525,"ojo":1142,"ojm":1998,"oji":1192,"oje":7276,"odě":2068,"obř":1215,"ol ":1430,"oce":9242,"och":4480,"oci":2151,"ock":1389,"obs":1745,"obv":2042,"obu":1622,"oby":3426,"ká ":15983,"ode":4045,"odl":3777,"odi":2898,"odo":4607,"odp":1587,"odn":14263,"ods":1272,"odr":1258,"of ":1489,"oda":2197,"kán":1145,"ody":1671,"odv":1246,"odu":5226,"ofi":1587,"obí":2810,"oba":1485,"od ":15166,"obo":2639,"obr":2915,"obl":4657,"obn":3328,"obj":2067,"obi":2455,"obd":1788,"obc":2803,"obe":3958,"ový":12973,"orů":1170,"ové":14190,"ozn":6156,"ozl":1181,"ouč":4572,"ozo":1460,"ozd":2636,"oze":3842,"ová":16410,"ozi":1341,"oty":1225,"oud":1374,"oub":1598,"ouc":1601,"otk":1156,"oti":3017,"ote":2572,"oto":6726,"otn":1824,"ost":32198,"ota":1788,"otb":1150,"ov ":3056,"osi":1402,"osk":1491,"ose":1306,"osp":1323,"osm":1339,"osl":4606,"oso":3077,"ovy":1368,"ovi":10902,"ovn":5524,"ovo":6751,"ovs":3742,"ouz":3479,"ova":19936,"ove":6197,"ouh":2082,"oun":1766,"oup":1457,"ous":3223,"out":2402,"opo":2402,"opi":2216,"ope":2692,"opa":2307,"os ":1967,"opu":1150,"opr":3218,"olí":1626,"ops":1833,"or ":5513,"ork":1330,"orm":4795,"orn":2809,"oro":4330,"ord":1649,"ore":3272,"oná":1885,"org":3417,"ori":6242,"ou ":37147,"omě":1557,"osa":2963,"ort":3045,"ors":1818,"oru":3220,"ory":1722,"omá":2183,"ora":3762,"ívá":2944,"íze":1985,"ola":2087,"on ":7329,"oli":8828,"ole":10375,"ols":1995,"oln":2388,"olo":8650,"oly":1795,"olu":2611,"oka":1642,"om ":1785,"íst":3954,"ké ":30775,"ísl":1496,"oke":1629,"íta":1783,"okr":4661,"oko":4002,"íků":1200,"oku":8537,"ona":3352,"ond":1711,"onc":1948,"one":2668,"ong":1188,"oni":3548,"ono":3674,"ons":4146,"ont":3048,"onu":1779,"ony":1528,"oma":2266,"ome":3465,"omi":2447,"kéh":13733,"kém":5563,"omp":1767,"omo":5000,"omu":2175,"íva":3319,"la ":20221,"ími":2048,"íms":1206,"íns":1474,"ím ":20382,"kvě":1156,"ín ":1890,"íle":1385,"le ":15329,"íro":1439,"írk":1295,"íse":1140,"lac":1653,"lad":8838,"ípa":1629,"lan":4986,"lam":1157,"lat":5358,"las":7702,"lav":8047,"krá":4170,"kup":4964,"kum":1330,"kul":2572,"ky ":27061,"ích":18473,"kte":27582,"íce":2062,"ídl":1751,"ců ":1941,"ktr":2141,"ktu":2012,"kti":3026,"kto":1589,"kyt":2072,"ík ":3909,"ící":14415,"ího":9884,"lok":1275,"lon":1573,"lké":1191,"lom":1730,"lod":1650,"loh":1236,"log":5594,"lký":1221,"los":3409,"lou":5777,"lov":17182,"lni":1153,"lež":2967,"lič":1203,"lmi":1186,"ltu":1148,"dů ":1701,"lub":1957,"lsk":6596,"lně":2708,"lné":1553,"lní":11343,"lu ":4880,"lný":1549,"liš":1271,"li ":6590,"lez":2008,"ház":6195,"lev":1768,"les":3807,"let":9902,"ler":1221,"lem":5693,"len":8028,"lek":3907,"lej":1379,"led":7715,"lec":1964,"eň ":1751,"lo ":9285,"lla":1318,"lle":1365,"lli":1334,"lko":2458,"lky":2194,"lka":1525,"leč":4689,"hé ":1767,"lm ":1297,"ll 
":1176,"lit":5959,"lis":4420,"lin":6175,"liz":1555,"liv":3315,"lic":7730,"lid":2928,"lia":1350,"lik":5494,"lig":1241,"lie":1691,"ma ":3621,"mac":1223,"maj":1332,"mar":1328,"mal":2994,"man":3894,"mat":5599,"me ":1813,"med":1375,"mec":4748,"met":4900,"mer":5988,"mem":1269,"iál":4186,"men":11122,"mez":6394,"ly ":6413,"lož":5290,"moc":2506,"mob":1461,"mod":2374,"mon":2073,"mov":3077,"mor":1679,"mos":2024,"mot":2793,"mou":1328,"mní":1927,"mu ":8025,"msk":2845,"moř":3176,"my ":2400,"mus":1983,"mun":1943,"mi ":12425,"ešn":1222,"min":3830,"mil":1861,"mis":1525,"ešt":1436,"mic":2749,"mo ":1420,"ií ":2195,"mno":2859,"tří":4710,"tši":1374,"tší":2640,"ůže":1655,"Čes":3563,"vě ":4735,"věz":1289,"vět":11745,"tři":1228,"věk":2228,"tře":8474,"věd":2018,"výr":2025,"výs":1250,"vým":2640,"výz":2611,"výš":1486,"čas":5969,"zná":4232,"uča":1905,"zní":1327,"víc":1941,"čen":7616,"čel":3765,"čet":1680,"čes":6771,"čer":2858,"zný":1440,"zu ":1323,"zsk":2646,"učá":1855,"či ":4977,"rů ":3148,"zuj":1535,"čit":1616,"způ":1462,"čin":2747,"růz":1961,"čka":1735,"zyk":1998,"čle":1629,"čov":3019,"vý ":6798,"čno":3388,"výc":8997,"zi ":5006,"zač":1189,"zej":2847,"zev":2458,"zen":7446,"ván":11057,"zem":8432,"vál":2700,"zel":1300,"zer":1349,"vá ":12375,"ze ":9310,"zde":1205,"zab":1246,"zac":2566,"těj":1439,"zah":1842,"těn":1733,"zal":3135,"těl":1552,"zas":1298,"ví ":7249,"zor":1418,"zov":3172,"zpr":1480,"vém":1518,"rší":1390,"véh":3281,"zna":13601,"zni":4038,"vé ":13437,"zko":1259,"zkr":2046,"zdí":1212,"zař":1286,"zin":1828,"zit":1344,"zdě":2035,"těž":1441,"yrá":1302,"yva":1858,"ytu":1246,"ytv":1652,"yto":1723,"yst":4390,"yso":1389,"ysk":1190,"ysl":1771,"tě ":5527,"za ":5089,"ych":2524,"tým":1477,"tý ":2037,"týc":2116,"ykl":2523,"yla":6390,"ylo":2929,"yly":1314,"yl ":10326,"tím":1511,"tém":3602,"tí ":11245,"též":3726,"pův":2875,"půs":2701,"ože":4937,"ožn":1513,"oži":1506,"tán":1850,"pří":8711,"tát":4498,"ož ":2063,"tál":2187,"té ":3506,"xis":1170,"př ":1535,"tá ":1819,"při":7219,"pře":14856,"sér":1644,"síd":1352,"rče":1253,"oří":2027,"vzn":2563,"vzd":1455,"vyk":2012,"vyr":1739,"vyd":1992,"vys":3366,"vyt":1966,"vyv":1398,"rý ":8817,"vyš":2053,"rýc":1165,"oři":1304,"oře":3041,"vní":15429,"vro":2455,"vrc":1706,"vst":1754,"vsk":4082,"vu ":2977,"nů ":2566,"vně":2710,"vuj":1416,"vy ":4952,"voř":3595,"vil":1507,"vin":6325,"vic":2325,"vid":2543,"viz":1532,"vit":4611,"vis":2095,"ré ":7668,"vla":2806,"vo ":2147,"vna":1206,"vno":1207,"vni":1709,"vod":8774,"voj":3429,"vol":2921,"vor":1476,"vot":1529,"vov":1657,"vou":5924,"voz":2714,"vlá":2701,"vi ":1398,"ver":8833,"ves":2177,"rát":2382,"ráv":4812,"mž ":1199,"rán":2154,"ven":9163,"vem":1785,"rál":6073,"vel":5581,"ráb":1423,"ved":2640,"rác":2253,"ve ":12748,"rá ":6574,"val":7096,"van":12007,"var":2621,"vat":8883,"pěv":1148,"vaz":1671,"vac":2111,"vad":1154,"vaj":2879,"můž":1309,"va ":9872,"uze":1855,"uzs":2041,"utí":1162,"urč":2415,"usk":3978,"use":1494,"umě":1413,"ust":3573,"ute":1270,"mů ":1642,"uto":4678,"us ":7285,"ut ":1210,"ura":2035,"ure":1155,"uri":1389,"urn":1235,"uro":1540,"uru":1160,"ury":1381,"upi":4594,"upe":1451,"upn":1306,"umo":1169,"umb":1314,"ume":1179,"ují":7869,"unk":1600,"uni":2964,"uko":1228,"um ":4532,"ult":2459,"ulo":1228,"uhé":1788,"uli":1303,"ula":1664,"uje":11818,"ucí":1196,"uho":1206,"ude":2608,"udi":1376,"ubo":1407,"uce":1140,"uch":2624,"uh ":1827,"udo":2156,"ub ":1463,"ubl":3299,"ud ":1221,"trů":1395,"tví":6726,"typ":2401,"ty 
":7625,"tvr":1530,"očí":2254,"tvo":4249,"trá":2691,"tva":2695,"tur":4631,"tuj":2396,"tup":3639,"tud":1736,"pís":1810,"oče":1902,"tre":1691,"tra":10732,"tné":1153,"oči":1195,"tri":3505,"tru":3242,"tro":11520,"očn":1259,"tní":9445,"tu ":7164,"tný":1437,"tsk":6630,"tně":1562,"lů ":2152,"to ":14761,"lší":1771,"tna":1155,"tno":1779,"toh":1166,"tou":3172,"tov":8578,"tos":1217,"tom":3677,"ton":3319,"tok":1791,"tol":3953,"tor":9066,"top":2079,"til":1384,"tik":3154,"tit":2068,"tis":2205,"tin":7629,"tio":2588,"thu":1152,"tic":11897,"teč":1644,"tiv":5666,"tko":1227,"tka":3033,"tli":2690,"tky":1508,"tla":1310,"tem":4570,"ten":3204,"teo":1298,"tej":1972,"tek":2779,"tel":12611,"tec":6325,"ted":1925,"tev":1290,"ter":37331,"ti ":14561,"tač":2083,"the":1375,"ží ":4449,"žíc":1641,"žív":5443,"yšš":1364,"zýv":1380,"žil":1490,"živ":2602,"žit":2707,"žij":1345,"žov":1554,"že ":5417,"žel":1619,"žen":7485,"záv":1903,"záp":4090,"zák":3066,"uži":2255,"ýt ":1211,"ýro":1252,"ým ":11479,"ými":4161,"žní":2919,"ých":32937,"žsk":1623,"žně":2362,"vší":1568,"zí ":4964,"vša":1280,"vše":1958,"uží":6581,"tů ":3433,"ýzn":2182,"ývá":1959,"ýva":2863,"uše":1228},"n_words":[11333226,13010717,8780627],"name":"cs"}
\ No newline at end of file
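The profiles in this hunk (cs above, cy and da below) share one layout: a single-line JSON object whose "freq" map holds occurrence counts for 1-, 2- and 3-character n-grams, whose "n_words" array holds the corresponding totals, and whose "name" field carries the language code. A minimal sketch of reading such a file under Python 3, where the text encoding must now be passed explicitly; load_profile is a hypothetical helper for illustration, not langdetect's own loader, and it assumes the file decodes cleanly as UTF-8:

    import json

    def load_profile(path):
        # One profile file = one JSON object:
        #   "freq"    -> n-gram -> occurrence count
        #   "n_words" -> totals for 1-, 2- and 3-grams
        #   "name"    -> language code
        with open(path, encoding='utf-8') as f:
            return json.load(f)

    profile = load_profile('nlp_resource_data/langdetect/profiles/cs')
    print(profile['name'], profile['n_words'])
    # cs [11333226, 13010717, 8780627]  (totals from the cs profile above)
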
diff --git a/nlp_resource_data/langdetect/profiles/cy b/nlp_resource_data/langdetect/profiles/cy
new file mode 100755 (executable)
index 0000000..3de9ea3
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/cy
@@ -0,0 +1 @@
+{"freq":{"D":338489,"E":166377,"F":175306,"G":453070,"A":371519,"B":275760,"C":751456,"L":302911,"M":508956,"N":226941,"O":101563,"H":125996,"I":97942,"J":76318,"K":27436,"U":63033,"T":165496,"W":129096,"V":18216,"Q":3136,"P":245280,"S":271355,"R":184371,"Y":305003,"X":1831,"Z":4074,"f":4291702,"g":4740469,"d":12541164,"e":10317499,"b":2060020,"c":3013168,"a":13050188,"n":11443262,"o":7624533,"l":7117558,"m":2908226,"j":9805,"k":97581,"h":4955284,"i":9078358,"w":5492011,"v":80439,"u":3661293,"t":3899111,"s":3554349,"r":9375348,"q":5143,"p":918555,"z":19700,"y":11865619,"x":17901,"\92":1460,"£":24959,"ï":24620,"î":12446,"í":2171,"ë":2404,"ê":12514,"é":9458,"è":1489,"â":204955,"á":11089,"û":2546,"ö":4198,"ô":116116,"ó":1726,"ŵ":34962,"ŷ":23825," l":444350," m":892336," n":991408," o":1241413," h":924635," i":1098661," j":1707," k":16142," d":1472270," e":1030930," f":928615," g":2001832," a":3061352," b":1038202," c":1432889," y":4250280," u":270784," t":410248," w":722625," v":2781," p":602564," s":805496," r":1506799," J":76301," K":27204," H":125793," I":97830," N":226785," O":101451," L":302168," M":506691," B":275536," C":750240," A":370037," F":175148," G":448916," D":334497," E":166168," Z":4022," Y":304965," X":1781,"о":1412," S":271032," R":184243," Q":3067,"а":1632," P":245017," W":128867," V":18126," U":62998," T":165103," â":171287," ô":68394," £":24955," ŵ":5995,"ا":1484,"A ":51136,"F ":1624,"Da":83430,"Cu":4761,"Cw":8435,"Cy":336581,"Cl":14084,"Co":52549,"Cr":41637,"Ce":66425,"Ch":48051,"Ci":4683,"G ":8214,"Ec":3834,"Ed":13126,"Ea":2252,"Eb":7093,"Dw":4392,"Du":9735,"Dy":52260,"Do":17846,"Dr":14255,"De":55959,"Dd":28273,"Di":51623,"Fe":52440,"H ":1793,"Fa":17497,"Eu":3442,"Ev":3997,"Ew":22302,"Er":29255,"Et":3900,"Es":7913,"En":10149,"Em":3160,"Ei":18716,"El":12213,"Ef":13230,"Eg":4955,"Ge":36048,"Ga":68330,"I ":18154,"Fy":5682,"Fw":2093,"Fu":2056,"Fr":13553,"Fo":36065,"Fl":3457,"Fi":6588,"Ff":31987,"B ":3900,"C ":51723,"Au":3464,"Aw":14409,"Ar":59958,"At":8101,"As":10798,"D ":3873,"Ba":34881,"Ae":32249,"Af":18144,"Ag":3289,"Ab":19319,"Ac":8551,"Ad":20323,"Am":33758,"An":24599,"Ap":2294,"Ai":10453,"Al":50222,"By":49106,"Bw":10815,"Bu":24953,"Br":56488,"Ca":121524,"E ":5353,"Bh":1396,"Bi":5859,"Be":36724,"Bo":26776,"Bl":19768,"Ko":1892,"Le":24072,"Li":11077,"N ":2781,"La":23873,"Lu":6554,"Ly":19349,"Ll":197561,"Lo":16574,"Me":55686,"Mh":9909,"Mi":19373,"O ":20786,"Ma":353446,"Mc":1615,"My":8802,"Mw":2121,"Mu":8098,"Mr":4592,"Mo":28206,"Nh":3558,"Ni":66554,"Ng":81458,"Ne":23468,"Na":15773,"P ":4199,"Q ":4850,"Nw":4046,"Ny":1586,"Nu":1470,"No":23777,"Ol":5539,"On":7737,"Og":3813,"Oh":2023,"Oe":13014,"Of":2109,"Ob":1524,"Gi":6350,"Gl":18922,"Gr":31398,"Go":63652,"Gu":3975,"Gy":66269,"Gw":140934,"J ":2021,"Ha":33467,"He":30826,"Hi":10111,"Ho":16382,"Hu":14940,"Hw":2512,"Hy":14748,"Ia":6486,"Id":2894,"If":1982,"Ie":12677,"Io":10592,"In":11178,"Iw":8666,"Is":11408,"Ir":8587,"Ja":20953,"L ":1847,"Je":8470,"Jo":39810,"Ju":3096,"Ka":5774,"M ":8246,"Ki":8305,"Ke":4703,"Ur":4373,"Un":36736,"Uc":8498,"W ":2665,"Ty":9645,"Tw":3412,"Tu":6322,"Tr":35826,"Ts":3701,"To":17417,"Th":31400,"Ti":9045,"Te":14091,"Ta":24608,"V ":3179,"Sw":30740,"Sy":9582,"St":29219,"Su":12733,"Wr":10595,"Wo":5648,"Wi":24699,"Wh":3162,"Wl":2195,"Wa":16981,"We":55163,"Y ":52072,"Vo":1516,"Vi":5015,"Va":5513,"Ve":2919,"Uw":3260,"Pw":24712,"Pu":5107,"Pr":53804,"S ":7880,"Py":1882,"Pe":60817,"Pa":41773,"Pl":14210,"Po":23538,"Pi":5405,"Ph":9698,"Os":15921,"Or":7777,"R 
":3429,"Gŵ":1552,"Ow":7608,"Se":32095,"Sc":4665,"Si":41090,"Sh":7678,"Sg":4461,"Sm":1606,"Sl":2027,"Sp":3276,"So":13929,"Ru":6905,"Ry":6829,"Rw":4999,"U ":13762,"Sa":60869,"Sb":4209,"Re":10571,"Ri":8224,"Rh":86312,"Ro":44014,"Qu":2233,"Mô":8098,"T ":3460,"Ra":12393,"Tŷ":1928,"b ":188407,"a ":1388065,"Yc":2873,"Yn":102548,"Ym":25982,"Yo":2068,"Yr":86694,"Ys":28919,"Wy":8865,"i ":2272558,"fw":93325,"fy":431027,"gd":5453,"ge":277096,"gf":18580,"câ":3046,"ga":724292,"gb":3402,"fl":180022,"fg":1481,"ff":496818,"fi":195556,"fh":2098,"fs":1452,"fr":272586,"fu":97232,"ft":30862,"fo":484371,"fn":177691,"gy":688780,"gw":585146,"dâ":1740,"hf":16701,"hg":20440,"hd":28466,"he":488866,"hb":5711,"ha":816562,"gn":8582,"gl":184207,"gi":178183,"gh":227509,"gg":2030,"gu":106896,"gt":3652,"gs":2779,"gr":229726,"go":520931,"du":203071,"dw":342258,"dy":517237,"g ":931036,"ea":96187,"eb":183710,"ec":186277,"ed":1610960,"de":623948,"dd":3129194,"dg":7414,"df":82859,"di":1292188,"dh":11741,"dm":2081,"dl":170195,"do":681355,"dn":43219,"ds":26351,"dr":451732,"ew":322523,"ex":7983,"eu":450210,"ev":8136,"ey":50899,"ez":2026,"fa":475846,"h ":1177936,"fd":7994,"fe":594002,"fb":7441,"eh":22151,"eg":311387,"ef":503193,"ee":16361,"el":858160,"ek":2704,"ei":1198797,"ep":14479,"eo":166913,"en":926477,"em":108663,"et":784865,"es":501129,"er":1107411,"ca":313521,"e ":860790,"bw":117830,"by":344044,"bs":2707,"br":185005,"bu":80984,"bo":359315,"bl":303416,"bh":1799,"bi":52710,"bb":4778,"be":239715,"db":3993,"da":1020476,"f ":718154,"cy":633555,"cw":48093,"cu":7350,"ct":62415,"cs":12784,"cr":139745,"co":161171,"cn":2016,"ck":25213,"cl":59380,"ci":33119,"ch":1057816,"ce":98894,"dG":3638,"cc":2543,"c ":343775,"az":3933,"ay":14673,"ba":169166,"d ":3835329,"at":471705,"as":344140,"ar":1444434,"ax":1835,"aw":461613,"av":36603,"au":1090598,"ak":7934,"al":543082,"ai":983141,"aj":1537,"ao":21869,"ap":37508,"am":461201,"an":1743050,"ac":458246,"ad":1129155,"aa":2019,"ab":77615,"ag":211373,"ah":44754,"ae":1532785,"af":522427,"nu":171108,"nt":532413,"ns":84700,"nr":100822,"no":616110,"nn":767548,"nz":1568,"ny":424305,"nw":268361,"oe":579869,"of":163174,"oc":86294,"od":1314157,"oa":8299,"ob":214212,"om":118946,"on":847751,"ok":5774,"ol":1239958,"oi":78983,"og":340671,"oh":65618,"ot":47787,"os":336191,"ov":8960,"ou":32901,"op":89124,"oo":15722,"or":610467,"gŵ":2290,"r ":3281337,"ow":30262,"oy":15398,"pe":161740,"lâ":1645,"pa":189940,"pl":48051,"po":104525,"ph":62097,"pi":31396,"lo":304547,"ln":2199,"lm":22755,"ll":1220502,"ls":17011,"lr":6616,"lp":14553,"lw":345911,"lv":2550,"lu":249977,"lt":77926,"ly":476053,"dŵ":6771,"o ":1345269,"mc":23468,"md":109708,"ma":574820,"mb":31654,"mg":56104,"mh":64042,"me":302440,"iâ":1734,"mf":4671,"ml":92596,"mi":149661,"mn":16796,"mm":6621,"mp":32734,"mo":141620,"mr":302752,"mt":1985,"ms":42608,"mu":106959,"mw":147169,"my":85808,"p ":50248,"na":672622,"nb":21604,"nc":88780,"oD":3640,"nd":345573,"ne":760383,"nf":73346,"ng":590640,"nh":99637,"ni":826694,"nj":1540,"nk":5916,"nl":101812,"nm":6203,"fô":1550,"ki":8663,"kh":2267,"gâ":2707,"ke":13073,"ka":5930,"m ":593528,"ky":1861,"ks":5032,"ko":2829,"gî":3874,"kl":1969,"km":14280,"gê":2506,"li":629190,"lh":7775,"lk":4818,"le":691990,"ld":82933,"lg":46948,"hâ":4704,"lf":83881,"la":750380,"lc":95871,"lb":46438,"n 
":4787804,"dî":1547,"hr":183684,"hs":3825,"hp":4147,"hw":371101,"ht":6621,"hu":119315,"hi":282969,"hn":55025,"ho":475196,"hl":49179,"hm":5269,"dé":1797,"id":664839,"ic":192410,"ib":56293,"ia":1263604,"ih":12172,"ig":415868,"if":410052,"ie":166715,"hy":794639,"dô":5622,"k ":32674,"ir":477062,"is":369024,"it":440625,"iu":8410,"iv":7571,"iw":257532,"ix":2024,"ii":2747,"ik":6081,"il":405265,"im":67678,"in":760539,"io":738104,"ip":16792,"ji":1668,"iz":3140,"iy":26973,"l ":1847662,"ja":2972,"tî":3276,"wy":1614822,"sô":8739,"z ":4324,"xa":1792,"tâ":7066,"tá":6012,"wg":42604,"wh":1564,"wi":219189,"wl":160359,"wm":73543,"wn":705438,"wo":131443,"wp":14865,"wr":373917,"sï":6838,"ws":65535,"wt":12136,"wu":13055,"rô":9205,"y ":1910993,"wb":38887,"wa":392191,"wd":79870,"wc":88228,"wf":14814,"we":891527,"ré":1461,"vi":40244,"rï":4655,"rê":1538,"vo":3059,"uw":28315,"ve":20282,"va":11557,"x ":9287,"ui":8429,"uk":2069,"ul":134694,"ue":20744,"uf":15081,"ug":26706,"ur":216040,"us":144900,"ut":48913,"um":24406,"un":441720,"uo":44570,"up":3923,"ty":121986,"tz":1790,"tu":130624,"tt":25031,"tw":37964,"ub":10188,"ua":60074,"ud":237653,"uc":38880,"w ":540641,"lŷ":9710,"to":153977,"tn":16677,"pê":3111,"tm":2048,"tl":20346,"uM":1938,"ts":16715,"tr":321747,"tg":40447,"tf":4449,"te":321285,"ti":203530,"th":1535557,"v ":2088,"tb":45909,"tc":3805,"ta":264272,"su":69725,"ss":22899,"st":428431,"sy":392839,"sw":79656,"sl":16153,"sk":4499,"sn":55412,"sm":8824,"sp":4409,"so":185188,"sr":5002,"sd":3729,"sc":7973,"sf":7557,"se":260799,"sh":30472,"sg":271543,"si":399846,"u ":2090991,"sa":248766,"sb":54079,"rr":55164,"rs":84760,"nï":7723,"rt":328516,"ru":282945,"rv":2570,"rw":329957,"ry":365148,"rp":55838,"ro":602514,"rn":152852,"rm":51319,"rl":78753,"rk":10340,"ri":836480,"rh":499517,"rg":80259,"rf":190186,"re":579529,"rd":310455,"rc":129419,"rb":107129,"ra":893957,"t ":587222,"iŵ":5613,"mô":5956,"qu":4251,"mâ":2049,"s ":948343,"hŷ":8102,"py":7720,"lö":2760,"pt":11534,"pu":26642,"pw":69677,"pp":4438,"pr":132094,"ps":6999,"tŷ":1784,"rŵ":7188,"zi":1900,"ze":2519,"za":3932,"zo":1636,"yg":171315,"yh":72170,"ye":5474,"yf":684944,"yc":141109,"yd":1928458,"ya":51106,"yb":73857,"pŵ":1489,"yw":532100,"yt":123198,"ys":573646,"yr":754273,"yp":2904,"yo":15126,"yn":3470264,"ym":959076,"yl":365116,"yi":3130,"£ ":24958,"áu":7736,"âl":3882,"ân":16650,"âd":3031,"âi":1773,"âu":2621,"âr":2850,"â ":172133,"ôr":11547,"ôl":78411,"ôn":22987,"ïo":4314,"ïa":19236,"îl":4095,"îm":5990,"ên":2267,"êl":5660,"êm":2845,"ép":1550,"é ":1708,"ûm":2087,"öy":2639,"ŷ ":5300,"ŵe":1954,"ŵn":1440,"ŵp":7052,"ŵr":14740,"ŵy":8132,"ŷd":5489,"ŷn":11318,"ŷr":1615," £ ":24954," Ga":68273," Ge":35950," Fy":5682," Fw":2093," I ":9807," Fo":36048," Fu":2054," Fr":13547," Fi":6558," Ff":31955," Fl":3435," Ha":33448," He":30815," Gw":140920," Gy":66257," J ":1675," Go":63609," Gr":31246," Gu":3913," Gi":6313," Gl":18894," If":1978," Ie":12669," Id":2890," Ia":6484," Hy":14744," Hw":2512," Hu":14872," Ho":16371," Hi":10107," Je":8469," Ja":20951," Iw":8664," Ir":8586," Is":11396," In":11117," Io":10590," M ":2051," Ka":5749," Ke":4616," Ki":8206," Jo":39808," Ju":3094," La":23801," Le":23844," Li":11024," Ko":1892," Mc":1615," Ma":353332," O ":18653," Mi":19348," Mh":9907," Me":55666," Lo":16544," Ll":196342," Ly":19337," Lu":6544," Ne":23415," Na":15689," Ng":81454," Nh":3558," Ni":66542," Mr":4592," Mo":28166," My":8789," Mu":8036," Mw":2121," A ":41477," B ":1694," C ":12751," Ap":2290," Am":33754," An":24588," Al":50117," Ai":10453," Ag":3289," Ae":32207," Af":18136," 
Ac":7374," Ad":20296," Ab":19309," Ba":34835," D ":1809," Aw":14409," Au":3460," At":8099," As":10796," Ar":59933," Be":36689," Bi":5853," Bh":1394," Bl":19761," Bo":26726," Br":56435," Bu":24947," Bw":10815," By":49104," Ca":120911," Ce":66408," Ci":4665," Ch":47285," Cl":13991," Cr":41618," Co":52114," Cu":4731," Cy":336462," Cw":8435," Da":83350," Di":51598," De":55946," Dd":24619," Dr":14253," Do":17642," Dy":52256," Du":9735," Dw":4392," Ea":2216," Eb":7093," Ec":3834," Ed":13121," El":12185," Ei":18714," Eg":4955," Ef":13229," Et":3898," Es":7909," Er":29251," En":10082," Em":3156," Ew":22287," Eu":3440," Ev":3993," Fe":52438," Fa":17460," Wy":8863," Wr":10593," Wo":5603," Wl":2195," Wi":24683," Wh":3073," We":55056," Wa":15114," Y ":51971," Ys":28919," Yr":86693," Yo":2066," Yn":102547," Ym":25969," Yc":2871," a ":830478," Tŷ":1928," R ":2099," Gŵ":1552," Ow":7608," Os":15855," Or":7771," Po":23478," Pl":14184," Pi":5403," Ph":9643," Pe":60810," Pa":41679," Nw":4046," Ny":1586," Nu":1468," No":23771," Ol":5537," On":7732," Oh":2023," Og":3807," Of":2088," Oe":13006," Ob":1522," Ra":12355," T ":1831," Mô":8084," Qu":2176," Ro":43958," Re":10555," Ri":8220," Rh":86232," Py":1882," S ":2106," Pr":53766," Pu":5103," Pw":24712," Sy":9580," Sw":30728," Su":12731," St":29121," Ta":24600," Th":31386," Ti":9036," Te":13995," Tr":35797," Ts":3677," To":17329," Rw":4999," Ry":6829," Ru":6903," Sb":4209," Sa":60443," Sg":4461," Sh":7649," Si":41046," Sc":4619," Se":32084," So":13916," Sp":3220," Sl":2025," Sm":1574," Uw":3258," Va":5509," Ve":2889," Vi":4990," Vo":1509," Tu":6235," Tw":3410," Ty":9645," Uc":8498," Un":36734," Ur":4373," im":11191," in":76482," il":4354," is":17036," m ":11207," gâ":2361," gy":546263," gw":478286," ha":96076," he":162837," gi":11285," gl":41252," gr":78474," go":196462," gu":1711," hy":365311," dô":5426," ia":51916," id":44170," ic":11451," if":15524," ie":33757," hi":37580," dé":1483," ho":95011," dî":1523," hu":28386," hw":135056," nh":11270," ni":121995," ng":46712," ne":182669," na":145170," my":52136," mw":82991," mu":7930," mo":53753," ol":23717," on":100399," og":16388," oh":48015," oc":8291," od":15636," oe":180132," of":38817," ob":2855," nw":4888," ny":5794," no":39883," le":65827," li":10695," n ":431744," la":42246," gê":2462," km":13885," me":211425," mh":11846," mi":65182," ml":11455," dŵ":4098," o ":734485," ma":374621," lu":4706," lw":8217," ly":12507," ll":290079," lo":8575," ae":23414," af":26254," ag":69810," ab":5223," ac":313769," ad":158208," am":308430," an":165543," ap":9179," ai":35372," al":81881," au":70585," aw":67212," ar":754053," at":122334," as":18494," d ":1572," ba":74237," bi":7903," be":114726," bo":279521," bl":71311," by":219304," bw":77585," bu":51690," br":137949," ca":253292," e ":6846," c ":1834," er":135688," et":35799," es":9572," en":103379," em":1894," ei":422771," el":25990," ef":56847," eh":7557," eg":29718," fe":207343," fa":114986," eu":135140," ew":2978," fu":28438," fr":21022," fo":146885," fl":40338," fi":19106," ff":161699," ge":127960," câ":2638," ga":511622," i ":828084," fy":138094," fw":49249," cl":32411," cn":1591," co":109371," cr":63415," ce":72686," ch":221519," ci":5186," da":218944," cu":2628," cw":37840," cy":626409," do":40629," dl":2183," dr":194410," de":169791," dd":436087," di":205363," ec":22162," ed":21781," ea":10130," dw":38607," du":15808," dy":134445," yn":1820343," ym":302114," yw":194442," yr":378078," ys":129891," pŵ":1449," yf":2310," yc":29594," yd":132913," tî":3180," wy":119723," 
sô":8613," tâ":4127," tŷ":1782," ry":22726," rw":2339," u ":39430," sa":81007," sb":3121," se":119945," si":112421," sg":18999," so":6324," mô":5938," ra":82908," re":37316," ri":5735," rh":384202," ro":65984," pw":59223," pu":5536," pr":117209," hŷ":2144," s ":4882," py":4098," os":35170," op":3331," or":31743," gŵ":2020," r ":896554," pe":127692," pa":91740," pl":41516," po":93931," pi":2900," ph":53142," wa":70353," we":305205," rô":8100," y ":1258970," wr":72987," wo":2162," wi":14944," wn":75369," wl":23728," uw":16002," uc":29790," w ":35333," ty":27053," tw":9262," tu":42804," ur":3073," un":179183," ta":58107," sw":41490," sy":328174," st":56328," su":17891," tr":125613," tl":5636," pê":2394," to":14311," th":49752," ti":12364," te":54434," â ":170861," ôl":68336," ŵy":5585,"BC ":3029,"AC ":1466,"AM ":3083,"AQ ":4611,"Fe ":14074,"Fel":30024,"Fen":1555,"Fer":1865,"Ffo":3170,"Ffl":4215,"Ffi":3556,"Ffe":3739,"Ffr":13232,"GA ":3651,"Faw":4451,"Fan":1616,"Far":2499,"Fai":1542,"Erb":3816,"Ess":3450,"Est":1737,"Eth":2283,"Ers":2768,"Ery":2581,"Esg":1785,"Eur":1788,"Eva":3195,"Ewr":21546,"Eid":3977,"Ein":2345,"Eis":4120,"Ele":2715,"Eli":4180,"Er ":16830,"Enw":3109,"Eni":2851,"Gel":9307,"Gem":2143,"Gei":4118,"Ger":7781,"Geo":3979,"Gen":6710,"Gla":4829,"Gib":2275,"Gil":1484,"Gan":11832,"Gal":17640,"Gar":7461,"Gae":18125,"Gad":5081,"Fro":1533,"Fyn":2428,"Fra":4838,"Fre":4327,"For":3219,"Fod":28712,"II ":4458,"Hil":2449,"Hin":1512,"IG ":5986,"Hef":1863,"Hel":5017,"Hed":3459,"Heb":2210,"Hea":1530,"Hen":9801,"Her":3491,"Hae":1946,"Haf":3014,"Hal":2040,"Han":6863,"Ham":2228,"Har":10025,"Haw":1645,"Gyr":1398,"Gym":42845,"Gyn":8266,"Gyd":5561,"Gyf":6063,"Gwr":3207,"Gwy":26844,"Gwe":57637,"Gwa":19610,"Gwo":1510,"Gwn":13119,"Gwl":15264,"Gwi":2629,"Gre":5983,"Gri":4784,"Gra":8936,"Gru":3564,"Gro":6225,"Gle":2868,"Gly":8806,"Gol":4717,"Gom":2792,"Gor":25978,"HS ":1490,"Gog":9938,"Gof":6071,"Goc":1412,"Gob":6546,"Ins":2483,"Ion":7787,"Ind":4805,"Idd":1600,"Iec":7293,"Ieu":3657,"Iai":2532,"Hyw":1599,"Hyd":9879,"Hut":3728,"Huw":2414,"HuM":1938,"Hug":2444,"Hon":1988,"Hol":2628,"Hof":4468,"Arg":4337,"Arf":2821,"Arc":4451,"Ard":3004,"Ara":5312,"Arb":2881,"Arm":1401,"Arl":2436,"Ari":2551,"Ath":3701,"Ast":2350,"Asi":4771,"Art":3397,"Arw":3824,"Aws":8386,"Awd":3332,"Bal":3216,"Ban":6574,"Bac":2192,"Bae":1627,"Bar":11218,"Bat":2986,"Bas":1933,"CC ":30118,"Aca":1583,"Abe":16172,"Act":3953,"Ach":1406,"Adr":4894,"Add":8121,"Ade":2895,"Aet":1896,"Ael":29221,"Afo":11935,"Aff":4708,"Aif":1390,"Ail":6673,"Am ":2367,"Ala":1841,"Alb":20867,"Ali":1390,"Ale":3047,"Alu":7537,"Alm":6621,"All":2960,"Amg":4897,"Ame":9269,"Amc":5055,"Ama":2394,"Amw":5293,"Ang":3572,"Ana":1441,"And":6722,"Ant":2809,"Ann":4580,"Ar ":18871,"But":1453,"Bus":2726,"Bur":3568,"Bu ":12249,"Bry":11840,"Brw":1809,"Byw":5055,"Byd":42228,"Bwy":3014,"Bwl":1803,"Bwr":4800,"DA ":1963,"Cab":3582,"Cad":8785,"Cae":18704,"Cal":3378,"Cam":3915,"Caf":13882,"Cai":8913,"Cas":11929,"Car":18471,"Cat":4229,"Can":20111,"Cap":2054,"Caw":2412,"Bea":1452,"Bet":6615,"Ber":4938,"Ben":8920,"Bel":4071,"Bei":4562,"Bed":2850,"Bla":15401,"Bre":7230,"Bra":4787,"Bro":7561,"Bri":21486,"Bod":1392,"Bon":2309,"Bor":1693,"Bot":9215,"Bou":3998,"Cyd":2800,"Cys":3130,"Cyr":1535,"Cyt":4308,"Cyn":107375,"Cym":169607,"Cyl":4078,"Cyf":35541,"Cyh":6033,"Cwm":4388,"Cwp":1455,"DdG":3636,"De 
":7240,"Dey":7530,"Der":4792,"Dew":2912,"Del":1804,"Dem":6525,"Den":3029,"Dea":2964,"Dec":4054,"Ded":4019,"Def":3522,"Ddy":1483,"Ddw":1635,"Ddu":1844,"Ddi":7938,"Dda":2772,"Dde":7404,"Dan":4200,"Dar":5106,"Dat":11996,"Dav":29514,"Daw":5636,"Daf":5736,"Dae":6708,"Dai":2416,"Dal":7568,"Cho":2344,"Chr":5432,"Che":5180,"Chi":3022,"Chw":12405,"Chy":6459,"Cle":1841,"Cla":3349,"Cei":26151,"Cef":5771,"Cel":5461,"Cen":16383,"Cer":9603,"Cha":10397,"Cri":3356,"Cra":2677,"Cre":11529,"DU ":12564,"Cry":10370,"Cru":2605,"Cro":10328,"Cly":2509,"Clw":2729,"Clu":1384,"Coc":1662,"Cof":5253,"Cod":3452,"Coe":3011,"Cor":9729,"Com":7711,"Col":7269,"Con":7331,"Dyw":10945,"Dyd":1433,"Dym":6530,"Dyn":13135,"Dys":3789,"Dyf":6316,"Dyl":8043,"Dwy":3623,"Egl":4375,"Efr":2334,"Efy":3102,"Ei ":5214,"Efa":5477,"Edr":1682,"Edw":8804,"Eco":3291,"Ebr":6340,"Dis":10684,"Dir":8301,"Dio":8272,"Din":7477,"Dim":3122,"Dil":1632,"Dig":1922,"Diw":4850,"Dur":1585,"Dro":1914,"Drw":2051,"Du ":1598,"Dre":3548,"Dra":2578,"Dr ":1404,"Dos":6032,"Dol":2547,"Don":1750,"Dor":1804,"Ne ":2808,"Nat":5045,"Nid":35510,"Nic":6041,"Ngh":73349,"Ni ":20845,"Ngw":2819,"Ngo":3145,"New":11314,"Myn":6475,"Nan":3487,"Nad":1763,"Nwy":4018,"Nor":10406,"Nof":5476,"Nod":3573,"Oes":12374,"Ogw":2061,"Ohe":1667,"Owa":3576,"Owe":3922,"Oly":2697,"Oni":2962,"Ond":3507,"Os ":13940,"Orl":2808,"Pla":11557,"Phi":3702,"Per":4523,"Pet":10052,"Pen":38815,"Pe ":2342,"Pat":1391,"Pas":1419,"Par":10602,"Pau":3598,"Pan":9885,"Pam":1963,"Pal":1960,"Gŵy":1393,"Pa ":5409,"Pwy":22571,"Pug":1781,"Pro":4264,"Pri":32126,"Pre":2572,"Pry":13518,"Pob":2584,"Pol":1666,"Pon":3230,"Pos":1652,"Por":4501,"Pow":6369,"Rad":1677,"Ran":3290,"Môr":3910,"Môn":4178,"Ise":2138,"Iri":5405,"Isl":2286,"Isr":2081,"Is ":1861,"Ira":2109,"Iwe":7768,"Jac":2440,"Jap":2066,"Jan":11107,"Jam":3046,"Jen":4444,"Jos":1675,"Jon":21672,"Joh":12108,"Joc":1759,"Kar":1967,"Ken":1525,"Kir":4162,"Kin":2076,"LWa":1851,"Lew":3371,"Lep":4312,"Leo":4883,"Lei":2167,"Lea":1755,"Law":3401,"Laf":7238,"Lan":4581,"Lli":2308,"Llo":49625,"Lla":35769,"Lle":9036,"Lly":83139,"MP ":1987,"Llw":5196,"Llu":10289,"Lin":2565,"Lit":1660,"Lun":3403,"Loe":2587,"Loc":3759,"Lor":1686,"Lon":1463,"Lyw":14731,"Lyn":2021,"Mei":2923,"Meh":6831,"Men":2309,"Mel":4571,"Mes":10425,"Mer":5529,"Mew":6590,"Met":2031,"Mea":2200,"Med":8411,"Man":6320,"Mal":3191,"Mar":18396,"Mas":2050,"Mag":1798,"Mad":2418,"Mae":282353,"Mai":8315,"Mac":3022,"Mab":1620,"Mat":8080,"Maw":10696,"Moe":1836,"Mon":4407,"Mos":1676,"Mor":14383,"Mik":1997,"Mid":1494,"Mic":6978,"Mil":4139,"Min":1665,"Mhe":1924,"Mha":1542,"Mhr":3233,"Mho":2381,"Mun":2391,"Mur":2093,"Mrw":1525,"Wyd":2054,"Wyn":4508,"Wrt":6093,"Wre":3327,"Woo":1886,"Wla":1533,"Whi":1964,"Wil":18384,"Win":1533,"Wed":6925,"Wei":36912,"Wel":2589,"Wer":1546,"Wes":3389,"Wen":1975,"War":1988,"Wat":1997,"Wal":6169,"Wa ":1629,"épa":1444,"ên ":2198,"êm ":2653,"êl ":5579,"Ysb":3159,"Ysg":20301,"Yst":5213,"Yr ":86644,"Ym ":5361,"Yn ":82879,"Ymd":5797,"Yme":3234,"Ymg":1462,"Ymh":3610,"Yma":1499,"Yng":7777,"Yny":9396,"Ych":2873,"Syl":2023,"Sym":1457,"Syr":3939,"Swy":26172,"Swe":1556,"Sut":3192,"Sue":3189,"Str":4479,"Sto":4500,"Sta":4281,"Ste":11318,"Tei":2519,"Tel":1449,"Tan":1640,"Tar":1679,"Tal":5919,"Taf":2854,"Tac":5838,"Shi":1462,"She":1962,"Sha":2447,"Sim":1534,"Sir":17352,"Sin":1925,"Sia":11038,"Ser":3627,"Sgi":1542,"Sen":8777,"Sel":1981,"Sei":5257,"Sef":7830,"St 
":2475,"SoD":3626,"Sou":1862,"Son":3211,"Rws":4266,"Ryd":2876,"Ryf":1897,"Rus":1456,"Ruf":3021,"Saf":4701,"Sai":8547,"Sam":1614,"Sal":2390,"Sac":1638,"Sae":19137,"Sch":1740,"Sar":1996,"San":14834,"Sba":3834,"SI ":1912,"Rho":14367,"Rhu":7735,"Rhi":4012,"Rha":27242,"Rhe":10007,"Ric":5194,"Rhy":21050,"Rhw":1595,"Rei":1456,"Roe":22967,"Rog":2273,"Rob":6038,"Ros":2400,"Val":3027,"Und":6300,"Une":8975,"Uni":1941,"Uno":6597,"Un ":11013,"Urd":3449,"Uwc":3250,"Twr":1583,"Tyn":1779,"Tyw":3140,"Uch":8478,"Ter":1904,"Tha":2583,"The":14207,"Tho":9841,"Thr":2820,"Tim":1714,"Tir":2224,"Top":2729,"Tor":4968,"Tom":2781,"Ton":1704,"Tou":1744,"Try":3291,"Tro":2928,"Tri":2467,"Tre":17818,"Tra":7618,"Tsi":2930,"Tud":2739,"bl ":108610,"biw":4303,"bis":1554,"bio":12210,"bil":7023,"bin":7207,"bo ":4437,"bly":64984,"blw":6354,"blo":22974,"ble":40629,"bli":7754,"blh":3186,"bla":43862,"bod":220071,"bob":76863,"bol":13954,"boe":2630,"br ":20666,"bon":13209,"bor":7226,"bot":2899,"bos":12396,"bou":1567,"bbo":2130,"be ":1980,"ban":30725,"bal":2700,"bai":11039,"bae":7107,"bac":11983,"baw":4752,"bau":11598,"bat":3598,"bas":7334,"bar":67699,"bi ":5133,"bei":27322,"beg":1762,"bed":17621,"bec":2601,"ber":53727,"ben":77191,"bel":21273,"bet":33514,"áu ":7630,"bia":9858,"byw":30592,"ca ":12587,"car":23845,"cas":8765,"cat":2777,"cau":3853,"can":83151,"cap":2655,"caw":3764,"cac":2094,"cae":84897,"cad":27764,"cam":16483,"cal":6269,"caf":14867,"cai":16996,"ce ":10178,"bri":40394,"bro":46496,"bra":12765,"bre":20136,"bry":38825,"bu ":33518,"brw":4007,"bur":5048,"bun":1597,"bum":2930,"bud":16965,"bus":15423,"by ":3295,"bwe":2992,"bwl":4609,"bwm":1750,"bwn":2420,"bwr":17316,"bwy":85191,"byd":138264,"byc":8029,"byn":105121,"byg":18794,"bys":7434,"byt":14453,"byr":14486,"am ":223910,"ake":2598,"al ":118723,"ail":88553,"ain":127939,"air":41299,"ais":49555,"ait":146887,"aig":9597,"aif":61819,"aid":215030,"aic":3098,"aho":2264,"agw":13994,"aha":34404,"agl":20938,"agf":7616,"agi":1866,"agr":2216,"agu":2103,"agn":2736,"ago":36762,"aol":8695,"aod":3349,"anw":35530,"anu":8926,"any":11333,"ano":100125,"ann":124935,"anm":2621,"ant":194347,"ans":34988,"anr":40810,"ane":76121,"anf":37821,"ang":138700,"anh":9277,"ani":73492,"ank":2431,"anl":26962,"ap ":8824,"ana":93564,"anb":13876,"anc":31617,"and":73259,"amw":8200,"amm":1546,"aml":38063,"amo":8733,"amp":12021,"ams":38011,"amr":14056,"amh":7193,"ami":4294,"amg":18088,"amd":22133,"ame":8313,"amb":16368,"amc":6627,"ama":26992,"aly":1799,"alw":28958,"alu":16444,"alt":2910,"als":1463,"alo":23556,"alm":1484,"all":223132,"ali":33870,"alc":16136,"ald":4188,"ale":30849,"alf":2478,"ahâ":2916,"ala":25578,"alb":3898,"an ":601606,"aba":9347,"abe":7222,"abi":6876,"abl":7168,"abo":19005,"abw":3992,"aby":9040,"ae ":500797,"aca":2574,"ad ":471484,"âr ":2718,"ânt":3622,"ac ":289950,"ab ":10475,"afn":10073,"afo":78808,"afr":10021,"aff":39961,"afe":2749,"afi":6417,"afl":25560,"ai ":234297,"aga":3364,"agd":2687,"age":10175,"afu":15963,"afw":7114,"afy":4556,"aeo":10243,"aen":99852,"aem":1827,"ael":201346,"aes":35220,"aer":50881,"aeg":25434,"aed":23890,"ah ":2434,"afa":19107,"afb":7029,"aew":2978,"aet":558993,"ado":61480,"adr":43817,"adl":44992,"adn":22991,"adi":13872,"âu ":2581,"add":96884,"adf":11029,"ade":56818,"aea":17754,"ag ":103800,"adw":77598,"adu":25844,"aco":1839,"ack":6984,"aci":3315,"ach":128622,"ace":4207,"ada":183664,"af ":282976,"act":12156,"acs":3551,"awy":14858,"atá":5909,"atâ":1572,"âi ":1763,"ân ":12449,"ba ":4022,"âl ":3716,"at 
":82516,"arh":23505,"arg":39851,"arf":54518,"are":26707,"ard":84561,"arc":45936,"arb":37300,"ara":94572,"arp":37146,"aro":35640,"arn":81808,"arm":3073,"arl":28705,"ark":5712,"ari":88773,"aru":32539,"arw":69199,"arr":14786,"ars":4259,"art":96475,"au ":1046405,"asa":68559,"ary":11581,"asg":34259,"asi":17231,"ash":2200,"ase":12910,"aso":39583,"asn":11022,"aon":5874,"ar ":523204,"apa":4730,"ape":4263,"api":3039,"apu":8213,"as ":105000,"avi":30525,"ave":3478,"awb":10045,"awe":56474,"awd":64555,"ay ":7450,"awa":7380,"awr":88658,"aws":26002,"awu":8536,"awn":86269,"awo":7616,"awl":31176,"awf":5005,"awg":8609,"awi":6112,"atb":44447,"ata":26686,"asu":5959,"ast":31776,"ass":3705,"asy":2064,"asw":2164,"atr":12767,"ato":14863,"ate":127341,"âd ":2953,"ati":23617,"atg":39033,"ath":60146,"aw ":36271,"att":2841,"ats":1593,"atu":14332,"atw":3787,"aty":5073,"aul":6336,"aun":2939,"aur":3535,"aus":4165,"Tŷ ":1917,"itl":3296,"itr":3898,"itt":3287,"ity":2118,"iw ":27157,"ism":1479,"iso":19060,"isn":3234,"iss":2386,"ist":40808,"isw":4736,"isy":1650,"ita":5607,"ite":5770,"ith":392067,"iti":13491,"iwe":61557,"iwc":5902,"iwa":2911,"iwl":7051,"iwm":5821,"iwi":1939,"iwg":2218,"ius":6220,"ium":1735,"iva":1668,"ive":4125,"ips":2042,"ipi":4074,"ipy":2214,"is ":122562,"ion":238019,"iop":3639,"ior":2183,"iog":25807,"iol":103945,"iom":3562,"ir ":294219,"irw":8004,"irs":4748,"iro":22294,"irp":10769,"irn":10886,"irl":2635,"iri":73365,"isi":88927,"ish":14530,"isg":40104,"ise":9048,"isc":1593,"isa":5639,"iry":7836,"irf":10853,"ire":9324,"irg":2216,"ira":3882,"ird":5855,"it ":3539,"iyn":24711,"iwy":51891,"iwn":59547,"iwt":3364,"iwr":22308,"isï":6130,"kin":4185,"km ":12530,"ki ":1558,"ker":2181,"gân":1978,"ke ":5013,"gîl":3846,"ks ":2185,"gêm":2399,"ka ":1865,"ûm ":2086,"ha ":7052,"ham":23057,"han":175332,"hao":9882,"hap":3998,"hai":110980,"hal":15683,"hau":183675,"haw":26413,"har":53834,"has":47801,"hat":7168,"haf":50664,"hae":27487,"hag":43957,"had":23600,"hac":2354,"hbl":3227,"hbe":1560,"he ":24732,"hdr":5934,"hda":4116,"hde":15547,"hel":62200,"hei":34542,"heg":5488,"hef":77520,"hec":4466,"hed":62198,"hea":5713,"heb":18613,"hew":2495,"heu":8975,"het":5133,"hes":32088,"her":56901,"heo":28941,"hen":50971,"hem":4532,"hfi":1753,"hfe":1827,"hfa":10039,"hi ":46339,"hfy":2014,"hga":10718,"hgy":5492,"hgr":2128,"hig":9778,"hif":7727,"hie":4406,"hic":1408,"hia":52147,"hio":82343,"hin":19845,"hil":13596,"hiw":5818,"his":2712,"hit":4169,"hir":27145,"hn ":11023,"hla":8657,"hle":8160,"hli":7323,"hlo":9522,"hlu":4297,"hlw":5128,"hly":5405,"ho ":2690,"hma":1566,"go ":9041,"glw":22832,"glu":20270,"gly":15650,"glo":12695,"gle":62910,"gli":17036,"gla":12914,"gog":27008,"gof":47564,"goe":5103,"god":27033,"goc":1814,"gob":13416,"gno":2138,"gni":1566,"gne":1625,"glö":2290,"ghŷ":5306,"gr ":45679,"goi":3061,"goh":1450,"gom":3052,"gol":150147,"gon":23871,"gop":6979,"gos":50988,"gor":146456,"got":1926,"gu ":82293,"gro":16743,"gry":25523,"grw":30224,"gru":1887,"gra":32649,"gri":46257,"gre":21573,"gto":3324,"glŷ":8055,"gue":3937,"gwm":8230,"gwl":41207,"gwo":1468,"gwn":53470,"gwi":23351,"gwe":153211,"gwa":134710,"gwb":10043,"gur":10624,"gus":3658,"gyb":7452,"gyc":1560,"gyd":108608,"gyf":227201,"gyh":26111,"gyl":38768,"gym":62238,"gyn":132840,"gwr":40683,"gwt":1815,"gwy":112861,"grŵ":6194,"gyw":6050,"gyt":10218,"gys":38715,"gyr":27902,"iai":68448,"iam":28216,"ial":8742,"ian":170221,"ias":4394,"iar":21162,"iau":160853,"iat":11761,"iaw":38916,"ic ":6466,"iac":3494,"iad":518957,"iae":147014,"iaf":29926,"ibl":17312,"ibi":7734,"ibr":1406,"id 
":289494,"iba":1702,"ibb":2227,"ibe":6370,"ia ":45885,"ib ":1986,"iet":3765,"ieu":2995,"iel":6330,"ien":9286,"ier":3745,"ies":20092,"ied":56622,"iei":12758,"ig ":263227,"iec":40455,"ifw":1504,"ify":12476,"ifo":38860,"ifr":12857,"iff":83949,"ife":69059,"ifl":2383,"ifi":33926,"ifd":6564,"ifa":33210,"icw":1931,"icr":58727,"ict":3075,"ico":4065,"ick":10871,"icl":2507,"ici":5011,"ich":67610,"ice":5312,"ie ":6756,"ica":24075,"iby":15215,"idy":13693,"idw":13164,"ids":2799,"idr":2377,"ido":84771,"idl":9715,"idi":42134,"idg":2023,"idf":1384,"ide":21066,"idd":167625,"ida":8532,"if ":111235,"il ":51770,"im ":30833,"ige":4001,"iga":12988,"ii ":1929,"igl":1926,"igh":5434,"igi":27928,"igu":9179,"igr":12369,"igo":33990,"ign":1744,"igy":2377,"igw":25763,"iha":10220,"imo":1905,"iml":10188,"ime":4271,"imi":12632,"ip ":2132,"inc":35034,"ind":13272,"ina":47117,"inb":3553,"inn":55876,"ino":32650,"int":53913,"ins":9108,"inf":1442,"ine":31855,"ing":30270,"ini":144723,"inl":1674,"iod":49289,"ioe":9726,"inw":4435,"iny":18916,"ike":2441,"ila":30156,"ilb":1450,"in ":265607,"ilo":8695,"ilr":2211,"ill":127954,"ilm":12277,"ilg":3984,"ili":76010,"ild":6621,"ilf":16113,"ile":18463,"ima":3325,"io ":289985,"ilw":10819,"ily":31080,"ils":2239,"ilt":1658,"hs ":2295,"hpw":3958,"hr ":7968,"how":2490,"hol":119579,"hom":12153,"hon":90998,"hog":8194,"hoi":31688,"hos":29540,"hou":1629,"hop":1896,"hor":31875,"hob":14250,"hof":10190,"hoe":76914,"hod":33780,"hoc":3799,"hni":2220,"hno":22182,"hmy":2484,"hna":15241,"hne":3112,"hug":1558,"huf":6768,"hud":4015,"hua":1767,"hub":3570,"hw ":4998,"hto":1904,"hu ":49728,"hry":11207,"hrw":2709,"dîm":1511,"hro":19232,"hre":66068,"hri":21764,"ht ":2488,"hra":53668,"hyf":52057,"hyh":2218,"hyg":5523,"hyb":8387,"hyd":135856,"hyc":4670,"hyn":321330,"hym":108203,"hyl":31096,"hwr":4551,"hwy":87925,"dôn":5372,"hy ":6235,"hwa":48372,"hwc":1503,"hwe":32713,"hwi":29986,"hwn":155040,"hwm":2940,"hum":3886,"hun":26480,"hus":5581,"hur":11397,"hyt":13007,"hys":19663,"hyr":32298,"hyw":53451,"ffu":31255,"fft":26915,"ffw":14834,"ffr":49438,"fi ":22805,"ffy":37874,"ffe":74696,"ffa":45191,"ffl":2596,"ffo":87611,"ffi":52404,"fet":3688,"fes":16994,"fer":169521,"fey":13730,"few":12993,"fec":3355,"fed":42340,"fen":64928,"fel":169733,"fei":77800,"fia":40768,"fha":2006,"fbw":7025,"faw":26795,"fau":8610,"fas":4389,"fat":49299,"far":64220,"fao":2062,"fam":3661,"fan":90668,"fal":61308,"fai":47413,"fag":3038,"faf":2234,"fae":34347,"fad":4691,"fac":8237,"fab":6835,"ff ":69555,"fdd":6405,"fe ":15854,"fa ":55971,"eyr":15759,"eys":6216,"eyd":12355,"exa":1465,"ewy":65140,"ews":1768,"ewr":3606,"eta":6194,"ete":11955,"eti":8354,"eth":694907,"etl":3716,"esn":18031,"eso":14039,"est":84304,"esu":36481,"ess":5687,"esy":14685,"esw":8363,"eua":14789,"eue":2639,"eud":123634,"eul":24078,"euo":18520,"eun":5305,"eto":15157,"etr":8086,"ets":4905,"ett":8442,"etw":1998,"ety":2010,"ew ":9767,"eve":3548,"evi":2084,"euw":2490,"eut":7771,"eur":3959,"eus":7690,"ex ":4617,"ewi":73442,"ewc":12139,"ewe":1562,"ewo":1850,"ewn":148037,"ey ":13114,"ewa":1725,"epi":5381,"eph":2880,"er ":422635,"eor":6531,"eol":111949,"eon":27594,"es ":210327,"erl":12508,"eri":91394,"erg":8320,"erh":2615,"ere":19871,"erf":46260,"erc":20516,"erd":46688,"era":89973,"erb":57683,"et ":16065,"esg":10116,"esi":21447,"esb":3967,"ese":18221,"eu ":234659,"esa":40315,"ery":19213,"eru":8702,"erw":39257,"err":9221,"ert":82663,"ers":45260,"ern":17311,"erm":24808,"erp":2000,"ero":29348,"en 
":226035,"ela":23185,"eld":45521,"elf":13664,"ele":31547,"eli":27149,"elg":3855,"ell":186956,"elo":57857,"elp":12214,"elu":10714,"els":4599,"elt":6398,"ely":21849,"elw":32426,"eo ":1957,"emb":1626,"ema":27011,"eme":5766,"emo":14994,"emi":5124,"emp":1703,"emy":1397,"enf":12468,"ene":63851,"enh":14016,"eng":36123,"enb":2204,"ena":18298,"end":42838,"enc":10021,"eno":47452,"enm":1999,"enn":152654,"enl":2876,"eni":58550,"enw":78572,"enu":5709,"ens":17384,"ent":99268,"enr":8307,"eny":15492,"eoe":12895,"eod":2681,"egl":13651,"ego":34964,"egn":1555,"ege":6483,"egi":20825,"eha":8653,"egr":51439,"egu":7476,"egw":11198,"egy":3765,"eho":1591,"ehe":10265,"ek ":1464,"eib":5369,"eic":44753,"eia":10726,"eis":78703,"eir":116822,"eim":11019,"eil":72219,"ein":209835,"eih":8539,"eid":87601,"eig":15428,"eif":19864,"el ":372965,"eit":227004,"em ":45721,"gl ":8528,"gis":14755,"gir":4745,"giw":1525,"gil":17145,"gip":1439,"gin":3700,"gio":39495,"gid":2251,"gie":1537,"gig":2186,"gia":52084,"ghy":121718,"ghw":2295,"ghr":23072,"ght":4029,"gho":27460,"ghl":1424,"ghe":16945,"gha":18379,"gfy":6247,"gi ":33571,"gfa":1868,"gfe":8886,"cân":1810,"gen":111143,"get":2018,"ger":33433,"ges":6556,"gh ":5400,"ged":20081,"gei":28902,"geg":3171,"gef":17104,"gem":3427,"gel":39073,"gdd":3282,"ge ":8402,"gbi":2600,"gac":4164,"gad":26323,"gae":134740,"gaf":12507,"gai":29586,"gas":4535,"gar":39437,"gau":19803,"gat":2266,"gaw":5037,"gam":9613,"gal":81263,"gan":349053,"ga ":3455,"fyw":14077,"fyt":1464,"fys":13441,"fwy":54963,"fwr":24223,"fyr":32389,"fyl":23029,"fyn":110487,"fyg":10929,"fyd":186182,"fyf":2810,"fur":37788,"fus":5753,"fwn":11199,"fy ":34929,"fta":2433,"fti":3720,"fud":15215,"fua":3656,"ful":1490,"fun":7356,"fug":1943,"ft ":22715,"fra":51653,"frg":3232,"frd":1806,"fre":60623,"fri":60159,"fu ":21619,"fro":44672,"fru":1678,"frw":12026,"fry":26333,"for":98563,"fos":2404,"fot":2236,"fon":44680,"fol":37771,"fr ":9501,"öyn":2600,"fna":5141,"fne":3267,"fnd":3118,"fnf":2107,"fnu":4897,"fni":17053,"fno":61820,"fod":269071,"foc":1640,"fog":4206,"foe":12090,"fny":50847,"fle":57797,"fn ":23285,"fla":32104,"fli":5176,"flu":2782,"flo":12807,"fo ":8741,"fly":15244,"flw":53054,"fid":2843,"fie":8072,"fig":12205,"fil":26125,"fin":25988,"fio":35785,"fir":3345,"fis":6813,"fit":3614,"fiw":3846,"da ":110490,"dd ":1477253,"de ":26329,"dbw":2382,"dad":34580,"dal":74018,"dai":102403,"dag":8330,"dae":63510,"daf":28823,"dat":79262,"das":16177,"dar":103592,"dan":79963,"dam":6340,"daw":14222,"dau":293256,"dda":258712,"dde":137692,"ddf":57032,"ddg":1492,"ddh":7085,"ddi":485523,"ddl":8742,"ddo":292072,"dds":12690,"ddr":9994,"ddu":67078,"ddw":123638,"ddy":160807,"df ":9080,"cul":1415,"ctr":2570,"cto":27970,"cti":6692,"cte":1554,"cta":12152,"cwl":2358,"cwm":14836,"cwr":4675,"cws":1941,"cwe":12279,"cwb":3579,"cwa":1956,"cus":1837,"cym":87796,"cyl":22094,"cyh":35906,"cyf":177359,"cyt":17926,"cys":21905,"cyr":11925,"cyn":218403,"cyd":30782,"cyc":4381,"cwy":3394,"cyw":3704,"cla":5831,"cle":10053,"clu":21575,"clw":4425,"cli":4923,"clo":4478,"cly":7408,"co ":4403,"coh":1780,"cod":19541,"coe":5573,"cof":9302,"coc":2985,"con":33774,"col":15359,"com":10785,"cor":15348,"cos":8474,"cop":28345,"cot":2350,"cr ":7971,"cs ":1718,"ct ":9647,"cre":25913,"cra":16391,"cri":3869,"crh":44596,"cru":1640,"cro":18038,"crw":6875,"cry":13564,"csa":3697,"cso":2895,"csi":3457,"ch ":387720,"cer":12762,"ces":1879,"cen":12324,"cel":7425,"cef":18982,"cei":27071,"ced":2754,"ci ":2300,"cha":83305,"chd":17277,"chw":118760,"chu":29104,"chy":130786,"cia":11444,"ck 
":16479,"chg":9358,"che":62787,"chf":6666,"chl":11299,"chi":60597,"cho":54885,"chm":3020,"chn":17832,"cht":1484,"chr":49582,"ciw":1511,"cil":2269,"cig":1657,"cis":2121,"cin":1623,"cio":4546,"cip":1810,"cke":1619,"ed ":214352,"eba":13413,"ebe":6425,"ôn ":16823,"ebi":9909,"ebl":2268,"ebo":8209,"ebr":1680,"ebu":15491,"ebw":2965,"eby":18028,"eac":1537,"eag":1964,"eaf":3149,"ead":8251,"eai":10078,"ean":17065,"eal":20923,"ear":16517,"eat":6423,"eau":2544,"ôl ":76461,"eb ":101834,"ea ":3365,"efi":22738,"efn":124494,"ôr ":11345,"efo":7601,"efa":25470,"efe":21832,"eff":45413,"efy":131474,"ei ":286301,"ega":31939,"efr":6364,"eft":2356,"een":4284,"ees":1420,"eet":1564,"edl":50591,"edi":359651,"edd":749953,"ede":12372,"edf":5588,"eda":55389,"eg ":123528,"edw":24074,"edy":23468,"ônt":5425,"edu":42143,"edo":40266,"edr":25455,"ech":101594,"eci":1577,"eca":2510,"ee ":3216,"ef ":110622,"ecy":2620,"ecw":1473,"ect":38237,"ecs":5066,"ecr":2112,"eco":26895,"dyg":15248,"dyh":1417,"dyf":26975,"dyl":46528,"dym":70188,"dyn":99844,"dys":59049,"dyr":6354,"dyd":76050,"dyc":34668,"dyb":3305,"dwy":146331,"dwi":8446,"dwe":48518,"dwc":13218,"dwr":24472,"dwm":2562,"dwn":30086,"dwl":10851,"dwa":30525,"dy ":46819,"duw":2993,"dur":48569,"dus":35809,"dyw":30318,"dor":33505,"dop":5293,"don":25967,"dom":7441,"dol":259182,"dos":14598,"dr ":13114,"ds ":5982,"dna":26847,"dne":5245,"dno":10393,"doc":9952,"dod":124322,"doe":27190,"dog":102118,"dso":15419,"dw ":23914,"dun":1669,"dul":10560,"dug":2604,"dud":2961,"dri":30946,"dra":161835,"dre":56442,"drw":45270,"dry":28175,"du ":91895,"dro":110745,"dru":4316,"dgy":2105,"dha":8406,"dge":2999,"dic":3369,"did":20310,"dia":209861,"dib":9407,"der":119203,"des":10401,"det":2296,"deu":21010,"dew":20869,"dey":3681,"dfa":22781,"deb":69554,"dea":13246,"ded":35299,"dec":28845,"def":58015,"deh":3382,"deg":29115,"dei":94188,"del":21705,"den":26263,"dem":15362,"deo":23118,"dfu":3372,"dfr":9809,"dfy":4264,"dfw":12337,"di ":343588,"dfe":12376,"dfo":7225,"dle":36107,"dla":42355,"do ":65572,"dly":4536,"dlw":6934,"dlu":21986,"dlo":19459,"dli":13158,"dl ":24621,"diw":121291,"dim":26389,"din":77731,"dio":147213,"dip":2426,"dir":76164,"dis":29794,"dit":1587,"die":4163,"dif":40002,"dig":144690,"dih":1770,"dil":26660,"rgy":18304,"rhe":70719,"rha":251632,"rhi":11342,"rho":38863,"rfy":33803,"rfu":1652,"rga":18985,"ri ":69199,"rgl":5099,"rgi":2080,"rge":21537,"rgr":6072,"rgo":1666,"ret":16937,"res":54799,"reu":26897,"rew":6998,"rey":2073,"rfa":15148,"rfe":25711,"rff":46741,"rfi":14319,"rfl":1729,"rfo":41751,"rdu":1543,"rds":3609,"rdr":2188,"rdy":20437,"rdw":1665,"rg ":3366,"reb":4258,"rea":12241,"ree":4927,"ref":133959,"rec":21009,"red":130547,"rei":60987,"reg":12687,"rem":4531,"ren":43953,"rel":5532,"reo":10464,"rf ":6728,"rda":40210,"rdo":36701,"rdi":12516,"rde":16337,"rdd":152990,"re ":20437,"rby":56369,"rbw":1488,"rbr":3649,"rci":3956,"rch":112992,"rce":1610,"raw":40774,"rd ":19513,"rap":2670,"ras":11491,"rat":38876,"rau":102545,"rbo":4197,"rbe":37571,"rc ":5087,"rai":131520,"rah":5389,"rag":11266,"ran":123131,"ram":16979,"ral":34431,"rab":4126,"raf":74615,"rae":164083,"rad":66122,"rac":33820,"rpr":10686,"rs ":33187,"rpe":3712,"rpa":38052,"rr 
":1410,"ror":7927,"ros":107066,"rot":6229,"rom":4057,"ron":48088,"roo":1626,"rop":23932,"rou":4505,"rov":1701,"row":5422,"rob":23818,"roa":2113,"rod":65494,"roc":6113,"roi":31757,"rol":80831,"rof":21090,"roe":88522,"rog":12801,"rno":22930,"rns":3524,"rnw":1636,"rnu":2159,"rny":10237,"rna":22378,"rng":2426,"rne":19996,"rni":12106,"rnh":11275,"rmo":4622,"rmw":7648,"rmy":2333,"ro ":54898,"rma":11870,"rme":2383,"rmi":13641,"rly":6595,"rlu":5557,"rlo":6770,"rll":39347,"rli":4565,"rle":7637,"rla":3212,"rn ":39795,"rks":1562,"rke":1486,"rm ":6483,"riw":9942,"rl ":2183,"rip":3073,"rio":118981,"rir":9993,"rit":13309,"ris":35757,"rig":15158,"ril":9399,"rin":61972,"rim":1705,"ria":206687,"rib":3285,"ric":21565,"rid":9651,"rie":47399,"rif":193163,"rhw":45472,"rhy":80288,"rk ":5015,"rwg":3630,"rwe":24948,"rwi":1687,"rwp":11620,"rwo":7066,"rwn":8458,"rwm":4787,"rws":4580,"rwr":4007,"rwy":222313,"ryb":4844,"ryc":37546,"ryd":130644,"ryf":17151,"rug":5479,"ruf":3376,"rud":2993,"ruc":1894,"rus":10019,"rut":3548,"rwa":12724,"rwc":4604,"rwd":3531,"ry ":20234,"rsi":9220,"rso":11498,"rsa":9355,"nïa":6814,"rse":4348,"rsy":4479,"rta":14492,"rst":6242,"rsw":1913,"rtn":12219,"rto":2978,"rte":6802,"rtf":2225,"rth":239362,"rti":7087,"rw ":14755,"rts":2811,"rtr":18873,"rt ":17422,"rro":2785,"rri":17884,"rre":7941,"rra":15237,"ru ":229203,"rry":5686,"rru":2402,"sac":1631,"sae":2799,"saf":62271,"sai":25064,"sal":3060,"sam":3928,"sba":15855,"sbe":7863,"sbi":2265,"sbl":1857,"san":70637,"sau":24838,"sar":3658,"saw":42580,"sa ":3554,"ryw":36415,"rys":31710,"ryt":1688,"ryr":1752,"ryl":4848,"rym":18611,"ryn":49541,"ryg":7741,"sha":2603,"sgu":19165,"sgw":18665,"sgy":14440,"she":2309,"shi":5072,"si ":25698,"sga":6215,"sgo":72408,"sgl":24788,"sgr":43324,"sge":4670,"sgi":15631,"siw":27410,"siy":17736,"sie":15940,"sid":3544,"sic":63165,"sib":19534,"sia":76060,"sit":3740,"sir":11832,"sis":3362,"sin":5101,"sio":63000,"sil":4613,"sif":2772,"sig":47581,"sbr":3897,"sbo":3621,"sbu":1443,"sby":15923,"se ":11827,"sch":1975,"sco":2461,"sex":4204,"ser":39757,"ses":23238,"set":6293,"sfa":3579,"sh ":16149,"sff":1898,"sg ":45629,"sei":15116,"seg":4502,"sef":70983,"sed":16516,"sec":20419,"seb":2546,"sep":1434,"sen":25624,"sem":2228,"sel":10605,"sol":35386,"som":9795,"son":34627,"sor":4687,"soe":25172,"sod":44247,"sog":8851,"soc":8028,"su ":11096,"srw":1495,"sra":2879,"siŵ":5591,"st ":33109,"ss ":5302,"sla":5549,"sle":6891,"sgî":3837,"sna":6964,"sni":3185,"sne":44663,"smo":2820,"smy":1876,"so ":9507,"sme":1611,"sws":1382,"swm":11357,"swl":5700,"swy":52041,"syd":111441,"syn":20130,"syt":1505,"sys":17787,"syr":1516,"syf":1428,"sym":29115,"syl":80321,"sse":8921,"ssa":2131,"sso":1595,"ssi":2920,"ste":53847,"sta":62106,"stn":2981,"sto":42022,"sti":50489,"stl":1998,"stu":19363,"stw":7828,"str":84065,"sty":65191,"sul":1526,"sut":16540,"sur":35899,"sy ":127712,"swa":1778,"tai":36622,"tal":43400,"tae":6290,"taf":52261,"tag":1975,"tab":7263,"tac":3832,"tad":18299,"tbl":44383,"taw":7828,"tau":14344,"tat":9098,"tas":2365,"tar":17514,"tan":20001,"tam":1918,"tch":3069,"te ":8864,"ta ":18244,"pa ":37111,"pe ":8693,"par":64997,"pat":3348,"pas":12403,"pau":2226,"paw":4951,"pai":2948,"pao":5682,"pap":3898,"pam":9971,"pan":38154,"phe":11012,"pha":10199,"phw":4112,"phr":10198,"pho":14668,"phl":6358,"phi":1655,"pi ":3150,"ph 
":1476,"pea":9878,"pec":2412,"ped":9480,"pen":51739,"per":42893,"pet":16989,"pei":7527,"pel":9957,"pla":28995,"ple":12058,"plw":1815,"pia":10014,"pid":5098,"pin":2030,"pio":4575,"por":7160,"pop":3413,"pot":2376,"pos":11018,"pon":2140,"pol":16902,"pob":55043,"poe":2616,"psi":3534,"pte":5358,"pto":3399,"pra":3655,"prw":10598,"pry":26375,"pu ":10555,"pri":36941,"pre":18820,"pro":34872,"pwr":4656,"pwl":2162,"pwn":2716,"pwy":52147,"pur":7632,"pus":2183,"pum":4086,"pwe":6830,"pyn":3167,"pys":1863,"löy":2600,"hŷd":5310,"hŷn":2140,"que":2021,"môr":5916,"iŵr":5597,"ra ":25900,"ngo":70240,"ngi":8856,"ngl":24492,"ngu":6735,"ngw":13906,"ngr":4027,"ngt":3458,"ngs":1836,"ni ":162627,"nfy":3382,"nge":66569,"ngh":139492,"nga":20430,"ngd":2154,"nho":3123,"nhy":17811,"nhw":13074,"nhr":2194,"nha":39183,"ngy":19113,"nhi":10953,"nhe":12166,"neg":58741,"nei":16297,"nel":23644,"nen":2828,"ner":32050,"net":12681,"nes":89293,"neu":177328,"ndw":3769,"ndy":7766,"ng ":205524,"nea":1708,"neb":36285,"nec":1684,"ned":179856,"nef":3629,"nfi":1656,"nfo":25474,"nfr":5836,"nfu":3122,"ney":2567,"new":81562,"nfa":14793,"nff":8709,"nfe":9475,"nct":2729,"nco":3605,"nci":8047,"ncl":19065,"nce":7558,"nch":2262,"nca":5548,"oDd":3626,"ne ":35524,"nby":4127,"ndu":2109,"ndr":13355,"nds":2198,"ndo":10475,"ndl":1413,"ndi":15446,"nde":57533,"ndd":30664,"nda":28690,"ncw":4231,"nal":27769,"nam":4027,"nan":12422,"nar":15181,"nac":16010,"nad":65361,"nae":85590,"naf":35036,"nag":46791,"nai":38578,"nc ":30927,"nab":30870,"nbe":2174,"nd ":167663,"nba":12055,"nau":88603,"nat":15889,"nas":60382,"naw":19356,"ïo ":1777,"na ":107935,"mys":6814,"myr":5193,"myn":55033,"myl":2278,"mwr":2209,"mwy":106236,"mwl":1400,"mwn":18260,"myd":4653,"myf":3779,"myg":3973,"nyf":10568,"nyd":167388,"nyc":8519,"nwy":109198,"nwi":8478,"nwl":5833,"nwn":2177,"nwo":7690,"nwr":5201,"ny ":138554,"nwg":3510,"nwe":18073,"nwc":5387,"nwa":19489,"nul":75697,"nus":11035,"nud":3500,"nty":9797,"nw ":76947,"nto":6210,"ntu":3630,"ïau":14997,"nts":1727,"ntr":49425,"nti":31515,"nth":5358,"nta":50854,"ïai":3322,"nte":23599,"nsw":5855,"nso":9921,"nst":3803,"nse":9354,"nsi":15567,"nsa":11986,"nu ":76710,"nry":14046,"nrw":4687,"nro":2398,"nri":35884,"nrh":37151,"nra":5874,"nt ":333571,"ns ":20490,"noc":4035,"nod":144646,"nog":50582,"noe":7622,"nof":5561,"nol":206820,"nom":37174,"non":7970,"nos":23694,"nor":25161,"nov":1574,"nne":21867,"nna":136888,"nno":91688,"nnh":1664,"nni":142373,"nnu":52353,"nnw":99293,"nny":211841,"nma":2508,"nmo":2415,"nll":57130,"nn ":8357,"nle":2650,"nly":39314,"ndŵ":1512,"no ":90645,"nig":123645,"nif":44965,"nie":2798,"nid":127033,"nic":4230,"nib":9026,"nia":189506,"nk ":2019,"niw":17294,"niu":1595,"nis":8567,"nit":5323,"nir":11106,"nio":79645,"nin":15456,"nil":16129,"ogr":5332,"ogw":9691,"ogi":49377,"ogl":36135,"ogo":10422,"oga":51638,"oge":17943,"ogf":7295,"ofy":23009,"oi ":70058,"ohi":1622,"oho":21669,"ohn":11771,"ohe":28958,"ogy":17848,"ois":1617,"oir":2176,"ok ":1582,"ol ":763917,"oce":3144,"och":46521,"oci":2367,"ock":4010,"oco":2527,"ocr":11686,"ocs":2087,"oe ":6131,"oca":3987,"ode":12839,"odf":2129,"odl":8794,"odi":83255,"odo":79662,"odr":97425,"ocy":1405,"of ":21772,"odd":343166,"oda":94030,"oel":5335,"oeg":51158,"oer":2610,"oes":86294,"oet":10844,"oen":4783,"ody":7861,"odu":5553,"odw":25659,"oed":409580,"og ":129097,"ofn":11560,"ofi":27518,"ofr":8483,"ofo":2473,"off":27270,"ofe":10926,"ofa":28384,"ob ":52175,"îl ":4003,"oc ":4927,"îm ":5804,"oad":3115,"oba":3465,"od 
":547384,"obr":5708,"obl":125783,"obi":2247,"obe":20665,"nyn":23338,"nym":18653,"nyl":5128,"nyr":7724,"nyt":1430,"nys":17788,"nyw":9139,"oyd":3974,"owy":9062,"owl":1588,"own":7478,"oyw":3671,"oyn":3623,"otw":9445,"ow ":4107,"oti":2331,"oth":5403,"ote":7444,"ott":3778,"oto":4245,"osy":2038,"ost":30889,"osw":1982,"ota":4367,"osi":48645,"ose":24991,"osg":13017,"osf":1818,"oss":2774,"oso":27758,"owe":3109,"ovi":2723,"orï":3839,"ove":3431,"oug":2750,"oul":1799,"oun":4204,"ous":4391,"our":8928,"out":3139,"opo":2593,"opi":3247,"ope":16444,"opa":35609,"os ":142644,"opt":5362,"ops":1860,"ool":1739,"ook":2746,"ood":5509,"or ":143087,"ork":1606,"orl":27515,"orm":20652,"orn":6094,"oro":25173,"orr":10396,"orc":13306,"ord":84993,"ore":21995,"orf":42746,"org":17624,"ori":60315,"ou ":2179,"osa":9919,"osb":22160,"ort":48554,"ors":13048,"oru":3461,"orw":15896,"ory":7256,"ot ":6872,"orb":3575,"ora":32977,"olb":14257,"ola":56819,"old":19385,"olc":15348,"on ":432682,"oli":132480,"oll":47544,"olk":2728,"olf":14473,"ole":35161,"olh":1780,"olg":1440,"olr":3706,"oln":1388,"olo":11175,"oly":45871,"olu":1783,"olw":23143,"om ":33744,"oke":1397,"ona":27650,"ond":105123,"onc":2117,"onf":10374,"one":36593,"ong":18051,"oni":37789,"onl":18014,"onn":27921,"ono":53618,"onr":3505,"ons":9110,"ont":18300,"onw":6728,"ony":18054,"oma":27472,"ome":15722,"omb":2548,"omi":28943,"omm":1820,"omp":1938,"omo":2196,"op ":17416,"la ":30161,"le ":102209,"lbw":12810,"lch":90844,"lco":2475,"lf ":2256,"ldd":2333,"lde":19232,"ldi":15321,"ldr":3031,"lab":2112,"lac":29323,"lad":98353,"laf":41321,"lae":128735,"lag":1692,"lai":117947,"lal":3217,"lan":125158,"lam":3538,"lar":5003,"lat":3638,"las":18341,"law":107485,"lau":29230,"lay":1468,"lba":26635,"ld ":37477,"lbe":1978,"lbo":2022,"ls ":7845,"lpu":10684,"lol":3460,"lon":31796,"lom":2651,"lor":7254,"lod":80426,"loc":6030,"lof":4907,"loe":72615,"log":46325,"loi":2389,"los":4346,"lot":6141,"low":2604,"loy":9925,"lmi":2989,"lma":7554,"lti":32430,"lto":2424,"ltw":2317,"ltu":11392,"lw ":31473,"lud":23429,"lsh":1772,"lso":2605,"lst":1753,"lta":4687,"lte":3257,"lu ":96206,"lrw":4768,"lt ":18470,"lhe":1975,"lha":4721,"lgy":4529,"lgo":31713,"lge":2937,"li ":53647,"lga":4421,"lfy":5165,"lfr":2302,"hân":4040,"lfo":3307,"lff":13753,"lfe":12308,"lfa":43443,"ley":7868,"lew":34228,"lex":1550,"leu":25002,"les":19828,"let":14970,"ler":17336,"leo":79925,"lem":37265,"len":73940,"lei":80945,"leg":28145,"lef":21974,"led":124640,"lec":11257,"lea":4274,"lls":6571,"llu":91103,"llt":63202,"llw":65886,"lly":80846,"lo ":18642,"lla":195874,"llb":4606,"lle":173411,"llf":21229,"llg":34044,"lli":226485,"llo":30437,"lm ":9681,"ll ":214894,"lit":13912,"lis":26649,"lir":45447,"lip":2581,"lio":84412,"lin":37120,"liw":34714,"lic":2671,"lid":52663,"lia":236926,"lk ":2487,"lig":7246,"lie":8920,"lif":13223,"dŵr":6394,"ma ":40909,"mac":2067,"mab":4216,"mai":55413,"mad":6604,"mae":247275,"mag":4598,"map":1407,"mar":23042,"mas":14537,"mal":5198,"mam":2142,"man":35032,"maw":18773,"mau":39313,"mat":69909,"mba":1504,"mbl":2148,"mbi":3707,"mbe":5450,"mbr":12581,"mbo":3263,"me ":3994,"mca":11618,"mbw":1444,"mch":11714,"mda":8727,"mde":41204,"mdd":39775,"mdr":17496,"mdo":1963,"med":29820,"meg":19523,"met":12974,"mew":128769,"mes":22759,"mer":46667,"mel":4668,"men":17413,"mei":8560,"mff":2556,"mey":3711,"luo":19901,"lun":80882,"lum":1634,"lus":14270,"lur":8303,"ly 
":53421,"lwa":19342,"lwb":3155,"lwc":19181,"lwe":30764,"lwg":19758,"lwi":8918,"lwr":11928,"lwn":21982,"lwm":3879,"lwy":170858,"lya":1979,"lyb":2209,"lyc":2140,"lyd":25341,"lyf":17803,"lyg":99540,"lyw":104640,"lym":14951,"lyn":124126,"lys":19250,"lyt":6538,"lyr":2418,"mpi":2296,"mpe":1411,"mpl":2550,"mpw":3470,"mps":1604,"mpt":2590,"ms ":10491,"moc":10530,"moe":5528,"mod":37018,"mon":12910,"mol":5204,"mor":51987,"mos":7848,"mot":1641,"mpa":10579,"mre":12280,"mro":3439,"mnï":5503,"mrw":13794,"mru":208863,"mry":37758,"mu ":8242,"mse":28496,"mra":25843,"mud":19804,"mwe":10675,"mwd":1948,"mwa":5221,"my ":1506,"mur":1395,"mus":2197,"mun":73350,"mho":8276,"mhr":2830,"mhl":11326,"mhw":5307,"mha":11193,"mhe":24401,"mgo":1890,"mgy":43295,"mgu":2786,"mga":3590,"mi ":29234,"mge":3739,"ml ":13005,"min":8059,"mio":5881,"mil":37316,"mis":33867,"miw":5015,"mit":2252,"mia":20012,"mig":2194,"mo ":4753,"mlw":14332,"mly":19237,"mlu":1776,"mlo":5063,"mli":3947,"mle":2810,"mla":30789,"mni":9880,"mp ":4419,"rŵp":7025,"yw ":270496,"yty":6748,"ytu":28907,"ytr":9374,"yti":2114,"yth":64543,"yta":7981,"ysw":7360,"ysy":36277,"yst":133768,"ysu":6119,"yso":21693,"ysl":6336,"ysm":1721,"ysg":129385,"ysf":2863,"ysi":59834,"ysb":16200,"yse":10011,"ysa":7689,"yri":51337,"yro":2402,"yrn":17748,"yrt":1495,"yrs":5675,"yrr":16707,"yrw":12691,"yrc":40688,"yrd":23223,"yra":30777,"yrh":2696,"yre":5663,"yrf":11768,"ys ":130128,"yr ":522252,"yon":1614,"yny":65358,"ynu":90277,"ynw":17551,"ywe":47689,"ywb":7853,"ywa":7712,"ywi":32582,"ywf":4430,"ywg":2254,"ywu":4318,"yws":4181,"ywo":97481,"ywy":47791,"yby":3450,"yca":1577,"ych":135201,"ybi":5778,"ybl":10563,"ybo":33530,"ybr":6887,"ybu":6366,"ybw":3785,"yf ":94965,"yda":108440,"ydb":2445,"yde":30766,"ydf":2528,"ydd":1031264,"ydi":30452,"ydg":1873,"yg ":23655,"yed":2287,"ydo":18475,"ydr":24295,"ydl":41447,"ydn":15963,"ydw":15590,"ydy":142006,"yfa":79734,"yfl":101205,"yfi":9333,"yfh":1890,"yfe":122684,"yff":49868,"ya ":2403,"yb ":1460,"yaf":39177,"yd ":448856,"yau":2199,"ym ":139299,"yn ":2046385,"yla":24866,"ylc":58295,"yld":3028,"yle":19438,"ylf":11493,"yli":25804,"yll":146688,"ylo":4658,"ylw":47190,"ylu":4087,"ymc":10975,"ymb":5107,"yma":78324,"yo ":9913,"ymh":40871,"ymi":12166,"ymf":2997,"ymg":31535,"ymd":80346,"yme":33885,"ymr":286136,"ymp":5265,"ymo":45199,"yml":28649,"ymy":21331,"ymu":93545,"ymw":38802,"yna":64317,"ynd":48575,"ync":3108,"yni":92899,"ynl":49958,"yne":71151,"ynf":3851,"yng":251524,"ynh":49363,"ynr":15963,"yns":1653,"ynt":143293,"ynn":358379,"yno":84035,"yfw":5609,"yfu":9902,"yfy":23708,"yfo":25624,"yfn":30446,"yfr":128185,"ygi":35049,"ygl":7602,"yga":7036,"ygb":2588,"ygw":4747,"ygy":7099,"ygo":16536,"ygr":9832,"ygu":53946,"yhe":1472,"yhy":1494,"yhu":1841,"yho":67043,"yie":1950,"yl ":17899,"tîm":3174,"tân":2674,"tâd":2075,"tâl":1439,"táu":5879,"wun":4321,"wy ":167584,"wso":5954,"wst":14295,"wsb":1703,"wsa":2381,"sïa":6640,"wu ":8197,"wsi":4835,"wse":1824,"wto":2882,"wti":2767,"wth":3593,"wyt":46841,"wys":164434,"wyr":156167,"wyw":2663,"wyl":86951,"wym":16265,"wyn":179235,"wyo":11729,"wyd":589155,"wye":2580,"wyb":42400,"wyc":3206,"wyi":2503,"wyf":88674,"wyg":7233,"wya":43059,"sôn":8723,"wlw":2007,"wmn":13780,"wmp":7509,"wmw":2030,"wp ":5257,"wna":35677,"wnf":1977,"wne":111452,"wnd":6615,"wnc":4706,"wni":16860,"wng":51730,"wnn":39302,"wno":2540,"wns":4025,"wnt":5833,"wm ":45639,"wlc":4133,"wla":51387,"wn ":404585,"wll":7707,"wli":8016,"wle":39435,"wlf":1463,"ws 
":27784,"wre":14290,"wrc":1643,"wrd":17997,"wri":42683,"wra":16401,"wrt":86482,"wrs":5673,"wry":3769,"wrw":3993,"wrn":10068,"wrp":4016,"wro":34924,"wod":95909,"wog":10667,"wob":4429,"woc":1528,"wnw":1599,"wor":2513,"wol":6338,"woo":2607,"won":6564,"wpa":2381,"wr ":124568,"wpi":6408,"wf ":5994,"wda":1936,"wdd":15429,"wcl":1588,"wch":84468,"we ":12027,"wbl":13799,"wfe":1476,"wfa":5328,"wes":19504,"wer":103660,"weu":29216,"wet":19199,"wen":21755,"wel":124321,"wei":166516,"wef":12569,"weg":1420,"wed":374291,"ŵyr":1598,"web":1716,"wec":2901,"wg ":25861,"ŵyl":1815,"wdw":2933,"wdu":37364,"wdr":2488,"wgr":12193,"wga":2076,"wi ":11108,"wir":53666,"wis":18022,"wit":3330,"wl ":41621,"wig":3412,"wie":1606,"wid":29273,"wic":2145,"wio":14312,"win":33765,"wil":30098,"wia":16107,"ŵr ":14597,"wb ":13655,"wa ":3359,"wan":26467,"ŵy ":4246,"wal":8910,"way":3018,"wau":4489,"war":68728,"was":82101,"wd ":12936,"wbe":7471,"wag":2121,"waf":7690,"wai":97239,"wah":33598,"wae":16922,"wad":34580,"rïa":3944,"ŵp ":6952,"rôl":8177,"vil":1778,"vin":1893,"vid":14072,"vie":15891,"vis":1891,"ŵer":1950,"ŷd ":5480,"ver":6993,"ven":3325,"vel":1505,"ve ":5223,"van":5418,"va ":2119,"uwy":3416,"uwc":17112,"uwi":1989,"usi":1968,"usg":1882,"use":4507,"usa":1473,"ust":11010,"uss":2899,"uso":3761,"usn":21216,"uth":9701,"ute":1704,"uw ":4094,"utt":4922,"utu":5650,"us ":87295,"ut ":21567,"ura":19210,"urd":37731,"urf":19995,"ure":3962,"urh":2086,"urg":2936,"uri":15646,"urn":7821,"uro":14115,"urr":2119,"urs":2043,"urt":1924,"urw":5461,"ury":3159,"uny":3961,"uog":7021,"uoe":8806,"uod":5536,"uon":6985,"uol":7386,"uos":4002,"ur ":71669,"ump":2645,"umb":3249,"ume":1431,"uo ":2462,"unt":3561,"unr":30860,"unw":12131,"unu":3801,"uni":81801,"uno":31125,"unn":2027,"und":29684,"una":20771,"ung":1445,"une":61156,"unf":2341,"um ":12570,"ulu":19906,"ull":87609,"uli":6287,"ula":3301,"un ":150712,"uis":1497,"ul ":9445,"ugh":7082,"ugi":2003,"uge":2130,"ugo":2448,"ugl":1647,"uga":3606,"uda":2384,"udd":41306,"ude":18823,"udi":17508,"ue ":5180,"uch":33289,"uck":1600,"uff":4314,"ufe":7387,"ufa":2438,"udu":1460,"udo":9949,"ug ":5003,"udw":2918,"ued":6122,"uen":4053,"ub ":3442,"ua ":27986,"uas":1433,"uar":1702,"uan":9132,"ubl":1462,"uba":1626,"ud ":136452,"lŷn":8979,"uag":5660,"uad":3772,"uae":5221,"tyw":5522,"tyf":2870,"tyl":1639,"tym":6439,"tyn":26295,"tyr":48030,"tys":4278,"ty ":18594,"tur":13689,"tus":3246,"tun":39289,"tuo":2638,"tua":31254,"tud":13123,"tue":1780,"tyd":3508,"tyb":2424,"twy":7578,"tws":4311,"twr":6786,"twn":3869,"twm":10367,"twf":1674,"ts ":6903,"tre":90655,"tt ":11110,"tra":97424,"tri":34368,"tru":3859,"tro":35460,"ŷr ":1449,"tu ":22540,"trw":23329,"try":15812,"tsw":3795,"tta":1471,"tte":3482,"tti":1910,"ttl":1680,"tto":2254,"to ":24556,"tne":12625,"tno":2988,"ŷn ":11249,"pêl":3054,"tod":27653,"toc":5503,"toi":3707,"tog":3800,"tow":1605,"tom":5214,"ton":26491,"tol":2761,"tor":43947,"top":1745,"tr ":17819,"til":1672,"tif":5355,"tie":9333,"tig":9818,"tir":27978,"tis":11461,"tin":9873,"tim":2470,"tio":31979,"thy":46160,"thu":45349,"thw":31457,"tia":52311,"tic":3675,"tid":2222,"tiy":6135,"tiw":14738,"tl ":2446,"tli":3793,"tlo":5586,"tla":2391,"tle":4793,"tem":19391,"ten":7716,"tep":1734,"tei":25035,"tel":15355,"tef":7947,"teg":31285,"teb":43243,"tec":5010,"ted":11202,"tff":2312,"th ":759637,"tey":2772,"teu":11405,"tes":9706,"ter":115193,"tgo":3276,"tge":1530,"ti 
":8846,"tga":31917,"thn":24561,"tho":105119,"thl":16268,"thr":80549,"ths":2911,"thp":4012,"thf":9853,"thg":10918,"thd":10799,"the":42149,"thi":135223,"thb":4302,"tha":183648,"tgy":3272,"tŷ ":1779},"n_words":[138024683,168015853,135168311],"name":"cy"}
\ No newline at end of file
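Once the profiles are installed, detection goes through the package's public entry points, detect and detect_langs (re-exported from langdetect/__init__.py alongside DetectorFactory). A hedged usage sketch; it assumes the script runs from the repository root so that nlp_resource_data/ can be placed on sys.path, and the sample sentences and printed results are illustrative only:

    import sys
    sys.path.insert(0, 'nlp_resource_data')  # assumption: run from the repo root

    from langdetect import DetectorFactory, detect, detect_langs

    DetectorFactory.seed = 0  # make results deterministic across runs

    print(detect('Dette er en kort tekst.'))          # -> 'da'
    print(detect_langs('Mae hon yn frawddeg fer.'))   # -> e.g. [cy:0.99...]
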
diff --git a/nlp_resource_data/langdetect/profiles/da b/nlp_resource_data/langdetect/profiles/da
new file mode 100755 (executable)
index 0000000..23e8658
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/da
@@ -0,0 +1 @@
+{"freq":{"D":31963,"E":16857,"F":19795,"G":13674,"A":25625,"B":25733,"C":16797,"L":17275,"M":22557,"N":16361,"O":9041,"H":24577,"I":12015,"J":9674,"K":25355,"U":7743,"T":19455,"W":7598,"V":12180,"P":17849,"S":49336,"R":17256,"Y":1840,"Z":1413,"f":221888,"g":349504,"d":493190,"e":1436845,"b":166372,"c":57573,"a":587527,"n":746797,"o":488340,"l":487393,"m":293320,"j":42561,"k":286164,"h":119659,"i":613022,"w":11095,"v":176622,"u":183003,"t":621215,"s":567338,"r":850344,"q":1336,"p":139855,"z":8890,"y":86732,"x":5939,"Å":1900,"Ø":2784,"é":2737,"æ":62497,"å":62645,"ä":1464,"ü":1583,"ø":71666,"ö":1579," l":35858," m":66057," n":25041," o":98877," h":49372," i":113380," j":6194," k":49552," d":112412," e":192889," f":104584," g":23827," a":95172," b":68676," c":8562," u":24358," t":58677," w":1082," v":42657," p":52152," s":123480," r":22226," J":9634," K":25242," H":24422," I":11943," N":16289," O":8943," L":17176," M":22363," B":25561," C":16566," A":25498," F":19659," G":13552," D":31783," E":16778," Z":1391," Y":1835," S":49095," R":17196,"а":1047," P":17741," W":7540," V":12125," U":7720," T":19345," å":3941," æ":1756," ø":5351," Å":1898," Ø":2782,"A ":4032,"F ":2501,"Da":6717,"Cl":971,"Co":3707,"Ce":1101,"Ch":3540,"Do":1568,"De":17413,"Di":2020,"Fe":1146,"Fa":2183,"Eu":1618,"Er":1008,"Et":1623,"En":5373,"El":1343,"Ge":1911,"Ga":1871,"I ":3326,"Fr":4782,"Fo":3851,"Fl":1651,"Fi":2355,"C ":1471,"Au":1064,"Ar":2625,"As":2462,"Ba":5317,"Aa":1418,"Am":3646,"An":3500,"Al":3794,"By":1617,"Bu":1524,"Br":3812,"Ca":3808,"Bi":1743,"Be":3864,"Bo":3085,"Bl":1436,"Kl":1826,"Kr":1747,"Ko":8878,"Le":2792,"Li":3203,"La":5071,"Lu":1291,"Lo":2732,"Me":3324,"Mi":3899,"Ma":7557,"Mu":1568,"Mo":3310,"Ni":2023,"Ne":2682,"Na":2422,"No":4840,"Gi":973,"Gr":3579,"Go":1190,"Gu":1554,"Ha":6824,"He":6629,"Hi":1514,"Ho":4306,"Hu":1589,"In":3339,"Is":1195,"Ja":2348,"L ":1217,"Je":1924,"Jo":2770,"Ju":1199,"Ka":3252,"M ":1238,"Ki":2897,"Ke":1081,"Un":2388,"Ty":1556,"Tr":2242,"To":2011,"Th":4228,"Ti":1419,"Te":2146,"Ta":2888,"Sy":2480,"St":10943,"Sv":1640,"Su":1509,"Wo":997,"Wi":1997,"Wa":1860,"We":1192,"Vo":988,"Vi":3450,"Va":1711,"Ve":4334,"Pr":4633,"S ":1850,"Pe":2483,"Pa":3757,"Po":2392,"Pi":1096,"Or":1531,"Kø":2561,"Se":2517,"Sc":1974,"Si":2298,"Sh":1188,"Sl":1960,"Sk":3103,"Sp":1977,"So":7973,"Ru":1537,"Sa":4145,"Re":4467,"Ri":2574,"Ro":3970,"Ra":2483,"b ":7025,"a ":44327,"Yo":1017,"Sø":1957,"bø":1832,"i ":102076,"fy":1735,"gd":2544,"ge":89823,"ga":13601,"gb":1002,"bæ":1473,"fj":1452,"fl":7833,"ff":4196,"fi":15203,"bå":1373,"fh":1152,"fs":1525,"fr":23015,"fu":3863,"ft":15592,"fo":54992,"j ":3361,"gy":1588,"he":22182,"ha":30491,"gn":17858,"gl":6422,"gi":15986,"gh":4618,"gg":16392,"gu":4973,"gt":15761,"gs":16863,"gr":16831,"go":3646,"dt":26827,"du":5081,"dv":6155,"dy":2631,"g ":111365,"ea":8665,"eb":9181,"ec":4319,"ed":75055,"de":218146,"dd":6228,"dg":5250,"df":2420,"di":20969,"dh":1605,"dk":4450,"dj":1187,"dm":1943,"dl":12465,"do":7994,"dn":3058,"ds":29024,"dr":12885,"ew":2085,"ex":1370,"eu":3377,"ev":22509,"ey":2555,"ez":1521,"fa":10884,"h ":6139,"fe":8693,"eh":3909,"eg":21663,"ef":12940,"ee":3853,"el":91687,"ek":16142,"ej":9330,"ei":7265,"ep":6194,"eo":4491,"en":269379,"em":29799,"et":141301,"es":67128,"er":363438,"ca":5613,"e ":256545,"by":15888,"bs":2188,"br":14152,"bu":7411,"bo":14081,"bj":2063,"bl":20954,"bi":8379,"bb":2090,"be":49808,"db":9039,"da":25268,"f ":48816,"cu":1080,"ct":2015,"co":4730,"ck":5340,"ci":7534,"ch":11084,"ce":13957,"c ":1829,"az":1010,"ay":3869,"ba":15017,"d 
":83491,"at":56481,"as":18978,"ar":71549,"av":20381,"au":6593,"ak":8835,"al":58276,"ai":4230,"aj":1819,"ap":5563,"am":30979,"an":131829,"ac":6807,"ad":19629,"aa":1902,"ab":11909,"ag":21569,"ah":1853,"ae":3053,"af":53343,"nu":5799,"nt":31384,"ns":60694,"nr":1599,"no":15192,"nn":14628,"jø":1567,"ny":4222,"nv":2608,"oe":2046,"of":9995,"oc":5289,"od":18682,"oa":1343,"ob":5028,"om":68251,"on":52834,"ok":6768,"ol":36629,"oi":1528,"kæ":2283,"og":81449,"oh":1849,"ot":9072,"os":11517,"ov":19468,"ou":6113,"op":19430,"oo":2822,"or":109199,"kø":2883,"r ":303465,"ow":2461,"pe":19212,"pf":1434,"pa":11174,"pl":7694,"po":10483,"ph":1997,"pi":13460,"lå":3180,"læ":7117,"lo":18381,"lm":9161,"ll":54007,"ls":28805,"lr":2728,"lp":1954,"lv":5002,"lu":8894,"lt":13794,"ly":5517,"hø":4198,"o ":13391,"ma":28771,"mb":6952,"mh":1531,"me":79970,"mf":2516,"mk":2192,"ml":2687,"mi":18259,"mn":1374,"mm":27337,"mp":7909,"mo":10889,"mr":3179,"mt":7052,"ms":5339,"mu":15391,"my":1487,"p ":7331,"na":26783,"nb":2591,"nc":5256,"nd":115468,"ne":83213,"nf":2882,"ng":65185,"nh":5629,"ni":34813,"jæ":2156,"nj":1002,"nk":9744,"nl":3415,"nm":3852,"jy":1603,"ju":2882,"js":1575,"jo":3376,"jl":1331,"fæ":2974,"ki":13129,"gå":3551,"ke":62501,"kb":1624,"ka":33369,"m ":58614,"fø":16135,"ky":2485,"ks":11787,"kt":15601,"ku":9611,"kv":2686,"ko":24231,"kr":19731,"kk":11850,"kl":9428,"km":2041,"kn":3763,"li":70659,"hå":1642,"lh":1724,"lk":6427,"hæ":2028,"le":98770,"ld":24706,"lg":4872,"lf":2530,"la":47170,"lb":6128,"gø":1910,"n ":242497,"hr":2287,"hv":9222,"ht":1466,"hu":6980,"dæ":994,"hj":3040,"hi":7688,"hn":1182,"ho":15697,"id":25624,"ic":10962,"ib":6095,"ia":12086,"ig":59928,"if":7649,"ie":27731,"hy":1292,"k ":53472,"dø":3867,"ir":14180,"is":62018,"it":24973,"iu":2399,"iv":18874,"ik":30570,"il":63431,"im":7762,"in":99762,"io":27519,"ip":4261,"je":15216,"jd":2875,"få":1004,"iz":977,"l ":57442,"ja":3097,"tæ":3691,"tå":3749,"z ":1958,"sø":4093,"wi":999,"så":7469,"sæ":4934,"y ":17470,"rø":5891,"wa":2989,"we":1786,"ræ":12201,"rå":5052,"vi":25451,"vs":4402,"vn":10855,"vo":8670,"ve":64168,"vd":1809,"va":25040,"x ":2713,"ui":2354,"uk":3985,"ul":11807,"ue":7203,"uf":1429,"ug":8656,"ur":17879,"us":20761,"ut":8713,"um":12141,"un":39154,"up":6927,"ty":12222,"tz":1053,"tu":11275,"tt":18340,"tv":2718,"ub":5076,"ua":3814,"ud":21845,"uc":3424,"w ":2415,"to":25039,"tn":4704,"tm":1160,"tl":7804,"ts":12139,"tr":31125,"tf":1908,"te":116420,"tj":2329,"pæ":1318,"på":21476,"ti":89903,"th":7738,"v ":18910,"tb":1612,"ta":40351,"su":4879,"sv":6120,"ss":17007,"st":117117,"sy":9417,"sl":10786,"sk":90052,"sn":3225,"sm":6568,"sp":18285,"so":39577,"sr":1826,"sd":1411,"sc":3973,"sf":3067,"se":58416,"sh":5513,"sg":1978,"sj":1417,"si":34046,"u ":4030,"sa":20739,"sb":7435,"rr":10396,"rs":33077,"rt":26331,"ru":25205,"rv":5600,"ry":5622,"rp":2368,"ro":34074,"rn":23969,"rm":13546,"rl":8366,"rk":22061,"næ":3458,"nå":1618,"ri":66660,"rh":5594,"rg":18288,"rf":5902,"re":122058,"rd":25935,"rc":2308,"rb":9120,"ra":54038,"t ":215608,"mø":1338,"qu":972,"mæ":2988,"må":5133,"lø":4835,"s ":87017,"pt":4539,"pu":4408,"pp":6074,"pr":20775,"ps":3450,"zi":1962,"ze":1041,"za":1046,"væ":10057,"yg":7090,"ye":6589,"yd":11050,"ya":1357,"yb":1013,"tø":6010,"yt":3483,"ys":11680,"yr":5643,"yp":2457,"yn":4794,"ym":2136,"yl":4209,"yk":3734,"År":1234,"å 
":31271,"Øs":1854,"æv":1666,"æs":6691,"ær":17270,"æt":3678,"æn":8602,"æk":5318,"æl":6220,"æg":5488,"æd":2878,"ån":3755,"ås":1047,"år":11909,"åe":2110,"ål":3106,"åb":1474,"åd":6195,"ør":20663,"øs":5971,"øv":2281,"øj":4875,"øk":983,"øn":5238,"øl":3077,"øm":1629,"øb":5904,"øg":2353,"øe":2140,"ød":12599,"ø ":2608," Ga":1860," Ge":1892," I ":2530," Fo":3830," Fr":4772," Fi":2339," Fl":1645," Ha":6811," He":6623," Go":1179," Gr":3557," Gu":1534," Hu":1582," Ho":4265," Hi":1510," Je":1919," Ja":2336," Is":1181," In":3319," Ka":3227," Ke":1072," Ki":2888," Jo":2762," Ju":1199," La":5040," Le":2776," Li":3188," Kl":1820," Ko":8866," Kr":1743," Ma":7451," Mi":3885," Me":3307," Lo":2724," Lu":1285," Ne":2661," Na":2409," Ni":2020," Mo":3297," Mu":1554," Am":3639," An":3489," Al":3774," Aa":1413," Ba":5300," Au":1061," As":1298," Ar":2597," Be":3849," Bi":1733," Bl":1427," Bo":3066," Br":3787," Bu":1519," By":1610," Ca":3749," Ce":1081," Ch":3522," Co":3667," Da":6687," Di":2014," De":17357," Do":1505," El":1337," Et":1617," Er":1002," En":5365," Eu":1616," Fe":1132," Fa":2149," Sø":1956," Wo":983," Wi":1981," We":1181," Wa":1857," Yo":1015," Kø":2558," Or":1513," Po":2365," Pi":1093," Pe":2468," Pa":3735," No":4834," Ra":2473," Ro":3958," Re":4459," Ri":2570," Pr":4618," Sy":2476," Sv":1637," Su":1503," St":10885," Ta":2881," Th":4217," Ti":1411," Te":2130," Tr":2230," To":1975," Ru":1535," Sa":4126," Sh":1168," Si":2284," Sc":1958," Se":2497," So":7960," Sp":1954," Sk":3100," Sl":1954," Va":1707," Ve":4322," Vi":3432," Vo":984," Ty":1554," Un":2381," ja":1664," få":1001," in":18970," ik":2700," is":1666," ka":10401," fø":12841," ki":4060," ke":5007," jo":1235," fæ":1545," ju":2042," ha":16862," he":5872," gi":1512," gr":8357," gu":1731," dø":1180," hi":2424," hj":2649," ho":5868," hu":1995," hv":8333," ne":2244," na":5975," mu":3617," mo":5717," om":10735," og":60338," of":4764," ny":1418," nu":1920," no":8819," le":3614," li":12024," hå":1306," la":9917," kv":1456," ku":4034," km":1736," kl":2396," kr":4395," ko":12549," me":34653," mi":5228," hø":2581," ma":10653," lu":1103," ly":1210," lo":1691," af":47341," ad":2058," am":5476," an":9937," ap":1120," ak":1320," al":5943," au":1423," ar":5256," at":12715," ba":5812," bi":2763," be":21652," bo":2959," bl":16483," by":7166," bu":1513," br":6861," ca":2244," er":79580," et":23500," en":65390," el":13704," ek":1980," ef":3944," eg":1315," fe":2371," fa":4879," fu":1922," fr":19782," fo":43206," fl":5239," fi":8516," bå":1058," ge":4611," ga":3502," bø":1211," i ":86016," fy":1194," co":1546," ce":1786," ci":1062," da":14483," do":1734," dr":1947," de":85238," di":4041," dy":1311," væ":5438," tæ":998," sø":1940," ru":2426," sa":10706," se":8834," si":12012," sm":1227," sl":3070," sk":10508," sp":8070," so":28866," ra":1828," re":10118," nå":1108," ri":1551," næ":2233," ro":3386," pr":10950," lø":1660," s ":1355," må":3141," ov":4934," op":11111," or":3538," kø":1518," pe":4046," pa":3870," pl":3433," po":4785," lå":2186," pi":1184," læ":1975," så":2369," sæ":2010," va":14642," ve":13203," vo":1548," vi":7015," ræ":1334," ud":14434," ty":5331," tv":1505," un":6550," ta":3468," sy":6433," st":20417," sv":2066," su":1555," tr":6706," to":3836," th":1962," på":21281," ti":28004," te":4720," År":1234," Øs":1852," år":3117," øs":2023,"Årh":1078,"Fil":1000,"Et ":1446,"Eur":1467,"En ":3727,"Fra":1859,"Fre":1891,"For":2210,"Hel":1134,"Her":3429,"Han":2509,"Ind":1212,"Øst":1835,"Hol":1719,"As 
":1183,"Bay":1295,"Amt":2376,"And":1120,"Bye":1086,"Car":1184,"Ber":1053,"EF ":1275,"De ":1771,"Det":5562,"Den":6802,"Dan":4528,"Chr":1315,"Cha":1198,"New":1218,"Nor":4041,"Køb":2308,"Par":1507,"Pro":2885,"SA ":2039,"SAs":1153,"Joh":1172,"Kir":1207,"Kon":1132,"Kom":6411,"Lan":2730,"Man":1218,"Mar":2635,"Søn":999,"Syd":1794,"Sve":1310,"Str":999,"Sti":2399,"Sto":1709,"Sta":3406,"Ste":1353,"Tab":1274,"Sog":4806,"Sch":1301,"San":1256,"Reg":1718,"Ros":1109,"åen":1002,"åde":4772,"ål ":1259,"ånd":1467,"åne":2057,"åle":1126,"åre":1034,"ård":2059,"år ":6325,"Ver":993,"Ves":1255,"Uni":1434,"æde":2099,"Tys":1072,"æge":1409,"ægt":2411,"æll":2133,"ækk":2758,"æld":2168,"æns":1603,"æng":3355,"ænd":2342,"ær ":2495,"æsk":1388,"æse":1314,"ært":1291,"ærk":2357,"ære":6027,"ærd":1139,"ætt":1489,"æst":1815,"ævn":987,"The":2590,"bje":1890,"bil":1901,"bin":2468,"blo":1344,"ble":11852,"bli":3619,"bla":2817,"bol":3467,"bog":1211,"bor":5893,"bbe":1641,"be ":1587,"ban":5529,"bal":1468,"bag":1151,"bas":2113,"bar":1770,"bej":2245,"beg":2119,"bef":974,"bed":1902,"ber":10275,"ben":6123,"bel":6286,"bez":990,"bev":1288,"bes":5570,"bet":8125,"ca ":2352,"ce ":2816,"bri":2216,"bro":1760,"bra":1230,"bre":2307,"bru":5207,"bur":1375,"bun":1225,"bum":1745,"by ":6807,"bye":2309,"byg":4928,"am ":2554,"al ":9518,"ain":1470,"aj ":1010,"agt":3576,"anv":1364,"anu":1902,"ann":5158,"anm":3219,"ant":6958,"ans":23197,"ane":4038,"ang":12562,"ani":4609,"ank":4263,"ana":2737,"anc":2097,"and":39458,"amt":2475,"amm":7240,"aml":2016,"amp":2062,"ami":2635,"ame":7259,"ama":1553,"alv":1423,"alt":4977,"als":1981,"alr":2167,"alm":1522,"all":6438,"alg":1439,"ali":5360,"ald":6029,"ale":8208,"ala":1794,"alb":2973,"an ":17044,"akt":3338,"abe":4887,"ad ":3260,"ab ":2197,"aft":1820,"afs":1107,"afh":1105,"afi":1171,"age":8240,"adm":1030,"adi":3138,"ade":6627,"ag ":3450,"ads":1953,"ack":1318,"ach":1746,"ace":1610,"af ":43757,"aye":1830,"at ":17462,"are":4543,"ard":3649,"arb":2537,"ara":2794,"arm":1244,"arl":1801,"ark":7252,"ari":4342,"arv":1066,"arr":1598,"ars":2045,"art":9040,"au ":1067,"asi":1827,"ase":2004,"ask":1161,"ar ":24485,"as ":3170,"avd":1374,"avn":8812,"avi":1508,"ave":5203,"ay ":1009,"av ":1155,"ata":1987,"ast":4061,"ass":3627,"ato":1943,"ate":7241,"ati":16074,"att":4875,"ats":1271,"atu":2072,"jer":5562,"jek":1076,"jem":1846,"jen":1901,"jet":1025,"jan":1060,"je ":1645,"jde":2262,"jor":2291,"fær":971,"fæl":1337,"itu":1780,"itt":1519,"isk":22022,"ism":1473,"iss":2416,"ist":15812,"iv ":2118,"ita":3145,"ite":4554,"iti":5826,"ium":1528,"iva":1011,"ivi":1481,"ive":12562,"ipt":1296,"is ":8187,"ion":21894,"irk":6542,"isi":1236,"ise":3850,"isa":1650,"ire":2379,"it ":2641,"kil":3875,"kib":1205,"kin":2032,"kir":1742,"går":2699,"kis":983,"km ":1307,"ked":1397,"kel":4377,"ken":10401,"kes":1300,"ker":9106,"ket":3222,"ke ":28879,"kra":3109,"kre":5594,"kt ":3134,"kse":1892,"kro":1365,"kri":8221,"kov":1285,"kor":3059,"kon":5299,"kom":8214,"kol":2985,"ks ":2322,"kni":2413,"kke":10909,"klu":1988,"kle":1997,"kla":3032,"kli":1244,"jyl":1350,"jul":1124,"kba":1487,"kat":1697,"kar":1404,"kas":971,"kan":11432,"kal":6243,"kam":1128,"kab":5579,"ka ":2541,"før":6664,"fød":7830,"føl":1523,"ham":1255,"han":5336,"hal":2007,"hav":7061,"har":10554,"he ":3841,"hel":2400,"hed":6692,"her":3250,"hen":2397,"hin":1447,"his":2209,"hje":2112,"gle":3411,"gn ":5722,"gla":1189,"gni":1875,"gne":9082,"gs ":1501,"gsb":1331,"gsk":1233,"gru":5085,"gra":4027,"gt ":9775,"gre":2783,"gst":2115,"gte":3224,"gti":1404,"grø":1000,"gså":4604,"gus":1029,"græ":2818,"ial":2412,"ian":3001,"ic 
":1068,"ibo":1076,"ølg":1529,"øn ":1089,"id ":2460,"ibe":2142,"ia ":3660,"øje":1642,"iet":2644,"iel":2523,"ien":9683,"ier":4656,"ig ":12921,"ift":4749,"ør ":3471,"ici":1501,"ich":2160,"ice":1664,"ie ":4513,"ica":1606,"ids":2132,"idt":1926,"idl":3701,"idi":1064,"ide":10399,"idd":1704,"ønd":1642,"øst":4212,"il ":24604,"im ":977,"ika":7541,"ige":19623,"iga":1060,"igh":3457,"igi":1414,"igg":9753,"igt":6078,"igs":1981,"ign":1642,"øre":4495,"ørs":6046,"ørr":1963,"ørt":1508,"ik ":5837,"ørn":1192,"ime":1521,"ind":23824,"ina":3988,"int":3500,"ins":7738,"ine":6155,"ing":36509,"ini":3271,"ink":1131,"iod":1188,"ikl":2029,"ikk":5095,"ike":2959,"ikb":1260,"in ":7601,"ikt":1780,"iks":1269,"ilo":3003,"ill":15072,"ilk":1504,"øve":1134,"ilm":4941,"ilh":993,"ili":3480,"ild":2323,"ile":1492,"io ":1215,"ils":1343,"hol":6737,"hov":3711,"hri":1405,"hvo":4960,"hum":1081,"hun":1070,"hus":3072,"hve":1568,"hvi":2196,"døs":1339,"død":1050,"ffe":2245,"ffi":1018,"fes":1566,"fer":1682,"fen":1036,"fas":1001,"fat":2864,"far":1600,"fam":1418,"fan":1048,"fal":1044,"ezi":1048,"evæ":1177,"eta":3145,"ete":8817,"eti":2099,"esp":2172,"est":19859,"ødt":7615,"ess":3934,"esv":1081,"ev ":11307,"etr":1299,"ets":3278,"ett":4721,"ety":1926,"øen":1132,"ew ":1416,"eve":5776,"eva":1631,"øge":1365,"evi":1173,"øj ":1036,"ey ":1638,"er ":229484,"epa":1085,"eor":1469,"eol":1033,"ød ":1370,"es ":24995,"øbe":3709,"ept":1210,"erk":1552,"erl":2388,"eri":19046,"erg":5102,"erh":1890,"ere":36721,"erf":2743,"erd":3362,"era":4483,"erb":2464,"et ":110723,"esk":4356,"esl":1065,"esi":2775,"øde":1837,"ese":1937,"erv":2764,"eru":2635,"err":4661,"ert":4521,"ers":15033,"ern":16903,"erm":1970,"ero":1448,"egå":1006,"ekr":1629,"eks":4054,"ekt":4817,"en ":174071,"ela":1844,"eld":1116,"ele":6394,"eli":11128,"ell":22146,"elv":2251,"els":21491,"elt":4815,"emb":3551,"ema":1766,"eme":3075,"emm":3388,"emo":1300,"emi":2222,"emt":1052,"emp":1313,"ems":1366,"enf":1295,"ene":9879,"enh":4546,"eng":3572,"enb":1247,"ena":1550,"end":24965,"enc":1233,"enn":5553,"enk":1120,"eni":2823,"ens":17443,"ent":14377,"enr":1262,"egn":6321,"ege":4459,"egi":3950,"eha":1147,"egr":1976,"eho":1258,"eis":2241,"ein":2075,"ejd":2400,"el ":14709,"ejs":1068,"ejl":1164,"eje":2612,"em ":8110,"gis":2194,"giv":4136,"gin":973,"gio":2383,"gie":1747,"ghe":2580,"gge":15510,"gi ":1776,"gen":19301,"geo":1146,"get":6054,"ger":26388,"ges":3499,"gem":1137,"gel":6514,"gde":1133,"ge ":22940,"gad":1042,"gar":1035,"gav":1126,"gam":1188,"gan":5170,"fte":8906,"fun":1949,"ft ":4358,"fra":16811,"fre":2876,"fri":2352,"for":48487,"fol":2294,"fod":2203,"fle":2554,"fla":1188,"flo":1580,"fly":1416,"båd":1117,"fic":1488,"fil":4788,"fik":1836,"fin":3119,"fir":1330,"fis":991,"da ":2439,"dbr":1439,"dbo":3204,"de ":51365,"dby":3214,"dal":1321,"dag":2881,"dat":1966,"dan":12624,"dam":1233,"dda":1566,"dde":3541,"com":1147,"ch ":2261,"cer":3808,"ces":1087,"cen":3123,"cha":1908,"cia":1876,"ck ":2373,"cie":1532,"che":2404,"cke":1007,"ed ":35220,"ebe":1273,"ebo":1021,"ebr":1930,"eal":1397,"eat":1418,"efi":1531,"efo":3934,"eft":4164,"een":1037,"edl":1944,"edi":2262,"ede":18829,"eda":1072,"edt":1636,"eds":6584,"edr":1723,"eci":1115,"ece":1005,"dyr":1288,"dvi":2440,"dve":2150,"don":1291,"dom":2065,"ds ":4746,"dmi":1103,"dni":1787,"dst":6032,"dsp":2071,"dti":2638,"dte":2421,"dtr":1052,"duk":1051,"duc":1998,"dri":1805,"dra":1347,"dt ":18171,"dre":7141,"dro":1133,"dsk":3064,"dsb":2127,"dsa":1334,"dse":2268,"dgi":2184,"dia":1005,"der":53839,"des":7277,"det":27574,"deb":1003,"deh":997,"del":20136,"den":48394,"dem":1809,"di 
":1057,"dga":1116,"dle":3278,"dla":1839,"dkr":1756,"dli":6294,"din":2827,"dio":1576,"dis":4268,"dit":1114,"die":2342,"dig":3302,"rhu":1754,"rho":1270,"rga":2811,"ri ":2041,"rgi":1032,"rge":3583,"ret":16711,"res":7617,"rev":3074,"rfa":1970,"rfo":1099,"rds":1442,"rdv":999,"rg ":8008,"reb":1759,"rea":2289,"ref":3055,"red":12454,"rei":2319,"reg":5210,"rem":3072,"ren":13272,"rek":1859,"rel":3506,"rer":8658,"rep":1174,"rda":1335,"rdl":1187,"rdi":2720,"rde":7501,"re ":32700,"rbu":1055,"rbr":1012,"rd ":6821,"ras":1016,"rat":6278,"rbi":1442,"rba":1156,"rbe":2916,"rag":1612,"ran":8553,"ram":2785,"ral":3419,"rak":1777,"raf":2970,"rad":2370,"rs ":4872,"ros":1298,"rot":1028,"rom":2663,"ron":3183,"rop":3118,"rov":4008,"rod":3399,"roc":1755,"rol":2335,"rof":1572,"rog":3109,"rna":2635,"rne":13393,"rni":1274,"ro ":1858,"rma":3375,"rme":5215,"rli":2882,"rla":2011,"rn ":3815,"rks":2467,"rko":1321,"rki":1210,"rke":7276,"rka":1044,"rm ":1940,"næs":1010,"nær":1381,"rip":1436,"rio":1666,"når":1017,"rit":3128,"ris":7929,"riv":2443,"rig":7130,"ril":1328,"rik":9410,"rin":12007,"rim":1705,"ria":2312,"rib":1119,"ric":1788,"rid":1336,"rie":7227,"rif":1151,"rk ":5973,"rug":4144,"rue":1823,"rup":5078,"run":5278,"rum":2612,"ruk":1154,"rus":1691,"rva":1035,"rvi":1176,"rve":2295,"ry ":1399,"rsk":6329,"rsl":985,"rsi":2088,"rso":2553,"rsa":1053,"rse":2094,"rta":1313,"rst":8534,"rte":6848,"rti":3566,"rts":2294,"rt ":7502,"rri":1525,"rre":6580,"sag":1459,"sal":1002,"sam":8006,"sbe":2183,"san":3521,"sat":3398,"ryk":1357,"sho":1591,"sie":1150,"sid":5798,"sk ":33968,"sit":3034,"sis":4442,"sin":4733,"sio":2930,"sik":3367,"sig":4968,"sby":2385,"se ":14360,"sch":1769,"ser":12739,"ses":2382,"set":2238,"sfo":1331,"sep":1174,"sen":14543,"sem":1108,"sel":4270,"sek":1218,"spo":1957,"spr":2788,"spe":2284,"slæ":1631,"spi":8255,"spa":1108,"sol":1008,"som":26634,"son":4747,"sor":1570,"sog":2156,"st ":15643,"sli":981,"slu":1369,"sky":1101,"sla":3115,"sle":2180,"ski":4168,"skl":1367,"sko":3585,"skr":6518,"sku":2545,"ska":8399,"ske":25508,"sni":1817,"sma":1205,"sme":2348,"stæ":1164,"stå":3319,"syd":3714,"stø":3565,"syn":1352,"sys":1619,"sse":5630,"ssa":1137,"sso":1208,"ssi":4603,"ste":30468,"sta":15172,"stn":1107,"sto":7688,"sti":13739,"stj":1076,"stl":3640,"stu":1392,"str":12753,"sty":2236,"sva":1612,"sve":1375,"svi":1847,"tal":8954,"tag":2922,"tab":1032,"tad":2960,"tat":8263,"tar":2918,"tan":5101,"tam":1561,"te ":23992,"ta ":2303,"pa ":1106,"køb":1295,"pe ":3064,"par":4298,"pan":2233,"lå ":2263,"pec":1027,"pen":2557,"per":8319,"pel":1218,"pla":4596,"ple":1353,"læg":3305,"læn":1040,"lær":1638,"pil":7538,"pis":1494,"por":2733,"pop":989,"pos":979,"pol":3456,"ppe":4792,"pst":1130,"pte":1387,"pti":1074,"pri":5464,"pre":2422,"pro":9553,"pun":1123,"præ":2064,"løb":1835,"løs":1373,"mæn":1058,"mær":1139,"mål":1925,"mån":1657,"ra ":16943,"ngi":1084,"ngl":1961,"ngs":7820,"ni ":1743,"nge":21530,"ngd":1418,"nha":2979,"nhe":1266,"nel":5371,"nek":1429,"nen":7128,"nem":2182,"ner":14377,"net":9752,"nes":8157,"ng ":25397,"ned":2644,"nfo":1407,"nce":2989,"ne ":28324,"ndt":10291,"ndr":5292,"nds":9100,"ndo":2080,"ndl":3623,"ndk":1968,"ndi":3782,"nde":48861,"ndb":3975,"nda":1742,"nal":7713,"nan":1050,"nar":1016,"nd ":19511,"nav":5158,"nat":4037,"nas":1015,"na ":3264,"nve":1870,"nus":1183,"nto":1311,"ntr":3209,"nti":2579,"ntl":1237,"nta":2683,"nte":11676,"nsp":982,"nst":8074,"nse":6294,"nsi":1207,"nsk":23035,"nsb":1130,"nt ":5652,"ns 
":13305,"nog":1664,"nom":1719,"nor":6469,"nov":1161,"nne":10196,"nni":1944,"nma":3344,"nli":1178,"nla":1269,"nke":3504,"nkt":1750,"jæl":1755,"nie":2401,"nk ":979,"niv":1911,"nis":7071,"nit":1685,"nio":1012,"nin":14093,"nik":1101,"ogs":5045,"ogr":2357,"ogi":3275,"ogl":1041,"ogn":7366,"oge":2091,"ol ":1201,"oci":1344,"ock":2154,"ode":4999,"ods":1493,"of ":2459,"odb":2119,"odu":2868,"og ":59080,"oft":2765,"off":2072,"ofe":1326,"od ":3992,"obe":1539,"nyt":1313,"ote":1650,"ott":1354,"oto":1594,"ost":2113,"ov ":1326,"osk":985,"ose":1440,"ovi":1957,"ovs":2306,"ove":11716,"oun":1213,"our":1852,"opl":974,"ope":1372,"opf":1235,"opa":1354,"os ":2264,"opr":3026,"opt":1105,"ops":1721,"or ":31324,"ork":2427,"orl":1427,"orm":8791,"orn":2079,"ord":14176,"ore":6583,"orf":2453,"org":9163,"orh":1310,"ori":4799,"ort":7693,"ors":6893,"orv":1018,"ot ":1242,"orb":3261,"ora":1624,"old":10067,"on ":17437,"oli":4334,"oll":3644,"olk":2982,"ole":3254,"ols":1497,"olm":1298,"olo":4008,"oka":1272,"om ":32517,"ona":4226,"ond":1810,"one":10149,"ong":2553,"oni":2635,"ono":1620,"ons":4853,"ont":2499,"oma":2195,"ome":4668,"omi":1702,"omh":1267,"omm":13262,"omk":1807,"omp":2560,"omr":2620,"oms":1669,"op ":2057,"la ":1857,"gør":1749,"le ":18274,"lde":9460,"ldt":2484,"lds":2126,"lac":1163,"lad":4353,"lag":4790,"lan":22791,"lam":1129,"lar":1164,"lat":2523,"las":2596,"lav":1728,"lba":1173,"ld ":5587,"lbo":1181,"lbu":1855,"kvi":1085,"kva":1167,"kun":3121,"kul":1926,"kte":2817,"kst":2005,"kso":1301,"kue":1399,"ktr":1143,"kti":4258,"kto":1463,"ktø":1090,"ls ":1795,"lok":1028,"lom":2968,"lod":2232,"log":4064,"los":1196,"lot":1245,"lov":1376,"lmi":1310,"lme":1724,"lti":975,"ltu":1430,"lub":2283,"lsk":4901,"lst":4539,"lta":1027,"lte":2079,"lse":13829,"lre":2112,"lt ":6296,"lge":2455,"li ":1269,"lev":14315,"les":5717,"let":5891,"ler":27840,"lem":6534,"len":6881,"lek":2168,"leg":1270,"led":4289,"lla":4918,"lle":37621,"lli":6167,"lke":3500,"hæn":1282,"lm ":3777,"ll ":2166,"lit":4876,"lis":3436,"lin":9392,"hån":1024,"liv":2785,"lia":1325,"lik":2160,"lil":1691,"lig":34196,"lie":4111,"ma ":1277,"mag":1391,"mar":6718,"mas":1505,"mal":1831,"man":9443,"mat":3311,"mbe":3163,"me ":4862,"med":21647,"meg":1181,"met":7243,"mes":4874,"mer":15160,"mel":6023,"men":16847,"lut":1751,"lys":1458,"høj":2362,"hør":1554,"mpe":2509,"ms ":982,"mod":4612,"mon":1609,"mor":1053,"mt ":4527,"mst":1872,"mrå":2725,"mus":3381,"mul":1155,"mun":9999,"mhe":1090,"min":6763,"mil":3215,"mis":2297,"mid":2156,"mle":1475,"mkr":1669,"mmu":9493,"mme":16141,"vær":6621,"zir":1025,"væg":1206,"ytt":1956,"yst":3197,"ysk":5330,"yre":2022,"yr ":1010,"yde":3637,"yer":2527,"yen":3113,"yd ":1599,"ykk":1287,"yll":1699,"ynd":1352,"ygg":4471,"tør":4175,"tær":1046,"tæn":1204,"tår":2373,"sæt":2008,"så ":5902,"sær":1369,"røn":1765,"rød":1365,"vst":2580,"vir":2222,"vik":2041,"vil":2648,"vin":4397,"vig":1873,"råd":3726,"vid":3491,"vis":5672,"ræs":2950,"ræn":2483,"ræk":2614,"vn ":4721,"vne":3693,"vns":1255,"vok":971,"vor":5368,"ver":19395,"ves":5050,"vet":6585,"vej":1947,"ven":7443,"vem":1052,"vel":2142,"ved":12182,"vde":1584,"ve ":6698,"val":2588,"van":4077,"var":14654,"vat":1120,"usk":1540,"usi":3385,"use":2336,"ust":3006,"uss":1778,"uti":1400,"ute":1658,"us ":6486,"ut ":1499,"ure":2206,"urg":1747,"uri":1026,"urn":1614,"uro":2454,"urt":1085,"ur ":2594,"upp":2843,"umm":1471,"umb":1127,"ume":1120,"uns":1753,"unk":1906,"uni":2675,"und":15503,"una":2326,"ung":3315,"une":7027,"up ":2829,"ukt":2014,"um ":5774,"ult":2011,"uli":1842,"ule":1205,"uld":1327,"un 
":2311,"uge":2701,"ugl":1024,"ugu":1009,"ugt":1589,"udb":1145,"udd":1881,"ude":3316,"udg":3260,"udi":978,"ue ":1121,"uce":1989,"uer":2229,"ues":1633,"udv":2056,"uds":2177,"udt":1177,"uel":1000,"ub ":1257,"uar":1965,"ubl":1141,"ubb":1133,"ud ":2370,"typ":1409,"tyr":2052,"tys":3953,"ty ":1687,"træ":3121,"tur":4777,"tut":1397,"tun":1107,"tud":1198,"tyd":2007,"ts ":4804,"tre":5166,"tra":6691,"tri":4615,"tru":5093,"tro":3770,"try":1478,"tv ":1126,"tte":14611,"to ":3441,"tni":3342,"tne":990,"tof":1468,"tod":1141,"tog":1011,"ton":2739,"tol":1446,"tor":9016,"top":1001,"til":27566,"tik":5022,"tif":3307,"tie":1611,"tig":3596,"tit":2754,"tis":8549,"tin":4128,"tio":14633,"thu":1021,"tia":1323,"tid":6831,"tiv":4885,"tje":1256,"tli":4586,"tla":1138,"tle":1766,"tem":5581,"ten":11717,"tek":2453,"tel":3444,"teg":5502,"ted":5198,"th ":1733,"tet":9121,"tes":3522,"ter":41030,"på ":20701,"ti ":3596,"the":2230},"n_words":[9674395,11309170,8090238],"name":"da"}
\ No newline at end of file
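Each profile added in this change is a single JSON object with three top-level keys: "freq" (occurrence counts for 1-, 2- and 3-character n-grams), "n_words" (the three corresponding n-gram totals) and "name" (the language code, e.g. "da" above). A minimal Python 3 sketch for inspecting one of these files directly, assuming the tree layout shown in this change (the detector's real loading path is detector_factory.py):

    import json
    from collections import Counter

    # Path follows the file list of this change; adjust to your checkout.
    PROFILE_PATH = "nlp_resource_data/langdetect/profiles/da"

    # One JSON object per profile file; json.load on a text-mode handle
    # returns str keys directly under Python 3.
    with open(PROFILE_PATH, encoding="utf-8") as f:
        profile = json.load(f)

    print(profile["name"])     # language code, e.g. "da"
    print(profile["n_words"])  # totals of 1-, 2- and 3-gram counts

    # Ten most frequent n-grams recorded for the language.
    for ngram, count in Counter(profile["freq"]).most_common(10):
        print(repr(ngram), count)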
diff --git a/nlp_resource_data/langdetect/profiles/de b/nlp_resource_data/langdetect/profiles/de
new file mode 100755 (executable)
index 0000000..94ebdbb
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"D":565432,"E":313466,"F":303463,"G":350968,"A":448295,"B":452862,"C":218833,"L":272371,"M":385363,"N":224760,"O":171336,"H":264836,"I":169730,"J":158568,"K":336559,"U":140118,"T":243403,"W":221377,"V":203138,"Q":15370,"P":333284,"S":807982,"R":282432,"Y":16379,"X":9646,"Z":96079,"f":1052401,"g":2011313,"d":3686275,"e":12779820,"b":1254948,"c":2286896,"a":5009484,"n":7859499,"o":2714321,"l":3150399,"m":2035145,"j":85201,"k":1042619,"h":3128015,"i":7539051,"w":833806,"v":645462,"u":3015383,"t":5309288,"s":5151894,"r":6424621,"q":20503,"p":756433,"z":844832,"y":283697,"x":72882,"Ü":10506,"ß":107796,"Ö":13881,"í":8823,"é":38695,"ä":358815,"á":12062,"ü":397123,"ö":249595," l":98174," m":186199," n":127225," o":112357," h":112970," i":1142912," j":27689," k":107425," d":1491422," e":826321," f":160920," g":218820," a":569210," b":294630," c":26306," z":200898," u":467824," t":56510," w":322251," v":371555," p":57089," s":347463," r":56139," J":156270," K":328020," H":253550," I":146651," N":212076," O":159226," L":260916," M":370185," B":437655," C":196607," A":413316," F":291285," G":334059," D":543793," E":298852," Z":93212," Y":15401," S":736164," R":264582," Q":14562," P":316134," W":214460," V":189915," U":132267," T":228797," ä":10185," ö":18488," ü":31572," Ö":12893," Ü":10208,"A ":22760,"Da":90985,"Cl":9563,"Co":64248,"Ch":45017,"G ":11776,"Du":11229,"Do":28882,"Dr":19902,"De":166635,"Di":188563,"Fe":38252,"Fa":46184,"Eu":13931,"Er":67899,"Es":30499,"En":32393,"Ei":81512,"El":17463,"Ge":151044,"Ga":33294,"I ":13640,"Fu":26023,"Fr":58870,"Fo":32048,"Fl":33310,"Fi":36767,"B ":10700,"C ":14623,"Au":73540,"Ar":58134,"At":9258,"As":17569,"D ":9913,"Ba":87454,"Ab":25174,"Ad":11158,"Am":21201,"An":55559,"Ap":14324,"Al":66179,"Bu":55758,"Br":45253,"Ca":33967,"Bi":31885,"Be":143843,"Bo":32121,"Bl":15582,"Ku":30221,"Kl":25047,"Kr":46024,"Ko":56995,"Le":49783,"Li":55021,"La":89578,"Lu":17469,"Hö":9378,"Lo":26047,"Me":64987,"Mi":68245,"Ma":116460,"Mu":29273,"Mo":46837,"Ni":23664,"Ne":44049,"Na":66351,"P ":9001,"No":53500,"Ok":13364,"Ol":9042,"Ob":22144,"Gi":11262,"Gl":11809,"Gr":74845,"Go":17306,"Gu":11007,"Ha":86398,"He":62608,"Hi":21572,"Dé":12333,"Ho":42207,"Hu":11017,"Im":11278,"In":78332,"Is":10695,"Ja":77663,"Je":11227,"Jo":24131,"Ju":32751,"Ka":74688,"Ki":37983,"Ke":14966,"Um":8765,"Un":54096,"Tu":10377,"US":39827,"Tr":32100,"To":24574,"Th":35305,"Ti":19953,"Te":52263,"Ta":27705,"V ":9454,"Sy":15026,"St":193472,"Su":18450,"Wo":21476,"Wi":47351,"Wa":44055,"We":77082,"Vo":36505,"Vi":22665,"Va":14430,"Ve":104056,"Mä":13878,"Pu":10703,"Pr":83211,"S ":48175,"Pe":31242,"Pf":21717,"Pa":56041,"Pl":15401,"Po":48441,"Pi":16961,"Ph":15188,"Os":17404,"Or":58036,"R ":11116,"Kö":16414,"Se":64611,"Sc":120209,"Si":90352,"Sh":10028,"Sp":54743,"So":39076,"Ru":19547,"Sa":63564,"Re":106782,"Ri":24028,"Rh":15491,"Ro":44332,"Qu":12167,"SA":9500,"Ra":38143,"Mü":11354,"b ":47610,"a ":292619,"Sü":22392,"Ze":31894,"Zi":10309,"Zu":19954,"Zw":11243,"i ":187041,"ge":787899,"ga":94286,"fl":45063,"fg":17796,"ff":81186,"fi":73360,"fs":18166,"fr":68277,"fu":19769,"ft":140067,"fo":55471,"he":941699,"hb":12648,"ha":293469,"gn":22363,"gl":78298,"gk":13841,"gi":130788,"gh":20146,"gg":10845,"gu":58531,"gt":76690,"gs":110120,"gr":125748,"go":29970,"dt":100722,"du":88105,"dw":22283,"g 
":403400,"ea":71177,"eb":165228,"ec":131420,"aß":17528,"ed":151386,"de":1831853,"dg":9266,"di":398882,"dh":11418,"dk":27018,"dl":45536,"do":69104,"dn":16795,"ds":46937,"dr":54467,"ew":66696,"ex":29126,"eu":212719,"ev":25427,"ey":30641,"ez":98518,"fa":86743,"h ":463041,"fe":136348,"eh":196157,"eg":232810,"ef":68699,"ee":58149,"el":553287,"ek":82123,"ei":1578594,"ep":49360,"eo":41207,"en":2309793,"em":396788,"et":380175,"es":772223,"er":2971302,"ca":38911,"bz":11720,"e ":2082290,"bs":31297,"br":69597,"bu":78689,"bt":20985,"bo":31335,"bl":45392,"bg":11719,"bi":136887,"be":567764,"db":9156,"da":176290,"f ":156375,"cu":10926,"ct":17144,"co":32151,"ck":133244,"ci":25902,"ch":1933159,"ce":45478,"c ":18586,"az":20071,"ay":30095,"ba":134727,"d ":722683,"at":393648,"as":323772,"ar":487358,"ax":8820,"aw":12659,"av":27657,"au":496496,"ak":51121,"al":571224,"ai":78558,"ap":48798,"am":283487,"an":921405,"ac":186135,"ad":167659,"aa":47275,"ab":98737,"ag":124899,"ah":148615,"ae":30527,"af":117969,"nu":74735,"nt":478161,"ns":309893,"nr":20689,"no":90394,"nn":190420,"nz":136146,"ny":13727,"nw":36447,"nv":14218,"oe":10022,"of":64141,"oc":78480,"od":135885,"oa":14277,"ob":51225,"om":176521,"on":667125,"ok":27868,"ol":220141,"oi":24320,"og":71416,"oh":66434,"ot":88470,"os":118609,"ov":54069,"ou":69753,"op":68308,"oo":24531,"or":433879,"r ":1993661,"ow":58643,"oz":19078,"kö":10263,"pe":109404,"pf":29697,"pa":81077,"kü":12292,"pl":26694,"po":71059,"ph":41272,"lä":41208,"pi":98645,"lo":124488,"ln":33301,"lm":52275,"ll":300411,"ls":178640,"lr":10222,"lp":20136,"lv":15360,"lu":97812,"lt":212969,"lz":28989,"hö":49290,"ly":20012,"o ":123676,"iß":9834,"ma":207608,"hü":11610,"mb":77222,"me":410943,"mf":12265,"ml":10335,"mi":238709,"mm":108620,"mp":57358,"mo":55705,"mt":36178,"ms":30817,"mu":48362,"p ":27276,"na":304264,"nb":61884,"nc":42156,"nd":1104110,"ne":802403,"nf":54695,"ng":588331,"nh":45861,"ni":342822,"nk":102273,"nl":39986,"nm":15381,"ki":44691,"ke":186644,"ka":155408,"fü":107647,"m ":660573,"ks":34677,"kt":124687,"ku":45967,"ko":74144,"kr":80423,"kl":44918,"km":33455,"li":562165,"hä":31781,"lh":12595,"lk":36060,"le":501436,"ld":86618,"lg":41891,"lf":32687,"la":312768,"lc":19357,"lb":59934,"n ":2940210,"hr":245096,"hs":72646,"hw":53275,"ht":189239,"hu":73471,"hk":8886,"hi":180846,"hn":163077,"ho":102479,"hl":130889,"hm":50687,"id":80042,"eß":9854,"ic":534870,"ib":46123,"ia":115654,"ih":35970,"ig":262184,"if":71985,"ie":1150659,"hy":13546,"k ":140095,"ir":170539,"is":1295546,"it":570135,"iu":24806,"iv":65304,"ik":165059,"il":298251,"im":289814,"in":1828758,"io":214368,"ip":33386,"je":30947,"fä":14922,"iz":46043,"l ":312169,"ja":19963,"tä":51288,"xi":15762,"z ":144384,"sü":19128,"sä":14628,"wi":176498,"wo":47417,"wu":73059,"rö":38038,"y ":99455,"wa":198781,"rü":63451,"we":246633,"rä":41054,"vi":76382,"vo":299196,"uz":17658,"ve":197477,"va":38019,"x ":21295,"ui":24830,"uk":24301,"ul":92478,"ue":71615,"uf":143869,"ug":74097,"uh":10714,"ur":375391,"us":360270,"ut":230981,"um":178633,"un":921062,"up":64261,"ty":34073,"tz":131414,"tu":172266,"tt":172596,"tw":53648,"ub":43047,"ua":44618,"ud":39409,"uc":128427,"w ":36904,"to":167744,"tn":11002,"tm":18841,"tl":110769,"ts":280176,"tr":215121,"tp":9922,"tg":32640,"tf":25417,"te":1203919,"tk":15341,"ti":420789,"pä":14388,"th":119533,"v 
":14300,"tb":22514,"ta":360262,"su":36834,"sv":12561,"ss":289880,"st":1222626,"sy":19371,"sz":17514,"sw":26360,"sl":40519,"sk":49450,"sm":28595,"sp":131491,"so":116426,"sr":21109,"sd":16337,"sc":827974,"oß":20955,"sf":20622,"se":456715,"sh":47160,"sg":56341,"si":322120,"nö":10879,"rz":76103,"u ":108662,"sa":109787,"sb":46727,"rr":85173,"rs":267473,"rt":385464,"ru":174667,"rv":18995,"rw":59814,"ry":19301,"rp":30886,"ro":256692,"rn":187691,"rm":96942,"rl":93135,"rk":131828,"ri":506618,"rh":58397,"rg":185502,"rf":74192,"re":658545,"rd":287337,"rc":86223,"rb":105570,"ra":423969,"t ":1627252,"mö":8912,"qu":17893,"mä":18127,"lü":10612,"s ":1183348,"pt":54028,"pu":30332,"pp":47091,"pr":99340,"ps":12738,"zä":9571,"zö":21005,"wä":21780,"zi":99549,"zb":8992,"ze":213386,"za":20320,"zw":56096,"zu":170081,"zt":32056,"zo":13315,"ye":15230,"uß":27824,"ya":11551,"tü":14407,"yt":8983,"ys":38183,"yr":10616,"yp":13299,"yn":15755,"ym":18550,"yl":11401,"äc":27469,"Üb":9926,"ßb":17940,"ße":45162,"ßt":14023,"ép":12471,"ät":44405,"äu":30285,"äl":29341,"än":69784,"äs":15220,"är":37279,"äd":10841,"äg":13766,"äf":13624,"äh":39944,"ün":62239,"üs":17438,"ür":122114,"üt":16867,"üb":39622,"üc":24866,"üg":9236,"üd":43250,"üh":43336,"öß":14937,"ör":67546,"ös":54393,"ön":22557,"öl":13380,"öm":9902,"öh":14808,"öf":13450," Ga":33168," Ge":150714," Fo":31900," Fu":25992," Fr":58787," Fi":36574," Fl":33229," Ha":86258," He":62512," Go":17225," Gr":74652," Gu":10914," Gi":11192," Gl":11774," Hu":10983," Ho":42078," Dé":12331," Hi":21530," Je":11187," Ja":77578," Is":10643," Im":11222," In":77993," Ka":74525," Ke":14835," Ki":37849," Jo":24054," Ju":32722," La":89327," Le":49575," Li":54819," Kl":25011," Ko":56929," Kr":45960," Ku":30157," Ma":116111," Mi":68090," Me":64810," Lo":25969," Hö":9372," Lu":17420," Ne":43845," Na":66181," Ni":23603," Mo":46674," Mu":29161," Ap":14296," Am":21144," An":55404," Al":65979," Ad":11104," Ab":25090," Ba":87247," Au":73430," At":9235," As":17394," Ar":57936," Be":143584," Bi":31731," Bl":15450," Bo":31991," Br":45134," Bu":55625," Ca":33546," Ch":44814," Cl":9441," Co":63874," Da":90740," Di":188080," De":166199," Dr":19868," Do":28619," Du":11174," El":17420," Ei":81331," Es":30480," Er":67840," En":32299," Eu":13899," Fe":38170," Fa":46079," Sü":22387," Wo":21330," Wi":47141," We":76915," Wa":43907," Zu":19747," Zw":11238," Ze":31828," Zi":10283," Kö":16410," Os":17332," Or":58009," Po":48260," Pl":15331," Pi":16916," Ph":15079," Pf":21701," Pe":31180," Pa":55779," No":53429," Ol":9032," Ok":13357," Ob":22103," Ra":37989," Mü":11350," Qu":12048," Ro":44140," Re":106629," Ri":23976," Rh":15474," Pr":83041," Pu":10674," Mä":13877," Sy":14940," Su":18414," St":192451," Ta":27626," Th":35151," Ti":19864," Te":52071," US":39409," Tr":31991," To":24396," Ru":19497," Sa":63396," Sh":9926," Si":90133," Sc":119896," Se":64226," So":38900," Sp":54582," Va":14382," Ve":103873," Vi":22523," Vo":36449," Tu":10321," Um":8742," Un":54031," je":12055," im":207684," in":455196," is":439990," it":10403," ka":23448," fü":85353," ha":41553," he":33774," gi":8908," gl":12278," gr":31732," ih":21981," hi":13984," ho":9452," ni":22730," ne":14510," na":48500," od":67197," of":20927," nu":10713," no":19195," le":19412," li":42027," la":25753," ku":12534," km":15800," kl":13510," ko":18379," me":26065," mi":107786," ma":27726," ab":26560," am":69685," an":87925," al":101826," au":256133," ba":8860," bi":45326," be":204994," br":14611," bz":9809," er":81246," et":21454," es":15402," en":55368," ei":604709," 
eh":21203," fr":36324," ge":155650," ch":11442," da":104895," dr":12440," de":1066350," di":258330," du":31949," zu":137283," zw":39753," ze":10132," sü":17881," ru":14611," se":68014," sc":24771," si":106825," sp":27646," so":41648," re":21422," pr":15398," po":16457," wa":91486," we":72625," wu":68805," wi":72114," ve":73784," vo":278143," vi":13609," um":38406," un":417759," st":39848," tr":12290," th":15725," te":8776," Üb":9908," ös":13916," üb":31186,"Fer":11772,"Fil":18131,"Fam":19152,"Es ":23376,"Eur":10652,"Ein":58080,"Eis":15092,"Er ":28197,"Ent":11856,"Gem":51515,"Geb":17190,"Ges":25022,"Ger":10345,"Gen":10761,"Gat":12348,"Fuß":15716,"Flu":13729,"Fra":28567,"Fri":10250,"Fre":14074,"For":20619,"Dép":12076,"Hei":15430,"Her":23305,"Hal":8751,"Han":15746,"Hau":30716,"Gre":10954,"Gra":17232,"Gru":19222,"Gro":14185,"Int":14682,"Ins":19291,"Ind":10322,"In ":14119,"Hoc":11500,"Art":17165,"Aut":13725,"Aus":25209,"Aug":12878,"Auf":14417,"Ban":13867,"Bad":10629,"Bar":8982,"Bas":9343,"Bau":14505,"Als":17057,"Alt":10907,"Ant":9903,"Bun":26145,"Bur":11846,"Car":9966,"Bez":18543,"Bes":11001,"Ber":42078,"Bei":10336,"Beg":14069,"Bre":9847,"Bra":12567,"Dez":9022,"Der":97305,"Deu":28251,"Das":56242,"Chr":9059,"Chi":9883,"Cha":13525,"Com":14437,"Cou":18879,"Die":157652,"Dre":10394,"Dor":10511,"Neu":14698,"Nat":19706,"Nie":12050,"New":10171,"Nam":20762,"Nac":12244,"Nov":10049,"Nor":35685,"Okt":8850,"Obe":15435,"Ort":33777,"Ost":13628,"Pla":10533,"Pfl":9335,"Pfa":9319,"Per":13707,"Par":21713,"Pro":54127,"Pri":9311,"Pol":21608,"Mär":10626,"Jan":11806,"Jah":47539,"Joh":10371,"Jul":9776,"Jun":10796,"Kan":19278,"Kar":13767,"Kir":12749,"Kil":10995,"Kla":9457,"Kon":17432,"Kom":17455,"Kre":22963,"Kri":9328,"Kul":11508,"Lei":11844,"Lan":56584,"Lin":9234,"Lis":12636,"Men":8765,"Met":15567,"Man":15071,"Mal":9073,"Mar":33951,"Mai":15148,"Mon":12046,"Mit":29518,"Min":10015,"Mus":19078,"Süd":20812,"Wil":11137,"Wie":10497,"Wei":14673,"Wel":14947,"Wer":11351,"Wes":22169,"Was":9091,"Wal":11499,"Vor":17501,"Vol":11987,"Zei":17372,"Str":23794,"Stu":11271,"Sta":109473,"Ste":21247,"Tei":19885,"Sit":13505,"Sie":53287,"Ser":9549,"Sep":9592,"Sei":11021,"See":8773,"Spi":16479,"Spr":13123,"Sch":114345,"San":12822,"Rhe":11094,"Rec":10719,"Rei":15453,"Reg":42336,"Rom":8900,"Ver":96243,"Uni":18259,"Unt":29085,"The":20642,"US ":31557,"Tra":12824,"bis":44646,"bil":24249,"bin":15696,"ble":9072,"bli":23577,"be ":15350,"ban":20200,"bal":24830,"bah":12251,"bac":14708,"bau":19849,"bar":18594,"bei":74577,"bef":14886,"bed":10076,"ber":182051,"ben":84728,"bel":16201,"bek":17699,"bez":50353,"bew":10065,"bes":47672,"bet":15480,"bie":27376,"bge":10311,"bzw":9957,"ca ":13669,"ce ":20992,"bt ":13539,"bri":17955,"bra":14742,"bre":12676,"bru":12222,"bst":12657,"bur":42376,"bun":13485,"am ":57496,"al ":67162,"ain":24754,"ais":13530,"ahm":10952,"ahn":21966,"ahl":22279,"ahr":74469,"anu":15776,"anz":50908,"ano":9997,"ann":95807,"ant":49762,"ans":31362,"ane":16424,"ang":74734,"ani":87171,"ank":29449,"ana":30762,"anc":17098,"and":264094,"amt":12217,"amm":42375,"amp":12259,"ami":43004,"ame":78887,"amb":11361,"ama":13307,"alz":12079,"alt":72681,"als":84009,"all":87642,"ali":76090,"ald":16999,"ale":62596,"ala":15461,"alb":20979,"an ":124982,"akt":19509,"abe":38207,"ae ":18008,"aat":34161,"ad ":15688,"ab ":9708,"aft":77933,"aff":9982,"ai ":14358,"age":54066,"adi":20643,"ade":24209,"ag ":17784,"adt":77282,"ack":9729,"ach":145520,"ace":11312,"aye":9954,"at 
":58813,"are":33081,"ard":28108,"arc":10050,"arb":19113,"ara":24649,"aro":9777,"arl":16356,"ark":32666,"ari":41626,"arr":11665,"ars":12931,"art":72580,"au ":29621,"asi":19652,"ase":11784,"ar ":124263,"as ":155043,"aut":27334,"ay ":9249,"ata":11051,"ast":27543,"ass":73183,"ato":14091,"ate":62804,"ati":113716,"ath":22536,"auc":75721,"att":42075,"ats":11481,"atu":18974,"atz":18097,"aum":14540,"aup":28995,"aus":149891,"aue":19995,"auf":98508,"itu":19936,"itt":50909,"its":25338,"itz":41656,"ism":9438,"iss":46444,"ist":567635,"ita":30952,"ite":83935,"itg":12750,"iti":55617,"ium":14795,"ivi":10241,"ive":36462,"is ":120788,"ion":169772,"irt":10794,"irk":24645,"isi":17517,"ish":14382,"ise":39335,"isc":434611,"isa":10445,"ire":14375,"ird":46859,"irc":21452,"it ":186965,"itä":20849,"izi":17119,"ize":13649,"kir":9307,"kis":9934,"km ":11509,"ki ":9579,"kei":25216,"kel":18319,"ken":38622,"ker":44613,"key":12420,"ke ":26956,"kra":15849,"kre":43910,"kt ":30020,"kri":11594,"kon":15402,"kom":22572,"ks ":9057,"kma":10480,"kle":18061,"kla":10851,"kat":15038,"für":75640,"kan":77012,"kal":10319,"füh":22557,"ka ":18181,"han":36578,"hal":38788,"hau":37347,"har":21603,"hat":27886,"haf":79189,"hab":9073,"he ":213315,"hel":11970,"hei":73898,"hec":9261,"heu":16839,"hes":34182,"her":185057,"heo":9547,"hen":324438,"hem":37228,"hie":35579,"hic":13745,"hin":28238,"hil":15183,"his":35328,"hl ":14867,"hn ":19219,"hla":28455,"hle":29146,"hli":16905,"hlo":9385,"hlu":9270,"gle":20084,"gli":40281,"gke":10880,"gs ":14145,"gsb":9179,"gsg":11045,"gro":10882,"gru":14203,"gra":25992,"gt ":53228,"gri":24893,"gre":15248,"gst":11232,"gss":10565,"gte":17882,"grö":13018,"grü":19508,"gus":11665,"gun":24014,"ial":21666,"ian":25164,"iat":10334,"ibt":8975,"id ":10846,"ibe":11815,"ia ":36994,"iet":30400,"iel":84385,"ien":123978,"ier":122946,"ies":42285,"ied":66086,"ieg":57690,"ief":8944,"ieh":10954,"ig ":34684,"iec":11989,"ieb":29946,"ift":20078,"iff":26227,"ick":25712,"ich":454249,"ie ":537544,"ica":15678,"ide":33142,"ida":13186,"ieß":9409,"il ":53074,"im ":225234,"ika":58499,"ige":123608,"iga":12204,"igk":10797,"igi":12120,"igu":13208,"igt":19283,"ign":9138,"ihe":9068,"ihr":18848,"ik ":39429,"imm":16766,"ime":10380,"ind":156874,"ina":40189,"inn":28145,"ino":11205,"int":30667,"ins":51098,"inf":10131,"ine":425715,"inh":13926,"ing":93214,"ini":56092,"inl":11619,"ink":12759,"inw":21089,"inz":40189,"ike":34841,"in ":793598,"ilo":17887,"ill":36002,"ilm":27378,"ili":52680,"ild":30027,"ile":21718,"ima":11905,"io ":15324,"hr ":43100,"hol":19495,"hor":9009,"hof":13254,"hoc":13003,"hni":15320,"hnu":17265,"hne":77095,"hme":30297,"hul":13709,"htu":10990,"hts":18003,"hti":8985,"hte":48599,"hst":18465,"hse":23032,"hrt":21145,"hre":87475,"hri":38046,"ht ":81034,"hwa":10659,"hwe":33406,"hum":9803,"hun":26557,"ffe":34346,"fes":13275,"fer":33138,"fen":43852,"fel":18220,"fge":10643,"fas":17434,"fan":11879,"fal":21835,"fah":14992,"ff ":20858,"fe ":14115,"ewä":9115,"eze":57686,"ezi":30267,"eta":11725,"ete":74739,"eti":14295,"eso":11977,"est":128414,"ess":60542,"eue":13801,"eug":13587,"etr":30198,"ett":26303,"etw":18129,"etz":35905,"ew ":11648,"eut":121638,"eur":19487,"ewi":9238,"ewe":18343,"ey ":16388,"erö":10701,"er ":1578023,"eor":17275,"es ":365853,"ept":13647,"epu":9001,"erk":45646,"erl":56900,"eri":118551,"erg":76890,"erh":33322,"ere":140461,"erf":36150,"erd":42731,"era":58724,"erb":63293,"et 
":137716,"esi":26333,"esc":45868,"ese":67043,"esa":12858,"erz":20677,"erv":14305,"eru":46595,"erw":47997,"err":51760,"ert":122552,"ers":190257,"ern":139910,"erm":29678,"erp":11457,"ero":15582,"ekt":37882,"en ":1453342,"elb":18844,"ela":19480,"eld":20851,"elc":11326,"elf":9116,"ele":71198,"eli":22926,"elm":9454,"eln":17751,"ell":119434,"els":30215,"elt":58131,"ehö":33094,"eiß":9569,"emb":35370,"ema":36279,"eme":116815,"emi":15129,"enf":15937,"ene":83278,"enh":15362,"eng":41769,"enb":41104,"ena":52256,"end":112707,"eno":10734,"enn":31346,"enk":30270,"enl":8977,"eni":28822,"ens":113000,"ent":181395,"enr":9992,"enz":28843,"ege":66150,"egi":51407,"egr":34898,"egt":35267,"ehm":19125,"ehr":37443,"ehe":47146,"eib":17640,"eic":136250,"eht":23060,"eis":113764,"eim":26661,"eil":79634,"ein":827667,"eih":11402,"eie":15075,"eid":22147,"eig":23796,"el ":101100,"eiz":17159,"eit":174763,"efü":9260,"eka":19118,"em ":162112,"gis":26824,"gin":15255,"gio":29407,"gie":31403,"gen":239581,"get":10602,"ger":96243,"ges":76492,"gew":20591,"geb":50080,"geh":39393,"geg":29832,"gef":16578,"gem":34840,"gel":53887,"ge ":89868,"gab":8826,"gar":15096,"gan":29626,"ga ":12696,"frü":9222,"fte":22800,"ftl":11215,"fun":10974,"fts":18001,"ft ":64901,"fra":26521,"fre":14454,"fri":12770,"for":35090,"fol":14926,"fla":12837,"flu":9809,"fil":11145,"fin":25920,"da ":17513,"de ":197184,"das":76737,"dar":15587,"dam":14425,"ckl":11099,"chä":10997,"ch ":425455,"cha":118322,"chw":51871,"chu":40552,"ck ":31790,"che":671441,"chl":70583,"chi":103178,"cho":20768,"chm":11901,"chn":86780,"chs":68005,"cht":155579,"chr":43254,"cke":51895,"ed ":20514,"eba":8988,"ebe":60498,"ebi":30463,"ebr":17022,"ebu":9376,"eat":10811,"efi":14121,"efe":11305,"ei ":84962,"ega":9107,"eer":10701,"edi":25198,"ede":66213,"eg ":9875,"eck":32288,"ech":81379,"aße":12922,"ee ":17886,"dwe":14218,"dur":31652,"dor":21874,"don":10905,"ds ":13347,"dun":25608,"dri":12382,"dt ":65936,"dre":16392,"dsc":11728,"der":838163,"des":213577,"det":50672,"deu":71719,"del":34612,"den":286768,"dem":104609,"dkr":22973,"dli":27133,"din":14965,"dis":37105,"die":268782,"dig":18151,"rhe":18402,"rha":21798,"rga":19288,"rgi":14417,"rge":50622,"ret":20023,"res":34677,"reu":13265,"rfa":14831,"rfo":9599,"rg ":68697,"rea":10702,"rec":32717,"raß":12628,"rei":196090,"reg":17320,"rem":14552,"ren":145744,"rer":33774,"rf ":19644,"rdn":11514,"rdl":8810,"rdi":14216,"rde":136404,"re ":80803,"rbr":9368,"rch":72681,"rd ":68822,"rap":8929,"rar":8924,"ras":13690,"rat":45378,"rau":34010,"rbi":13086,"rba":21515,"rbe":36419,"rai":10459,"rag":25026,"ran":87263,"ram":18048,"ral":31964,"rab":8938,"raf":18926,"rad":17310,"rac":30425,"rs ":37799,"ros":12129,"rot":11834,"rom":12509,"ron":32055,"rop":24374,"rov":26322,"rod":18103,"rol":12320,"rof":8772,"rog":9738,"rns":12992,"rna":25318,"rne":34331,"rni":11673,"ro ":10671,"rma":31109,"rme":19061,"rmi":10506,"rli":33691,"rle":11502,"rla":23240,"rn ":74871,"rks":11567,"rke":27812,"rm ":16219,"rit":36924,"ris":65792,"rig":20472,"ril":13027,"rik":52744,"rin":60346,"ria":21040,"ric":47382,"rie":98836,"rif":31298,"rk ":38902,"rwe":21555,"rz ":21038,"ruc":12341,"rup":19176,"run":74339,"rum":15222,"rus":14979,"rwa":28214,"ry ":12420,"rsi":21022,"rso":11863,"rsp":15246,"rsc":59984,"roß":19513,"rsa":9024,"rse":16931,"rta":10712,"rst":71108,"rte":100313,"rth":11159,"rti":23089,"rua":8770,"rts":42716,"rtr":18145,"rt 
":139444,"rro":10371,"rri":12758,"rre":38535,"rra":9354,"sam":29233,"sbe":20860,"san":12568,"sat":17995,"rze":22030,"sha":10198,"sho":11670,"sge":44768,"sie":44959,"sic":57522,"sit":30295,"sis":59876,"sin":51565,"sio":15881,"sik":20202,"se ":74178,"oße":10850,"sch":817718,"ser":52310,"ses":13066,"set":25883,"seu":12734,"sei":61074,"seh":13958,"see":12215,"sen":104567,"sem":11510,"sel":53325,"spo":10155,"spr":34328,"spe":13644,"spi":46629,"spa":12483,"sow":18519,"sol":10161,"son":33802,"sor":15536,"sre":10655,"st ":524603,"ss ":36271,"sla":20592,"ski":8870,"ska":11035,"so ":11408,"swe":11000,"stä":18451,"sse":110613,"ssa":12522,"sso":12151,"ssi":42403,"sst":37624,"ste":300373,"stf":9710,"sta":116653,"sto":29905,"sti":57375,"stl":38070,"stu":17599,"str":60741,"sun":13352,"sve":8840,"tal":50681,"tag":11693,"taa":33781,"tad":80208,"tbe":10504,"tau":10504,"tat":27116,"tar":23114,"tan":59689,"tam":12219,"te ":230506,"ta ":18923,"pe ":20233,"par":30140,"pan":20821,"läc":9211,"län":14161,"phi":11788,"pen":21299,"per":31265,"pel":10530,"pla":12193,"pie":61526,"por":21385,"pol":21212,"ppe":33120,"pub":8976,"pte":12552,"pts":12222,"pra":20767,"pri":21158,"pre":14559,"pro":24883,"ra ":23418,"ngi":9157,"ngl":32203,"ngs":81612,"ni ":15548,"nge":150664,"nga":14715,"nha":17909,"nhe":16114,"neh":18303,"nel":12298,"nen":124634,"nem":32650,"ner":149716,"net":57023,"nes":53346,"neu":10682,"ng ":256902,"nfo":9002,"nfa":11653,"nce":12344,"nch":15880,"ne ":310275,"nbu":15121,"ndt":9026,"ndu":21347,"ndr":11275,"nds":26772,"ndo":18073,"ndl":16133,"ndk":23064,"ndi":40257,"nde":333302,"nda":14652,"nal":58896,"nam":18687,"nan":39807,"nar":14659,"nac":41308,"nad":10470,"nah":12864,"nbe":18026,"nd ":563951,"nba":15883,"nau":11932,"nat":30766,"na ":34549,"nz ":37982,"nwo":19169,"nve":9948,"nun":33102,"nur":9530,"nua":9860,"nty":16624,"ntw":17792,"nto":23385,"nts":23619,"ntr":27593,"nti":33190,"nth":12699,"ntl":20923,"nta":24938,"nte":156191,"nsp":11166,"nst":73244,"nse":37942,"nsi":11408,"nsc":47879,"nsa":13614,"nsb":9715,"nt ":97967,"ns ":52604,"nom":14191,"nor":17522,"nne":52732,"nni":11264,"nnt":52262,"nns":10154,"nli":11747,"nn ":35057,"nla":20752,"no ":11518,"nke":16356,"nkm":14368,"nkt":16086,"nkr":10153,"nig":30567,"nie":44175,"nic":23391,"nia":9121,"niv":12854,"nis":144657,"nit":17226,"nin":9152,"nik":13041,"ogr":16058,"ogi":20348,"oge":19173,"ohl":8865,"ohn":34927,"och":36284,"ock":28973,"ode":93686,"of ":22612,"odu":15579,"off":17101,"nzö":20567,"obe":24203,"nze":40975,"nzi":11904,"owi":20813,"ozi":9366,"ow ":10053,"oti":8974,"oth":10089,"ote":15159,"ott":13896,"oto":13207,"ost":26924,"ose":16100,"oss":15828,"ovi":27674,"ove":15851,"oun":22912,"our":13749,"oph":11323,"opa":9142,"os ":20626,"or ":51491,"ork":11076,"orm":35190,"orn":17190,"ord":64827,"ore":22869,"orf":23363,"org":24171,"ori":35553,"ort":57075,"ors":22259,"ora":13071,"ola":9778,"old":10163,"on ":380926,"oli":42117,"oll":31645,"olk":11956,"ole":12997,"olg":16083,"olo":30821,"om ":33290,"ona":50054,"ond":30722,"one":41633,"ong":12616,"oni":29183,"onn":12427,"ono":12443,"ons":40673,"ont":24319,"oma":20160,"ome":26297,"omi":13337,"omm":35850,"omp":19252,"omo":11157,"la ":19811,"lb ":10692,"le ":103052,"lch":16014,"lde":27222,"lac":9475,"lag":29983,"lan":114878,"lar":12710,"lat":31357,"las":30573,"lau":19611,"lba":8767,"ld ":26676,"lbe":17468,"kur":12239,"kun":14388,"kte":20495,"ktr":9556,"ktu":9324,"kti":30020,"kto":15493,"ls 
":107647,"lon":9380,"lom":14967,"log":27324,"los":22267,"lti":11054,"ltu":37425,"lug":9766,"lsp":9315,"lst":14411,"lte":61749,"lsc":14894,"lt ":63451,"lge":22548,"li ":14630,"les":24231,"let":14648,"ler":100316,"lem":16242,"len":91180,"lek":12889,"lei":58951,"leg":20421,"leb":11626,"lls":25768,"llu":11230,"llt":17543,"lla":17394,"lle":120640,"lli":30333,"ln ":17395,"lm ":19332,"ll ":39906,"lit":40830,"lis":62203,"lin":51826,"lic":170755,"lia":13089,"lik":12998,"lig":41309,"lie":114815,"ma ":15588,"mar":19548,"mal":49935,"man":57586,"mat":30614,"mbe":37028,"me ":36368,"met":20898,"mes":11403,"mer":74569,"mel":10134,"men":133736,"mei":94533,"meh":11820,"mfa":9540,"lve":9179,"lun":37355,"lus":22607,"lz ":10714,"hör":33720,"mpf":9424,"ms ":14667,"mon":13165,"mt ":15320,"mte":10917,"mus":17368,"mun":19505,"min":21460,"mil":28731,"mis":23371,"mit":124264,"mig":9701,"mie":12587,"mmu":13317,"mmt":13008,"mme":52772,"zt ":19861,"zte":9874,"zu ":51921,"zw ":9786,"zug":11138,"zur":37734,"zum":31618,"zun":15443,"zus":11821,"zwe":22370,"zwi":20415,"zei":75965,"zeu":11441,"zes":9884,"zen":43803,"zem":9613,"zel":10023,"zer":23898,"ze ":15773,"zia":11658,"zie":29129,"zig":9649,"zir":18395,"yst":17491,"ußb":14873,"uße":8940,"yer":10735,"tän":11031,"tät":24189,"süd":17890,"wur":70546,"woh":26117,"wes":27777,"wer":58987,"wen":21225,"wel":20147,"wei":83410,"weg":12299,"wir":55249,"wis":33738,"wie":34766,"wic":21374,"wa ":19878,"röß":14321,"wan":13038,"rün":30393,"wal":31257,"war":103047,"rüh":12452,"rüc":12801,"vin":25318,"vie":18884,"von":226355,"vom":22538,"vor":35872,"räg":10007,"ver":144066,"ven":17663,"vem":8808,"ve ":14670,"van":10775,"usi":22265,"usg":18900,"use":25762,"usa":16989,"ust":38480,"uss":45929,"usp":9564,"uti":13537,"ute":38886,"utz":15915,"uts":91328,"uto":20186,"us ":143584,"ut ":20376,"urd":76437,"urc":36382,"ure":16506,"urg":50861,"uri":14126,"urn":9214,"uro":18224,"urs":13370,"urt":10114,"urz":14868,"ur ":74659,"upp":21507,"upt":28543,"ums":9212,"umb":12929,"ume":14114,"umf":9413,"unt":60343,"uns":11344,"unk":20377,"uni":19103,"und":450828,"ung":313251,"ukt":12384,"um ":104918,"ult":19564,"uli":15580,"ule":13595,"ula":10852,"uge":19918,"uft":12442,"ugu":12915,"uf ":70919,"ude":15376,"ue ":12656,"uch":109126,"uck":11441,"uer":24349,"ufe":13329,"ufg":14459,"uen":13989,"uel":10929,"uar":22497,"ubl":12721,"tzt":23155,"tzu":9903,"tze":24373,"ty ":26382,"twa":23393,"tur":43568,"tun":79448,"tum":9791,"tz ":51212,"twi":14763,"twe":10501,"ts ":32847,"tre":34672,"tt ":15987,"tra":75255,"tri":43316,"tru":19097,"tro":30104,"tsc":125606,"tsg":8757,"tsp":10708,"tst":44445,"tte":89035,"tti":9403,"ttu":15931,"to ":13838,"tob":10974,"tom":9404,"ton":34822,"tor":56677,"tik":35201,"tie":28029,"tig":44003,"tit":14488,"tis":66050,"tin":30024,"tim":13478,"tio":114009,"thu":10251,"tiv":26072,"tli":78058,"tla":9588,"tle":19198,"tem":46999,"ten":278529,"tei":81154,"tel":91292,"teh":26143,"tec":9644,"tfa":9048,"th ":15758,"tet":40557,"tes":39452,"ter":318918,"tgl":9160,"tge":13122,"tho":19217,"the":36369,"tha":11467,"zös":20559,"zäh":9158,"épa":12146,"ähl":11359,"ähr":16538,"äch":26798,"äng":21244,"änd":29875,"ält":11293,"ät ":14412,"ärz":9551,"äte":10008,"äuf":9858,"ßte":10219,"ßen":17047,"ßer":9786,"ßba":15077,"ße ":16400,"Übe":9600,"üdl":8991,"ühe":8949,"übe":35449,"ück":19105,"ünd":28744,"ür ":75451,"üns":8869,"ühr":24588,"ößt":9800,"öst":25495,"ösi":20607,"ört":26727,"örd":14857,"öni":10085,"öff":11553,"ürt":8780},"n_words":[87197534,99298261,71857404],"name":"de"}
\ No newline at end of file
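The non-ASCII n-gram keys in these profiles ("ø" and "æ" in da, "ß" and "ü" in de above, the Greek alphabet in el below) are where this commit's Python 2 to 3 conversion matters most: Python 3's json module yields str (unicode) keys from a text-mode file handle, with no manual decode step. A sketch of that loading pattern over the whole profiles directory, illustrative rather than the exact code in detector_factory.py:

    import json
    import os

    # Directory as added in this change; adjust to your checkout.
    PROFILES_DIR = "nlp_resource_data/langdetect/profiles"

    def load_profiles(directory=PROFILES_DIR):
        """Load every profile file into {language code: freq dict}."""
        profiles = {}
        for lang in sorted(os.listdir(directory)):
            path = os.path.join(directory, lang)
            # Text-mode read with explicit encoding: Python 3 returns
            # str, so non-ASCII n-grams need no .decode() as in Python 2.
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
            profiles[data["name"]] = data["freq"]
        return profiles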
diff --git a/nlp_resource_data/langdetect/profiles/el b/nlp_resource_data/langdetect/profiles/el
new file mode 100755 (executable)
index 0000000..9ef014b
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"D":1964,"E":1941,"F":1941,"G":1852,"A":4125,"B":2624,"C":3923,"L":2060,"M":3387,"N":1723,"O":1910,"H":2233,"I":2360,"J":1156,"K":1166,"U":1129,"T":2696,"W":1311,"V":1075,"P":2832,"S":4250,"R":1938,"f":2440,"g":5100,"d":7728,"e":26057,"b":4017,"c":7473,"a":25408,"n":18610,"o":18497,"l":12659,"m":7083,"k":2907,"h":7717,"i":21525,"w":1649,"v":2733,"u":9674,"t":15175,"s":12992,"r":19022,"p":4711,"z":1248,"y":3535,"x":1425,"μ":190953,"ν":363085,"ξ":16299,"ο":514934,"θ":54925,"ι":405235,"κ":245170,"λ":175427,"δ":87843,"ε":333395,"ζ":19116,"η":223442,"α":554256,"β":36747,"γ":101330,"έ":90196,"ά":106544,"ί":162652,"ή":98446,"Ω":1026,"Υ":1630,"Τ":16684,"Χ":5356,"Φ":5011,"Ρ":4090,"Π":18130,"Σ":17983,"Ο":20140,"Μ":17575,"Ν":8294,"Κ":18111,"Λ":6273,"Θ":3309,"Ι":8695,"Ζ":1967,"Η":13434,"Δ":9926,"Ε":15220,"Β":10823,"Γ":10307,"ΐ":1269,"Α":27922,"Ό":1024,"Ή":969,"Έ":4042,"Ά":2718,"΄":1288,"ϊ":3454,"ω":82386,"ψ":4318,"ώ":40333,"ύ":61448,"ό":136039,"σ":232383,"ς":240747,"ρ":274072,"π":191347,"χ":59122,"φ":44210,"υ":205289,"τ":474096," o":780," d":1163,"р":660," a":888," t":1268," p":1168," s":849," r":827," J":1101," K":1080," H":2002," I":1555," N":1284," O":1504," L":1764," M":2988," B":2227," C":3233," A":3004," F":1657," G":1632," D":1595," E":1570,"и":935,"о":946," S":3325," R":1624," P":2318,"а":1103," W":1185," V":798," U":829,"е":697," T":2305," Ι":8408," Θ":3256," Λ":6140," Κ":17657," Ν":8118," Μ":17373," Ο":19727," Α":26876," Γ":10190," Β":10719," Ε":14504," Δ":9620," Η":13284," Ζ":1948," Έ":4041," Ή":969," Ό":1018," Ά":2711,"A ":657," υ":7977," τ":169076," χ":14827," φ":8389," ρ":2618," π":75432," σ":77620," ύ":1013," ό":12814," ω":4856," ψ":866," ζ":1711," η":13278," δ":29475," ε":62623," β":11610," γ":22099," α":74025," ξ":1157," ο":35203," μ":51074," ν":14449," κ":74116," λ":7381," θ":8090," ι":6475," Χ":5293," Φ":4909," Υ":1521," Τ":16381," Σ":17547," Ρ":3989," Π":17476," ί":1230," ή":17892," έ":20443," ά":5017," Ω":1004,"Co":724,"H ":665,"C ":687,"O ":936,"Ma":1040,"To":652,"b ":924,"a ":4343,"i ":1712,"ge":831,"he":1545,"ha":1115,"gh":801,"g ":1029,"ea":941,"ed":859,"de":1790,"di":1004,"do":657,"h ":975,"el":1708,"en":2808,"em":659,"et":1060,"es":2097,"er":4530,"ca":818,"e ":6418,"f ":722,"co":859,"ci":771,"ch":1350,"ce":966,"d ":2124,"at":2084,"as":1170,"ar":3025,"al":2276,"ai":668,"am":1144,"an":4262,"ac":959,"ad":799,"nt":1738,"ns":835,"of":654,"om":903,"on":3275,"ol":1227,"os":875,"ou":924,"or":2245,"r ":2839,"pe":702,"lo":869,"ll":1391,"o ":2523,"ma":1208,"mb":957,"me":1201,"na":1567,"nc":799,"nd":1677,"ne":1562,"ng":1277,"ni":1477,"m ":1364,"li":1620,"le":1892,"la":1782,"n ":4655,"ht":808,"hu":877,"hi":783,"ic":1794,"ia":1591,"ig":1086,"ie":1115,"k ":870,"ir":779,"is":1816,"it":1405,"il":1191,"in":3273,"io":1643,"l ":2336,"y ":1749,"vi":759,"ve":914,"x ":1036,"ul":682,"ur":1293,"us":1362,"um":1309,"un":754,"to":1131,"tr":915,"te":1980,"ti":2250,"th":1729,"ta":1485,"ss":802,"st":1761,"se":995,"si":952,"rs":663,"rt":997,"ro":1924,"ri":2929,"re":1965,"rd":715,"ra":2129,"t ":3000,"s ":5020,"ώρα":1560,"ώτο":1121,"ώτη":1214,"ώσσ":997,"ώσε":810,"όεδ":664,"όδο":943,"όγο":980,"όν ":1172,"ός ":18179,"ϊκή":860,"ώθη":1056,"ώνε":1144,"ώνα":1750,"ώνη":931,"ώνυ":1219,"ώμα":1362,"ών ":13524,"ύου":1130,"ύμφ":1059,"ύμε":885,"ύμα":648,"ύντ":1574,"ύνο":857,"ύσε":1378,"ύτε":2598,"ύστ":2139,"ώς ":2025,"ύπο":1064,"ύρι":1436,"ύθη":1068,"όλο":1532,"όλε":1360,"όλη":2622,"όμω":688,"όμα":718,"όμε":2708,"όμο":1617,"όντ":1036,"όνι":1308,"όνο":4918,"ύν ":1686,"όπω":1790,"όπο":2513,"ύο 
":1340,"όρο":2945,"όρη":995,"όρε":1336,"όρι":647,"όσμ":1304,"όστ":662,"όσο":1170,"ύς ":3624,"ότε":4438,"ότη":3717,"ότα":2254,"ότι":2195,"χώρ":2395,"χωρ":2665,"χρη":1672,"χρι":1320,"χρο":1146,"χρό":1127,"χολ":997,"χος":1298,"χου":1973,"χνη":657,"χιλ":816,"χικ":844,"χθη":1072,"χημ":1273,"χεδ":695,"χεί":1404,"χει":4759,"χετ":1198,"χαι":840,"χαί":1771,"χαρ":1871,"χαν":1478,"φόρ":1260,"χές":666,"χής":681,"χία":1887,"φων":1670,"φυσ":992,"φος":1513,"φορ":4132,"φου":862,"χε ":1257,"φικ":1262,"φιλ":1198,"φθη":729,"φερ":660,"φία":1007,"χή ":2299,"φαί":1221,"φαι":2017,"φαν":1114,"φαλ":712,"ωτο":763,"ωπα":870,"ωνσ":715,"ωνι":2150,"ωνα":1612,"ωνί":1735,"ωστ":3134,"ωτα":785,"ωτε":1561,"ωτι":1342,"ωση":2679,"ωρι":2402,"ωπο":844,"ωρί":1331,"ωρε":765,"ωμέ":1015,"ωμα":2019,"ωγή":1132,"ωγρ":700,"ως ":10658,"ων ":25196,"τά ":6317,"σήμ":967,"σία":5336,"σίλ":677,"τή ":2856,"σεω":1071,"σει":4874,"τε ":2677,"σημ":4910,"σης":5924,"σας":901,"σαν":2104,"σαλ":862,"τα ":16925,"σμι":908,"σμο":3140,"σμό":3646,"σου":1542,"σπα":974,"σιο":1662,"σικ":3297,"σιλ":2336,"σιμ":2018,"σκε":4485,"τη ":14833,"σια":1815,"σιά":738,"σμα":2223,"σμέ":1983,"τι ":2082,"σκο":2292,"σκη":760,"συσ":899,"σφα":2732,"συγ":2509,"συμ":1798,"συν":5781,"στρ":4375,"στό":2708,"σχε":1174,"σχο":662,"σπο":1081,"το ":35834,"στο":18068,"στι":9089,"στη":20031,"στε":4570,"στα":7955,"στί":782,"σσό":800,"στέ":1100,"στή":4608,"στά":1775,"σσε":911,"σσα":2647,"ρόε":683,"ρόσ":877,"ρότ":966,"ρόμ":822,"ρόν":1720,"ρόπ":819,"ρωτ":3312,"ρωσ":834,"ρωπ":1145,"ρωμ":989,"ρων":1731,"ρχα":2526,"ρχί":1672,"ρχε":1183,"ρχι":1336,"ρχη":823,"ρχο":1048,"ρυσ":751,"σα ":3872,"ρύθ":842,"ρώτ":2338,"ρώπ":1017,"ρών":1362,"σι ":958,"ση ":12367,"σε ":13475,"σο ":1455,"τότ":864,"τός":2183,"τύπ":651,"φή ":1774,"τών":2189,"Α ":1540,"υαρ":1339,"υβέ":734,"υγο":1001,"υγγ":1153,"Β ":722,"υγκ":1660,"Η ":10508,"υθο":2042,"υκλ":727,"υλί":1002,"υλι":722,"υλο":1136,"υμα":785,"υμβ":739,"υντ":1338,"υνθ":722,"υνο":1544,"υνα":2074,"υνε":870,"υνδ":710,"υνί":677,"υνή":706,"υνέ":720,"υμο":1114,"υμπ":1569,"υμμ":700,"υπη":741,"υπά":768,"υπή":1017,"υρο":983,"υρω":883,"υρί":1627,"υρά":671,"υπό":808,"υργ":2911,"υρα":699,"υρκ":776,"υρι":1000,"υπο":3053,"υτα":731,"υτι":3118,"υστ":2102,"υτή":1523,"υσα":1288,"υσί":1021,"υσμ":1067,"υσι":2818,"υση":1190,"΄ ":1237,"υτό":1999,"υτο":2715,"φέα":711,"φέρ":3057,"τάλ":737,"τάθ":737,"τάν":734,"τάσ":1170,"τέλ":1720,"τές":1644,"τέρ":1900,"σότ":899,"τέχ":815,"σωπ":940,"ταφ":747,"ταν":14808,"ταξ":1725,"ταρ":960,"τας":2802,"τασ":3623,"τατ":1252,"ταθ":1117,"ται":21198,"ταμ":1209,"ταλ":2606,"ταγ":1554,"τής":3962,"τό ":2629,"τήρ":1568,"τήμ":1493,"σύμ":1096,"σύν":1327,"τίσ":697,"τίτ":882,"τίο":1122,"τίν":835,"τία":2320,"τεί":2724,"τελ":4578,"τεμ":808,"τει":1045,"τες":1997,"τερ":11689,"τεχ":1125,"τεύ":1209,"τια":1887,"τιο":1754,"τιν":1951,"τισ":2348,"τις":8696,"τιμ":802,"τικ":22901,"της":36695,"τηρ":3076,"την":27288,"τημ":2854,"τηλ":655,"τηκ":1948,"τηγ":801,"τησ":1537,"τητ":3914,"τμή":1027,"τλο":835,"τρω":752,"τρό":832,"τρο":5304,"τρι":3975,"τρε":1070,"τρα":4343,"τρί":1148,"τού":2364,"του":50266,"τομ":1711,"τολ":2689,"τον":13580,"τοπ":1407,"τος":5084,"τορ":3232,"τογ":1175,"τοι":2194,"τοκ":1571,"τοί":1186,"υν ":3485,"τυπ":686,"υς 
":13808,"τσι":825,"των":12537,"νή":7879,"νέ":3576,"μό":8452,"μώ":1755,"νί":9927,"να":62303,"νδ":5696,"νγ":910,"νε":15475,"νθ":2946,"νη":14939,"νι":23024,"μμ":4065,"μν":1567,"μο":26826,"μπ":8431,"μυ":2965,"μφ":2313,"μω":1439,"νά":5541,"ξα":1561,"νώ":4161,"ξε":3656,"ξι":1797,"ξη":2494,"νο":34560,"νν":4891,"νσ":1980,"ντ":28913,"νυ":1752,"ξά":881,"νό":10195,"νω":9532,"οδ":7658,"ού ":14320,"ογ":13258,"οε":2833,"οί":10846,"οβ":2978,"οα":1081,"ομ":21099,"ολ":23701,"οξ":882,"ον":36237,"οθ":2077,"οκ":6497,"οι":21065,"οία":2635,"ξο":1094,"οίκ":1187,"οέ":928,"ξύ":1306,"οίο":3618,"πη":2710,"πε":16079,"πα":18678,"πο":60271,"πν":700,"πλ":7083,"πι":10919,"ου":129022,"οφ":4209,"οσ":11327,"οτ":9541,"ορ":21585,"ος":37589,"οπ":16190,"πή":2566,"ού":30889,"πί":4948,"πά":3903,"πέ":3142,"οχ":4794,"ια":39925,"ιβ":2565,"ιγ":2579,"ιδ":8147,"ιε":6521,"ιζ":1197,"ιη":883,"ιθ":2601,"θω":1118,"ιά":10102,"θό":672,"ιέ":2080,"ιή":692,"θώ":1191,"θρ":2531,"θυ":2592,"θι":1852,"θλ":2017,"θν":3266,"θμ":2061,"θο":6015,"κδ":1079,"κε":22387,"κι":6089,"κη":5418,"κά":15567,"ιό":6367,"κέ":4956,"ιω":2086,"κα":65846,"κή":24785,"κί":3700,"ιώ":5059,"ιτ":9070,"ισ":25486,"ις":13132,"ιρ":6633,"ιχ":2933,"ιφ":1609,"ιμ":6598,"ιλ":8668,"ικ":79288,"ιπ":2073,"ιο":29123,"ιν":16499,"λη":19148,"λι":20827,"λκ":1174,"λγ":684,"λε":18373,"λί":8413,"κώ":4252,"λα":15386,"λβ":741,"λά":7390,"λέ":5224,"κό":26772,"κύ":1477,"λή":4583,"κω":1706,"κυ":3709,"κτ":10513,"κο":23779,"κρ":10237,"κπ":708,"κλ":5937,"κκ":1326,"μι":14565,"μη":6074,"με":36995,"μβ":5397,"μα":35176,"λύ":3607,"μή":3752,"μί":4420,"λώ":2414,"μά":7858,"λό":3924,"μέ":15352,"λω":3053,"λφ":1355,"λυ":3470,"λτ":890,"λπ":663,"λο":25987,"λμ":1804,"λλ":20970,"δη":5995,"δε":9279,"δι":23594,"δο":11445,"δρ":8223,"δυ":2966,"εά":775,"δω":1451,"εί":44740,"δώ":1804,"δύ":1829,"δό":1968,"εδ":3726,"εγ":5939,"εβ":1575,"εα":1003,"εζ":688,"εθ":3485,"ει":35123,"εκ":9726,"ελ":15989,"εμ":6248,"εν":19361,"εξ":4958,"εο":2282,"επ":13811,"ερ":40179,"ες":14762,"εσ":7584,"ετ":27314,"εφ":1836,"ευ":8533,"εχ":2179,"εω":4500,"εό":1241,"εώ":702,"ζί":1027,"εύ":4761,"ζα":1710,"ος ":37511,"ζε":6065,"ζι":1237,"ζη":738,"ζο":2939,"ζό":1259,"ζω":1103,"ηγ":3510,"ου ":87770,"ηλ":4366,"ημ":19012,"ην":37308,"ηθ":2646,"ηκ":9969,"ησ":11936,"ητ":11258,"ηρ":6263,"ης":54552,"θέ":3061,"θά":1066,"θή":2250,"ηχ":968,"θε":10322,"θη":10944,"θα":2622,"αί":10379,"ώ ":1686,"ακ":15045,"αι":93478,"αμ":10092,"αλ":21090,"αξ":3125,"αν":58537,"απ":33497,"αο":903,"αβ":3590,"αδ":5509,"αγ":12802,"αζ":1934,"ον ":14807,"αε":1433,"αθ":7116,"αϊ":2364,"βά":3473,"βέ":1564,"αύ":1058,"βί":1262,"βα":6377,"αρ":29289,"ας":38338,"ασ":22621,"ατ":37260,"αυ":7302,"αφ":9354,"αχ":2612,"βρ":7265,"βο":4803,"βλ":1821,"βι":3507,"βη":749,"βε":2725,"γα":7366,"γί":5669,"γέ":2710,"βό":1236,"γή":2815,"γά":3661,"γο":9693,"γρ":8625,"γμ":2369,"γλ":3739,"γν":4085,"γη":1772,"γκ":7566,"γι":14677,"γγ":5019,"γε":9939,"δα":7905,"δά":972,"γό":3291,"δέ":1507,"γύ":735,"δή":3929,"γώ":1054,"δί":4576,"γχ":1165,"γω":3973,"γυ":1102,"ομώ":1007,"ονί":1750,"έτ":5024,"έσ":4850,"ονα":1353,"ές":9695,"έρ":12949,"έπ":1245,"έο":998,"έξ":1325,"έν":21823,"ονδ":706,"ονι":2179,"έω":1309,"έχ":6163,"έφ":948,"έγ":3052,"έδ":1961,"έα":3333,"ομο":3665,"έλ":6166,"έμ":2360,"έι":841,"έκ":2418,"έθ":1065,"ομό":796,"άρ":7787,"άπ":2212,"άσ":7087,"άς":2871,"άν":11519,"άμ":3476,"άο":721,"άξ":842,"άτ":7688,"άχ":1258,"άφ":3485,"άβ":865,"άγ":2736,"ονο":4835,"οντ":6287,"άθ":2400,"άι":984,"άκ":2900,"άλ":9559,"ονό":1460,"άδ":6624,"άζ":2851,"ίς":1393,"ύ 
":16375,"ίρ":2411,"ίπ":1897,"ίο":20214,"ίτ":8024,"ίσ":10180,"ίω":4466,"ίχ":2356,"αΐ":1011,"ία":42781,"ίβ":669,"ίε":2902,"ίζ":4719,"ίγ":1307,"ίδ":6660,"ίκ":4798,"ίθ":1008,"ίν":31172,"ίλ":2453,"ίμ":2180,"ήρ":4528,"ήσ":3271,"ής":18133,"ό ":46975,"ήτ":11312,"οπο":11630,"ήθ":4033,"ήκ":2438,"ήλ":1001,"ήμ":8522,"ήν":2806,"οιε":1122,"οιη":754,"οια":988,"φ ":702,"οιό":643,"οκα":975,"οιχ":1283,"οιν":2467,"οικ":4166,"οιο":1378,"ολι":5666,"ολλ":1265,"ολε":1313,"ολη":1127,"ολέ":884,"ω ":2903,"ολή":1110,"ολί":1066,"οκρ":2314,"ομι":2076,"ομη":811,"ομα":4494,"ολό":1136,"ομέ":1196,"ολύ":921,"ομά":4054,"ολυ":875,"ολο":6719,"οδι":937,"οδο":3299,"Υπ":1010,"οει":1399,"Συ":1600,"Στ":5008,"Τα":1846,"Σύ":1280,"Τζ":1148,"Τσ":726,"ς ":240346,"Τρ":1616,"Το":8405,"Χα":1289,"υ ":88180,"Χρ":999,"Φε":912,"Φι":704,"Φρ":703,"οθε":1042,"τ ":3704,"Πό":722,"Πρ":2848,"ξ ":655,"Ρο":774,"Πά":1152,"Ου":930,"ν ":112419,"Ορ":745,"Πο":2485,"Πε":2134,"Πα":4994,"πα ":685,"οβο":653,"Σα":1339,"π ":1661,"Σο":1132,"ογί":3068,"ρ ":3667,"ογέ":1210,"Σε":1940,"Σι":796,"Σκ":862,"ογρ":3223,"Ρω":976,"ογο":1295,"ογι":2141,"ο ":95107,"νός":3015,"νότ":2168,"Νό":654,"νόμ":1784,"λ ":3759,"Ντ":1194,"Νο":1706,"Ολ":1010,"Οκ":750,"Οι":2398,"μ ":2390,"Με":3726,"Μι":1150,"Μέ":678,"Μά":1161,"Μα":4471,"Λο":1279,"ι ":112286,"Νι":687,"νωσ":4336,"Νέ":832,"νων":3168,"Να":926,"Μο":1624,"κ ":2925,"Μπ":2700,"Κα":4969,"Ιω":836,"Κά":943,"Κι":702,"Κε":764,"Ιο":1485,"η ":59406,"Ιτ":907,"Ισ":1161,"Λα":1230,"Κό":910,"Κύ":1401,"Λι":661,"Λε":948,"Κρ":1403,"Κο":2565,"Κω":831,"Κυ":1027,"Θε":1962,"Ηλ":946,"ε ":49298,"Ηρ":654,"νυμ":1332,"Ια":1067,"ντί":2029,"ντα":8117,"Ερ":804,"Επ":1444,"ντε":1807,"Ελ":3849,"Εκ":838,"ντά":1383,"Εθ":920,"ντο":2386,"ντρ":2342,"ντι":5956,"Ευ":1907,"νστ":941,"α ":122436,"Γο":754,"Γε":2955,"Γι":1598,"Γκ":1275,"Δή":1392,"Δε":1218,"Δη":1620,"Δι":2406,"Εί":1505,"Αθ":2048,"νος":3551,"νου":4641,"νοτ":857,"Αγ":2492,"Απ":2382,"νού":1967,"Αν":3971,"Αμ":1723,"Αλ":1805,"Ακ":644,"Αι":969,"Αυ":2140,"Ασ":1614,"Αρ":2646,"ί ":9847,"Βα":2099,"Βι":900,"Βε":785,"Βο":1596,"Βρ":2238,"νοι":1353,"νομ":10454,"νον":1006,"Γα":1202,"νολ":1206,"νορ":724,"έ ":738,"ννή":1417,"ννη":1767,"ή ":39734,"ννο":742,"ά ":28070,"οι ":6175,"Χ ":729,"Ήτ":719,"Έν":837,"Έλ":1540,"Έχ":649,"Άγ":672,"Ο ":12037,"οί ":1669,"νών":1557,"ξύ ":1208,"πόλ":3996,"ρές":813,"ράκ":669,"ράγ":691,"ράσ":941,"ράτ":1685,"ράφ":2049,"ράς":805,"ράμ":655,"ράν":647,"ρίζ":2399,"ρίν":739,"ρία":4134,"ρίδ":772,"ρίω":1265,"ρίπ":1019,"ρίο":5068,"ρίτ":724,"ρίσ":4287,"ρό ":1004,"ρήσ":959,"ραβ":1005,"ραγ":2738,"ραμ":2193,"ραν":1181,"ρακ":3111,"ραφ":4046,"ρατ":4809,"ρασ":2311,"ρας":2953,"ργε":920,"ργι":803,"ργο":1794,"ργό":1216,"ργά":995,"ργί":829,"ργα":2610,"πτι":832,"πτε":965,"πων":724,"πως":2026,"ρμο":1033,"ρνη":961,"ρντ":654,"ρξε":1053,"ροέ":803,"ροκ":927,"ρολ":823,"ρομ":1223,"ρον":3303,"ροβ":944,"ρογ":664,"ροδ":662,"ροε":1475,"ρού":2500,"ροφ":1412,"ρου":6209,"ροτ":807,"ροσ":3339,"ρος":5922,"ροπ":1689,"ρτί":865,"ρτη":992,"ρτο":808,"ρεί":2767,"ρεύ":824,"ρετ":2773,"ρευ":659,"ρεσ":889,"ρες":2082,"ρει":4053,"ρησ":2941,"ρης":1045,"ριά":922,"ριγ":689,"ρια":3527,"ριό":2111,"ρκε":1056,"ριθ":994,"ρικ":7396,"ριλ":1434,"ριμ":904,"ριν":2336,"ριο":5568,"ρισ":7036,"ριφ":871,"ρμα":2357,"ρά ":3405,"πάν":921,"πάρ":1052,"παλ":1326,"παι":1177,"παν":2463,"παγ":893,"παϊ":680,"πατ":824,"παρ":8016,"παί":655,"πίσ":2277,"ούλ":1020,"ούμ":1108,"ούν":3424,"ούσ":2748,"ούς":3386,"πό 
":21634,"πήρ":1722,"ούρ":1307,"οτε":3981,"οτι":1757,"οστ":1538,"οσφ":2182,"οσω":886,"οτέ":721,"οτα":918,"οση":738,"οσι":846,"οσπ":775,"ορφ":1231,"ορυ":783,"ορτ":877,"ορι":2946,"ορε":3059,"οργ":1807,"ορο":1078,"οπτ":643,"πο ":1799,"ορα":1218,"ορί":3662,"ορέ":747,"οπό":828,"ορά":1572,"οχή":2609,"οφο":1172,"ουσ":4557,"ους":13764,"ουρ":5299,"ουν":4982,"ουμ":1209,"ουλ":3147,"ουδ":1420,"ουα":1700,"ουά":882,"όγ":2701,"όε":736,"όδ":1693,"όλ":8233,"όκ":1654,"όμ":7862,"όν":10515,"ός":18197,"όσ":5414,"όπ":5405,"όρ":8376,"όφ":825,"όχ":1299,"ότ":13665,"ρι ":2104,"πλα":1198,"ϊκ":1906,"πλε":822,"πλη":1610,"πλο":1278,"ωθ":777,"ωβ":791,"ωδ":719,"ωγ":2280,"ων":32748,"ωπ":2336,"ωμ":4528,"ωρ":6506,"ως":10668,"ωσ":8039,"ωτ":6473,"ρο ":6138,"προ":8229,"πρι":1380,"πρα":1142,"πρώ":2895,"πρό":2160,"πρω":2758,"ώμ":1777,"ών":20079,"ώθ":1069,"ώη":641,"ώδ":730,"ποκ":999,"ποι":5038,"πολ":7906,"πον":1356,"ποί":6780,"ώτ":3447,"ώσ":2701,"ώς":2026,"ώρ":3768,"ποδ":2435,"πογ":674,"ώπ":1387,"ύκ":1365,"ύλ":2704,"ποχ":850,"ύμ":4174,"πού":867,"ύε":938,"ύθ":1852,"πορ":2148,"πος":1087,"ύγ":1143,"ύδ":738,"που":20607,"ποσ":1036,"ποτ":4745,"ύφ":886,"ύχ":810,"ύς":3629,"ύρ":4445,"ύτ":4068,"ύσ":5147,"ύν":5639,"ύπ":2849,"ύο":2732,"τε":30296,"τζ":1331,"τη":93724,"τι":46176,"τλ":1268,"τμ":1495,"πεδ":722,"το":124490,"τρ":19405,"τσ":2719,"στ":76523,"σσ":6194,"σφ":3318,"συ":12181,"σχ":3635,"σω":3204,"τέ":8491,"σό":1919,"τά":12970,"σώ":1166,"τί":8764,"σύ":4398,"τή":11346,"τα":73796,"σε":22382,"σι":15384,"σκ":10283,"ση":23543,"σθ":1011,"σμ":12903,"σο":6917,"σπ":3490,"πει":1514,"πελ":849,"σή":1903,"σέ":940,"περ":10195,"σά":1139,"σα":9280,"σί":8405,"ρα ":9620,"ρφ":1653,"ρυ":3661,"ρχ":9524,"ρρ":1307,"ρτ":5071,"ρσ":1303,"ρώ":6736,"ρύ":1952,"ρω":9925,"ρό":9460,"ρη":8917,"ρθ":1230,"ρε":17142,"ργ":11016,"ρδ":1272,"ρα":38879,"ρβ":958,"ρο":41731,"ρν":3338,"ρξ":1376,"ρλ":780,"ρμ":5151,"ρι":40112,"ρκ":3719,"πυ":748,"πτ":4561,"πρ":20097,"ππ":796,"ρί":22933,"ρή":2864,"ρέ":3880,"πό":28476,"ρά":13738,"πω":3057,"πια":723,"ρη ":2996,"ψε":1003,"ψη":1336,"χω":3038,"χό":665,"χώ":2869,"χρ":6885,"χο":6422,"χτ":889,"χι":3395,"χθ":1552,"χη":2825,"χν":2772,"χα":7239,"χί":2595,"χε":9910,"πισ":2595,"πιτ":871,"φω":2505,"πιο":1239,"χέ":1462,"φό":1995,"χή":3366,"φύ":644,"πικ":1602,"χά":891,"φυ":2356,"φο":8165,"φρ":1732,"φη":1268,"φι":3786,"φθ":1094,"φε":2326,"φή":2402,"φί":1583,"ρε ":822,"φα":6317,"φά":1435,"φέ":4345,"υφ":1198,"υχ":1680,"υψ":1025,"υτ":12113,"υσ":11499,"υς":13842,"υρ":12723,"υπ":8507,"υν":14929,"υμ":7534,"υλ":5896,"υκ":2985,"υθ":4332,"υζ":915,"υγ":5531,"υδ":2390,"υα":2666,"πηρ":828,"υβ":1419,"τώ":3229,"τό":9167,"τύ":1372,"υά":1396,"τω":14910,"ττ":1165,"τυ":2752,"μης":911,"μηχ":778,"μητ":983,"νε ":1648,"μιο":2230,"μισ":919,"νη ":4892,"μια":4477,"μικ":5134,"να ":17430,"μβρ":2160,"μβο":1003,"μερ":4521,"μεσ":1074,"μετ":6749,"μει":655,"μελ":883,"μεν":3875,"μεγ":2940,"μεί":1099,"λώσ":1078,"νή ":1793,"λών":940,"μό ":3080,"λύτ":1526,"μία":2762,"μήμ":1075,"μαχ":648,"μβά":1095,"μαζ":923,"μαν":3924,"ματ":10484,"μασ":1431,"μαί":1647,"λων":1650,"μέτ":1129,"μέσ":1359,"νά ":992,"μέρ":1935,"μέχ":973,"μέλ":884,"μέν":7654,"μέγ":746,"λόγ":1758,"μάτ":1753,"μάδ":1518,"μάζ":1519,"λυμ":1032,"λυτ":817,"λος":3446,"λον":1147,"λογ":7404,"λοί":713,"μο ":3714,"λου":3295,"λού":2344,"λικ":6554,"λια":1551,"λιά":2019,"μη ":2038,"λιό":666,"λιο":1726,"λιτ":3281,"λισ":1155,"λλο":3357,"λλι":1915,"λλε":843,"λλη":7119,"λλα":1712,"λλά":3329,"λλί":726,"νισ":2798,"νιο":1219,"ξη ":1696,"νθρ":1131,"νικ":14169,"νια":1586,"νημ":1520,"νης":3311,"νησ":3074,"νητ":1569,"ξε 
":1387,"νει":4020,"νετ":1530,"νεπ":771,"νερ":896,"νες":2492,"νδι":887,"νδρ":1754,"νεί":692,"νδί":1002,"νγκ":862,"ναφ":1731,"νατ":2689,"ναυ":689,"νασ":1113,"νας":6220,"ναν":1504,"νακ":1377,"ναι":23633,"ναγ":847,"νώ ":1108,"νίδ":909,"μών":1634,"νίκ":973,"νίο":894,"νής":1328,"νό ":1732,"νήσ":732,"νία":4805,"νήθ":2044,"νήκ":1169,"μός":2661,"μόν":1144,"νάμ":808,"μων":716,"νω ":758,"μυθ":2305,"ντ ":1585,"μφω":1019,"μον":2292,"μοπ":1626,"μορ":1296,"μος":2596,"μοι":670,"μοκ":1030,"μπο":2312,"νο ":5296,"μπε":1085,"μπι":1180,"μού":4831,"μπα":642,"μου":4833,"μοτ":850,"μοσ":1084,"νι ":660,"μμα":1876,"λή ":1370,"ιών":3452,"κίν":1235,"κία":1185,"κής":8061,"κό ":13722,"καρ":1054,"κας":811,"κασ":794,"κατ":10049,"καθ":2426,"και":39436,"καλ":3288,"καν":3494,"λα ":2059," Ma":1020," O ":728,"κεν":791,"κετ":3212,"κευ":1338,"κεί":1744,"κει":2368,"κεκ":645,"ιφέ":650,"ιχε":1339," Co":711,"ιωτ":1177,"κέν":650,"ιός":767,"λά ":1854,"κές":3367,"ιότ":2001,"κάθ":781,"κάν":679,"κάπ":806,"κάτ":989,"ινο":2651,"ινη":1172,"ινε":1173,"ινι":1101,"ινή":1455,"ινί":997,"ινα":787,"ινό":2461,"ινω":851,"ιου":2729,"ιος":4127,"ιορ":1283,"ιον":1013,"ιολ":889,"ιοι":954,"ιοδ":1147,"ιογ":776,"κο ":960,"ιού":2201,"ιοχ":1758,"ιρι":1611,"ιρά":1156,"ιρα":723,"ιρε":766,"ισμ":7142,"ιστ":11061,"ισσ":1380,"ιση":1365,"ιτο":998,"ιτα":1283,"ιτι":2704,"ιτε":1108,"ιδρ":1035,"ιεθ":1171,"ιεί":1032,"ιδώ":1125,"ιες":926,"ιερ":1150,"κε ":8838,"ιθμ":885,"κη ":1482," To":646,"ικά":10371,"ικί":910,"ική":22548,"ικέ":3180,"ικα":3219,"ικη":1192,"ικε":1161,"ικι":1228,"ικο":9392,"κι ":739,"ικρ":1864,"ικώ":3521,"ιλί":1034,"ιλα":936,"ικό":19394,"ιλι":2810,"ιλε":728,"ιλο":1028,"ιμέ":1283,"ιμο":2191,"μα ":12255,"λαμ":1653,"λαν":1780,"λαι":1164,"λασ":1322,"λατ":899,"λαδ":919,"λαγ":813,"λαβ":1022,"λαί":667,"λητ":1184,"λην":6429,"λημ":897,"ληρ":1062,"λησ":1042,"λης":1640,"ληθ":1186,"με ":14402,"λεσ":1056,"λες":910,"λευ":2243,"λεκ":1059,"λεμ":1535,"λει":2415,"λεγ":742,"λεί":3987,"κων":656,"λίο":1804,"κών":3899,"μή ":801,"λής":941,"λία":3193,"κόρ":832,"λέξ":905,"λέμ":640,"κόμ":1163,"κόλ":651,"λέγ":681,"κότ":1771,"μά ":961,"λές":766,"κός":6008,"κόσ":1288,"λάδ":2237,"κρι":1532,"κρο":1257,"κρά":1666,"κρα":2747,"κρό":1004,"κολ":1260,"κοι":2432,"κοπ":1299,"κον":2063,"κοσ":762,"κος":1289,"κορ":683,"κογ":1034,"λο ":3553,"κου":2843,"κού":6899,"κυρ":1539,"κυβ":687,"κτω":740,"κτο":1065,"κτρ":885,"κτη":2180,"κτι":1278,"κτή":823,"κτα":758,"λη ":4716,"κιν":1980,"κισ":1099,"κκλ":769,"κην":749,"κητ":870,"κης":1489,"κλο":1123,"κλε":952,"κλη":1528," th":948,"ηχα":829,"ηρο":1562,"ηρι":1012,"ηρε":814,"ηρί":1339,"ησε":2338,"ηση":3739,"ησι":2711,"ησί":1393,"ητο":760,"ητε":799,"ητι":2642,"ητή":1996,"ητέ":684,"ητα":2938,"θήν":1152,"θήκ":739,"θαν":924,"ιά ":3103,"θέτ":764,"θέσ":1185,"ηθο":652,"θε ":885,"ηγό":671,"ηγο":953,"ηνα":1733,"ηνι":4662,"ηνο":892,"ηκα":1019,"ηκε":8651,"ηθυ":973,"ημα":7616,"ημέ":1558,"ημο":3252,"ημι":2310,"ημε":2120,"ηλα":1079,"ηλε":1560,"θυσ":990,"ις ":13118,"κα ":1888,"ιβλ":841,"ιδι":2412,"ιδή":920,"ιαδ":1030,"ιαί":809,"ιατ":1212,"ιαφ":946,"ιας":3279,"ιασ":1512,"ιαμ":1313,"ιαν":1844,"ιακ":4564,"ιό ":1664,"θώς":900,"κή ":16317,"ιάδ":1072,"ιάς":1340,"ιάρ":876,"ιάν":812,"ιάσ":850,"κά ":10264,"θηκ":7617,"θην":731,"θετ":1279,"θεσ":1224,"θερ":1269,"θεω":1121,"θεί":1506,"ια ":20899,"ιο ":9017,"θος":942,"θολ":2157,"ιν 
":1082,"θνή":736,"θμό":1010,"θνι":1551,"θλη":1496,"ενε":1043,"ενη":691,"ενα":1016,"εντ":2485,"ενν":2479,"ενο":2214,"ενι":1650,"εξα":953,"ενώ":1366,"ενό":1445,"εξά":752,"επι":5002,"επα":2136,"επί":3082,"επτ":1207,"επο":863,"ερα":4072,"εργ":1600,"ερά":780,"ερί":3339,"ερι":10543,"ερε":1426,"ερη":1880,"ερν":709,"ερμ":2853,"ερο":6623,"ερό":795,"εσί":785,"εσα":886,"εση":1176,"εσσ":1054,"εστ":833,"ετά":1901,"ετέ":733,"ετα":16396,"ετί":895,"ετε":731,"ετι":1741,"ετρ":2017,"ετο":896,"ευτ":1808,"ευσ":1149,"ευρ":1328,"ευκ":741,"ευθ":751,"εφα":751,"εχν":1145,"εωρ":1834,"εων":1309,"εύε":718,"είχ":1649,"είτ":3867,"είς":773,"είο":3068,"είν":22546,"δών":1462,"είμ":673,"δύο":1317,"είδ":966,"εία":3700,"εβρ":814,"εγά":1944,"εγα":1515,"εγκ":737,"εδρ":932,"εδο":1081,"εθο":675,"εθν":2241,"εκε":1191,"εκρ":827,"εκτ":2188,"ειτ":1270,"εκα":1164,"εκδ":780,"ειδ":2526,"ειο":1827,"ειν":862,"εισ":862,"εις":3437,"ειρ":2701,"εια":5347,"εμι":793,"εμπ":762,"εμο":808,"ελφ":670,"εμβ":1975,"ελλ":4425,"ελι":1033,"ελο":1356,"ελέ":801,"ελε":3847,"ζον":1351,"ζου":798,"ης ":54494,"εύο":1107,"εύτ":1065,"ζετ":3914,"ζει":1250,"ην ":28066,"δη ":882,"γεί":984,"γεθ":651,"γει":1005,"γεν":3283,"γερ":916,"γετ":873,"δα ":3858,"γγλ":2042,"γγε":798,"γγρ":1089,"γαλ":2463,"γαν":1627,"γασ":694,"γία":4119,"γής":750,"δή ":1030,"γάλ":2004,"γάν":752,"γέν":1475,"βόρ":789,"γωγ":1517,"γρα":5451,"δο ":1489,"γρά":2441,"γού":1541,"γον":1405,"γος":1565,"γορ":893,"γου":2071,"γνω":3224,"γμα":1795,"γλώ":959,"γλι":810,"γκό":699,"γκο":1093,"γκρ":957,"γκε":929,"γκα":1052,"γισ":983,"γιο":2940,"γιν":873,"γικ":1875,"για":6740,"δηλ":1052,"δημ":2586,"δης":760,"δια":7984,"διά":2910,"διο":3225,"δικ":4263,"διε":2036,"ει ":13053,"δεν":1174,"δεκ":905,"δει":815,"δελ":851,"δες":1552,"δεύ":817,"δία":1566,"δήμ":1730,"γών":991,"εί ":5299,"δας":1990,"γων":2003,"γός":1387,"δων":753,"ες ":14729,"δυν":920,"δυτ":1257,"δου":1522,"δοσ":2455,"δος":1488,"δον":679,"δομ":954,"ερ ":1474,"δρύ":879,"δρυ":713,"δρο":2284,"δρα":2109,"εν ":2162,"αίν":1135,"αία":2533,"αίτ":698,"αίρ":1304,"αίο":2174,"αγι":742,"αγγ":1827,"αγο":1254,"αγκ":1429,"αγμ":1117,"αγν":698,"αδι":1357,"αδε":988,"αγω":2920,"αδή":866,"αβε":1016,"ας ":38230,"αρί":2489,"από":22187,"αρά":1986,"απο":6560,"απα":958,"απε":757,"αξύ":1143,"αξι":781,"ανώ":968,"ανό":2218,"αντ":8056,"ανο":2939,"ανθ":906,"ανι":4409,"ανδ":1912,"ανε":2278,"ανα":6682,"ανά":2554,"ανέ":714,"ανή":1615,"ανί":2957,"ατό":1156,"ατε":1300,"ατι":5266,"ατη":1870,"ατρ":1884,"ατο":9284,"ασσ":883,"αστ":6688,"ατά":4672,"ατέ":1441,"ατί":1716,"ατα":6390,"αση":2971,"ασι":3609,"ασκ":1313,"ασμ":1014,"ασί":3461,"αρτ":1786,"αρχ":6414,"αρα":6192,"αρμ":849,"αρο":1166,"αρι":2077,"αρκ":848,"αθο":890,"αθη":1010,"αθμ":688,"αθλ":1208,"αζί":676,"αερ":700,"αλύ":2369,"αμέ":982,"αμβ":1197,"αμα":698,"αμε":1031,"αμμ":1433,"αμο":749,"αμπ":849,"ακό":2346,"αλά":833,"αλα":2736,"αλί":1668,"αλε":761,"αλι":1989,"αλλ":4951,"αλο":1928,"ακή":1031,"αιό":1091,"αιώ":1483,"ακά":695,"ακε":724,"ακα":1680,"ακο":1635,"ακρ":1212,"ακτ":3182,"αθώ":989,"αιδ":651,"αιο":1032,"αιν":1733,"αιρ":2790,"γή ":1222,"βαν":794,"βασ":2971,"γα ":679,"αυρ":642,"αυτ":5333,"αφί":785,"αφή":1070,"αφέ":2568,"αφε":651,"αφι":1185,"αφο":1697,"αϊκ":1749,"βάν":1252,"βάλ":748,"βέρ":1034,"γο ":1180,"βου":1328,"βολ":1321,"βρο":795,"βρί":4251,"βρα":834,"βιβ":715,"βικ":723,"βιο":702,"γκ ":753,"ήτη":852,"ήτα":9260,"ήσε":1365,"ήσο":649,"ίς ":1391,"ήρξ":1088,"ήρι":880,"ήρε":756,"ίο 
":4669,"ίες":2648,"ίζο":1090,"ίζε":3305,"ίδο":723,"ίδε":1013,"ίδη":788,"ίδι":1386,"ίδα":1853,"ίας":15390,"ίοδ":915,"ίος":1746,"ίου":12007,"ίπο":938,"ίνε":1855,"ίνα":24006,"ίνη":1713,"ίνο":1856,"ίλι":696,"ίκη":1461,"ίκο":1574,"ίχε":1140,"ίρι":793,"ίτλ":825,"ίτε":1304,"ίτη":995,"ίστ":2304,"ίτα":3434,"ίσκ":3610,"ίση":1856,"ίων":2775,"ίως":1118,"αι ":81620,"αν ":18884,"άνο":1282,"άνν":919,"άνι":1040,"άνε":1521,"άνδ":657,"άνα":777,"άνω":1396,"άντ":1482,"άμε":1147,"άλι":654,"άλη":999,"άλλ":3199,"άλα":1032,"άλο":1498,"άσε":1019,"άρι":1410,"άρκ":849,"άρτ":1022,"άρχ":1270,"άπο":1033,"άφο":2166,"άτω":1906,"άτι":758,"άτο":2114,"άτα":711,"άτη":871,"ές ":9688,"άστ":2257,"άση":1198,"έγε":1024,"έγι":781,"έδρ":1043,"έας":1468,"έλο":1016,"έλα":1017,"έλε":1428,"έλη":777,"έκτ":1194,"ένα":9814,"ένη":1988,"ένε":1716,"ένο":3647,"ένν":695,"έντ":1717,"ένω":1354,"έπε":703,"έρα":2072,"έργ":1441,"έρο":1656,"έρν":854,"έρε":2865,"έρι":1085,"έρχ":675,"έτο":668,"έτρ":1180,"έτη":1275,"έστ":768,"ής ":18113,"έση":981,"έχε":3074,"έχο":1069,"έχρ":1025,"έως":769,"ία ":26814,"ήθη":3124,"ήκε":1241,"ήμο":3064,"ήμε":1045,"ήμα":3234,"ήνα":1396,"άζε":1809,"άζο":714,"άδα":3349,"άδε":985,"άδο":1280,"άκη":718,"άθε":1012,"έα ":1626,"άς ":2864,"άν ":930,"Του":646," θρ":674," ιδ":2116," ηλ":1023," ημ":735," θέ":1277,"Τα ":1230," θά":727," θα":781," θε":3732," κρ":1753," κο":3790," κλ":1289," κυ":2799," λα":1212," κύ":1020," κό":2579," λέ":1007," λε":1503," ισ":2453," κα":52893," κέ":693," κά":2601," κι":1426," κε":1166," μυ":2558,"Σεπ":721," μο":3409," μπ":1646," νη":980," νε":1091," μό":948," να":5646," λο":894," με":23063," μη":1415," μι":5537," μά":792," λό":896," μέ":5756," μί":2039," μα":2448," ον":2891," ολ":648," ομ":3122," οι":5274," νό":1365," νο":3391," ξε":646," ίδ":940," γα":745," βό":838," βι":1203," βο":1444," βρ":3003," αφ":1044," αυ":4681," ατ":926," ασ":3167," αρ":6507," βα":3334," βά":792," αθ":840," αε":736," αγ":3185," αδ":879," απ":30213," αν":12857," αλ":2584," αμ":1021," αι":1819," ακ":2186," δύ":1562," εί":24110," εγ":909," δυ":1764," δο":1194," δρ":920," δε":3712," δη":3127," δι":13223," δή":1731," δί":798," γυ":721," γν":3012," γλ":1409," γρ":1532," γε":4322," γι":8013," ζω":871," ευ":1283," εφ":760," ετ":975," ερ":1138," επ":11178," εξ":2635," εν":5151," εμ":1144," ελ":4400," εκ":4866," ει":1529," εθ":1094," Χα":1287," Χρ":997,"Συν":780,"Στη":2306," άλ":1949," άν":742," έκ":1202," έλ":791," έδ":1173," έγ":922," έχ":2858," έν":9401," έρ":1085," έτ":734," ήτ":9263," Ολ":1008," Οκ":750," Οι":2387," Πε":2131," Πα":4958," Πο":2479," Ου":928," Ορ":742," Πά":1151," Ρο":774," Πρ":2843," Πό":722," ο ":9144," Ρω":975," Σο":1128," Σι":796," Σκ":861," Σε":1934," Σα":1339," π ":930," Τρ":1615," Τσ":726," Το":8380," Τζ":1148," Τα":1842," Σύ":1275," Στ":4987," Συ":1596," Υπ":1004,"Το ":7170," Φρ":703," Φι":702," Φε":912," Ισ":1154," Ιτ":907," η ":9904," Ιο":1482," Κι":702," Κε":764," Κα":4954," Κά":941," Ιω":835," Ια":1066," Ηρ":653," Ηλ":946," Θε":1961," Ντ":1192," Νο":1695," Νό":654," Μπ":2698," Μο":1622," Να":926," Νέ":832," Νι":686," Λο":1279," Μά":1156," Μέ":676," Μα":4467," Με":3720," Μι":1148," Κο":2560," Κρ":1401," Κυ":1025," Κω":831," Κό":910," Κύ":1399," Λα":1229," Λε":940," Λι":661," Βα":2096," Αυ":2137," Αρ":2641," Ασ":1614," Αν":3964," Απ":2366," Αι":968," Ακ":643," Αλ":1804," Αμ":1723," Αθ":2042," Αγ":2491," ή ":7834," Ευ":1907," Εκ":836," Ελ":3840," Εθ":914," Ερ":803," Επ":1439," Εί":1505," Δι":2396," Δη":1617," Δε":1217," Δή":1387," Γε":2953," Γκ":1274," Γι":1594," Γο":753," Γα":1202," Βε":785," Βι":896," 
Βρ":2238," Βο":1594," Έχ":649," Έλ":1540," Έν":836," Ήτ":719," Χ ":688," Άγ":671," Ο ":11895," Η ":10438," Β ":692," Α ":894,"Παρ":1018,"Παλ":664,"Παν":1277,"Πρω":677,"Πρό":666,"Προ":732,"Πολ":1465,"Περ":871,"Οκτ":727,"Οι ":2013," φω":763,"Μαρ":1413," χα":1979," χι":822," χρ":4665," υψ":758," φα":980," φι":921," φο":1544," φυ":1533," τω":10299," τό":929," υπ":5803," σχ":2338," συ":11696," στ":39224," τα":7256," σύ":3950," τί":845," τέ":1460," τι":4962," τη":55058," τε":2163," τρ":3378," το":79535," τμ":1047," σή":911," σα":762," σπ":865," σε":9337," ση":3591," σκ":2184," πό":3153," πρ":16846," πέ":917," πά":1081," ου":1649," ορ":3515," οπ":6947," πο":24345," πλ":4421," πι":1545," πε":9563," πα":9674," όρ":2611," όπ":2757," όν":3176," ότ":1627," όλ":952,"Μεσ":722," ως":4538," χώ":2083," χω":2280,"Βρί":1153,"Βασ":1048,"Αμε":923,"Ανα":1441,"Αντ":1044,"Αθή":1140,"Αυγ":651,"Αυτ":975,"Αστ":933,"Απο":822,"Αρχ":657,"Αγγ":839,"Αγί":679,"Δημ":1529,"Δεκ":668,"Δια":869,"Δήμ":1366,"Γερ":1165,"Γεν":1089,"Γαλ":1018,"Ελλ":2950,"Εθν":907,"Είν":1389,"Ευρ":1295,"Θεσ":739,"Ιαν":723,"Κων":707,"Με ":1185,"Ιτα":855,"Ιου":1221,"Καλ":850,"Καρ":959,"Κατ":1044,"al ":791,"and":770,"an ":997,"ati":806,"Έλλ":1427,"Ήτα":715,"ion":1164,"he ":670,"ia ":803,"igh":691,"ing":654,"ht ":652,"hum":677,"er ":1543,"es ":1146,"en ":662,"ght":666,"ng ":653,"on ":1503,"mb ":660,"us ":771,"umb":700,"tio":831,"thu":700,"ter":753},"n_words":[6375777,7261876,4927375],"name":"el"}
\ No newline at end of file
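Each profile added by this commit is a single JSON object: an n-gram frequency map under "freq", corpus word counts per n-gram length under "n_words", and the language code under "name" (for the profile above: "el", with n_words [6375777, 7261876, 4927375]). A minimal sketch of reading such a file under Python 3, where it must be opened as UTF-8 text rather than raw bytes; load_profile is a hypothetical helper for illustration, not the library's own loader:

    import json

    def load_profile(path):
        # Python 3: decode the profile explicitly as UTF-8 text;
        # the n-gram keys are Unicode (Greek, Spanish, Estonian, ...).
        with open(path, encoding='utf-8') as f:
            data = json.load(f)
        return data['name'], data['freq'], data['n_words']

    lang, freq, n_words = load_profile('nlp_resource_data/langdetect/profiles/el')
    print(lang, len(freq), n_words)  # el, <number of n-grams>, [6375777, 7261876, 4927375]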
diff --git a/nlp_resource_data/langdetect/profiles/es b/nlp_resource_data/langdetect/profiles/es
new file mode 100755 (executable)
index 0000000..cdc1956
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/es
@@ -0,0 +1 @@
+{"freq":{"D":116547,"E":296654,"F":128129,"G":121338,"A":269964,"B":167407,"C":324676,"L":239740,"M":232523,"N":101570,"O":64162,"H":81475,"I":139475,"J":79180,"K":34991,"U":62793,"T":132122,"W":35590,"V":93781,"Q":9803,"P":223906,"S":275410,"R":140692,"Y":15953,"X":27052,"Z":15636,"f":602083,"g":868874,"d":3877179,"e":8874725,"b":812441,"c":2912236,"a":7916083,"n":5177793,"o":5444424,"l":3848407,"m":1698678,"j":201978,"k":118503,"h":478078,"i":4816050,"w":47097,"v":521828,"u":2624688,"t":3108332,"s":4177405,"r":4307485,"q":288923,"p":1539196,"z":257162,"y":639511,"x":118459,"²":9268,"Á":8848,"í":298098,"é":232623,"è":9587,"á":241717,"ú":87005,"ó":542725,"ñ":141698," l":925296," m":342591," n":169035," o":224265," h":153575," i":177440," j":57642," k":20882," d":2112466," e":1686012," f":346033," g":128207," a":615947," b":110799," c":841620," y":413167," z":8894," u":509709," t":263819," v":109427," q":191724," p":764411," s":537300," r":229895," J":77814," K":33034," H":77965," I":96403," N":91185," O":55452," L":232579," M":222195," B":158843," C":308303," A":248654," F":121663," G":115612," D":105557," E":280970," Z":14852," Y":15119," X":18791," S":256762," R":131715," Q":9140," P":211939," W":33301," V":79914," U":57665," T":122063," á":26258," é":11575," ú":11089," Á":8823,"A ":15731,"Da":13474,"Cu":13670,"Cl":11053,"Co":80802,"Cr":14045,"Ce":15444,"Ch":44811,"Ci":15164,"Ed":7168,"Du":7509,"Do":14187,"De":29603,"Di":25181,"Fe":19702,"Fa":9899,"Eu":12005,"Es":99956,"En":28786,"El":96353,"Ge":16269,"Ga":21659,"I ":25126,"Fu":23434,"Fr":30636,"Fo":10911,"Fi":10813,"C ":12178,"Au":18315,"Ar":39333,"At":7482,"As":13866,"D ":7805,"Ba":43405,"Ac":8941,"Am":14049,"An":29222,"Ai":9057,"Al":51515,"Bu":16123,"Br":22960,"Ca":88236,"E ":7609,"Bi":10052,"Be":24042,"Bo":25983,"Le":25199,"Li":24708,"La":111930,"Lu":14073,"Lo":44117,"Me":30598,"Mi":26023,"Ma":85855,"Mu":18246,"Mo":36242,"Ni":10392,"Ne":11654,"Na":26109,"Nu":10474,"No":24130,"Gi":8606,"Gr":21868,"Go":14614,"Gu":21771,"Ha":19115,"He":16228,"II":18780,"Hi":11870,"Ho":13033,"Hu":9391,"In":34662,"Is":11404,"Ja":18733,"Je":8830,"Jo":22070,"Ju":22045,"Ka":8249,"Un":40236,"VI":7056,"Tu":7329,"Tr":17446,"To":19269,"Th":18313,"Ti":12341,"Te":21087,"Ta":17232,"V ":8314,"St":15377,"Su":36610,"Wi":9415,"Wa":8415,"Vi":28361,"X ":8775,"Va":20820,"Ve":16615,"Mé":10810,"Pu":11178,"Pr":36130,"S ":9671,"Pe":30523,"Pa":58336,"Pl":10964,"Po":27786,"Pi":20810,"Or":15403,"Se":51296,"Sc":7275,"Si":21213,"So":22342,"Ru":9143,"Sa":70712,"Re":46909,"Ri":14206,"Ro":31238,"Qu":8096,"Ra":15098,"b ":16974,"a ":2807777,"i ":67723,"cá":7737,"ge":97443,"ga":124206,"fl":18249,"fi":107495,"fr":77696,"fu":123473,"fo":79998,"có":10267,"he":77025,"ha":138548,"gn":24260,"cé":16005,"gl":50338,"gi":128766,"gh":9410,"gu":117756,"gr":97493,"cí":12742,"go":114696,"du":66523,"g ":31294,"ea":126973,"eb":49402,"ec":261065,"ed":147913,"de":2140534,"di":323631,"dm":7851,"do":556174,"dr":51266,"ex":62245,"eu":25398,"añ":100385,"ev":65967,"ey":26516,"ez":45865,"fa":71295,"h ":31038,"fe":72710,"eg":169863,"ef":34480,"ee":18956,"el":821140,"ej":32037,"ei":38528,"ep":114508,"eo":63152,"en":1446857,"em":183658,"et":133793,"es":1262459,"er":848036,"eq":18834,"aí":17948,"ca":518269,"e ":2816707,"bs":7367,"br":179247,"bu":47229,"bo":65362,"bl":117456,"bi":135239,"be":67612,"da":466302,"f ":15176,"cu":158454,"ct":134054,"cr":79888,"co":691321,"ck":21843,"cl":46854,"ci":780154,"ch":115875,"ce":230782,"cc":44533,"c ":23067,"az":30061,"ay":59587,"ba":136047,"d 
":184141,"at":211115,"as":567489,"ar":691307,"aq":9531,"av":48388,"au":72787,"ak":11907,"al":659633,"ai":51423,"aj":50703,"ao":8102,"ap":77232,"am":297433,"an":805183,"ac":374463,"ad":631439,"ab":144055,"ag":85149,"ah":13118,"ae":43681,"af":21278,"nu":35537,"nt":779819,"ns":140899,"nq":11336,"no":368063,"nn":24107,"nz":31492,"ny":8769,"nv":18820,"oe":24400,"of":39013,"oc":180019,"od":93463,"oa":18159,"ob":123195,"om":334368,"on":689571,"ol":230551,"oi":24324,"oj":12159,"og":57063,"oh":10801,"m²":9191,"ot":93533,"os":699509,"ov":85328,"ou":52929,"op":77349,"oo":15677,"or":691238,"r ":505089,"ox":9374,"ow":10366,"oz":11222,"oy":19941,"lá":14567,"pe":253260,"pa":326975,"pl":67808,"lé":20848,"po":370364,"ph":12436,"pi":101456,"lo":395718,"lm":47465,"ll":193235,"ls":13390,"lp":10103,"lv":15446,"lu":77232,"lt":59369,"ly":9017,"o ":1816298,"ma":323604,"mb":142619,"me":329898,"mi":236057,"mm":8572,"ié":32355,"mp":132049,"mo":207044,"mu":126184,"ió":324878,"p ":13048,"na":683565,"nc":291584,"nd":277346,"ne":311456,"nf":32397,"ng":105777,"ni":309689,"nj":11352,"nk":8262,"nm":8232,"ju":57258,"fí":9213,"jo":44644,"ki":14501,"ke":14800,"ka":12118,"m ":52750,"gó":7060,"gí":14289,"gé":21878,"km":14607,"li":353994,"le":344098,"ld":22533,"lg":19778,"lf":10189,"la":1037960,"lc":15579,"lb":23157,"gú":7200,"n ":1645057,"hr":7436,"dí":25020,"ht":8163,"hu":24887,"hi":87814,"ho":64406,"dé":12258,"id":350652,"ic":527484,"ib":58759,"ia":458387,"ig":130847,"if":53944,"ie":349344,"k ":32633,"ir":119098,"is":363824,"it":317793,"iu":42028,"eñ":26834,"iv":114899,"eó":10608,"ij":12968,"ik":8874,"il":242265,"im":150543,"in":518205,"io":362002,"ip":90246,"je":41244,"iz":77284,"l ":1070460,"ja":41449,"xi":40771,"té":18357,"xp":12413,"tí":29392,"tó":61297,"xt":20556,"só":7077,"z ":55163,"tá":43968,"sé":9023,"sí":13754,"ró":22130,"y ":494983,"wa":13090,"ré":8377,"vi":184322,"rí":60498,"vo":65147,"uz":11349,"uy":29308,"ux":7768,"uv":14743,"rá":28876,"ve":131509,"va":112048,"x ":22885,"ui":101034,"uj":10219,"ul":130016,"ue":529325,"ug":37794,"ur":196055,"us":135539,"ut":75455,"um":62826,"un":688515,"uo":9672,"up":46545,"tu":178275,"tt":18266,"pó":8915,"ub":69875,"ua":157161,"ud":82055,"uc":86063,"w ":7730,"pú":12838,"to":492464,"tl":11756,"ts":11628,"tr":358077,"te":662982,"ti":404422,"th":33828,"tb":14576,"ta":636505,"su":163099,"ss":27514,"st":570360,"sl":23077,"sk":8256,"sm":35452,"sp":146933,"so":177522,"sd":21641,"sc":98040,"se":328771,"sh":16915,"si":316726,"rz":18465,"u ":97650,"nú":7450,"sa":193871,"rr":132506,"rs":76964,"rt":263306,"ru":82530,"rv":23971,"nó":12071,"ry":14114,"ní":12258,"rq":18301,"rp":16192,"ro":473902,"rn":71986,"né":9922,"rm":118903,"rl":28428,"rk":11610,"ri":592813,"rg":83834,"rf":13796,"ná":7351,"re":689451,"rd":86475,"rc":86488,"rb":27212,"ra":734384,"t ":94930,"mú":13777,"mó":11660,"qu":286748,"mí":7526,"mé":17124,"má":63634,"s ":1974557,"pt":30720,"pu":78951,"ló":19506,"lí":47975,"pr":228655,"ps":9053,"zó":8970,"zi":7657,"za":111482,"zu":8538,"zo":36191,"ye":23683,"ya":28514,"uí":10738,"yo":35768,"ué":9949,"² ":9260,"án":62336,"ál":20287,"áf":8040,"ác":15525,"ár":16431,"át":16282,"ás":49088,"á ":27321,"óg":11484,"ód":8472,"ór":13833,"ón":382883,"óm":11589,"ól":14360,"ó ":71778,"ña":56328,"ño":76416,"ín":33019,"ím":10418,"ío":16786,"ít":28434,"ís":34065,"íf":8702,"íc":18334,"íd":7650,"ía":105050,"í ":14487,"él":11392,"én":55163,"és":52145,"ét":12035,"ér":34634,"éx":11216,"éc":11450,"é ":16494,"ún":21741,"úl":7364,"út":8499,"ús":12464,"úb":12720,"ú ":7505,"一":7134," Ga":21582," Ge":16178," 
Fo":10836," Fu":23266," Fr":30592," Fi":10737," Ha":19064," He":16181," Go":14559," Gr":21740," Gu":21689," Gi":8540," Hu":9382," Ho":12969," Hi":11833," Je":8808," Ja":18701," Is":11343," In":34554," Ka":8187," Jo":22025," Ju":22012," La":111649," Le":25083," Li":24432," Ma":85482," Mi":25830," Me":30504," Lo":44008," Lu":14044," Ne":11540," Na":26019," Ni":10343," Mo":36161," Mu":18130," Am":14012," An":29155," Al":51409," Ai":9020," Ac":8911," Ba":43282," Au":18286," At":7457," As":13733," Ar":39211," Be":23973," Bi":9959," Bo":25824," Br":22869," Bu":16073," Ca":87586," Ce":15402," Ci":15105," Ch":44683," Cl":10942," Cr":13930," Co":80501," Cu":13492," Da":13336," Di":25027," De":29418," Do":13935," Du":7489," Ed":7143," El":96116," Es":98653," En":28605," Eu":11992," Fe":19655," Fa":9820," Wi":9336," Wa":8349," a ":151113," Or":15358," Po":27637," Pl":10897," Pi":20772," Pe":30452," Pa":58153," Nu":10454," No":24018," Ra":15024," Qu":7915," Ro":30970," Re":46794," Ri":14181," Pr":36030," Pu":11158," Mé":10807," Su":36537," St":14732," Ta":17163," Th":18237," Ti":12302," Te":20935," Tr":17351," To":19041," Ru":9128," Sa":70550," Si":21147," Sc":7137," Se":51093," So":22241," Va":20787," Ve":16562," Vi":28257," Tu":7181," Un":40136," ja":7912," im":16980," in":113728," is":9470," it":12073," ju":44169," ha":71347," he":17319," gr":47141," go":9321," gu":12156," id":8758," hi":30296," ho":20186," hu":9280," ni":9094," ne":10791," na":40693," mu":56375," mo":34821," oc":24698," of":15543," ob":20507," nu":10795," no":85354," le":29852," li":28752," la":626496," gé":18729," km":14107," me":61144," mi":48266," o ":71023," ma":85649," lu":16286," ll":24228," lo":186366," ag":19771," ab":21502," ac":45971," ad":18011," am":14490," an":45970," ap":24125," al":116261," au":26320," ar":40593," at":8359," as":24315," ba":52391," bi":11562," bo":13161," br":12518," ca":157850," e ":10258," er":14855," eq":8506," es":539820," en":614552," em":20859," ej":8137," el":384203," fe":21573," fa":54639," añ":28328," ex":38515," fu":116273," fr":61229," fo":35270," fl":10710," fi":33918," ge":20831," ga":13109," cl":15585," co":462664," cr":30029," ce":31950," ch":12983," ci":56484," da":13407," cu":60477," do":35749," de":1878742," di":146031," ed":14319," du":20922," té":10997," tí":7323," ru":8901," sa":21596," se":189308," si":104740," so":62800," qu":191524," mú":9473," ra":15504," re":170810," ro":18871," pu":38713," pr":177892," lí":9079," má":36795," ot":17424," or":52385," pe":108853," pa":137566," pl":26836," po":242173," pi":19178," y ":406137," va":23514," ve":32789," vo":9768," vi":36979," ub":13189," tu":8296," us":12351," ut":9609," un":466456," ta":42321," su":139495," tr":62181," to":30228," th":7488," ti":31564," te":60962," ár":9835," ál":11624,"Fer":7120,"Es ":20079,"Est":40499,"Esp":30688,"Eur":9266,"El ":89488,"En ":18934,"Gar":7699,"Fue":18788,"Fra":22586,"II ":12947,"Gue":7438,"Gra":11989,"Int":10608,"Ind":7667,"Arg":10516,"Bar":13123,"Ale":9873,"Alt":8713,"And":7070,"Ant":10595,"Cal":12289,"Cam":10802,"Cas":14393,"Car":17937,"Can":11389,"Ber":7187,"Chi":15815,"Cen":8021,"Cha":15858,"Cor":11229,"Com":17537,"Col":10868,"Con":24853,"Nac":11329,"Nue":8768,"Nor":14767,"Pla":8680,"Per":15205,"Par":20522,"Pro":17204,"Pri":7235,"Pre":7665,"Méx":8860,"Jos":9116,"Las":10322,"La ":78398,"Los":18518,"Med":8474,"Man":10902,"Mar":33709,"Mad":9566,"Mon":15534,"Mun":7799,"Su ":13104,"Sai":11675,"Sal":8578,"Se 
":18407,"San":32494,"Rep":8544,"Val":13044,"Vil":8238,"Uni":27767,"The":12538,"bit":22599,"bio":8582,"bil":7159,"bo ":7079,"blo":8255,"ble":28350,"bli":27042,"bla":52392,"bol":20729,"bié":25416,"bor":11023,"be ":11171,"ban":26993,"bal":10177,"baj":17768,"bas":16282,"bar":17411,"ber":30080,"bia":11473,"bic":15037,"bie":16653,"ca ":145181,"car":45351,"cas":41067,"cat":14539,"can":94395,"cap":14546,"cac":16670,"cab":10529,"cad":51771,"cam":20670,"cal":48767,"ce ":35472,"bri":27454,"bro":15368,"bra":26651,"bre":105758,"bur":7145,"bum":12078,"am ":8792,"ajo":16624,"al ":274116,"aja":11561,"aje":17588,"ain":19507,"ais":7210,"agu":12491,"ago":28675,"anu":10190,"anz":18126,"ano":79509,"ann":7583,"ant":195066,"ans":17426,"ane":17755,"ang":19562,"ani":47254,"ana":70957,"anc":101262,"and":90623,"amo":12426,"amp":25988,"ami":57872,"ame":88516,"amb":41071,"ama":44395,"alu":9523,"alt":15595,"alo":13492,"alm":37560,"all":38280,"alg":10767,"ali":96797,"alc":7683,"ald":9255,"ale":82063,"ala":31798,"an ":104955,"aba":35637,"abe":14437,"abi":26160,"abl":18696,"abo":13547,"abr":22582,"ae ":28175,"aca":16009,"ad ":111062,"aga":11365,"ado":272062,"adr":17031,"adi":20049,"ade":31132,"adu":7585,"aco":10755,"aci":224093,"ach":10349,"ace":30766,"acc":10027,"ada":154737,"act":41870,"aza":10801,"ayo":25160,"aya":7832,"ba ":19878,"aqu":8935,"at ":7824,"arg":22636,"are":34407,"ard":31172,"arc":32672,"ara":95441,"aro":19229,"arn":7316,"arm":9270,"arl":13962,"ari":83070,"arq":11629,"arr":48459,"ars":9729,"art":119072,"asa":20743,"arz":11183,"asi":22274,"asc":10580,"ase":16265,"aso":13076,"ar ":100623,"apa":19232,"ape":7495,"api":11786,"apo":11441,"apr":7236,"as ":405615,"ava":9975,"aut":18769,"arí":13656,"avi":12074,"ave":14502,"ay ":13782,"ata":37974,"ast":48943,"atr":18116,"ato":31490,"ate":31650,"ati":40488,"atu":16694,"aun":7152,"aur":9494,"aus":7903,"jer":7889,"je ":13092,"jo ":28253,"ito":98982,"itu":57305,"eña":13429,"iud":31065,"ism":26830,"isl":10534,"iso":8290,"isp":12009,"ist":177840,"ita":90631,"ite":23119,"iti":14600,"ivo":27807,"eño":12432,"iva":29645,"ivi":28311,"ive":26878,"ipo":17187,"ipi":23278,"is ":44742,"ion":119701,"ior":15897,"ios":44527,"ipa":22022,"ir ":23370,"iri":18435,"isi":29089,"ise":10419,"isc":18799,"ire":21390,"ira":17066,"ja ":14889,"iza":56748,"km²":9037,"gía":13787,"gén":19973,"jul":9258,"jun":20993,"jue":11292,"ha ":23020,"ham":8200,"han":11281,"har":13249,"has":16766,"hab":28769,"hac":13453,"he ":23960,"her":16542,"hin":11792,"hil":16082,"his":17036,"ho ":17368,"go ":53446,"glo":15485,"gle":10695,"gla":7868,"gob":7093,"gni":7865,"gió":54574,"cés":10035,"glé":12339,"gon":9593,"gos":23034,"gru":17556,"gra":45297,"gri":10321,"gre":9359,"gui":14307,"gua":27766,"gue":27456,"gur":7400,"gun":20675,"iam":7087,"ial":56085,"ian":52472,"ias":29422,"iar":10235,"ic ":7674,"iac":10109,"iad":14476,"ibl":8505,"ibi":8164,"ibr":11003,"ibu":11957,"id ":12492,"ibe":11766,"ia ":260890,"iet":7563,"iel":8991,"iem":41853,"ien":137287,"ier":54165,"ies":19531,"ied":10903,"ieg":9534,"ifo":15425,"ife":9766,"ifi":21950,"ict":9719,"ico":124072,"ici":106469,"ich":18974,"ice":11245,"ie ":46477,"ica":217183,"ido":115195,"idi":11705,"ide":56950,"ida":143468,"il ":29082,"ige":12181,"iga":13218,"igl":16846,"igi":27180,"igu":25326,"igo":7750,"ign":13613,"imo":21293,"imp":19247,"ime":39882,"imi":27512,"inc":70215,"ind":21266,"ina":105516,"ino":52448,"int":60589,"ins":15850,"inf":13445,"ine":33717,"ing":45586,"ini":36645,"iod":9277,"inv":7546,"ila":17117,"in 
":30692,"ilo":12791,"ill":61746,"ili":73439,"ile":24670,"ima":28669,"io ":149275,"día":18502,"hom":8228,"hos":7240,"hor":7815,"hum":8623,"fes":10458,"fer":25898,"fec":9155,"feb":8937,"fam":38352,"ext":18108,"ez ":27186,"exp":10760,"exi":15850,"eza":8196,"eta":35458,"ete":19089,"eti":13610,"esp":91539,"eso":20238,"est":182109,"aña":34948,"año":60064,"eto":13724,"etr":18301,"epú":7797,"eve":10842,"eva":20123,"evo":10163,"evi":21593,"erí":14511,"ey ":19031,"epe":9088,"er ":98611,"epa":47324,"eos":9539,"eor":7614,"eon":7647,"emá":15175,"es ":719197,"ept":16454,"epo":8010,"epr":9962,"elí":11498,"eri":85302,"erg":11010,"ere":38036,"erf":9607,"erc":31567,"erd":14302,"era":127455,"erb":7502,"et ":12616,"equ":18716,"aís":14636,"esi":46520,"esc":45995,"esd":20115,"ese":28957,"esa":79475,"erv":17764,"err":53481,"ert":62557,"ers":52410,"ern":44545,"erm":26731,"erp":8929,"ero":107826,"en ":609899,"ela":41071,"ele":43829,"eli":19783,"ell":40806,"elo":21079,"eo ":20655,"emb":41784,"ema":33787,"eme":18576,"emo":14924,"emi":17348,"emp":34242,"ene":94564,"eng":10179,"ena":41586,"end":51128,"enc":77537,"eno":37094,"eni":23612,"ens":46652,"ent":408861,"enz":9528,"ego":28111,"egi":66647,"egr":10013,"egu":24464,"ein":14332,"eja":7548,"el ":611266,"ejo":12599,"eje":7469,"gin":15815,"gio":12026,"gid":10945,"gic":10164,"gen":57105,"ger":8119,"ge ":11010,"gad":14992,"gas":7988,"gar":19699,"gal":9045,"gan":23906,"ga ":30329,"fue":90978,"fun":22112,"fra":50839,"fre":13616,"fri":7302,"for":59914,"flo":7804,"fic":57976,"fil":9492,"fin":19583,"da ":212873,"de ":1556339,"dad":141389,"dal":9998,"dae":14141,"das":36381,"dar":9501,"dan":12985,"dam":10048,"cul":40621,"cue":32934,"cua":33441,"ctu":33625,"ctr":9939,"cto":44819,"cti":20454,"cte":9828,"cta":10659,"cur":10605,"cuy":7974,"cla":14341,"clu":14576,"co ":135190,"ció":209489,"con":251381,"col":25189,"com":184452,"cor":25629,"cos":36158,"cre":24696,"cri":31616,"cro":8764,"cci":38750,"cea":16334,"ch ":11118,"cer":30048,"ces":71319,"cen":32729,"cep":7727,"cel":19017,"ced":7879,"cha":26138,"cia":152068,"ck ":13234,"cie":87706,"cid":70600,"che":19885,"chi":20475,"cho":21852,"cil":9620,"cim":9340,"cir":8507,"cis":9341,"cit":8942,"ciu":26750,"cin":17134,"cio":103441,"cip":45552,"ed ":10329,"ebr":19067,"eae":10702,"ead":15707,"eal":19761,"eas":12813,"eat":7412,"ea ":28070,"efi":9256,"efe":15631,"ega":22427,"edi":50600,"ede":34956,"eda":21258,"edo":13652,"edr":9154,"ech":18820,"eci":89707,"ece":32005,"ecc":12316,"eca":7852,"ecu":14868,"ect":45028,"eco":21596,"dur":19421,"dor":54124,"don":18997,"dou":11963,"dos":96466,"doc":7141,"duc":27199,"dri":13291,"dra":9547,"dre":11310,"dro":9953,"dic":47817,"did":11186,"dia":35235,"der":49978,"des":90830,"deb":8351,"dec":9801,"def":7698,"del":228490,"den":85908,"dem":11684,"dep":50829,"deo":9414,"do ":341916,"div":13995,"din":11969,"dio":35658,"dir":18800,"dis":80875,"dit":9088,"die":14888,"dif":14152,"rga":23318,"ri ":7113,"rge":24597,"rgo":16017,"ret":23611,"res":140547,"rev":10637,"rfi":8102,"rea":43224,"ref":13241,"rec":55408,"red":15261,"rei":7333,"reg":68764,"rem":14749,"ren":51706,"rel":19501,"rer":18242,"rep":14882,"rda":8172,"rdo":13679,"rdi":9872,"rde":29313,"re ":155218,"rci":19308,"rch":7367,"rce":15953,"rca":25800,"rd ":11954,"rar":15085,"ras":50104,"rat":31492,"rav":8075,"rag":12449,"ran":146320,"ram":21212,"ral":52901,"rab":19532,"raf":7329,"rad":60032,"rac":43442,"rs 
":11514,"ros":47929,"rot":15483,"rom":15707,"ron":42744,"rop":32071,"rov":41024,"rod":21257,"roc":21679,"rol":20667,"rof":11386,"rog":9763,"rno":13811,"rna":28042,"rne":12661,"rni":8562,"ro ":146408,"rma":60914,"rme":24156,"rmi":20846,"rla":8699,"riz":10435,"rio":82353,"rit":87431,"ris":26175,"rig":32503,"ril":18939,"rin":44691,"rim":37251,"ria":74022,"rib":16779,"ric":53993,"rid":24912,"rie":40300,"rtí":7814,"ruc":9429,"rup":19339,"rus":10637,"rva":8468,"rvi":10259,"ry ":9879,"rsi":16295,"rso":23313,"rse":13455,"rta":69414,"rto":22364,"rte":83807,"rti":44079,"rtu":12987,"nía":7088,"rt ":11603,"rqu":18224,"rro":28597,"rri":26111,"rre":31796,"rra":36840,"sad":15576,"sal":12684,"san":10863,"sas":11791,"sar":21220,"sa ":86942,"rzo":11621,"sie":10373,"sid":35818,"sic":33044,"sia":21182,"sit":47886,"señ":8377,"sis":26321,"sin":18695,"sio":18884,"sil":10556,"sim":10529,"sig":27477,"scr":25119,"scu":15948,"sde":20004,"se ":142516,"sca":14799,"sco":24677,"ser":42063,"ses":15571,"seg":17308,"sec":10078,"sep":14018,"sen":28636,"sem":7053,"sel":10295,"spu":8777,"spo":14448,"spe":51710,"spa":61850,"sol":12812,"son":47400,"sor":10955,"sos":12831,"soc":12868,"sob":16402,"su ":57230,"st ":10396,"sla":17876,"smo":24196,"sió":31343,"so ":44831,"stá":22167,"ste":88396,"sta":179440,"sto":49757,"sti":69501,"stu":17380,"str":118933,"sub":9390,"sul":7853,"sup":14503,"sus":22170,"sur":23147,"tal":73680,"tag":7515,"tab":14559,"tac":23593,"tad":77862,"tat":7964,"tas":38619,"tar":41661,"tan":67218,"tam":79203,"te ":284941,"tbo":12533,"ta ":179252,"pa ":14872,"pe ":7492,"par":158326,"pas":11530,"pac":12615,"pal":23552,"pan":11657,"pañ":59522,"pec":57888,"pen":14739,"peo":7168,"per":107782,"paí":9997,"pes":10236,"pel":18436,"pla":29458,"pli":11132,"ple":15398,"plo":9048,"pic":11535,"pie":8675,"pin":12467,"pio":24607,"pit":12508,"por":173351,"pop":7479,"pos":33566,"pon":17239,"pol":27348,"pob":43702,"pod":7343,"lés":13215,"po ":39204,"pub":10168,"pti":14042,"lít":16259,"líc":10958,"lín":8248,"pri":52537,"pre":58569,"pro":108338,"put":7419,"pul":11629,"pue":30894,"mát":8971,"más":38263,"mán":10709,"mér":9468,"que":227040,"qui":47867,"quí":8097,"ra ":231606,"mús":8775,"ncé":9514,"ngo":7965,"ngl":22080,"ngu":15031,"ni ":7789,"nge":13697,"nga":8189,"nen":11810,"neo":11720,"ner":65372,"net":8127,"nes":80792,"ng ":18654,"nea":12399,"nec":28654,"nfo":10116,"nez":7381,"nco":17791,"nci":146898,"ncl":10729,"nce":61430,"nch":9730,"nca":12577,"ne ":57285,"ndu":7292,"ndr":12605,"ndo":65513,"ndi":38911,"nde":54981,"nda":66093,"ncu":16386,"nal":70953,"nam":8218,"nan":12157,"nar":27443,"nac":46410,"nad":35305,"naj":8529,"nd ":14500,"nat":21175,"nas":35377,"na ":398062,"ión":292845,"ntó":37020,"nve":10579,"nue":9759,"nto":136463,"ntu":7064,"ntr":93850,"nti":67583,"nta":102723,"nte":292971,"nso":9665,"nst":30664,"nse":26710,"nsi":26727,"nsa":11053,"nt ":24387,"nqu":11277,"ns ":13747,"noc":38585,"nom":49236,"nos":42446,"nor":29422,"nov":19327,"nne":10554,"no ":158177,"nif":10693,"nie":9958,"nid":51044,"nic":69869,"nia":31660,"niz":14512,"niv":15892,"nis":28903,"nit":8537,"nio":20490,"nim":13518,"ogr":18966,"ogo":8779,"ol ":38964,"oce":17267,"och":8312,"oci":56829,"ock":8945,"oco":9047,"oca":41229,"ode":14611,"odi":11580,"odo":26635,"ocu":9100,"oct":12604,"of 
":7638,"oda":9018,"oes":12161,"odu":18410,"ofi":7512,"ofe":9434,"oba":8003,"obr":25648,"obl":50514,"obi":12794,"obe":8533,"nza":20160,"ote":14228,"otr":17397,"oto":14558,"ost":31662,"ota":19189,"osi":18739,"ose":15508,"oso":14720,"ovi":57483,"orí":9114,"ove":16922,"oun":14708,"our":10655,"opo":9237,"opi":14838,"ope":13312,"opa":10564,"os ":567590,"opu":9913,"olí":19344,"or ":251715,"orm":64912,"orn":13575,"oro":14041,"orr":19323,"ord":30127,"ore":48358,"org":21075,"ori":66357,"osa":21383,"ort":65622,"m² ":9185,"ora":46259,"ola":30987,"on ":190943,"oli":25682,"oll":17825,"ole":17479,"olo":38775,"olu":11781,"ogí":11606,"ona":104550,"ond":38242,"onc":23216,"onf":9258,"one":74442,"ong":11339,"onj":7098,"oni":32298,"ono":52521,"ons":50424,"ont":59611,"onv":7285,"oma":33534,"ome":21150,"omb":45255,"omi":25678,"omp":41070,"omo":82407,"omu":58045,"la ":610751,"gún":7107,"le ":58880,"lab":11220,"lac":64631,"lad":24486,"lag":7484,"lan":69592,"lam":24610,"lar":41436,"lat":24612,"las":124120,"lbu":12473,"lon":23409,"lom":10655,"lor":23128,"loc":26941,"log":23375,"los":171022,"lme":34946,"lti":12566,"lto":11513,"ltu":9464,"luc":12530,"lug":7366,"lta":15082,"lgu":7617,"lev":15617,"les":91476,"let":11653,"ler":15144,"lem":26257,"len":30658,"leg":14703,"lec":26734,"lea":8424,"lo ":93016,"lla":74669,"lle":47097,"lli":13816,"llo":31751,"ll ":10675,"lit":23261,"lis":29055,"lio":17662,"lin":23082,"lim":8340,"liz":32987,"lic":48449,"lid":38365,"lia":73957,"lib":11554,"lig":10555,"lie":8152,"ma ":72422,"mac":13622,"mad":34215,"mar":42452,"mas":23691,"mal":12192,"man":63359,"may":20759,"mat":18421,"mba":10176,"mbi":42599,"mbr":71287,"me ":12825,"med":24892,"met":16848,"mes":19527,"mer":60276,"men":167009,"mex":7078,"lva":8274,"lus":8634,"mpi":8494,"mpe":21376,"mpr":14573,"mpo":32948,"mpl":23586,"mpu":10144,"mod":10208,"mon":23296,"mor":11861,"mos":17488,"mpa":16447,"ió ":27428,"mus":8143,"mun":83842,"min":50965,"mil":54218,"mis":20947,"mit":22265,"mic":21219,"mie":39274,"mo ":116388,"ién":27517,"zad":34608,"zac":11303,"zan":7115,"zar":10021,"zon":9132,"zo ":17792,"yor":11730,"za ":34624,"ya ":16470,"yo ":17934,"ués":7665,"tín":7356,"tér":8916,"tón":43482,"tán":19529,"xim":7461,"xic":17547,"tá ":17299,"sí ":7618,"rís":7245,"río":10310,"ría":28707,"rón":9102,"via":12659,"vil":17697,"vin":37424,"vic":9152,"vid":23929,"vie":21693,"viv":7676,"vis":25393,"vo ":33305,"vol":10345,"vos":8624,"ver":42481,"ves":9401,"ven":28441,"vel":14981,"ve ":15016,"val":14785,"van":9128,"var":20642,"vas":9770,"vad":11386,"va ":31522,"uye":9933,"uy ":8325,"usi":15007,"use":7793,"usa":13025,"ust":26126,"uso":9699,"uti":16202,"uta":12332,"uto":18658,"us ":49172,"ura":67622,"ure":8908,"urg":11824,"uri":19845,"uro":19849,"ur ":25587,"upe":15682,"upo":17158,"uma":10015,"ume":13099,"unt":24294,"uni":64830,"uno":27366,"unc":10467,"und":48857,"una":267868,"une":8988,"um ":20862,"ult":21405,"ulo":17762,"uli":17880,"ula":47246,"un ":217532,"uid":13351,"uie":12890,"uil":7952,"uin":8672,"uip":8207,"uis":11000,"ueñ":8502,"uit":13101,"uga":15579,"ugu":12215,"uct":12213,"uda":37565,"udi":17314,"ubr":12417,"uca":7683,"ue ":294774,"uce":7415,"ucc":8218,"uci":25554,"uch":12624,"uev":15569,"uer":46622,"ues":36069,"ueg":19032,"ued":16010,"ueb":8677,"uen":38522,"uel":28446,"púb":11593,"ua ":14731,"uat":7912,"uar":13380,"ual":41530,"uan":21270,"ubi":18564,"ubl":12045,"ud ":8479,"uad":38503,"tur":45480,"tul":9339,"tub":10280,"tua":54339,"tud":17240,"tug":8561,"tre":58815,"tra":116327,"tri":81313,"tru":16821,"tro":72914,"to 
":261732,"tod":17241,"tos":55424,"tom":10133,"ton":20463,"tor":94895,"til":28629,"tie":38174,"tig":17405,"tir":9162,"tit":18235,"tis":12154,"tin":45124,"tim":14112,"tip":10749,"tio":15498,"tia":11282,"tic":87300,"tid":24642,"tiv":46603,"tem":32370,"ten":66323,"tel":23707,"teg":9394,"tea":9283,"tec":13760,"th ":7178,"tes":66100,"ter":127000,"the":10645,"éti":7504,"éxi":11167,"éne":20764,"én ":28809,"éri":13573,"érm":8428,"és ":45633,"áti":14202,"án ":27367,"álb":11283,"áni":16413,"ás ":40719,"úsi":8717,"útb":7936,"úbl":12344,"ún ":11275,"óni":12420,"ólo":7805,"ón ":361830,"ógi":7057,"ño ":26522,"ños":18182,"ñol":28121,"ña ":43408,"íti":20855,"íst":9984,"íne":7430,"ín ":16078,"ío ":10995,"ís ":12371,"ícu":14416,"ías":8590,"ía ":90214},"n_words":[70286890,82926999,60413548],"name":"es"}
\ No newline at end of file
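These profile files are what the detector scores input text against. A hedged usage sketch, assuming the langdetect copy bundled under nlp_resource_data is importable (i.e. on sys.path) and exposes the same top-level API as the upstream langdetect package (detect, detect_langs, DetectorFactory):

    from langdetect import DetectorFactory, detect, detect_langs

    # Detection is probabilistic; fixing the seed makes results reproducible.
    DetectorFactory.seed = 0
    print(detect('Hola, ¿cómo estás?'))         # expected: 'es'
    print(detect_langs('Tere, kuidas läheb?'))  # e.g. [et:0.999...]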
diff --git a/nlp_resource_data/langdetect/profiles/et b/nlp_resource_data/langdetect/profiles/et
new file mode 100755 (executable)
index 0000000..2bdb2a6
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/et
@@ -0,0 +1 @@
+{"freq":{"D":3604,"E":9114,"F":3428,"G":4270,"A":13995,"B":6049,"C":4433,"L":9649,"M":10132,"N":6533,"O":2986,"H":6531,"I":5403,"J":4779,"K":13384,"U":2720,"T":11888,"W":2107,"V":9682,"Q":266,"P":11422,"S":15988,"R":6826,"Y":596,"X":461,"Z":653,"f":12728,"g":77107,"d":141364,"e":425996,"b":40246,"c":10761,"a":496421,"n":259681,"o":223900,"l":256122,"m":147242,"j":63354,"k":181311,"h":68975,"i":428108,"w":2663,"v":88215,"u":213745,"t":258531,"s":344443,"r":180124,"q":542,"p":61630,"z":3134,"y":5816,"x":1328,"²":200,"Å":72,"Ä":128,"Á":51,"Ü":1682,"ß":76,"Õ":259,"Ö":167,"í":335,"ì":56,"ë":66,"é":703,"è":77,"ç":89,"æ":123,"å":190,"ä":42375,"ã":61,"á":537,"à":86,"ü":30512,"ú":88,"ø":326,"ö":8126,"ô":78,"õ":40539,"ó":356,"ð":63,"ñ":47,"ē":125,"ė":44,"Ā":44,"ā":654,"č":90,"ı":50,"ī":336,"ş":74,"ń":54,"ł":96,"ō":306,"Ž":57,"ž":1084,"Š":453,"š":2423,"ū":213,"ǎ":46,"ə":85,"́":52,"μ":94,"ν":197,"ο":232,"ι":145,"κ":95,"λ":130,"δ":59,"ε":108,"η":90,"α":274,"γ":53,"ά":69,"ί":80,"ω":53,"ό":79,"σ":105,"ς":232,"ρ":156,"π":59,"υ":62,"τ":124," l":23928,"ь":570," m":38337,"э":66,"ю":76," n":13986," o":51528,"я":590," h":8823,"ш":180," i":10696," j":29617," k":61170,"ы":415,"ф":166," d":4378," e":23050,"х":292,"ц":201," f":3289," g":1891,"ч":785,"р":2377," a":29616,"с":1744,"т":1384," b":2333," c":602,"у":739," y":132," x":127," z":161," u":3277," t":29638," w":134," v":33321,"і":130,"ё":86," q":53," p":29550," s":33963," r":13644,"И":192,"Л":141,"К":318,"Н":192,"М":293,"П":266,"О":115,"Б":203,"А":448,"Г":287,"В":339,"Е":66,"Д":138,"З":71," J":4764," K":12980," H":6494,"Ш":75," I":5378,"Ю":47," N":6511,"Я":49," O":2964," L":9614," M":10069,"Э":79," B":6003,"Т":143," C":4377,"У":66,"Р":154," A":13953,"С":395," F":3376," G":4219,"Ч":60,"Ф":101," D":3559," E":9045,"Х":61,"л":1746," Z":652,"к":1535," Y":594,"й":835," X":432,"и":3293,"п":426,"о":3362,"н":2533,"м":786,"г":616," S":15877," R":6777,"в":1990," Q":264,"б":447," P":11356,"а":3415," W":2074,"з":300," V":9645,"ж":145," U":2711,"е":2779," T":11830,"д":957," ä":1217," õ":2318," ö":381," ü":7159," Ā":44," Á":51," Å":72," Ä":128," Ö":164," Õ":255," Ü":1674," ž":64," Ž":57," Š":453," š":125,"ն":64,"ա":118,"ו":53,"י":74,"ר":53,"و":125,"ي":278,"ف":48,"ل":301,"م":185,"ن":161,"ه":60,"د":159,"ح":106,"ت":46,"ب":196,"ة":66,"ا":449,"ع":100,"ش":48,"س":100,"ر":186," А":444," Б":203," В":336," Г":277," Д":137," Е":66," З":71," И":192," К":315," Л":139," М":291," Н":191," О":113,"A ":1491," П":265,"F ":209,"Da":636,"Cu":143,"Cl":241,"Co":1022,"Cr":170,"Ce":161,"Ch":852,"Ci":177,"G ":207,"Ec":84,"Ed":303,"Ea":53,"Du":151,"Do":581,"Dr":153,"De":809,"Di":576,"Fe":322,"H ":271,"Fa":384,"Eu":1055,"Ev":105,"Ex":60,"Er":534,"Et":156,"Es":507,"En":458,"Em":268,"Ep":67,"Ei":131,"El":689,"Ek":108,"Ee":3423,"Eh":79,"Eg":183,"Ge":880,"Ga":592,"I ":1170,"Fu":158,"Fr":833,"Fo":411,"Fl":194,"Fj":45,"Fi":490," б":52," г":58," в":53," д":53," и":51," к":97," н":58," м":69," п":140,"B ":254," о":107," Р":152," С":394," Т":143," У":64," Ф":99," Х":61," Ч":60," Ш":75," Э":77," Ю":47," Я":48,"C ":532," с":79,"Av":172,"Au":837,"Ar":1361,"At":438,"As":1550,"D ":280,"Ba":1524,"Az":50,"Ae":128,"Af":163,"Ag":213,"Ah":170,"Aa":1076,"Ab":517,"Ac":135,"Ad":407,"Am":1404,"An":1589,"Ap":221,"Ai":278,"Aj":171,"Ak":327,"Al":2030,"Bu":590,"Br":896,"Ca":1008,"E ":421,"Bi":534,"Be":1094,"Bo":722,"Bl":189,"Kv":67,"Ku":1642,"Gö":61,"Ky":76,"Kn":47,"Kl":321,"Kr":1260,"Ko":2411,"Le":1403,"Hä":127,"Li":2750,"N ":331,"Gü":52,"La":1509,"Lu":547,"Ly":58,"Hõ":54,"Lo":1151,"Me":1361,"Dž":49,"Mi":1243,"O 
":457,"Ma":3890,"Hü":108,"Mc":62,"My":97,"Mu":881,"Mo":1371,"Jä":515,"Ni":728,"Ne":1120,"Na":1009,"P ":412,"Ny":45,"Jõ":269,"Nu":172,"No":1468,"Ok":125,"Ol":339,"Om":123,"On":147,"Oh":110,"Kä":196,"Oi":46,"Od":88,"Of":51,"Jü":111,"Ob":154,"Gi":310,"Gl":141,"Gr":855,"Go":370,"Gu":494,"Gy":49,"J ":73,"Ha":1920,"He":1336,"Hi":1244,"Ho":963,"Hu":297,"Hy":59,"K ":351,"Dü":69,"Id":463,"Ig":87,"Io":55,"Im":143,"In":1490,"Il":192,"Ii":222,"Iv":146,"Is":449,"It":383,"Ir":247,"Ja":1273,"L ":301,"Ji":117,"Je":477,"Jo":991,"Ju":835,"Fü":63,"Ka":3521,"Fö":134,"M ":228,"Kh":87,"Ki":1209,"Ke":1467,"Us":124,"Ut":61,"Ur":136,"Um":60,"Un":420,"Uk":201,"Ul":110,"Ud":58,"Pü":264,"W ":104,"Pö":45,"Ty":76,"Põ":1331,"Tv":68,"Tu":682,"Tr":728,"Ts":169,"To":953,"Pä":660,"Th":747,"Ti":538,"Te":1883,"Ta":4354,"V ":865,"Sy":110,"St":1249,"Sv":158,"Su":1472,"Wo":216,"Wi":599,"Wh":48,"Sä":63,"Rü":62,"Wa":532,"We":428,"Rõ":64,"Vo":452,"Vu":47,"Rä":65,"Vi":1892,"Vl":115,"X ":185,"Va":3312,"Ve":2352,"Uu":347,"Mä":340,"Lü":129,"Pt":77,"Pu":606," م":56,"Pr":1526,"Ps":58,"S ":641,"Lõ":671,"Py":58,"Pe":1263,"Pa":2287,"Kü":279,"Pl":289,"Po":1247," ع":51,"Pi":1126,"Ph":224,"Lä":1080,"Os":432,"Ot":174,"Ou":77," ا":173,"Oo":46,"Op":127,"Or":500,"R ":183," ب":58,"Kõ":416,"Kö":81,"Se":3048,"Sc":514,"Si":1380,"Sh":515,"Sm":85,"Sl":182,"Sk":219,"Sp":264,"So":1693,"Ru":426,"Nõ":704,"U ":154,"Sa":3549,"Nü":46,"Re":985,"Ri":1129,"Nä":146,"Rh":72,"Ro":1769,"Qu":132,"Mõ":229,"T ":213,"Ra":1769,"Mü":121,"Tš":407,"b ":14100,"a ":101956,"Tõ":185,"Tö":129,"Tü":236,"Ya":114,"Yo":251,"Yu":48,"Z ":57,"Sõ":312,"Sö":54,"Sü":198,"Xi":96,"Tä":315,"Tō":50,"Za":127,"Ze":122,"Zh":62,"Vä":634,"Zi":61,"Zo":49,"Zu":94,"Võ":414,"bö":50,"i ":67391,"gd":177,"ge":11091,"ga":17189,"bü":159,"fj":156,"fl":261,"fg":90,"ff":336,"fi":3834,"bä":233,"fs":80,"fr":799,"fu":505,"ft":358,"fo":2004,"j ":178,"gy":115,"he":15992,"ha":9696,"gn":1067,"gm":312,"gl":3003,"gk":631,"gj":169,"gi":15351,"gh":514,"gg":188,"gv":123,"gu":12657,"gt":248,"gs":906,"gr":3204,"gp":765,"go":1593,"dt":600,"du":15412,"dv":357,"dw":178,"dy":147,"dz":76,"g ":7339,"ea":10628,"eb":4711,"ec":855,"ed":12410,"de":25494,"dd":200,"dg":178,"di":20365,"dh":181,"dk":488,"dj":432,"eK":296,"dm":1746,"dl":1518,"do":2346,"dn":649,"dp":77,"ds":1055,"dr":3163,"ew":508,"ex":366,"eu":1294,"ev":10732,"ey":521,"aõ":117,"ez":242,"fa":820,"aü":201,"h ":1684,"fe":1177,"eh":9506,"eg":11045,"ef":1060,"ee":30805,"el":44868,"ek":15889,"ej":1876,"ei":11617,"ep":3591,"eo":4613,"en":30509,"em":17859,"et":21350,"es":49834,"er":30979,"eq":50,"ca":1187,"e ":97234,"by":175,"bs":372,"br":3541,"bu":2050,"bt":50,"bn":97,"bo":1301,"bj":462,"bk":63,"bl":1788,"bh":62,"bi":4858,"bb":142,"bd":109,"be":5461,"db":83,"da":21215,"f ":1128,"cz":59,"cy":123,"cu":367,"ct":474,"cs":86,"cr":248,"co":978,"cm":60,"ck":1210,"cl":163,"ci":834,"ch":3094,"ce":1021,"cc":199,"c ":475,"az":442,"ay":578,"ba":4886,"d ":44215,"at":32525,"as":52565,"ar":30297,"aq":50,"ax":179,"aw":265,"av":11952,"au":7185,"ak":18860,"al":53791,"ai":13608,"aj":8772,"ao":2265,"ap":5317,"am":18369,"an":38127,"ac":1403,"ad":21341,"aa":42535,"ab":9670,"ag":6434,"ah":11107,"ae":3739,"af":1986,"nu":11347,"nt":12596,"ns":4828,"nr":469,"np":151,"no":4692,"nn":17075,"q ":46,"nz":355,"jö":51,"ny":360,"jõ":2814,"nw":57,"nv":314,"oe":1975,"of":1882,"oc":974,"od":6416,"oa":902,"ob":2993,"om":10938,"on":61110,"ok":4533,"ol":25486,"oi":2595,"oj":1236,"og":7246,"kä":1444,"oh":4211,"ot":7307,"hō":45,"m²":200,"os":13528,"ov":3905,"ou":1255,"op":3770,"oo":36377,"or":18504,"r 
":11002,"ox":103,"ow":446,"kö":136,"oz":161,"oy":237,"kõ":3255,"pe":8582,"pf":53,"kü":2678,"pa":9082,"pl":1823,"pm":182,"lé":50,"pn":209,"po":8371,"lä":2964,"ph":615,"pi":8736,"pk":178,"lo":11129,"ln":1607,"lm":6318,"ll":19635,"ls":3471,"lr":311,"lp":708,"lv":1889,"lu":12030,"lt":11142,"lz":55,"ly":347,"hõ":638,"o ":5439,"hü":580,"ma":35693,"mb":5069,"mg":52,"dž":411,"mh":80,"me":23829,"mf":109,"hā":79,"mk":376,"ml":264,"mi":40706,"eš":58,"mj":183,"mn":780,"mm":2852,"mp":2268,"mo":4294,"mr":80,"mt":306,"ms":1112,"mv":97,"mu":8559,"iõ":260,"my":155,"p ":1639,"na":27821,"iü":173,"nb":439,"nc":950,"nd":30717,"ne":35835,"nf":748,"ež":190,"ng":16763,"nh":382,"jä":4035,"ni":36973,"nj":320,"nk":2416,"nl":419,"nm":235,"jt":102,"ju":8195,"jn":184,"jo":2230,"jm":158,"kj":201,"ki":13463,"kh":323,"kf":60,"ke":20124,"kd":44,"kb":60,"fü":589,"ka":27697,"m ":10376,"fö":145,"ky":200,"gõ":66,"gö":54,"ks":20790,"kt":6626,"ku":25416,"kv":1230,"ko":25420,"kp":298,"kr":3838,"kk":4692,"kl":3700,"km":2283,"kn":633,"li":57000,"hä":827,"lh":474,"lk":2832,"lj":4573,"le":37295,"ld":8105,"lg":4950,"lf":372,"gü":68,"la":31670,"lc":95,"lb":823,"n ":49802,"hr":621,"hs":195,"hv":2638,"hw":103,"ht":6604,"hu":4453,"hj":2882,"hk":5994,"hh":1052,"hi":9059,"hn":1001,"ho":2898,"hl":335,"hm":902,"dé":47,"id":25348,"ic":2524,"ib":3313,"dü":272,"ia":15748,"ih":2638,"ig":11090,"if":1034,"ie":3981,"hy":186,"dõ":82,"k ":16382,"iq":93,"ir":11061,"is":67525,"it":26582,"iu":2821,"iv":6458,"iw":71,"ix":158,"ii":25093,"aš":218,"ij":1535,"ik":35922,"il":29934,"im":19741,"in":53773,"io":9587,"ip":3170,"je":2855,"až":108,"ji":256,"fä":213,"iz":330,"iy":72,"eõ":59,"l ":31660,"eü":118,"ja":41991,"tä":3641,"xi":267,"té":49,"xt":76,"sö":241,"sõ":3307,"ww":59,"z ":542,"sü":7550,"xa":160,"xe":69,"sä":382,"oš":66,"wi":436,"sé":45,"wn":69,"wo":143,"ws":111,"vv":89,"rõ":329,"vy":59,"rö":278,"y ":2468,"rø":55,"wa":716,"rü":1335,"rā":49,"we":364,"vl":251,"vm":177,"ré":93,"vj":46,"vk":322,"rä":668,"vi":9510,"vg":104,"vt":88,"vu":4481,"vr":127,"vs":891,"vp":221,"rí":55,"vn":468,"vo":2748,"uz":144,"uy":48,"ux":121,"uv":4464,"uu":16971,"ve":10196,"vd":60,"va":34297,"x ":556,"ui":3667,"uj":2076,"uk":5480,"ul":16978,"ue":1272,"uf":259,"ug":4237,"uh":3669,"ur":13993,"us":50648,"ut":13872,"um":10739,"un":13735,"uo":494,"up":1948,"ty":350,"põ":3410,"tz":299,"pö":275,"tu":27878,"tt":4340,"tw":94,"tv":618,"ub":5783,"pü":734,"ua":3006,"ud":21571,"uc":397,"w ":514,"to":11780,"tn":1099,"tm":2505,"tl":5476,"ts":21273,"tr":8177,"tp":259,"tg":91,"tf":183,"te":48864,"tk":949,"tj":657,"ti":30264,"pä":3196,"th":1862,"v ":7055,"tb":129,"tc":91,"ta":41990,"su":20276,"sv":2315,"ss":8143,"st":67054,"sy":172,"sz":75,"sw":139,"sl":2942,"sk":10212,"sn":1521,"sm":4289,"sp":2885,"so":5912,"sr":794,"sd":174,"sc":930,"sf":485,"se":62785,"sh":1322,"sg":140,"sj":1063,"kš":54,"si":30745,"rz":159,"u ":17617,"sa":24031,"nü":262,"sb":374,"rr":3973,"rs":3109,"rt":7363,"ru":7114,"rv":5729,"rw":105,"ry":602,"nõ":791,"rp":648,"ro":11751,"rn":3712,"rm":4085,"né":46,"rl":1539,"rk":3920,"nç":57,"rj":4639,"ri":41739,"rh":803,"nä":2052,"iž":51,"rg":9073,"mā":44,"rf":347,"re":19761,"rd":6093,"rc":505,"rb":1857,"mü":731,"ra":27209,"t ":40235,"mõ":3716,"mö":208,"qu":338,"iš":133,"lā":51,"mä":4794,"lü":1913,"s 
":83808,"px":82,"lõ":2040,"py":45,"lö":179,"pt":1489,"pu":3682,"pp":2089,"pr":5736,"hū":50,"ps":1178,"yō":62,"uš":116,"tš":984,"vö":298,"zz":105,"vü":141,"vä":6112,"zh":68,"zi":492,"ze":455,"uü":74,"za":431,"uõ":62,"zy":50,"võ":10296,"zs":86,"zu":144,"zo":270,"zn":53,"zm":49,"yg":56,"ye":201,"yc":105,"yd":131,"tü":1113,"ya":457,"yb":52,"tö":1641,"tõ":1288,"yt":138,"ys":416,"yr":185,"yp":88,"yo":142,"yn":260,"ym":152,"yl":325,"yk":76,"yi":121,"რ":63,"ო":50,"ნ":45,"ი":145,"ლ":57,"ე":73,"ა":150,"² ":198,"án":134,"ä ":222,"ár":59,"äb":871,"Üh":852,"Ül":582,"á ":83,"Ü ":171,"Õi":103,"Õh":47,"Õp":48,"Ök":67,"アアア":314,"ö ":312,"õ ":118,"ón":116,"ín":74,"ía":57,"í ":51,"én":82,"és":48,"ér":80,"ää":6210,"é ":189,"ät":739,"äv":318,"äm":341,"äl":3418,"äo":127,"än":4791,"äp":377,"äs":977,"är":10885,"äe":2551,"äd":252,"äg":1326,"äi":4721,"äh":3974,"äk":203,"üü":2774,"ān":168,"ār":63,"õš":48,"ā ":58,"öö":3301,"ể":72,"üo":89,"ün":5015,"üm":1677,"ül":5410,"üs":2571,"ür":1208,"üp":727,"üv":138,"üt":1243,"üb":167,"üa":118,"üf":48,"üg":358,"üd":830,"ük":2636,"üh":5389,"øy":105,"õõ":933,"öv":135,"ør":74,"øn":53,"õr":3591,"õs":403,"õt":1896,"õm":351,"õn":2300,"õo":45,"õp":1548,"õi":12796,"õj":1645,"õk":165,"õl":2454,"õe":2011,"õg":861,"õh":4577,"õb":159,"õd":527,"öt":705,"ör":689,"ös":334,"öp":203,"ön":372,"öl":253,"öm":99,"ök":454,"öi":89,"ög":138,"öe":80,"öd":707,"öb":82,"ḩ":72,"õz":69,"õv":176,"õu":3856,"ī ":118,"īn":58,"š ":464,"še":571,"ša":283,"šo":109,"šk":120,"ši":439,"šu":129,"št":112,"Ša":47,"Šo":91,"Šv":165,"ōk":47,"ō ":113,"žu":64,"žo":59,"že":84,"ža":274,"ži":342,"ž ":81,"ū ":64,"ი ":59,"あ":71,"ア":442,"가가 ":46," ア":53,"乙":67,"之":249,"丹":47,"临":68,"並":138,"丘":83,"专":99,"三":373,"丁":204," 三":86," 丁":77,"倉":51," 之":80,"ああ":46,"ος":107,"ος ":107,"ς ":231,"α ":91,"アア":377,"ян":79,"ый":89,"ье":98,"ьс":48,"ьн":72,"ха":91,"ци":80,"че":89,"ск":479,"сл":56,"со":89,"се":96,"си":145,"рь":62,"са":149,"рс":87,"рт":59,"ру":71,"тр":186,"то":214,"те":148,"тв":71,"ти":150,"та":205,"сс":83,"ст":346,"ур":101,"ус":74,"ул":50,"ун":50,"уд":52,"уб":58,"ть":45,"фо":51," Ga":590," Ge":860," I ":313," Fo":396," Fu":156," Fr":828," Fi":477," Fl":192," Fj":45," Ha":1914," He":1326," Gy":49," Go":366," Gr":850," Gu":492," Gi":307," Gl":139," Ig":87," Id":463," Dü":69," Hy":59," Hu":297," Ho":962," Hi":1244," Ji":117," Je":475," L ":50," Ja":1269," Iv":146," Ir":247," Is":445," It":383," Im":143," In":1484," Io":55," Ii":222," Il":189," M ":95," Fö":134," Fü":63," Ka":3502," Ke":1459," Ki":1200," Kh":87," Jo":986," Ju":831," N ":212," Gü":52," La":1495," Le":1398," Hä":127," Li":2733," Kl":320," Kn":46," Ko":2402," Kr":947," Kv":67," Ku":1637," Gö":60," Ky":76," Mc":62," Ma":3863," Hü":108," O ":93," Mi":1232," Dž":49," Me":1354," Lo":1146," Ly":58," Hõ":54," Lu":546," Ne":1115," P ":83,"а ":653," Na":1003," Jä":515," Ni":722," Mo":1366," My":96," Mu":873," A ":257," B ":117," C ":205," Ap":221," Am":1400," An":1581," Ak":327," Al":2024," Ai":275," Aj":171," Ag":212," Ah":170," Ae":126," Af":163," Ac":133," Ad":404," Aa":1075," Ab":516," Ba":1515," D ":91," Az":50," Av":171," Au":835," At":438," As":1512," Ar":1360," Be":1090," Bi":531," Bl":186," Bo":717," Br":891," Bu":586," E ":253," Ca":996," Ce":158," Ci":166," Ch":848," Cl":225," Cr":170," Co":1011," Cu":137," F ":65," Da":635," Di":564," De":805," Dr":153," Do":564," Du":150," Ea":51," Ec":83," Ed":300," G ":69," El":689," Ek":107," Ei":131," Eh":78," Eg":183," Ee":3409," Et":156," Es":505," Er":526," Ep":67," En":453," Em":267," Ex":60," Eu":1052," Ev":105," Fe":322," 
Fa":376," H ":102," Xi":80," Tä":312," Sü":194,"к ":130," Sõ":311," Sö":54,"Ив":73," Wo":210," Wi":583," Wh":45," Sä":63," We":424," Rü":62," Wa":532,"й ":637," Rõ":64," Zu":94,"Ле":48," Võ":412," Zo":49,"Ку":52," Ze":122," Zh":62," Vä":632," Zi":58,"Ко":96,"м ":90," Za":127," Yu":48,"Ка":72," Yo":250," Tü":235," Ya":113," Tö":129," Tõ":184,"л ":110,"Ни":84,"Мо":47,"о ":255," Tš":404,"Ма":85,"Ми":98," Tō":50,"н ":434,"Па":47,"Пе":79,"По":48," a ":162,"с ":91,"р ":253," R ":62,"в ":424,"Ан":87," Kõ":416," Kö":81,"Ал":161," Ou":77," Os":431," Ot":172," Or":497," Oo":46,"Аб":44," Op":127," Po":1235," Pl":283," Pi":1123," Ph":212," Lä":1080," Pe":1250," Pa":2281," Kü":278," Ny":45," Jõ":269," Nu":172," No":1462," Ol":339," Ok":124," On":146," Om":122," Oh":110," Kä":196," Oi":46," Od":87," Of":45," Ob":154," Jü":111," Mõ":229,"Вл":51," Ra":1755," Mü":121," T ":66,"д ":86,"Ви":55," Qu":130," Ro":1766," Re":978," Ri":1126," Nä":145," Rh":72," Lõ":669," Py":57," S ":141,"Бо":74,"г ":58," Pr":1513," Ps":52," Pt":76," Pu":606,"Ва":94," Lü":129," Mä":338," Sy":110," Sv":156," Su":1464," St":1223," Ta":4345," V ":98," Pä":659," Th":744," Ti":535," Te":1863," Tr":722," Ts":168," To":951," Nõ":702," Ru":426,"Ге":81,"Гр":45," Sa":3543," Nü":45,"е ":239," Sh":511," Si":1350," Sc":495," Se":3040," So":1682," Sp":263," Sk":219," Sl":182," Sm":84," Uu":347," Va":3308," X ":95,"и ":220," Ve":2347," Rä":65," Vi":1878," Vl":115," Vo":452," Vu":47," Tu":679," Tv":68," Ty":76," Põ":1331," Pö":45," W ":53," Pü":263," Ud":58," Uk":200," Ul":110," Um":60," Un":417," Ur":135," Us":122," Ut":61," ja":20477," l ":68,"ь ":130," io":82," im":299," in":4488," il":865," ii":156," is":1859," it":246," fü":370," ka":13902," fö":77," m ":441," ki":4980," ke":11286," jn":167," jo":514," jm":152," jt":94," ju":2953," ha":2640," he":1467," gl":111," gr":524," go":49," k ":146,"ы ":90," ib":56," dü":205," id":1072," ig":443," hi":1143," ho":971," hu":967," jä":3184," ni":6713," nd":118," ne":1909," na":1142," p ":48," mu":3133," mo":1838," mm":67," ok":758," ol":7096," om":1854," on":34245," oh":172," kä":1164," oj":83," of":466," ob":937," jõ":1996," nu":369," nt":126," no":1262," nn":71," le":1944," lk":65," hä":549," li":6989," n ":104," la":5115," kv":246," ku":9586," km":1544," kl":989," kr":1633," ko":10755," me":3580," dž":65," mi":14942,"я ":380," hü":385," ma":7824," lu":810," hõ":577," lo":3309," ae":363," af":69," ag":306," ah":192," aa":4258,"Ст":49," ab":842," ac":46," ad":192," am":855," an":2331," ap":575," ai":1079," aj":1910," ak":483," al":5568," av":698," au":1797," ar":3514," at":197," as":4137," d ":153," ba":664," 가가":46," bi":566," be":162," bo":152," bl":78," bu":241," br":141," ca":117," e ":147,"х ":65," b ":66,"т ":156,"Ро":57,"Се":84,"Со":64," er":2077," et":1438," es":2246," en":2378," em":344," ep":75," ei":852," el":2672," ek":498," ef":88," ee":3011," eh":5575," eg":105," fe":136," fa":280," eu":119," ev":98," fu":315," fr":185," fo":525," fl":88," fj":44," fi":1214," bä":181," ge":720," ga":319," i ":993," cm":54," co":181," ch":69," da":137," do":440," dr":125," de":2173," eK":291," di":908,"ч ":585," ed":721," eb":183," du":101,"ль":338,"ма":140,"ме":79,"ми":161," vö":80,"лл":50,"ло":200," vü":80,"ла":291," zo":50," zu":50,"ле":285," võ":8473,"ли":217,"кс":169,"ко":414," vä":4759,"ка":276,"ки":323," tõ":919," tö":1012," tü":520,"йс":53,"ия":126," tä":3294,"им":129,"ин":324,"ик":203," sõ":2172,"ил":197," sö":81,"ии":45,"ий":374,"ич":632,"их":108,"ит":98,"ир":115,"ис":145," 
sü":5410,"ри":296,"рк":48,"рн":83,"ро":406,"ра":317,"рг":121,"рд":63,"ре":208,"пр":87,"по":93,"па":67,"пе":55,"ос":221,"ор":399,"оп":51,"от":68,"ок":58,"ол":330,"ом":142,"он":240,"ой":87,"ов":918,"ог":103,"од":117,"ое":100,"об":67,"ны":111,"нт":92,"нс":154,"но":345,"нн":108,"нк":74,"ни":279,"не":99,"нг":60,"нд":193,"на":391," tš":115,"мо":119,"ге":109," ru":423," nõ":449,"ги":57," u ":56,"го":156," sa":6645," nü":110," se":7005,"гу":45," sc":47," si":2589," sh":125," sl":105," sk":246," sp":570," so":1701,"да":103,"ве":150,"ви":646," mõ":2887," mö":122,"вн":105,"во":155," t ":61," mü":545," ra":5080,"вс":81," re":2103," ri":3272," nä":1534,"га":89," ro":1241,"бе":93," pu":1946," pr":4024," ps":246,"бо":55," s ":402," lö":96," px":82," lõ":1781," mä":2882," lü":990,"ва":236," os":2510,"ад":144," ot":471,"ае":61,"аз":52," oo":592," op":302,"аб":45,"ав":150," or":1834,"аг":45,"ам":81,"ан":559,"ай":127," kõ":2868,"ак":68,"ал":255," kö":73," pe":3640," kü":1892," pa":3723,"ас":154,"ар":332,"ат":153," pl":542," po":5154,"ая":206,"ба":89," pi":4298," lä":2715," rü":649," rõ":177," y ":67," rö":76,"ив":76,"иг":52,"ид":49,"ие":98," sä":314," x ":107," va":12420," ve":4280," uu":1024," vo":1317,"за":70," vu":102," rä":537," vi":1606," vk":47," vm":99,"ет":178," pü":607,"ес":139,"ер":394,"ео":53,"ен":381,"ем":79,"ел":192," pö":222," põ":3044,"ек":180,"ей":122," tu":3292,"ее":74," us":346," um":867," un":274," uk":117," ul":323," uj":60," ta":4816,"др":169,"до":115,"ди":121," sy":46," st":1273,"де":141," su":5118,"ев":407," tr":1088," ts":358,"ед":85," to":2603," pä":1847," th":387," ti":918," te":10216," Õp":48," Õh":47," Õi":101," Ök":64," Ü ":59," Ül":578," Üh":851," är":293," ää":835," öe":60," ök":219," õl":119," õi":938," õh":408," õp":648," õn":74," öö":76," ür":128," ül":2321," üm":508," ük":1493," üh":2633," 가":51,"가":148,"ōky":44," Šv":165," Šo":91," Ša":47," ža":50,"د ":87,"ة ":66,"ان":46,"ال":176,"ر ":45,"ي ":68,"ن ":97," アア":51,"AS ":102," ко":46,"BA ":64," по":46," Ро":57," Пе":79," Па":47," По":48,"가가":97," Ст":49," Се":84," Со":64,"AO ":53," Ан":87," Ал":161," Аб":44," Ва":94," Бо":74," Ви":55," Вл":50," Ге":81," Гр":45," Ка":71," Ив":73," Мо":47," Ни":84," Ко":95," Ку":52," Ле":47," Ма":83," Ми":98,"Fel":59,"Fer":121,"Fil":156,"Fin":145,"Fir":68,"Fan":92,"Fal":44,"Fai":50,"Era":62,"Eri":232,"Est":182,"Ern":93,"Esi":185,"Eur":956,"Eva":52,"Ehi":44,"Ele":225,"Ela":50,"Eks":56,"End":51,"Ena":97,"Eng":50,"Ene":66,"Emm":77,"Ema":72,"Eli":126,"Ell":63,"Ent":53,"Ger":95,"Geo":499,"Gen":130,"Gio":51,"Gil":46,"Ива":71,"Öko":66,"Gan":54,"Gal":104,"Gam":62,"Gar":101,"Gab":56,"Flo":75,"Fla":65,"Fra":377,"Fri":218,"Fre":186,"Foo":47,"Fon":48,"For":124,"IP ":48,"II ":579,"His":301,"Hii":583,"Hil":89,"Hin":100,"Hel":430,"Hei":240,"Hea":65,"Hen":145,"Her":270,"Haa":140,"Hab":68,"Hal":209,"Hai":81,"Han":256,"Ham":105,"Har":682,"Hau":60,"Gus":86,"Gua":87,"Gui":119,"Grö":69,"Gre":178,"Gri":80,"Gra":216,"Gru":170,"Gro":104,"Gol":71,"Got":58,"Gor":73,"Ing":355,"Inf":74,"Ini":62,"Int":306,"Ins":151,"Ill":72,"Ind":377,"Imp":71,"Iis":118,"Iir":94,"Ida":408,"Hum":47,"IV ":90,"Hor":121,"Hoo":71,"Hom":80,"Hon":86,"Hol":356,"Arg":115,"Arh":59,"Are":119,"Arc":52,"Ara":180,"Arm":142,"Arn":51,"Ark":65,"Ari":84,"App":49,"Apo":82,"Ate":50,"Atl":232,"Asu":1049,"Ast":103,"Ass":139,"Ase":89,"Art":168,"Arv":91,"Aru":68,"Ava":88,"Aut":116,"Aus":402,"Aug":149,"Bai":96,"Bak":55,"Bal":343,"Ban":110,"Bab":69,"Bad":75,"Bar":269,"Bat":57,"Bas":115,"Bau":47," 
пр":54,"Aar":73,"Aas":446,"Aaf":295,"Aad":67,"Abr":47,"Abe":61,"Aba":83,"Abd":79,"Abi":49,"Ada":90,"Adv":74,"Ado":56,"Ade":47,"Aer":49,"Aeg":46,"Age":54,"Afg":70,"Ain":50,"Air":92,"Al ":56,"Aja":140,"Aka":119,"Akt":63,"Ala":283,"Alb":201,"Alg":178,"Ali":85,"Ale":472,"Alf":50,"Alu":59,"Alt":115,"All":206,"Alp":102,"Ame":1051,"Amb":81,"Ama":69,"Ang":173,"Ani":70,"Ana":136,"And":351,"Ans":55,"Ant":450,"Ann":191,"Bus":55,"Bul":94,"Bur":160,"Bud":75,"Мих":58,"Bru":68,"Cal":198,"Cam":87,"Cas":93,"Car":284,"Cat":48,"Can":91,"Bea":78,"CH ":77,"Ber":414,"Ben":187,"Bel":194,"Bil":84,"Bis":73,"Bir":85,"Bio":136,"CO ":51,"Bla":86,"Bre":116,"Bra":266,"Bro":108,"Bri":271,"Ник":72,"Bol":76,"Bon":63,"Bor":154,"Bos":66,"Bou":71,"Õig":83,"EE ":44,"Det":56,"Des":76,"Dev":50,"Del":63,"Dem":159,"Den":66,"Dep":48,"ан ":122,"Dan":112,"Dar":75,"Dav":137,"ай ":47,"Chr":190,"Che":117,"Chi":112,"Cit":59,"Cla":86,"Cen":74,"Cha":324,"Cri":46,"DV ":45,"Clu":51,"Cor":172,"Com":152,"Col":177,"Con":247,"Cou":57,"FA ":50,"ади":61,"аев":60,"Ege":46,"Egi":118,"али":51,"аль":66,"ано":90,"Ees":3340,"анд":117,"ани":46,"Edu":80,"Edw":63,"Ede":52,"FC ":88,"Dia":62,"Dis":117,"Dio":47,"Dig":58,"Die":54,"Div":45,"ая ":201,"Dre":44,"Dra":54,"Пет":51,"Doy":59,"Don":122,"Dom":77,"Doo":55,"Dor":65,"Nee":123,"Nen":81,"Nel":76,"Nei":118,"Nev":47,"Neu":75,"Net":49,"Nat":157,"Nii":60,"Nig":95,"Nic":90,"Nim":92,"Nik":161,"Jär":285,"New":291,"Nap":53,"Nar":263,"Nan":47,"Nag":44,"Nad":109,"Jõe":111,"Jõg":96,"Jää":129,"OS ":46,"Nov":157,"Nor":978,"Noo":108,"Але":136,"Kär":84,"Obe":69,"Jür":85,"Ott":71,"Ote":47,"Kõi":90,"Kõr":209,"Oli":46,"Ole":65,"On ":49,"Oma":92,"Olü":53,"Ope":62,"Ora":58,"Osc":88,"Osa":64,"Ord":51,"Ori":67,"Org":113,"Ost":56,"Osl":81,"Oss":50,"Lää":780,"Ple":51,"Pla":187,"Pin":124,"Pil":73,"Pik":86,"Pii":306,"Pir":78,"Pih":114,"Pie":94,"Pho":46,"Phi":103,"Läh":64,"Lät":200,"Pea":138,"Ped":50,"Per":283,"Pet":350,"Pen":99,"Pel":65,"Pee":136,"Pat":127,"Pas":71,"Par":721,"Pav":48,"Pau":155,"Paa":107,"Pad":62,"Pan":241,"Pai":107,"Pal":313,"Kül":163,"Pak":101,"Lõu":592,"Pto":68,"Pun":149,"Pur":44,"Pue":49,"Puh":59,"Puu":79,"Pro":404,"Pri":219,"Pre":257,"Pra":604,"Pol":248,"Pom":45,"Poh":46,"Pot":67,"Pos":114,"Poo":305,"Por":187,"RO ":68,"Mär":85," ال":145,"Män":102,"Вла":51,"SA ":770,"Вас":49,"Raa":184,"Rad":71,"Rai":65,"Rah":536,"Ram":65,"Mün":49,"Ran":128,"Rak":98,"SD ":47,"Mõn":60,"Mõi":70,"Isa":72,"Ise":51,"Ita":340,"Isl":190,"Ira":139,"Iva":115,"Jac":79,"Jaa":509,"Jar":46,"Jan":141,"Jam":143,"Jal":74,"Jak":74,"Jen":64,"Jer":96,"Jea":76,"Jee":58,"KP ":62,"Jos":138,"Jor":57,"Joo":51,"Jon":103,"Joh":467,"Joa":63,"Jug":45,"Juh":63,"Juu":106,"Jur":60,"Jul":218,"Jum":146,"Föd":118,"Kaa":255,"Kad":123,"Kab":68,"Kai":254,"Kah":71,"Kag":66,"Kam":188,"Kal":368,"Kak":46,"Kap":63,"Kan":466,"Kau":239,"Kat":178,"Kas":307,"Kar":687,"Ker":134,"Kes":554,"Ket":49,"Ken":126,"Kel":51,"Kem":44,"Kei":251,"Keh":55,"Kee":118,"Kir":431,"Kit":75,"Kin":160,"Kiv":96,"Kil":83,"Kih":48,"Kii":162,"Klo":53,"Kli":49,"Kle":53,"Kla":98,"Klu":55,"Koo":227,"Kon":442,"Kom":339,"Kol":258,"Kos":162,"Kor":274,"Kop":81,"Kog":69,"Kod":97,"Kok":46,"Koi":60,"Koh":156,"Kr ":310,"Kot":63,"Kre":312,"Kra":186,"Kri":267,"Kro":87,"Kru":51,"Kui":148,"Kul":317,"Kun":277,"Kur":369,"Kuu":241,"Kva":53,"Lev":81,"Les":48,"Lep":73,"Leo":109,"Len":251,"Lem":45,"Lei":135,"Leh":50,"Lee":284,"Lea":56,"Lau":185,"Le ":49,"Lak":57,"Lai":145,"Lag":77,"Lah":103,"Lae":57,"Las":93,"Lar":49,"Lap":96,"Lam":60,"Lan":154,"Lad":56,"Laa":70,"La ":89,"ML 
":59,"Lib":76,"Lie":81,"Lih":74,"Lig":47,"Lii":1363,"Lil":58,"Lim":58,"Lin":573,"Lip":63,"Lis":111,"Lit":56,"Liv":63,"Lut":62,"Luu":51,"Luk":48,"Lui":44,"Lud":75,"Luc":57,"Lou":126,"Los":88,"Lot":49,"Loh":45,"Lor":56,"Loo":276,"Lon":195,"Lom":53,"Lok":47,"NA ":78,"Mei":81,"Meh":138,"Men":72,"Mel":110,"Mes":92,"Mer":322,"Met":201,"Med":80,"Mee":102,"Man":356,"Mal":230,"Mar":1023,"Mas":191,"Mag":212,"Hüd":45,"Mad":279,"Maj":106,"Mak":124,"Mah":61,"Mai":109,"Mac":68,"Maa":607,"Max":73,"Mau":82,"Mat":240,"Mod":67,"Moh":45,"Mol":99,"Mon":346,"Moo":103,"Mos":352,"Mor":136,"Mot":44,"Mih":126,"Mik":120,"Mic":227,"Mit":88,"Mis":98,"Mil":150,"Min":245,"Mul":70,"Muh":59,"Muu":174,"Mur":95,"Mus":281,"Tän":126,"Täh":115,"лав":45,"лад":53,"ль ":49,"TÜ ":47,"ääm":126,"ääl":395,"ään":2004,"ääk":111,"ääd":51,"ääg":344,"ääb":251,"äät":82,"ääv":158,"äär":2288,"ääs":242,"ää ":101,"Sõn":78,"кса":105,"Sõj":66,"Sõr":64,"ков":99,"кол":81,"кий":216,"Wor":93,"Wol":71,"Wil":257,"Win":131,"кая":100,"Wei":45,"Weh":50,"Wes":117,"Was":59,"War":74,"Wat":77,"Wal":182,"йск":48,"ко ":58,"Vor":103,"Voo":53,"Vol":205,"Vis":61,"Vit":59,"Vla":113,"ная":59,"ое ":83,"ндр":145,"ой ":73,"Väl":103,"Väi":301,"Väs":46,"Vär":64,"ов ":254,"мир":58,"мов":45,"ман":65,"Yor":178,"You":47,"Töö":118,"Tür":179,"льн":72,"на ":209,"льс":44,"三三":54,"Tõn":45,"лов":104,"лив":45,"лен":58,"лек":145,"ра ":49,"Sve":99,"Suu":805,"Sur":85,"Sul":92,"Sup":46,"Sun":86,"Sud":52,"Str":229,"Stu":106,"Sto":184,"Sta":395,"Ste":226,"Tee":206,"Teh":108,"Teg":101,"Tea":178,"Ted":50,"Ten":61,"Tem":174,"Teo":71,"Tei":326,"Tel":114,"Tam":134,"Tan":148,"Tat":66,"Tar":920,"Tav":135,"Tai":140,"Tal":1376,"Tag":78,"Taa":310,"Tab":46,"Tad":48,"Ta ":667,"ори":54,"оро":72,"Ska":93,"Shi":143,"She":124,"Sha":138,"Sim":113,"Sil":212,"Sii":184,"Sih":45,"Sig":69,"Sis":180,"Sir":66,"ост":84,"Sin":138,"Sie":46,"Sib":150,"оль":64,"Ser":245,"Sev":74,"оло":46,"оли":53,"ола":72,"Sep":56,"Sen":68,"Sel":786,"Sem":74,"Sei":87,"Sed":193,"See":1098,"Sea":119,"TV ":56,"äb ":221,"Spa":44,"Spe":59,"Spo":69,"Sof":45,"Soc":51,"Sot":162,"Sou":50,"Sol":103,"Som":77,"Son":96,"Soo":896,"TO ":49,"Slo":106,"äga":245,"äev":922,"ова":78,"äet":56,"äes":339,"äea":55,"äed":62,"äel":380,"äht":1060,"äid":319,"äib":55,"Nõu":525,"ähe":2126,"ähk":56,"ähi":637,"Nõm":138,"äge":296,"ägi":746,"äe ":449,"äbi":596,"ный":73,"SV ":478,"Rus":63,"äda":144,"Rum":92,"äde":48,"änn":112,"änu":150,"Sag":67,"Sai":131,"Sah":102,"Sak":1209,"Sam":213,"Sal":247,"Saa":614,"Sab":44,"Sad":48,"ämm":58,"äna":797,"äni":236,"äng":1228,"äne":1640,"änd":528,"äpp":49,"äps":237,"Sco":47,"Sch":374,"äol":71,"Sav":86,"Sau":94,"Sar":178,"San":281,"är ":134,"äit":1511,"äis":253,"äir":146,"äin":438,"äil":189,"äik":1090,"äij":45,"äig":275,"äie":224,"äiv":81,"ове":46,"älu":94,"ält":126,"ови":380,"äli":683,"älj":1874,"овн":48,"овс":48,"äme":148,"ого":52,"äki":100,"TA ":65,"älg":52,"äld":112,"äle":277,"ävi":138,"SI ":60,"Res":61,"äva":99,"Rev":69,"Näi":96,"нов":187,"ор ":52,"Ris":101,"Rii":508,"Rin":80,"Ric":153,"Rid":55,"ärg":1735,"äri":1473,"ärj":460,"ärk":656,"ärm":139,"ära":1650,"ärd":69,"äre":1128,"Ras":57,"Rau":107,"Rap":154,"äsi":369,"äsk":115,"äse":53,"нко":46,"он ":72,"ärv":1633,"ärn":371,"äru":76,"ärt":890,"ärs":325,"ärr":78,"ätm":48,"äti":288,"ätk":94,"äst":200,"äss":64,"äsu":88,"Rec":53,"Rei":185,"Reg":77,"Ren":63,"Rel":79,"Rep":45,"ätt":141,"äts":56,"Rea":67,"Roh":92,"Rob":131,"Roc":60,"Ros":150,"Rom":120,"Roo":904,"SS ":97,"SO 
":46,"нск":92,"сан":105,"Vab":795,"Vaa":45,"Vai":341,"Vah":167,"Vel":86,"Ven":1754,"Vee":94,"сил":46,"ски":218,"ска":112,"ско":106,"Vas":243,"Van":479,"Val":835,"Var":232,"Vih":45,"Vig":101,"Vii":222,"Vid":54,"Vic":79,"Vie":71,"Vir":448,"Vil":366,"Vik":195,"Vin":105,"сов":48,"Ver":155,"Ves":127,"Ukr":194,"Ung":127,"Uni":210,"VR ":44,"Uus":233,"Uur":54,"Uue":46,"Tve":67,"Põl":225,"Põh":1046,"Püh":181,"рий":44,"рис":45,"Pür":45,"ров":215,"VI ":61,"Ter":274,"Tes":65,"The":470,"Päi":111,"Tho":103,"Pär":428,"Tih":59,"Tii":133,"Tim":77,"Pää":51,"Too":201,"Tor":167,"Tol":64,"Tom":75,"Ton":64,"Toi":63,"Tru":76,"Tro":136,"Tri":130,"Tre":108,"Tra":219,"Tur":159,"Tuu":92,"Tul":130,"Tun":106,"ši ":155,"šet":55,"šev":66,"šel":50,"šer":71,"šee":52,"ван":86,"šeh":151,"ša ":68,"šat":45,"Šve":145,"вск":69,"вна":76,"вич":569,"šii":58,"ва ":83,"аси":52,"ато":47,"во ":47,"bje":380,"bja":66,"bis":218,"bit":234,"biv":187,"bio":537,"bip":80,"bir":67,"bik":204,"bil":578,"bim":157,"bin":301,"bii":222,"bij":70,"bo ":72,"blo":54,"ble":274,"bli":550,"bn ":58,"bla":847,"ев ":82,"bod":50,"bok":45,"bol":236,"ей ":78,"bon":137,"boo":67,"bor":247,"bot":147,"bos":56,"bou":65,"be ":205,"bam":130,"ban":365,"bak":284,"bal":373,"bai":111,"baj":47,"bah":73,"bac":66,"bad":310,"baa":286,"án ":48,"bav":70,"bat":316,"bas":493,"bar":1055,"bi ":1023,"bee":78,"bed":61,"bec":51,"ber":2586,"ben":167,"bem":51,"bel":642,"bek":156,"bes":968,"bet":314,"bia":533,"bib":89,"bid":126,"bie":117,"bha":48,"дро":45,"ет ":44,"ca ":308,"car":187,"cas":78,"cat":96,"can":142,"cal":161,"ce ":506,"bri":2225,"bro":86,"bra":389,"bre":198,"bu ":80,"bru":574,"bso":114,"bse":67,"bst":74,"дим":49,"bub":112,"bur":697,"bul":127,"bun":75,"bum":298,"bud":140,"but":54,"bus":333,"дор":45,"by ":122,"aka":1199,"am ":1037,"ake":1191,"aki":1056,"ajo":642,"aju":375,"al ":9437,"aja":7056,"aje":136,"aaž":50,"aik":874,"ail":2117,"aim":1277,"ain":2391,"aio":97,"air":125,"ais":922,"ait":1457,"aiu":220,"aiv":79,"ak ":324,"aig":556,"aie":299,"aid":1846,"aia":317,"ahn":114,"ahk":234,"ahl":63,"ahi":395,"ahj":153,"ahh":236,"ahu":752,"ahv":2279,"ahs":82,"aht":765,"ahr":54,"aho":103,"ahe":4825,"aha":671,"agi":522,"agr":165,"agu":1145,"agn":333,"ago":338,"akä":47,"aol":220,"aok":108,"anu":1617,"anz":132,"ajõ":234,"any":60,"ano":601,"ann":2713,"anm":52,"ant":3838,"ans":1284,"anr":93,"ane":2890,"ang":1938,"ajä":246,"ani":8218,"anj":107,"ank":687,"anl":132,"ap ":65,"ana":2831,"anc":369,"and":7769,"amu":491,"amt":68,"amm":1132,"aml":63,"amo":227,"amp":318,"ams":141,"amj":57,"ami":6164,"adž":93,"ame":2122,"amb":788,"ama":4891,"ao ":126,"alv":587,"alu":1954,"alt":2286,"als":1219,"alr":157,"alp":242,"alo":1054,"aln":583,"alm":1124,"all":6989,"alk":711,"alg":2752,"alh":218,"ahä":77,"ali":9008,"alj":729,"ald":4688,"ale":3211,"alf":70,"ala":5975,"alb":322,"an ":2385,"akv":79,"aks":8379,"akr":533,"aku":957,"akt":1317,"ako":2809,"akp":79,"akk":543,"akl":109,"aba":1901,"abe":658,"abi":1296,"abl":140,"abo":165,"abr":186,"abs":170,"abu":92,"ae ":265,"aca":68,"aab":988,"aaf":943,"aag":470,"aad":3122,"aae":152,"aaj":148,"aak":2014,"aai":1233,"aan":4143,"aal":7268,"aam":1949,"aas":6140,"aar":5151,"aap":765,"aav":886,"aau":47,"aat":3358,"ad ":6207,"ac ":51,"aa ":3615,"ab ":4871,"afo":83,"afr":355,"aft":134,"aff":76,"afe":44,"afi":983,"ai ":831,"aga":2664,"agd":52,"age":905,"aen":249,"ael":441,"aes":76,"aer":147,"aeg":1173,"aee":54,"aed":90,"aek":78,"aeh":45,"ah ":172,"afa":57,"aev":851,"aet":163,"ado":272,"adr":398,"adl":409,"adm":484,"adj":65,"adi":3217,"ade":4451,"ag 
":78,"ads":103,"adu":2361,"adv":107,"aco":80,"ack":227,"aci":112,"ach":400,"ace":151,"acc":48,"ada":2882,"af ":128,"act":96,"azo":46,"azi":122,"гор":67,"Šot":73,"avä":555,"aza":97,"др ":56,"avõ":220,"avö":76,"azz":52,"atä":55,"axi":69,"asõ":365,"asü":91,"atõ":66,"atö":93,"atü":106,"aya":58,"aye":71,"ba ":651,"atš":101,"at ":2216,"amõ":135,"arh":476,"anä":68,"arg":455,"arf":44,"are":3776,"ard":1446,"arc":202,"arb":519,"ara":2982,"amü":61,"arp":144,"aro":486,"arn":587,"arm":512,"arl":676,"ark":1022,"anç":55,"arj":940,"ari":5535,"aru":930,"arv":2193,"arr":543,"ars":766,"art":3541,"au ":240,"asa":1531,"anõ":84,"ary":130,"asi":2682,"ash":208,"asc":73,"ase":3414,"aso":395,"asn":174,"asp":282,"ask":962,"asj":302,"asm":178,"asl":169,"aot":356,"aor":55,"aos":1245,"ar ":2000,"akü":111,"apa":845,"ape":400,"api":685,"aph":50,"apn":98,"apl":208,"apo":517,"app":368,"apr":632,"aps":286,"apt":144,"apu":179,"alõ":58,"as ":16624,"alü":198,"amä":398,"ava":6484,"ax ":62,"aut":985,"avs":163,"avo":221,"avl":84,"avi":977,"ave":501,"ay ":224,"arü":99,"awa":117,"arö":60,"avy":49,"arõ":46,"avu":364,"av ":2085,"ata":5828,"asu":6462,"ast":15872,"ass":1978,"asr":56,"asv":597,"atm":137,"atn":57,"atk":174,"atl":435,"atr":525,"ato":1488,"ate":6661,"ati":3865,"ath":254,"apä":659,"aua":310,"aub":254,"att":452,"ats":3184,"atv":82,"atu":6581,"apõ":81,"aul":1069,"aum":65,"aun":225,"aup":177,"aur":292,"aus":715,"aud":1021,"aue":64,"auf":46,"aug":1111,"auh":299,"auk":196,"Tōk":46,"Tšu":55,"Tši":63,"Tše":232,"Ühi":101,"Ühe":728,"Võr":202,"Või":74,"еев":71,"Üli":294,"Üld":58,"Üle":201,"еви":205,"ени":90,"енн":63,"ерн":50,"ерг":55,"екс":141,"ель":84,"етр":46,"ико":106,"ина":63,"ими":56,"иль":70,"ист":52,"иха":59,"ка ":82,"ив ":46,"ие ":51,"ий ":339,"ин ":106,"ич ":575,"ия ":116,"jee":65,"jer":66,"jek":711,"jel":510,"jem":199,"jen":398,"jes":189,"jet":51,"jev":327,"ji ":46,"jad":794,"jaa":1553,"jab":52,"jat":1299,"jas":1811,"jav":659,"jap":328,"jao":620,"jar":182,"jal":3824,"eük":52,"jak":1121,"jan":2708,"jam":517,"jah":222,"jag":412,"jaj":121,"jai":178,"jaz":54,"je ":209,"jms":73,"jne":167,"jok":49,"joo":1404,"jon":315,"jor":223,"jia":91,"jm ":66,"fää":209,"itn":116,"itm":881,"itl":1066,"itr":232,"ito":1263,"itu":2865,"itt":840,"its":4841,"itz":125,"ity":156,"ipõ":57,"iud":60,"isk":1562,"isj":238,"ism":2164,"isl":320,"iso":452,"isn":380,"üdr":236,"isp":759,"iss":1297,"isr":458,"isu":1149,"üdn":80,"ist":16111,"üdi":214,"isv":570,"iv ":471,"ita":4226,"ite":4040,"ith":114,"ipä":225,"iti":2606,"itj":280,"ivs":528,"ivo":234,"ivn":346,"ivu":91,"ühe":2500,"irü":49,"iwa":47,"ühh":198,"ühi":1384,"üha":330,"iup":58,"ius":891,"ium":1351,"iul":72,"iut":86,"iva":1768,"ix ":96,"ügi":120,"ivi":1653,"ive":618,"üga":160,"ipr":172,"ipo":148,"ipp":426,"ipu":354,"ips":94,"ipt":204,"ipi":209,"ipl":275,"ilõ":45,"is ":17934,"ion":1904,"ioo":5170,"iop":63,"ior":225,"ios":453,"iot":121,"iog":121,"iok":89,"iol":466,"iom":85,"ipa":243,"ikü":95,"ipe":504,"iov":51,"ikõ":47,"ir ":371,"iru":697,"irv":50,"irs":78,"irt":114,"iro":264,"irm":315,"irn":410,"irk":973,"irl":76,"iri":2513,"irj":2489,"isi":2505,"ish":182,"ise":17961,"isc":260,"isa":2662,"üda":146,"iu ":159,"inõ":65,"iqu":72,"übi":94,"imä":330,"ire":897,"inä":160,"irg":342,"ira":660,"ird":488,"irc":67,"it ":2043,"imõ":379,"ünn":379,"üno":56,"ünk":61,"üng":61,"ünd":3848,"üp ":170,"üna":270,"ümp":310,"ümm":93,"ümn":303,"itš":421,"ümi":74,"üme":56,"ümf":54,"ümb":638,"ülr":51,"üpo":95,"üpp":83,"üpe":67,"üpi":189,"ünt":213,"ja ":25408,"ül 
":45,"itü":46,"itõ":50,"itö":136,"isü":229,"ühr":50,"isõ":144,"ühm":571,"üht":317,"üla":1693,"üle":1718,"üld":476,"ülg":296,"üli":561,"가가가":51,"ülj":132,"ülm":94,"üll":158,"ülo":65,"üks":2052,"ivõ":345,"ivä":146,"üki":160,"ize":56,"ükl":190,"ükk":136,"iza":126,"üm ":72,"kii":670,"kih":644,"kik":66,"kij":101,"kim":281,"kil":445,"kk ":305,"kia":138,"kib":147,"kie":46,"kid":1460,"kiv":697,"kin":1384,"kip":117,"kir":3701,"kis":700,"kit":731,"kiu":59,"kja":72,"km ":1345,"ki ":1992,"kha":79,"kj ":53,"kho":116,"kea":512,"ked":219,"kee":6776,"keh":762,"kei":247,"kek":94,"kej":56,"kem":268,"kel":1252,"ken":530,"kep":122,"kes":5621,"ker":962,"ket":387,"kev":86,"füü":427,"ke ":2101,"kra":1232,"kre":1046,"kt ":481,"ksa":2425,"kse":4827,"ku ":4118,"kro":695,"kru":92,"kri":683,"kpa":66,"kr ":45,"kov":236,"km²":193,"kot":103,"kos":501,"kor":2423,"kop":353,"koo":4137,"kon":8471,"kom":1331,"kol":1558,"kok":400,"koj":93,"koh":2395,"kog":2107,"koe":132,"kod":636,"ööp":158,"öör":516,"öös":287,"ööt":635,"ks ":9327,"ööv":121,"ööb":59,"ööd":419,"öög":123,"kpo":74,"kpi":89,"öök":163,"ööm":71,"ööl":141,"öön":269,"kme":409,"kmi":180,"koa":59,"kob":69,"kne":492,"öö ":232,"kku":1792,"kke":775,"kka":353,"kko":768,"kki":577,"klu":516,"ko ":325,"kma":69,"kle":259,"kla":1096,"klo":288,"kli":1510,"jut":1345,"jus":656,"juv":119,"juu":1477,"jul":461,"juk":80,"jun":529,"jum":545,"jur":167,"jub":134,"juh":1325,"jug":89,"jud":292,"ju ":890,"jt ":90,"kav":342,"kat":1523,"kau":1221,"kar":907,"füs":56,"kas":4782,"kap":253,"kan":1568,"kao":86,"kal":1965,"kam":275,"kaj":243,"kak":404,"kah":1019,"kai":1321,"kag":415,"kae":301,"kad":579,"kab":194,"kaa":1822,"ka ":8332,"föd":110,"ha ":1068,"ham":380,"han":1084,"hap":399,"hai":439,"haj":90,"hak":332,"hal":1865,"hau":155,"hav":82,"har":1689,"has":829,"hat":244,"haf":57,"hae":123,"hag":90,"hab":58,"haa":386,"had":171,"he ":2035,"hek":639,"hel":4104,"hei":379,"heg":134,"hee":149,"hed":836,"hea":267,"hev":124,"het":632,"hes":861,"her":737,"hep":112,"heo":122,"hen":4035,"hem":646,"hi ":803,"hhi":366,"hho":288,"hha":319,"hk ":5071,"hig":50,"hie":100,"hid":172,"hic":69,"hib":91,"hia":196,"hip":104,"hio":120,"hin":1818,"him":313,"hil":853,"hik":677,"hii":392,"hiv":50,"his":1677,"hit":1195,"hir":223,"hja":2408,"hje":45,"hju":407,"hka":48,"hm ":244,"hke":354,"hkl":54,"õõd":171,"hkr":47,"hku":160,"hkv":87,"hn ":212,"õõt":409,"õõr":156,"õõs":83,"hla":75,"hle":125,"hli":44,"ho ":78,"hma":426,"gma":96,"go ":269,"gme":81,"glu":61,"glo":135,"gle":108,"gli":2195,"gla":352,"gko":571,"gno":68,"gni":103,"gne":544,"gna":249,"gmi":89,"glü":44,"gs ":53,"gpa":728,"gol":120,"goo":292,"gon":121,"gos":166,"gor":359,"got":49,"gov":53,"ый ":88,"gu ":2469,"gse":607,"gro":161,"gru":407,"gra":1940,"gt ":50,"gri":373,"gre":257,"gto":66,"gug":104,"gui":112,"guk":582,"gum":403,"gul":521,"gua":174,"gub":77,"gue":138,"gud":1131,"gy ":51,"guv":167,"gut":1041,"guu":62,"gur":250,"gus":4478,"gup":144,"gun":555,"guo":121,"gvi":72,"iai":81,"iah":71,"iak":307,"iaj":141,"iam":367,"ial":852,"iao":48,"dün":222,"ian":848,"iap":68,"ias":2407,"iar":200,"iau":97,"iat":663,"iav":133,"ic ":272,"iaa":1098,"iab":61,"iac":55,"iad":147,"iae":81,"iag":258,"ibl":123,"ibi":192,"ibo":94,"ibn":54,"ibr":97,"ibu":187,"id ":9461,"iba":199,"ibe":483,"ia ":7687,"ib ":1767,"iet":453,"iev":106,"iel":576,"iem":193,"ien":617,"ier":409,"ies":329,"iee":79,"ied":249,"iek":65,"ig ":195,"iec":79,"ifu":44,"ifo":208,"ife":183,"ifi":363,"ifa":59,"icr":74,"ics":57,"ict":114,"icu":98,"ico":150,"ick":168,"ici":96,"ich":758,"ice":186,"ie 
":546,"ica":492,"idu":2141,"ids":89,"idr":54,"ido":139,"idm":73,"idn":61,"idl":140,"idi":1042,"idg":52,"ide":8097,"ida":3662,"iib":405,"iia":256,"iif":48,"iig":4040,"iid":2067,"iie":248,"iik":3861,"aši":75,"iin":2367,"iil":963,"iim":756,"iis":1521,"iir":3180,"iip":91,"iiv":1986,"iiu":178,"iit":2428,"il ":6055,"ija":1055,"ije":50,"ijo":86,"iju":112,"im ":1389,"ika":6374,"ige":1448,"iga":2716,"ii ":657,"igl":90,"igm":62,"igh":187,"igi":4211,"igu":1608,"igr":133,"igo":112,"ign":238,"ihe":679,"iha":299,"ihk":182,"ihm":65,"ihh":111,"ihi":253,"iht":687,"ihu":144,"iho":52,"ik ":7328,"imo":225,"imn":64,"imm":144,"iml":60,"ims":252,"imp":192,"idž":109,"ime":7025,"imk":210,"imi":4107,"ip ":128,"inc":157,"ind":2723,"ina":4896,"inb":48,"imt":70,"imu":1850,"inn":6824,"ino":606,"inr":85,"int":1835,"ins":667,"inf":422,"ine":12596,"ijä":97,"inh":82,"ing":10735,"inj":49,"ini":4649,"inl":74,"ink":241,"ioa":102,"inu":4443,"inv":62,"iny":57,"ьев":85,"iko":1851,"ikn":407,"ikm":345,"ikl":1543,"ikk":1983,"iki":2101,"ikh":52,"ike":2715,"ila":1146,"ilb":76,"in ":2156,"ikv":103,"ikt":211,"iku":8444,"ikr":276,"iks":1858,"ilp":103,"ilo":1161,"ill":5963,"ilk":54,"iln":65,"ilm":2925,"ilh":117,"ilj":833,"ili":8126,"ild":409,"ile":1595,"ima":3230,"imb":81,"io ":525,"ils":210,"ilt":420,"ilu":306,"ilv":181,"hol":538,"hom":193,"hon":119,"hoi":327,"hos":103,"hot":60,"hou":76,"hov":70,"hoo":630,"hop":48,"hor":259,"hob":86,"hof":75,"hoe":47,"hod":56,"hni":355,"hno":248,"hnu":57,"hna":53,"hiü":45,"hme":82,"hmi":96,"øya":60,"huk":196,"hul":1525,"hua":81,"htu":834,"htt":114,"hts":526,"htr":54,"htp":46,"htn":93,"htm":114,"htl":468,"hti":872,"htk":120,"hte":1012,"hta":927,"hv ":86,"hst":88,"hu ":482,"hrm":49,"hro":62,"hre":114,"hri":254,"ht ":1267,"hra":59,"hvu":1043,"hwa":51,"hum":318,"hun":86,"hus":694,"hut":421,"hur":174,"huu":56,"huv":197,"hva":963,"hve":131,"hvi":258,"hvk":76,"fi ":130,"ffe":80,"ffi":97,"fes":255,"fer":227,"fen":116,"fek":256,"fel":93,"fia":527,"bän":220,"fga":88,"far":57,"fan":78,"fak":63,"aül":78,"fal":91,"fai":103,"aüh":102,"fac":50,"faa":159,"ff ":69,"fe ":58,"etš":80,"fa ":46,"etü":153,"etö":63,"etõ":90,"eys":46,"esü":78,"exa":132,"ez ":79,"esõ":110,"etä":142,"exi":65,"evõ":561,"evä":227,"ezi":53,"eta":4185,"ete":3476,"eti":2019,"eth":183,"epä":212,"etn":244,"etl":824,"etk":122,"esp":119,"esn":50,"eso":307,"est":15172,"esu":447,"esr":46,"ess":2291,"esw":67,"ev ":1881,"eud":58,"euk":45,"eum":209,"eto":650,"etr":1191,"ets":1555,"ett":1369,"etu":2430,"etv":60,"epõ":75,"ew ":294,"eve":552,"eva":4542,"evo":358,"evk":55,"evi":1332,"eut":162,"eur":217,"eus":372,"ex ":72,"evu":1013,"ey ":327,"erü":78,"epe":305,"epi":674,"elä":86,"eph":149,"er ":4533,"ekü":242,"epa":593,"eot":431,"eos":1290,"eor":477,"eom":115,"eol":658,"eok":113,"eop":87,"eoo":298,"eon":217,"emä":170,"es ":19178,"ept":722,"eps":46,"epu":101,"epl":50,"epp":174,"epo":236,"epr":147,"erk":288,"erl":431,"eri":8620,"erj":357,"erg":1464,"enä":125,"erh":103,"ere":3347,"erf":89,"erc":92,"erd":390,"era":2994,"erb":550,"et ":2344,"emõ":84,"esk":3814,"esl":149,"esm":728,"esh":61,"esi":3721,"esc":105,"ese":2495,"esa":695,"erz":44,"enõ":60,"ery":69,"erv":749,"eru":688,"err":901,"ert":1083,"ers":969,"ern":1144,"erm":1039,"erp":165,"ero":598,"eki":846,"ekk":441,"ekl":132,"eko":1441,"ekr":148,"eks":6278,"ekt":2822,"eku":1116,"ekv":84,"en ":1782,"elb":55,"ela":2671,"eld":1386,"elf":47,"ele":10866,"eli":5612,"elj":466,"elg":374,"elm":278,"eln":482,"elk":721,"ell":3694,"elo":690,"elp":87,"elu":1130,"elv":392,"els":698,"elt":4591,"eo 
":141,"emb":1631,"ema":5853,"eme":1702,"emm":105,"emn":126,"emo":658,"emi":4057,"emj":89,"emk":49,"emu":471,"emp":531,"ems":73,"ep ":59,"enf":67,"ene":6080,"enh":138,"ejä":142,"eng":640,"enb":279,"ena":1669,"end":9301,"enc":218,"eno":318,"enn":1302,"enk":163,"enl":78,"eni":2414,"enj":57,"enu":916,"ens":876,"ent":3483,"enr":170,"enz":141,"ejõ":394,"eny":49,"eog":500,"eod":99,"eob":44,"egl":195,"ego":223,"egn":128,"ege":2106,"egi":861,"eha":1078,"egr":208,"egs":221,"egu":1523,"ehn":541,"ehm":64,"ehk":5017,"ehr":86,"eho":88,"ehe":590,"ehi":903,"ehh":383,"ek ":209,"eib":48,"eic":64,"eia":95,"eht":613,"eip":60,"eis":4050,"eir":116,"eim":514,"eil":387,"ein":1247,"eii":45,"eik":96,"eie":63,"eid":2255,"eig":71,"eja":948,"el ":10310,"eit":602,"eiu":148,"eiv":45,"ejo":57,"eje":77,"eke":406,"eka":1625,"em ":2077,"eju":213,"gjo":58,"öta":380,"ötl":187,"ötm":68,"gji":57,"gl ":95,"öst":232,"git":756,"gis":1905,"gir":78,"giv":225,"gil":1090,"gim":411,"gij":262,"gik":642,"gip":428,"gin":524,"gio":325,"gid":469,"gie":61,"gib":455,"gih":62,"gii":53,"gig":185,"örl":72,"gia":2005,"örs":50,"ght":152,"öra":67,"örd":139,"gha":121,"öri":170,"ös ":62,"gga":46,"gi ":5321,"öpm":55,"ör ":108,"gen":1194,"geo":730,"get":321,"geu":170,"ger":573,"ges":288,"gev":1367,"gh ":77,"gee":427,"ged":631,"geb":241,"gei":271,"gem":670,"gel":1762,"gek":61,"gej":286,"öni":115,"gda":70,"önd":159,"gde":51,"ömi":48,"ge ":1988,"gaz":58,"öli":74,"gab":187,"gad":306,"gai":51,"gaa":657,"gas":1340,"gar":467,"bür":50,"gat":540,"gav":357,"gak":106,"gaj":153,"gam":272,"gal":712,"gan":2019,"öko":300,"gap":72,"ga ":9681,"öid":47,"ögi":100,"öel":65,"fur":44,"fta":81,"fti":45,"fun":331,"ft ":139,"fra":194,"fre":107,"fri":412,"fro":53,"for":851,"fos":114,"fot":258,"fon":233,"foo":243,"fol":120,"fla":75,"fli":57,"fo ":79,"fid":70,"fic":72,"fie":44,"fii":98,"fil":1891,"fik":301,"fin":213,"fir":227,"fis":69,"fit":113,"fjo":141,"õla":152,"õle":240,"õlg":110,"õli":251,"õlk":134,"õll":298,"õlm":435,"õlt":294,"õlu":48,"õlv":413,"õju":446,"õkk":88,"õne":668,"õng":76,"õna":926,"õnd":49,"õnn":136,"õni":203,"õnk":86,"õnu":108,"da ":7682,"õmb":51,"õmm":206,"õpe":513,"õpi":263,"õpu":202,"de ":10486,"õpp":472,"dad":930,"daa":286,"dab":1585,"dak":424,"dal":2308,"dai":94,"daj":449,"dag":307,"dae":79,"dat":1521,"das":1349,"dar":435,"dap":141,"dao":265,"dan":602,"dam":1690,"dav":923,"õri":60,"õrj":81,"õrk":196,"dde":47,"õrg":1625,"õra":75,"õrb":100,"õrd":333,"õt ":51,"õrv":475,"õru":187,"õrr":267,"õrm":46,"õda":213,"õde":121,"cul":68,"ctu":66,"õe ":1023,"cto":118,"cti":143,"cy ":83,"õet":187,"ões":152,"õel":416,"õen":137,"õdu":141,"cus":131,"cur":49,"õhk":139,"õhe":49,"õhj":2576,"õhi":1112,"õge":171,"õgi":631,"õi ":5148,"õja":1187,"õiv":218,"õit":583,"õis":2033,"õim":1468,"õik":622,"õie":258,"õid":332,"õig":1596,"õib":513,"õhu":552,"cks":55,"cki":151,"ckh":122,"cla":52,"cle":64,"co ":192,"con":154,"col":137,"com":86,"cor":129,"cos":67,"cot":61,"cs ":57,"öd ":110,"ct ":57,"cra":52,"cro":105,"ödi":95,"öda":96,"öde":377,"õst":108,"cci":47,"õu ":118,"õsa":163,"õtu":46,"õtt":671,"cea":61,"õte":379,"õta":58,"õtm":273,"õtl":100,"õtj":101,"õus":427,"õut":47,"ch ":578,"ces":126,"õua":73,"cen":106,"õue":77,"õud":589,"õug":83,"cel":80,"õuk":759,"õul":55,"õun":1596,"ci ":105,"õva":82,"õve":54,"cha":386,"chw":86,"õtü":68,"chu":131,"cia":158,"ck ":487,"cie":98,"che":615,"chl":138,"chi":411,"cho":153,"chm":45,"chn":57,"chs":85,"cht":224,"chr":50,"cis":100,"cin":92,"cm ":52,"cke":185,"cka":54,"õzs":53,"ed 
":5646,"eba":352,"ebe":121,"ebi":253,"ebl":44,"ebo":77,"ebr":684,"ebu":73,"eab":210,"eaa":424,"eag":132,"eae":122,"ead":2747,"eak":483,"eaj":63,"eai":99,"eah":87,"ean":733,"eao":48,"eal":1914,"eam":934,"ear":190,"eas":399,"eap":99,"eav":225,"eat":919,"eau":103,"eb ":2987,"ea ":655,"efi":173,"efo":256,"efa":50,"efe":291,"eff":54,"ei ":1558,"ega":4991,"eft":49,"eej":166,"eek":1260,"eeh":72,"een":1833,"eel":7467,"eem":2642,"eeb":812,"eea":50,"eeg":511,"eed":1212,"ees":3817,"eer":4899,"eep":225,"eev":114,"eet":2444,"edi":900,"ede":2096,"eda":2449,"eg ":436,"edu":707,"edo":157,"edr":231,"eck":136,"ech":210,"eci":98,"ee ":3170,"ef ":57,"ect":151,"eco":81,"dwi":78,"dvu":88,"dwa":83,"dy ":101,"dve":99,"duv":409,"duu":64,"dur":309,"dut":208,"dus":9107,"dva":107,"dor":276,"doo":134,"don":412,"dom":146,"dol":150,"dok":340,"dow":80,"dov":88,"dot":70,"dos":91,"dr ":97,"ds ":128,"dmi":448,"dmu":204,"dne":328,"dni":181,"dnu":82,"dsu":120,"dso":79,"dte":403,"dun":212,"dum":683,"dul":361,"duk":455,"õbe":86,"dub":375,"dua":181,"dud":1109,"dri":1541,"drh":50,"dra":532,"dt ":79,"dre":399,"du ":1711,"dro":385,"dru":114,"dsi":69,"dsa":56,"dse":425,"dha":80,"dge":90,"dgl":50,"dic":84,"did":1413,"dia":1187,"der":1254,"des":3846,"det":1487,"dev":307,"deb":74,"dea":150,"ded":103,"def":181,"dee":433,"deg":743,"dei":268,"del":2624,"dek":762,"den":1386,"dem":499,"dep":286,"deo":437,"di ":4266,"dle":97,"dla":684,"dko":283,"dki":67,"dme":789,"dma":248,"do ":334,"dlu":270,"dli":419,"dja":309,"div":260,"diu":154,"dim":305,"din":5058,"dio":737,"dip":162,"dir":107,"dis":3167,"dit":1143,"die":158,"dif":137,"dig":325,"dii":121,"dik":678,"dil":754,"dka":57,"dju":74,"eKr":290,"rgu":1208,"rhe":141,"näd":70,"rha":144,"näg":90,"näh":356,"näi":1177,"rhi":386,"när":92,"rho":51,"näo":109,"rga":2008,"ri ":5455,"rgk":63,"rgl":133,"rgi":2791,"rgh":68,"rge":1513,"rgs":57,"rgr":60,"rgo":122,"rgm":113,"rgn":141,"ret":637,"res":3334,"rev":282,"reu":99,"müü":230,"rfa":51,"rfe":44,"rfi":65,"rfo":51,"rdu":636,"rds":305,"rdr":70,"rg ":613,"reb":76,"rea":973,"ree":2061,"ref":247,"rec":94,"red":566,"rei":666,"reg":889,"rem":1488,"ren":1721,"rek":855,"rel":1476,"rer":134,"reo":148,"rep":269,"rda":591,"rcu":58,"rdo":137,"rdn":203,"rdk":56,"rdm":90,"rdl":149,"rdi":1642,"rde":1014,"re ":3338,"rbu":230,"rbr":301,"rch":187,"rce":98,"rd ":981,"rao":70,"rap":222,"mür":81,"rar":113,"ras":2388,"müt":281,"rat":2331,"rau":610,"rav":669,"rbi":388,"rbl":48,"rbo":167,"rba":241,"rbe":348,"raj":969,"rai":575,"rah":2331,"rag":255,"ran":3582,"mün":66,"ram":1127,"ral":1912,"rak":1264,"rab":296,"raa":4053,"raf":126,"rae":588,"rad":970,"rac":160,"rpu":103,"rpr":48,"rpo":141,"rs ":316,"rpe":48,"rpa":108,"rr ":55,"rpi":60,"ror":118,"ros":627,"rot":969,"rom":675,"ron":932,"roo":2596,"rop":430,"rou":140,"rov":1089,"row":56,"rob":239,"roa":117,"rod":447,"roc":250,"roj":238,"roi":109,"rol":630,"rok":255,"rof":315,"roe":51,"roh":391,"rog":576,"rno":131,"rns":89,"rnu":569,"rna":702,"riü":55,"rež":141,"rne":1143,"rni":528,"rjä":49,"rmo":236,"rms":58,"rmu":175,"ro ":398,"rma":1727,"rme":627,"rmi":820,"rlo":160,"nää":74,"rli":332,"rld":67,"rle":265,"rla":365,"rn ":318,"rgõ":53,"rkv":211,"rku":265,"rkt":136,"rks":128,"rkm":44,"rko":1059,"rki":431,"rkk":66,"rke":333,"rka":216,"rm ":305,"rju":905,"rjo":50,"rja":2847,"rje":754,"riz":44,"rl ":207,"rip":347,"rio":550,"rir":293,"rit":4861,"ris":3546,"riv":668,"riu":963,"rih":61,"rig":747,"rij":404,"rii":5626,"ril":3640,"rik":3181,"rin":3123,"rim":2490,"ria":2118,"rib":484,"ric":609,"rid":1612,"rie":633,"rif":87,"rk 
":868,"rtü":56,"rug":66,"rue":78,"rud":172,"ruc":52,"rup":396,"run":460,"rum":712,"rul":170,"ruk":513,"ruu":777,"ruv":117,"rus":1487,"rut":261,"rva":1548,"rvi":795,"rve":1159,"rvp":144,"rvl":46,"rvu":1089,"rwa":50,"ry ":413,"rsk":243,"rsi":519,"rso":171,"rsc":53,"rsa":171,"rsh":62,"rse":561,"rta":274,"rv ":755,"rst":515,"rss":168,"rsu":190,"rtl":107,"rtm":58,"rtn":92,"rto":217,"rte":1543,"rth":281,"rti":1255,"rub":87,"rua":490,"rts":1053,"rtr":65,"rtu":1315,"rtt":57,"rt ":810,"rro":167,"rri":784,"rre":476,"rra":2127,"ru ":1080,"rry":114,"rru":164,"saa":4775,"sab":140,"sad":632,"sae":148,"sag":380,"sah":226,"sai":500,"saj":656,"sak":2002,"sal":1925,"sam":2292,"sbe":92,"sap":131,"óni":51,"san":1221,"sau":105,"sat":1244,"sas":2183,"sar":1496,"sav":213,"sa ":3636,"ón ":53,"rze":44,"nõu":458,"nõr":64,"nõl":71,"nõi":66,"rvü":53,"sha":261,"sho":138,"she":80,"shi":370,"si ":5360,"sgr":60,"sja":511,"siv":324,"sju":179,"sjo":145,"sfä":198,"sie":142,"sid":2303,"sic":71,"sib":74,"sia":2206,"sk ":538,"shu":57,"sit":1318,"siu":225,"sir":212,"sis":2918,"sip":251,"sin":1981,"sio":5046,"sil":1686,"sim":1606,"sij":144,"sik":2762,"sih":204,"sii":1362,"sif":138,"sig":349,"sda":47,"sdo":45,"sbo":51,"sbu":115,"se ":21232,"sca":191,"sci":49,"sch":453,"sco":130,"sev":717,"sey":55,"ser":1068,"ses":5796,"set":1978,"seu":254,"nüü":243,"sh ":174,"sfi":86,"sfo":68,"sea":1804,"sei":2222,"seh":77,"seg":1695,"see":2074,"sed":3928,"seb":254,"sep":903,"seo":645,"sen":2101,"sem":2170,"sel":10065,"sek":3272,"sej":332,"spu":121,"spo":772,"spr":203,"spe":493,"spi":353,"spa":687,"skü":66,"sot":518,"sou":51,"sov":164,"skõ":91,"sol":473,"som":95,"son":907,"soo":2060,"sop":124,"sor":585,"sos":135,"sog":196,"sof":91,"skä":80,"soa":50,"soc":67,"sob":104,"su ":776,"sri":388,"sre":70,"sra":250,"st ":19438,"smõ":57,"smä":672,"ss ":961,"sli":1453,"slo":247,"slu":141,"sla":864,"sle":179,"ski":1330,"skk":688,"skj":46,"skm":251,"skl":222,"sko":1611,"skn":98,"skp":86,"sks":70,"skr":240,"sku":1933,"skv":335,"sm ":509,"ska":1203,"ske":1189,"sno":114,"sjõ":49,"snu":95,"sna":125,"sni":235,"sjä":141,"sne":888,"smo":245,"smu":122,"so ":126,"sma":1250,"smi":1155,"sme":256,"swi":78,"ssü":155,"ssö":85,"ssõ":135,"stõ":73,"stö":240,"svä":103,"svõ":164,"svö":80,"sse":2400,"ssa":886,"sso":712,"ssk":48,"ssi":2279,"ssu":260,"sst":65,"ste":12441,"stf":123,"spä":99,"sta":12430,"stm":389,"stn":305,"sto":736,"sti":11558,"stj":94,"stk":190,"stl":1467,"stv":203,"stu":4098,"str":2584,"sts":374,"sua":230,"sud":110,"sub":2124,"suh":654,"sug":926,"sul":1089,"sum":385,"suk":274,"sup":112,"sun":473,"suu":3246,"sut":3768,"sus":4456,"sur":769,"suv":746,"sva":1352,"sve":77,"svi":156,"svo":72,"svu":249,"sy ":64,"tai":1177,"taj":1586,"tak":4223,"tal":2877,"tae":166,"taf":74,"tag":732,"tah":420,"taa":2158,"tab":1730,"tac":49,"tad":1273,"tba":65,"tav":3721,"tau":131,"tat":7322,"tas":2453,"tar":1636,"tap":159,"tao":157,"tan":2647,"tam":2975,"tch":74,"te ":12807,"ta ":4265,"сто":56,"стр":66,"ств":57,"pa ":804,"ста":51,"pe ":459,"par":1885,"küt":108,"pat":370,"küs":137,"pas":347,"pau":44,"pad":102,"paa":872,"pab":98,"pag":125,"pae":47,"pak":331,"kül":2089,"pal":1654,"pai":924,"paj":61,"pap":44,"küm":165,"pan":1223,"kün":49,"phe":92,"läb":569,"pha":122,"lät":92,"pho":86,"phi":121,"läh":1032,"pi ":489,"küü":67,"ph ":74,"lä 
":65,"pea":2994,"peb":54,"pec":50,"ped":165,"pen":309,"per":2019,"pet":866,"pes":209,"pee":735,"pej":56,"pei":56,"pel":221,"pek":193,"pla":985,"pli":241,"lää":1062,"ple":335,"plo":204,"pko":106,"тан":48,"phy":67,"pia":480,"pid":668,"pie":55,"pig":83,"pii":2460,"pik":984,"pil":982,"pim":106,"pin":1660,"pio":63,"pir":110,"pis":249,"pit":216,"por":922,"pop":279,"poo":3433,"pot":178,"pos":489,"poi":69,"poj":56,"pom":75,"pon":250,"pok":78,"pol":2011,"poe":314,"ps ":115,"ppu":86,"ppi":265,"ppl":73,"ppo":76,"ppa":134,"ppe":738,"тер":49,"pme":107,"po ":62,"pni":140,"pne":50,"pp ":583,"psu":187,"pta":45,"pse":289,"psi":131,"pso":44,"ptu":183,"pub":112,"pud":66,"pte":686,"pti":246,"pto":149,"pts":116,"pra":973,"pru":93,"psa":102,"pu ":143,"pri":927,"pre":785,"pro":2923,"psü":231,"lõh":137,"pur":257,"pus":270,"put":156,"pun":658,"pui":197,"pul":391,"puh":552,"px ":81,"тов":47,"puu":696,"тор":53,"lõp":469,"lõi":316,"lõu":988,"тро":97,"löö":149,"lüm":359,"lül":150,"lük":99,"lüh":844,"lüü":275,"mäe":703,"mäg":380,"män":1220,"mäl":265,"mär":1565,"mää":557,"qua":76,"que":158,"qui":88,"mõe":265,"mõi":1422,"mõj":443,"mõl":89,"mõn":454,"mõr":58,"mõt":400,"mõõ":559,"ra ":2421,"möö":180,"rb ":55,"ngo":283,"ngj":112,"eži":152,"ngi":2407,"ngl":2197,"ngk":485,"ngv":53,"ngu":1971,"ngr":466,"ngt":111,"ngs":158,"ni ":8348,"nge":1413,"ngh":131,"nga":1383,"ngd":44,"nho":71,"jäl":102,"jät":125,"jär":2920,"nha":193,"jäi":81,"nhe":51,"neg":216,"nei":559,"nel":854,"nek":392,"nen":1253,"nem":2691,"nep":231,"neo":254,"ner":1383,"net":1372,"nes":1547,"nev":1928,"neu":153,"ndv":49,"ng ":5293,"nea":209,"neb":1735,"ned":454,"nee":1283,"nfi":63,"nfo":399,"nfl":72,"ney":104,"nez":55,"nfe":71,"nco":82,"nci":160,"nce":327,"nch":174,"ne ":18973,"nbu":170,"ndt":47,"ndu":3781,"ndr":1767,"nds":245,"ndn":73,"ndo":567,"ndl":703,"ndm":736,"ndj":199,"ndk":76,"ndi":9574,"nde":2285,"nda":5981,"ncy":54,"nak":847,"nal":1444,"iül":54,"nam":1141,"nan":791,"nao":524,"nap":666,"nar":696,"nac":71,"nad":869,"nae":164,"naf":95,"nag":661,"nah":165,"iüh":71,"nai":421,"naj":121,"nab":249,"naa":1844,"nbe":157,"nd ":4383,"nba":54,"nav":684,"nau":173,"nat":1014,"nas":5608,"na ":9461,"iõp":167,"가 ":51,"nya":69,"jõe":1151,"nyi":68,"jõg":658,"nz ":90,"nsü":49,"ny ":159,"nux":47,"nve":213,"nuk":391,"nul":392,"num":581,"nun":66,"nui":73,"nuj":150,"nus":1407,"nut":211,"nuv":56,"nur":279,"nua":567,"nud":6307,"nto":594,"ntn":44,"ntu":1152,"nts":3190,"ntr":629,"nti":1551,"nth":107,"ntl":53,"nta":1333,"nte":1954,"nsu":194,"nsp":183,"nso":192,"nst":1654,"nss":87,"nse":418,"nsh":72,"nsi":398,"nsk":423,"nsa":519,"nsb":47,"nu ":581,"nri":214,"nra":73,"nt ":1663,"npr":44,"ns ":389,"nod":52,"nob":45,"nog":106,"nof":50,"nok":61,"nol":406,"noi":73,"noo":957,"nop":56,"nom":241,"non":176,"not":137,"nos":276,"nor":508,"nov":777,"nr ":71,"nne":1409,"nna":9091,"nnm":60,"nno":142,"nni":2211,"nnu":1256,"nns":51,"nme":49,"nma":125,"ndž":60,"nli":99,"jää":730,"nn ":2722,"nla":220,"no ":545,"nke":134,"nkl":53,"nki":267,"nka":177,"nku":165,"nko":182,"nks":69,"nkt":721,"nkr":123,"nji":51,"nja":120,"njo":51,"nij":103,"nii":935,"nih":114,"nig":327,"nif":70,"nie":199,"nid":1206,"nic":153,"nib":61,"nia":1970,"nk ":386,"nix":49,"niu":138,"niv":309,"nis":5167,"nit":1025,"nir":49,"nio":206,"nip":222,"nim":5797,"nin":5067,"nik":3655,"nil":1691,"ogs":64,"ogr":1310,"ogu":2080,"ogt":71,"ogi":2325,"ogl":51,"ogo":115,"ogn":110,"oga":201,"oge":329,"oi 
":80,"oht":1247,"kät":121,"käs":428,"kär":67,"ohv":181,"ohu":379,"ohk":157,"ohj":63,"käi":618,"ohi":105,"oho":123,"ohn":161,"oha":1453,"käe":74,"ohe":234,"ois":197,"oir":72,"oiu":71,"oit":264,"oin":102,"oik":52,"oim":998,"oil":59,"oid":535,"ok ":100,"oia":64,"oju":137,"oje":372,"oja":659,"ol ":1399,"oce":44,"och":80,"oci":105,"ock":501,"oco":61,"obs":62,"obu":202,"oe ":109,"ode":1123,"odk":64,"odi":1047,"odo":182,"odn":64,"ods":77,"odr":58,"of ":621,"oda":609,"oel":103,"oeg":287,"oer":126,"oes":47,"oet":688,"oen":399,"odu":2480,"oee":44,"og ":463,"ofi":557,"oft":95,"ofo":118,"off":59,"ofe":202,"oa ":93,"ob ":139,"oan":65,"oam":51,"oal":156,"oak":80,"oaj":111,"oba":165,"od ":481,"oar":70,"oas":64,"obo":116,"obr":390,"obl":782,"obj":309,"obi":400,"obe":361,"jõk":47,"jõu":952,"nza":48,"nze":88,"nzi":54,"kõi":997,"otü":52,"oya":45,"osü":192,"ows":73,"own":56,"kõr":1655,"kõv":81,"oyl":61,"kõl":116,"kõn":322,"otu":561,"ow ":114,"otl":94,"otk":48,"otj":93,"oti":581,"oth":115,"ote":630,"ott":288,"ots":2879,"otr":134,"oto":793,"otm":209,"ost":1657,"osu":161,"ota":435,"otb":45,"ov ":391,"osi":662,"osh":53,"osk":556,"ose":1045,"osf":200,"osp":69,"oss":687,"osm":201,"osl":192,"oso":739,"osn":672,"oy ":48,"owa":44,"owe":70,"ovi":1506,"ovg":80,"ovn":57,"ovo":225,"ovu":110,"ovs":142,"ox ":49,"ova":496,"ove":767,"oug":79,"oui":114,"oul":81,"oun":169,"oup":53,"ous":121,"our":234,"out":116,"opo":355,"opp":88,"opi":377,"opk":103,"opl":127,"ope":659,"oph":152,"opa":1002,"os ":1676,"olü":473,"opu":229,"opr":90,"opt":153,"ops":80,"oon":7646,"ool":6457,"oom":3730,"ooj":448,"ook":1060,"oof":591,"oog":2684,"ood":3903,"oob":808,"ooa":118,"or ":1504,"oov":321,"oot":2071,"oos":2409,"oor":2416,"oop":1217,"ork":293,"orl":106,"orm":1403,"orn":581,"oro":441,"orp":251,"orr":1904,"orc":46,"ord":2640,"ore":923,"orf":173,"org":2150,"ori":3030,"orj":99,"ou ":125,"osa":4585,"onü":106,"ort":1076,"ors":316,"orv":260,"oru":308,"ory":79,"omä":44,"ot ":233,"m² ":198,"omö":62,"orb":147,"ora":643,"ola":1318,"old":420,"on ":39161,"olj":64,"oli":8483,"oll":1578,"olk":349,"olf":126,"ole":2988,"kää":76,"olg":132,"ols":1007,"olt":1144,"olm":1122,"oln":368,"olo":2600,"olu":1563,"olv":65,"ofü":93,"oka":297,"om ":370,"okk":531,"oki":231,"oke":623,"okr":399,"oks":957,"oko":247,"okl":55,"okt":664,"oku":340,"ona":1303,"ond":3491,"onf":139,"one":1232,"ong":640,"oni":6425,"onl":72,"onk":273,"onn":4600,"ono":803,"onp":75,"onr":44,"ons":816,"ont":1242,"onu":172,"onv":154,"ony":92,"oma":3656,"oo ":462,"ome":2088,"omb":266,"omi":1618,"omm":613,"omp":670,"omn":106,"omo":342,"omt":108,"omu":606,"oms":246,"op ":270,"la ":5991,"le ":12620,"lf ":96,"ldg":52,"lde":484,"lda":2949,"ldo":95,"ldn":65,"ldm":61,"ldk":243,"ldj":62,"ldi":712,"ldu":1916,"ldt":53,"lds":102,"ldr":58,"laa":1373,"lab":404,"lac":145,"lad":1522,"lae":732,"lah":1640,"lag":388,"laj":204,"lai":1062,"lal":666,"lak":450,"lan":3578,"güm":58,"lam":1233,"lap":307,"lao":158,"lar":440,"kyō":50,"lat":2238,"las":6761,"lau":1302,"lav":840,"lay":76,"lba":164,"ld ":1107,"lbe":196,"lbi":58,"lbo":45,"lbu":264,"lbr":50,"kvi":101,"kve":117,"kva":933,"kuv":271,"kuu":2381,"kut":1032,"kus":4511,"kur":510,"kup":256,"kun":2636,"kum":1199,"kul":2990,"kuk":399,"kuj":1401,"ky ":44,"kta":184,"kte":370,"ksp":172,"ksu":1368,"kst":539,"ksk":77,"ksj":60,"ksi":1286,"ksh":44,"kso":186,"ksn":51,"ksm":52,"ksl":97,"kub":299,"kud":842,"kug":145,"kuh":220,"kui":2011,"kua":93,"ktr":1077,"kts":955,"ktu":547,"kti":1734,"kto":1139,"gõz":53,"ksü":131,"kyl":45,"lpo":71,"lpe":157,"lpi":167,"lph":47,"ls 
":120,"llü":71,"lpt":60,"lol":75,"lok":121,"lon":328,"lom":526,"lop":192,"loo":6341,"lor":264,"lod":57,"loc":108,"loe":826,"loh":62,"log":273,"loi":81,"los":787,"lot":130,"lov":353,"lkõ":156,"lni":82,"lne":1147,"lob":91,"lnu":266,"lmn":103,"lmi":1561,"lme":771,"lma":2491,"liõ":138,"lmu":534,"lms":87,"lti":513,"lto":103,"ltr":71,"lts":498,"ltu":933,"lud":201,"lub":1064,"lua":79,"lug":367,"lue":62,"lsi":290,"lsk":106,"lso":81,"lss":61,"lst":139,"lsu":210,"lv ":77,"lta":252,"lte":588,"lri":102,"lu ":1464,"lse":1684,"lsa":658,"ía ":44,"lt ":7949,"lra":58,"lhu":155,"häv":119,"lho":73,"här":83,"lhe":124,"häi":143,"lha":82,"lgu":794,"lgs":300,"lgn":56,"lgl":54,"lgr":91,"lgo":86,"lgp":735,"lge":1193,"lgi":247,"li ":8571,"lga":1157,"lfr":46,"lfi":78,"lfa":51,"ley":127,"lex":145,"leu":72,"lev":2102,"les":6911,"let":1229,"ler":467,"leo":191,"lep":522,"lem":2382,"len":1713,"lek":2924,"lel":939,"lei":662,"lej":650,"leh":741,"leg":615,"lef":123,"lee":784,"led":276,"lec":56,"leb":814,"lea":143,"lg ":112,"llp":58,"lls":97,"llu":829,"lly":101,"lo ":301,"lla":3994,"lle":7577,"hää":354,"lli":4723,"llk":46,"llo":330,"lko":770,"lku":106,"lka":667,"lke":205,"lki":380,"lkl":73,"lju":697,"ljo":186,"lm ":614,"lje":847,"ll ":1528,"lja":2809,"lit":3006,"lis":12972,"lir":127,"lip":448,"lio":191,"lin":10479,"lim":654,"liz":82,"liv":165,"liu":274,"lic":156,"lid":1460,"lia":1378,"lib":211,"lk ":259,"lik":8420,"lil":1193,"lii":5503,"lij":133,"lig":496,"lih":443,"lie":227,"lif":188,"hõõ":63,"ma ":4860,"mb ":129,"maa":11983,"mac":100,"mab":268,"mah":237,"чес":55,"mai":1139,"maj":980,"mak":919,"hüd":167,"mad":1582,"mae":114,"maf":51,"mag":417,"hüp":189,"map":146,"mar":885,"mas":3246,"mal":2280,"hüm":55,"mam":324,"man":2399,"mav":524,"mat":3086,"mba":251,"mbl":329,"mbi":269,"mbe":2044,"mbr":1530,"mbo":263,"me ":2925,"mbu":182,"med":1125,"mee":3676,"meg":594,"mea":100,"met":4426,"mev":67,"mep":72,"mes":2849,"mer":2169,"mem":157,"mel":1042,"men":2636,"mei":782,"meh":405,"mek":592,"hüü":97,"lva":713,"lve":678,"lvi":208,"lul":347,"luk":457,"lup":150,"luo":49,"lun":505,"lum":505,"lut":683,"lus":3490,"lur":311,"luv":1330,"luu":781,"ly ":163,"lrü":75,"lvk":62,"hõb":45,"hõi":206,"hõl":310,"mpi":452,"mph":51,"mpe":554,"mpr":66,"mpo":299,"mpl":310,"mpu":89,"mps":61,"ms ":217,"mog":59,"mob":84,"moe":58,"mod":191,"moo":1204,"mon":658,"mok":297,"mom":61,"mol":409,"mov":44,"mor":367,"mos":365,"mot":173,"mpa":252,"msa":75,"mu ":287,"mse":406,"mtu":96,"mud":268,"mub":235,"mst":77,"msu":90,"mso":47,"msi":100,"mte":76,"my ":109,"mur":342,"mus":2450,"mut":357,"muu":2318,"muv":182,"mui":212,"muj":55,"muk":66,"mul":772,"mum":89,"mun":762,"džu":46,"dža":146,"mi ":3313,"dži":112,"mjo":107,"min":5859,"mio":159,"mil":5861,"mim":121,"mir":250,"mis":17764,"mip":65,"miv":72,"mit":2152,"mic":74,"mia":602,"mig":164,"mie":74,"mid":2544,"mik":1090,"mii":356,"mo ":151,"mli":87,"mle":44,"mla":87,"mn ":55,"mko":173,"mka":66,"mm ":329,"mnu":50,"mni":67,"mna":105,"mne":450,"mmy":82,"mp ":57,"mmu":480,"mmi":519,"mmo":72,"mma":633,"mme":654,"tš ":386,"tše":222,"tši":133,"tša":111,"zst":65,"võt":836,"võs":96,"võr":892,"võn":91,"võl":153,"või":7974,"võe":178,"vöö":291,"võõ":64,"vür":134,"zi ":155,"väl":2397,"väh":435,"väi":1090,"väg":389,"väe":751,"zen":74,"zer":96,"ze ":109,"zab":50,"uüh":53,"zan":66,"zar":62,"zon":61,"zo ":48,"vää":478,"vär":503,"väs":44,"zia":44,"zin":86,"yst":121,"ysi":65,"ys ":119,"ylä":47,"za ":111,"tüü":554,"yer":56,"ya ":162,"töö":1589,"tüt":130,"tür":79,"yan":90,"tük":102,"tüh":106,"yn 
":51,"yle":91,"ylo":45,"yne":47,"yin":76,"tõe":238,"tõm":70,"tõl":200,"tõk":57,"tõu":195,"tõs":109,"tõt":177,"tõr":100,"tän":706,"täh":1822,"täi":727,"xi ":91,"süü":243,"xim":52,"tär":60,"täp":255,"söö":193,"süs":1324,"süt":71,"sük":585,"sül":70,"süm":231,"sün":4272,"xan":102,"süd":144,"süh":336,"süg":181,"sõl":377,"sõj":1120,"sõp":113,"sõn":939,"sõd":233,"sõi":351,"sää":83,"ws ":61,"wor":58,"rüü":199,"wer":86,"wen":48,"säi":178,"wit":44,"wig":128,"wic":50,"win":68,"rõh":178,"röö":217,"wa ":114,"wan":110,"rün":141,"rüo":64,"wal":97,"rük":165,"way":55,"rüt":71,"war":196,"rüh":567,"vri":47,"vsu":116,"vst":161,"vse":403,"vsk":184,"vu ":181,"vut":793,"vus":2887,"vud":63,"vum":73,"vuk":112,"vul":243,"vy ":55,"vib":72,"via":291,"vio":50,"vip":54,"vir":136,"vik":721,"vil":657,"vim":530,"vin":1205,"vig":116,"vih":89,"vii":1360,"vic":51,"vid":522,"vie":84,"viv":99,"vit":1084,"vis":990,"vka":64,"vko":169,"vkj":47,"vla":98,"rää":402,"vli":75,"vo ":249,"vms":66,"vne":367,"vna":66,"voj":65,"vol":333,"vok":90,"von":554,"voo":711,"vor":540,"vos":69,"vpa":151,"rän":173,"vi ":1273,"vgo":75,"veo":115,"ver":1595,"ves":902,"vet":333,"vei":261,"veg":47,"ven":2478,"vem":547,"vel":383,"vek":127,"vea":44,"vee":1876,"ved":504,"ve ":854,"val":8945,"vak":669,"van":2026,"vam":407,"vap":226,"var":1747,"vat":2727,"vas":3460,"vav":166,"vaa":1097,"vab":718,"vae":151,"vad":3860,"vai":1190,"vaj":399,"vag":98,"vah":2967,"va ":3296,"uvõ":268,"uvä":105,"usõ":228,"usü":545,"uuk":89,"uun":1174,"uul":3063,"uum":754,"uub":214,"uua":104,"uug":75,"uud":1301,"uue":262,"ux ":63,"uus":2149,"uur":5973,"uup":59,"uuv":44,"uut":1103,"uvi":326,"uvo":58,"uva":2014,"uve":246,"uvu":186,"usl":1435,"usm":668,"usj":225,"usk":1361,"ush":270,"usi":2532,"usf":104,"usg":54,"usd":49,"use":12729,"usc":44,"usa":1306,"uu ":547,"uv ":1217,"usv":989,"usu":604,"ust":10829,"uss":984,"usr":193,"usp":536,"uso":240,"usn":79,"utk":47,"utl":151,"utm":97,"utn":50,"uth":113,"upä":61,"uti":1297,"ute":1328,"uta":4946,"utz":51,"upõ":82,"utt":121,"uts":1366,"utv":93,"utu":2313,"uto":959,"utr":121,"us ":14520,"umä":125,"ut ":606,"urb":400,"ura":699,"urd":430,"ure":1850,"urg":757,"urj":163,"uri":3709,"url":67,"urk":267,"urm":383,"urn":372,"uro":1163,"urr":134,"urs":299,"urt":336,"uru":948,"urv":239,"uol":59,"uot":45,"uor":83,"uos":126,"ukü":84,"upa":311,"ur ":1500,"upi":356,"upe":239,"upo":146,"upp":283,"upr":65,"upl":50,"upu":118,"ump":59,"umu":239,"umi":3810,"umm":293,"umo":96,"uma":2384,"umb":1389,"ume":800,"unt":871,"uns":729,"unu":969,"unk":808,"uni":2914,"uno":48,"unn":618,"unc":54,"und":1856,"una":2903,"ung":491,"ujä":52,"une":1105,"up ":125,"uks":640,"ukr":117,"uku":265,"ukt":617,"uko":1820,"ukk":131,"ukl":81,"uki":384,"uke":268,"um ":1368,"uka":800,"uju":1494,"ulu":2649,"ult":2105,"uls":130,"ulp":70,"ulo":139,"uln":45,"ulm":184,"ull":465,"ulk":645,"ulj":361,"uli":2783,"ulg":972,"ule":2072,"uld":337,"ula":1652,"un ":198,"uid":849,"uie":51,"uig":91,"uil":85,"uim":112,"uin":284,"uis":365,"uht":1356,"uhu":769,"uk ":199,"uiv":119,"uit":151,"ul ":2180,"uja":426,"ugh":62,"ugi":162,"uge":590,"ugo":56,"ugl":46,"ui ":1419,"uga":910,"uhi":560,"uhe":222,"uho":49,"uhk":168,"ugu":2195,"ugr":50,"uha":362,"uct":46,"uda":566,"ude":2760,"udi":687,"udm":209,"ubu":163,"uca":46,"ue ":313,"uci":50,"uch":100,"uck":66,"uet":68,"uer":113,"ues":193,"püü":159,"ufo":48,"ufi":80,"udu":959,"uds":80,"udt":385,"udo":126,"ug ":76,"udw":51,"uee":109,"uen":148,"uel":155,"pöö":270,"ub ":3808,"põõ":56,"ua 
":229,"uat":112,"uas":207,"püs":236,"uar":1172,"pür":52,"uam":44,"ual":222,"uan":196,"ubi":508,"ubj":82,"ubl":189,"ube":338,"uba":529,"ud ":15469,"uak":53,"püh":249,"uai":64,"uad":62,"uaa":347,"tvõ":44,"tvä":73,"tzi":57,"tze":47,"põh":2641,"põi":53,"põl":539,"põr":58,"ty ":254,"trü":150,"tvu":92,"tvo":55,"tve":93,"tva":221,"tur":775,"tus":7900,"tut":545,"tuu":2287,"tuv":618,"tuj":83,"tui":121,"tul":1640,"tuk":346,"tun":1878,"tum":1084,"tup":139,"tub":376,"tua":183,"tud":7345,"tuh":72,"tug":512,"tsü":333,"tz ":121,"tsõ":45,"ts ":1169,"tre":529,"tt ":402,"tra":1815,"tri":3128,"tru":752,"tro":1685,"tu ":1887,"tsa":1214,"tse":5653,"tsc":70,"tsi":9068,"tsj":165,"tsm":159,"tsk":220,"tsl":83,"tso":358,"tsu":2329,"tst":212,"tta":440,"tte":2371,"tti":343,"ttl":46,"tto":143,"ttp":55,"tts":70,"ttu":276,"tme":1276,"tma":236,"to ":641,"tmu":55,"tmo":97,"tmi":756,"tni":437,"tne":300,"tp ":49,"tna":202,"tnu":77,"tno":51,"tof":48,"toe":213,"tod":315,"toc":161,"toi":1340,"toh":46,"tog":150,"toa":50,"tov":120,"tos":269,"tot":150,"tom":456,"ton":1159,"tok":224,"tol":1335,"tor":2218,"too":2528,"top":205,"tkü":52,"tij":48,"tii":1991,"til":2148,"tik":5067,"tif":152,"tie":142,"tih":290,"tig":255,"tir":123,"tit":780,"tis":3212,"tin":1198,"tim":859,"tip":427,"tio":925,"thu":237,"tia":704,"tic":223,"tid":1567,"tiu":100,"tiv":496,"tja":582,"tki":109,"tko":214,"tku":135,"tka":199,"tke":169,"tli":1813,"pää":190,"tlu":1138,"tla":898,"tle":1553,"tem":2042,"ten":994,"teo":974,"tep":235,"tei":2648,"tej":88,"tek":2590,"tel":4094,"tee":4577,"tef":53,"teg":3418,"teh":952,"tea":2198,"tec":57,"ted":509,"tfo":51,"tfa":78,"th ":373,"tev":754,"tet":709,"tes":4048,"ter":4969,"ti ":9417,"tho":135,"thr":60,"pär":1925,"päe":892,"the":496,"thi":183,"päi":92,"tha":245,"ān ":65,"üüp":380,"üür":188,"üüs":557,"üüt":421,"üüa":70,"üüb":92,"üüd":395,"üüg":68,"üüh":62,"üüm":176,"üül":234,"üün":56,"並 ":57,"žik":69,"žis":93,"三 ":165,"žii":61,"žan":88,"丁 ":49,"žaa":84,"ži ":55,"üve":65,"ürs":166,"ürt":60,"üro":72,"ürk":57,"ürg":241,"üri":378,"üre":55,"üra":58,"üs ":51,"ütt":66,"ütu":70,"üti":151,"ütl":183,"ütm":69,"üto":339,"üta":158,"üte":90,"üss":79,"üst":1343,"üsi":1015,"之 ":88},"n_words":[4341644,4941492,4175920],"name":"et"}
\ No newline at end of file
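The profile files touched by this commit all share one structure, visible at the end of the Estonian profile above and at the start of the Persian one below: a single-line JSON object with a "freq" map from 1- to 3-character n-grams to their occurrence counts, an "n_words" array of three totals (presumably the overall 1-, 2- and 3-gram counts), and a "name" field holding the language code. A minimal Python 3 sketch for inspecting one of these profiles follows; it assumes only the plain-JSON layout shown in the diff and is not the loader used by detector_factory.py:

    import json
    from collections import Counter

    # Repository path as listed in this commit; every profile file
    # (et, fa, ...) has the same single-object JSON layout.
    PATH = 'nlp_resource_data/langdetect/profiles/et'

    # Profiles are UTF-8 JSON; the encoding= keyword on the builtin
    # open() exists only in Python 3 (Python 2 would need io.open),
    # which is representative of the changes this conversion makes.
    with open(PATH, encoding='utf-8') as f:
        profile = json.load(f)

    print(profile['name'])     # language code, e.g. 'et'
    print(profile['n_words'])  # three totals, e.g. [4341644, 4941492, 4175920]

    # The ten most frequent n-grams recorded for this language.
    # Counter accepts the freq mapping directly and sorts by count.
    for ngram, count in Counter(profile['freq']).most_common(10):
        print(repr(ngram), count)

Run against the file above, the first two prints reproduce the "name" and "n_words" values seen on the closing line of the Estonian profile.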
diff --git a/nlp_resource_data/langdetect/profiles/fa b/nlp_resource_data/langdetect/profiles/fa
new file mode 100755 (executable)
index 0000000..87dddc1
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/fa
@@ -0,0 +1 @@
+{"freq":{"٬":970,"پ":57795,"و":425936,"ي":735907,"ً":1698,"َ":4564,"ُ":2202,"ف":144242,"ق":86257,"ك":2176,"ل":186547,"م":383367,"ن":505254,"ه":501741,"ّ":1083,"ِ":3108,"ٔ":10105,"خ":65646,"د":475980,"ج":87595,"ح":43181,"ت":373990,"ث":6476,"ب":289206,"ة":899,"ئ":9436,"ا":1111531,"آ":65724,"أ":1456,"ء":1000,"غ":15714,"ع":91539,"ظ":7379,"ط":45348,"ض":9683,"ص":43813,"ش":310066,"س":370480,"ز":155766,"ر":687985,"ذ":11817,"،":43281,"؛":1040,"۲":32624,"۳":25683,"۰":28783,"۱":63091,"۶":17216,"۷":19045,"۴":17256,"۵":18080,"۸":24510,"۹":42744,"گ":80261,"ک":285107,"چ":25230,"ژ":11180," ،":7698," گ":30777," ک":148021," ۸":2786," ۹":2591," ۱":47632," ۲":17985," ۳":5368," ۴":3375," ۵":3167," ۶":2742," ۷":2925," ن":61607," ه":83469," ل":8629," م":155700," ق":22376," ك":1185," ف":28537," ي":42307," و":134423," ص":10068," ش":162851," ط":22340," ض":1200," ر":51088," ذ":1128," س":118308," ز":19079," ع":23850," غ":6362," ا":284499," آ":62929," ج":29359," ح":15097," خ":24724," د":158860," ب":152331," ت":57718," ژ":5441," چ":17369," پ":40145,"کا ":2349,"کت ":2257,"ژوئ":1909,"ژي ":877,"ژه ":1588,"ژان":2068,"گيت":1091,"گير":2604,"کلا":1036,"کلي":1793,"کوچ":1236,"کيل":3244,"کمي":2269,"کنا":990,"کند":3501,"کنو":1241,"کنن":2076,"کوم":883,"کوه":1843,"کرا":939,"کشف":46287,"کشو":4351,"کست":856,"کزي":3453,"کرد":5509,"کار":7408,"کات":897,"کان":3075,"کاي":1649,"کام":1480,"کال":1004,"کبي":947,"کتر":1843,"کتا":2868,"کتب":2985,"کم ":997,"کل ":1774,"که ":49534,"کن ":1159,"کي ":15027,"کز ":2384,"کس ":931,"گور":1227,"گون":2525,"گوي":1532,"گلي":2047,"گفت":2210,"گست":862,"گري":2038,"گرو":2216,"گزا":1135,"گرد":3447,"گرا":2354,"گرف":2543,"گذش":1619,"گذا":1619,"گار":1384,"گاه":8669,"گان":3798,"گي ":4154,"گر ":3440,"ا، ":2188,"پان":3222,"پاد":1299,"پار":1859,"۹۹ ":1855,"۹۸ ":1731,"۹۷ ":955,"۹۶ ":844,"۹۲ ":854,"۹۱ ":894,"ئي ":825,"ات ":10939,"اح ":1044,"اج ":937,"۹۰ ":940,"پرا":1102,"پرد":1609,"پرو":1607,"اب ":5407,"ئن ":919,"پاي":4002,"پتا":3629,"پنج":5682,"۸۹ ":864,"۸۸ ":992,"۸۳ ":1400,"۸۵ ":947,"۸۰ ":846,"۸۱ ":878,"اي ":55848,"بت ":1678,"اه ":14391,"او ":4256,"با ":13059,"ان ":92173,"اً ":1671,"اف ":1676,"پيو":992,"پيش":2404,"پير":873,"ام ":15467,"ال ":24240,"اق ":2509,"اع ":1667,"اط ":832,"ار ":39983,"اد ":13676,"اش ":1063,"از ":51085,"اس ":3550,"اسک":971,"بي ":8868,"ت، ":2631,"اضي":879,"ارگ":1672,"اصل":2729,"اسپ":971,"اعا":905,"اعت":1127,"اطل":2273,"اعي":1202,"۹۹۹":1558,"۹۹۸":1457,"ادا":1809,"احي":1808,"اخت":6534,"احم":966,"احت":888,"ارب":1131,"ارا":5657,"ادي":8285,"اده":8465,"ادب":902,"بق ":16057,"ادر":1545,"ادش":1271,"ازي":4465,"است":105076,"ازه":1433,"ازن":1098,"اسا":2986,"ازم":1894,"ازد":2915,"اري":13153,"اره":35608,"ازا":956,"ارن":1623,"ارو":2419,"ارم":2814,"ارز":893,"ارس":5553,"ارش":1311,"ارص":1726,"بل ":1849,"ارد":9154,"ارت":4139,"اشن":1730,"ارک":47474,"اشي":1130,"اشد":5423,"به ":41777,"اسم":943,"اسل":2665,"اشا":865,"اشت":3533,"اسي":5578,"بن ":2042,"اسر":1003,"بع ":2965,"ائي":1624,"ابا":1519,"ابت":1082,"ابر":3253,"ابع":2818,"ابي":1834,"ابل":1424,"ابق":1003,"ابو":1132,"اتر":1045,"اثر":1056,"اتي":2176,"اتو":1748,"اجر":1089,"ئيه":1089,"بر ":33260,"آب ":1128,"آثا":830,"آبا":1911,"آزا":908,"آذر":4073,"آن ":10516,"آما":970,"آلم":1164,"آمر":3443,"آمد":1740,"آمو":1076,"آنه":16519,"آور":2786,"آهن":945,"آغا":1416,"چه ":1398,"جزي":1617,"خت ":2202,"حل ":927,"جرا":1045,"جري":3087,"جار":1185,"جاد":1192,"جان":5597,"جام":2439,"جاه":2146,"جاي":1843,"حال":1317,"جنو":3614,"جها":2193,"جمي":1716,"جمع":1997,"جمه":2045,"جمو":2028,"خش ":8012,"حي ":1372,"د، ":4587,"خي ":1696,"دا ":1969,"خه 
":1063,"حسي":842,"حزب":975,"حرم":1392,"حده":1001,"حدو":1445,"جنگ":1615,"جوا":942,"جود":1945,"ختر":883,"خان":4015,"خار":1012,"خاب":828,"حمد":2876,"در ":87864,"دد ":1157,"دن ":19684,"تگا":1213,"خست":1155,"ده ":55594,"دو ":4399,"ر، ":2027,"دي ":13399,"خته":2886,"ختل":1697,"خدا":1090,"دل ":1012,"دم ":1329,"خلي":855,"دان":9861,"دام":1683,"دال":1294,"داي":1703,"دار":14265,"داز":1707,"داس":1080,"داش":2836,"داخ":1028,"داد":4687,"خوا":3065,"خود":3802,"خور":1687,"دبي":1292,"رج ":831,"درگ":1116,"رت ":5024,"رد ":14918,"را ":12544,"دست":3550,"دسا":1658,"رب ":1902,"دشا":1262,"درس":984,"درج":847,"درا":1451,"دري":2646,"درو":1489,"دول":1633,"دون":991,"دوم":3136,"دوي":2445,"دهم":3138,"دوا":1480,"دود":1919,"دور":3332,"دهٔ":2250,"دها":1941,"دهد":1019,"دني":1772,"دهس":4371,"دمي":947,"دند":2383,"ذار":1352,"رف ":16580,"ديو":1392,"ديم":1100,"دين":2614,"ديا":931,"ديد":3091,"دير":977,"رش ":1790,"رس ":3514,"رز ":943,"دما":1034,"دلب":839,"اقي":1204,"افي":974,"اقع":6290,"الل":1166,"ان،":3374,"الي":6506,"الن":1060,"اما":3065,"الم":2177,"اله":1801,"امب":7272,"امر":1231,"الت":2172,"الب":1010,"الا":3840,"الد":1099,"الع":862,"افز":1059,"افر":840,"افت":2627,"اين":37088,"ايل":1631,"ايي":11337,"ايه":1592,"ايس":842,"انگ":3856,"ايش":2420,"ايا":4829,"ايت":2340,"ايج":4814,"انک":896,"ايد":1943,"ايز":1142,"اير":12515,"، ":41769,"بخش":8437,"اپ ":1341,"اهر":1198,"اهش":1637,"اني":16598,"باي":4838,"انه":7584,"انو":5568,"اهد":836,"انق":1002,"اها":4950,"بان":6032,"انن":2230,"بال":3191,"اند":8054,"باد":2479,"باز":4359,"انز":1366,"بار":4430,"انش":5048,"باش":6504,"انس":4593,"باس":1834,"انص":1766,"امن":1026,"انا":2522,"امه":4047,"امو":1040,"انت":3295,"بات":1162,"امي":5640,"انج":1903,"امل":2517,"اوي":1440,"اول":3794,"اور":2685,"الک":1567,"اوت":2392,"اهي":3496,"اوا":1252,"امپ":1281,"تا ":5488,"بسي":1565,"برگ":2294,"ايگ":1002,"بدا":1231,"برا":9676,"برد":2157,"برخ":1084,"برو":884,"برن":1758,"بري":2753,"بزر":3175,"بست":2397,"تر ":6150,"بعد":1118,"بني":1199,"بنا":1507,"بند":1464,"ا ":87899,"ب ":20308,"ح ":3843,"خ ":4523,"د ":139796,"بين":3092,"بيم":902,"بيس":5590,"بيش":2092,"ت ":137024,"بيا":1448,"بير":1134,"ث ":1238,"بور":941,"ج ":5421,"بود":10929,"تي ":8134,"اک ":1290,"تن ":2136,"ته ":15461,"تم ":2196,"ثر ":1178,"؛ ":940,"تبر":18681,"تال":1624,"تبا":2109,"تان":31858,"تام":3922,"تاه":3675,"تاي":3199,"تاد":4918,"تاش":890,"تار":5154,"تاب":4019,"تخا":898,"تحد":1114,"تري":7869,"ترو":1889,"ترا":2202,"ترن":839,"جا ":15939,"تصا":1521,"تشر":878,"ترک":1945,"تصد":3250,"تشک":1423,"تعد":829,"تغي":867,"اژه":1048,"تفا":3357,"تقا":902,"تقو":993,"تهر":2967,"تها":877,"ثار":853,"تند":2837,"تمي":4044,"تلف":1376,"تما":2359,"تلا":1147,"تيم":1139,"تين":1737,"تيا":863,"تون":1309,"تول":2859,"تور":2332,"توس":3860,"توا":4394,"پس ":2219,"تيک":1005,"اکن":1376,"اکت":3167,"اکس":936,"جه ":2283,"جي ":1241,"حت ":897,"پت":4019,"پا":14295,"پس":3568,"پد":1266,"پر":7121,"گ ":9285,"پو":2856,"په":1230,"پن":7042,"پي":8594,"پل":1723,"ک ":69398,"مک":3060,"ي،":6981,"وي":29381,"وو":995,"يب":4572,"يا":96279,"يع":3352,"يز":10370,"نگ":14413,"يس":23115,"يش":9793,"يص":2081,"يخ":3819,"يد":17899,"ير":33847,"يت":13742,"يج":6291,"يح":1015,"نک":2007,"يه":14576,"ين":90729,"وچ":1800,"يو":11558,"يق":4160,"يم":13074,"يل":23315,"يف":3191,"وپ":2022,"يي":14011,"وژ":952,"وک":2822,"پ 
":2440,"وگ":1347,"يچ":1842,"يپ":878,"يژ":1013,"يک":36397,"يگ":5574,"فع":2000,"فض":915,"فز":1100,"فر":14718,"فس":1042,"فد":1178,"فت":17008,"فا":9831,"قع":6601,"قط":1598,"قش":1281,"قس":1226,"قر":7195,"قد":3410,"قت":1605,"في":8057,"قب":1853,"فه":2747,"فو":4734,"فن":1501,"قا":9232,"فل":2016,"فق":1179,"ل،":834,"قي":10959,"قل":3288,"قم":3612,"قه":3627,"قو":3406,"لق":1885,"لف":2767,"لط":1095,"لع":1660,"لد":3211,"لح":1516,"لز":963,"لس":4361,"لر":1340,"لا":28241,"لت":4427,"م،":1074,"لب":3816,"مع":25874,"مغ":887,"مص":1370,"مط":1157,"مف":852,"مق":2922,"مل":8979,"مت":12979,"لو":11064,"مج":4572,"ن،":5004,"لي":31085,"لم":9307,"لل":2270,"له":9135,"مب":9205,"لن":3401,"ما":76186,"مز":2088,"مر":23149,"مش":4307,"مس":6882,"مخ":2590,"مح":8770,"مد":9936,"نظ":3887,"نع":1089,"نط":1891,"نم":4839,"نق":3830,"نف":2887,"نج":25669,"ه،":2861,"مي":78888,"نت":9394,"مو":18704,"مه":13616,"نب":2742,"نا":34982,"من":11960,"نص":3159,"نش":8281,"نس":7940,"نز":3319,"نر":3727,"ند":45085,"نخ":1510,"مپ":1882,"هف":7528,"هل":3945,"هم":15355,"وئ":3203,"وا":39119,"هن":8401,"نه":35756,"هب":1561,"ها":75779,"نن":4681,"هت":1153,"نو":29290,"هج":3131,"ني":39040,"هد":4476,"هز":15706,"هر":26999,"هش":9098,"هس":7389,"هص":1630,"وع":5118,"وق":2152,"وف":3392,"ون":20803,"وه":7280,"ول":18645,"وم":15237,"هي":7368,"وت":7324,"مچ":1311,"هو":6697,"هه":1141,"وب":8255,"ود":37999,"وح":978,"لک":3276,"وج":4297,"وس":20717,"لگ":942,"وز":11168,"ور":41200,"وط":1483,"وض":931,"وص":985,"هٔ":10019,"وش":6987,"چ ":1702,"خو":10471,"دت":1296,"دا":45095,"خه":1201,"دب":2008,"دخ":1393,"خي":3813,"خل":2639,"خط":1338,"خر":3213,"خد":1570,"خص":2448,"خش":9472,"خس":1563,"تگ":2018,"ده":71326,"دو":21727,"ر،":2098,"دي":30900,"دف":1399,"دق":1055,"دل":3645,"دم":4441,"دن":24729,"ذا":2266,"دد":1619,"در":101792,"دش":2498,"دس":6715,"جي":2986,"جو":5371,"حت":1860,"حا":5566,"جن":6990,"جه":5500,"حب":830,"جل":2106,"جم":10111,"جس":1114,"جز":2725,"جر":5927,"جد":2691,"بک":2261,"خت":10589,"حي":3848,"د،":4720,"تک":1874,"حم":4596,"خا":9456,"حو":1605,"حق":1478,"حل":2638,"حص":1359,"حز":1102,"حر":3367,"حس":2640,"حد":3791,"اژ":1490,"تغ":1106,"تف":4099,"تم":9655,"تل":4375,"تق":4035,"تو":18858,"ته":20979,"تن":7644,"ثا":1467,"تج":1247,"تح":3912,"تر":24685,"تخ":2986,"تد":1435,"تش":4284,"تص":5949,"تس":1020,"تع":2873,"جا":32040,"جب":888,"جت":900,"تي":16351,"اک":9717,"ثر":1600,"اگ":2405,"بع":5160,"به":44819,"بن":6697,"بل":4961,"بق":17149,"بخ":9337,"بد":4503,"اً":1680,"بت":3170,"اي":143043,"اه":29729,"او":19244,"بط":1115,"بش":1384,"بز":4117,"بس":4538,"بر":58307,"اپ":4030,"تا":68031,"تب":22518,"بو":16880,"بي":27336,"ت،":2720,"ا،":2256,"ائ":3170,"از":65740,"ار":177821,"اد":40749,"اض":1760,"اص":6315,"اش":15541,"اس":126776,"ات":19285,"ئو":1601,"اب":23054,"ئن":1012,"اخ":9486,"اح":6972,"اج":4954,"ئي":4511,"اث":1612,"اف":9698,"اق":12353,"ام":46608,"ان":164721,"با":53255,"ال":53226,"اع":7941,"اغ":1903,"اط":5875,"اظ":989,"آب":4072,"آث":840,"ٔ 
":10096,"آذ":4112,"آر":1739,"آس":1327,"آز":1293,"آغ":1527,"آف":915,"آل":2867,"آم":7747,"آو":3278,"آن":29292,"آه":1034,"آي":2113,"غي":2298,"عي":7120,"غر":4096,"عل":6634,"عم":5201,"غا":3393,"عن":4842,"عه":5570,"عت":3100,"عد":3656,"عر":22149,"عض":1560,"عا":7561,"عب":3193,"ظا":1618,"طل":3586,"طق":2226,"شگ":3723,"ظر":2221,"شک":5793,"طي":2187,"طه":1305,"طو":2844,"طر":3523,"ضي":1604,"سک":2829,"طا":2296,"طب":17352,"ضو":1678,"سپ":5713,"صل":3684,"صف":2604,"ضر":876,"زگ":1068,"صو":4924,"صن":1205,"ضا":2719,"صي":2649,"شف":46579,"شص":3397,"شش":4939,"شع":1023,"رک":59526,"صد":15627,"صر":2425,"رگ":10959,"شم":40834,"شن":7520,"صا":3027,"شه":20211,"شو":16524,"صت":1929,"شي":10582,"سع":1222,"سط":4899,"دگ":4253,"سف":2299,"رپ":863,"سي":82351,"شت":20229,"رچ":966,"سو":9339,"شب":2714,"سه":5822,"شا":14113,"سن":5406,"سم":5040,"سل":6428,"شر":11318,"شد":71050,"شخ":1686,"زش":2575,"زه":4419,"سب":3293,"زن":6220,"سا":40397,"ست":157670,"زو":2068,"زم":7708,"سد":1583,"سر":10053,"زي":19577,"سخ":1157,"دک":1541,"رس":21469,"رش":6643,"رر":874,"رز":5465,"رص":2328,"رض":1657,"رع":995,"رل":1461,"رق":7164,"رف":22038,"رو":41463,"ره":48555,"زب":4854,"زا":26314,"رن":9102,"رم":13248,"ري":64671,"زر":4516,"زد":7554,"ذر":4784,"ذش":1625,"رآ":1627,"رب":15211,"را":79279,"رت":10552,"رج":4101,"ذي":851,"رخ":3587,"رح":1384,"حک":1290,"رد":33206,"ف ":71140,"ع ":15503,"غ ":1398,"ص ":2346,"ط ":7167,"ظ ":1176,"ر ":213685,"ز ":65089,"س ":20728,"ش ":24544,"ً ":1683,"ي ":278753,"ه ":289561,"ن ":207616,"و ":126016,"ق ":24581,"م ":45325,"ل ":55421,"۳۰":2261,"۳۱":1756,"۳۲":1475,"۲۷":1672,"۲۶":1845,"۲۹":1827,"۲۸":1810,"۲۳":1837,"۲۲":2072,"۲۵":2015,"۲۴":2310,"۴۲":932,"۴۳":828,"۴۰":1179,"۴۱":851,"۳۹":874,"۳۸":3317,"۳۷":1442,"۳۶":1469,"۳۵":1620,"۳۴":1319,"۳۳":1412,"۱۰":3498,"۰۱":1251,"۰۰":7262,"۰۳":960,"۰۲":955,"۰۵":991,"۰۴":954,"۰۷":971,"۰۶":1030,"۰۹":925,"۰۸":1126,"۲۰":6610,"۲۱":2252,"۱۴":2656,"۱۳":8246,"۱۲":3243,"۱۱":2760,"۱۸":4100,"۱۷":2716,"۱۶":2670,"۱۵":2740,"۱۹":21618,"۷۴":853,"۷۳":1099,"۷۶":1037,"۷۵":996,"۷۰":1130,"۷۲":902,"۷۱":971,"۶۸":828,"۶۹":864,"۸۷":1462,"۸۶":1335,"۸۵":1706,"۸۴":1157,"۸۳":1867,"۸۲":1238,"۸۱":1392,"۸۰":1426,"۷۹":1133,"۷۷":1205,"۷۸":1210,"۵۰":1455,"۵۲":836,"۵۱":867,"۵۴":849,"۵۳":863,"۴۴":839,"۴۵":921,"۴۸":900,"۴۹":832,"۶۱":861,"۶۰":1533,"۶۵":892,"۵۷":978,"۵۸":881,"۵۵":896,"۵۶":829,"۵۹":829,"۸۹":1660,"۸۸":1649,"۹۰":1972,"۹۳":2257,"۹۴":1978,"۹۱":1915,"۹۲":1938,"۹۷":3928,"۹۸":5762,"۹۵":1920,"۹۶":2485,"۹۹":9457,"۸ ":9234,"۷ ":8195,"۹ ":8815,"کي":21693,"کس":3863,"کش":52864,"کر":10838,"کز":5951,"کد":1167,"کت":10947,"کو":9889,"که":50638,"کن":11029,"کم":5441,"کل":6076,"کب":1527,"کا":21352,"گف":2232,"گل":4014,"گن":1256,"گه":1120,"گو":7699,"گي":9941,"گذ":3598,"گز":1726,"گر":18517,"گش":860,"گس":1433,"گا":16359,"۵ ":8279,"۶ ":7892,"۳ ":8356,"۴ ":8000,"۱ ":8359,"۲ ":7902,"۰ ":11742,"چي":2258,"چن":2891,"چه":10435,"چو":1224,"چک":1330,"چا":2573,"ژا":3316,"ژه":1974,"ژي":1330,"ژو":2750,"چين":1123,"چني":1160,"چها":6583,"چهل":2101,"چند":1391,"چار":831," ، ":7275," خ ":829," و ":107907," حا":2893," جن":6563," جه":2834," جل":883," جم":4408," جو":1700," جد":1121," جز":2226," جر":1081," جا":5217," تي":2383," اک":4067," خل":1477," خي":1014," خو":9064," دا":22315," خر":1716," خد":1173," خط":1264," حق":954," تک":1140," حم":1000," خا":5629," حو":858," حز":987," حر":1265," حس":1494," حد":1443," به":41159," بن":5266," بل":1919," بع":1479," بز":3261," بس":2873," بر":23110," اي":60506," اه":1491," او":12009," بخ":8934," بد":2040," اق":1653," اف":3495," ال":9180," با":32041," ان":13519," ام":5995," اط":2539," اع":2096," اد":2220," ار":5819," از":47219," اس":93321," 
اش":2136," اص":3471," اب":2944," ات":1821," اث":1151," اج":1670," اح":1812," اخ":1959," تو":11109," ته":3565," تن":2119," تم":1148," تل":1644," تق":2105," تع":1944," تش":1637," تص":1087," تر":7158," تخ":1066," تج":1001," تح":1857," تا":10536," تب":1758," بو":12627," بي":13492," آم":7369," آل":2819," آو":2892," آه":1013," آن":28846," آي":1817," آذ":4109," آر":1633," آث":831," آب":3531," آغ":1486," آس":1300," آز":1278," شک":1757," طو":1797," عض":849," عر":3450," عا":1962," عب":2630," غر":3490," عل":4955," عم":2914," عن":2426," غي":1131," سط":837," سف":999," سي":58158," شب":2526," سه":2789," سو":5501," سم":1003," سن":2452," شا":6389," سل":2036," شر":8538," شخ":981," شد":64662," شص":1903," شش":4604," شو":11010," شي":3635," شم":35388," شن":2904," شه":16707," صد":3342," سپ":4409," صف":1164," صو":2043," صن":981," طب":16842," طر":1891," دس":4487," در":93973," دي":6271," دو":15789," ده":8568," دن":1430," دل":1101," حک":1033," را":15933," رس":3186," رش":1541," ري":3051," رف":1064," رو":19239," زب":3421," زا":2852," رم":1079," زي":3601," سد":907," سر":6965," زم":4334," سب":1207," زن":2927," سا":27260," ست":1210," ۲۱":1529," ۲۰":5779," ۱۹":20860," ۱۵":1981," ۱۶":1865," ۱۷":1946," ۱۸":3370," ۱۱":2053," ۱۲":2491," ۱۳":7504," ۱۴":1848," ۳۰":1169," ۲۸":971," ۲۹":1004," ۲۶":1106," ۲۷":943," ۲۴":1600," ۲۵":1220," ۲۲":1386," ۲۳":1186," ۱۰":2514," ۷ ":1034," ۶ ":835," ۹ ":923," ۸ ":918," پل":1189," پن":6100," پو":1775," پي":6231," پا":11101," پر":5507," پد":1204," پس":3103," لا":2038," مل":2782," مق":2722," مع":22757," مط":1111," مص":1331," مس":5314," مش":3603," مر":12704," مد":3474," مح":8486," مخ":2396," لي":1375," مج":4247," لو":1161," مت":6834," ما":10891," مب":1057," نف":1957," نق":2575," نم":3573," نظ":2990," نخ":1392," نر":962," نز":1060," نس":1428," نش":1872," نص":1002," نا":13232," من":6720," مه":4441," مو":8861," مي":39390," هف":7486," هن":3049," وا":10473," هم":7875," هر":3177," هز":15471," هس":2372," هش":6627," نو":12914," ها":28631," نه":5384," ني":6690," هج":2983," فر":9595," فع":1613," فا":3850," فل":1560," قا":3036," فو":3446," قب":1076," في":4001," قد":1747," قر":6081," قم":3484," قل":1179," قو":1126," يک":22683," وج":1401," هي":1336," هو":2401," وس":1484," وز":1216," ور":1520," ول":1293," مک":1516," وي":5201," يا":14595," نگ":1352," يو":1770," ۱ ":999," ۵ ":990," ۴ ":1026," ۳ ":1037," ۲ ":1134," کت":2907," کر":7960," کش":52338," کل":3525," کن":7370," کم":2467," کو":5973," که":48270," کي":3060," کب":1107," کا":11187," گا":3551," گل":1221," گف":2153," گي":3912," گو":4533," گر":10421," گذ":2142," ژا":2692," ژو":2172," چا":2040," چي":1498," چن":1727," چه":9110,"۶۰ ":1024,"۲۰۰":3965,"۳۸۳":931,"۲۸ ":1132,"۲۹ ":1101,"۲۶ ":1164,"۲۷ ":991,"۲۴ ":1524,"۲۵ ":1126,"۰۰۰":1569,"۲۳ ":1040,"۲۲ ":1199,"۲۱ ":1215,"۲۰ ":1572,"۳۱ ":837,"۱۹۸":3784,"۱۹۹":7313,"۱۹۳":1178,"۱۹۲":829,"۱۹۵":879,"۱۹۴":946,"۱۹۷":2720,"۱۹۶":1373,"۱۳۸":2507,"۳۰ ":1283,"۰۰ ":2532,"۱۲ ":1158,"۱۱ ":1041,"۱۳ ":1130,"۱۴ ":1282,"۱۵ ":1270,"۱۶ ":1284,"۱۷ ":1225,"۱۸ ":1153,"۱۹ ":1122,"۱۰ ":1507,"فر ":1868,"فت ":4170,"قع ":6048,"فار":2458,"فاد":2141,"فاع":864,"فتا":2708,"فته":4673,"فتم":1892,"فتص":1637,"فه ":1283,"في ":1976,"عرو":1419,"عرف":15753,"عرب":2186,"عرا":897,"عدا":1093,"عبد":885,"عات":1421,"عال":2039,"عبا":1366,"عاد":1232,"غرب":3482,"عيت":1749,"عمل":1014,"عنا":1018,"عمو":1947,"عنو":2161,"غان":879,"عني":1446,"غاز":1437,"علا":887,"علي":2182,"علو":970,"عما":1036,"علم":1702,"غير":1279,"صي 
":884,"شما":36739,"شهر":16753,"شنا":4807,"صاد":1085,"شمي":1876,"رکت":2292,"رکز":5506,"رکي":1378,"شور":5763,"شود":8535,"شهو":994,"شون":1223,"شير":1302,"شيد":1499,"شين":1156,"ضي ":873,"رگا":1053,"رگذ":955,"رگر":1703,"رگز":904,"سپت":3558,"سپا":1035,"صول":885,"صور":2307,"ضاي":1046,"صلي":1295,"طي ":1075,"طه ":1047,"ظر ":999,"طبق":15902,"طرا":958,"عت ":1441,"عد ":843,"طلس":913,"طلا":2183,"طقه":1497,"طور":1362,"ظام":977,"شکي":1859,"شکل":1326,"شگا":3095,"عه ":4310,"عي ":3265,"زد ":1250,"ري ":28175,"رن ":1348,"ذشت":1620,"زب ":861,"ره ":40871,"رو ":1622,"ديگ":2478,"ديک":1038,"رق ":1211,"ذرب":3735,"رم ":3802,"رجه":899,"ردم":1119,"رده":3960,"ردن":2001,"ردي":2386,"ردا":4731,"رتب":927,"ربي":3967,"رتي":854,"راک":1311,"ران":23644,"ربا":5086,"راه":2985,"راو":1025,"راي":12001,"ربر":1171,"راف":1247,"رام":1664,"رال":1192,"راب":3402,"رائ":1115,"راح":1002,"رات":2813,"رار":4645,"راد":1759,"راز":1040,"راس":2348,"زش ":1033,"رفت":3769,"سر ":1698,"زي ":8767,"رصد":2148,"رشي":1128,"ست ":82115,"رسا":1284,"رست":10190,"رشت":1369,"رسم":1031,"رسي":3685,"زه ":3212,"رزش":844,"سم ":1194,"زدي":1099,"زده":4341,"ريک":4778,"روپ":963,"ريه":3010,"ريو":908,"رين":6859,"ريل":1861,"ريق":887,"ريا":4939,"ريخ":2752,"ريت":1617,"ريز":1529,"رنگ":1040,"ريس":1451,"روه":2711,"روي":3959,"ري،":1064,"روف":1550,"رون":2388,"روم":1326,"رور":969,"روز":5199,"رود":4045,"رهٔ":1083,"روس":7942,"روش":1626,"رهن":1422,"روا":2639,"زاي":1000,"زان":1010,"رها":4101,"زبا":3526,"زار":18512,"رند":2708,"زاد":3573,"رمي":2432,"رنا":1487,"رما":3636,"رمز":959,"سط ":3314,"رقي":4879,"شد ":26077,"شر ":1296,"شش ":1401,"سن ":1047,"سه ":5126,"زرگ":3314,"شت ":4426,"سي ":13405,"ستگ":1285,"شف ":46304,"زيک":1213,"سري":988,"سرو":889,"سرا":2106,"دگي":1827,"شه ":1221,"دگا":1978,"زما":4471,"سام":2387,"سال":16867,"سان":4639,"ساي":1790,"زند":3098,"ساخ":3059,"ساس":1619,"ساز":3943,"زنا":1261,"زمي":2413,"ساب":1251,"ستا":39209,"ستب":15399,"ستر":2167,"ست،":1386,"ستف":2194,"ستن":2498,"زيا":1067,"ستم":1666,"ستي":2717,"سته":2853,"ستو":1258,"زيس":843,"زير":3410,"رگ ":3861,"صر ":1320,"سلا":2748,"سمت":1146,"شي ":3088,"صت ":1755,"صد ":14363,"رک ":48005,"شرق":5570,"صل ":1169,"شده":27411,"شدن":16377,"شصت":1903,"شرک":1669,"شصد":1494,"ششم":1955,"ششص":1494,"شتا":2276,"سوم":2657,"سوي":1270,"شتر":2340,"شتص":1588,"شبه":953,"شاه":3881,"شان":2849,"شام":1210,"شار":1758,"سند":1687,"شاخ":862,"سمي":1232,"شخص":1423,"سيل":1493,"سين":2125,"سيم":1189,"سيق":1188,"سيو":851,"شتم":2005,"سيص":1797,"سنگ":854,"سيس":2418,"سير":939,"سيد":2026,"شتي":869,"سيا":50847,"شته":5322,"يکا":4258,"يکم":1999,"يکي":12431,"يگر":3114,"يگا":1094,"يچ ":1170,"وچک":1078,"يقي":1448,"يلا":5819,"يلي":2362,"يلم":2163,"يما":2897,"يله":1142,"يلو":2447,"يند":2629,"ينج":15434,"ينا":1201,"يمن":836,"ينت":984,"يمي":2048,"ينو":1227,"يني":2869,"ينه":1667,"يهٔ":843,"يوس":866,"يوا":1011,"يون":3682,"ينگ":934,"وپا":979,"يصد":1797,"يشه":906,"يشت":1412,"يسي":2944,"نگي":1776,"نگل":2493,"يسه":1037,"يسن":1399,"يزي":2379,"يست":11437,"يزه":870,"يسا":894,"نگا":2016,"يره":2645,"يرو":2532,"يري":2723,"يزد":1123,"يعي":880,"يجا":4729,"يتا":2607,"يتي":884,"يده":2751,"يدن":1012,"يدي":1302,"يرا":13163,"يرد":1088,"يخي":912,"يدا":2030,"يدل":878,"ياف":1602,"يال":2762,"يان":11075,"يبا":1219,"يام":955,"ياه":1109,"ياي":3214,"ياس":2249,"يار":48948,"ياز":1404,"ياد":1729,"يات":2044,"ياب":1358,"يک ":14180,"وز ":3949,"ور ":11858,"ود ":25900,"نقش":885,"هٔ ":10011,"نفر":1610,"وش ":1865,"وس ":2204,"نمو":979,"نند":4531,"هاس":15525,"هار":8046,"نما":2997,"ها،":830,"وع ":1666,"نقل":1074,"وط 
":936,"نيز":2529,"نيس":1397,"نير":1331,"نيا":4775,"ني،":840,"نوي":5351,"نون":2065,"نور":1647,"نود":1940,"نوب":3858,"نهم":1851,"نوا":6754,"نوع":1934,"نوش":1776,"نوز":853,"هاي":33269,"هان":4405,"نها":19728,"نهص":1628,"هدا":927,"وف ":1708,"نيک":1404,"نين":1804,"نيم":949,"نيو":887,"هجر":2343,"هست":7247,"وم ":4571,"هري":1954,"هشت":6878,"ون ":8984,"هرا":3739,"هرم":1056,"هره":1437,"هزا":15536,"هرس":8280,"ول ":5820,"وي ":9400,"ي، ":6814,"وه ":3934,"هشم":1388,"هصد":1629,"معي":1766,"معم":1600,"معن":1773,"معر":16933,"معا":1278,"هد ":1760,"هر ":8315,"مقا":1363,"منا":1581,"ناب":867,"مند":1285,"ناخ":1262,"منت":1332,"منط":1759,"ناس":3745,"نار":1067,"ملل":1010,"مله":1083,"ملي":2015,"مهم":1073,"مهو":1618,"موا":1351,"موج":939,"مور":2297,"موز":1395,"مود":1287,"موس":2124,"موع":2058,"نام":12986,"نان":3607,"ناي":2070,"نتش":1291,"نتر":1378,"مون":1614,"مول":1953,"ميل":5431,"ميد":1645,"مير":1559,"ميا":3140,"نتي":1863,"نجم":2428,"نجا":19529,"مين":23588,"ندا":3431,"ند،":1301,"نخس":1148,"ندو":1099,"نده":6796,"ندر":1455,"ندس":875,"ندي":2668,"هل ":2462,"نزد":2165,"هم ":4541,"نسا":1177,"نست":1454,"ندگ":2266,"نسو":981,"نشا":1366,"وب ":4521,"نصد":1688,"وت ":2984,"هي ":3587,"نطق":1746,"نظا":1022,"نظر":1892,"نشگ":2179,"ونا":1554,"ومي":5536,"وند":2535,"ولي":4939,"ومت":2798,"يع ":1026,"ولت":1350,"ولا":2437,"ولد":1454,"يش ":4417,"يس ":3929,"نگ ":4653,"يق ":1173,"ويژ":863,"يف ":1505,"ويچ":897,"مکا":1239,"ويي":925,"وين":2044,"ويه":2289,"ويم":1400,"ويز":853,"ويس":4678,"وير":1085,"وني":3417,"ونه":1782,"وها":1116,"يي ":11355,"يه ":12282,"يو ":1257,"يم ":5175,"ين ":61177,"يل ":7962,"هفت":6908,"هنگ":2686,"واژ":1094,"مچن":1005,"وتب":889,"وجو":2093,"لکت":1073,"هنر":1330,"وار":3450,"واز":2127,"هند":2551,"واد":2014,"همي":4888,"وئي":1673,"واب":2536,"وئن":956,"هما":980,"هور":2961,"وبي":1735,"همچ":1222,"هوا":1704,"واه":871,"واي":1826,"واق":6339,"وال":1232,"وان":9374,"وام":2251,"وري":9228,"وست":8309,"وزه":1349,"وزن":1058,"وزي":1584,"يا ":15452,"وسط":3323,"وسي":3688,"وشت":1878,"يب ":1599,"ود،":1062,"ودر":855,"ودن":1173,"ودي":1606,"وده":3280,"ورا":3355,"ورز":1216,"ورش":1426,"ورد":3566,"ورت":2597,"وره":2981,"ورو":1136,"يد ":7931,"ير ":7783,"يز ":4213,"يت ":7713,"يج ":845,"يخ ":2265,"وعي":1123,"وعه":1942,"لد ":1738,"لس ":1850,"لت ":2709,"لا ":1918,"لب ":937,"م، ":1039,"لف ":1009,"له ":6963,"ما ":2248,"لم ":2584,"قمر":3187,"قلا":1065,"قيق":1249,"قوي":1155,"فعا":1470,"فرد":869,"فرا":3846,"فرو":1305,"فري":1360,"فرم":1232,"فره":1581,"فزا":918,"قه ":2804,"قي ":7279,"فيز":835,"فيل":2814,"قدي":1049,"قرن":906,"قرا":4415,"قال":936,"قاب":1236,"فوت":1061,"فور":2032,"قان":1259,"لله":1097,"لما":2054,"ماع":870,"لمي":1688,"مات":1140,"مار":39512,"لند":1567,"ماد":1634,"لمل":950,"نس ":921,"نش ":1280,"ند ":23182,"ه، ":2793,"مي ":37976,"نج ":1914,"ني ":21211,"مشه":1314,"مرک":5626,"نو ":876,"نه ":11112,"مسي":1271,"ها ":10366,"مست":1149,"مسا":1677,"مري":7089,"مرو":1152,"مرد":2093,"مرا":2087,"مرب":1215,"مدي":1420,"مدر":931,"مخت":1330,"مدا":1385,"محر":1442,"محل":1270,"محم":2301,"ليو":834,"لين":2134,"مجم":2201,"مجل":977,"ليت":1597,"ليا":2311,"متو":1555,"ليس":3144,"ليد":1495,"ليل":1108,"لوي":1612,"لوم":2898,"متر":3763,"متح":1213,"مال":4589,"مام":1588,"مان":14800,"ماه":2044,"ماي":4595,"مبر":7239,"مد ":3240,"لو ":1092,"مت ":2632,"ن، ":4905,"لي ":12663,"مه ":7560,"نا ":1758,"من ":2003,"نت ":1363,"مل ":2783,"لاح":1096,"لاد":4785,"لار":867,"لاب":1067,"لات":2844,"لاق":1169,"لاف":888,"لاس":997,"لاع":939,"لبر":1058,"لاي":1580,"لام":3536,"لان":2170,"لال":908},"n_words":[8069793,10004435,6796528],"name":"fa"}
\ No newline at end of file
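
Each profile file added by this change is a single line of JSON with three fields: "freq", a map from 1-, 2- and 3-character grams to raw occurrence counts; "n_words", three totals that by position appear to be the corpus-wide counts of 1-, 2- and 3-grams; and "name", the language code. A minimal sketch of reading such a file under Python 3 follows (the path is taken from this change; the load_profile helper is illustrative, not part of langdetect's API):

    import json

    def load_profile(path):
        # Profile files are plain UTF-8 text: one JSON object per file,
        # with no trailing newline (assumption: UTF-8, as the data is Unicode).
        with open(path, encoding='utf-8') as f:
            return json.load(f)

    profile = load_profile('nlp_resource_data/langdetect/profiles/fa')
    print(profile['name'])       # 'fa'
    print(profile['n_words'])    # [8069793, 10004435, 6796528]
    print(profile['freq']['و'])  # 425936

Under Python 2 the same read usually went through codecs.open() or an explicit .decode('utf-8'); Python 3's built-in open() with an encoding argument returns str directly, which is the kind of difference this conversion has to absorb.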
diff --git a/nlp_resource_data/langdetect/profiles/fi b/nlp_resource_data/langdetect/profiles/fi
new file mode 100755 (executable)
index 0000000..fdb4174
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/fi
@@ -0,0 +1 @@
+{"freq":{"D":15745,"E":23140,"F":13820,"G":13964,"A":40046,"B":22964,"C":22892,"L":28771,"M":37698,"N":19249,"O":13807,"H":28813,"I":21256,"J":16698,"K":43440,"U":7805,"T":36899,"W":9333,"V":24290,"P":38188,"S":77764,"R":24157,"Y":10043,"X":1821,"Z":1858,"f":29615,"g":68514,"d":158713,"e":1194104,"b":43435,"c":44492,"a":1814181,"n":1349748,"o":934203,"l":885783,"m":433706,"j":314669,"k":691662,"h":239330,"i":1579260,"w":11494,"v":295575,"u":751889,"t":1169051,"s":1099978,"r":475344,"q":1633,"p":248621,"z":7860,"y":250687,"x":8131,"é":2342,"ä":426790,"ö":56679,"š":2606," l":70812," m":89685," n":43615," o":180629," h":39470," i":21438," j":161666," k":172428," d":8045," e":69090," f":6572," g":3193," a":72510," b":5870," c":2712," y":39165," u":13744," t":117334," v":98965," p":89643," s":126171," r":34331," J":16375," K":41900," H":27293," I":17045," N":17648," O":11538," L":26471," M":33953," B":21331," C":19265," A":36093," F":12406," G":12952," D":13481," E":21275," Z":1697," Y":9517," S":71766," R":22636," P":35182," W":8721," V":22294," U":6908," T":34378," ä":2798,"A ":2604,"Da":2655,"Co":4892,"Ch":3205,"Do":1694,"De":2688,"Di":2497,"Fa":1585,"Eu":3024,"Et":2349,"Es":2046,"En":2950,"El":2967,"Ge":2029,"Ga":2319,"I ":2891,"Fr":2159,"Fo":2415,"Fi":2245,"C ":2932,"Au":2202,"Ar":3564,"As":1696,"D ":1819,"Ba":4270,"Am":2679,"An":4463,"Al":7577,"Bu":1605,"Br":4547,"Ca":3954,"Bi":1625,"Be":3488,"Bo":2874,"Ku":5528,"Ky":1627,"Kr":2024,"Ko":6559,"Le":5504,"Hä":4384,"Li":5491,"La":6972,"Lu":2444,"Lo":3355,"Me":4707,"Mi":5018,"Ma":12276,"Mu":3221,"Mo":4384,"Ni":3815,"Ne":4302,"Na":2575,"P ":1899,"No":3154,"Gr":2750,"Ha":6094,"He":6887,"II":1871,"Hi":2221,"Ho":2925,"In":4312,"Is":2133,"It":2586,"Ja":4546,"Jo":4525,"Ju":3085,"Ka":13363,"M ":1871,"Ki":5554,"Ke":4172,"Un":1530,"Tu":4391,"Tr":2204,"To":3873,"Th":5318,"Ti":2566,"Te":3907,"Ta":5984,"St":4721,"Su":13153,"Wi":2315,"Wa":2313,"Vu":2187,"Vi":4932,"Va":6002,"Ve":5654,"Pu":2910,"Pr":3317,"S ":3929,"Pe":5861,"Pa":6975,"Po":6867,"Pi":3820,"Se":20320,"Si":6786,"Sh":1709,"So":3993,"Ru":2993,"Sa":10443,"Re":3883,"Ri":2393,"Ro":4731,"Ra":6455,"b ":4269,"a ":521046,"Yh":4604,"Tä":2310,"i ":187878,"ge":9008,"ga":10572,"fi":6772,"fr":2666,"fo":3746,"hd":23028,"he":38383,"ha":33064,"gn":1824,"gl":5520,"gi":17723,"gh":2779,"gu":2320,"gr":3324,"go":5014,"du":5956,"dy":8630,"g ":5897,"ea":17917,"eb":2211,"ec":3419,"ed":14005,"de":60149,"di":25899,"do":15043,"ds":2160,"dr":3184,"ew":1828,"eu":16676,"ev":21342,"ey":5808,"fa":2058,"h ":5313,"fe":3904,"eh":18148,"eg":3976,"ef":1593,"ee":73047,"el":141223,"ek":30007,"ej":4585,"ei":64027,"ep":7092,"eo":9816,"en":311708,"em":29073,"et":103119,"es":102393,"er":103228,"ca":4693,"e ":102313,"br":3098,"bu":11145,"bo":3540,"bl":1718,"bi":4927,"be":5208,"da":16197,"f ":4183,"cu":1883,"ct":2366,"co":5044,"ck":6770,"ci":3249,"ch":7835,"ce":5416,"c ":2562,"az":1738,"ay":3699,"ba":5388,"d ":12256,"at":82974,"as":103098,"ar":90264,"av":32524,"au":43994,"ak":51088,"al":188447,"ai":159817,"aj":32823,"ao":2348,"ap":28311,"am":44148,"an":226249,"ac":6773,"ad":13018,"aa":139980,"ab":3846,"ag":5956,"ah":19189,"ae":6848,"af":2453,"nu":19006,"nt":79878,"ns":48167,"nr":2630,"np":4065,"no":34422,"nn":76172,"ny":10224,"nv":4829,"oe":6505,"of":6585,"oc":5124,"od":21394,"oa":12733,"ob":3163,"om":49202,"on":220535,"ok":63100,"ol":88347,"oi":101083,"oj":13195,"og":7134,"oh":26450,"kä":31248,"ot":48882,"os":73162,"ov":18707,"ou":20507,"op":25063,"oo":22586,"or":48012,"r 
":15286,"ow":2927,"kö":4127,"oy":1883,"pe":39610,"pa":50151,"pl":2749,"po":19855,"lä":49440,"ph":2309,"pi":40558,"lo":48127,"lm":34905,"ll":141072,"ls":4754,"lp":5969,"lv":6764,"lu":74348,"lt":40738,"ly":10157,"o ":46103,"ma":107323,"mb":5971,"iä":11662,"me":68275,"mi":93628,"mm":26269,"mp":15152,"mo":20189,"mu":39868,"iö":6377,"my":13711,"p ":3699,"na":88104,"nc":4077,"nd":15417,"ne":115628,"nf":1558,"ng":26517,"nh":5001,"jä":31523,"ni":80364,"nj":4418,"nk":37841,"nl":4563,"nm":4904,"ju":18610,"jo":83376,"ki":95289,"ke":65747,"ka":163517,"m ":8821,"ky":14212,"ks":65607,"kt":5970,"ku":90075,"ko":85957,"kr":5885,"kk":48142,"kl":2002,"kn":1816,"li":159675,"hä":6582,"lk":30410,"lj":7180,"le":81668,"ld":2991,"la":154382,"lb":9921,"n ":656910,"hr":2420,"ht":42173,"hu":10726,"hj":14036,"hk":3966,"dä":2655,"hi":25267,"hn":1754,"ho":13688,"hl":1555,"hm":8011,"id":26485,"ic":10236,"ib":2776,"ia":70345,"eä":2738,"ih":17748,"ig":6859,"if":2884,"ie":58968,"hy":6074,"k ":7419,"ir":40208,"is":262822,"it":142893,"iu":5141,"iv":30487,"ii":85125,"ij":23513,"ik":78594,"il":97254,"im":65870,"in":288307,"io":39520,"ip":11864,"je":25016,"ji":8870,"l ":13054,"ja":144917,"tä":77533,"sä":41776,"vy":5926,"y ":23865,"wa":2667,"rä":7609,"vi":49369,"vu":43029,"vo":19782,"uv":30695,"uu":71208,"ve":29647,"va":118391,"x ":5327,"ui":22148,"uj":5727,"uk":45990,"ul":59174,"ue":19283,"ug":2138,"uh":10868,"ur":38607,"us":87239,"ut":63413,"um":30283,"un":78305,"uo":96118,"up":18134,"ty":54901,"tu":95699,"tt":139450,"ub":2312,"ua":10701,"ud":16569,"uc":1658,"w ":2157,"to":89637,"tl":2100,"ts":24815,"tr":16390,"te":142275,"tk":13593,"ti":135644,"th":11168,"pä":16825,"ta":258052,"su":49746,"sv":13201,"ss":117492,"st":215124,"sy":12228,"sl":7481,"sk":39138,"sn":1670,"sm":5723,"sp":7200,"so":30621,"sr":2112,"sc":2341,"se":139811,"sh":5275,"sj":2476,"si":171906,"nö":1934,"u ":38729,"sa":155918,"rr":9979,"rs":10921,"rt":23316,"ru":25282,"rv":12255,"ry":7951,"rp":3591,"ro":37223,"rn":7182,"rm":9935,"rl":3938,"rk":30938,"rj":32055,"ri":107434,"rh":7195,"nä":20276,"rg":6568,"re":35490,"rd":7095,"rc":2086,"rb":1938,"ra":66068,"t ":86930,"mä":30120,"s ":74921,"px":2388,"py":4209,"lö":4854,"pt":2602,"pu":33575,"pp":18507,"pr":6312,"ps":3220,"yä":2288,"yö":17077,"vä":26642,"yy":14572,"yh":29899,"ye":9120,"yd":4538,"ya":1921,"tö":12963,"yv":6669,"yt":27603,"ys":23779,"yr":6697,"yp":4560,"yn":15218,"ym":10818,"yl":20580,"yk":18251,"yi":8456,"ä ":141477,"äa":1925,"ö ":8048,"ää":49135,"ät":13038,"äv":12353,"äy":16618,"äm":13914,"äl":20535,"äo":2938,"än":46392,"äp":3824,"äs":18046,"är":23375,"äe":2662,"äi":25110,"äh":12610,"äk":13701,"äj":7869,"öö":1835,"öy":2134,"öt":2960,"ör":2049,"ös":12022,"ön":8537,"öl":2694,"öm":1579,"ök":3180,"öh":1825,"öi":4386," Ga":2281," Ge":2007," Fo":2388," Fr":2153," Fi":2200," Ha":6075," He":6845," Gr":2717," Ho":2907," Hi":2219," Ja":4528," Is":2125," It":2580," In":4291," Ka":13310," Ke":4144," Ki":5524," Jo":4519," Ju":3077," La":6921," Le":5486," Hä":4383," Li":5091," Ko":6537," Kr":2018," Ku":5494," Ky":1626," Ma":12197," Mi":4965," Me":4668," Lo":3341," Lu":2432," Ne":4266," Na":2563," Ni":3790," Mo":4358," Mu":3189," Am":2670," An":4363," Al":7546," Ba":4234," Au":2195," As":1683," Ar":3519," Be":3463," Bi":1601," Bo":2840," Br":4522," Bu":1591," Ca":3824," Ch":3183," Co":4800," Da":2631," Di":2463," De":2676," Do":1656," El":2955," Et":2345," Es":2034," En":2918," Eu":3021," Fa":1558," Tä":2303," Wi":2296," Wa":2289," Yh":4598," Po":6838," Pi":3788," Pe":5834," Pa":6894," No":3136," Ra":6410," Ro":4711," 
Re":3856," Ri":2382," Pr":3297," Pu":2902," Su":13126," St":4422," Ta":5960," Th":5301," Ti":2528," Te":3868," Tr":2184," To":3832," Ru":2990," Sa":10406," Sh":1689," Si":6699," Se":20290," So":3959," Va":5984," Ve":5638," Vi":4890," Vu":2183," Tu":4371," ja":74622," in":3365," il":6233," is":1907," it":4582," ka":41758," ki":21829," ke":20426," jo":60523," ju":13868," ha":11651," he":10326," hy":3213," ih":2463," hi":4639," ho":2199," hu":2999," jä":12144," ni":12782," ne":5502," na":3130," my":11678," mu":27269," mo":4701," ol":30354," om":3788," on":110013," kä":15552," oh":5375," oi":2526," of":2700," ny":2745," nu":2351," no":7612," le":7943," hä":3832," li":11414," n ":4861," la":17994," ku":35062," ky":5412," kr":3070," ko":26681," me":12340," mi":8054," ma":22033," lu":14733," ly":2009," lo":4509," am":2608," an":4850," ai":10388," aj":3176," al":26047," av":3055," au":2714," ar":5107," as":8761," er":7980," et":8124," es":9200," en":13349," ei":4106," el":17426," fi":2104," de":3555," di":1711," ed":3633," vä":7263," ym":1774," yl":9242," yk":6097," yh":19431," tä":4808," ru":4569," ry":2522," sa":23518," se":22713," si":27049," so":8389," ra":14800," re":3082," ri":4303," nä":4180," ro":4580," pu":9566," pr":4112," s ":2297," px":2387," py":2739," mä":2284," os":9956," ot":1666," ov":7670," op":3466," or":1769," pe":20224," pa":14359," po":10381," pi":14437," lä":10166," sä":4588," va":34158," ve":7508," uu":2456," vo":6724," vu":30542," vi":12396," ty":5276," tu":19858," us":4512," ur":1664," ul":2520," ta":38855," sy":5905," st":3683," su":25086," to":18439," th":5224," pä":9746," ti":8862," te":13642," ää":2036,"Ete":1912,"Eur":2720,"For":1614,"Hel":2818,"Int":2022,"Alb":1684,"Bri":2185,"Nor":1531,"Per":1731,"Par":1783,"Poh":2941,"Ran":1941,"Kal":1586,"Kan":2320,"Kau":1621,"Kar":2566,"Kir":1567,"Kun":1618,"Hän":3486,"Mar":3439,"ään":18825,"ääk":4426,"äät":2724,"äär":4190,"ääs":2129,"ää ":10320,"Vuo":2097,"Yhd":3250,"Suo":9477,"Sta":1658,"Sen":4830,"äht":3405,"ähd":1555,"ähe":3536,"ähi":1621,"Ruo":1578,"änn":2773,"äns":2545,"änt":2999,"Sak":1930,"ämi":2674,"äni":2131,"äjä":6711,"äne":3700,"Se ":11672,"äs ":1575,"ämä":7782,"äos":2009,"äka":1964,"äis":10749,"äin":8551,"äiv":1941,"ält":2814,"äli":6173,"älk":2158,"äll":6310,"äks":2395,"äki":2767,"än ":27597,"äve":2052,"ävi":2773,"ärä":1957,"äyt":13595,"äri":6126,"ärj":7258,"ät ":5667,"äsi":4968,"äse":2119,"ärv":3216,"äst":3327,"äss":3767,"ätt":2038,"ävä":6336,"Ven":3558,"Val":1869,"The":3947,"Tur":1761,"ber":2683,"ce ":2305,"bri":1634,"bum":8758,"aka":6269,"ake":7201,"aki":5604,"aji":6912,"ajo":3036,"al ":4672,"aja":18366,"aje":2029,"aih":3713,"aik":14735,"ail":13159,"aim":1862,"ain":38398,"ais":49469,"ait":14001,"aiv":4010,"aid":2730,"ahm":1834,"aht":3966,"ahd":4670,"aha":3293,"anu":4220,"ano":5725,"ann":13433,"anm":2516,"ant":14494,"ans":18785,"ane":3551,"ang":3334,"anh":3197,"ani":13808,"anj":1817,"ank":6571,"ana":14302,"anc":1814,"and":6936,"amm":4533,"amo":2056,"amp":3132,"ami":8254,"ame":4479,"ama":17130,"alv":4153,"alu":15472,"alt":16363,"alo":7802,"alm":6123,"all":43431,"alk":10647,"ali":18156,"ale":9704,"ala":40225,"alb":7609,"an ":106763,"aks":12179,"aku":8003,"akt":1737,"ako":3232,"akk":5053,"ae ":1542,"aaj":5209,"aak":5208,"aai":4807,"aan":46070,"aal":13155,"aam":4542,"aas":4039,"aar":7457,"aav":4444,"aat":11110,"aa ":30258,"ai ":13611,"ael":1895,"adi":4196,"ack":1788,"ada":3189,"at 
":26536,"arh":2305,"are":4235,"ard":3175,"ara":7612,"arp":1607,"aro":2059,"arm":2145,"arl":1651,"ark":11086,"arj":11397,"ari":15453,"aru":3626,"arv":4834,"arr":3315,"ars":2606,"art":6849,"asa":4886,"asi":8287,"ase":5553,"aso":1854,"ask":4888,"ar ":2391,"apa":10451,"ape":2308,"api":2097,"app":6574,"apu":2355,"as ":6418,"ava":19468,"aut":8391,"avo":2634,"avi":5086,"avu":2827,"ata":7640,"asu":5324,"ast":26494,"ass":30398,"asv":4804,"atk":3390,"ato":4227,"ate":6056,"ati":12192,"att":13242,"ats":1884,"atu":3704,"aul":4866,"aup":10709,"aur":1674,"aus":6854,"aud":2731,"auh":2050,"auk":3794,"jel":4529,"jen":8431,"jes":7295,"jet":1556,"ji ":2297,"jaa":5054,"jat":4403,"jas":4641,"jal":7569,"jak":4713,"jan":11104,"jai":10124,"jou":4303,"joh":4795,"joe":2427,"jol":3356,"jok":24849,"joi":16729,"jon":9520,"jot":6559,"jos":6649,"jia":1786,"itk":2090,"ito":6246,"itu":9149,"itt":43813,"its":13426,"ity":9715,"isk":6128,"ism":2973,"isl":1855,"iso":4721,"isp":1964,"iss":30541,"isu":9410,"ist":90841,"isy":1690,"ita":17408,"ite":16460,"iti":5058,"ivo":1659,"ivu":2547,"isä":6664,"iva":11789,"ivi":6748,"ive":3576,"ipp":2283,"ipu":1889,"ilä":2764,"ilö":2966,"is ":8294,"ion":10934,"iop":2837,"ios":4401,"iot":1986,"ikä":1979,"ioi":3973,"iol":2452,"ipa":1808,"ipe":1630,"irt":2993,"iro":1572,"irk":4448,"iri":6484,"irj":10801,"isi":26753,"ise":59209,"isa":5742,"ire":2058,"inä":3666,"ira":5520,"it ":5916,"ja ":90836,"itä":10709,"ivä":3912,"kii":3407,"kik":3240,"kij":2090,"kim":2775,"kil":12288,"kia":4204,"kie":9439,"kiv":2180,"kin":14280,"kio":1639,"kir":13427,"kis":5163,"kit":6283,"ki ":12342,"kea":4096,"kee":4463,"keh":4191,"kei":5212,"kem":2757,"kel":5236,"ken":9401,"kes":10316,"ker":7027,"keu":3655,"ket":2834,"kev":1653,"ke ":2125,"kre":1700,"ksa":4741,"kse":20115,"ku ":3951,"kot":3952,"kou":3762,"kos":5488,"kor":5268,"koo":4082,"kon":11074,"kom":3392,"kol":7289,"kok":7259,"koj":1821,"koi":14746,"koh":3422,"koe":1980,"kku":3944,"kke":3841,"kka":15414,"kko":10268,"kki":11721,"ko ":7803,"jul":11521,"kat":4283,"kau":16540,"kar":4289,"kas":12031,"kap":6608,"kan":20757,"kal":9712,"kam":1710,"kak":4624,"kah":2946,"kai":21386,"kaa":14665,"ka ":40830,"ha ":1581,"han":4990,"hai":2275,"hal":6945,"har":4587,"hah":1631,"haa":1766,"he ":6389,"hdo":2167,"hdy":6548,"hde":8081,"hdi":4168,"hel":4032,"hei":7575,"hee":1552,"het":1965,"her":2750,"hen":6953,"hem":2103,"hin":8148,"his":2803,"hit":5065,"hja":5248,"hje":3366,"hjo":5005,"gle":2192,"gla":2321,"gra":1797,"ial":8370,"ian":13283,"ias":7499,"ic ":1670,"iaa":4991,"ia ":28344,"iet":9042,"iel":9758,"iem":2939,"ien":20960,"ier":2897,"ies":3794,"ied":2257,"ieh":1857,"iek":1604,"eä ":1619,"ich":1751,"ie ":2310,"ica":1971,"idi":2094,"ide":16311,"ida":3705,"iid":1915,"iik":12079,"iih":2036,"iin":34905,"iil":1871,"iim":1601,"iis":3357,"iir":7370,"iip":1650,"iiv":3159,"iit":11463,"ija":15115,"ijo":3212,"ika":17282,"ii ":2634,"igh":2049,"ihe":3769,"iha":1816,"ihm":2273,"ihi":4976,"iht":1902,"imo":4420,"imm":10403,"imp":1982,"ime":14110,"imi":21669,"ind":1904,"ina":17571,"imu":3688,"inn":9244,"ino":7309,"int":21117,"ins":3814,"ine":62207,"ijä":3525,"ing":10847,"ini":7961,"ink":6694,"ioa":2138,"inu":2993,"inv":2235,"iko":8699,"ikk":20680,"iki":6034,"ike":7607,"ila":13660,"in ":123439,"iku":7225,"iks":5910,"ilp":4235,"ilo":6040,"ill":27290,"ilm":11405,"ilj":1888,"ili":8485,"ile":2304,"ima":7267,"io 
":5938,"ilt":4359,"ilu":7728,"hmä":2450,"hol":1937,"hon":2485,"hoi":2109,"hmi":2965,"hmo":1850,"hty":9392,"htu":3085,"hto":2163,"hti":9794,"hte":8765,"hta":4518,"htä":1902,"huo":1796,"hum":3543,"hyv":1693,"etä":4989,"evä":1690,"eta":9667,"ete":9398,"eti":3164,"est":38413,"ess":23541,"eud":1841,"eto":4814,"etr":5809,"ets":2385,"ett":36313,"etu":5501,"ety":2077,"erä":4263,"eve":1697,"eva":7898,"evi":4106,"euv":2196,"eut":3244,"eur":5135,"eus":2033,"esä":2111,"ey ":1541,"evy":4434,"elä":9755,"er ":8358,"eor":2097,"eol":1627,"emä":1894,"es ":8151,"erk":7307,"eri":25262,"erg":2239,"erh":1625,"enä":7730,"ere":5494,"era":4710,"et ":16346,"esk":9110,"esi":14483,"ese":2664,"erv":1561,"eru":10609,"err":4242,"ert":7186,"ers":4983,"ern":2916,"erm":2674,"ero":5460,"eki":2975,"ekk":1967,"ekn":1637,"eko":1843,"eks":9087,"ekt":2330,"en ":216998,"ela":5301,"ele":9915,"eli":28185,"elj":2641,"elm":10985,"ell":36182,"elo":8595,"elu":6803,"els":2931,"elt":10887,"ely":1692,"ema":7235,"eme":2562,"emm":5417,"emo":1546,"emi":5347,"emp":2813,"ene":8165,"eng":2981,"ena":5955,"end":1729,"enn":9973,"enk":6752,"eni":7989,"ens":15471,"ent":18077,"ekä":5887,"ehd":2924,"ehi":4544,"eht":6057,"eis":18168,"eim":3841,"eil":7078,"ein":8482,"eik":4720,"eid":4335,"eja":2716,"el ":2656,"eit":8348,"öss":1852,"gis":3146,"gin":6890,"gia":4299,"ght":1865,"ös ":7736,"gen":2252,"ger":1575,"ön ":5342,"gas":2278,"gan":2134,"fri":1576,"for":2137,"fil":1743,"da ":1964,"de ":3246,"daa":2346,"dal":1725,"das":2350,"dan":3795,"ck ":3175,"ed ":1799,"ean":1991,"eal":1781,"eat":2428,"ea ":2811,"ei ":3830,"een":37770,"eel":9070,"ees":6176,"eet":6503,"edi":2493,"ede":4270,"edu":2200,"edo":1603,"ee ":9590,"dys":6375,"dus":2698,"don":2736,"dol":2034,"dos":3976,"dia":2516,"der":2863,"des":8657,"det":3714,"del":8280,"dek":2312,"den":24947,"dem":1603,"di ":1662,"din":5138,"dio":5021,"dis":6234,"rhe":3181,"rha":2376,"näj":3467,"näk":2091,"näi":1855,"ri ":14183,"rgi":2577,"ret":2238,"res":3703,"nä ":7166,"rea":2509,"ree":2885,"rei":4803,"ren":6049,"rel":2990,"rdi":1756,"re ":2848,"rd ":1882,"ras":4899,"rat":4668,"rau":3008,"raj":2533,"rai":2122,"ran":13051,"ral":4384,"rak":6377,"raa":6078,"rad":3325,"rs ":1794,"ros":3755,"rot":3070,"rom":2805,"ron":4039,"roo":4243,"rov":1686,"roc":2312,"roi":1996,"rna":1810,"rne":1656,"rni":2033,"ro ":2723,"rma":3198,"rme":1785,"riä":3343,"rmi":2119,"rko":8356,"rki":6693,"rkk":7003,"rke":4145,"rka":1913,"rjo":6361,"rja":16764,"rje":7814,"rio":2073,"rit":12816,"ris":12311,"rii":3347,"ril":5111,"rik":9617,"rin":19152,"rim":2324,"ria":8783,"ric":1741,"rie":2399,"näy":2474,"ruo":2205,"run":2801,"ruu":3504,"rus":12209,"rva":2385,"rvi":3707,"rve":3560,"rvo":2466,"ry ":2410,"rsi":3925,"rta":4520,"rto":3696,"rte":2990,"rti":3436,"rtt":1850,"rt ":1812,"rro":1797,"rre":2394,"rra":3831,"saa":11689,"sai":4605,"sak":3935,"sal":8568,"sam":4120,"san":10417,"sat":2386,"sas":5453,"sar":9940,"sav":3217,"sa ":88364,"ryh":2885,"si ":33316,"siv":2877,"sie":3768,"sia":14727,"sit":13812,"sis":15595,"sin":22898,"sio":5975,"sil":7537,"sim":10671,"sij":10631,"sik":6439,"sii":15427,"se ":7339,"sev":3309,"ser":1954,"ses":19353,"set":8009,"seu":4708,"sei":5713,"see":16044,"sen":44413,"sem":6469,"sel":8992,"sek":9571,"spa":2113,"sot":3315,"sol":2285,"son":3580,"sop":1591,"sos":3124,"sod":2739,"sof":1780,"soi":4463,"st 
":1959,"sli":1829,"sla":3001,"ski":7594,"sko":4863,"sku":10246,"ska":6718,"ske":7078,"sma":1538,"siä":2875,"smi":2339,"ssä":20775,"stä":20839,"stö":6003,"syn":3354,"syy":2751,"sse":2037,"ssa":81782,"sso":2391,"ssi":7227,"ste":34759,"sta":74007,"sto":18448,"sti":27582,"stu":19044,"str":3547,"sty":7941,"suk":6559,"suo":12490,"suu":15921,"sut":2418,"sva":8410,"svi":2330,"tai":23079,"taj":9294,"tak":4197,"tal":15729,"taa":33105,"tav":11283,"tau":2602,"tat":3910,"tas":5911,"tar":15686,"tap":4732,"tan":13725,"tam":13647,"te ":4200,"ta ":96137,"pa ":1685,"par":4524,"paa":2641,"pah":2670,"pak":1785,"pal":14678,"pai":9231,"pan":6844,"läp":1604,"län":3940,"läm":2523,"läi":5398,"läh":6121,"pi ":2782,"lä ":21592,"per":21853,"pet":1794,"pel":8616,"lää":3392,"pia":3590,"pid":1828,"pie":3232,"pii":5873,"pil":2184,"pin":5017,"pis":5110,"pit":5090,"por":1632,"poi":2560,"poh":4180,"pol":4180,"ppu":2417,"ppi":4768,"ppa":7267,"ppe":1921,"pro":3960,"pur":1783,"pus":1680,"pun":10901,"puo":7107,"pul":1657,"puh":2589,"px ":2372,"puu":2017,"mä ":7257,"mäi":6703,"män":3876,"mäs":1650,"mää":5076,"ra ":5712,"ngi":7939,"ngl":4558,"ni ":6341,"nge":2486,"nga":3203,"jän":3177,"jäl":4462,"jäs":2729,"jär":9906,"nha":2067,"nei":4215,"nel":5383,"nen":63104,"ner":2732,"net":11819,"nes":5079,"neu":1692,"ng ":3916,"nee":7398,"jä ":5912,"nce":1684,"ne ":6951,"ndo":1777,"ndi":2475,"nde":2171,"nda":1665,"nak":1691,"nal":6875,"nan":9972,"nai":6761,"naa":4824,"nd ":4059,"nat":3023,"nas":6250,"na ":39872,"myö":9305,"iö ":2528,"ntä":3484,"nsä":4958,"nva":1615,"num":1758,"nus":3629,"nut":8387,"nty":5978,"nto":10581,"ntu":4377,"ntt":4440,"nti":13349,"nta":21023,"nte":11336,"nso":2855,"nss":4946,"nse":1616,"nsi":11891,"nsk":3706,"nsa":12409,"nnä":1593,"nt ":1924,"ns ":1897,"nol":2730,"noi":8508,"nom":2361,"non":3636,"not":1919,"nos":4123,"nne":16039,"nna":31714,"nno":6361,"nni":12231,"nnu":4777,"nme":1667,"nma":1873,"jää":2989,"nla":2791,"no ":2124,"nke":2631,"nki":13461,"nka":13056,"nko":2285,"nja":2100,"nii":4930,"nie":2772,"nia":6492,"nis":11071,"nit":5273,"nim":13793,"nin":16562,"nik":3354,"nil":2835,"ogi":4118,"oi ":3679,"oht":5278,"käs":5110,"kär":1779,"ohj":13237,"oho":2246,"oiv":1619,"ois":23808,"oir":1655,"oit":25419,"oin":13137,"oik":5181,"oim":14211,"oil":4642,"oih":1702,"oid":5498,"käy":11169,"oje":6434,"oja":5307,"ock":3160,"ode":9371,"odi":1977,"odo":4669,"of ":2718,"oda":3354,"oel":2434,"oen":2647,"ofi":1682,"kä ":7989,"oa ":4765,"oal":3299,"nyk":2547,"nyt":4401,"nvä":1760,"otu":2486,"otk":3192,"oti":5562,"ote":5798,"ott":10457,"ots":2991,"oto":4571,"ost":14630,"ota":8496,"osi":11836,"osk":3907,"ose":3031,"oss":12304,"oso":3047,"ovi":3135,"ova":10956,"ove":2498,"ouk":4542,"oul":4682,"oun":1887,"ous":2951,"out":1762,"opp":4159,"opi":5727,"ope":4607,"opa":3651,"os ":5287,"opu":2089,"oon":7289,"ool":1601,"oom":1672,"or ":1961,"oot":2091,"oos":2166,"oop":3454,"ork":3647,"orm":3104,"orn":1628,"oro":2190,"ord":2513,"ore":3159,"org":2171,"ori":12794,"osa":15087,"ort":3782,"ot ":3256,"ora":3303,"ola":6203,"on ":146201,"oli":32153,"oll":16426,"ole":10977,"kää":1980,"olt":1744,"olm":4749,"olo":5842,"oly":2026,"olu":4176,"oka":27584,"okk":2554,"oki":3937,"oke":2183,"oks":6588,"oko":8846,"oku":9166,"ona":6281,"one":7607,"ong":1915,"oni":10795,"onk":9689,"onn":20892,"ono":2022,"ons":4191,"ont":4754,"oma":18393,"ome":15724,"omi":7452,"omu":1601,"la ":45674,"le 
":23672,"laa":8510,"lah":2594,"laj":7083,"lai":41589,"lal":1802,"lak":2624,"lan":15670,"lam":1915,"lat":4073,"las":9219,"lau":6037,"lbu":8824,"kuv":12915,"kuu":20315,"kut":5564,"kus":5968,"kup":2448,"kuo":2056,"kun":18716,"kul":5464,"kuk":1991,"ksi":34515,"kso":2000,"kue":1696,"kui":3945,"kti":3367,"kyi":1827,"kyl":5052,"llä":17871,"lok":9030,"lon":3666,"lom":4409,"lop":2359,"log":4285,"loi":6423,"lpa":4160,"los":4127,"lot":1540,"lou":2403,"ljä":2124,"lmi":8971,"lme":5378,"lma":14737,"lti":5458,"ltt":2378,"lue":13574,"lsi":2865,"lta":20774,"lu ":5034,"lmä":3813,"hän":3427,"li ":38792,"lev":10595,"les":3513,"let":2489,"ler":1630,"lem":3429,"len":7655,"lek":2031,"lel":2977,"lei":7201,"leh":3152,"lee":7823,"llu":3725,"lo ":3012,"lla":45545,"lle":23098,"lli":38165,"llo":8003,"lko":3906,"lku":3582,"lka":14232,"lke":3142,"lki":3385,"ljo":1577,"lje":1833,"ll ":2046,"lit":6604,"lis":35039,"lip":2086,"lio":3340,"lin":25643,"lim":2511,"liv":3460,"lia":6855,"lik":3266,"lil":3081,"lii":12989,"lij":5839,"lie":2208,"ma ":15016,"mb ":3008,"maa":25280,"mah":1595,"mai":6076,"mak":3049,"mar":3413,"mas":6492,"mal":15003,"man":17338,"mat":9985,"me ":2801,"mee":1671,"met":10390,"mes":9381,"mer":13701,"mel":4799,"men":18334,"mei":2490,"iä ":10723,"lve":3316,"lvi":1540,"luk":6577,"lui":2536,"luo":7753,"lun":5262,"lut":6301,"lus":6284,"luv":9141,"luu":4990,"ltä":6950,"lyh":1732,"lym":1752,"mpi":5319,"moo":2276,"mon":6257,"mpa":1715,"mmä":7757,"mpä":2564,"mua":1679,"mus":7282,"mut":3853,"muu":6734,"mui":3639,"muk":6397,"muo":5752,"mi ":13252,"min":18710,"mil":5082,"mis":25017,"mit":6945,"mia":4076,"mie":6732,"mik":3016,"mii":3025,"mo ":3241,"mmi":7415,"mma":5671,"mme":2557,"väl":6253,"vä ":5493,"vää":1811,"vän":2110,"vät":4019,"yvi":1744,"yty":2014,"ytt":9230,"yte":5446,"ysv":6099,"yst":4193,"ysi":1754,"yri":2968,"yt ":5088,"ymä":1567,"ys ":5219,"ylä":5863,"yny":2515,"yvä":4334,"yyl":1824,"yys":2076,"yyp":2141,"yyt":1690,"yy ":2993,"ye ":3730,"yde":2513,"yee":3728,"yks":11428,"yky":3192,"yn ":4172,"yle":4657,"yli":5834,"yll":2070,"ymi":2202,"ymp":3885,"yne":1918,"ynt":3210,"yi ":2699,"yhm":2893,"yhd":7827,"yht":15566,"yis":3850,"tön":2023,"tös":1710,"tö ":2585,"täm":6823,"tän":1885,"täh":1876,"täj":3398,"tä ":34022,"tää":13926,"täy":1596,"tär":1635,"täv":7438,"sää":2844,"sä ":25127,"säl":3359,"säk":2119,"säv":1927,"vuo":32425,"vun":2514,"vul":3123,"vy ":2051,"via":2642,"vio":1556,"vir":4221,"vil":2731,"vin":6040,"vii":6538,"vie":4226,"vit":3912,"vis":7340,"voi":10127,"von":1808,"vos":2863,"räi":2223,"vi ":2985,"ver":6645,"ves":2270,"ven":5385,"vel":7217,"ve ":1928,"val":26751,"vak":1862,"van":11048,"vap":1548,"var":8953,"vat":20575,"vas":8886,"vaa":7863,"vai":7839,"va ":18614,"uuk":1525,"uun":9740,"uul":10457,"uud":5759,"uus":11398,"uur":9061,"uut":11944,"uvi":3760,"uvo":2424,"uva":18498,"uvu":5385,"usl":2474,"usk":5912,"usi":8604,"use":5829,"usa":2048,"uu ":9094,"ust":26922,"uss":6964,"utk":3663,"uti":2698,"ute":5204,"uta":9377,"utt":14676,"uts":1830,"utu":7065,"uto":3590,"us ":19751,"ut ":13926,"ura":6710,"ure":2812,"urh":2266,"uri":10072,"uro":4160,"uru":1961,"uod":11474,"uon":20919,"uol":10441,"uom":19569,"uok":4826,"uot":13021,"uor":5597,"uos":6389,"upe":2549,"upu":10042,"umi":12508,"uma":5258,"umb":3567,"ume":3010,"unt":10189,"unu":3435,"unk":6506,"uni":5890,"unn":17239,"und":1617,"una":4707,"ung":5391,"une":2125,"uks":15716,"uku":7377,"uko":2518,"ukk":6046,"uki":2552,"uke":1610,"um ":2055,"uka":8994,"ulu":17346,"ult":3742,"ulo":2329,"ull":6128,"ulk":14188,"uli":3634,"ule":2174,"ula":6501,"un 
":17537,"uin":4870,"uis":6404,"uht":1985,"uhu":1855,"uje":1776,"uit":1991,"uja":2763,"ui ":2881,"uha":2389,"ude":10287,"udi":3267,"ue ":4574,"uet":2172,"uee":7376,"työ":3659,"ua ":4041,"uas":1855,"tyv":3209,"tyy":6164,"tye":7861,"tyi":5279,"tyk":3846,"tym":2979,"tyn":4512,"tys":6857,"ty ":6437,"tur":3509,"tus":10527,"tut":5144,"tuu":8595,"tuv":3012,"tuj":1880,"tui":4969,"tul":5059,"tuk":7469,"tun":13048,"tum":4274,"tuo":5296,"tua":2491,"tud":2510,"ttö":2890,"ttä":18426,"tra":3772,"tri":7061,"tro":3179,"tu ":15401,"tsa":2112,"tse":11636,"tsi":5021,"tsu":1775,"tta":37250,"tte":15326,"tti":29985,"tto":7420,"ttu":17456,"tty":9077,"to ":10851,"tiö":3363,"toj":3671,"toi":21414,"tkä":1786,"toa":3897,"tos":4171,"tot":2445,"tom":3430,"ton":10160,"tok":5162,"tol":5536,"tor":8049,"too":2936,"top":1727,"tii":20914,"til":10225,"tik":4482,"tie":14185,"tit":2814,"tis":9101,"tin":13829,"tim":2695,"tio":13178,"thu":3317,"tia":5511,"tiv":2672,"tki":4098,"tka":4817,"pää":9932,"tem":2906,"ten":19694,"teo":3855,"tei":13515,"tek":5492,"tel":25974,"tee":16061,"teh":4545,"th ":1573,"tet":23609,"tes":1638,"ter":13778,"ti ":27214,"pär":2295,"the":3039,"päi":2568,"yön":1725,"yös":8569,"yöh":1625,"yä ":1547},"n_words":[15184556,16912812,13033049],"name":"fi"}
\ No newline at end of file
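
With several profiles on disk, the raw counts can be compared across languages. The sketch below is a deliberately simplified ranker, not the sampling algorithm langdetect itself uses; it assumes n_words[n-1] holds the total number of n-character grams in the profile, reuses the load_profile sketch above, and its 0.5 smoothing floor is arbitrary:

    import math

    def score(text, profile, n=3):
        # Sum of log relative frequencies of the input's n-grams;
        # unseen n-grams get a small floor so the log stays finite.
        freq = profile['freq']
        total = profile['n_words'][n - 1]
        s = 0.0
        for i in range(len(text) - n + 1):
            s += math.log(freq.get(text[i:i + n], 0.5) / total)
        return s

    profiles = [load_profile('nlp_resource_data/langdetect/profiles/' + code)
                for code in ('fa', 'fi', 'fr')]
    best = max(profiles, key=lambda p: score('tämä on suomea', p))
    print(best['name'])  # 'fi' wins on trigrams such as 'ämä'

Because the files store counts rather than probabilities, normalization is left to the loader; langdetect's detector_factory converts the counts to probabilities once, when the profiles are registered.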
diff --git a/nlp_resource_data/langdetect/profiles/fr b/nlp_resource_data/langdetect/profiles/fr
new file mode 100755 (executable)
index 0000000..1d6d3ba
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/fr
@@ -0,0 +1 @@
+{"freq":{"D":116102,"E":120408,"F":121384,"G":111406,"A":238769,"B":169565,"C":285959,"L":345504,"M":204991,"N":98243,"O":65813,"H":89479,"I":164982,"J":77783,"K":45111,"U":61602,"T":117987,"W":42348,"V":73826,"Q":14589,"P":196230,"S":238878,"R":128546,"Y":18365,"X":15982,"Z":13917,"f":625832,"g":841835,"d":2804209,"e":9206578,"b":583197,"c":1872040,"a":5160230,"n":5070939,"o":3525396,"l":3535844,"m":1717377,"j":148113,"k":150020,"h":692514,"i":4746975,"w":64787,"v":659313,"u":3519294,"t":4403208,"s":4479915,"r":4208721,"q":428703,"p":1557910,"z":87206,"y":309342,"x":204437,"É":44421,"ï":13787,"î":22540,"ê":38749,"é":1751958,"è":218696,"ç":101170,"â":19710,"à":277569,"û":11801,"ù":7682,"ô":30698,"œ":8733," l":1232220," m":320276," n":242807," o":228420," h":94558," i":171950," j":98827," k":16664," d":1997342," e":1227865," f":345437," g":129922," a":696872," b":124278," c":594010," y":7295," u":524523," t":219372," w":8382," v":133967," q":138426," p":728591," s":616330," r":286749," J":75893," K":42648," H":85423," I":133876," N":84973," O":54606," L":335414," M":194304," B":161567," C":266111," A":214898," F":109790," G":105282," D":105238," E":106937," Z":13187," Y":17633," X":12755," S":214333," R":117248," Q":13527," P":183259," W":40332," V":62074," U":57563," T":104796," à":274935," î":9932," é":209998," ê":8160," É":44088,"A ":15041,"Da":22440,"Cl":11868,"Co":65041,"Cr":12941,"Ce":29429,"Ch":51129,"Ci":6652,"Du":7894,"Do":13538,"De":21502,"Di":17162,"Fe":8715,"Fa":10235,"Eu":11691,"Es":11694,"En":18617,"El":32899,"Ge":14692,"Ga":20211,"I ":15646,"Fr":44421,"Fo":14809,"Fl":7090,"Fi":9942,"C ":21532,"Au":19631,"Ar":26711,"At":6889,"As":11086,"D ":7702,"Ba":35723,"Am":14264,"An":31376,"Al":37486,"Bu":10594,"Br":27743,"Ca":55063,"Bi":10547,"Be":29116,"Bo":31176,"Bl":7237,"Le":129356,"Li":24542,"La":91448,"Lu":9746,"Lo":35045,"Me":21247,"Mi":27542,"Ma":80328,"Mu":9199,"Mo":39904,"Ni":11148,"Ne":15700,"Na":15519,"No":31277,"On":6946,"Gi":8289,"Gr":22075,"Go":10803,"Gu":15739,"Ha":29460,"He":19781,"II":12760,"Hi":8034,"Ho":13597,"In":22404,"Il":64459,"Is":6832,"It":7525,"Ja":19690,"L ":40055,"Je":19939,"Jo":19144,"Ju":9216,"Ka":11065,"Un":43594,"Tr":15916,"To":17150,"Th":22226,"Ti":6718,"Te":13283,"Ta":12863,"V ":9181,"St":21014,"Su":20295,"Wi":11355,"Wa":11101,"We":6825,"Vi":21333,"Ré":14676,"Va":15441,"Ve":10793,"Pr":25219,"S ":10997,"Pe":15558,"Pa":62058,"Pl":7693,"Po":24431,"Pi":18433,"Ph":7701,"Ou":6784,"Or":12154,"Se":26414,"Sc":10127,"Si":15134,"Sh":8197,"Sp":6886,"So":23717,"Ru":6655,"Sa":56793,"Re":19635,"Ri":11472,"Ro":36071,"Qu":12001,"Ra":14786,"b ":23149,"a ":715548,"Yo":8948,"i ":246446,"ge":153192,"ga":79217,"bé":13640,"fl":14307,"ff":44067,"fi":98156,"fr":118819,"fu":33433,"fo":104257,"he":135815,"ha":144005,"gn":87721,"cé":45974,"cè":9053,"gl":38751,"gi":139568,"gh":16776,"gu":62416,"gr":93539,"go":42919,"du":266692,"g ":46447,"ea":84524,"eb":15026,"ec":142148,"ed":28114,"de":1288806,"di":214915,"dm":11618,"do":95110,"ds":13995,"dr":52545,"ew":13282,"ex":53989,"eu":311458,"ev":34640,"ey":24158,"ez":12097,"fa":75922,"h ":43257,"fe":44423,"eg":22246,"ef":23663,"ee":14949,"el":261273,"ei":65911,"ep":62933,"eo":13735,"en":1013332,"em":298038,"et":487438,"aï":8404,"es":1468583,"aî":9959,"er":561408,"ca":174843,"e ":4165476,"by":10945,"bs":8710,"br":109877,"bu":41947,"bo":48835,"bl":80107,"bi":56922,"be":65791,"da":250547,"f 
":44719,"cy":11950,"cu":58711,"ct":159489,"cs":7871,"cq":7986,"cr":84742,"co":398094,"ck":30588,"cl":49217,"ci":192973,"ch":228563,"ce":285004,"cc":27518,"c ":86602,"az":14616,"ay":48306,"ba":99079,"d ":356687,"at":402587,"as":164469,"ar":580946,"aq":13150,"ax":8225,"av":102326,"au":313806,"ak":16011,"al":437033,"ai":491839,"aj":9782,"ao":15479,"ap":112014,"am":169689,"an":984305,"ac":155600,"ad":94574,"ab":78438,"ag":129146,"ah":12966,"ae":24602,"af":19038,"nu":48492,"nt":781378,"ns":405957,"nr":20379,"nq":10241,"no":177623,"nn":194465,"nz":7383,"ny":16039,"nv":29268,"oe":7051,"of":34594,"oc":112798,"od":60253,"oa":9189,"ob":47107,"om":320717,"on":978231,"ok":8561,"ol":182032,"oi":182944,"og":68867,"oh":10520,"ot":93297,"os":119510,"ov":56208,"ou":495961,"op":94277,"oo":30133,"or":397296,"oq":6742,"r ":618020,"ox":6981,"ow":11940,"oy":26184,"pe":191299,"pa":336824,"lè":14938,"pl":106081,"lé":66665,"po":236682,"ph":72167,"pi":70246,"lo":174519,"hé":40769,"lm":23015,"hè":8383,"ll":348027,"ls":35163,"lp":13659,"lv":7890,"lu":105730,"lt":35681,"ly":20798,"o ":123736,"ma":245390,"mb":101583,"me":421982,"iè":67396,"mi":181182,"mm":149391,"ié":40043,"mp":121199,"mo":139179,"mt":11044,"ms":10324,"mu":103041,"my":7264,"p ":21047,"na":254840,"nc":212957,"nd":247872,"ne":645258,"nf":26140,"ng":123325,"ni":252369,"nk":9186,"ju":35235,"jo":42449,"fé":31064,"ki":18400,"ke":23430,"ka":17710,"m ":101180,"ko":10158,"gé":41926,"li":366432,"le":1033424,"ld":17364,"lg":18291,"lf":6672,"la":653743,"lb":19702,"n ":1301691,"hr":18002,"ht":13842,"hu":32067,"hi":111052,"hn":14386,"ho":82392,"hl":7215,"dé":209539,"id":89273,"ic":228813,"ib":37956,"ia":126735,"ig":111904,"if":68146,"ie":498651,"hy":15296,"k ":41221,"iq":189141,"ir":210609,"is":657338,"it":513778,"iu":11403,"iv":116504,"ix":21397,"ik":10626,"il":316521,"im":87035,"in":595401,"io":414857,"ip":60451,"je":33885,"iz":8646,"l ":543603,"ja":25992,"xi":22025,"tè":15940,"té":190035,"xp":12895,"xt":10929,"z ":20913,"xa":6853,"xe":16992,"wi":9300,"sé":88528,"rô":7192,"y ":99277,"wa":16319,"we":7882,"rè":35139,"ré":242093,"vi":188474,"vu":6788,"vr":43893,"rê":8320,"vo":57579,"ux":99749,"uv":71595,"ve":234012,"va":93360,"x ":120860,"ui":230155,"uj":8246,"ul":117513,"ue":407659,"uf":8137,"ug":28998,"ur":515467,"us":227928,"ut":191898,"um":73719,"un":620155,"up":64955,"ty":19621,"tu":167178,"tt":77237,"ub":50581,"ua":51699,"ud":59261,"uc":57765,"w ":13938,"to":173702,"pé":53976,"tl":8663,"pè":21910,"ts":102393,"tr":321129,"te":602785,"ti":612679,"th":96741,"v ":6801,"où":7384,"tb":13422,"tc":11543,"oû":9564,"ta":328001,"su":146804,"ss":210582,"st":753670,"sy":23798,"sl":12189,"sk":12848,"sn":9845,"sm":19680,"sp":78469,"so":222811,"sq":17522,"sc":60065,"se":403227,"sh":19364,"si":325826,"u ":509520,"sa":131590,"rr":83294,"rs":168244,"rt":289803,"ru":65582,"rv":27573,"ry":20802,"rq":9625,"rp":15297,"ro":341033,"rn":84154,"né":177701,"rm":89580,"rl":33141,"rk":16207,"nç":94772,"ri":487470,"rg":66626,"rf":12647,"re":763207,"rd":96680,"rc":70956,"rb":28741,"ra":505286,"t ":1634972,"qu":422607,"mê":10277,"mé":91911,"mè":10378,"s ":1913426,"pt":48826,"pu":65796,"pp":68088,"pr":230460,"ps":19103,"zi":10741,"ze":10497,"za":12596,"zo":10854,"vé":17663,"ye":16656,"yc":12861,"ya":27740,"yt":7956,"ys":37929,"yr":15599,"yp":14150,"yo":8853,"yn":14467,"ué":92385,"ym":16691,"yl":12792,"Ét":26242,"ât":11174,"à 
":276969,"éé":15388,"îl":9937,"êm":12225,"êt":20220,"él":51000,"éo":28234,"ép":86639,"ém":44293,"én":55491,"és":107336,"ét":140932,"éq":14455,"ér":173640,"év":51715,"éb":22743,"éa":37956,"éd":70000,"éc":110985,"éf":19719,"ée":230327,"ég":99948,"èm":27722,"èn":11542,"èr":67257,"ès":34878,"èt":16045,"èv":6684,"èg":9962,"èc":26515,"ço":8440,"ça":89329,"é ":372600,"ût":9917,"ù ":7557,"ôt":12300,"œu":7751,"一":9376," Ga":20103," Ge":14539," Fo":14685," Fr":44293," Fi":9743," Fl":7048," Ha":29378," He":19697," Go":10717," Gr":21904," Gu":15637," Gi":8161," Ho":13528," Hi":7850," Je":19786," L ":35977," Ja":19604," Is":6751," It":7499," In":22151," Il":64299," Ka":10962," Jo":19013," Ju":9181," La":90821," Le":128061," Li":24245," Ma":79831," Mi":27364," Me":21128," Lo":34906," Lu":9698," Ne":15538," Na":15371," Ni":11097," Mo":39732," Mu":9105," C ":11416," Am":14164," An":31164," Al":37253," Ba":35535," Au":19508," At":6802," As":10968," Ar":26331," Be":28995," Bi":10366," Bl":7174," Bo":30964," Br":27614," Bu":10535," Ca":54551," Ce":29219," Ch":50964," Cl":11738," Cr":12852," Co":64524," Da":22266," Di":16958," De":21321," Do":13217," Du":7843," El":32823," Es":11656," En":18307," Eu":11642," Fe":8673," Fa":10157," Wi":11260," We":6753," Wa":11001," Yo":8931," a ":54049," Ou":6671," Or":12055," Po":24205," Pl":7593," Pi":18345," Ph":7574," Pe":15479," Pa":61683," No":30970," On":6799," Ra":14631," Qu":11878," Ro":35894," Re":19473," Ri":11439," Pr":25007," Su":20225," St":20131," Ta":12776," Th":22049," Te":13049," Tr":15810," To":17022," Sa":56588," Sh":8106," Si":14799," Sc":9966," Se":26149," So":23336," Sp":6786," Va":15389," Ve":10579," Vi":21221," Ré":14605," Un":43425," ja":19099," l ":229130," je":16653," im":17160," in":90112," il":33523," is":6946," it":13099," jo":28748," fé":16770," ju":33224," ha":21033," gr":51361," go":9168," gu":9190," hi":17321," dé":147001," ho":26370," hu":11315," ne":11993," na":26573," mu":26866," mo":87695," on":19443," oc":20805," of":17687," ob":10074," no":93418," le":471758," li":50278," n ":10043," la":426807," gé":20899," me":31539," mi":31668," ma":94790," lu":10283," lo":32481," af":11272," ag":11352," ab":10657," ac":41475," ad":17435," am":34528," an":80768," ao":8597," ap":47388," ai":15942," al":42510," av":56190," au":168550," ar":49629," at":13572," as":18267," d ":197071," ba":44107," bi":13538," be":11213," bo":15457," bl":6713," bu":7813," br":18993," ca":62413," et":342046," es":474914," en":328723," em":8216," el":14313," fe":12823," fa":58915," eu":10657," ex":33671," fu":26386," fr":103021," fo":71702," fl":7457," fi":46439," ge":16870," ga":14369," cl":20052," co":290856," cr":32388," ce":54595," ch":72895," ci":26401," da":165596," cu":9192," cy":6653," do":48107," dr":11453," de":1120211," di":79077," du":223778," té":10776," ru":15648," sa":43475," se":95411," sc":20238," si":97963," sp":20322," so":138476," qu":138128," ra":22818," re":79730," ri":12843," né":88113," ro":35414," pu":26022," pr":170981," s ":23984," mê":10232," mé":23004," ou":87553," op":9623," or":44807," pe":62711," pa":204808," lé":7304," pl":62779," po":141674," pi":14858," ph":17219," sé":19742," va":14483," ve":21390," vo":19720," vi":65672," ré":111803," ut":13716," un":501687," ta":13187," où":7340," sy":15329," st":19632," su":114473," tr":64495," pé":8718," to":29268," th":29162," ti":14570," te":41930," Ét":26213," à ":274908," êt":8158," év":15420," éq":11336," ét":84332," ép":12565," él":18021," éd":13385," éc":33500," ég":11829," 
îl":9913,"Eur":7732,"En ":11566,"Ell":27452,"Fra":34835,"For":6793,"II ":8125,"Her":7505,"Hau":10362,"Gra":11197,"Ind":7362,"Il ":58031,"Bar":7002,"Bas":7999,"All":7697,"Ang":8461,"Cal":11943,"Car":9781,"Can":10563,"Ber":8508,"Bel":7820,"Bou":9492,"Dan":8952,"Chi":7925,"Cen":6692,"Cet":9727,"Cha":27535,"Cor":8959,"Com":17296,"Col":7940,"Con":15750,"Cou":6779,"New":7835,"Nor":17044,"Pie":8398,"Par":29868,"Pro":9444,"Ita":6863,"Jea":13129,"Les":29698,"Le ":85946,"Lan":7053,"La ":64716,"Lou":8888,"Loi":7362,"Man":8487,"Mar":33987,"Mon":17275,"Mic":7917,"Sud":6967,"Sta":7499,"Son":7414,"Sai":21598,"San":8752,"Val":6713,"Uni":28327,"Un ":8531,"The":11621,"bit":13396,"bil":9380,"ble":36471,"bli":30327,"bor":11093,"bou":14756,"be ":11661,"ban":14082,"bal":23571,"bat":8684,"bas":21924,"bar":8519,"bec":7102,"ber":20504,"bel":11445,"bie":9676,"ca ":7169,"car":22613,"cat":26927,"can":25116,"cap":8531,"cad":7391,"cal":21842,"cai":32564,"ce ":152975,"bri":16551,"bra":7577,"bre":74429,"bum":11288,"but":12601,"by ":8395,"am ":12290,"al ":83242,"ail":20993,"ain":126816,"air":72297,"ais":165056,"ait":69408,"aie":8332,"acé":7099,"agi":9624,"agn":32293,"ago":7536,"anv":10025,"ano":12803,"ann":37434,"ant":198270,"ans":198981,"ane":14041,"ang":47054,"ani":45704,"ana":30499,"anc":97402,"and":98722,"amm":17042,"amp":23230,"ami":37168,"ame":15452,"amb":9570,"ama":14537,"alt":7552,"alo":12342,"all":56591,"ali":100273,"ale":107350,"ala":20659,"alb":12381,"an ":76640,"abe":9364,"abi":15595,"abl":19961,"abo":9408,"abr":8483,"ae ":16557,"ac ":8751,"aff":8179,"ai ":16674,"aga":7441,"age":55470,"ado":7415,"adm":9823,"adi":21924,"ade":15733,"aci":10019,"ach":16602,"ace":28163,"acc":10028,"ada":13056,"act":43926,"até":6761,"ays":15243,"aya":8788,"aqu":12595,"at ":36684,"arg":12036,"are":21800,"ard":35646,"arc":26378,"arb":8126,"ara":32792,"aro":14440,"arn":8384,"arm":11551,"arl":15187,"anç":91515,"ari":62241,"aru":7211,"arq":6770,"arr":20556,"ars":16927,"art":134713,"au ":118568,"asi":7486,"ase":11574,"ar ":120208,"api":10314,"aph":15548,"apo":12040,"app":42097,"apr":9346,"as ":37163,"amé":27799,"ava":24401,"aux":51164,"aut":60224,"avr":9575,"avo":10413,"avi":13083,"ave":40354,"ay ":10427,"ata":13379,"aoû":8368,"ast":22288,"ass":51465,"atr":15439,"ato":12284,"ate":44406,"ati":192812,"ath":17738,"auc":6672,"att":16852,"ats":23409,"atu":15332,"aul":9810,"aum":7753,"aur":11354,"aus":19732,"aud":9105,"jet":8119,"jeu":19460,"jan":10693,"fév":8788,"fér":12432,"jou":35022,"itr":12118,"ito":10908,"itu":87676,"itt":10859,"its":10569,"ism":13926,"iso":19540,"isp":7420,"iss":50166,"ist":125656,"ita":62348,"ite":57973,"ith":6663,"iti":60924,"isé":36821,"iva":20746,"ix ":16528,"ivi":25932,"ive":55895,"is ":216454,"ion":363871,"iol":6761,"ipa":18735,"ipe":13929,"ir ":34075,"iro":12325,"iné":18867,"iri":8297,"isi":28415,"ise":101298,"isc":8611,"isa":28074,"iqu":188846,"ire":120253,"ira":8690,"irc":8347,"it ":122098,"ité":60635,"gén":18856,"jus":7076,"jui":19529,"ham":23355,"han":29050,"hau":9292,"har":26334,"hab":12359,"he ":52407,"hel":8715,"hef":7684,"hes":11848,"her":19826,"hie":13715,"hin":15050,"hil":13377,"his":19375,"hiq":6944,"dé ":18734,"déb":7151,"déc":35759,"go ":7689,"cée":7075,"céd":16748,"gle":10717,"gli":6716,"gla":15951,"gno":9326,"gni":10059,"gne":50271,"gna":8886,"gou":8451,"gro":28558,"gra":41927,"gre":10583,"gui":7723,"gue":37672,"ial":35445,"ian":23438,"iat":15622,"ic ":12032,"ibl":8749,"ibu":7144,"ia 
":21961,"ieu":43579,"iel":22526,"ien":138676,"ier":92906,"ies":16655,"iff":14012,"ifi":22387,"ict":17983,"icu":12669,"ico":11245,"ick":6729,"ici":37411,"ich":23588,"ice":26902,"ie ":169464,"ica":66266,"idi":10544,"ide":31414,"ida":13896,"if ":16206,"il ":58832,"ige":6657,"igh":9785,"igi":21897,"igu":11721,"ign":41717,"idé":15214,"imp":16150,"ime":18408,"imi":12406,"inc":41459,"ind":21942,"ina":36398,"ino":12991,"int":75915,"ins":44722,"inf":11189,"ine":105011,"ing":34736,"ini":40626,"ila":9244,"in ":120797,"ilo":10879,"ill":136561,"ilm":14297,"ili":45030,"ile":18930,"ima":15975,"io ":17855,"ils":11236,"hol":10188,"hom":18219,"hon":13352,"hor":8438,"dév":10462,"dée":10321,"déf":8940,"dém":9364,"dép":53945,"dér":20431,"dés":15740,"ht ":8492,"hum":15199,"ffe":7771,"ffi":11627,"fes":12569,"fer":7877,"fam":26250,"fai":22917,"fac":6862,"ext":9480,"ez ":9334,"exp":11699,"exi":10548,"eta":8136,"ete":7713,"eti":12647,"esp":33176,"est":489584,"ess":57266,"eul":7484,"ett":36757,"ew ":8505,"eve":10111,"eva":7495,"evi":7979,"euv":6803,"eut":10699,"eur":177418,"eus":14865,"eux":38960,"ey ":17748,"er ":177809,"es ":856843,"ept":19543,"epu":14012,"elé":11860,"epr":16261,"erl":8779,"eri":17387,"erg":13600,"ere":10113,"erc":19130,"era":14672,"erb":9918,"et ":392437,"esc":6799,"eu ":31850,"erv":19978,"err":46160,"ert":38461,"ers":78150,"ern":42947,"erm":29874,"ero":7440,"en ":352124,"ela":11282,"ele":8299,"eli":13751,"elg":9644,"ell":93604,"elo":21097,"els":10727,"emb":53506,"ema":21014,"eme":149235,"emi":31587,"emp":25298,"ene":6850,"ena":20675,"end":38663,"enc":42120,"enn":54421,"eni":9389,"enu":8876,"env":9265,"ens":48323,"ent":371396,"enr":18661,"eil":18380,"ein":23896,"eig":7921,"el ":58534,"gis":10364,"giq":10802,"gin":20723,"gio":57044,"gie":22112,"ght":8779,"gen":26739,"ger":14731,"ges":19429,"ge ":72294,"gar":9985,"gal":16426,"gan":17016,"ga ":6882,"fus":7249,"fut":21703,"fra":95189,"fri":7633,"for":45771,"fon":27020,"foo":11436,"foi":12370,"fic":24328,"fil":25221,"fin":20051,"ffé":6845,"da ":14864,"de ":945562,"dai":12767,"dae":8896,"dat":11688,"dan":177922,"cul":27372,"ctu":20091,"ctr":11700,"cto":18088,"cti":59878,"cte":30379,"cré":24660,"cla":14396,"cle":15005,"clu":9348,"ché":8767,"co ":9868,"cié":10273,"con":120854,"col":28490,"com":148564,"cor":20969,"cou":37141,"cs ":7503,"cqu":7392,"ct ":7934,"cra":8267,"cri":32184,"cro":10112,"cci":7074,"cea":6989,"ch ":14828,"cer":17030,"ces":36299,"cet":8987,"cen":24433,"cem":11283,"cel":14093,"cha":54442,"cia":30366,"ck ":14947,"cie":55297,"cid":7429,"che":76684,"chi":31813,"cho":10460,"chn":7132,"cir":7170,"cis":7169,"cit":9428,"cin":16999,"cip":23717,"cke":8035,"ed ":9999,"ec ":39615,"ean":16858,"eau":41781,"eff":6903,"ech":13667,"ef ":8454,"ect":52251,"eco":14578,"dur":7123,"don":36748,"dom":8813,"dou":8007,"ds ":11646,"dmi":10099,"doc":6954,"dui":12311,"duc":16031,"dri":6694,"dra":6827,"dre":21768,"du ":216503,"dro":13106,"dic":10492,"dia":18227,"der":20812,"des":229646,"deu":19332,"dev":7361,"del":7755,"den":24656,"dep":13089,"di ":9657,"do ":8222,"div":14424,"din":10138,"dio":11927,"dir":13444,"dis":29420,"dit":31296,"die":29137,"dif":14101,"rga":14359,"ri ":10992,"rgi":6778,"rge":19992,"ret":19172,"res":111968,"reu":15390,"rg ":10612,"rea":7335,"rec":29178,"reg":9663,"rem":41279,"ren":49926,"rel":19572,"rer":6909,"rep":18635,"rdi":12299,"rde":11104,"re ":401675,"rco":8273,"rch":25796,"rce":14227,"rd 
":49795,"rap":22951,"ras":10406,"rat":61280,"rav":14677,"rbe":7330,"rai":37514,"rag":12671,"ran":191224,"ram":14978,"ral":42593,"rab":8579,"rad":17757,"rac":18041,"rs ":108005,"ros":12152,"rot":13239,"rom":20207,"ron":45128,"rop":30358,"rou":56658,"rov":25371,"rod":19072,"roc":18144,"roi":33087,"rol":7836,"rof":11414,"rog":8618,"rna":26829,"rne":26551,"rni":14539,"riè":7237,"nér":15715,"ro ":12314,"rma":27965,"née":51678,"rme":33077,"rmi":8879,"rle":10727,"rla":9260,"nça":87637,"né ":83238,"rip":8171,"rio":12752,"riq":23356,"rit":49518,"ris":61412,"riv":19174,"rig":30515,"ril":14567,"rin":32694,"rim":9133,"ria":17879,"rib":8141,"ric":64397,"rid":9964,"rie":84165,"rk ":9714,"ruc":9646,"rus":10779,"rvi":9893,"rve":7324,"ry ":13884,"rsi":16639,"rso":15966,"rse":12239,"rta":21499,"rto":8213,"rte":72339,"rth":9356,"rti":82832,"rts":8395,"rtu":7199,"rmé":10475,"rt ":67054,"rqu":9541,"rro":9520,"rri":15769,"rre":35748,"rra":13162,"sac":6821,"sai":15869,"san":31853,"sat":19662,"sa ":20364,"si ":23508,"sie":23358,"sid":13927,"sic":8539,"sit":96579,"sis":11253,"siq":13014,"sin":20761,"sio":46892,"sil":9278,"sig":20886,"scr":8195,"se ":185840,"sca":7156,"sci":11470,"sco":9152,"ser":33436,"ses":32384,"seu":18614,"sea":8788,"sei":15724,"sec":11236,"sep":15495,"sen":31564,"sem":23920,"sel":11350,"spo":15398,"spe":6813,"spa":15911,"sou":38682,"sol":8864,"son":104745,"sor":19583,"soi":7516,"soc":22585,"st ":465410,"squ":17455,"sla":8458,"siè":10850,"sme":14217,"stè":10526,"sys":7499,"où ":7381,"sse":73979,"ssa":25709,"sso":20070,"ssi":65158,"ssu":9524,"ste":86262,"sta":32395,"spé":11204,"sto":25947,"sti":51286,"spè":18097,"str":64918,"sud":12096,"sui":13416,"sul":7775,"sup":9440,"sur":74845,"tai":73243,"tal":50318,"tag":11022,"tab":9930,"oût":9507,"tba":12570,"tat":51850,"tar":13094,"tan":65935,"tam":9043,"tch":9033,"te ":229351,"ta ":14870,"pe ":50543,"par":234438,"pat":9116,"pas":16097,"pay":9300,"pag":21493,"pal":18293,"pan":7834,"phe":9797,"pha":8008,"pho":13907,"phi":21732,"peu":15687,"pen":18365,"per":42656,"pet":8398,"pes":12928,"pel":20669,"pla":26028,"pli":9068,"ple":16401,"plo":8634,"plu":41224,"lé ":12711,"phy":6923,"pio":10675,"pir":7633,"pit":8267,"por":43095,"pop":8694,"pou":69236,"pos":40507,"poi":8423,"pon":18257,"pol":30878,"ps ":10703,"ppo":8721,"ppa":15829,"ppe":26475,"lév":6751,"lég":7457,"lée":13954,"pub":20867,"pte":18796,"pti":14976,"pri":39208,"pre":41906,"pro":84520,"pui":23064,"pul":10006,"prè":17131,"pré":40058,"mé ":9595,"mêm":9754,"mée":9397,"méd":12609,"mét":11009,"mér":37720,"qu ":18907,"qua":24258,"que":267169,"qui":99689,"qué":9551,"ra ":22507,"ngl":20995,"ngu":17741,"ni ":11818,"nge":18618,"nga":7486,"ndé":17148,"nel":18137,"nen":6872,"nem":17361,"ner":14652,"net":8307,"nes":51020,"neu":14088,"ng ":26468,"nfo":8493,"nct":9124,"nco":15938,"nci":43333,"nce":105515,"nch":14886,"ne ":496030,"ndu":10752,"ndr":20713,"ndo":10176,"ndi":34156,"nde":56096,"nda":37414,"nal":54750,"nan":26247,"nar":12696,"nad":12377,"nag":10703,"nai":30209,"nd ":51106,"nau":12991,"nat":48959,"na ":18193,"nté":16371,"ny ":7135,"nvi":18890,"nve":7658,"nue":12712,"nto":18893,"nts":41197,"ntr":72123,"nti":46425,"nta":41915,"nte":98733,"nsu":6773,"nso":9167,"nst":29806,"nse":30814,"nsi":22894,"nsc":6653,"nu ":15262,"nné":23464,"nre":13930,"nt ":427954,"nqu":6929,"ns ":274209,"nol":9967,"noi":11530,"nom":60135,"non":15485,"not":12329,"nor":18442,"nov":11258,"nou":9106,"nne":96393,"nna":28812,"nni":16118,"nnu":18917,"no 
":11795,"nif":7747,"nie":43637,"nic":15379,"niv":18250,"nis":60898,"nit":15799,"niq":25898,"nio":9717,"nim":8520,"nin":8256,"ogr":17742,"ogi":23895,"ogn":7673,"oi ":11549,"ois":67090,"oir":45748,"oit":23632,"oin":16797,"ol ":10751,"och":11107,"oci":26047,"ock":12402,"oca":13060,"occ":8851,"ode":18688,"oct":13097,"of ":10906,"odu":17793,"off":9016,"ofe":9519,"obr":10777,"obi":7081,"oye":7463,"oya":11498,"osé":9652,"oue":24956,"ouc":7361,"oti":7550,"ote":10769,"oto":12004,"opé":11013,"ost":16493,"ota":13987,"otb":12106,"osi":13992,"ose":15543,"oss":13431,"ovi":29641,"ouv":52919,"ove":15893,"oug":6908,"oui":10077,"oul":19390,"oup":35466,"ous":44108,"our":142367,"out":30934,"opo":10738,"opp":14092,"ope":8783,"oph":15160,"os ":20357,"opu":8920,"or ":16010,"oot":13483,"ork":6806,"orm":40453,"orn":11249,"orr":9505,"orc":7615,"ord":43495,"ore":17630,"org":20050,"ori":52286,"ou ":81997,"ort":98537,"ors":17665,"omé":6755,"ot ":11871,"ora":19855,"ola":10628,"on ":413868,"oli":37368,"oll":18132,"ole":20481,"olo":39631,"olu":16814,"om ":32719,"ona":46449,"ond":64776,"onc":25758,"onf":7947,"one":18885,"ong":21441,"oni":23679,"onn":93395,"ono":18517,"ons":103783,"ont":114426,"ony":7110,"oma":27935,"ome":15382,"omb":20959,"omi":18767,"omm":120540,"omp":47351,"omo":9810,"omt":10618,"la ":409873,"le ":650000,"lab":8668,"lac":18171,"lag":11480,"lai":40035,"lan":67550,"lam":6667,"lar":10935,"lat":30778,"las":17285,"ld ":8004,"lbu":11677,"llé":6984,"ls ":27190,"lon":33452,"lom":9149,"lop":13744,"lor":20550,"loc":9951,"log":34746,"loi":10734,"los":7562,"lié":10397,"héo":7867,"lti":6832,"lub":8118,"lue":8111,"li ":8120,"leu":30125,"les":193810,"let":24641,"ler":15063,"lem":54863,"len":12174,"lec":18621,"lo ":9843,"lla":30932,"lle":228673,"lli":25721,"llo":13913,"lm ":12927,"ll ":25434,"lit":56867,"lis":75238,"liq":19529,"lio":7237,"lin":24717,"lim":6784,"liv":7668,"lic":17589,"lia":14317,"lib":8142,"lig":13313,"lie":66363,"lif":7647,"ma ":11524,"mb ":9981,"mai":41522,"mag":14210,"mar":34078,"mas":7661,"mal":10704,"man":59224,"mat":42658,"mba":7370,"mbl":12331,"mbr":52251,"mbo":7187,"me ":138747,"met":15733,"mes":29770,"mer":17959,"mem":10190,"men":192406,"lui":7872,"lut":8740,"lus":46191,"mpi":17291,"mpr":7983,"mpo":28665,"mpl":18530,"mps":7384,"mpt":6961,"ms ":7519,"moi":10596,"mod":8999,"mon":39570,"mor":34638,"mot":9918,"mou":10824,"mpa":13773,"mmé":6819,"mus":17057,"mul":7053,"mun":72605,"mi ":7506,"min":39720,"mil":42847,"mis":18157,"miq":9951,"mit":10387,"mie":22944,"ièr":39842,"ièm":14714,"ié ":12315,"mmu":65417,"iét":13514,"miè":11922,"mma":8876,"mme":60859,"yst":10800,"ys ":16246,"yen":7179,"yan":8923,"ué ":23787,"uéb":10376,"uée":52636,"tér":21675,"tés":15841,"tél":10092,"tée":11073,"tèr":7376,"tèm":7268,"té ":120644,"sée":25892,"sér":11984,"sé ":33254,"réé":15299,"vri":19364,"vre":15576,"vra":7557,"vir":10748,"vil":38576,"vin":28833,"vic":9378,"vid":11495,"vie":25896,"vit":11987,"vis":24817,"ré ":18385,"rès":23870,"réc":11353,"rée":15149,"réf":7765,"réa":27396,"rén":7293,"rég":60534,"rét":7637,"rés":42404,"rép":7979,"voi":21769,"vol":18467,"ver":63394,"ves":12081,"ven":32893,"vem":17149,"vel":23577,"vea":6695,"vec":28438,"ve ":39545,"val":17132,"van":23469,"var":6981,"vai":21124,"uté":12504,"ux ":89405,"uve":48851,"uvr":11649,"usi":30125,"use":23041,"ust":23088,"uss":29122,"uti":33117,"ute":37011,"uto":14655,"utr":16881,"us ":97767,"ut ":58626,"ura":21007,"ure":61643,"urg":15629,"uri":18044,"urn":17785,"uro":15520,"urs":51288,"urt":11255,"ur 
":263487,"upe":34564,"uma":8198,"umb":11805,"ume":16466,"uni":34130,"una":10830,"une":273015,"um ":21626,"ult":20316,"uli":14760,"ule":23978,"ula":21824,"un ":279711,"uil":20013,"uin":15450,"uip":9137,"uis":42982,"uit":34639,"ul ":11106,"ui ":87556,"uct":19646,"ude":13798,"udi":11213,"ue ":252733,"uch":11064,"ueu":15091,"uer":16125,"ues":65033,"uen":9815,"uel":31495,"ub ":7072,"uat":10437,"uar":8894,"uan":14849,"ubl":25725,"ud ":22607,"tué":65263,"typ":6932,"ty ":7203,"tré":9963,"tur":39432,"tut":8273,"tud":13647,"tue":18585,"ts ":96064,"tre":117480,"tra":79361,"tri":46224,"tru":15862,"tro":41105,"tta":10904,"tte":35818,"ttr":6953,"pée":8023,"péc":10887,"to ":12714,"pér":17722,"tiè":7263,"toi":21756,"tob":11135,"tou":31577,"tom":9528,"ton":34164,"tor":25952,"til":20545,"tif":22436,"tie":47885,"tir":8477,"tiq":62118,"tit":40283,"tis":20099,"tin":33045,"tim":9651,"tio":235114,"thu":11875,"tia":8957,"tic":25452,"tiv":30333,"thé":16932,"pèc":18042,"tem":70682,"ten":37907,"tel":13125,"tec":11914,"th ":10711,"teu":75009,"tes":56728,"ter":82954,"ti ":26511,"tho":14369,"the":16191,"Éta":23397,"ège":6746,"èce":21913,"ère":67047,"ète":9916,"ène":11368,"ème":27688,"ès ":31596,"édé":24476,"éga":13682,"égi":62907,"écé":13741,"édi":27690,"éce":13512,"éci":16144,"ée ":173192,"écu":7487,"écr":22748,"éco":23770,"éfi":6764,"ées":42716,"éen":8156,"ébe":6903,"éal":20524,"éve":11864,"évi":9923,"éré":8391,"évo":13471,"évr":9038,"éta":51936,"éti":16412,"éte":7748,"étr":10893,"étu":6864,"été":39872,"éme":8284,"émi":12731,"émo":10213,"éna":8958,"éni":9260,"éli":8934,"éle":12690,"éma":11177,"éo ":6991,"équ":14445,"éra":46783,"ére":13799,"éri":83768,"éro":16442,"éné":26266,"ése":23781,"ési":28918,"épa":51263,"épu":10400,"épo":7454,"élé":15180,"és ":45990,"ême":12159,"êtr":10139,"çai":86644,"ût ":9033,"ôte":8179,"île":9715,"éé ":7606},"n_words":[66338594,78580813,56850284],"name":"fr"}
\ No newline at end of file
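Each profile added in this change is a single UTF-8 JSON object with the three keys visible above: a "freq" map of 1- to 3-character n-gram counts, an "n_words" triple of total 1-, 2-, and 3-gram counts, and the ISO 639-1 "name". A minimal Python 3 loading sketch follows; load_profile is an illustrative helper, not langdetect's own API (the real loading lives in detector_factory.py), and spelling out the encoding rather than inheriting it from the locale is exactly the kind of detail the Python 3 port depends on.

import json

def load_profile(path):
    # Profiles are one-line UTF-8 JSON files (note the missing trailing
    # newline in the hunks above), so json.load() consumes the whole file.
    with open(path, encoding='utf-8') as f:
        data = json.load(f)
    return data['freq'], data['name'], data['n_words']

freq, name, n_words = load_profile('nlp_resource_data/langdetect/profiles/fr')
print(name, n_words)   # fr [66338594, 78580813, 56850284]
print(freq['le '])     # 650000 -- the trigram count recorded above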
diff --git a/nlp_resource_data/langdetect/profiles/gu b/nlp_resource_data/langdetect/profiles/gu
new file mode 100755 (executable)
index 0000000..5ab783f
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/gu
@@ -0,0 +1 @@
+{"freq":{"ૈદિ":382,"g":235,"d":312,"e":960,"c":304,"a":1076,"n":720,"o":584,"l":382,"m":289,"h":369,"i":764,"u":324,"t":728,"s":517,"r":627,"ોટ ":345,"ેસા":764,"ોલ ":730,"ોર ":574,"ોદ ":1827,"ેત્":227,"ેતી":2222,"ેતમ":2186,"ેતપ":357,"ેડબ":253,"ેડા":1366,"ેડી":300,"ેગા":229,"ેગો":375,"ેઘર":242,"ૈકી":6300,"ેશમ":527,"ેશન":12436,"ેવી":831,"ેવા":710,"ઇડર":265,"ેરા":387,"ેરી":718,"આહવ":288,"ેલા":24917,"ેલી":519,"ેલુ":9935,"ેલો":381,"ોઇ ":458,"ેન્":278,"ેપુ":457,"આવે":34862,"ા":337683,"િ":47127,"સ":31472,"હ":20294,"શ":32541,"ષ":5409,"વ":91695,"લ":111041,"ળ":3931,"ર":102867,"ય":39143,"મ":113670,"ભ":35403,"બ":10569,"ફ":1198,"પ":49237,"ન":84304,"ધ":9131,"દ":38743,"થ":6321,"ત":89107,"ણ":9770,"ઢ":1233,"ડ":18443,"ઠ":3507,"જ":54268,"ઝ":1439,"ટ":6287,"ઘ":2525,"ચ":20557,"છ":25106,"ક":72592,"ખ":14557,"ગ":61691,"ઓ":8101,"એ":23599,"ઉ":5095,"ઈ":409,"અ":6168,"ઇ":1975,"આ":43598,"ં":82987,"૫":1391,"૪":875,"૩":2115,"૨":1146,"૯":1054,"૮":946,"૭":1034,"૬":461,"૧":5611,"૦":1269,"ૈ":7159,"ો":34921,"્":89060,"ૌ":562,"ુ":82336,"ી":42473,"ૃ":539,"ૂ":4236,"ે":108368,"આણં":435,"આદિ":1564,"ેશ ":483,"અને":1581,"અન્":344,"e ":271,"અમદ":630,"ેમ ":2812,"ેર ":1681,"ેલ ":1022," ૧":4255," ૩":409," ૨":679," ૫":978," ૪":492," ૭":700," ૯":551," ૮":625,"અગિ":1051," વ":14987," શ":3197," ર":16267," લ":5654," સ":11956," હ":3244," થ":1672," ત":31864," ધ":1718," દ":21808," ડ":1738," ઠ":222," ભ":34182," બ":4095," ય":383," મ":24848," ન":6795," ફ":765," પ":35455," છ":24245," ચ":2656," ઘ":628," ટ":479," ઝ":829," જ":21642," ઓ":682," ગ":30845," ખ":8068," ક":14981," ઉ":4757," એ":23366," આ":43205," ઇ":661," અ":6143,"આંગ":703,"્ચિ":11645,"્ટ્":549,"ોતર":377,"ોદર":1858,"ોનગ":236,"ોટા":473,"ોટી":225,"ોડા":794,"આઠ ":685,"ોની":800,"ોનો":2167,"ોરી":514,"ોળી":244,"ોલી":442,"ંવત":748,"ંબા":470,"ંબુ":281,"ંમત":254,"ંઠા":1406,"ંડવ":256,"ંદુ":455,"્ધ ":712,"ંદો":312,"ંધી":504,"ંતર":406,"ંચા":1418,"ંચમ":1337,"ંગા":221,"્ર ":966,"ોકો":3591,"્ય ":7092,"ંગણ":713,"ંખે":303,"ંગર":288,"્ષ ":789,"્વ ":2508,"એવા":6093,"્ષન":1137,"્ષિ":1509,"્વા":430,"્વે":772,"્વન":6820,"્વર":251,"્યન":12109,"્યત":720,"્યમ":432,"્યપ":428,"્યવ":2272,"્યા":2476,"્યુ":248,"્મદ":563,"્મા":375,"્લો":1068,"્લા":13052,"્રો":432,"્રે":602,"્રમ":649,"્રદ":871,"્રા":2175,"્રિ":476,"્રી":593,"્રહ":375,"્દ્":258,"્થા":288,"્તા":298,"્તી":856,"્તર":2535,"એક ":15869,"ઉદે":246,"ઉપલ":606,"ઉપર":392,"ઉત્":2557,"ઉમર":329,"િત":853,"િણ":1494,"વિજ":359,"ીં":343,"િમ":11976,"િપ":550,"િન":1570,"વાય":500,"િવ":4622,"વાર":525,"િશ":322,"ીક":534,"વામ":999,"િલ":14752,"ીઓ":697,"િય":2671,"િર":803,"વાસ":1799,"ીજ":386,"િહ":232,"વિક":492,"િસ":578,"વાલ":357,"ીત":406,"ીદ":281,"ું":22062,"વાદ":862,"વાન":404,"ીય":2619,"ીમ":860,"ીન":8731,"વાડ":2612,"ીપ":459,"ુક":18441,"ીવ":394,"ુખ":4304,"ીર":426,"ીલ":229,"વાગ":247,"વાઘ":254,"ીસ":510,"ુચ":222,"ુજ":13015,"ાં":48849,"ાઉ":298,"ાઇ":474,"ાક":921,"ાઓ":6449,"ાઘ":283,"ાખ":279,"ાગ":19994,"ાજ":13842,"ાચ":245,"ાટ":1064,"ાડ":4936,"િં":1255,"ાણ":2500,"ાથ":1359,"ાત":14028,"ાદ":2250,"ાન":14000,"ાપ":2091,"ાબ":1765,"ામ":34603,"ાય":4603,"ાર":20818,"ાલ":24953,"ાળ":1774,"વિર":286,"િક":2870,"ાવ":3657,"ાષ":849,"ાસ":4564,"ાહ":1263,"િજ":517,"વિસ":266,"હત":7478,"સી":2165,"સુ":1962,"સે":1528,"સા":8757,"સિ":770,"હવ":567,"સો":764,"હર":240,"સ્":3025,"સૌ":277,"હુ":529,"સા ":522,"હે":3680,"હા":2824,"હિ":2043,"હી":349,"હો":1278,"હ્":385,"શ્":12458,"ષન":1142,"સગ":988,"સર 
":276,"શહ":485,"શિ":374,"શા":1696,"શુ":2253,"શી":262,"સં":2001,"ષ્":864,"સમ":828,"સન":417,"સવ":569,"સર":997,"ષા":458,"સદ":328,"સત":251,"સણ":439,"ષિ":1579,"વદ":549,"વન":7118,"વર":2659,"વલ":1055,"શક":498,"વગ":256,"વસા":2722,"વડ":3280,"વત":1294,"વણ":314,"વે":36512,"શન":12531,"વૈ":520,"શમ":627,"વ્":2839,"વસ":7405,"વી":2081,"વિ":2302,"વા":17902,"લો":6663,"લ્":14395,"લે":641,"લા":40018,"લિ":942,"લી":2736,"લુ":28591,"લસ":736,"લવ":315,"વસ્":826,"વસે":1125,"ળી":539,"વં":274,"ળા":1514,"રો":1975,"ર્":7275,"રુ":979,"રી":7304,"રૂ":908,"રે":1966,"રસ":718,"વાં":660,"રહ":1839,"રા":36128,"રિ":1193,"રવ":1135,"લબ":632,"લપ":735,"લય":580,"લન":2427,"લક":304,"લગ":215,"રક":1797,"રગ":229,"રખ":218,"યવ":2310,"રજ":562,"યા":7096,"રડ":414,"યુ":510,"રણ":534,"રત":14232,"રથ":239,"રદ":1032,"શમા":566,"યે":847,"રન":646,"રપ":682,"યો":569,"રબ":221,"રમ":2425,"મર":703,"મમ":2823,"મલ":319,"મહ":10705,"રં":397,"મી":561,"યડ":242,"મુ":6197,"મા":44661,"મિ":1415,"યત":1490,"યપ":480,"યન":12830,"મે":837,"યમ":548,"મ્":803,"મો":2623,"બ્":1122,"ભર":899,"મજ":2296,"મગ":222,"મખ":281,"મણ":336,"મત":581,"મથ":816,"ભા":32622,"ભિ":388,"મપ":589,"ભો":320,"મદ":1417,"મધ":3415,"મન":2890,"બર":1834,"બહ":293,"મં":287,"બી":424,"બુ":465,"બા":2768,"બિ":244,"બો":581,"બે":417,"પો":705,"પ્":4053,"બન":257,"પલ":826,"પહ":227,"પશ":13852,"પર":1594,"પૂ":2675,"પૈ":6312,"પે":306,"પુ":5227,"પી":742,"પિ":245,"પા":6287,"ન્":1623,"નો":5861,"પણ":494,"પત":282,"પટ":291,"પડ":321,"પછ":431,"નવ":1459,"નર":658,"ધ્":3586,"ધો":283,"નપ":579,"નન":237,"ને":4269,"નુ":10812,"પં":2789,"ની":3990,"નિ":691,"ના":45180,"નસ":606,"ધા":1073,"ધુ":315,"ધી":641,"દે":14545,"ધન":837,"દ્":1191,"દો":459,"ધર":916,"સી ":1244,"નગ":2570,"દશ":215,"દસ":731,"દહ":218,"દા":3639,"દિ":5058,"દી":575,"દુ":1289,"દર":3345,"થવ":580,"વેલ":34942,"શના":12337,"થી":1041,"થા":882,"તો":531,"વૈદ":382,"તે":6014,"દક":1517,"થય":782,"ત્":12219,"થમ":1093,"થક":777,"તી":4432,"તુ":553,"તા":26724,"તિ":1123,"તન":712,"ણે":327,"તપ":511,"તર":3967,"તલ":356,"તમ":2718,"ણા":2366,"ણી":846,"ણવ":795,"તઘ":640,"ડો":2767,"ડુ":286,"ડે":1027,"ણં":571,"ડી":3535,"ડિ":410,"ડા":5615,"ડવ":598,"ડર":315,"ડભ":218,"ડબ":277,"ઠા":1831,"ટ્":673,"ટે":774,"વ્ય":2778,"ટિ":249,"સે ":1144,"ટી":761,"છે":23574,"જન":365,"છી":486,"ઝઘ":245,"જય":244,"છો":305,"જબ":1127,"જર":11973,"જે":2020,"જો":465,"જિ":13950,"જા":1399,"જુ":2555,"જી":794,"જ્":13119,"ઝર":271,"શુપ":2187,"ઝા":567,"સંવ":756,"ટક":225,"સંત":320,"સંખ":325,"ટા":840,"ટલ":348,"ગા":17980,"ગુ":12125,"ગિ":1160,"ઘડ":264,"ગી":427,"૯ ":627,"ગ્":891,"ગો":1143,"ઘર":948,"ઘો":548,"ચર":560,"ચમ":1383,"ચા":2321,"ચિ":12038,"ચી":259,"જં":215,"ચો":437,"ચ્":251,"જક":251,"શહે":470,"૫ ":1091,"કર":2446,"કમ":270,"કલ":513,"કપ":478,"ખં":251,"કડ":644,"ખલ":227,"ક્":3347,"કો":5466,"કે":1500,"૭ ":792,"કુ":6686,"કૃ":229,"કા":21625,"કી":6922,"કિ":350,"કહ":564,"કવ":583,"ગવ":1115,"ગલ":272,"ગર":2876,"ગમ":18397,"ખ્":4351,"૮ ":673,"ખે":6309,"ગન":575,"ગણ":1117,"ગઢ":779,"ખા":2071,"૧ ":1168,"શાળ":1055,"શાસ":252,"એવ":6158,"૨ ":320,"૩ ":1541,"૪ ":593,"ઓન":283,"એક":16184,"૦ ":810,"ઉદ":317,"ઉત":2595,"ઉપ":1216,"સગવ":904,"ઉમ":378,"આં":1102,"અગ":1187,"અં":436,"ઇડ":287,"ષના":751,"આહ":296,"ષનો":362,"આવ":35199,"આદ":1695,"આઠ":804,"આણ":440,"અમ":826,"અર":256,"અન":2071,"ંવ":912,"ંસ":486,"ંત":1429,"ંથ":310,"ંદ":2195,"ંધ":939,"ંબ":1038,"ંભ":281,"ંમ":287,"ંક":540,"શ્ચ":11655,"ંગ":2855,"ંખ":377,"ંચ":4016,"ંજ":464,"ંટ":564,"ંડ":982,"ંઠ":1433,"હે 
":1152,"શ્ર":320,"શ્વ":348,"૧૩":1467,"૧૧":1091,"૧૯":283,"૧૦":694,"વડો":2345,"ોટ":1277,"ોડ":1517,"ોજ":373,"વલી":267,"ોન":3584,"ોધ":322,"ોત":616,"ોદ":3809,"ોગ":284,"ોક":3826,"ોઇ":523,"્ટ":1147,"્ત":4385,"્ણ":325,"્દ":602,"્થ":651,"્ધ":1001,"્પ":336,"્બ":262,"વર્":2136,"્ક":609,"્ગ":267,"્ચ":11827,"ોમ":362,"ોલ":1612,"ોય":246,"ોર":2045,"ોવ":257,"વલસ":582,"ોળ":645,"્સ":302,"્ષ":3855,"્વ":11263,"્લ":14321,"્ર":8621,"્ય":26212,"્મ":1641,"ૂર":2771,"ુદ":699,"ુધ":815,"ુન":510,"ુણ":524,"ુત":213,"ુમ":369,"ુર":8763,"ુપ":2377,"ુવ":804,"ુસ":354,"ુલ":6351,"ૂચ":717,"વનો":376,"વનુ":6376,"ૃત":300,"ેક":249,"ેત":5179,"ેડ":2071,"ેટ":618,"ેઠ":219,"ેજ":326,"ેગ":653,"ેઘ":271,"ષા ":222,"વતા":228,"ેર":3345,"ેલ":37195,"ૈક":6315,"ેશ":13804,"ેવ":2024,"ેન":1094,"ેપ":540,"ેમ":3602,"ૈદ":389,"ેસ":990,"હિં":776,"હાલ":1485,"હાર":571,"હિન":919,"ઇ ":1018,"આ ":2702,"ાં ":41350,"ઓ ":7041,"હેર":525,"હેલ":239,"હેવ":536,"હેસ":730,"એ ":663,"ાઇ ":270,"હોદ":867,"હ્મ":326,"ાઓ ":6349,"ં ":62940,"ાગ ":214,"ાડ ":990,"ાદ ":1109,"ાણ ":231,"ાત ":12086,"ાન ":979,"ામ ":12051,"ાલ ":1653,"ાર ":2510,"ાય ":3046,"ાવ ":357,"િક ":1785,"ાસ ":1162,"ષિણ":1462,"ે ":34827,"ો ":11870,"સણા":264,"ષ ":917,"સ ":4627,"સમો":292,"વ ":3614,"શ ":598,"સરા":281,"ષ્ટ":602,"િ ":698,"ુ ":1292,"ી ":24520,"ા ":103799,"સવા":466,"સુર":1060,"દ ":4991,"થ ":242,"સીઓ":544,"ન ":4557,"સુદ":463,"ધ ":850,"સાડ":665,"સાત":801,"સાણ":973,"સાગ":213,"પ ":563,"સાય":2235,"સામ":236,"સાર":662,"સાવ":281,"સાબ":1406,"બ ":1292,"મ ":27791,"સોન":293,"ર ":16908,"ય ":11483,"લ ":10111,"હતા":268,"હત્":6808,"ળ ":723,"ક ":19636,"ગ ":1433,"સ્વ":268,"સ્થ":430,"ચ ":2010,"સ્ટ":235,"સ્ત":1316,"સ્ક":359,"જ ":4325,"ટ ":1263,"ડ ":2185,"ઠ ":958,"ઢ ":645,"હવે":238,"ણ ":3405,"હવા":327,"ત ":27700,"ૂચ ":710,"િત્":251,"ાસા":294,"ાસિ":270,"ાસી":1732,"ાહો":869,"ાષા":325,"ાસણ":337,"ુલ ":6018,"ાસર":237,"ાષ્":514,"ાલન":2232,"ાલપ":364,"ાલય":562,"ંગ ":904,"ારે":564,"ાર્":428,"ારો":350,"ારી":908,"ારા":1301,"ારત":13028,"ારમ":342,"ારડ":261,"ાયત":689,"ાયડ":229,"ામા":16128,"ુર ":3220,"ાવી":500,"ાવા":1445,"ાવલ":274,"િકે":222,"િક્":471,"ંચ ":1075,"ાવત":219,"ાળા":1219,"ાલો":667,"ાલુ":18139,"ાલી":427,"ાલિ":223,"ાલા":242,"ાનો":1081,"ંટ ":317,"ાનપ":429,"ાના":6337,"ાનુ":3493,"ાની":787,"ંજ ":235,"ાદર":601,"ામપ":347,"ંત ":433,"ામન":2336,"ામમ":2770,"ાબર":1409,"ાપ્":387,"ુદ ":457,"ાપી":523,"ાપુ":503,"ાપા":321,"ંદ ":612,"ીદા":229,"ીનગ":468,"ીના":6429,"ીને":1070,"ીની":270,"ીનો":264,"િસ્":315,"િલ્":13910,"િલો":366,"િવસ":2730,"િવા":1650,"િનો":226,"િના":989,"િયા":2403,"ાંટ":436,"ાંઠ":1422,"ાંડ":461,"ાંગ":1094,"ાંચ":1295,"ાંત":590,"ાંધ":547,"ાંદ":419,"ાંસ":263,"ીઓ ":502,"િમ ":11653,"િપ ":367,"િત ":360,"િણ ":1447,"ાણા":1231,"ાણી":404,"ાતી":550,"ાત્":264,"ાથમ":992,"ાતે":354,"ાટી":295,"ાટે":280,"િંમ":251,"િંદ":527,"ાડી":1596,"ાડા":2060,"ાજક":237,"ાજી":257,"ાજ્":12491,"ીય ":442,"ાકી":371,"ાગમ":18270,"ાગન":475,"ાઉદ":238,"ું ":21442,"ૂર્":2560,"ેટ ":236,"ુણા":437,"ુજબ":1107,"ુજર":11840,"ુપા":2208,"ુધન":587,"ીયન":389,"ીમા":379,"ીયા":1731,"ીમખ":262,"ીસમ":292,"ુકા":17641,"ુકો":529,"ુખ્":4233,"ુરી":2300,"ુરુ":440,"ુરા":1358,"ુરત":829,"ુવા":696,"તઘર":640,"ણવા":740,"દસ ":512,"ણાવ":406,"દા ":938,"તપુ":465,"તનગ":252,"દી ":294,"તના":216,"દુ ":479,"તમજ":2184,"તમા":321,"તરી":269,"તરા":350,"તો ":423,"થા ":316,"થી ":996,"નવ ":475,"થવા":574,"ના ":41544,"ને ":3899,"ની ":3744,"નો ":5537,"દરા":2352,"દરમ":423,"તું":277,"તાલ":18057,"તાર":306,"તાપ":566,"તાન":302,"ધા ":271,"તેમ":3056,"તેર":1418,"તેન":315,"દક્":1484,"ત્ત":2727,"થમિ":987,"ત્વ":7552,"ત્ય":281,"ત્ર":1452,"થયે":642,"નપુ":541,"પી 
":564,"ધીન":468,"ધાર":249,"ધાન":290,"નવસ":483,"નસવ":370,"ધ્ય":3481,"નર્":560,"દુધ":605,"દેપ":250,"દેશ":13431,"દેવ":535,"ધની":595,"દાવ":797,"દિક":394,"દાર":330,"દાદ":262,"દિવ":4303,"દાહ":863,"નગર":2214,"નગઢ":247,"પર ":286,"પણ ":420,"દોદ":293,"દ્વ":331,"દ્ર":524,"ધરા":484,"બા ":381,"પટે":233,"પાવ":384,"પાર":273,"પાલ":2355,"પાટ":403,"પાડ":735,"પાં":1274,"પશ્":11653,"પશુ":2197,"પલબ":596,"પરા":655,"પંચ":2575,"નું":10559,"નાં":923,"નાર":240,"નામ":306,"નાન":1138,"પછી":428,"ન્ય":559,"ન્દ":395,"બહુ":235,"બાક":373,"બાર":800,"બાય":217,"રજ ":269,"મા ":715,"મી ":253,"યડ ":217,"યન ":455,"બરક":1386,"મો ":1343,"પૂર":2616,"પુર":4959,"પૈક":6303," આ ":2656,"પોર":340," એ ":485,"પ્ર":3459,"પ્ય":389,"માં":41157,"માટ":294,"માન":402,"માણ":276,"માત":359,"માલ":325,"માર":263,"મિક":1063,"મહત":6802,"મહા":1848,"મહિ":850,"મહુ":233,"મહે":917,"યત્":701,"મેઘ":271,"મુખ":4261,"મુજ":1125,"મુવ":353,"યતઘ":640,"મપુ":552,"રે ":767,"મમા":2808,"મધ્":3365,"મદા":1324,"રી ":4562,"મના":2419,"રો ":232,"મજુ":2185,"મખે":264,"રા ":5544,"મતન":247,"મથક":765,"ભિલ":318,"ભાર":12956,"ભાગ":18607,"ભાષ":335,"બોર":287,"રત ":13401,"યા ":2563,"રમ ":485,"યો ":254,"ભરૂ":710,"બ્ર":421,"બ્ધ":597,"ળા ":1138,"રેગ":384,"ળી ":385,"રોત":365,"રાં":597,"રાય":238,"રામ":591,"રાવ":421,"રિક":325,"રાષ":510,"રાડ":230,"રાણ":287,"રાત":11870,"રાથ":991,"રાપ":541,"રાજ":13365,"રીય":783,"રીન":986,"રું":396,"રીક":260,"રિય":414,"રૂચ":710,"રવા":911,"રહવ":219,"રહે":1153,"રહ્":327,"રપુ":361,"રમ્":382,"રમા":804,"લો ":1417,"લા ":25318,"રના":235,"યેલ":764,"લી ":1812,"રદે":816,"રડી":227,"યાલ":588,"યાર":1934,"યાન":576,"યાપ":305,"રકા":1582,"લય ":553,"યવસ":2216,"યપૂ":410,"મોડ":217,"મોટ":454,"યનો":326,"યના":11717,"મ્ય":415,"યમા":407,"લન ":2219,"મ્બ":232,"લ્લ":14131,"વે ":1140,"લોલ":465,"લોડ":396,"લોદ":449,"લોક":3642,"વા ":7780,"વી ":1602,"લુક":18002,"લીમ":321,"લુણ":389,"લિય":312,"લું":9913,"વસ ":2535,"લાન":6932,"લિપ":369,"લાસ":286,"લાવ":223,"લાલ":307,"લામ":5764,"લસા":677,"શક ":371,"વર ":236,"લબ્":597,"વદ ":505,"લપુ":412,"વત ":759,"વડ ":417,"ર્ષ":1963,"ર્વ":2626,"ર્ય":411,"ર્મ":900,"કી ":503,"કા ":605,"કે ":834,"કો ":1354," ૧૦":680,"૧૦ ":535,"૧૧ ":1036,"૧૩ ":1412,"ગઢ ":538," ૧૩":1456," ૧૧":1079," ૧૯":274,"ગર ":1959,"કડી":283,"કડા":244,"કરવ":314,"કરી":1162,"કવા":445," હો":303," હિ":944," હા":540," સો":475," સૌ":273," સ્":714," સિ":322," સા":3562," સુ":1717," હત":654," સમ":395," સર":370," સત":226,"કાલ":238,"કામ":9196,"કાર":676,"કીન":6304,"કુલ":6004," લુ":420," લી":386," લિ":414," લા":253,"કહે":527," લો":3728,"કાં":1539," રહ":1203," રા":13805,"કાન":2517,"કાઓ":6055," સં":1919," શા":1268," શિ":264," શહ":475," સગ":922,"ઘર ":662," શ્":290," વા":1551," વિ":1924," વસ":1926," વ્":2581," વૈ":515," વે":236," વડ":2142," વર":2148," શક":474," વલ":624," વદ":498," પછ":431," પટ":261," પણ":391," નો":313," પા":2926," પુ":759," પૂ":2113," પૈ":6312," પર":651," પશ":13847," પહ":226," ધા":386," દ્":305," દે":13156," નગ":894," ધર":567," ના":1491," નિ":408," નસ":380," ને":386," પં":2780," ધો":248," નવ":1299," નર":628," મધ":3389," ભિ":335," ભા":32144," મથ":749," ભર":862," મે":521," મો":1041," મા":2519," મુ":5993," મહ":9472," પ્":3289," પો":348," બો":476," બે":295," મં":217," બી":247," બા":1559," બહ":292," ડા":372,"કોન":2841," ડે":860,"કોળ":215,"કોટ":356,"ક્ષ":1887,"ક્ર":890," ત્":349," થય":778," દક":1478," તે":5306," થવ":379," દર":551," દુ":722," દિ":2858," દા":1294,"કેટ":221," દસ":704," તર":348," તિ":233," તા":24717," ૯ ":497," ગો":553," ગ્":552," ગુ":12029,"ખેડ":1595,"ખેત":4414," ગા":16841,"ગના":399," ૮ ":570," ખે":5659," ખા":1675," ગણ":237," ચર":392," ઘો":266," છો":266,"ગણવ":693," ઝઘ":243," છે":23548," ચો":383," 
જં":214," ચા":714," ચિ":300," ઝા":412," જ્":480," જુ":259," જા":631," જિ":13843," જો":332," જે":1921,"ખાસ":931," એવ":6158,"ખાન":217," એક":16184,"ખાત":404," ૫ ":907," ૪ ":431," કહ":562," કવ":319," કુ":6308," કા":1279," કો":831," ૭ ":640," કે":929," ક્":383," કડ":504," કપ":373," કલ":240," કર":2121," ઇડ":271," આહ":296," આવ":35196," આદ":1582," ઉત":2589," ઉપ":1212," ઉમ":372,"ગવડ":906,"ગરહ":219,"ગમા":18349,"ખ્ય":4323," અં":432," અગ":1187," આં":1036," અન":2069," અર":252," અમ":825," આઠ":804," આણ":440,"ગાં":635,"ગામ":16798,"ગિય":1057,"ગુજ":11824,"ઘડી":254,"ગોર":435,"ગોધ":222,"ગ્ર":743,"ઘરજ":242,"છી ":438,"ઘોડ":335," જ ":2898,"છે ":23415,"જબ ":1105,"ચરો":362,"ચાર":560,"ચાય":582,"ચિમ":11652,"ચાં":471,"ચાગ":369,"જી ":285,"જા ":331,"ચમહ":1210,"જે ":397,"જકો":214,"ઝઘડ":245,"જિલ":13824,"જુર":2204,"છોટ":245,"જરા":11822,"ઝાલ":270,"ઠા ":1548,"ટી ":375,"ટા ":336,"જેવ":714,"જેત":374,"જ્ય":12917,"ટે ":225,"ડી ":2334,"ડા ":4375,"ડર ":279,"ટેલ":254,"ટાઉ":239,"ડો ":637,"ણી ":405,"ણા ":1630,"ટ્ર":578,"ડેર":603,"ડેડ":232,"તી ":3871,"ડોદ":1844,"તે ":714,"ડિય":272,"ડાસ":222,"ણંદ":569,"ડીય":740,"તિ ":370,"તા ":7106,"તર ":2912,"થક ":760,"ડાં":355,"ડબ્":254,"ણે ":229},"n_words":[2118540,2468202,1874859],"name":"gu"}
\ No newline at end of file
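For completeness, here is how these bundled profiles are consumed through the package's public API (detect and detect_langs from __init__.py) under Python 3. This assumes the nlp_resource_data/langdetect directory is importable as langdetect; the probability in the comment is illustrative, not an exact value.

from langdetect import DetectorFactory, detect, detect_langs

# Detection is probabilistic; pinning the seed makes repeated runs agree.
DetectorFactory.seed = 0

print(detect('Ceci est un texte en français.'))        # -> 'fr'
print(detect_langs('Ceci est un texte en français.'))  # -> [fr:0.999...]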
diff --git a/nlp_resource_data/langdetect/profiles/he b/nlp_resource_data/langdetect/profiles/he
new file mode 100755 (executable)
index 0000000..38a8c9d
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/he
@@ -0,0 +1 @@
+{"freq":{"D":3214,"E":2525,"F":2735,"G":2624,"A":5600,"B":4044,"C":5446,"L":2999,"M":4837,"N":2358,"O":1895,"H":2508,"I":3050,"J":1816,"K":1288,"U":823,"T":4063,"W":1765,"V":1278,"P":3992,"S":6051,"R":3127,"f":3967,"g":6300,"d":10934,"e":38551,"b":4292,"c":11175,"a":34025,"n":25648,"o":25621,"l":18280,"m":9009,"k":3209,"h":10406,"i":28660,"w":2312,"v":3224,"u":12118,"t":20514,"s":19158,"r":26616,"p":7557,"z":1550,"y":5684,"x":3018," o":1293," d":1423,"р":1134," a":1241,"с":1017," c":818," t":1122," p":2507," s":1080," J":1761," K":1182," H":2237," I":2087," N":1804," O":1286," L":2498," M":4152," B":3287," C":4445," A":4293," F":2284," G":2345," D":2725," E":1939,"л":851,"к":858,"и":1731,"о":1736,"н":1202," S":4832," R":2670,"в":941," P":3269,"а":1783," W":1618," V":962,"е":1219," T":3332,"ז":63772,"ח":143673,"ה":590284,"ו":795043,"ג":118718,"ד":182547,"א":355837,"ב":395285,"מ":381378,"ן":91462,"ל":422282,"ם":165990,"ך":21160,"כ":123771,"ט":120550,"י":838057,"ץ":10248,"פ":163464,"ק":174715,"צ":88276,"ס":156906,"נ":265593,"ף":12208,"ע":168037,"ש":294877,"ר":442678,"ת":390314,"ְ":2319,"ִ":1697,"ֵ":751,"ֶ":1684,"ַ":1974,"ָ":1859,"ֹ":1678,"ּ":3241,"ي":1156,"ل":1891,"م":1083,"ن":836,"ب":960,"ا":2457,"ر":948,"A ":1146,"Co":1139,"Ch":747,"Ca":914," ט":10158," י":46710," כ":41422," ל":87440," מ":140680," א":111429," ב":199095," ג":26594," ד":18598," ה":315869,"Ma":1436," ו":76633," ז":14443," ח":27536," ת":26310," ש":120088," ר":28151," ק":30023," צ":12233," פ":33679," ע":48214," ס":29027," נ":37369,"Th":1329,"St":822,"Pa":795," ا":1195,"a ":5141,"i ":1770,"ge":1176,"he":2979,"ha":1572,"g ":1420,"ea":1448,"ec":949,"ed":1174,"de":2377,"di":1239,"do":869,"h ":1424,"el":2456,"ei":783,"en":3848,"em":940,"et":1553,"es":3076,"er":6931,"ca":1297,"e ":9963,"be":993,"da":916,"f ":1405,"ct":774,"co":1157,"ci":1010,"ch":2176,"ce":1369,"c ":854,"d ":3184,"at":3174,"as":1657,"ar":4336,"au":801,"al":3282,"ai":824,"am":1581,"an":5294,"ac":1575,"ad":954,"nt":2386,"ns":1298,"no":898,"nn":849,"of":1227,"oc":763,"om":1498,"on":5076,"ol":1724,"ot":992,"os":1322,"ou":1121,"op":789,"or":3356,"r ":4009,"pe":961,"ph":746,"lo":1342,"ll":2162,"o ":2412,"ma":1706,"me":1669,"mi":920,"mo":751,"na":1919,"nc":982,"nd":2342,"ne":2187,"ng":1840,"ni":1853,"m ":1750,"li":2362,"le":2918,"la":2312,"n ":6713,"hi":1088,"ho":885,"id":1119,"ic":2833,"ia":1960,"ig":907,"ie":1690,"k ":1105,"ir":937,"is":2730,"it":1899,"il":1849,"in":4459,"io":2236,"l ":3308,"y ":3108,"vi":786,"ve":1332,"x ":2437,"ul":958,"ur":1480,"us":2545,"um":935,"un":962,"tu":790,"tt":809,"to":1696,"tr":1218,"te":3049,"ti":3191,"th":2015,"ta":1915,"ss":1070,"st":2602,"so":786,"se":1404,"si":1360,"rs":966,"rt":1408,"ry":760,"ro":2308,"rn":818,"ri":3441,"re":2927,"rd":1160,"ra":3300,"t ":3529,"s ":8103,"px":1778,"一":845," Ma":1422," Ca":896," Co":1129," Pa":776," St":815," Th":1324," of":1007," de":821," 
px":1777,"זר":6935,"זק":1127,"חב":10029,"חז":2980,"חו":22900,"חה":3687,"חד":8305,"זי":12406,"זל":838,"זם":1284,"זכ":3534,"זנ":886,"זמ":4439,"זע":2250,"חר":11087,"חק":7976,"חת":6995,"חש":6932,"טב":2858,"טא":3408,"טו":20425,"טה":4110,"טח":2065,"חי":19519,"חל":11067,"חם":835,"חמ":5926,"חנ":2382,"חס":4114,"חצ":1267,"הת":16129,"הש":20373,"הר":18303,"הק":13593,"הצ":7002,"הפ":12266,"וה":16855,"וד":26125,"וג":17753,"וב":35986,"וא":44658,"וּ":1323,"הכ":8371,"וֹ":1575,"הט":3770,"הי":51622,"הז":2998,"הח":11926,"הה":6606,"הו":49027,"הע":15156,"הנ":11770,"הס":11464,"המ":50070,"הן":3379,"הל":10568,"הם":6285,"ות":101531,"וש":17323,"וצ":14838,"וץ":2534,"ור":67093,"וק":18906,"זו":10251,"זה":6566,"זא":1585,"וי":25913,"וך":3767,"וכ":13432,"ול":56167,"וו":21707,"וז":11389,"וח":11917,"וט":7960,"וס":26165,"וע":21211,"וף":4260,"ופ":26005,"ום":16031,"ומ":32989,"ון":31711,"ונ":45275,"גת":2080,"דב":1971,"דג":1438,"דא":1968,"גע":907,"גר":12191,"גש":1319,"גם":5839,"גל":15851,"גנ":4603,"גן":1862,"גמ":1889,"גו":18230,"גה":2711,"גד":7511,"גי":15377,"גז":1369,"הא":24412,"הב":13059,"הג":10416,"הד":6369,"דפ":866,"דצ":1312,"דק":1275,"דר":19078,"דש":2624,"דת":5288,"דל":3528,"דמ":4712,"דם":3790,"דנ":1611,"דן":1253,"דע":6022,"דס":1191,"דה":10078,"דד":1385,"דו":30532,"די":39839,"בא":28974,"את":18469,"אר":23727,"אש":17310,"אצ":2077,"אק":3488,"אפ":6737,"אף":1142,"אנ":19673,"אס":5720,"אן":3277,"אמ":12397,"אם":2757,"אל":31779,"אכ":1022,"אך":1814,"אי":39448,"אט":3314,"אח":13816,"אז":5466,"או":54111,"אה":8465,"אד":6606,"אג":2909,"אב":7150,"גא":2681,"גב":4647,"בש":16669,"בת":15296,"בצ":5367,"בק":6122,"בר":44457,"בס":9775,"בע":20054,"בפ":5660,"במ":24479,"בנ":12394,"בן":3469,"בי":60852,"בל":12579,"בכ":5400,"בז":1800,"בו":37124,"בט":3788,"בח":6845,"בג":4476,"בב":4468,"בה":14562,"בד":9533,"ממ":12115,"מן":5860,"מנ":16156,"מס":13321,"מע":13284,"מפ":8474,"מצ":11869,"מק":11494,"מר":20980,"מש":30914,"מת":18345,"נד":8678,"נג":16204,"נב":2857,"נא":5195,"נח":5856,"נז":1832,"נו":50382,"נה":26549,"נכ":2770,"ני":73546,"נט":9488,"נן":997,"נמ":4014,"נם":1134,"נל":2549,"לנ":7233,"לס":4718,"לע":5121,"לל":8431,"לם":5118,"למ":14400,"לן":1558,"לר":5001,"לש":7046,"לת":11787,"לפ":7565,"לצ":2285,"לק":11094,"מו":54355,"מה":19324,"מד":15338,"מג":4215,"מב":11717,"מא":19955,"מל":12739,"מכ":7596,"מי":43286,"מט":8749,"מח":12385,"מז":4310,"כד":4207,"כה":6173,"כו":21072,"כז":3111,"כח":1672,"כי":15596,"כך":1583,"כא":3499,"כב":6283,"כג":1009,"כפ":2033,"כת":9752,"כש":2355,"כר":5552,"כמ":4368,"כם":777,"כל":14948,"ככ":754,"כס":2535,"כנ":8682,"כן":3171,"לז":1198,"לח":8146,"לה":26418,"לו":38179,"לך":2679,"לכ":7962,"לט":7328,"לי":67951,"לג":4986,"לד":10370,"לא":20135,"לב":10370,"טק":1116,"טס":846,"טנ":3561,"טע":1653,"טל":6264,"טן":1424,"טמ":2400,"טי":27400,"טכ":861,"יו":59458,"יז":7368,"יח":8678,"יט":18165,"יב":19301,"יג":6611,"יד":26912,"יה":49212,"יא":28186,"טר":15887,"טת":2335,"יר":32678,"יק":28848,"יצ":10768,"יץ":1467,"יפ":9697,"יף":1039,"יע":8301,"יס":16568,"ינ":36790,"ין":20736,"ימ":19188,"ים":101424,"יל":28185,"יכ":8588,"יך":2854,"יי":52949,"יש":24046,"ית":80424,"צמ":5084,"צע":4237,"צפ":3530,"צח":1679,"צי":17340,"צט":759,"צל":2956,"צא":6953,"צב":5471,"צג":1091,"צד":1322,"צה":5120,"צו":14859,"פצ":904,"פק":4972,"פת":9249,"פר":34424,"פש":3647,"רץ":4177,"רפ":7179,"רק":7125,"רצ":5822,"רס":9590,"רנ":6801,"רע":2837,"רם":1942,"רל":4398,"רן":1612,"רמ":9087,"רי":69906,"רט":11059,"רכ":12424,"רך":5453,"רה":18046,"רו":57537,"רז":1504,"רח":8185,"רא":29022,"רב":18351,"רג":11231,"רד":8475,"קר":20408,"קש":3566,"קת":4771,"קצ":3292,"קפ":1285,"קע":1184,"קס":4328,"קנ":4930,"קן":2101,"קמ":2
688,"קם":1371,"קל":7255,"קי":22013,"קט":8069,"קו":31546,"קד":4923,"קה":11443,"קב":8684,"קא":6360,"צת":2627,"צר":9786,"עד":7161,"עה":5494,"עו":21588,"עז":1654,"עט":1111,"עי":23608,"על":30126,"עם":5137,"עמ":4465,"ענ":3092,"עס":1118,"סע":1202,"סס":1542,"ספ":16242,"סף":1568,"סר":7306,"סק":6961,"סת":3711,"עב":7995,"סד":6581,"סה":2338,"סב":2454,"סג":2607,"סח":1327,"סט":14456,"סו":21766,"סכ":1718,"סל":4205,"סי":26035,"סנ":2332,"סם":1071,"סמ":4060,"נע":2683,"נס":7827,"ננ":1118,"נק":6861,"נצ":2907,"נפ":4308,"נת":16355,"נש":4892,"נר":4214,"סא":1865,"פט":6029,"פי":27041,"פה":6116,"פו":28843,"פח":3684,"פנ":5942,"פס":4846,"פע":6986,"פל":6474,"פן":2295,"פב":1568,"פא":3156,"פד":1152,"פג":1094,"עש":4774,"ער":15071,"עק":2148,"עצ":4716,"עת":7895,"שת":9019,"שש":1558,"תב":4462,"תא":5204,"תו":25811,"תה":9277,"תג":1436,"תי":25751,"תח":9940,"תל":3463,"תם":2303,"תכ":2023,"תנ":5503,"תמ":4850,"תן":2959,"תפ":5557,"תע":2232,"תר":13462,"תש":2068,"תק":7602,"רש":6651,"רר":1758,"רת":19384,"שא":5518,"שג":2036,"שב":15227,"שה":14417,"שד":1381,"שו":28172,"שט":3529,"שח":6900,"שי":32057,"שך":1218,"שכ":3883,"של":59496,"שם":6738,"שמ":16387,"שנ":26253,"שע":3886,"שפ":9870,"שק":2676,"שר":23791,"תת":1846,"מ ":2663,"ן ":91356,"ס ":23921,"ע ":17848,"ף ":12199,"ִי":1007,"פ ":1749,"ץ ":10234,"צ ":3355,"ק ":22523,"ר ":101491,"ש ":18495,"ת ":251160,"ב ":39174,"א ":61709,"ד ":40754,"ג ":17864,"ו ":72231,"ה ":204656,"ח ":14016,"ז ":8988,"י ":129045,"ט ":20042,"כ ":3096,"ך ":21128,"ם ":165828,"ל ":123422,"ال":1187," יי":2324," יכ":1029," יל":1306," ימ":1454," יע":851," יצ":2302," יר":1739," יש":8585," כג":1001," כב":1385," כא":2275," כו":3554," כד":2689," כך":799," כי":5212," כח":844," חל":3208," חמ":880," חי":4768," חס":883," חר":777," חש":1430," טו":2163," טב":768," טל":1002," טי":1699," טר":1404," יח":2213," יו":7458," יה":3689," יד":8356," מז":2002," מח":5968," מט":3237," מי":10487," מכ":3995," מל":5214," מא":7744," מב":4174," מג":2553," מד":6437," מה":5964," מו":15023," מש":13103," מר":5197," מק":5213," מצ":3521," מת":7226," מס":7731," מנ":4010," מן":1074," ממ":8626," מפ":3253," מע":6426," כר":1353," כת":1870," כש":1438," כפ":1233," כנ":1474," כל":6776," כמ":3311," לט":1128," לי":6245," לכ":2758," לה":9774," לו":4443," לז":853," לח":2914," לא":10716," לב":3660," לג":1530," לד":1145," לת":2908," לש":5145," לר":2977," לק":2667," לצ":1757," לפ":4810," לע":3788," לס":2540," לנ":1953," למ":9220," לל":1387," בפ":5193," בע":12834," בס":7728," בר":10133," בק":4194," בצ":3678," בת":9609," בש":14953," גב":1847," גא":1205," בד":5369," בה":6389," בב":3563," בג":3763," בח":3909," בט":1774," בו":5324," בז":1578," בכ":4321," בל":4682," בי":25678," בן":1774," בנ":6905," במ":21078," גר":3583," דב":749," גד":1588," גו":3424," גי":2015," גל":1445," גם":4780," אפ":2121," אק":1199," אצ":767," אס":1813," אנ":6385," בא":23971," אש":4957," אר":6566," את":12538," או":23084," אז":1939," אח":7629," אב":4176," אג":950," אד":3113," אל":10175," אם":984," אמ":5812," אי":11144," אך":1639," זה":3518," זו":3424," וק":1350," ור":2477," וש":2610," ות":1633," ונ":3060," ומ":9400," ופ":2170," וע":3597," וס":1644," וח":1862," וז":955," וו":1631," ול":4945," וכ":2933," וי":4333," חב":4303," חד":1040," חו":5710," זר":823," זמ":1557," זי":910," זכ":1452," הג":7501," הד":4857," הא":22400," הב":10880," דר":2719," דמ":1254," די":3533," דו":3757," דה":785," וב":8620," וג":1290," וד":892," וה":11812," וא":5274," הר":14413," הש":17912," הת":11571," הפ":9733," הצ":5867," הק":10138," הס":10053," הנ":10137," הע":13912," הם":2069," הל":5334," הן":954," המ":46981," הי":41072," הט":3324," 
הכ":7288," הו":33156," הה":5396," הח":10762," הז":2732," שה":7711," שב":7588," שח":2817," שט":1041," שו":5579," רש":1366," שא":2492," שפ":2381," שע":1892," שר":1669," שק":787," של":48394," שכ":1941," שי":8345," שנ":9947," שמ":8425," שם":3634," רא":3404," רב":4590," רו":6814," רח":840," קר":4233," קש":1007," רפ":810," רק":1211," רצ":860," רי":2280," רכ":802," תח":2475," תי":1934," תו":4939," תא":1279," שש":1373," שת":1185," תק":2224," תש":1190," תר":2157," תע":967," תפ":1385," תנ":1167," תכ":774," תל":1823," סי":4571," סל":908," סמ":1246," סג":795," סב":864," סד":1598," סו":5658," סט":1619," נש":1719," נת":1130," נע":1238," נפ":2034," נק":2006," נכ":1356," ני":4715," נמ":1957," נג":1220," נב":1056," נא":920," נח":2096," נו":11348," נה":1061," עת":798," עצ":1752," עק":770," ער":2088," עש":1568," ענ":811," עמ":1580," עם":3883," על":19873," עי":2972," עו":4351," עד":2831," עב":2405," סק":894," סר":2435," ספ":4534," פר":8948," פו":6232," פי":5929," פל":1804," פע":2438," פנ":1331," פס":1467," פא":1358," קל":1932," קט":1426," קי":3820," קצ":927," קנ":905," צר":1714," קו":8076," קא":953," קב":2948," צי":2521," צפ":808," צמ":781," צו":1425," צב":1575," ال":1017," ב ":10067," ג ":4498," ד ":1157," ה ":7037," א ":1385," י ":1129," כ ":2336," ל ":2779," ו ":2034," ז ":1154," מ ":2074," ס ":1112," צ ":1288," ר ":1507,"The":1046,"al ":1171,"and":1073,"an ":1270,"ati":1099,"מוז":5024,"מוד":3483,"מוג":886,"מוב":939,"מהל":1373,"מהמ":885,"מדר":908,"מהו":1225,"מדי":5777,"מדו":1516,"מדע":3235,"מגו":862,"מבר":4568,"מגד":820,"מבו":1915,"מבי":1059,"מבנ":1260,"מאת":1015,"מאר":916,"מאפ":1026,"מאנ":883,"מאל":2405,"מאי":3462,"מאז":946,"מאו":3114,"מאה":2706,"מלו":953,"מלא":1080,"מלכ":2095,"מלך":1227,"מלי":1534,"מלח":3450,"מכו":2726,"מכי":1235,"מים":6416,"מיל":2833,"מין":1783,"מינ":3538,"מיש":1371,"מית":4649,"מיק":1216,"מיר":984,"מטר":3264,"מיי":3085,"מיד":2520,"מיה":1218,"מיו":3528,"מחק":931,"מחל":1800,"מחי":970,"מטי":1746,"מחש":1462,"מחו":2697,"מחז":1008,"מזר":2302,"מוע":3080,"מוס":2260,"מונ":5841,"מון":1066,"מוצ":2037,"מופ":1468,"מוי":943,"מוח":898,"מול":1345,"מוכ":1297,"מזו":770,"מוש":3780,"מות":5842,"מוק":2142,"מור":3868,"מפי":1187,"מפל":1393,"נו ":5903,"מפו":1340,"מפר":1910,"מעו":2266,"מעל":1128,"מספ":3471,"נד ":1870,"מעב":969,"מער":4843,"מעש":819,"נה ":21979,"מנה":2231,"מנו":2766,"מנט":976,"מני":6289,"is ":1053,"ion":1503,"מסו":3392,"מסי":756,"מנת":816,"מסג":979,"נג ":1904,"ממל":1294,"ממו":4003,"ממש":2774,"נם ":1133,"מתא":1357,"מתי":1175,"מתח":1378,"מתו":2071,"מתמ":1624,"מתק":1096,"מרו":1617,"מרי":5941,"מרח":1042,"ני ":19093,"מרץ":1409,"מרכ":2955,"משו":2959,"משח":2455,"משך":1130,"משי":1615,"מרת":774,"משפ":4826,"משק":861,"משר":976,"משת":2362,"משל":2762,"משמ":3131,"משנ":2281,"מצו":2210,"מצב":1295,"נח ":1477,"מצא":2270,"מצע":1849,"מצי":1590,"מקו":5589,"מקב":868,"נט ":1605,"מצר":828,"מקר":1322,"נק ":1115,"נס ":1624,"נן ":997,"נר ":886,"נת ":12553,"סט ":2719,"סד ":1188,"סה ":2203,"סם ":1071,"נאי":1779,"סי ":2536,"סל ":766,"נהל":1094,"נהי":761,"נוד":953,"נוב":2639,"נוא":1590,"נהר":1042,"נדי":1685,"נדו":1190,"נהג":967,"נדר":1651,"נגל":9186,"נגד":1024,"סף ":1568,"נגר":1057,"סס ":885,"נבח":833,"נטי":3381,"נטר":1455,"ניו":3890,"ניה":4469,"ניב":1798,"נחש":1738,"נטו":1177,"סר ":1369,"he ":1611,"סק ":1452,"נוי":2975,"נול":6258,"נון":1701,"נונ":778,"נוס":3397,"נוע":4528,"נוצ":1379,"נור":1169,"נות":10949,"נוש":1445,"נלא":901,"נכת":1073,"נית":14496,"סת ":2182,"ניס":1942,"ניק":1685,"ניי":6008,"נימ":1196,"נים":13661,"נסו":805,"נסי":2174,"נמצ":1867,"עו ":882,"נפו":1293,"עד ":4359,"נסת":1066,"עה ":5479,"נער":986,"נקר":1955,"עי 
":4052,"נשי":2815,"נצי":1255,"נקו":1045,"נקצ":758,"עם ":5134,"נתי":767,"נתו":1033,"על ":22307,"סבי":1330,"סגר":950,"סגנ":827,"סוג":2713,"סוד":1173,"סוב":810,"סדר":3650,"ia ":834,"סוי":1607,"סוף":1248,"סול":1222,"סון":1399,"סור":2397,"סות":809,"סופ":3199,"סטי":3364,"סיד":979,"סיב":1007,"סיו":1804,"סיה":1411,"סטר":2818,"ער ":2948,"סטו":3180,"סיפ":1197,"סינ":1401,"סיס":1561,"סיל":886,"סים":1765,"סימ":1300,"סיט":1729,"סיי":2145,"סיכ":1356,"עת ":4300,"סית":2310,"סלו":858,"סמו":1244,"ing":1041,"in ":936,"ספט":1488,"ספי":2004,"ספו":1293,"ספר":9874,"סקו":1337,"סקי":1760,"סרט":3915,"סרי":1021,"עבר":4731,"עבו":2091,"עדו":1037,"עוב":1493,"פו ":941,"עסק":909,"פה ":6052,"עני":1193,"עמו":895,"עמי":1194,"עמד":1340,"עלי":4061,"עלה":1199,"עלו":1136,"עלת":1040,"עיק":3105,"עיר":4131,"עית":1602,"עיי":870,"עיל":2019,"עים":3131,"עין":910,"עיו":894,"עור":1459,"עות":4912,"עול":5515,"עונ":916,"עוס":1354,"עוד":1596,"עזר":762,"er ":2469,"es ":1643,"פן ":2293,"en ":849,"עתי":2916,"ent":903,"עשר":1306,"עשי":1214,"עשו":933,"עשה":950,"ערי":962,"ערכ":3489,"ערך":1379,"פי ":4902,"ערב":4397,"ערו":1170,"פט ":1621,"עקב":1399,"עצמ":2020,"עצה":792,"פר ":7389,"פת ":4142,"פונ":2515,"פון":2263,"פול":5205,"פוב":883,"פות":3286,"פור":5443,"פוצ":750,"פופ":985,"פוס":1005,"פוע":1359,"פטמ":1413,"פטי":1246,"פיה":1485,"פטר":1084,"פחה":834,"פחת":1765,"פבר":1306,"פוא":886,"פאר":769,"פעי":1681,"פעל":1386,"פעו":1775,"צה ":4704,"פרט":1640,"פרי":7391,"פרו":4724,"פרנ":1094,"פרס":3792,"פרד":2122,"צי ":1464,"de ":816,"פשי":825,"פרש":1302,"פרק":1005,"פקי":1813,"פלו":847,"פלג":1367,"פלי":1158,"פים":2239,"פינ":1223,"פיע":1348,"פיו":1127,"פיז":1279,"פיי":2109,"פיל":2559,"פיר":2169,"פיק":1363,"פית":1710,"צב ":1365,"פני":3690,"פנה":1015,"פסי":1405,"פסו":1262,"צא ":3406,"צת ":2388,"צר ":2282,"פשר":1288,"פתח":1344,"פתי":2367,"צע ":1320,"קו ":1976,"קט ":953,"קי ":3109,"קל ":917,"קב ":846,"קה ":10128,"צוע":1224,"צות":4135,"צור":3752,"צוו":876,"צוי":1042,"ציא":1461,"ציב":1399,"קר ":5102,"קת ":3999,"ציר":1681,"צים":1036,"ציי":1115,"ציו":2783,"ציה":2306,"צלי":914,"קם ":1371,"צאה":819,"קן ":2098,"צבי":1113,"צבא":1757,"קס ":1455,"צאת":990,"קדמ":1302,"קדו":1269,"קהי":992,"קונ":1555,"קומ":2802,"קופ":3480,"קור":4356,"קות":1519,"קוב":1891,"רץ ":4170,"קוו":1390,"קוד":2387,"קוט":813,"קול":4389,"קום":1590,"רן ":1611,"קאי":4278,"קבל":961,"קבו":5125,"קבי":757,"רס ":2957,"קלא":1049,"קלי":2877,"קלו":814,"קמה":931,"שא ":795,"רק ":2876,"קטי":1305,"קטו":2534,"רר ":986,"קיב":1075,"רש ":946,"קטר":975,"קים":4228,"קיס":929,"קיד":1911,"קיו":786,"קיי":2845,"קית":1359,"רת ":15678,"קיצ":864,"קיר":818,"רה ":17356,"רד ":3185,"צעו":1152,"צעי":1124,"צפו":2771,"רו ":3945,"רא ":2013,"צמב":1291,"צמח":843,"רג ":1850,"רב ":5168,"רל ":1007,"רם ":1940,"רט ":4971,"רח ":1986,"רך ":5443,"צרפ":2865,"רי ":10968,"צרי":2230,"רחש":787,"רחי":1692,"רחב":1963,"רחו":949,"ריא":2270,"רטי":3060,"שר ":8585,"רטו":1441,"רות":11282,"רוש":2596,"רור":832,"רוק":2070,"רופ":3572,"רוע":945,"רוס":3581,"רונ":2892,"רון":3292,"רומ":4126,"רום":2275,"רוי":1525,"רוח":842,"רוט":809,"רוו":1191,"רוז":748,"רוב":4518,"רוא":1983,"רדי":2706,"רבי":6491,"רבע":1063,"ראת":808,"ראש":9879,"רבו":2717,"רבה":870,"רגנ":814,"רגל":1905,"רגי":1770,"רגו":2907,"תב ":1936,"רנס":871,"רנט":865,"רני":2334,"רמו":1109,"רמנ":3395,"רמי":1311,"רכה":807,"רכז":2614,"רכו":1721,"רכי":2246,"רכת":2068,"רלי":1812,"ריו":3530,"ריט":3551,"ריה":4718,"ריד":1253,"רים":13911,"רינ":921,"ריי":3646,"ריל":1756,"ריכ":1158,"ריק":5899,"ריס":1584,"שת ":3670,"רית":8516,"רכב":1910,"קצי":1354,"שו ":1123,"שה ":6000,"שג ":788,"קסי":1168,"קני":2966,"שב 
":2846,"ראה":1168,"ראו":1207,"ראל":9123,"ראי":2021,"שם ":6732,"של ":44424,"קשר":1129,"שך ":1218,"קשו":1188,"קרא":3185,"שי ":4651,"קרב":1789,"קרו":2813,"קרי":3474,"שהי":1689,"שהו":2779,"שהת":1193,"שוו":1653,"שוי":825,"שוב":2695,"שות":2730,"שור":3173,"שומ":896,"שון":3078,"שול":846,"שונ":5449,"שחק":4451,"תר ":6554,"שטח":1349,"שיא":1478,"שיב":1320,"שיח":755,"שיו":1010,"שיט":1510,"שיי":2304,"שימ":3122,"שים":3750,"שינ":1048,"שיצ":765,"שית":2397,"שיש":1013,"שיר":3655,"תת ":1033,"שכו":948,"שלת":1194,"שלי":4040,"שלט":1378,"שלו":3513,"שלה":1787,"שמא":2250,"שמה":1020,"שמש":2616,"שמע":1452,"שמו":3471,"שמי":1858,"שנה":2722,"שנו":3666,"שנת":8163,"שני":8407,"תה ":7963,"רסי":2582,"רסם":846,"רעי":965,"רפת":3016,"רפי":1377,"רפו":1781,"תו ":7307,"רקי":1347,"תח ":1273,"רצו":3053,"רצי":1126,"רשו":916,"רשי":915,"תי ":6224,"תל ":1457,"רתו":1195,"רתי":1593,"רשת":1744,"px ":1770,"תם ":2303,"שאי":1679,"תן ":2959,"שבת":1116,"שבי":2213,"שבע":790,"שאר":864,"שבה":1333,"שבו":2486,"תחו":3127,"תחי":1668,"תחת":1243,"תוא":930,"תוך":1588,"תוח":1128,"תונ":1543,"תוכ":2531,"תול":1229,"תות":809,"תור":2674,"תוצ":851,"תופ":747,"תכו":755,"תלמ":974,"תים":1902,"תיי":3333,"תיו":2454,"תיה":1049,"תיב":1533,"תיא":1180,"תית":3825,"תיק":1981,"תנה":885,"תנו":2011,"תמש":815,"תמט":1174,"תמו":1033,"תפי":902,"תפת":844,"תפק":1672,"שפע":1069,"שפו":1080,"שפה":1050,"שפי":949,"שפח":2708,"שפט":1854,"שתמ":820,"שתי":1677,"שרת":856,"שרי":1851,"שרא":9189,"שרו":1075,"שרה":794,"שרד":795,"תאי":785,"תאו":1106,"תהל":1012,"תאר":1331,"ng ":993,"ne ":883,"nd ":899,"תקו":3171,"תקי":1081,"תרח":857,"תרו":1096,"תרג":858,"תרב":1108,"of ":1020,"on ":2345,"le ":936,"אב ":829,"אה ":7609,"או ":12752,"אז ":1578,"אי ":11190,"אך ":1814,"אל ":12893,"אם ":2755,"אן ":3273,"אחד":3813,"אזו":2699,"אות":6901,"אוק":2466,"אור":6849,"איט":1941,"איו":924,"אטר":826,"איס":807,"איר":3360,"איל":857,"איי":2254,"אינ":4275,"אין":766,"אימ":1541,"אים":3388,"בר ":12599,"אחר":6475,"אחת":1793,"אדו":1100,"אדם":2266,"בע ":2728,"אוד":916,"אוג":1935,"אוח":876,"אוט":953,"אוו":1522,"אול":3101,"און":955,"אונ":2600,"אומ":3134,"אופ":3417,"אוס":2423,"אמר":4875,"אמצ":1763,"אמנ":1370,"אמי":1391,"אמו":966,"גד ":977,"אסי":1374,"אסט":928,"אנש":1036,"אנר":800,"אנט":1124,"אני":2535,"אנג":9388,"אנד":988,"אנו":1034,"בת ":4691,"אית":2756,"איש":1805,"אלק":946,"אלג":769,"אלב":2498,"אלו":1949,"אלה":1036,"אלי":7303,"ארד":814,"ארג":2148,"גי ":1572,"ארב":1076,"ארו":1606,"ארץ":2338,"ארצ":2697,"ארק":837,"ארי":2497,"ארכ":746,"אשו":5612,"אשר":5633,"אשי":2353,"גה ":2608,"גו ":887,"אפי":1390,"אפר":2492,"אפש":1248,"באי":3225,"באמ":1587,"באל":995,"באז":1071,"באו":6670,"בבי":1380,"באר":2896,"באנ":6445,"באפ":1580,"בגו":776,"בדי":2083,"בדו":915,"בגר":1359,"גל ":2819,"אתר":938,"גם ":5839,"גן ":1860,"אף ":1141,"אס ":1146,"בא ":1578,"את ":15913,"אש ":2340,"אר ":5565,"בי ":5655,"בה ":7790,"בו ":4295,"בד ":1438,"אבו":828,"אבי":2570,"בן ":3463,"בל ":2984,"גנו":1314,"גני":1049,"הה ":812,"הו ":1555,"גור":2881,"גות":1342,"גוף":967,"דר ":2321,"גיי":929,"גים":1431,"גיל":1327,"גיע":1045,"דש ":1335,"גיה":3078,"דת ":3965,"גית":995,"גלו":929,"גלי":10256,"דבר":1224,"דוב":1184,"דול":3841,"דום":754,"דומ":1571,"דון":1523,"דוג":872,"דוד":964,"דות":4827,"דור":4722,"דוק":822,"דוע":2536,"גרו":886,"גרת":1011,"גרי":1714,"גרפ":891,"גרמ":3486,"הם ":6282,"הל ":908,"הן ":3375,"במח":1274,"במו":1362,"במי":1660,"במא":2636,"במד":1407,"במה":1632,"במב":1705,"במר":2370,"במק":1759,"במש":1939,"במע":1091,"במס":1456,"בלי":2952,"בלו":1708,"בלת":1123,"בעב":1312,"בעו":2020,"בעל":3414,"בעי":5046,"בער":1666,"בעת":1302,"דה ":9750,"בפב":1248,"דו 
":2707,"בנו":3853,"בנה":1480,"בני":4749,"בסו":1238,"בסד":891,"בסי":1809,"בספ":2568,"בוצ":4259,"בור":4953,"בות":6458,"בחי":1679,"בחו":1131,"בהם":920,"בהי":972,"בהו":872,"בדצ":1243,"בדר":2390,"בוע":1416,"בוס":1642,"בונ":896,"בום":1558,"בול":2422,"בוי":829,"בוה":789,"בוד":2124,"ביש":3045,"ביר":2459,"בית":7075,"גת ":1847,"בכל":1398,"בכי":1314,"בחר":970,"גר ":827,"בימ":897,"בין":7303,"ביל":1749,"בים":5704,"ביט":1549,"ביי":2219,"ביצ":869,"ביע":1697,"בינ":3973,"ביו":8219,"ביה":1061,"ביד":1090,"ביב":2499,"ביא":912,"גבי":1029,"גבו":1377,"גאו":1715,"דן ":1252,"גוב":1126,"גוד":1622,"גון":2245,"גוס":1728,"גול":1378,"גדר":1056,"גדו":3497,"גבר":964,"דע ":2354,"ברו":5256,"ברה":2480,"די ":14259,"ברא":1602,"בקר":1360,"בקי":1190,"בקו":797,"בצר":1026,"בצע":1020,"בצפ":918,"בצו":754,"בפר":1402,"בפו":753,"בפי":884,"דם ":3784,"דל ":1350,"בתי":1021,"בתק":1270,"בתח":2033,"בתו":2504,"בשי":1378,"בשמ":809,"בשם":1347,"בשל":1420,"בשנ":7415,"ברת":3153,"ברט":849,"ברי":11218,"ברס":1667,"הקב":1121,"הצפ":881,"הצל":875,"הצי":1591,"הרא":4672,"הרב":2013,"זי ":1210,"הקר":1616,"הקש":833,"הקת":1394,"הקל":776,"הקי":1474,"הקה":1071,"הקד":827,"הקו":2620,"השב":876,"הרפ":831,"הרש":823,"הרו":2886,"הרכ":769,"הרי":1168,"התא":1307,"התו":1514,"השת":1139,"השמ":1266,"השם":895,"השנ":4351,"השפ":1366,"השו":2146,"השח":757,"השי":2832,"השל":1958,"העת":967,"הער":1024,"העו":4093,"העב":1724,"העל":1178,"העי":2782,"זה ":4910,"הפע":812,"הפס":746,"הפר":2442,"זו ":2407,"הפו":2450,"הפי":1400,"הצב":868,"המח":2130,"המז":1081,"המי":3193,"המד":2330,"המו":7722,"המב":1301,"המא":3556,"המצ":1978,"המק":2404,"המר":1784,"המש":5759,"המע":1738,"המפ":1404,"הממ":1866,"המנ":1147,"המס":1855,"המכ":1676,"המל":2091,"המת":2867,"הנו":2020,"הנח":758,"הני":1012,"הנפ":766,"הנמ":908,"הסו":2019,"הסי":1362,"הסד":840,"הסר":1363,"הספ":2367,"היש":1505,"היר":1734,"ות ":80938,"הכו":1820,"הכל":1158,"הכי":931,"הכנ":1262,"הלא":944,"הלה":874,"הלו":1160,"הלי":2332,"הלכ":875,"הלך":1189,"ויל":1139,"וים":1245,"וימ":813,"וינ":891,"ויה":1089,"ויו":3873,"ויז":1668,"ויי":2636,"ויד":832,"וטו":1573,"וטי":1439,"זר ":1170,"וחר":794,"וכן":1385,"וכנ":2823,"וכל":1110,"וכי":1321,"וכו":764,"וכה":1041,"וכב":1408,"ויר":1363,"ויק":1124,"ווא":1655,"us ":1690,"ווה":2251,"ווי":6670,"וול":1093,"וון":1540,"והא":894,"והו":1891,"והי":2099,"והמ":1272,"ודע":1169,"ודר":1718,"ודת":964,"וחד":1468,"וחו":1384,"וזע":1797,"וזי":3484,"וונ":2693,"וות":1300,"וור":1036,"ואר":4871,"ואל":1667,"ובה":2620,"ובו":2126,"ובי":4910,"ובד":1206,"ובא":1004,"ואה":1373,"ואו":1083,"ואי":2839,"וגר":1479,"וגמ":793,"ודל":997,"ודי":6497,"ודה":2328,"ודו":2620,"ובר":4203,"ובת":944,"ובע":1673,"ובל":2998,"ובמ":2420,"וגי":5063,"וגו":2244,"וגד":821,"זם ":1283,"התח":1134,"התי":1325,"התמ":749,"התנ":1704,"התפ":1798,"התק":1851,"התר":1227,"וט ":1289,"דצמ":1291,"וי ":5737,"דרה":2116,"וז ":2245,"וח ":3705,"דעי":2475,"וד ":6343,"וה ":2998,"וב ":6671,"וג ":4167,"דמו":2336,"דמי":1260,"וא ":27697,"דלי":756,"דיר":1020,"דית":2578,"הר ":1518,"דיה":1129,"דיו":2800,"דינ":5116,"דין":785,"דים":4959,"דיי":1568,"היי":4423,"היל":1466,"היו":4371,"היח":928,"היס":2192,"הים":937,"הינ":2071,"היג":810,"וש ":3750,"היה":10243,"היד":1593,"היא":15411,"הטו":996,"החש":794,"ור ":18092,"החו":2245,"החי":1691,"החל":2789,"וק ":4196,"החב":1254,"הזמ":843,"הופ":1656,"הור":1218,"הוק":1750,"הוצ":992,"הונ":877,"הול":1454,"הוו":1959,"וץ ":2534,"הוד":5252,"הוא":27359,"ההי":939,"ההו":815,"ופ ":863,"הדר":1419,"הדי":781,"וף ":4256,"הדו":1789,"הגר":1312,"הגי":1488,"הגב":784,"וע ":5599,"הגו":1529,"הגד":2288,"הבר":3864,"האנ":1474,"האמ":2115,"האל":2120,"האר":2435,"tio":1159,"וס 
":5753,"הבי":3093,"הבו":902,"האד":1377,"האי":3937,"האו":3742,"האח":1546,"ון ":31676,"ום ":16021,"ול ":7891,"ter":1142,"דרו":3914,"the":876,"דרי":1886,"דרך":2231,"וך ":3767,"דרת":2145,"חבר":6406,"חבי":909,"חות":2707,"חור":1374,"חוק":2664,"חופ":953,"חומ":2243,"חדש":1272,"חוז":2017,"חום":1881,"חול":1611,"חוב":1301,"חוד":1401,"חיר":890,"טת ":2178,"חית":881,"טר ":2558,"חיו":928,"חיד":1965,"חיי":3491,"חיל":2274,"חים":2086,"חינ":1388,"חין":819,"חמה":852,"חמי":1161,"יא ":17223,"חמת":1678,"חלו":1698,"חלל":804,"חלק":3950,"זמר":1390,"זמן":1228,"טו ":1328,"טה ":3869,"זער":1824,"זרח":2887,"טי ":5165,"טח ":798,"טן ":1424,"חס ":1181,"זהו":873,"זור":3460,"חק ":2379,"חר ":3837,"זיה":1878,"זיק":3731,"זית":1113,"חת ":4976,"זכו":1572,"ולר":980,"ולק":1057,"ולת":1183,"ולפ":880,"ולנ":2842,"ולם":4221,"ולמ":1167,"ולל":2126,"ולי":9326,"ולט":1565,"ולו":7007,"ולה":3768,"ולד":6790,"ולא":1327,"וכר":1278,"ומת":1301,"ומש":1030,"ומר":2913,"ומי":8418,"ומנ":2251,"ומה":2366,"ומד":1168,"ומב":746,"ומט":1282,"ומח":813,"ומו":3468,"ומא":1433,"וסס":1023,"וסף":1300,"וספ":1419,"וסק":2400,"וסד":1576,"וסט":3543,"וסי":4415,"וסו":1601,"ונס":965,"ונת":1474,"ונק":1268,"חב ":1144,"ונג":1320,"ונא":1208,"וני":18082,"ונח":1810,"ונט":1231,"ונו":5615,"ונד":1458,"ונה":7200,"חה ":3681,"ועצ":1483,"ועת":846,"ועד":3169,"ועה":1693,"חד ":5298,"ועו":1416,"ועל":2118,"ועי":1952,"וצא":2429,"ופי":5387,"ופה":2721,"ופו":2815,"חו ":753,"ופק":800,"ופר":3436,"ופש":817,"ופת":1896,"ופן":1487,"ופס":1055,"ופע":1474,"וקם":1123,"וקס":776,"וקמ":1227,"וקו":1308,"וקי":1776,"וקט":2107,"חי ":1719,"ורב":1210,"ורג":4279,"ורא":1280,"וקר":2331,"וצי":1905,"וצה":1895,"וצו":1018,"וצת":1866,"וצר":3353,"ושי":1977,"ושה":1203,"ושו":851,"ושל":2823,"ותה":2054,"ורט":1801,"ורי":12062,"ורך":1570,"ורד":1610,"ורה":4165,"ורו":3649,"ורנ":1343,"ורס":1879,"ורכ":1436,"ורמ":1653,"ורר":1139,"ורק":1753,"ורת":4134,"ורש":1062,"ושב":1741,"ושא":1174,"ושג":868,"חם ":833,"ותו":3558,"ותח":851,"ותי":4897,"ותם":860,"ותר":5363,"חל ":1502,"יית":6408,"ייר":2338,"ייש":1691,"ייצ":1194,"ייק":972,"יינ":2765,"ייס":1621,"יכי":851,"יכו":4344,"יחס":2243,"יטי":4701,"יטל":2302,"יטו":2202,"יטה":1667,"יטר":974,"יטת":1710,"יטנ":845,"ייט":1193,"ייך":816,"יים":14831,"ייל":1292,"יין":3012,"יימ":2364,"ייב":892,"ייד":1305,"ייה":4423,"ייח":1458,"יוס":1553,"יונ":4734,"יון":4211,"יומ":966,"יום":4082,"יול":2364,"יות":18525,"יור":2161,"יוצ":1391,"יזי":2803,"יזם":1225,"יחד":770,"יחו":1331,"יחי":2092,"ידע":1040,"ידת":830,"יהו":5488,"יהם":1573,"יהן":839,"יוו":4083,"יוח":1326,"יעי":1794,"יעו":1404,"יעה":1184,"יסר":789,"כה ":5083,"ינס":944,"ינט":1703,"יני":8382,"ינל":1089,"ינא":757,"כב ":1369,"ינג":1937,"ינה":5280,"ינו":10255,"יסי":1267,"יסט":5187,"יסד":879,"יסה":750,"יסו":2347,"ינת":1901,"ימי":5064,"ימו":4842,"ימפ":1867,"ימה":1856,"ימת":1210,"ימש":796,"ילד":1523,"ילה":3661,"ילו":5745,"ילי":5359,"יכר":764,"ילת":1987,"טרי":3898,"טרה":1144,"טרו":3685,"טרת":804,"טרנ":1010,"ידו":5598,"ידה":2025,"ידי":10222,"יגה":763,"יגו":1454,"יבר":2757,"יבת":1107,"יבי":2538,"יבו":5867,"יבה":1458,"יאנ":1337,"יאל":1853,"יאט":1005,"יאה":813,"יאו":2501,"טון":1905,"טונ":1205,"טוס":1037,"טור":5386,"טות":1114,"יק ":3514,"יר ":9468,"טיו":1116,"טיב":1924,"יש ":4075,"טים":3865,"טינ":2155,"טיי":2059,"טבע":1372,"יע ":1962,"יף ":1039,"טוב":2684,"יץ ":1463,"טמב":1415,"טני":1443,"טית":3454,"ית ":68847,"טיס":800,"טיפ":954,"טיק":2311,"טכנ":772,"טלי":1241,"טלו":1568,"טלק":1324,"יז ":756,"יו ":8828,"יט ":1800,"חקי":1130,"חקן":1010,"חקנ":990,"חקר":1503,"יח ":1007,"חסי":1544,"יג ":1633,"יב ":3393,"יה ":40026,"יד ":4579,"ין ":20713,"יס ":3129,"חשו":1442,"יך 
":2851,"חשב":3253,"חרת":762,"חרי":2444,"חרו":2409,"יי ":1538,"ים ":101313,"יל ":6999,"כנו":1432,"כני":2511,"כנס":2070,"כפר":832,"לת ":6746,"כינ":1581,"כימ":930,"כיר":1145,"כיב":953,"כיו":2320,"כיה":761,"כים":1975,"כיל":789,"לר ":750,"כמו":1809,"כלכ":1033,"כלל":3229,"כלי":2030,"כלו":1791,"כבי":2214,"כבת":844,"כגו":871,"כדו":2961,"כאש":1171,"לס ":944,"כוכ":1222,"כול":4477,"כון":1365,"כונ":3439,"כזי":1062,"כות":3899,"לק ":2384,"כדי":968,"כוח":1334,"לי ":12633,"לך ":2677,"לט ":1236,"לן ":1554,"לם ":5107,"לל ":3884,"לא ":4586,"לב ":1394,"לה ":14910,"לד ":5523,"לו ":5117,"כר ":1517,"כת ":4160,"כן ":3170,"יפי":1104,"יפו":4212,"יפה":1175,"כו ":1279,"כז ":1400,"יצו":3383,"יצי":2337,"יצא":1573,"יקר":3694,"יקת":771,"יקנ":1394,"יקט":1251,"יקי":1290,"יקל":1432,"יקא":4207,"יקה":6636,"יקו":3145,"ירת":2005,"ירו":10154,"ירה":3648,"כי ":2345,"ירא":823,"ירי":3756,"ישר":9232,"ישו":3196,"ישה":1288,"כך ":1583,"ישי":4067,"כל ":4979,"יתה":3362,"יתו":3399,"יתי":1065,"יתן":1501,"כם ":777,"מת ":6084,"מר ":3374,"מש ":2759,"מס ":899,"מן ":5852,"לתו":964,"לתי":1345,"מל ":1062,"לקו":1461,"לקי":2709,"לקט":1068,"לשנ":1062,"לשי":1010,"לשו":879,"מי ":7001,"לרא":1055,"לרי":1005,"לרו":1023,"מה ":11039,"לעת":1312,"מד ":1904,"לפר":766,"לפנ":1939,"לפי":2283,"מו ":5183,"למע":1288,"למש":1364,"למו":2080,"למד":977,"למי":2394,"למנ":879,"לנד":1704,"לני":1139,"לנו":2241,"לכל":1891,"לכה":1199,"לכו":898,"ליק":1573,"ליצ":890,"ליש":1631,"לית":12132,"לין":1391,"לינ":1320,"לים":8070,"לימ":1810,"ליפ":1428,"ליס":1055,"לכת":1177,"ללי":1608,"לוש":1494,"לות":5393,"לומ":2089,"לום":1009,"לונ":1458,"לון":862,"לוי":1233,"לול":1458,"לור":772,"לוק":1039,"לוס":1978,"לחי":1661,"לטו":1868,"לטי":2006,"לחמ":2604,"ליא":1164,"ליד":1413,"ליה":3296,"ליג":1396,"ליח":784,"ליט":3150,"ליו":3604,"ליז":849,"ליל":2282,"ליי":2952,"ליך":906,"לגו":927,"לגב":764,"לבנ":1140,"לבי":1775,"לבו":2832,"לדו":1001,"לדה":1350,"לדי":1178,"לוג":4860,"לוב":1152,"לוח":1147,"לוו":2569,"להת":947,"להק":2345,"להי":1584,"לאו":4844,"לאח":2966,"לאי":1340,"לאס":784,"לאר":955,"כרי":914,"כרו":1009,"כתי":946,"כתב":2605,"כתו":1006},"n_words":[7414842,8743276,5934016],"name":"he"}
\ No newline at end of file
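Each profile added in this commit, including the Hebrew one ending above and the Hindi, Croatian, and Hungarian ones below, is a single line of JSON with the same three fields: "freq", a map from 1- to 3-character n-grams to their corpus counts; "n_words", the totals of 1-, 2-, and 3-gram occurrences; and "name", the ISO 639-1 code. Under Python 3 these files can be read directly as UTF-8 text, which is part of the point of this py2-to-py3 conversion. A minimal loading sketch; the load_profile helper is illustrative, not part of langdetect's API:

    import json

    def load_profile(path):
        # Profiles are one-line JSON; Python 3 takes an explicit encoding,
        # where the old Python 2 code read bytes and decoded by hand.
        with open(path, encoding='utf-8') as f:
            profile = json.load(f)
        # Every profile in this commit carries exactly these three fields.
        assert set(profile) == {'freq', 'n_words', 'name'}
        return profile

    he = load_profile('nlp_resource_data/langdetect/profiles/he')
    print(he['name'], he['n_words'])   # -> he [7414842, 8743276, 5934016]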
diff --git a/nlp_resource_data/langdetect/profiles/hi b/nlp_resource_data/langdetect/profiles/hi
new file mode 100755 (executable)
index 0000000..104ed58
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/hi
@@ -0,0 +1 @@
+{"freq":{"ौद्":366,"ोली":389,"ोर्":465,"ोमी":388,"्तम":479,"्तन":384,"्तर":2639,"्त्":1391,"्थल":430,"्ता":2582,"्ती":679,"्ति":2706,"्तु":954,"्थि":2834,"्था":3750,"्द्":869,"्दी":1870,"्दू":492,"्दे":907,"्ट्":2750,"्टे":7603,"्टी":634,"्टि":711,"्तक":539,"्बन":468,"्मक":445,"्यक":2308,"्मा":2864,"्मि":1000,"्रं":360,"्मी":437,"्यत":638,"्यय":385,"्यम":558,"्रक":2642,"्यव":609,"्रच":370,"्रज":367,"्यु":778,"्या":6530,"्धा":551,"्धि":463,"्ना":572,"्पन":572,"्पा":591,"्प्":3747,"्गत":419,"्का":992,"्कृ":823,"्कर":450,"्जा":403,"्ञा":1890,"्टर":764,"्चि":1116,"ोंन":705,"ौर ":706,"ं":116003,"ः":743,"ँ":3754,"आ":15110,"इ":21960,"अ":27216,"ऊ":1116,"ई":8670,"उ":14369,"ए":27071,"ओ":3623,"ऑ":695,"ऐ":1181,"ग":41430,"ख":13380,"क":215389,"औ":11975,"छ":5467,"च":24607,"घ":4688,"ट":40532,"ञ":2576,"झ":1114,"ज":58287,"ठ":3871,"ड":21061,"ढ":1924,"ण":16159,"त":129370,"थ":26984,"द":62970,"ध":21789,"न":137720,"प":89801,"फ":9525,"्ग ":934,"ब":39694,"भ":28885,"म":108014,"य":96626,"र":228209,"ल":79901,"व":82288,"ष":22409,"श":41726,"ह":118206,"स":149246,"़":11159,"ि":139433,"ा":290518,"े":193119,"ू":22463,"ृ":6345,"ी":110466,"ु":44034,"ौ":4425,"्":228350,"ो":68898,"ै":59521,"ॉ":2831,"।":45019,"०":5718,"१":6322,"्क ":738,"६":1789,"७":1738,"८":1943,"९":4350,"२":3762,"३":1587,"४":1437,"५":1969,"्न ":1500,"ोजन":549," ख":3003," ग":13738," औ":11821," क":119739," ओ":938," ऐ":1154," ऑ":666," ट":5844," ज":31363," झ":690," च":7337," छ":3781," घ":3397," इ":19284," आ":12686," अ":26917," ए":21008," ऊ":806," उ":13706," ई":1344,"्फ ":515,"्म ":3545,"ोड़":535,"्य ":9507,"्र ":7477,"ोते":895,"ोती":1270,"ोता":2537," २":2506," ३":490," १":5155," ।":6381,"्व ":2678,"ोनो":409,"ोने":1142," प":54146," फ":4760," न":21169," म":54702," य":22634," ब":23290,"्ष ":1625," भ":19961," ड":2225," द":21719," ध":2884," त":12822," थ":8999," ह":72412," स":78945," ल":14663," र":25548," श":12231," व":27670,"्स ":1196,"्च ":578,"्ट ":1436,"्ञ ":468,"्ड ":1049,"्ठ ":1088,"्ण ":1354,"्थ ":1814,"ोगि":441,"ोगो":559,"्त ":4240,"्ध ":2352,"्द ":1588,"्सा":631,"्ष्":370,"्हे":874,"्हो":949,"्स्":3660,"्ली":1260,"्ला":820,"्ले":511,"्रद":1892,"्रथ":561,"्रत":2596,"्रण":709,"्यू":989,"्रप":384,"्रम":2375,"्रभ":518,"्यो":1594,"्रय":1925,"्रव":977,"्रश":494,"्रह":1076,"्रस":1694,"्रा":9061,"्रि":3453,"्री":4034,"्रै":386,"्रे":9372,"्रो":1860,"्षण":540,"्षे":3114,"्षि":1460,"्षा":1408,"्वत":875,"्वर":1044,"्वव":540,"्वप":609,"्वी":1298,"्शन":1985,"्वे":430,"्वि":632,"्वा":6601,"०० ":579,"२००":1298,"१९९":360,"१९६":420,"१९७":415,"ंत ":1187,"ंड ":1010,"ंग ":1786,"ंक ":375,"ंभ ":447,"ंद ":452,"ंश ":574,"ंस ":426,"ंह ":485,"आप ":923,"अंत":806,"अंग":1106,"अक्":469,"इन ":809,"ंचत":1845,"ंचा":2297,"ंग्":1940,"ंगा":789,"ंगी":464,"ं। ":4299,"ंको":451,"ंक्":1624,"ंख्":1085,"ंगल":563,"ंटे":1825,"ंडि":369,"ंजा":609,"ंने":716,"ंबं":450,"ंपा":1007,"ंत्":1705,"ंति":457,"ंतर":775,"ंद्":705,"ंदी":875,"ंदि":932,"ंदर":556,"ंबर":485,"ंयु":368,"ंस्":2087,"ंसा":366,"�":2362,"थे ":819,"दन ":568,"थी ":791,"था ":4899,"त् ":405,"थम ":523,"तंत":641,"तो ":1178,"ं ":59244,"ः ":691,"ताओ":415,"तान":1545,"ताब":607,"तार":557,"तिक":1335,"दो ":833,"तिज":375,"तिन":613,"ँ ":2207,"तिय":1006,"तिर":505,"तिह":787,"आ ":1631,"नई ":675,"धन ":595,"दी ":4353,"दू ":610,"तरी":448,"तरा":413,"तरह":445,"दा ":857,"तर्":854,"दि ":1483,"तमा":404,"तमि":373,"ण्ड":966,"णों":461,"दर ":576,"दल ":742,"तथा":2577,"तत्":438,"णाल":368,"थ ":4109,"द ":7269,"ध ":3357,"न ":42221,"ड ":6850,"था।":3059,"ठ ":1539,"थाप":1123,"ण ":9157,"थान":1787,"त ":29579,"थित":2566,"धी ":490,"ज ":2925,"दक्":984,"ञ ":468,"धा ":464,"नट ":1785,"ट 
":6235,"धि ":2431,"थवा":400,"च ":2310,"छ ":1066,"क ":31192,"तों":455,"ग ":7730,"ख ":1929,"त्स":696,"त्व":1830,"त्प":791,"त्र":11745,"त्य":2756,"त्म":761,"त्त":3587,"ए ":4565,"तीन":446,"तीय":3767,"तीस":449,"ई ":6870,"ै ":18248,"दार":977,"दान":855,"दाय":435,"दिल":1253,"दिश":630,"दिर":1086,"े ":101676,"दिय":1540,"दित":1005,"दिन":569,"न् ":786,"दुर":430,"ू ":2066,"दूर":671,"दूस":592,"ि ":14043,"नी ":5102,"ी ":75983,"ु ":3758,"ा ":97285,"़ ":2122,"दस्":366,"ह ":13867,"ने ":12971,"स ":15469,"ष ":2544,"श ":4366,"व ":7299,"ल ":18408,"नि ":406,"दर्":1109,"ना ":7450,"र ":61007,"थी।":911,"य ":22189,"म ":13770,"भ ":1015,"नव ":411,"ब ":2705,"फ ":1049,"थे।":1504,"प ":5685,"टना":394,"डल ":378,"डी ":506,"डा ":698,"टर्":362,"ड़ ":675," �":578,"टती":1811,"ञान":1620,"ट्र":7028,"ट्ट":617,"टेश":7359,"ढ़ ":636,"टीक":471,"णु ":439,"णी ":648,"णा ":355,"तक ":1940,"ति ":4868,"ता ":14635,"तु ":1219,"ती ":9089,"ते ":6248,"तन ":484,"ड़े":606,"ड़ी":1222,"ड़ा":1450,"डिय":1027,"तम ":378,"तर ":1819,"जे ":3791,"जा ":1910,"ज़ ":425,"जी ":1578,"चेन":368,"चीन":1126,"चुन":405,"जो ":3111,"चिक":478,"चित":1154,"चार":1698,"चाल":2122,"चिम":876,"जंक":1375,"च्च":632,"छोट":541,"जन्":1234,"टक ":496,"जधा":578,"जनस":468,"जनी":553,"छूट":1815,"जस्":392,"टन ":357,"जहा":441,"ज़ी":477,"जिस":3147,"जिल":1663,"जिन":942,"जित":402,"जार":493,"जाब":559,"जान":2165,"जात":5721,"जैस":743,"ज्य":2693,"ज्ञ":2457,"जीव":1088,"टर ":1798,"टे ":2183,"टि ":384,"टी ":1658,"टा ":740,"ंघ":459,"ंख":1287,"ंग":7039,"ंक":4013,"ंड":2775,"ंट":3074,"ंज":1716,"ंच":5039,"केन":435,"ंश":984,"ंस":3555,"ंह":717,"ंव":676,"् ":2686,"खने":369,"ंध":1776,"ंद":4729,"ंन":763,"ंथ":399,"ंत":5036,"ंय":471,"ंप":2156,"ंभ":782,"ंब":1984,"ो ":17876,"ँच":523,"कोई":460,"कों":1045,"आत":950,"इं":1275,"कृत":1818,"आज":527,"अस":694,"अव":2719,"आक":698,"अल":1444,"अर":2196,"अभ":1051,"अम":1289,"अब":415,"अप":3105,"आई":372,"अध":2198,"अन":4741,"अथ":432,"इत":740,"इट":387,"आस":422,"इक":606,"आव":1056,"आम":383,"आय":903,"आर":1257,"आप":1303,"कृष":566,"आद":1231,"आध":1054,"आन":511,"अं":2476,"आं":508,"अत":785,"अक":781,"अग":575,"ं।":6630,"उप":2592,"उस":2266,"इन":2901,"इल":681,"इस":13512,"आ।":435,"ईस":387,"उत":2684,"उन":3336,"उद":1042,"कोच":453,"कोड":3985,"एं":534,"कोल":359,"एँ":425,"कोश":370,"एक":15570,"क्ट":677,"क्त":3551,"क्य":529,"क्श":1460,"क्ष":8266,"क्र":2787,"क्स":4316,"ओर":445,"ओं":2444,"ऐस":714,"एव":3043,"एल":360,"एस":427,"गर":2860,"गय":3634,"गल":1055,"गव":557,"खे":1130,"खो":528,"गम":494,"ख्":2681,"गभ":616,"खा":2157,"गठ":414,"खि":516,"खी":461,"घं":1798,"गढ":741,"गण":767,"गत":1406,"गद":372,"गए":383,"क्":22746,"कै":618,"के":37285,"खन":943,"गई":554,"को":17366,"कॉ":440,"कि":12948,"की":20919,"गं":513,"का":35945,"कृ":2446,"कु":3003,"कू":402,"कस":766,"कव":956,"कह":3138,"कल":2049,"खक":372,"कम":1276,"कर":12591,"कप":515,"कथ":448,"कन":1065,"कड":382,"खं":419,"कत":2310,"कट":630,"और":11428,"कई":777,"कं":723,"चु":923,"ची":1816,"जं":1545,"चि":3214,"चा":5438,"चे":1055,"चौ":426,"च्":1117,"० ":1980,"जग":567,"गढ़":657,"चत":1993,"चन":1667,"चर":750,"चल":1479,"घा":478,"चं":451,"। ":33428,"गति":373,"चक":424,"गा":3720,"गह":412,"गी":1381,"गि":1175,"गु":1958,"गो":2483,"ग्":4970,"गे":620,"गै":364,"घर":423,"टन":928,"ञा":1890,"टत":1909,"टा":1764,"टल":396,"टर":2646,"४ ":778,"झा":416,"३ ":798,"टक":827,"जो":3936,"जै":1167,"जे":4339,"जू":402,"जी":3381,"जु":839,"जा":12605,"जि":7022,"ज़":2064,"घंट":1796,"२ ":974,"ज्":5651,"जन":5056,"जध":594,"छू":1841,"छा":395,"जह":523,"जस":483,"१ ":954,"जर":910,"जल":806,"जब":740,"छो":713,"जय":526,"जम":644,"ठा":568,"५ 
":1070,"टे":10515,"ठन":369,"ट्":8239,"टो":662,"टी":2663,"टि":1894,"ड़":5317,"डा":1474,"डि":2023,"डी":831,"डल":514,"६ ":930,"गभग":596,"तं":976,"तः":367,"ढ़":1322,"७ ":816,"ड्":669,"डो":499,"डे":781,"डु":358,"णि":851,"णी":758,"णु":577,"णा":1135,"तक":2674,"८ ":811,"णन":433,"९ ":917,"तव":480,"ति":11566,"ता":20016,"तु":2358,"ती":14469,"तत":547,"तथ":2628,"तप":364,"तन":1343,"ण्":1311,"तम":1364,"णो":495,"तल":566,"तर":5109,"थव":456,"था":11678,"थी":1994,"थि":3214,"ते":6961,"तो":1865,"त्":23294,"थम":587,"थल":512,"दक":1341,"दस":512,"दश":363,"दृ":463,"दू":2227,"दु":1528,"दी":5182,"दि":8815,"दा":4680,"थो":462,"थे":2431,"दन":776,"दल":1038,"दर":2412,"थ्":491,"धा":4715,"नट":1830,"नत":1382,"खें":445,"नद":975,"धी":935,"धि":5621,"धु":599,"दो":2067,"दौ":382,"द्":15486,"नई":683,"दे":6364,"धन":912,"नक":3879,"नग":1441,"धर":1283,"नर":642,"नल":477,"नव":1823,"नन":633,"नप":443,"ध्":2713,"नम":981,"नी":7183,"पं":1090,"नु":2801,"ने":15948,"नस":1797,"नह":1786,"ना":16910,"नि":11710,"पक":1243,"नो":2338,"न्":16749,"पत":2217,"पन":4849,"पद":2167,"पड":542,"पट":576,"पश":1179,"पह":3589,"पस":581,"पल":544,"पय":630,"पर":12235,"बई":388,"पे":1223,"पै":501,"पू":3275,"पृ":1255,"पा":8076,"पि":1966,"पी":1550,"बं":1558,"पु":5232,"फल":545,"फर":388,"प्":26390,"पौ":356,"पो":892,"बन":3146,"फे":662,"बद":869,"फि":861,"बड":1139,"फी":943,"फा":687,"बज":3798,"फ़":1678,"फो":368,"फ्":1074,"बर":2061,"बल":818,"गया":3134,"भग":1053,"बस":1999,"बह":1601,"बि":1756,"बा":6278,"बु":789,"बी":1868,"मं":2240,"बे":1048,"बै":523,"बो":1031,"भर":525,"ब्":3946,"मक":1369,"भव":561,"भी":5128,"यं":862,"भि":1870,"भा":13836,"मत":879,"मण":522,"भू":2019,"भु":462,"मन":2122,"मध":940,"मद":711,"भौ":389,"भो":416,"मल":693,"यक":2875,"भ्":381,"मय":1214,"मर":1260,"मश":384,"मस":725,"मह":3586,"मृ":555,"यत":1327,"मू":1671,"ख्य":2528,"यद":785,"मि":6854,"मा":15028,"मु":5029,"मी":3565,"रं":1975,"मो":1712,"यम":1479,"म्":5925,"मे":33130,"यन":1546,"यप":564,"मै":980,"रख":1463,"यव":775,"रग":385,"रक":5780,"यय":399,"यर":1126,"या":29338,"रज":1027,"यह":10437,"रच":1272,"रद":2279,"रथ":786,"रत":13386,"यू":1784,"रण":4290,"यु":3494,"यी":833,"यि":596,"रय":1959,"रम":3670,"रभ":636,"यो":8068,"रब":740,"रप":839,"ये":5448,"रन":4212,"लम":734,"लय":1888,"लब":457,"लन":1624,"लत":1279,"लग":2035,"लक":1497,"र्":27657,"रो":5583,"रै":502,"रॉ":358,"रे":18745,"गां":472,"रू":3986,"लं":804,"री":10641,"रु":2131,"रि":9664,"रा":30478,"रह":3970,"रस":3428,"रश":605,"रव":1845,"रल":964,"वं":3802,"ल्":5184,"लो":3980,"चन ":356,"लै":601,"ले":6664,"लु":426,"ली":6031,"लि":10617,"ला":9912,"लव":4226,"शब":1373,"वो":877,"वै":1045,"शन":10719,"वे":7461,"व्":3139,"शर":623,"वह":1716,"वव":621,"वश":922,"वस":1629,"वृ":595,"शत":728,"वा":18296,"वि":15882,"वी":3437,"वप":710,"वन":1882,"वध":1955,"शक":894,"वल":726,"वर":5872,"वय":393,"वज":391,"वक":455,"वत":1673,"षे":3151,"सन":2988,"सप":658,"सभ":1408,"सब":1580,"सम":9143,"ष्":6623,"सर":4398,"सल":818,"सव":526,"षा":3622,"सट":463,"षि":1967,"सत":766,"सद":867,"शै":363,"शे":1186,"श्":6992,"शो":892,"षय":360,"सक":10544,"शह":1374,"शी":1148,"सं":12512,"शु":1050,"शा":4600,"शि":3780,"गवा":390,"षण":1876,"है":50827,"हे":2158,"हु":7130,"ही":5060,"हि":5996,"हा":10508,"ह्":694,"हो":8889,"हन":827,"से":22788,"सु":1954,"सी":5308,"हत":2956,"सू":1760,"चल 
":435,"सि":5630,"सा":11391,"सह":931,"सस":498,"हव":548,"हस":476,"हल":1812,"हम":833,"स्":33294,"हर":3284,"सै":435,"सो":953,"़त":356,"ात":13410,"ाथ":2015,"ाण":2291,"ाठ":433,"िं":3492,"ाड":1607,"ाट":1520,"ाब":2717,"ाभ":451,"ाप":4193,"ाफ":948,"ान":23672,"ाद":7103,"गुर":518,"ाध":1672,"ाव":4478,"िख":1296,"िक":17099,"ाल":13682,"ार":38751,"ाय":6583,"िए":2749,"ाम":8252,"िज":2534,"ाह":4362,"िच":656,"ास":7258,"ाष":4485,"िग":560,"ाश":1899,"़ी":1936,"ां":6856,"़ा":2260,"़ि":839,"ाँ":2505,"़ो":448,"ाइ":2283,"ाई":2655,"़े":807,"ाउ":506,"ाओ":2028,"ाक":3759,"़्":460,"ाए":1076,"ाच":1779,"ाज":7904,"ाग":3365,"ाख":925,"ीड":730,"ुं":2936,"ा।":4894,"ुई":845,"ीत":2135,"ुआ":2021,"ीप":1050,"ीन":3683,"ुए":1050,"ीम":1042,"ीय":7229,"ीब":439,"ील":1370,"ुओ":472,"ीर":2043,"ुग":719,"ुख":2469,"ीव":1580,"गीत":642,"ुक":2798,"ुज":614,"ुछ":884,"ीस":1060,"ुट":385,"िट":1437,"ीं":3186,"गिक":498,"िण":1209,"गाल":415,"ित":17874,"िद":3772,"िध":2081,"िन":9694,"िप":1804,"िब":505,"िभ":1498,"िम":3142,"िर":6200,"िय":18344,"िल":8308,"ीक":2676,"िश":4603,"िव":3402,"िस":8212,"िष":1899,"ीच":800,"िह":1477,"ीज":592,"ीट":1012,"ेव":2738,"ेश":12536,"ैक":523,"ेल":9698,"ेय":725,"ेर":2606,"ेम":795,"ेब":355,"ेप":664,"ेन":6508,"ैद":560,"ैत":376,"ैज":408,"ेह":634,"ेस":4417,"ेष":1173,"ैर":490,"ैल":1189,"ैन":999,"े।":1956,"ॉन":366,"ैस":1223,"ें":31320,"ेक":2519,"ेख":1744,"गों":941,"ेट":1393,"ेड":787,"ैं":11503,"ेत":4873,"ेद":951,"ेग":371,"ेज":1606,"ृत":2832,"ृष":1881,"गोल":437,"ुत":2428,"ुण":594,"ुड":541,"ुन":2123,"ुध":368,"ुद":1873,"ुप":1093,"ुर":6463,"ुम":1318,"ग्ल":427,"ुल":2104,"ुष":645,"ुस":2362,"ुव":1118,"ग्र":3597,"ूच":523,"ूट":2497,"ूत":791,"ी।":1527,"ून":1187,"ूप":2981,"ूब":457,"ूम":650,"ूर":5314,"ूल":1317,"ूष":1076,"ूस":930,"ूह":483,"्व":16523,"्श":2596,"्ष":9849,"्स":6797,"्ह":2245,"्भ":823,"्म":10658,"्य":26066,"्र":57172,"्ल":4149,"ची ":462,"ौत":437,"ौद":469,"ोर":2026,"ोल":2394,"ोब":410,"ोम":1176,"ोस":741,"ोष":453,"ोह":815,"ोश":547,"ोव":518,"्ण":2398,"्त":17955,"्ड":2149,"्ट":15553,"्ठ":1556,"्ञ":2460,"्फ":981,"्ब":2118,"्प":6879,"्ध":4156,"्न":3593,"्थ":9797,"्द":7520,"ौर":1279,"्ज":1507,"्च":2621,"्ग":2050,"्क":4547,"ों":15790,"ै।":21739,"ोई":504,"ॉर":357,"ॉल":485,"ोज":1283,"ोड":4892,"ोट":1429,"ोद":429,"ोत":5502,"ोप":971,"ोध":664,"ोन":2390,"ोक":1863,"ोच":771,"ोग":4303,"००":2062,"१०":362,"१८":536,"१९":2833,"२०":1612,"९६":475,"९५":380,"९८":427,"९७":465,"९९":438,"जन ":1003,"जब ":495,"चती":1825,"चना":962,"गई ":460,"के ":35060,"का ":19876,"कि ":2326,"की ":19870,"कल ":368,"कम ":375,"कर ":3798,"और ":11340,"कित":562,"किन":799,"काफ":651,"काम":601,"कार":8097,"काल":1674,"काश":1174,"काव":390,"कास":610,"किप":365,"किय":4402,"किल":674,"किस":2621,"कीय":409,"कुछ":880,"कुल":589,"कुम":405,"कां":514,"कान":492,"कहत":1259,"कहल":373,"कहा":1242,"कला":836,"गी ":381,"कवि":783,"कम्":429,"कर्":878,"गा ":602,"करा":372,"करे":537,"करत":2060,"करण":990,"करन":3111,"कथा":387,"कता":1005,"कते":720,"गर ":1575,"खंड":407,"गत ":745,"खा ":869,"कंप":421,"को ":9463,"ओं ":2427,"एवं":2948,"एक्":3566,"कई ":776,"ओर ":406,"उसक":842,"उसे":365,"उपन":475,"उपय":550,"एँ ":417,"एक ":11574,"आदि":993,"आधा":542,"आर्":522,"आवश":646,"उन ":374,"इति":479,"अथव":383,"अधि":1401,"अनु":2084,"अध्":641,"अन्":1665,"अने":519,"अपन":2305,"इस ":3686,"अभि":793,"अमे":560,"अलग":451,"अर्":1665,"अवध":1855,"उन्":1250,"उद्":651,"उनक":1565,"उत्":2559,"उस ":606,"इनक":960,"इन्":786,"इसक":6193,"इसम":1168,"इसी":419,"इसे":1083,"इस्":411,"ेश्":531,"ेशो":391," ओर":436," कं":518," कई":773," और":11423," कन":365," कम":1021," कर":8774," कल":965," कव":757," कह":3125," की":14772," कि":9638," का":20292," कृ":765," कु":2268," के":32645," 
कै":557," गई":551," को":14690,"ेशन":8153," गए":382," क्":4800,"ेवा":915," एक":15549," एव":3020," ऐस":714,"ै। ":17713,"ोई ":492," चौ":410," जग":453," चु":690," ची":475," जं":1512," चि":1251," चा":1105," चे":563," जर":355," जल":669," जब":682," छो":700," जह":416," छू":1830," जन":2746," ज्":953," जि":5438," जा":9396," जु":639," जी":1466," जो":3609," जै":1100," गर":416," गय":3607," खे":428," गण":526," घं":1793," खा":713," गो":922," ग्":1394," गु":1570," गा":1413," । ":3304," चल":837," चर":408," अं":2469," अब":411," अप":3085," अन":4720," अध":2196," अथ":432," आक":690,"ेता":622," अल":1382," अर":2119," अम":1263," अभ":1046," आज":524," अस":670," अव":2716," आत":825,"ेत्":3123," इं":1185," अक":778," अग":573," आं":486," अत":780,"ेपा":393,"ेन्":1060,"ेना":477,"ेयर":374," इस":13352," इल":428," इन":2514,"ों ":14584," इत":729," आस":419," आर":1183," आम":378," आय":881," आव":1052," आन":491," आद":1214," आध":1052," आप":1294,"ेलव":3726," उस":2203," उप":2582," उन":3214," उद":990," उत":2673,"ेरि":755," वा":3866," वी":554," वि":11396," वृ":367," शत":617," वस":497," वह":1459," व्":2079," शर":525," वे":1801," वै":880," शब":1343," शु":774," सं":11345," शि":1818," शा":2156," शह":1270," सक":1827," श्":1605,"ैसे":672," शक":380," वर":2508," लग":1362," ला":1268," लि":5639," ले":1874," लो":1934," रख":1043," रज":470," या":6370," यह":10383," रच":735," यू":702," यु":969," यो":670," ये":2040," रा":8448," रि":478," रह":2139," रे":6398," रू":2374," रु":449," लं":500," रो":897,"ोर ":430," हो":7690," हि":3553," ही":2071," हा":1189," हु":3748," हे":505," है":50701," सम":6692," सभ":777," सब":1516," सन":1812," सर":2776," सट":433," सद":698," सत":571," हम":409," स्":16029," हर":704," सो":571," सा":6452," सि":2848," सह":892," से":17371," सी":1037," सु":1695," सू":1297," दर":820," थे":2324," दू":1295," दृ":400," दी":559," दु":776," दा":748," दि":4337," दक":983," त्":639," तो":1173," था":4674," थी":1702," तम":403," तर":977," तथ":2624," तत":379," ता":833," ति":580,"ोग ":2225," ती":840," तु":355," तक":1670," डा":569," डि":508," ट्":3950,"ोच ":425," मो":707," मे":29038," मै":859," मू":889," यद":611," मा":5157," मि":3653," मी":694," रं":422," मु":2959," मह":3455," मर":397," भौ":360," मन":1109," मध":857," भू":1742," भी":4132," भा":10893," भर":368," ब्":1174," बे":646," बै":464," बो":710," बा":3995," बि":1259," बी":1196," मं":1754," बु":605," भग":427," बह":1547," बस":477," फ्":403," बर":576," बल":422," बद":685," बन":2421," फे":481," फा":416," बज":3787," फ़":822," फि":728," बड":1101," प्":19145," पो":399," पि":711," पा":4190," पु":2921," पी":496," बं":729," पे":601," पै":416," पृ":1225," पू":2103,"ोड ":4034," पर":10662," पश":1088," पह":3539," पड":455," पट":420," पद":1827," पत":1117," न्":487," नह":1717," नि":5215," ना":4741," पं":1035," नी":536," ने":3413," ध्":387," नव":592," धा":800," नद":808," दे":3244," द्":5503," दो":1318," धर":1010," नग":840,"ेंट":533,"ेंद":417," ई ":361,"ेल ":4266,"ेर ":787,"ेस ":3757,"ेष ":525,"ेश ":2529,"े। ":1586,"ेकि":361,"ेक्":824,"ैंड":368,"ैं।":6253,"ेज़":466,"ेजी":411," व ":823,"ृष्":1735,"ें ":29625,"ृति":1045,"ृत्":636,"ेन ":4101,"ेद ":371,"ेज ":388,"ेट ":613,"ैं ":4429,"ेक ":779,"ुष्":405,"ुवा":512,"ुला":407,"ुरा":1009,"ुरस":482,"ुर्":818,"ुरु":791,"ुरू":359,"ुमा":600,"ुनि":743,"ुना":530,"ुद्":1350,"ुत्":627,"ूसर":611,"ूषण":1023,"ूर्":2859,"ूरी":482,"ूटत":1807,"ुस्":700,"ुसा":1149,"िकल":459,"ाला":1369,"ालि":2288,"ाली":1852,"ाले":1192,"ावि":362,"ावा":595,"ुत 
":1429,"िका":3179,"िकि":844,"िकी":876,"ावर":399,"िको":367,"िक्":1744,"ाषा":2056,"ासन":527,"ाष्":2240,"ाशि":573,"ासक":514,"ाहि":1577,"ासि":587,"ासा":469,"ासी":381,"ाहर":429,"ास्":1433,"िज्":1407,"ुर ":2020,"िता":1136,"िति":559,"ुल ":595,"ित्":3274,"िद्":3069,"िधि":924,"िधा":871,"िनट":1785,"िना":538,"िनि":795,"िन्":3490,"िपी":396,"िभि":546,"िभा":779,"ियन":355,"ियम":677,"िमी":551,"िमा":800,"ियो":3134,"िये":2350,"िया":10338,"िर्":2740,"िरा":423,"िलत":401,"िले":1007,"िल्":2138,"िलो":516,"िला":1775,"िसम":787,"िष्":1088,"ी। ":1157,"िशा":959,"िश्":1982,"िशे":677,"िसक":1052,"िवा":1432,"िवे":381,"ीका":588,"िवर":417,"ूप ":2670,"िहा":1270,"ून ":537,"िसे":557,"िसी":1508,"िस्":2756,"ीडि":468,"ुंब":372,"ुंच":1928,"ीटर":673,"ूल ":705,"ीति":750,"ूर ":759,"ुआ।":435,"ूह ":393,"ुओं":451,"ीर्":408,"ीमा":484,"ुक्":1845,"ीवन":532,"ुख्":1068,"ृत ":890,"ीं ":2774,"ित ":12024,"िण ":703,"िन ":1347,"िल ":1061,"ीक ":803,"ांग":686,"ांस":610,"िम ":1068,"ांत":1630,"िय ":463,"िर ":1319,"ीच ":562,"ाएँ":397,"िश ":406,"िस ":650,"ा। ":4040,"ुआ ":1392,"ीत ":751,"ागर":849,"ाक्":373,"ाकि":1058,"ाका":641,"ाकर":472,"ाओं":1931,"िंद":1176,"ाड़":911,"िंह":577,"ुए ":894,"िंग":982,"ाटक":402,"ाजा":802,"ाजि":402,"ाज्":2259,"ीप ":552,"ाजन":843,"ाजध":594,"ाजस":382,"ाची":795,"ीन ":2404,"ुई ":687,"ाने":2242,"ाना":1838,"ानि":2065,"ानी":2791,"ुख ":1161,"ानव":590,"ानस":622,"ाध्":419,"ापन":842,"ान्":1734,"ानो":532,"ादी":544,"ादा":388,"ादि":1157,"ानत":633,"ाधि":410,"ानक":524,"ाति":524,"ाता":4840,"ाती":1316,"ाते":918,"ात्":3549,"ील ":655,"ीय ":6964,"ीर ":855,"ारी":1527,"ारि":1016,"ारा":6234,"ारस":394,"ार्":5244,"ारो":922,"ारू":465,"ारे":707,"ालय":1677,"ामि":517,"ामा":1032,"ायक":414,"ाम्":377,"ायन":449,"ामी":544,"ाया":1613,"ायी":366,"ारक":420,"ारण":1386,"ारत":7763,"ाबा":770,"ाब्":555,"ामक":498,"ुछ ":876,"ापा":517,"ाप्":901,"ाफी":651,"ां ":1658,"़ी ":1669,"ाई ":2362,"़े ":628,"हों":935,"है।":21737,"होत":4643,"होन":1130,"ह्म":356,"़ा ":1290,"ाँ ":1565,"ाग ":1189,"ाथ ":1651,"ाद ":3112,"ाण ":964,"ात ":1397,"ान ":8758,"ाज ":1013,"ाश ":423,"ाव ":1068,"िक ":8100,"ाह ":929,"ास ":2767,"ाम ":3709,"िए ":2584,"ाब ":747,"ाल ":3360,"ार ":11273,"ाय ":1113,"समु":515,"समा":1211,"समू":429,"सरक":1452,"समे":1977,"सम्":2440,"सर्":919,"सबस":1305,"समय":1034,"ष्य":501,"ष्ण":694,"ष्ठ":1362,"सभी":533,"सभा":753,"ष्ट":3181,"षेत":2985,"सन्":672,"हत्":826,"हते":1441,"सेन":499,"सें":450,"सीम":423,"सिर":645,"सूर":646,"सूच":437,"सां":361,"साह":1353,"सिद":1352,"सित":526,"साध":457,"सार":1977,"साम":1236,"साय":536,"सिक":706,"साग":441,"सिं":774,"साथ":1234,"सहा":390,"ससे":419," १८":501," १९":2789," २०":1512,"हला":582,"हले":581,"हरा":369,"स्व":2405,"स्य":682,"स्म":485,"स्ल":386,"स्थ":6865,"स्प":4184,"स्ट":9331,"स्त":5132,"स्क":2303,"सेव":720,"हैं":10612,"हें":816,"हुत":957,"हुई":797,"हुए":951,"हुं":1933,"हुआ":1762,"हिन":1798,"हित":1670,"हीं":1893,"हास":1027,"हाव":428,"हार":1703,"हान":874,"हिं":1060,"हाँ":938,"हां":1185,"हे ":650,"है ":18074,"सकी":3365,"सका":2827,"सके":2127,"सत्":357,"षिण":921,"सटी":407,"षित":444,"हो ":1330,"शेष":769,"शों":440,"हा ":1528,"सकत":1521,"ही ":2790,"श्व":2006,"श्र":1917,"श्य":1102,"श्च":1235,"शहर":1243,"से ":20466,"सी ":3799,"हर ":1676,"संच":2031,"शिय":571,"संक":898,"संग":1492,"संख":1034,"शिव":510,"संघ":401,"संब":719,"संप":1355,"संय":444,"संस":2227,"शित":453,"शाह":383,"शास":1407,"शिक":1243,"शाल":480,"वेश":377,"वेद":482,"शता":469,"सर ":563,"सा ":1620,"व्य":2741,"शब्":1328,"वों":401,"वर्":3433,"षा ":2805,"वरी":565,"ववि":516,"शक्":405,"वश्":752,"वस्":1030,"सन 
":1657,"वाद":1110,"वान":862,"वाच":378,"विक":1766,"वाल":2753,"वास":1153,"वार":6241,"वाय":555,"वाम":390,"वित":981,"विद":1842,"विध":1382,"विज":1255,"वाह":694,"विच":389,"वीं":679,"विष":806,"विश":2580,"विस":375,"विव":625,"विभ":1112,"वीप":490,"वता":365,"वधि":1770,"वपू":515,"शा ":783,"षण ":1678,"शी ":479,"वंश":456,"शन ":10109,"वे ":5027,"वि ":440,"वा ":1780,"वी ":1441,"ल्प":398,"ल्म":761,"ल्य":463,"ल्ल":1775,"लोक":1046,"लोग":827,"लों":837,"वह ":1195,"लोम":397,"वल ":387,"लेख":932,"लेक":881,"लिय":3022,"वर ":730,"लाक":373,"लात":527,"लिए":2317,"लाल":465,"लिख":696,"लिक":529,"लित":2228,"लिप":379,"वन ":938,"लवे":3731,"लना":388,"लता":462,"लती":425,"वं ":2957,"लगभ":597,"लगा":459,"रेस":3665,"रोग":432,"रों":2025,"र्श":1019,"र्व":3266,"र्स":388,"र्ष":1579,"र्म":3443,"र्भ":362,"र्य":2627,"र्थ":2552,"र्द":1098,"र्ध":360,"र्न":513,"र्फ":531,"र्ब":360,"र्ट":763,"र्ड":507,"र्ण":1686,"र्त":1633,"र्ग":1726,"र्क":924,"र्ज":853,"र्च":479,"रीक":801,"रिव":843,"रिय":2208,"रीय":1696,"रीर":445,"रुप":452,"रूप":2871,"रें":640,"रेज":901,"रेल":6023,"रेन":3698,"रसा":555,"रसि":1094,"रहत":499,"रहा":649,"रहे":489,"रस्":963,"ले ":3374,"रां":1067,"रान":1776,"राप":737,"रात":590,"राण":691,"राज":5799,"राच":861,"राक":432,"रिट":356,"रित":976,"राष":2127,"रास":461,"राम":1378,"राय":1077,"रार":802,"रिक":2624,"राव":460,"ला ":4369,"रयो":985,"रयु":635,"रम्":388,"रमा":570,"रमु":1096,"रवा":666,"ली ":4841,"रने":2577,"रना":959,"रदे":1187,"रदा":576,"रभा":427,"योज":542,"योग":2669,"यों":3685,"या।":1162,"युक":1333,"युत":419,"युद":407,"यान":790,"याप":561,"यात":2560,"याद":888,"यास":655,"यिक":384,"याल":1423,"यार":471,"याय":535,"रता":1516,"रति":1618,"रती":4166,"रते":811,"रत्":447,"रथम":512,"लय ":1619,"यूट":437,"यून":379,"रणा":516,"रचन":643,"रक्":994,"रखे":433,"रजि":411,"यां":725,"याँ":525,"यहा":1528,"लन ":775,"रे ":1761,"मस्":459,"महा":2097,"महत":756,"यक्":1167,"रु ":369,"यका":377,"री ":6215,"मृत":432,"मूह":442,"मूल":727,"मुद":734,"मुख":2339,"रंभ":445,"मुं":391,"मीट":638,"रंग":680,"मिल":1800,"मित":732,"मिन":2223,"मार":2023,"माल":612,"मिक":732,"रो ":378,"माण":1011,"माध":371,"मात":697,"मान":5551,"माज":766,"मां":436,"मों":431,"लग ":468,"यदि":507,"में":27306,"मेर":842,"मेल":1950,"यता":597,"रका":3522,"म्र":418,"म्प":956,"म्ब":1357,"म्म":1860,"रत ":4510,"रण ":3158,"या ":17033,"यु ":436,"यी ":716,"भिन":962,"भाव":792,"भाष":2240,"भार":7799,"भाग":1261,"रम ":841,"यंत":450,"ये ":4867,"मध्":813,"भूम":411,"भूष":1010,"रल ":672,"रा ":9045,"मर्":364,"रह ":1087,"मा ":1358,"भगव":386,"मी ":1758,"मे ":1785,"यन ":960,"बहु":1203,"बसे":1366,"बाल":449,"बार":924,"बाद":1868,"बां":380,"यर ":634,"मंत":477,"मंद":933,"बिह":404,"बीच":558,"मंड":367,"यम ":869,"रक ":417,"यह ":8676,"बोल":432,"ब्र":1124,"ब्द":1896,"प्य":460,"प्र":23891,"प्त":1355,"भा ":824,"भी ":4811,"मन ":544,"फ़ि":394,"बजे":3647,"फिल":452,"मय ":1094,"बड़":987,"बदल":617,"यक ":1025,"फेर":414,"बना":1428,"बन्":526,"फ्र":464,"पहु":2050,"पहल":1024,"पश्":999,"बी ":558,"बा ":412,"पर्":1203,"परम":473,"पयो":528,"परि":1837,"परा":615,"मक ":839,"पृष":917,"पुस":378,"पूर":2834,"पुत":428,"पुर":3647,"पीड":441,"बंग":529,"बंध":817,"पास":574,"पित":787,"पाक":1033,"पान":519,"पात":387,"पाद":1538,"पार":1007,"पाल":950,"पक्":435,"फी ":766,"न्त":2330,"न्ध":640,"न्न":1957,"न्द":3701,"न्य":2792,"न्म":1190,"न्ह":2015,"नों":1406,"पद्":1215,"पदा":458,"भग ":605,"पनी":874,"पना":1055,"पन्":743,"पने":1546,"बर ":1142,"पत्":1249,"पति":612,"पड़":437,"नमे":432,"नवर":402,"नसं":440,"नदी":699,"ध्य":2164,"नुस":1344,"पंज":567,"नीत":724,"नेप":393,"नेत":472,"नेक":552,"बई 
":387,"नही":1669,"नसभ":406,"निव":567,"निर":2352,"निय":1801,"निध":617,"नित":1221,"नाव":392,"निक":2637,"नाय":738,"नाम":2777,"नार":680,"नान":715,"नात":423,"नाथ":458,"नाट":450,"नाड":359,"नाग":557,"द्द":569,"द्व":5706,"द्र":2473,"द्य":2403,"द्म":1046,"द्ध":2842,"धर्":969,"नका":1135,"नकी":794,"नके":1054,"देख":471,"देव":1125,"देश":3481,"दों":393,"दोन":372,"धित":713,"धिय":418,"धार":1754,"धिक":1763,"धान":1811,"पर ":7035,"नता":441,"नते":555,"नगर":1205},"n_words":[3436892,4107546,2722787],"name":"hi"}
\ No newline at end of file
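The "n_words" triple is what turns the raw counts into probabilities: entry i holds the corpus-wide total of (i+1)-gram occurrences, and langdetect's detector_factory estimates P(gram | language) as freq[gram] / n_words[len(gram) - 1]. The real detector then samples n-grams with random perturbation and renormalizes iteratively, so the sketch below is a deliberately simplified log-likelihood scorer over profiles loaded as above, not langdetect's actual algorithm:

    import math

    def ngrams(text, n):
        # All n-grams of length n, matching the 1..3-char keys in "freq".
        return [text[i:i + n] for i in range(len(text) - n + 1)]

    def score(text, profile, floor=1e-7):
        # Naive log-likelihood of the text under one language profile.
        freq, totals = profile['freq'], profile['n_words']
        logp = 0.0
        for n in (1, 2, 3):
            for gram in ngrams(text, n):
                count = freq.get(gram, 0)
                p = count / totals[n - 1] if count else floor
                logp += math.log(p)
        return logp

    def guess(text, profiles):
        # Pick the profile that explains the text best.
        return max(profiles, key=lambda p: score(text, p))['name']

The fixed floor for unseen n-grams just keeps math.log defined here; langdetect itself smooths by mixing each n-gram probability with a small alpha-based weight instead.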
diff --git a/nlp_resource_data/langdetect/profiles/hr b/nlp_resource_data/langdetect/profiles/hr
new file mode 100755 (executable)
index 0000000..116cfa0
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/hr
@@ -0,0 +1 @@
+{"freq":{"D":8690,"E":5169,"F":5109,"G":8706,"A":14720,"B":12556,"C":10193,"L":6822,"M":14153,"N":10511,"O":10729,"H":9113,"I":13451,"J":5359,"K":11989,"U":4743,"T":10711,"W":3179,"V":5850,"Q":585,"P":15109,"S":23054,"R":9319,"Y":1450,"X":701,"Z":4251,"f":16193,"g":88687,"d":148916,"e":436858,"b":60942,"c":64868,"a":586818,"n":342270,"o":415815,"l":177559,"m":139424,"j":258985,"k":204303,"h":49394,"i":501698,"w":7347,"v":140168,"u":209613,"t":201984,"s":238514,"r":271701,"q":1755,"p":126499,"z":85944,"y":9646,"x":2022,"í":886,"é":1412,"á":1674,"ó":546,"đ":8171,"ć":16773,"Č":895,"č":46294,"Ž":887,"ž":23555,"Š":1432,"š":26211," l":9617," m":20259," n":49295," o":45630," h":4791," i":61249," j":56434," k":46966," d":31478," e":6507," f":5286," g":21986,"р":912," a":19142,"с":742," b":17477,"т":629," c":5752," z":19710," u":45274," t":21468," w":820," v":15539," p":67380," s":79773," r":20001," J":5152," K":11465," H":8434," I":11848," N":9837," O":7822," L":5986," M":13514," B":11861," C":9435," A":13018," F":4631," G":8304," D":7573," E":4598,"л":648," Z":4104,"к":817," Y":1407," X":526,"и":1209,"о":1253,"н":943," S":19621," R":8667,"в":563," Q":537," P":14387,"а":1816," W":3048," V":5326," U":4477,"е":917," T":9972," č":5596," Č":887," ž":5028," Ž":838," Š":1403," š":4901,"A ":857,"Da":1321,"Co":1834,"Cr":1328,"Ce":588,"Ch":1843,"Du":764,"Do":1328,"Dr":1133,"De":1138,"Di":1002,"Fe":631,"H ":574,"Fa":563,"Eu":879,"Ge":716,"Ga":972,"I ":821,"Fr":1113,"Fo":779,"Fi":649,"Au":808,"Ar":1684,"At":729,"As":560,"D ":829,"Ba":2710,"Af":545,"Am":1341,"An":1467,"Al":1655,"Bu":1011,"Br":2042,"Ca":2160,"Bi":1321,"Be":1685,"Bo":2115,"Ku":844,"Kr":1748,"Ko":2488,"Le":881,"Li":1283,"La":1685,"Lu":593,"Lo":1036,"Me":1818,"Mi":1887,"O ":2633,"Ma":5853,"Mu":793,"Mo":1972,"Nj":820,"Ni":1096,"Ne":1404,"Na":3401,"No":2003,"Ob":956,"Gl":526,"Gr":2253,"Go":1536,"Gu":881,"Gv":612,"Ha":1340,"He":1374,"Hi":570,"Ho":905,"IS":2154,"Hr":2959,"Hu":536,"In":4619,"Is":978,"Ja":1450,"L ":681,"Iz":610,"Je":1026,"Jo":711,"Ju":1563,"Ka":3681,"Ki":952,"Tu":1072,"Tr":1314,"To":1609,"Th":1212,"Ti":993,"Te":1282,"Ta":1685,"St":1916,"Sv":940,"Su":1303,"Wi":722,"Wa":1107,"Vo":662,"Vi":1040,"Va":1022,"Ve":1522,"Pu":746,"Pr":3018,"S ":720,"Pe":1604,"Pa":2898,"Pl":816,"Po":3639,"Pi":867,"Os":790,"Ov":672,"Op":1011,"Or":735,"Se":1283,"Sj":776,"Si":1385,"Sh":659,"Sl":1203,"Sr":1393,"Sp":846,"So":1277,"Ru":1026,"U ":1401,"Sa":3386,"Re":2977,"Ri":1491,"Ro":1097,"SO":2074,"Ra":1418,"b ":1522,"a ":202747,"Za":2365,"Ze":593,"i ":135497,"ađ":2045,"gd":857,"ge":5429,"ga":10157,"ać":1902,"fi":4106,"fs":661,"fr":2013,"ač":10342,"fu":639,"fo":2404,"j ":16449,"he":4881,"ha":5859,"gn":997,"gl":6895,"gi":6788,"gh":714,"gu":4729,"gr":14473,"go":15835,"du":5516,"dv":2741,"g ":19299,"ea":3988,"eb":4045,"ec":3773,"ed":22606,"de":10224,"dg":624,"di":30316,"dj":3005,"dm":1005,"dl":806,"do":12624,"dn":15638,"ds":4759,"dr":13255,"ew":844,"eu":1755,"ev":10791,"ey":838,"ez":15435,"fa":1476,"h ":20168,"fe":2011,"eh":1279,"eg":7983,"ef":1181,"ee":1826,"el":24518,"ek":14461,"ej":2546,"ei":1534,"ep":5699,"eo":3348,"en":44674,"em":19226,"et":18394,"es":16599,"er":32266,"ca":14154,"e ":160018,"br":6048,"bu":5481,"bn":1412,"bo":6809,"bj":1695,"bl":6540,"bi":13442,"be":6031,"db":626,"da":22107,"f ":1430,"cu":2623,"ct":683,"cr":1908,"co":2679,"ck":2130,"ci":21232,"ch":4164,"ce":10558,"c ":2983,"az":15697,"ay":1948,"ba":10149,"d 
":22851,"at":29447,"as":22343,"ar":30709,"aw":1498,"av":28227,"au":3869,"ak":15591,"al":35013,"ai":3078,"aj":16895,"ao":6448,"ap":10100,"am":17849,"an":70440,"ac":13980,"ad":21936,"aa":799,"ab":3879,"ag":6276,"ah":2814,"ae":1668,"af":2077,"nu":8144,"nt":10450,"ns":14593,"ič":14290,"no":45797,"nn":1399,"nz":952,"ny":715,"oe":678,"of":2150,"oc":4342,"od":40790,"oa":2451,"ob":12953,"om":28810,"on":23346,"ok":14795,"ol":17826,"oi":3961,"oj":40426,"og":22833,"oh":1159,"ot":12114,"os":23349,"ov":31034,"ou":2922,"op":11191,"oo":1568,"or":33494,"jč":526,"r ":11028,"ow":1054,"oz":7848,"pe":6182,"pa":15403,"pl":8816,"pn":1287,"po":36246,"ph":606,"pi":11043,"pj":1476,"lo":18651,"ln":5989,"lm":1222,"ll":3111,"ls":2651,"lu":8623,"lt":2358,"o ":61519,"ma":31717,"mb":2862,"dž":894,"me":24452,"ml":1963,"eš":3371,"mi":10591,"mj":5104,"mn":1665,"mp":2261,"mo":11753,"mr":1000,"ms":2000,"mu":3945,"p ":2149,"na":87240,"nc":5972,"nd":11672,"ne":33576,"nf":891,"ež":1706,"ng":7899,"ni":54450,"nj":27833,"nk":2801,"ić":2802,"jv":1605,"eđ":3970,"ju":19590,"eč":2671,"js":8691,"jn":2724,"jo":4699,"eć":4631,"jk":855,"ki":28341,"ke":16969,"kc":1026,"ka":37663,"m ":37899,"ks":2734,"kt":4893,"ku":14523,"kv":1615,"ko":63668,"kr":10108,"kl":4333,"km":1244,"kn":1083,"li":41243,"lk":741,"lj":24378,"le":17764,"ld":889,"lg":643,"la":37196,"lb":2414,"n ":24345,"hr":2448,"hv":1089,"ht":807,"hu":1937,"hi":4652,"hn":862,"ho":4536,"id":4699,"ic":20781,"ib":2814,"ia":4852,"ih":18991,"ig":5433,"if":1923,"ie":2060,"k ":14010,"ir":12272,"is":21664,"it":16417,"iu":848,"iv":14019,"aš":3959,"ij":57037,"ik":23850,"il":22797,"im":27723,"in":47299,"io":10059,"ip":4978,"je":134763,"až":1877,"ji":26676,"iz":19026,"l ":6966,"ja":39549,"z ":7997,"ož":2534,"oš":2893,"wi":861,"rč":1709,"y ":3377,"wa":2878,"we":956,"vl":4881,"vj":3877,"vk":562,"vi":24026,"vu":3419,"vr":6622,"vs":1342,"vn":9959,"vo":21455,"uz":3112,"uv":1349,"ve":22017,"vc":593,"va":35406,"x ":785,"ui":1893,"uj":4079,"uk":4559,"ul":5933,"pć":2758,"ue":1973,"ug":8425,"uh":1544,"ur":9708,"us":10059,"ut":7404,"um":7672,"un":8708,"uo":923,"up":11863,"tu":9961,"tt":1393,"tv":7233,"ođ":1449,"ub":5211,"ua":3882,"ud":5733,"uc":1692,"w ":858,"to":28843,"tn":5773,"tm":575,"tl":1860,"oć":686,"ts":8790,"tr":16932,"oč":4671,"tp":896,"te":22658,"tk":2453,"tj":1700,"ti":34096,"th":3097,"v ":4865,"ta":38983,"su":13292,"sv":6451,"ss":1881,"st":55588,"sl":7897,"sk":52271,"sn":7159,"sm":3278,"sp":5046,"so":5247,"sr":3449,"sc":1522,"se":24429,"sh":2381,"sj":4697,"si":10381,"rz":1007,"u ":87089,"sa":16094,"rr":1162,"rs":8936,"rt":5687,"ru":17434,"rv":8435,"ry":930,"rp":1334,"ro":34834,"rn":10194,"rm":2849,"rl":1655,"rk":2924,"rj":894,"ri":44000,"rh":1312,"rg":3269,"iž":1025,"re":32485,"rd":3070,"rc":1885,"rb":2322,"ra":64303,"t ":14252,"qu":1390,"iš":6125,"s ":15910,"pt":1447,"pu":6628,"pp":658,"pr":27731,"ps":2712,"už":5556,"uš":2233,"rž":4670,"zg":1072,"rš":1722,"zi":22879,"zb":2089,"zd":2018,"ze":4698,"za":19270,"zv":4353,"uč":5083,"zr":2246,"zu":2322,"zo":3146,"zn":8371,"zm":2468,"zl":2200,"uć":2114,"ye":523,"ya":1738,"yo":520,"á ":773,"é ":601,"ć ":1732,"ći":5749,"ću":1057,"ća":4248,"će":3354,"či":7487,"čj":2822,"čk":13325,"čl":554,"če":5735,"ča":5202,"č ":1473,"đe":2942,"đa":1580,"čn":7182,"ču":1818,"đu":2924,"š ":1063,"šć":1355,"še":2480,"ša":1860,"šp":635,"šn":3132,"šk":2964,"šl":591,"ši":3525,"šu":567,"št":7272,"žu":2110,"žn":3428,"žb":852,"že":4351,"ža":6201,"ži":5092,"đen":1909,"đer":597,"đu ":1688,"đun":708,"čun":831," Ga":961," Ge":702," Fo":742," Fr":1109," Fi":643," Ha":1336," He":1372," 
Go":1534," Gr":2247," Gu":878," Gv":612," Gl":525," Hu":528," IS":2110," Hr":2946," Ho":895," Hi":565," Je":1023," Ja":1447," Iz":607," Is":977," In":4605," Ka":3666," Ki":914," Jo":707," Ju":1556," La":1670," Le":867," Li":1265," Ko":2481," Kr":1745," Ku":843," Ma":5817," Mi":1882," Me":1804," Lo":1032," Lu":587," Ne":1391,"а ":562," Na":3399," Nj":820," Ni":1084," Mo":1968," Mu":787," Am":1333," An":1450," Al":1637," Af":533," Ba":2689," Au":794," At":724," As":558," Ar":1651," Be":1676," Bi":1296," Bo":2110," Br":2033," Bu":1004," Ca":2140," Ce":585," Ch":1828," Cr":1326," Co":1819," Da":1296," Di":998," De":1131," Dr":1125," Do":1300," Du":759," Eu":878," Fe":623," Fa":554," Wi":714," Wa":1098," Ze":593," Za":2362," a ":4274," Ov":665," Os":789," Or":732," Op":1008," Po":3623," Pl":812," Pi":864," Pe":1599," Pa":2881," No":2002," Ob":956," Ra":1412," Ro":1089," Re":2970," Ri":1491," Pr":3007," Pu":743," Sv":937," Su":1301," St":1875," Ta":1677," Th":1189," Ti":990," Te":1277," Tr":1295," To":1604," Ru":1022," Sa":3371," U ":1282," Sh":655," Si":1373," Sj":776," Se":1273," So":1270," Sp":836," Sr":1393," Sl":1201," Va":1018," Ve":1517," Vi":1035," Vo":661," Tu":1066," ja":1334," iz":11704," je":51007," im":3911," in":3359," il":7148," is":4046," ka":7800," ki":1128," jo":810," ju":3163," ha":604," he":608," gl":3276," gr":8161," go":6544," ih":786," ig":1120," hi":648," ho":696," hr":1744," nj":3220," ni":1874," ne":6451," na":34503," mu":1067," mo":4787," mn":528," ok":4011," on":1068," od":15714," of":856," ob":6528," no":2407," le":827," lj":2291," li":2543," la":2172," ku":1972," kn":829," km":1048," kl":1410," kr":4664," ko":26854," me":3676," mi":2188," mj":1905," o ":1165," ma":4891," lo":1177," am":2645," an":1498," ak":848," al":3416," au":1442," ar":1418," at":615," ba":2490," bi":5965," be":1110," bo":2421," bl":1053," bu":691," br":3222," ca":552," et":559," en":2154," el":1166," ek":584," fa":621," fr":941," fo":883," fi":1669," ge":1007," gd":551," ga":1134," i ":27993," cr":1408," ce":1531," ci":1052," da":4574," do":6878," dr":6375," de":2927," dj":1233," di":5389," dv":2035," du":1555," zn":2259," zr":767," zv":702," za":12881," ze":1108," zb":633," už":620," ru":1327," u ":32404," sa":9931," se":16886," sj":3429," si":2544," sn":698," sm":1890," sl":3915," sk":4532," sr":2957," sp":2436," so":944," ra":7698," re":4140," ri":4141," ro":2280," pu":1716," pr":23703," s ":3500," os":3950," ot":2810," ov":1276," op":3149," or":1765," oz":1224," pe":1664," pa":2832," pl":5604," po":28804," pi":966," pj":1218," va":1250," ve":3149," uz":1739," vo":2912," vr":3315," vi":2766," vj":724," vl":1147," ud":648," tv":902," tu":1007," us":1218," ut":721," ur":562," up":1243," um":849," un":999," uk":1055," ul":803," ug":653," ta":2808," st":9162," sv":5803," su":10580," tr":4273," to":2817," th":929," ti":2177," te":5426," če":1938," čl":530," či":1960," št":1607," ši":1148," šp":620," ži":2289," že":880," žu":1586,"Eur":771,"Fra":831,"Her":776,"Gra":1106,"Gor":695,"Ind":3698,"Hrv":2864,"ISO":2034,"Ara":578,"šće":547,"Ame":663,"Car":617,"Bra":541,"Bri":609,"Bos":708,"Chi":621,"Cha":594,"Nal":560,"Nje":555,"Nov":1094,"Opć":766,"Per":568,"Par":726,"Pro":696,"Pri":721,"Pre":828,"Juž":532,"Kal":564,"Kan":524,"Kar":752,"Kra":872,"Man":821,"Mal":573,"Mar":1189,"Mad":576,"Zag":834,"Sta":754,"Sje":767,"Srb":562,"Sre":533,"Slo":726,"Rus":582,"San":791,"Rep":1934,"SO ":2049,"Vel":771,"The":928,"šen":553,"še 
":976,"što":1559,"šte":2571,"šti":879,"šta":884,"štv":1041,"ško":1181,"šnj":2421,"šin":692,"šir":1400,"ški":826,"bje":654,"bja":1027,"bit":2079,"biv":585,"bio":1738,"bil":3046,"bin":696,"bij":1580,"blj":1003,"bli":4296,"bla":688,"bol":1257,"boj":638,"bog":903,"bič":654,"bno":610,"bor":1232,"be ":1311,"ban":1537,"bal":1792,"bav":860,"bar":843,"bi ":1256,"ber":918,"ben":1811,"ca ":10450,"car":656,"can":1075,"ce ":5505,"bri":882,"bro":2066,"bra":1828,"bu ":732,"bum":1939,"buh":796,"aka":2848,"am ":2209,"ake":1450,"aki":683,"aji":1629,"ajn":1118,"ajs":690,"aju":3559,"ajv":1325,"al ":2419,"aja":1758,"aje":2556,"ain":623,"ak ":2449,"aj ":1426,"aha":673,"agr":1324,"agu":525,"ago":1080,"anu":1384,"ano":4034,"ant":2834,"ans":7065,"ane":2342,"ang":1485,"ani":9755,"anj":10858,"ank":1213,"ana":11419,"anc":2218,"and":3683,"amo":1853,"ami":1538,"ame":3781,"amb":801,"ama":5199,"ao ":5760,"alu":766,"alt":538,"als":661,"alo":1706,"aln":4903,"all":806,"ali":7309,"alj":2503,"ale":2883,"ala":6700,"alb":1948,"an ":10347,"aku":618,"akt":1211,"ako":4291,"aba":762,"abi":791,"ae ":719,"aca":3812,"ad ":3345,"ac ":1484,"afs":539,"afi":554,"ai ":709,"aga":937,"age":778,"ado":1169,"adr":1028,"adn":3155,"adi":2550,"ade":1153,"ads":935,"adu":1422,"aci":6004,"ach":727,"ada":5689,"azn":685,"azi":7453,"azl":1063,"azv":1013,"azu":592,"aze":804,"aza":1040,"azb":1010,"aya":527,"ba ":2218,"at ":2601,"arh":564,"are":1147,"ard":1409,"ara":6301,"aro":3575,"arn":1712,"ark":1048,"ari":4189,"aru":579,"ars":3100,"art":1393,"asa":826,"asi":1226,"ash":669,"ase":3354,"asn":1043,"asp":540,"ask":1064,"asl":534,"ar ":2545,"apa":4410,"api":1243,"apo":975,"apr":526,"aps":650,"apu":900,"as ":2262,"ava":8033,"aut":1108,"avo":1516,"avn":4358,"avl":3013,"avi":4083,"ave":3087,"ay ":531,"awa":719,"avu":1466,"av ":1100,"ata":3446,"ast":9058,"atn":1424,"atk":562,"atr":1028,"ato":2245,"ate":2791,"ati":6984,"ats":4995,"atu":1171,"aus":601,"jeg":2764,"jed":10627,"jec":1103,"jer":2418,"jek":4521,"jel":6299,"jem":5086,"jen":9322,"jez":10141,"jes":4061,"jet":3631,"jev":7451,"jač":750,"ji ":16045,"ažn":588,"jat":632,"jav":2585,"jal":1858,"jak":822,"jan":5965,"jam":1147,"je ":61320,"ješ":1621,"jni":1204,"joj":1250,"jom":1082,"jiv":803,"jim":3670,"jin":1845,"jih":2364,"ječ":1963,"jeć":1172,"eća":1346,"eće":940,"eći":1617,"ito":2406,"itu":706,"its":544,"isk":1391,"isl":604,"iso":539,"isn":807,"isp":569,"isu":779,"ist":10340,"iv ":1925,"ita":3250,"ite":2457,"iti":4001,"ivo":1642,"ivn":1941,"iva":3738,"ivi":1447,"ive":1512,"is ":1700,"ion":3221,"ipa":1930,"ir ":678,"iro":1457,"iri":1540,"isi":976,"ish":564,"ise":556,"isc":574,"isa":1555,"ire":1683,"ira":5142,"it ":890,"ja ":23119,"iz ":4867,"izu":877,"izv":2232,"izr":793,"izo":774,"izn":759,"izm":2019,"izl":668,"izi":2042,"izd":806,"iza":2198,"kih":5870,"kim":2628,"kin":753,"km ":900,"ki ":16671,"ked":578,"ke ":14258,"kci":941,"kra":4133,"kre":1063,"ku ":4982,"kro":1000,"kru":1711,"kri":1186,"kov":2238,"kot":547,"kor":2342,"kop":1083,"kon":3505,"kom":6854,"kol":2347,"koj":25320,"kog":6388,"kod":903,"knj":735,"ko ":9564,"kla":1495,"klo":701,"klj":755,"eđu":2748,"još":665,"jve":805,"eđe":812,"juj":863,"jug":1520,"jud":1914,"jsk":7836,"jst":574,"eči":628,"ju ":11452,"eče":573,"kaz":825,"kat":974,"kar":1304,"kas":686,"kan":1885,"kao":3422,"kal":1067,"kam":671,"kak":633,"kad":1513,"kac":594,"juž":1378,"ka ":22352,"juč":696,"juć":551,"ha ":913,"han":1101,"har":926,"he ":1979,"her":717,"hin":716,"hit":527,"go ":855,"gle":1361,"gla":3728,"god":2754,"gom":912,"gon":803,"gos":769,"gor":1668,"gov":5098,"gu 
":1902,"gru":1369,"gra":9421,"gre":1427,"grč":1302,"ian":1493,"ic ":604,"iba":598,"ibe":551,"ia ":1869,"ifi":824,"ih ":17225,"icu":773,"ici":6028,"ich":645,"ice":4697,"ie ":604,"ica":6876,"idi":521,"ide":1150,"ida":944,"ašn":1360,"ašt":576,"il ":644,"ija":16185,"ije":24598,"iji":7426,"ijo":787,"ijs":5406,"iju":2114,"im ":10462,"ika":7961,"ige":790,"iga":952,"igi":547,"igr":1302,"iho":1185,"ik ":6489,"imo":723,"imp":613,"ime":3556,"imi":856,"inc":1602,"ind":1666,"ina":9909,"ino":2039,"int":1177,"ins":3389,"ine":10127,"ing":2574,"inj":1865,"ini":6764,"inu":1508,"iko":2904,"iki":780,"ike":2426,"ila":3734,"in ":2960,"iku":1705,"ilo":2029,"ill":863,"ilm":540,"ilj":1386,"ili":10385,"ile":865,"ima":9207,"io ":4778,"ils":642,"ilu":562,"hov":1213,"hrv":1503,"hva":1061,"fer":566,"ez ":610,"ezu":527,"eza":1009,"ezn":818,"eze":1246,"ezi":9990,"eta":3704,"ete":898,"eti":2144,"etn":2390,"etl":530,"etk":539,"esn":1243,"est":6673,"eto":1100,"etr":912,"ets":1480,"etu":996,"etv":760,"eve":4353,"eva":1803,"evo":961,"evn":623,"evi":1701,"er ":4432,"epa":580,"es ":2168,"epu":2109,"epo":647,"epr":650,"eri":7215,"erg":698,"ere":1639,"erc":963,"era":4007,"et ":2066,"esk":1461,"esm":595,"esi":709,"ese":1073,"esa":706,"erv":605,"eru":1435,"ert":600,"ers":1367,"ern":3520,"erm":720,"ero":2614,"eki":802,"eko":2778,"eks":1589,"ekt":2568,"eku":789,"en ":4377,"ela":3182,"ele":2918,"eli":4944,"elj":5961,"ell":698,"elo":2967,"elu":1458,"ema":3624,"eme":6131,"eml":1258,"emo":840,"emi":1481,"ene":3561,"eng":1640,"ena":7266,"end":836,"enc":1218,"eno":6088,"eni":7665,"enj":3407,"enu":1213,"ens":1955,"ent":3912,"ego":1890,"egi":1521,"ek ":753,"el ":1052,"ejs":573,"eke":1782,"eka":2350,"em ":3991,"gl ":618,"gij":2556,"gin":523,"gi ":831,"gen":1325,"ger":827,"gdj":702,"ge ":1704,"gar":946,"gal":662,"gan":1740,"ga ":4609,"ađa":855,"ađe":658,"fra":927,"ača":1970,"ače":950,"ačk":3501,"fri":721,"ači":1814,"fsk":622,"aču":958,"for":1658,"ač ":583,"aće":585,"aća":1021,"fil":896,"fik":566,"fin":592,"da ":9164,"de ":2788,"dal":1347,"daj":785,"dat":824,"dar":1385,"dan":5150,"dam":626,"dav":567,"cus":960,"co ":549,"cu ":955,"cea":713,"ch ":519,"ces":1492,"cen":997,"ceg":685,"ci ":6216,"cha":750,"ck ":1290,"che":856,"chi":796,"cij":9280,"cim":1128,"cir":569,"cio":1059,"ed ":1254,"eba":923,"ebe":581,"ean":810,"ea ":773,"duž":619,"ega":1085,"edn":7588,"edi":4719,"ede":1425,"eda":3130,"eg ":2170,"eds":1513,"edo":1202,"eci":935,"eca":935,"dvi":741,"dvo":690,"dva":1103,"drž":4140,"don":1767,"dom":1052,"dol":970,"dok":659,"dov":1189,"dos":617,"diš":1956,"dna":2624,"dne":953,"dni":3564,"dnj":1612,"dno":6490,"dob":1518,"dst":1252,"dra":1195,"dre":1145,"du ":2280,"dru":5517,"dsk":2664,"dic":3889,"der":1371,"des":966,"den":1450,"di ":4014,"do ":2419,"dje":2862,"dim":541,"din":5519,"dio":1844,"dis":1126,"dij":7896,"rađ":876,"rga":1240,"ri ":5834,"rgi":678,"ret":1800,"res":1271,"rev":690,"rez":931,"rać":609,"rač":1555,"reb":1913,"rea":805,"ree":521,"red":6133,"reg":1745,"rem":2802,"ren":2797,"rek":1056,"rel":612,"rep":724,"rdi":692,"re ":4525,"rce":928,"raz":4813,"rd ":686,"rap":1294,"ras":1520,"rat":4060,"rav":4237,"rbi":882,"raj":3616,"rag":678,"ran":11269,"ram":1816,"ral":3163,"rak":1775,"raf":1147,"rad":6716,"rac":1934,"ros":2073,"rot":1439,"rom":2618,"ron":1652,"rop":1763,"roz":1266,"rov":2438,"rob":584,"roa":633,"rod":8265,"roc":1470,"roj":2300,"roi":1622,"rol":793,"rok":933,"rog":1195,"rno":2932,"rič":3609,"rna":1479,"rne":1371,"rnj":845,"rni":2710,"ro ":1317,"rma":1223,"rmi":655,"rlo":551,"rn 
":540,"rkv":521,"rka":695,"ređ":981,"reć":667,"raž":797,"rje":601,"riz":799,"rip":1837,"rio":889,"rir":879,"rit":2510,"ris":3174,"riv":1004,"rig":523,"rij":11045,"raš":597,"ril":1430,"rik":1383,"rin":1848,"rim":1849,"ria":651,"rib":805,"ric":1431,"rid":583,"rug":4029,"rup":1471,"run":520,"ruk":844,"rus":1028,"rva":5151,"rvi":1279,"rve":1037,"rvo":605,"ry ":556,"rsk":5201,"rta":726,"rst":2436,"rti":871,"rt ":714,"ru ":2471,"sad":738,"sam":1694,"san":1142,"sat":604,"sas":3039,"sav":1014,"sa ":5478,"ruž":889,"ruš":721,"ruč":2722,"rzi":592,"shi":607,"si ":1399,"sje":4603,"sis":550,"sin":1783,"sil":686,"sim":836,"sij":991,"se ":14671,"ser":1178,"set":665,"sh ":559,"seb":745,"sel":4338,"spo":2263,"spr":578,"spe":611,"spa":672,"sov":613,"son":650,"sob":1342,"su ":8627,"sre":2282,"st ":3926,"slj":632,"sli":1331,"slo":1832,"slu":1752,"sla":2093,"ski":17159,"skl":527,"sko":15307,"skr":553,"sku":5305,"ska":7019,"ske":5955,"sno":3094,"sna":922,"sni":2255,"sne":697,"smj":1004,"sma":1116,"ste":2346,"sta":16592,"sto":9996,"sti":9753,"stv":3654,"stu":2357,"str":6456,"sus":1535,"sva":788,"sve":1415,"svi":1441,"svj":804,"svo":1576,"taj":996,"tak":2161,"tal":4191,"tac":782,"tav":5990,"tat":1128,"tar":4163,"tan":6149,"tam":583,"te ":6089,"ta ":9339,"pa ":1847,"pe ":1308,"par":1595,"pat":647,"pad":4813,"pal":580,"pan":3273,"pi ":571,"per":1812,"pet":668,"pla":1804,"plj":816,"pli":1041,"ple":3986,"plo":1052,"pje":1454,"pij":889,"pin":4572,"pis":2255,"poz":2191,"por":5631,"pop":1191,"pov":2463,"pot":1881,"pos":3892,"poj":1457,"pog":612,"pom":933,"pon":975,"pok":1642,"pol":3220,"pod":6429,"po ":2082,"psk":1887,"pub":2165,"pti":586,"poč":712,"pra":3170,"prv":1850,"pri":7027,"pre":6249,"pro":8677,"put":1060,"pun":676,"pul":698,"iše":1062,"išn":1267,"išt":2503,"qui":570,"ra ":9271,"ngo":698,"ngl":1753,"ni ":16130,"nga":751,"nej":882,"nek":2622,"nen":652,"nep":734,"ner":1509,"net":1026,"nes":1293,"ng ":2367,"nač":2757,"nez":1162,"nci":2911,"nce":931,"ne ":19985,"ndu":534,"ndo":1052,"ndi":5037,"nde":837,"nda":1627,"ncu":940,"nak":2307,"nal":5099,"nam":1658,"nan":1732,"nap":971,"nar":3446,"nac":3781,"nad":1618,"nag":771,"naj":3818,"nd ":1313,"nav":631,"nat":3622,"nas":6912,"naz":2805,"na ":42477,"mož":877,"nut":1280,"nto":781,"ntr":1002,"nti":2109,"nta":2295,"nte":1843,"nst":1771,"nsk":10758,"nu ":5412,"ičn":3327,"ičk":8118,"iči":1329,"iča":967,"nt ":937,"niš":631,"ns ":584,"noa":520,"nog":6035,"noj":3137,"nom":6601,"nos":6166,"nor":535,"nov":4240,"nič":2517,"ića":608,"no ":15032,"nka":782,"nji":3978,"nje":12914,"nja":6205,"ić ":1370,"nju":1904,"njs":879,"njo":1594,"nij":6542,"naš":1014,"nih":6201,"nic":5054,"niz":1589,"nis":1359,"nit":879,"nir":714,"nim":4415,"nin":1303,"nik":4325,"ogr":2375,"ogu":995,"ogi":1476,"ogl":635,"ogo":1802,"oga":1651,"oj ":12832,"ois":1145,"oim":653,"ok ":1418,"oju":1294,"ojs":576,"ojo":759,"ojn":1052,"oji":10382,"oje":6734,"oja":6046,"ol ":664,"oiz":1012,"oce":973,"oci":1114,"ock":1115,"obu":893,"ode":1274,"odi":8132,"odo":921,"odn":4204,"ods":1138,"odr":4259,"of ":888,"oda":2801,"odu":910,"og ":13004,"oan":559,"oba":2782,"od ":14807,"obo":933,"obr":1117,"obl":1881,"obn":555,"obj":1316,"obi":2599,"oz ":529,"ozn":3312,"ozi":1205,"oza":1421,"oti":1557,"ote":985,"otr":921,"otp":614,"oto":4095,"ost":8908,"ota":912,"ov ":1004,"osi":1554,"ose":1141,"osl":1958,"oso":1519,"osn":2717,"ovj":940,"ovi":7022,"ovn":2887,"ovr":829,"ovo":6487,"ova":7007,"ove":3130,"opć":1972,"opo":690,"opi":1778,"opl":1169,"ope":858,"opa":775,"os ":1176,"opu":936,"opr":664,"ops":861,"or 
":2008,"orm":1284,"orn":2821,"oro":4229,"ord":815,"ore":2892,"org":1291,"ori":7402,"osa":1092,"ort":1833,"ors":1607,"oru":1306,"ot ":522,"ora":3600,"ola":1856,"on ":4329,"olj":2400,"oli":4446,"ole":1119,"ols":789,"olo":3261,"olu":1664,"oka":2684,"om ":17269,"oke":734,"okr":3164,"oko":3273,"oku":2145,"ona":3954,"ond":619,"one":1941,"ong":1053,"onj":777,"oni":2936,"ono":2047,"ons":1609,"ont":1171,"onu":585,"oma":2281,"ome":2911,"omi":1374,"omp":729,"omo":1785,"la ":9649,"le ":3757,"lac":1275,"lad":1494,"lag":644,"lak":731,"lan":3996,"lam":781,"lar":880,"lat":2578,"las":3015,"lav":4030,"laz":5016,"lbu":1947,"kva":701,"kup":5219,"kul":1164,"ksi":936,"ktr":973,"kođ":596,"ktu":654,"kti":1269,"kuć":703,"lok":712,"lon":791,"lom":1506,"lop":558,"log":2016,"lov":4118,"lno":1832,"lni":2255,"lne":712,"lob":538,"lič":1936,"lna":885,"ltu":737,"lub":608,"lsk":2111,"lu ":2550,"liš":638,"lj ":865,"li ":12760,"les":1936,"let":657,"lem":4158,"len":1436,"lek":2007,"led":614,"lo ":4167,"lla":657,"lle":663,"lli":529,"ljs":545,"lju":4071,"lje":10652,"ll ":619,"lja":4832,"lji":2209,"lit":2066,"lis":1339,"lin":3083,"lim":1508,"liz":1420,"lic":2147,"lia":609,"lik":6521,"lij":3213,"lig":541,"ma ":15645,"luž":1294,"mac":714,"maj":623,"mak":533,"mar":837,"mas":812,"mal":2079,"man":3666,"mat":2524,"mbi":905,"me ":5240,"med":673,"met":3414,"mer":3791,"mel":710,"men":6548,"mač":1334,"lum":944,"loš":641,"mpi":753,"mog":976,"mon":1050,"mor":2726,"mos":601,"mot":653,"mu ":1457,"msk":1673,"mun":787,"mi ":1004,"međ":2505,"mje":5038,"min":2090,"mil":697,"mir":1036,"mis":832,"ešt":1289,"mit":939,"ešk":669,"mij":1629,"mo ":1420,"mlj":1522,"mno":900,"rža":4071,"ča ":974,"čan":800,"čar":558,"čaj":693,"uča":545,"zra":1677,"če ":853,"učj":2470,"uči":687,"čav":1111,"čen":1159,"čet":1250,"čes":595,"zu ":768,"zva":612,"zvi":939,"či ":1484,"zvo":1982,"zum":625,"čij":837,"čic":566,"čit":844,"čin":2554,"čko":4001,"čka":1912,"čke":2028,"čki":5096,"čju":1626,"češ":556,"čla":542,"čni":1699,"čno":3306,"čna":751,"čne":1006,"zi ":3239,"zem":1449,"zer":1084,"ze ":973,"zbo":641,"zda":1154,"zac":771,"zbe":843,"zaj":1076,"zam":819,"zan":918,"zal":859,"zap":3007,"zav":531,"zas":565,"zon":794,"zme":1381,"zna":6328,"zno":753,"zič":837,"zni":942,"zla":762,"uća":616,"zli":1013,"ući":766,"zic":1241,"zij":2320,"rši":715,"zin":1079,"zil":1675,"zik":7193,"zir":891,"ziv":3073,"za ":7407,"ya ":644,"ože":1175,"ću ":879,"ćin":2919,"ći ":2008,"rčk":1144,"oš ":708,"ošk":698,"vrš":1066,"wa ":713,"viš":1245,"vrt":868,"vrs":1358,"vri":916,"vre":806,"vsk":989,"vu ":2689,"vir":547,"vil":1092,"vim":1483,"vin":3680,"vih":903,"vij":3997,"vic":888,"vid":903,"vit":817,"vis":1291,"već":1856,"vje":3831,"vla":1440,"vlj":3383,"vo ":3548,"vne":801,"vna":1303,"vno":3450,"vić":621,"vni":4024,"vod":2717,"vog":1164,"voj":3212,"vol":582,"vom":1226,"vor":5059,"vot":928,"voz":1030,"vi ":4716,"vač":1765,"vez":1580,"ver":4932,"vet":804,"vać":849,"ven":3207,"vel":1857,"ved":700,"ve ":5319,"val":1770,"vak":779,"van":5697,"var":1878,"vat":5563,"vac":751,"vaj":1521,"va ":12711,"uz ":1098,"usk":1818,"usi":743,"ust":3150,"uti":590,"ute":706,"uta":1448,"uto":1403,"us ":1654,"ut ":910,"ura":1400,"ure":1189,"uri":1158,"urn":947,"uro":1486,"urs":550,"uru":580,"upa":2662,"upi":4500,"upe":980,"upo":759,"upr":748,"upn":605,"umi":603,"umj":671,"uma":1379,"umb":688,"ume":724,"unu":749,"uni":993,"und":654,"una":2557,"up ":535,"uko":757,"ukl":657,"um 
":2138,"uka":702,"uju":1023,"ult":1215,"pći":2432,"uli":687,"ula":1753,"uhv":776,"uje":2393,"ugi":1126,"uge":731,"ugo":2301,"ugl":583,"uga":1630,"ugu":1137,"uda":924,"udi":2722,"ue ":562,"uci":556,"ug ":588,"ua ":834,"ual":550,"uan":840,"ubl":2337,"tvu":537,"tvr":1039,"tvo":2139,"tve":1133,"ođe":1121,"tva":2106,"tur":2517,"tup":876,"tud":1103,"tre":2014,"oče":731,"tra":6680,"će ":1908,"oči":564,"tri":3139,"tru":1417,"tro":2790,"očn":2463,"tu ":3006,"tsk":7605,"toč":2840,"ćen":895,"to ":5478,"tni":2342,"ća ":3122,"tna":778,"tič":3116,"tno":1901,"toc":579,"toj":1710,"toi":723,"tog":984,"tov":1949,"tom":1889,"ton":1847,"tok":3601,"tol":1732,"tor":3179,"top":785,"ćan":554,"tij":2254,"til":813,"tik":1009,"tih":692,"tir":1327,"tit":1406,"tis":916,"tin":3058,"tim":1630,"tio":1426,"tic":2096,"tiv":2430,"tje":1395,"tko":583,"tka":775,"tla":700,"tem":1590,"ten":2148,"tek":931,"tel":2393,"th ":723,"tet":818,"ter":4781,"ti ":9529,"the":1111,"živ":2628,"žni":894,"žno":1666,"že ":1842,"žbe":582,"žav":3922,"žan":794,"ži ":866,"žen":1273,"užb":678,"uže":864,"uži":889,"užn":2163,"žup":1446,"ušt":795},"n_words":[5153330,5928363,4281211],"name":"hr"}
\ No newline at end of file
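With the profile files in place under nlp_resource_data/langdetect/profiles/, detection goes through the package's normal entry points: on first use the converted detector_factory.py loads the files from the profiles directory, and detect()/detect_langs() score input text against them. A short usage sketch with langdetect's public API (the sample sentences are ours):

    from langdetect import DetectorFactory, detect, detect_langs

    # Detection is probabilistic; pinning the seed makes runs reproducible.
    DetectorFactory.seed = 0

    print(detect("Zagreb je glavni grad Hrvatske."))        # expected: 'hr'
    print(detect_langs("Budapest Magyarország fővárosa."))  # e.g. [hu:0.9999...]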
diff --git a/nlp_resource_data/langdetect/profiles/hu b/nlp_resource_data/langdetect/profiles/hu
new file mode 100755 (executable)
index 0000000..77ab7c4
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/hu
@@ -0,0 +1 @@
+{"freq":{"D":13198,"E":18562,"F":13797,"G":13466,"A":98368,"B":24663,"C":21970,"L":15870,"M":28130,"N":17752,"O":8481,"H":13530,"I":14875,"J":7703,"K":21077,"U":4237,"T":19473,"W":4434,"V":10406,"P":22295,"S":35593,"R":12093,"Y":1303,"X":1480,"Z":4534,"f":108322,"g":317259,"d":216557,"e":996693,"b":219927,"c":121096,"a":1016786,"n":579901,"o":467276,"l":670215,"m":298372,"j":133885,"k":454857,"h":131050,"i":506132,"w":6657,"v":203150,"u":147118,"t":700784,"s":637262,"r":536637,"q":1410,"p":129287,"z":429255,"y":242745,"x":7186,"É":1936,"Á":2646,"í":56628,"é":327830,"á":372527,"ü":54519,"ú":35451,"ö":107023,"ó":117740,"ő":81190,"ű":27405," l":36854," m":86086," n":55262," o":27009," h":46816," i":30318," j":25021," k":101750," d":20317," e":90097," f":58652," g":17434,"р":1511," a":216424,"с":1133," b":38681," c":32807," z":5071," u":7248," t":73839," v":67319," p":25823," s":75268," r":41479," J":7511," K":20282," H":12799," I":11800," N":16438," O":7214," L":14736," M":26568," B":23082," C":19809," A":94593," F":12742," G":12445," D":12015," E":17107,"л":1197," Z":3901,"к":1191," Y":1159,"и":1900,"о":2301,"н":1605," S":32225,"в":1255," R":11088,"а":2749," P":20591," W":4161," V":9030," U":3712,"е":1700," T":17702," á":18269," í":4682," é":57281," ö":8570," ó":3751," ü":2190," ú":3722," Á":2459," É":1858," ő":1879,"A ":56345,"Da":1481,"Cs":2770,"Co":4107,"Ce":1232,"Ch":2651,"Du":1186,"Do":1533,"De":2255,"Di":1628,"Fe":3066,"Fa":1242,"Ez":1912,"Eu":1996,"Er":1727,"El":1696,"Eg":3224,"Ge":1854,"Ga":2079,"I ":2334,"Fr":2249,"Bí":1300,"Fo":1548,"Fi":1170,"B ":1118,"C ":1489,"Au":1680,"Ar":2744,"Ba":3859,"Az":15235,"Ac":1577,"Am":2415,"An":3658,"Al":3816,"Bu":2584,"Br":2522,"Ca":4180,"Bi":1847,"Be":4063,"Bo":3301,"Kr":1187,"Ko":2867,"Le":3145,"Li":2545,"La":4095,"Lo":1996,"Me":3040,"Mi":3545,"Ma":10861,"Mu":1674,"Mo":3340,"Ne":3381,"Na":3799,"Ny":3322,"No":1352,"Ol":1338,"Gr":1713,"Go":1444,"Gy":1283,"Ha":3592,"He":2397,"II":1513,"Dé":1163,"Ho":2637,"In":4213,"Is":1406,"Ja":1924,"Je":1294,"Jo":1519,"Ka":3817,"Ki":3017,"Ke":1650,"Tu":1174,"Tr":2483,"To":1807,"Th":2533,"Ti":1466,"Te":2749,"Ta":2278,"V ":1123,"Sz":12584,"St":3173,"Wi":1325,"Vi":2150,"Va":2270,"Ve":1656,"Pr":2035,"S ":1580,"Pe":2500,"Pa":5475,"Po":2747,"Pi":1727,"Or":1697,"Kö":2908,"Se":1702,"Sc":1225,"Si":1430,"Sp":1130,"So":1614,"Sa":3389,"Re":2307,"Né":1630,"Ro":2833,"Ra":1803,"b ":16527,"a ":242203,"bö":1784,"i ":117842,"bó":6751,"ge":24009,"gf":2198,"ga":25428,"gb":5764,"bé":2495,"ff":1119,"fi":9218,"fr":3611,"fu":1929,"fo":23259,"j ":9703,"gy":101312,"gz":1512,"dá":7767,"he":27807,"ha":38767,"gn":5176,"gm":1339,"cé":2844,"gl":4237,"gk":2851,"gj":4145,"gi":17202,"gh":4135,"gg":3431,"gv":1711,"gu":3889,"gt":2295,"gs":3570,"gr":6943,"cí":4683,"go":14059,"dt":3427,"du":4251,"dv":2240,"dz":1317,"g ":45451,"ea":6887,"eb":8727,"ec":7541,"ed":20502,"de":31392,"dd":1273,"di":23518,"dj":21891,"do":18282,"dn":1552,"ds":5503,"dr":4675,"ex":2202,"eu":3957,"ev":16778,"ey":1469,"ez":42514,"fa":14955,"h ":4414,"bá":5025,"fe":22515,"eh":6429,"eg":88315,"ef":4014,"ee":2573,"el":138888,"ek":55065,"ej":8422,"ei":11205,"ep":13034,"eo":3448,"en":123802,"em":38648,"et":109362,"es":66857,"er":112461,"ca":6636,"e ":99285,"bs":1600,"br":4886,"bu":6371,"bo":9382,"bl":2460,"bi":10991,"bb":21421,"bd":1747,"be":56227,"db":1466,"da":33610,"f ":3042,"cu":2732,"ct":3944,"cs":39940,"cr":1412,"co":5597,"ck":3543,"ci":18130,"ch":10237,"ce":11684,"cc":1311,"c ":4126,"az":52261,"ay":1779,"ba":62023,"d 
":20001,"at":73551,"as":35129,"ar":73118,"av":6465,"au":6414,"ak":76786,"al":112498,"ai":30999,"aj":22921,"ao":1282,"ap":21474,"am":34363,"an":99045,"ac":9905,"ad":30543,"aa":1143,"ab":12613,"ag":47002,"ah":3611,"ae":13109,"af":4252,"nu":6592,"nt":48640,"ns":9377,"no":16464,"nn":10307,"nz":2533,"ny":61139,"gő":1259,"jó":1888,"ká":8866,"of":2805,"oc":6235,"od":12616,"ob":7903,"jú":2913,"ké":20553,"om":30097,"on":54876,"ok":33935,"ol":57719,"oi":1954,"og":16189,"oh":1599,"ot":26104,"os":55594,"ov":15603,"ou":3364,"op":11263,"oo":1729,"or":73234,"kí":3018,"kú":6878,"r ":40419,"ow":1527,"kó":3434,"kö":37773,"oz":36942,"lá":37200,"pe":16459,"pa":16699,"kü":4521,"pc":1541,"pl":5060,"lé":24839,"po":17893,"ph":4251,"pi":8966,"pj":2198,"pk":2115,"lo":28055,"ln":6206,"lm":14166,"hé":2687,"ll":38085,"ls":10986,"lr":2355,"hí":1890,"lv":12472,"lu":8529,"lt":43305,"lz":1116,"ly":54235,"o ":10039,"ma":45288,"mb":11466,"me":75090,"iá":11532,"ml":3581,"mi":28248,"mn":1344,"mm":4364,"mp":6217,"mo":16948,"ms":1960,"mu":7004,"fő":4516,"ió":9250,"mz":4734,"p ":5660,"na":50058,"nb":3564,"nc":11966,"nd":37316,"ne":56223,"já":30832,"nf":2395,"ng":19748,"nh":1725,"ni":26541,"nj":1322,"nk":8011,"nl":3257,"jé":12386,"jt":5016,"ju":2496,"jn":3362,"jo":4214,"jl":2908,"fé":13187,"ki":30779,"kh":2892,"ke":42527,"gá":10089,"kc":1158,"kb":6547,"fü":2239,"ka":34729,"m ":27269,"fö":2942,"jz":1414,"gó":4573,"dő":5431,"gö":3372,"ks":5542,"kt":6729,"ku":13047,"kv":1892,"ko":37181,"gí":1165,"kr":7237,"kk":6940,"kl":3302,"gé":11029,"km":3318,"kn":3693,"li":35208,"lh":5532,"lk":11727,"lj":5903,"le":93067,"há":9402,"ld":10308,"lg":5358,"lf":2782,"la":70668,"lc":4145,"lb":4771,"gú":1727,"n ":153664,"hr":1649,"dí":4512,"dó":5588,"ht":1620,"hu":2691,"hi":9568,"hn":1454,"ho":21034,"dé":9871,"id":22338,"ic":13315,"ib":5712,"ia":30517,"ih":1907,"ig":15930,"if":9201,"eá":1335,"ie":7180,"dö":1970,"hy":1869,"k ":156709,"ir":14921,"is":42797,"it":23842,"iu":6023,"iv":7173,"bő":3863,"ii":3341,"ij":1228,"ik":46279,"il":27833,"im":8040,"in":53934,"io":8559,"ip":5320,"fá":1392,"je":23937,"jd":2673,"iz":8720,"l ":91636,"ja":22570,"xi":1625,"té":34673,"tí":4348,"tó":18526,"sö":2411,"só":2652,"sú":4418,"z ":78459,"sü":3136,"tá":43166,"wi":1167,"sé":27679,"sí":6485,"ró":12762,"rö":8024,"y ":64628,"rú":3555,"rü":11432,"wa":1485,"sá":25703,"ré":19532,"vi":19889,"vt":2486,"rí":1306,"vo":15593,"mű":8899,"nő":6159,"ve":52514,"rá":28508,"va":38110,"x ":2815,"ui":1873,"uk":4884,"ul":23432,"ue":1945,"ug":7493,"ur":14404,"us":30910,"ut":12436,"um":11242,"un":8785,"up":2459,"ty":4352,"tu":13656,"tt":50460,"tv":7094,"ub":2532,"ua":1854,"pü":4993,"ud":8185,"uc":2378,"to":52152,"tn":2524,"pé":3054,"tm":2785,"tl":6382,"ts":8729,"tr":17609,"pí":3835,"tf":1412,"te":87104,"pá":7384,"tk":5205,"tj":7119,"ti":40828,"th":9478,"v ":9584,"tb":6187,"tc":1163,"ta":76171,"su":4875,"sv":2378,"ss":22768,"st":26226,"sz":174760,"lő":14903,"sl":2577,"sk":7704,"sn":2526,"sm":5506,"sp":4512,"so":26449,"sr":3213,"sd":1132,"sc":3372,"sf":1789,"se":34881,"sh":2900,"si":20642,"nö":2962,"rz":5827,"u ":5228,"sa":40529,"sb":5688,"rr":6334,"rs":21922,"rt":47601,"ru":7619,"rv":8829,"nó":1459,"kő":1313,"ry":3017,"rp":2572,"ro":47249,"rn":9437,"né":20022,"rm":20825,"rl":4279,"rk":7954,"rj":4223,"ri":45410,"rh":2668,"rg":7757,"rf":3837,"re":67985,"ná":12107,"rd":9230,"rc":7289,"rb":5639,"ra":55047,"t ":159290,"mú":1197,"mó":2093,"qu":1194,"mí":1861,"mé":15355,"má":27550,"lü":5880,"s 
":157503,"lö":4000,"pt":5892,"pu":3980,"ló":16644,"pp":2547,"lí":3237,"pr":7474,"ps":3440,"yő":1310,"zü":5181,"zú":1280,"zá":31274,"yú":1973,"yü":3112,"zé":19023,"zí":5603,"zö":10272,"zó":20498,"vű":1517,"yá":13547,"yí":1801,"yé":5272,"yó":2584,"vő":2313,"tű":4358,"vö":1632,"zz":3816,"sű":1526,"tő":14413,"zf":1314,"zg":2806,"zh":3151,"zi":25268,"zb":1632,"zd":4010,"ze":71368,"vá":29898,"za":30193,"yz":1903,"zv":1698,"rű":2676,"ső":8855,"zs":12765,"zu":3963,"zt":36837,"zo":19843,"zn":6608,"ví":2765,"zp":2246,"zk":4768,"vé":17783,"zm":2937,"zl":11760,"yg":1834,"yh":1922,"ye":40237,"uá":1400,"yf":2056,"yc":1116,"ya":26830,"tü":4027,"yb":3764,"tú":2750,"tö":12442,"rő":4773,"yv":4653,"yu":3843,"yt":3497,"ys":7991,"yr":1838,"yp":1398,"yo":12260,"yn":3914,"ym":1973,"yl":2832,"yk":2442,"yi":17109,"ző":8108,"yű":1984,"Ál":1439,"ám":8760,"án":50938,"áp":2119,"áj":7251,"ák":18338,"ál":59120,"ág":29984,"áh":4265,"áb":34134,"ác":4739,"ád":14308,"áz":7779,"áv":4411,"áu":1148,"ár":52483,"át":21141,"ás":47264,"á ":2111,"óz":2079,"ós":5892,"ót":4344,"óv":1296,"óa":1234,"ób":3397,"ój":4542,"ói":2619,"óg":4159,"óf":2044,"ód":5384,"óc":1867,"ór":2907,"óp":3032,"ón":2969,"óm":2752,"ól":15303,"ók":6328,"ó ":44110,"ív":3409,"íz":3248,"ín":4459,"ím":4915,"ír":8949,"ít":21446,"íg":1483,"ík":1149,"íl":1523,"íj":1882,"éz":4750,"ék":26204,"él":30309,"éj":1119,"ép":16482,"ém":6476,"én":36437,"és":84643,"ét":19103,"ér":22242,"év":16532,"éb":17184,"éd":3694,"éc":1321,"éh":4561,"ég":31689,"é ":4130,"úz":1141,"ün":2126,"ül":37857,"ür":1396,"üt":3250,"üz":1153,"üg":2645,"ük":3502,"úl":1994,"új":3185,"út":2986,"ús":1934,"úr":1723,"úa":6976,"úg":2023,"öv":8122,"öz":26938,"ú ":9189,"öt":9388,"ör":17835,"ös":7442,"ön":10766,"öl":8906,"öm":1336,"ök":5801,"ög":3454,"öd":2116,"öb":4016,"ő ":26112,"őv":2032,"őr":2463,"ős":10918,"őt":2983,"őz":2353,"őd":2691,"őb":2103,"őg":1179,"őe":1463,"ől":9494,"ők":4433,"őj":2432,"ői":2735,"őn":2715,"ű ":13555,"űk":1684,"űe":1599,"űs":1405,"űr":1219,"űv":2905,"一":1237," Ál":1439," Ga":2065," Ge":1837," Bí":1298," Fo":1535," Fr":2248," Fi":1161," Ha":3587," He":2393," Gy":1279," Go":1438," Gr":1700," Ho":2631," Dé":1163," Je":1288," Ja":1919," Is":1400," In":4206," Ka":3809," Ke":1641," Ki":3002," Jo":1515," La":4057," Le":3135," Li":2524," Ko":2859," Kr":1178," Ma":10822," Mi":3523," Me":3025," Lo":1988," Ne":3354," Na":3783," Mo":3325," Mu":1669," A ":54120," Am":2399," An":3650," Al":3798," Ac":1573," Ba":3855," Az":15203," Au":1680," Ar":2712," Be":4051," Bi":1842," Bo":3283," Br":2520," Bu":2577," Ca":4107," Ce":1230," Ch":2631," Co":4068," Cs":2760," Da":1472," Di":1622," De":2234," Do":1490," Du":1183," El":1692," Eg":3214," Er":1720," Ez":1906," Eu":1996," Fe":3060," Fa":1224," Wi":1310," a ":114242," Kö":2904," Or":1691," Po":2728," Pi":1722," Pe":2496," Pa":5457," Ny":3316," No":1346," Ol":1338," Ra":1786," Né":1627," Ro":2824," Re":2297," Pr":2030," Sz":12567," St":3106," Ta":2270," Th":2520," Ti":1460," Te":2728," Tr":2468," To":1786," Sa":3377," Si":1425," Sc":1193," Se":1692," So":1606," Sp":1107," Va":2262," Ve":1647," Vi":2142," Tu":1156," ja":1594," je":10035," in":4519," il":2853," is":10067," ir":2442," fü":1523," ka":7871," fö":1646," ki":17579," ke":13720," jo":1938," fé":4201," gy":6161," ha":19271," he":6504," cé":1611," cí":4405," gr":1159," id":3467," ig":3043," hi":3276," dé":2801," ho":8126," dí":1823," ne":18984," já":7979," na":5781," fő":4149," mu":2133," mo":4422," ok":1477," ol":6794," ké":11002," of":1186," ny":10296," no":1409," há":4031," le":17706," li":2353," 
la":7930," ku":2128," gö":2536," km":2432," kr":1168," ko":9227," me":30782," mi":10796," ma":18644," lo":1099," hí":1381," ad":3539," am":14965," an":8296," ak":3658," al":19507," au":2171," ar":2746," as":2187," ba":7443," az":39674," bi":2851," be":18442," bo":2566," bu":1302," er":6741," es":6369," en":1270," em":3989," el":22279," eg":37244," fe":16017," bá":1342," fa":10457," ez":6961," fu":1228," fr":2101," fo":11286," bé":1167," fi":3986," ge":1465," ga":1837," ci":1469," da":2485," cs":21597," do":1754," de":6050," di":2713," vé":3902," ví":1450," vá":10832," ze":2004," tö":9167," té":2457," tá":3920," nö":1405," sa":2016," se":2587," sp":2365," so":5755," ra":2479," re":19463," né":13996," ro":3461," pr":3595," má":8283," mé":4247," os":10567," kí":1660," or":4197," kö":27883," pe":2664," lá":2021," kü":3017," pa":5441," lé":4236," po":4686," pi":1682," ró":1121," rö":2141," va":20643," ve":9072," nő":1649," mű":3958," vo":11603," vi":8365," ré":10099," tu":3046," ut":3907," ur":1101," ta":23929," sz":51220," st":2457," su":1760," tr":1755," pé":1624," to":2214," ti":2324," te":19251," pá":3006," át":2658," ál":11522," év":4437," ér":4015," és":39564," én":2210," ép":2514," él":3293," ír":3245," ók":1246," ön":2076," ös":3774," ót":1104," új":1846,"ől ":7803,"ője":1778,"ők ":2201,"őne":2127,"ősz":2500,"ős ":1549,"ősí":1918,"ősö":1177,"őtt":1492,"őbb":1227,"ői ":1993,"Áll":1152,"Eur":1570,"Ez ":1152,"Fra":1443,"Bír":1270,"Int":2403,"Az ":14884,"Ame":1184,"Bud":1205,"Car":1146,"Cse":1140,"Egy":3037,"Nem":1251,"Nag":2455,"Nye":1809,"űvé":1367,"Köz":2207,"Pas":2299,"Par":1124,"űve":1432,"űkö":1244,"Lad":1325,"Mar":2612,"Mag":3698,"éhe":3730,"ék ":14028,"ége":6434,"égb":1296,"égi":4433,"égy":1459,"él ":2635,"éde":1275,"ég ":10442,"ébe":12999,"éba":2195,"éve":6732,"ésé":4210,"év ":4758,"ész":20296,"éte":4730,"élő":1204,"ést":1350,"ésr":1270,"épü":1343,"épí":1687,"étr":1526,"éze":2382,"érő":1195,"éme":4235,"ép ":2557,"éne":8073,"éni":1139,"ént":3323,"ény":11240,"ékh":1465,"éke":3632,"éko":2177,"égé":2685,"én ":10291,"éli":2101,"éle":4096,"élk":1173,"ély":3515,"élt":1344,"ére":3670,"érf":1845,"ét ":8716,"ért":4687,"érs":1117,"éri":1359,"ése":7910,"ési":1788,"épe":3380,"ér ":1942,"éps":1130,"élé":8715,"és ":42926,"ésű":1277,"Szl":3665,"Sza":1311,"Sze":2596,"The":1583,"ául":1145,"áts":1437,"áto":1839,"biz":1578,"áss":1306,"ást":1822,"ász":3797,"áró":1875,"ásá":7057,"árá":5593,"áva":2230,"bol":1519,"átó":1225,"áté":2725,"áz ":1260,"ázi":1122,"bor":3409,"áza":2446,"áll":11176,"bbi":2623,"ált":12559,"bba":2240,"ály":14827,"be ":8763,"áma":1434,"ban":38532,"bal":3567,"baj":2665,"áku":1940,"bad":1154,"án ":13154,"ála":2623,"bar":1493,"áli":4198,"álh":2859,"áno":1436,"ány":14546,"ánu":1287,"ámo":1867,"ána":12065,"bda":1545,"áni":2143,"ájá":1672,"áló":2208,"ás ":11271,"bi ":2519,"ár ":6296,"ber":6263,"ben":26680,"bel":7243,"bes":1771,"bet":1775,"ása":4529,"árt":3175,"árs":2861,"áso":5295,"ási":3907,"ásb":2951,"át ":7693,"ámí":1237,"áro":12322,"árm":2982,"ári":3231,"ára":5673,"ács":1314,"áci":2905,"ág ":10677,"ádj":10076,"ábó":2969,"ága":2054,"ágb":3476,"ca ":1909,"ágo":3161,"ági":2804,"ák ":11048,"áho":4041,"ája":2888,"ál ":2859,"ágá":1372,"áki":3435,"ce ":2292,"ám ":1135,"bri":1367,"bra":1211,"bur":1175,"bum":2164,"ád ":2121,"ábo":1867,"ább":3556,"ába":23979,"ajz":1349,"aka":2195,"am ":2190,"agá":2325,"aki":3799,"afé":1601,"ajn":3205,"ajt":2171,"al ":17585,"aja":1802,"ajd":2547,"ain":2508,"ait":1256,"ak ":42544,"adó":2898,"aj 
":6376,"agy":29473,"adá":3539,"agj":1385,"ago":2554,"anu":1841,"any":6283,"ano":1477,"ann":3140,"ant":4051,"ans":1439,"ane":1370,"ajá":1272,"ang":7307,"ani":3989,"ank":1265,"ana":3610,"anc":3826,"and":5977,"amm":1385,"amo":3002,"amp":1099,"ami":6863,"ame":12652,"amb":1439,"ama":2681,"alv":1163,"alu":2639,"alt":1806,"alr":1104,"alo":6119,"alm":6541,"all":3854,"alk":5861,"ali":4354,"alc":1755,"ale":2695,"ala":25236,"alb":2678,"an ":50569,"akr":1682,"aku":2116,"akt":1697,"ako":5426,"akk":1936,"aba":1856,"abb":3158,"abd":1557,"ae ":11230,"ad ":2344,"ai ":23587,"aga":3600,"ado":2879,"adi":3222,"ag ":2506,"adt":1359,"aci":1651,"ach":1191,"ace":1933,"ada":7719,"acs":1177,"azo":3899,"azi":1454,"azt":1117,"azg":1460,"aza":1771,"azd":1290,"azz":1187,"az ":35775,"asú":1719,"atá":8809,"ató":6586,"azá":1314,"ba ":10534,"bb ":12760,"at ":13186,"are":1125,"ard":2003,"arc":2552,"ara":12278,"aro":4590,"arm":1949,"arl":1467,"ark":2718,"ari":4156,"aru":1342,"arr":1237,"art":20941,"asa":1245,"aso":1347,"ar ":7389,"akö":2063,"apa":2996,"akú":5988,"alá":20616,"ape":1622,"apc":1186,"apj":1950,"api":1102,"apo":2602,"apt":1225,"aló":3345,"as ":5129,"ava":2849,"aut":1403,"avi":1292,"ará":1634,"asá":3004,"arú":1364,"asz":10871,"atb":1153,"ata":7891,"ast":1692,"ass":5488,"atl":1204,"apí":1933,"atr":1521,"ato":8994,"apá":1330,"ate":2686,"ati":9317,"ath":1161,"att":4210,"aur":1366,"jed":1383,"jel":12132,"jez":1371,"jes":2679,"jai":1887,"je ":3339,"fér":1780,"fél":10120,"job":1168,"jno":2503,"jog":1359,"jle":1493,"itr":1237,"ito":1407,"itt":1847,"isk":1278,"ism":3684,"iss":1661,"ist":5127,"isz":6268,"ita":2682,"ite":2144,"iti":1991,"ius":3051,"ium":2277,"inő":1958,"iva":2264,"ügg":1434,"irá":5579,"ive":1942,"is ":16119,"ion":3768,"ilá":4340,"iro":2666,"ise":2504,"ire":1148,"ira":1291,"it ":5468,"ült":4085,"ülé":2870,"izá":1108,"ja ":16271,"ül ":11962,"ből":3435,"ük ":2123,"itá":2293,"üle":10807,"izo":1300,"ize":1447,"kif":1538,"kik":1160,"kil":1182,"kia":2258,"kin":1795,"kir":2041,"kis":3777,"kit":1528,"úsz":1202,"km ":2029,"ki ":6708,"khe":1740,"ked":3722,"kel":6145,"gál":2697,"ken":2595,"gán":1121,"kes":2444,"gás":1179,"ker":9767,"gár":1661,"ket":5479,"kez":5201,"ke ":3579,"kra":2981,"koz":3084,"kot":2662,"kos":4432,"kor":12163,"kon":4605,"kom":1428,"kol":2622,"kok":2194,"kod":1381,"gés":1802,"kna":1132,"kiá":3411,"géb":1114,"gép":2262,"gén":2847,"kke":2328,"kka":1690,"kbe":2008,"kba":2863,"kat":6062,"kar":2888,"kas":1212,"kap":3160,"kan":1437,"kal":4211,"kai":6519,"füg":1411,"ka ":5099,"föl":2822,"gyé":3341,"gyá":1929,"gyü":2858,"ha ":1609,"han":3792,"haj":2261,"hal":5044,"har":4293,"has":5102,"hat":10778,"hag":1327,"had":1308,"gyű":1250,"he ":2263,"hel":6775,"heg":1841,"hez":5105,"het":6443,"dás":3281,"dár":1447,"her":2201,"hiv":1273,"gla":1955,"gna":2223,"gió":1365,"cél":1462,"úak":6906,"gok":1862,"gol":3486,"gon":2487,"gos":1601,"got":1210,"gra":2375,"gre":1699,"cím":4480,"gsz":2045,"gy ":37866,"gus":1307,"gya":12084,"gye":12610,"gyh":1320,"gyi":8702,"gyk":1565,"gym":1148,"gyo":4080,"úgó":1191,"gys":4096,"iai":5083,"ial":1462,"ian":2020,"iat":1462,"iad":1714,"id ":1248,"ibe":1135,"ia ":14894,"ig ":5385,"ifo":5710,"ife":1711,"ics":1889,"ici":1201,"ich":2336,"ice":1195,"ie ":1156,"ica":2195,"ide":4043,"ida":9550,"ika":8060,"ige":3208,"iga":2520,"ii ":1284,"idé":1418,"ik ":22104,"imp":1521,"inc":1595,"ind":5305,"ina":5669,"ino":2798,"int":11506,"ine":3645,"ing":3990,"ini":2901,"ink":1323,"iká":2172,"inu":1420,"iko":1515,"ike":2523,"ila":1887,"in 
":5995,"idő":1933,"iku":4841,"ilo":2015,"ill":7614,"ilm":1881,"ili":3110,"ile":2438,"hoz":7697,"hol":2205,"hon":1202,"hog":3581,"hos":1589,"hor":2048,"dék":1979,"dél":3343,"dés":3202,"dó ":3604,"dít":1872,"díj":1758,"fen":1212,"bán":1642,"fek":1857,"fel":10451,"fej":4036,"fia":1202,"ező":2970,"fal":3069,"faj":8821,"etű":3115,"ető":6086,"ezé":5371,"etü":1875,"esü":1885,"ez ":6988,"eté":6862,"erű":1796,"ezd":1515,"erő":1876,"evé":4056,"ezt":2772,"eze":13658,"ezh":1979,"ezi":1930,"etb":3254,"eta":1302,"ete":14739,"eti":8705,"etl":3489,"etk":2537,"est":5914,"ess":3758,"elő":7242,"esz":10684,"epü":3424,"etr":2609,"ets":2876,"ett":18422,"etv":2394,"erá":1228,"eve":8161,"eré":4519,"eur":1117,"esí":1394,"erü":9565,"epe":2041,"epi":1162,"er ":12986,"es ":29789,"elü":5526,"epl":1338,"elé":3820,"erk":1552,"eri":12722,"erj":2568,"erg":1768,"ere":20225,"erc":2570,"erd":1315,"era":2862,"erb":2020,"et ":26086,"emé":3302,"esi":1683,"ese":5978,"erz":2655,"erv":3920,"err":1968,"ert":6946,"ers":4641,"ern":2834,"erm":4064,"ené":1314,"ero":1595,"eki":1276,"egé":3370,"ekt":2496,"en ":52180,"ela":1439,"ele":27299,"eli":3526,"elj":2109,"elh":1530,"ehé":1275,"elm":4378,"eln":2146,"elk":2863,"ell":9606,"elo":1349,"elv":9038,"els":6778,"elt":4823,"ely":23022,"emb":4386,"ema":1264,"eme":6876,"eml":2503,"emi":1303,"emp":1725,"emz":4591,"ene":6376,"eng":4545,"end":20330,"enc":3429,"ejé":1183,"enn":3397,"enl":1598,"eni":2643,"ens":3074,"ent":14391,"eny":3762,"egk":1751,"egj":2290,"egn":2926,"ege":4339,"egf":1828,"egi":1596,"egh":1485,"egr":1473,"egs":1610,"egt":1312,"egy":47388,"edé":2069,"ehe":2302,"ek ":33389,"ein":1947,"el ":15987,"eit":1126,"ejt":1562,"ejl":2010,"eje":2098,"eke":7178,"ekb":1825,"em ":9006,"ött":6080,"gje":1874,"öss":4328,"gja":1441,"git":1317,"gia":2402,"ört":3402,"örz":1373,"öná":1325,"gha":1393,"ös ":1742,"gi ":5691,"ör ":1444,"gen":2834,"get":4617,"ger":3980,"ges":2578,"gel":2023,"öny":3378,"gek":1625,"önb":1254,"ge ":3243,"gbe":1509,"öld":3665,"gaz":3489,"gba":3483,"ölt":2040,"gas":2172,"gar":1902,"gat":6541,"gal":3993,"ga ":3541,"ök ":2555,"ög ":1983,"ból":6420,"fra":1911,"özö":5568,"özé":2985,"for":12637,"fon":1546,"fol":3847,"özi":2766,"öze":2220,"özs":4673,"özp":1522,"özl":1118,"övé":1547,"fog":4106,"özt":1797,"örü":1396,"fil":2271,"fin":1712,"övi":1986,"öve":4144,"örö":5177,"da ":3729,"de ":4718,"dal":6673,"dae":8117,"dat":1797,"das":1500,"dar":6693,"dap":1398,"cti":1331,"csá":1388,"ciá":1338,"ció":4016,"cs ":2427,"öbb":3612,"cse":2576,"csa":18273,"cso":5464,"csi":2762,"csk":1549,"cea":1428,"ch ":1376,"cer":1367,"cen":1114,"ci ":1364,"cha":1323,"cia":3115,"ck ":1275,"che":1523,"chi":1879,"ed ":1845,"ebb":4696,"ebe":1132,"eae":1455,"ea ":1332,"efo":1148,"ei ":4004,"ega":1438,"edi":4113,"ede":6411,"eg ":8600,"ech":1220,"ect":1158,"ecs":1616,"dor":1576,"don":3362,"dom":3952,"dol":1610,"dot":2319,"djá":9950,"djé":9456,"dsz":3860,"dul":1851,"dt ":1182,"dia":1805,"der":2519,"des":1303,"det":5300,"dez":2835,"del":4174,"den":3777,"di ":3532,"dja":1819,"din":2017,"dig":1984,"dik":7071,"rga":1547,"ri ":7426,"rgi":1214,"rge":1233,"ret":2640,"res":5836,"óta":1799,"rfi":1520,"red":5622,"ósz":1200,"reg":2954,"rem":1197,"ren":20766,"rek":3056,"rel":1449,"nál":7068,"rep":4197,"rde":1294,"ós ":2694,"re ":12803,"rci":1283,"rce":1814,"ópa":1701,"rd ":1700,"ras":2085,"rat":3432,"rba":1762,"rbe":1412,"raj":1914,"rai":1551,"rag":1242,"ran":7745,"ram":2669,"ral":2852,"rak":6278,"rab":2185,"rad":2745,"rac":1357,"rs 
":1186,"ror":2542,"ros":13308,"rot":1270,"rom":4552,"ron":3143,"rop":1359,"roz":4272,"rov":1366,"rod":2730,"roc":1647,"rol":2608,"rok":2835,"rog":1810,"rny":1765,"rna":1947,"rne":1337,"rni":1311,"nél":1390,"ném":3320,"nép":2342,"név":7964,"rma":4453,"rme":7442,"riá":1542,"nég":1166,"rla":1110,"rke":1521,"rje":2570,"rit":2732,"ris":3358,"ril":1221,"rik":5031,"rin":5644,"ria":3879,"ric":1848,"rid":1666,"rie":1106,"rif":2877,"rk ":1195,"rsé":1351,"rtá":1222,"rté":4693,"rul":1250,"rus":2059,"rva":1143,"rve":3364,"ry ":1179,"rsa":2647,"rse":2444,"rsz":11249,"rta":3812,"rtj":2461,"rto":16676,"rte":3860,"rti":1302,"óba":1208,"rmé":1800,"rmá":4220,"rt ":8024,"rre":1962,"rra":1492,"sab":1704,"sai":1775,"sak":2444,"sal":14426,"óma":1626,"sba":3589,"sap":1951,"san":2277,"ójá":1365,"sat":1742,"sas":1771,"sa ":7871,"óko":1362,"ók ":2577,"ól ":13016,"ója":2231,"rvá":1234,"rze":1233,"ógi":2210,"ói ":1956,"növ":1656,"rzs":1300,"rvé":1214,"si ":8670,"sid":1199,"sil":1988,"sik":1599,"rző":1316,"se ":7438,"ser":4397,"set":1567,"seb":2714,"sen":5233,"sem":1195,"sel":3187,"sek":2815,"spo":1141,"spa":1306,"sol":2274,"son":4142,"sop":2367,"sor":6863,"sod":2739,"sok":5478,"st ":5212,"sle":1305,"sko":1582,"ska":1477,"ske":1576,"sme":3505,"ssé":2608,"ssá":1740,"sz ":8870,"lőt":1770,"lős":2401,"stá":1316,"sza":14173,"svá":1185,"sze":35971,"szo":9131,"szt":27632,"szu":2442,"szi":9598,"szl":6370,"szk":3277,"szn":4368,"sse":3446,"ssa":2570,"ssz":9449,"ste":4883,"sta":3478,"sto":1355,"sti":2035,"str":2018,"sug":1404,"lő ":3629,"sré":1266,"tai":1258,"tak":4038,"tal":17363,"tag":2404,"tbe":3293,"tba":1669,"tat":5289,"tar":19006,"tan":4747,"te ":9032,"ta ":15756,"szá":22866,"szü":3008,"szé":7434,"szí":4720,"szö":3515,"szó":5026,"ozó":12847,"ozá":2383,"kúa":5586,"pa ":1509,"pcs":1390,"par":3913,"pat":2664,"kül":3938,"pai":1203,"pap":1164,"pan":2344,"pha":1287,"láb":2286,"ped":1503,"lád":12468,"lán":2463,"pen":1323,"per":4376,"lát":1961,"pes":3470,"lás":5210,"lág":4159,"pel":1327,"lál":4812,"plo":1196,"pia":1507,"por":4386,"pos":1197,"pon":4538,"pol":3552,"lét":2548,"lés":5210,"lén":1237,"lék":10125,"lég":1202,"pjá":1227,"psz":1291,"plő":1147,"ló ":6853,"pte":2465,"lít":2551,"pri":1456,"pre":1161,"pro":3235,"lós":1760,"lóg":2572,"pus":1929,"lód":1195,"lön":2431,"lül":5325,"már":3617,"más":7074,"mán":10674,"máj":1466,"még":1185,"mét":1869,"mér":2829,"més":1202,"mél":2519,"mén":4256,"mít":1327,"mód":1366,"ra ":14404,"ngo":3206,"ni ":7437,"nge":3949,"ngy":1650,"jáh":2978,"neg":1502,"nel":1635,"nek":17344,"ják":1953,"ján":4452,"nem":11176,"jár":7148,"ner":1420,"net":3267,"ját":5567,"nes":2093,"nev":9590,"ng ":3420,"jáb":8030,"ncs":2007,"nci":3666,"nce":2318,"ne ":3513,"ndr":1401,"nds":3385,"ndo":2353,"ndj":9787,"ndi":2220,"nde":6999,"nda":1524,"nak":21124,"nal":2777,"nap":2250,"nae":1236,"nag":5845,"nai":2208,"nd ":4041,"nba":1457,"nat":2233,"na ":6605,"mze":3907,"iós":1792,"iój":1245,"nyb":1400,"nya":6957,"nye":12400,"nyi":4978,"nté":5969,"ny ":9196,"nul":2608,"nus":1717,"nty":1140,"nto":3218,"nti":4462,"ntj":1553,"nta":2379,"nte":5991,"nsz":1210,"nse":1158,"nt ":15590,"ns ":1607,"nká":1105,"nok":4021,"nop":1219,"nos":3762,"nov":1101,"nne":2064,"nna":2413,"nni":1136,"nny":1741,"niá":1146,"jéb":6198,"jén":1719,"jéh":3083,"nle":1203,"no ":1255,"nka":1134,"nid":1544,"nic":1428,"nia":2714,"nk ":1863,"nis":2046,"nik":2308,"ogr":1697,"ogl":2068,"oga":3293,"ogy":4073,"ok ":16218,"ol ":3707,"ock":1291,"ode":1450,"odi":3096,"odo":1217,"of 
":1189,"oda":2951,"kál":1176,"kán":1252,"káb":1589,"nyí":1276,"obb":3582,"nyo":6087,"nyv":3444,"nyu":2805,"nyt":1370,"nys":1658,"ntő":1470,"jú ":1653,"nyá":1816,"nyé":1280,"oz ":6136,"osí":1185,"köv":1585,"köt":1840,"ozt":1266,"köz":23037,"kön":3656,"köl":1449,"kör":4099,"ozo":1583,"köd":1357,"ová":8369,"ozi":2582,"oza":5292,"ott":13732,"oto":1925,"osz":17179,"ost":2095,"ítő":1100,"oss":3713,"oso":1195,"orú":1625,"osá":2471,"ovi":1122,"ova":1834,"ove":1321,"orá":3688,"íté":2953,"opo":3410,"olá":1371,"ító":2222,"os ":17347,"oló":3495,"opt":1890,"or ":6588,"ítá":3043,"orm":10619,"orn":2184,"oro":8033,"orr":2134,"ord":2929,"oná":1361,"ore":1109,"org":1652,"ori":4752,"osa":4828,"ort":5009,"ors":11126,"orv":1840,"omá":8665,"ot ":4277,"orb":1551,"ora":2591,"ízi":1186,"íto":2971,"ola":3942,"old":2725,"olc":1133,"on ":14829,"oli":4956,"oll":1909,"olg":2986,"ols":1101,"olt":11581,"oln":1426,"olo":2069,"oly":9282,"olu":1943,"okb":1937,"oka":3243,"om ":4999,"okk":1292,"okr":1131,"oks":2384,"íte":4142,"oko":2728,"ív ":1366,"okt":1201,"író":2841,"ona":4624,"ond":2206,"one":1152,"ong":1637,"oni":3479,"ono":2799,"ons":1750,"ont":8687,"ony":4114,"oma":1728,"omb":2831,"omi":1564,"omm":1333,"kék":1183,"kép":5158,"kén":3827,"írá":2059,"omo":2114,"két":4231,"kés":3743,"ímű":2569,"la ":5399,"le ":4396,"lcs":2466,"írt":1125,"lda":1565,"ldi":1183,"lab":1923,"lad":1437,"lag":3296,"laj":1625,"lal":2932,"lak":15098,"lan":5737,"lam":7155,"lap":6224,"lat":10133,"las":4338,"ld ":2188,"lbu":2257,"dő ":2164,"kus":4984,"kul":4809,"ksá":2221,"ksz":1790,"gó ":1820,"gör":2727,"llí":1578,"llá":2364,"lló":2888,"lon":1455,"lom":5872,"lor":1462,"log":1806,"los":2605,"lot":1131,"lov":8327,"lne":1797,"ljá":1491,"lmi":2811,"lme":1597,"lma":4921,"lna":1780,"lto":3300,"lsz":1378,"lta":8440,"lte":3310,"lu ":1887,"lmé":1286,"lre":1816,"lt ":21002,"ldá":1177,"lha":4212,"li ":5160,"ház":2853,"lev":1937,"les":5249,"let":23881,"hár":2058,"lep":3720,"lem":6723,"len":17272,"lek":3977,"lel":1659,"leh":1502,"leg":14221,"háb":1477,"lla":10657,"lle":8630,"lli":2652,"llo":1404,"lko":4418,"lka":2131,"lke":2520,"lgá":2219,"lje":1805,"ll ":3110,"lja":2062,"lit":2107,"lis":6935,"lin":2766,"lim":1567,"lid":1319,"lia":3789,"lik":2243,"lyó":1710,"ma ":5513,"mai":2750,"maj":1342,"mad":7309,"mag":9145,"mar":2289,"mas":1406,"mal":2064,"man":2348,"maz":4614,"mat":4172,"mba":2545,"mbe":4662,"me ":1765,"meg":18261,"iáb":6342,"met":6641,"mes":7852,"mer":9128,"mel":20048,"men":4193,"mek":1223,"mez":3626,"lva":1622,"lve":3172,"lul":1401,"lus":1938,"ly ":11120,"lvt":1957,"ltá":1280,"lsó":1363,"lya":5925,"lyb":1133,"lyi":2296,"lye":12093,"lté":1277,"lvá":1133,"lyo":1466,"lyn":2379,"lys":1166,"lyt":1183,"lső":6342,"lyá":9265,"mpi":1413,"mpl":1595,"mon":2735,"mok":3026,"mos":2616,"mot":1595,"moz":1966,"ió ":2881,"mus":1737,"mut":1302,"mun":2084,"mi ":6215,"min":10577,"mil":1543,"mit":2000,"mia":1836,"mik":1748,"mma":1863,"tő ":4350,"től":2706,"tős":1505,"tőn":1924,"sű ":1297,"zná":3356,"zt ":3494,"víz":2022,"zte":5806,"zti":1839,"zta":2495,"ztr":2410,"zto":1911,"zsi":1572,"ső ":5513,"zul":1547,"ztá":11872,"zté":2691,"rű ":1401,"zsé":5276,"zza":1374,"zga":2049,"zi ":4058,"zhe":2186,"zet":19059,"zes":1480,"zen":10515,"ván":2981,"zem":3342,"zel":4337,"vál":6881,"zek":2284,"vák":7162,"vár":8676,"zer":19180,"ze 
":4791,"zda":1164,"zab":2585,"zad":1388,"zak":6902,"zal":1294,"zat":8108,"zot":2710,"zor":2657,"zom":1281,"zon":5431,"zok":2301,"zol":2432,"zpo":1664,"vén":5253,"véd":2112,"vég":2913,"vét":1848,"vés":2023,"zió":1200,"zke":1119,"zle":1674,"zlo":7817,"zig":3735,"zin":2369,"zil":1212,"zik":7225,"zis":1133,"yve":2842,"yug":2984,"ysz":2663,"yok":1279,"yom":2043,"yol":1936,"yos":2298,"yob":2167,"za ":3143,"ről":1883,"ysé":2805,"ye ":3860,"yei":1140,"yek":4530,"yed":1362,"yes":5416,"yer":1837,"yen":2702,"yel":9386,"yez":2468,"yet":5968,"ya ":3038,"yag":1568,"ybe":1927,"yba":1297,"yar":8873,"yan":3850,"tül":2293,"yal":1350,"yak":3842,"yai":1283,"yhá":1178,"yne":2282,"yi ":5728,"yik":7147,"tív":1267,"téz":2544,"tér":3350,"tét":2097,"tés":9077,"tén":6927,"tél":1509,"ték":6622,"töb":3419,"tör":6044,"tó ":7302,"tól":4069,"tán":3887,"tár":7602,"tás":11209,"táv":1125,"ták":3376,"tál":11084,"táj":1226,"sú ":1586,"sül":2209,"só ":1339,"sök":1579,"ség":20101,"sén":1506,"sér":1918,"sét":1189,"sít":5115,"sár":2700,"sát":1861,"sán":1607,"ság":11788,"sáb":4560,"rög":2754,"rök":1818,"röv":1876,"rúg":1325,"rül":11029,"ró ":3203,"vtu":1747,"ról":2801,"róp":2151,"vir":1492,"vil":4652,"vid":4003,"vis":3046,"réb":2703,"rég":2911,"rés":10041,"vol":10664,"von":3245,"vi ":1300,"vez":9822,"ver":7753,"rás":8381,"ves":3105,"vet":5802,"veg":1557,"rág":1665,"ven":5494,"rán":5049,"rál":4428,"vel":6375,"vek":3167,"ráb":2450,"ve ":7296,"val":9128,"van":3020,"var":2245,"vat":2280,"vas":2577,"vad":1432,"vag":11793,"va ":3234,"műv":2529,"műk":1274,"nős":2336,"utó":1685,"utá":2191,"női":1248,"nő ":1717,"mű ":3511,"uró":2193,"usz":5860,"ust":1128,"uta":3446,"uto":1342,"us ":17567,"ura":2400,"urg":1418,"uri":1391,"ulá":1430,"uma":1476,"umb":1113,"unk":2047,"uni":1375,"um ":4397,"ult":3281,"ula":2897,"uk ":1640,"ul ":10317,"uga":4002,"uda":1985,"udo":3577,"pül":4756,"tvá":1162,"trá":1500,"tve":3654,"tur":1775,"tus":2989,"tul":1430,"tum":1120,"tud":4114,"tté":1199,"ttá":1546,"tsé":3581,"pít":3633,"tre":3344,"tt ":30966,"tra":4155,"tri":3477,"tro":2690,"tsz":2410,"tta":4727,"tte":6235,"tti":1842,"to ":1211,"tjá":2955,"tos":4461,"tot":7575,"tkö":1448,"toz":15230,"tom":3113,"ton":3995,"tok":4029,"tol":3082,"tor":5700,"til":2521,"tik":5482,"tis":1923,"tin":5413,"tio":2102,"tiz":1109,"tja":3013,"tke":1731,"tla":1565,"tle":4022,"tem":4106,"pán":1462,"ten":6944,"tei":1486,"tek":7221,"pál":1428,"tel":10568,"teg":2075,"th ":1102,"tet":11629,"tes":7803,"ter":19863,"pár":2273,"ti ":14986,"the":2658,"tha":1854,"ző ":4019,"zöt":4004,"zöv":1986,"zör":1244,"zül":3298,"zás":4765,"zár":4252,"zám":6211,"zál":1821,"zág":9532,"záz":1557,"yüt":2463,"zó ":13810,"vű ":1466,"zít":1657,"zín":2823,"zép":2406,"zér":1246,"zés":6463,"zén":2337,"zél":1670,"zék":2328,"ülö":2566,"ütt":2514,"ülő":1827,"yáb":1633,"yán":7191,"yár":2315,"tű ":2810,"vő ":1785,"yó ":1102,"yéb":2439},"n_words":[10929783,12338513,8457220],"name":"hu"}
\ No newline at end of file
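
Each profile file added in this change is a single-line JSON document with the three keys visible in the blob above: "freq" (an n-gram → occurrence-count map over 1- to 3-character n-grams), "n_words" (total n-gram counts, one entry per n-gram length), and "name" (the language code, e.g. "hu"). Below is a minimal sketch of reading one of these files with only the standard library; the helper name, the path, and the per-length interpretation of "n_words" are illustrative assumptions based on the data shown here, not part of the langdetect API.

    # Minimal sketch, assuming only the JSON structure visible in the diff above.
    import json

    def inspect_profile(path):
        """Print summary statistics for one langdetect language profile file."""
        with open(path, encoding='utf-8') as f:   # Python 3 I/O, per this commit
            profile = json.load(f)
        freq = profile['freq']          # n-gram -> raw occurrence count
        n_words = profile['n_words']    # assumed: totals for 1-, 2-, 3-grams
        print('language:', profile['name'])
        print('distinct n-grams:', len(freq))
        print('totals by n-gram length:', n_words)
        # Relative frequency of a single unigram, e.g. "a":
        print('p("a") =', freq.get('a', 0) / n_words[0])

    inspect_profile('nlp_resource_data/langdetect/profiles/hu')
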
diff --git a/nlp_resource_data/langdetect/profiles/id b/nlp_resource_data/langdetect/profiles/id
new file mode 100755 (executable)
index 0000000..578fd6c
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"D":29898,"E":10468,"F":12839,"G":16234,"A":46756,"B":45618,"C":22345,"L":19113,"M":41589,"N":19093,"O":9096,"H":16882,"I":48750,"J":24390,"K":57099,"U":11179,"T":39498,"W":10090,"V":6089,"Q":1321,"P":55685,"S":65231,"R":21435,"Y":7072,"X":1468,"Z":2501,"f":36925,"g":365165,"d":485064,"e":840847,"b":246328,"c":70473,"a":2100570,"n":996656,"o":305761,"l":430448,"m":362974,"j":64260,"k":378208,"h":280863,"i":864649,"w":50622,"v":31442,"u":508685,"t":522270,"s":482635,"r":589018,"q":2068,"p":245252,"z":10934,"y":153007,"x":3585,"é":2192," l":29162," m":98895," n":17327," o":22384," h":14754," i":38006," j":15556," k":88933," d":239066," e":8087," f":10185," g":12086," a":116958," b":88325," c":8643," y":62811," z":1713," u":21958," t":81597," w":11107," v":3008," p":104408," s":135289," r":13775," J":23985," K":55803," H":16124," I":42065," N":16611," O":7318," L":17821," M":38172," B":43424," C":19640," A":41294," F":11657," G":14895," D":28065," E":8736," Z":2373," Y":6861," S":61440," R":19599," Q":1212," P":52451," W":9492," V":4735," U":10169," T":36300,"ا":1474,"A ":3175,"Da":7353,"Co":3999,"Ce":1371,"Ch":3483,"Ci":2915,"Du":2494,"Do":1602,"De":6798,"Di":6852,"Fe":1983,"Fa":1382,"Er":1543,"Ge":3292,"Ga":2686,"I ":4993,"Fr":1829,"Fo":1312,"Fi":2806,"C ":1897,"Au":2055,"Ar":4506,"As":2577,"D ":1256,"Ba":19721,"Ag":2188,"Ab":1488,"Ac":1714,"Ad":1447,"Am":5332,"An":4093,"Ap":1629,"Al":4784,"Bu":4215,"Br":3076,"Ca":4296,"Bi":2492,"Be":8472,"Bo":3000,"Ku":3549,"Kr":1685,"Ko":9825,"Le":3207,"Li":3131,"La":6311,"Lu":1669,"Lo":2427,"Me":8886,"Mi":3917,"Ma":15663,"Mu":4166,"Mo":3510,"Ni":1382,"Ne":3573,"Na":5130,"P ":1286,"Nu":1142,"No":3476,"Ok":1174,"Gr":2085,"Go":1818,"Gu":2419,"Ha":5613,"He":2039,"II":2521,"Hi":2733,"Ho":2454,"Hu":2080,"Ib":1304,"Ia":3224,"In":24134,"Is":3276,"It":2124,"Ir":1207,"Ja":10872,"Je":6536,"Jo":2100,"Ju":3132,"Ka":22425,"M ":1852,"Ki":1891,"Ke":13666,"Ut":2746,"Un":3136,"Tu":2495,"Tr":1946,"To":2609,"Th":2978,"Ti":8403,"Te":9113,"Ta":6615,"V ":1165,"Sy":1338,"St":3734,"Su":10264,"Wo":1285,"Wi":2707,"Wa":3162,"We":1219,"Vi":1573,"Pu":4020,"Pr":10851,"S ":1994,"Pe":15767,"Pa":12458,"Po":3505,"Pi":2342,"Or":1349,"Se":18572,"Si":5355,"Sh":2006,"Sp":1564,"So":3044,"Ru":2528,"Sa":8331,"Re":4622,"Ri":1937,"SM":1268,"Ro":3385,"T ":1420,"Ra":5456,"b ":5402,"a ":312256,"Ya":1618,"Yo":2051,"Yu":2161,"i ":261146,"ge":17252,"ga":100134,"fi":10254,"fr":1949,"fu":1399,"ft":1686,"fo":4136,"he":8947,"ha":50757,"gn":2069,"gl":1807,"gk":15524,"gi":27056,"gh":4457,"gg":24987,"gu":19428,"gs":5762,"gr":8256,"go":7739,"du":22038,"dy":1437,"g ":127207,"ea":8168,"eb":53877,"ec":18556,"ed":14792,"de":45859,"dd":1120,"di":143405,"do":23309,"ds":1430,"dr":3663,"ew":5523,"eu":3102,"ev":3178,"ey":2387,"fa":4277,"h ":162608,"fe":4040,"eh":16895,"eg":14415,"ef":2165,"ee":3361,"el":59121,"ek":27553,"ej":8227,"ei":5982,"ep":21232,"eo":11093,"en":149316,"em":59371,"et":34760,"es":60149,"er":212475,"ca":27397,"e ":42795,"br":3158,"bu":59966,"bo":6681,"bl":3420,"bi":20522,"be":67748,"da":219838,"f ":6725,"cu":3577,"ct":1798,"co":4648,"ck":2674,"ci":8787,"ch":7724,"ce":8123,"c ":1919,"az":1822,"ay":22914,"ba":74854,"d 
":16186,"at":148965,"as":93999,"ar":156214,"aw":19383,"av":2545,"au":33011,"ak":85647,"al":170145,"ai":50563,"aj":9897,"ap":26307,"am":87049,"an":494871,"ac":7825,"ad":111448,"aa":15819,"ab":26392,"ag":30377,"ah":179811,"ae":6490,"af":4082,"nu":12849,"nt":61715,"ns":21631,"no":11352,"nn":7780,"nz":1307,"ny":39684,"oe":1902,"of":3903,"oc":2742,"od":8904,"oa":2254,"ob":5836,"om":19983,"on":61017,"ok":11631,"ol":35874,"oi":1595,"og":9163,"oh":4041,"ot":23779,"os":12530,"ov":13798,"ou":5529,"op":9093,"oo":2776,"or":39625,"r ":75373,"ow":2582,"oy":1610,"pe":68224,"pa":90070,"pl":2952,"po":12602,"ph":1712,"pi":13857,"lo":16018,"ln":1524,"lm":6776,"ll":7645,"ls":1781,"lu":24730,"lt":3200,"ly":1874,"o ":23320,"ma":94460,"mb":26120,"me":93044,"ml":1630,"mi":26777,"mn":2215,"mm":2111,"mp":24012,"mo":8478,"mu":28574,"p ":11473,"na":77075,"nc":11437,"nd":56574,"ne":39080,"nf":2387,"ng":238792,"ni":56068,"nj":13796,"nk":3871,"ju":15959,"jo":2256,"ki":23132,"kh":4590,"ke":56774,"ka":127350,"m ":50860,"ky":1397,"ks":9835,"kt":10395,"ku":22809,"ko":27878,"kr":3523,"kk":1592,"kl":3754,"km":2765,"kn":3265,"li":60829,"lk":2913,"le":44927,"ld":2217,"la":192799,"lb":1849,"n ":334938,"hr":1702,"ht":1424,"hu":19954,"hk":1872,"hi":19755,"hn":2129,"ho":5306,"hl":1668,"id":19549,"ic":8689,"ib":13081,"ia":81624,"ih":9019,"ig":10843,"if":6720,"ie":7742,"k ":77693,"ir":31102,"is":68271,"it":40678,"iu":5028,"iv":4491,"iw":2077,"ij":2865,"ik":61148,"il":48904,"im":30225,"in":117362,"io":16681,"ip":11977,"je":5678,"ji":4743,"iz":1367,"iy":1200,"l ":56560,"ja":34513,"z ":1416,"wi":10306,"wo":1229,"y ":11456,"wa":31269,"we":3398,"vi":15930,"vo":1681,"ve":8269,"va":3932,"x ":1658,"ui":4586,"uj":4080,"uk":44283,"ul":29290,"ue":2818,"uf":1241,"ug":8297,"uh":9753,"ur":39636,"us":36462,"ut":30979,"um":25849,"un":84334,"up":32836,"ty":2279,"tu":61107,"tt":3054,"ub":9153,"ua":56784,"ud":13238,"uc":2268,"w ":1917,"to":20692,"tn":2005,"tl":1402,"ts":2489,"tr":20074,"te":90779,"tk":2568,"ti":55141,"th":5635,"ta":176003,"su":25989,"ss":5417,"st":32797,"sy":2246,"sw":1219,"sl":3160,"sk":6158,"sn":2055,"sm":3761,"sp":3394,"so":6557,"sc":1962,"se":100046,"sh":4276,"si":103815,"u ":69102,"sa":100996,"rr":2177,"rs":13384,"rt":29454,"ru":41105,"rv":1161,"rw":1931,"ry":4130,"rp":3731,"ro":34136,"rn":12088,"rm":13211,"rl":14368,"rk":15615,"rj":5203,"ri":102805,"rh":3147,"rg":10415,"rf":1249,"re":30519,"rd":11856,"rc":2704,"rb":14366,"ra":143028,"t ":74952,"qu":1325,"s ":74416,"pt":3193,"pu":23123,"pr":13816,"ps":1371,"zi":2204,"za":3196,"ye":4611,"ya":121025,"yu":3182,"ys":1566,"yo":3138,"yi":2638,"一":3113," Ga":2665," Ge":3275," Fo":1303," Fr":1822," Fi":2802," Ha":5588," He":2029," Go":1803," Gr":2062," Gu":2405," Ib":1298," Ia":3220," Hu":2077," Ho":2444," II":1738," Hi":2728," Je":6511," Ja":10843," Ir":1206," Is":3262," It":2123," In":24109," Ka":22393," Ke":13629," Ki":1879," Jo":2086," Ju":3129," La":6277," Le":3183," Li":3097," Ko":9805," Kr":1679," Ku":3543," Ma":15603," Mi":3888," Me":8848," Lo":2414," Lu":1657," Ne":3562," Na":5110," Ni":1377," Mo":3486," Mu":4147," Ap":1628," Am":5319," An":4070," Al":4777," Ag":2185," Ac":1711," Ad":1437," Ab":1471," Ba":19679," Au":2049," As":2567," Ar":4490," Be":8434," Bi":2483," Bo":2979," Br":3065," Bu":4209," Ca":4213," Ce":1368," Ci":2897," Ch":3462," Co":3963," Da":7319," Di":6832," De":6775," Do":1546," Du":2487," Er":1536," Fe":1977," Fa":1366," Wo":1267," Wi":2689," We":1211," Wa":3147," Yu":2156," Yo":2043," Ya":1612," Or":1346," Po":3467," Pi":2336," Pe":15730," Pa":12421," 
Nu":1139," No":3465," Ok":1169," Ra":5409," SM":1160," Ro":3363," Re":4599," Ri":1930," Pr":10827," Pu":4013," Sy":1332," Su":10248," St":3681," Ta":6595," Th":2952," Ti":8388," Te":9090," Tr":1942," To":2586," Ru":2526," Sa":8312," Sh":1982," Si":5333," Se":18549," So":3026," Sp":1555," Vi":1551," Tu":2469," Un":3130," Ut":2746," ja":5269," je":2335," in":24841," il":1562," is":2175," it":1708," ka":17469," kh":1422," ki":3127," ke":41380," ju":6965," ha":6291," gr":1294," gu":1601," ib":1921," ia":3559," hi":4380," hu":2034," ne":5231," na":8812," mu":5887," mo":2739," ol":12623," of":1811," no":1685," le":5736," li":3289," la":14587," ku":3200," km":2201," kl":1852," ko":17290," me":68772," mi":4388," ma":16460," lu":3991," lo":1345," ag":1308," ab":1554," ad":60411," an":11749," ap":1720," ak":5080," al":4506," aw":1213," ar":2749," at":18641," as":3479," ba":29062," bi":8314," be":42050," bo":2101," bu":6066," ca":3227," en":1364," ek":1536," fo":1399," fi":5527," ge":4483," ga":3124," co":1499," ce":1281," da":93500," do":1270," de":28323," di":109723," du":4523," za":1414," ya":62479," ru":1822," sa":30188," se":78628," si":6482," sp":1184," so":1196," ra":4660," re":4944," ro":1171," pu":5843," pr":10257," or":4567," pe":52401," pa":26959," po":5413," pi":2294," wa":3549," wi":6744," tu":4514," ut":3268," um":2172," un":12469," ta":18998," st":3288," su":11745," tr":2305," to":2153," th":1170," ti":9886," te":42313,"Fil":1716,"Ger":1344,"II ":2070,"Han":1227,"Har":1306,"Ing":3276,"Int":1699,"Ind":17352,"Ia ":3146,"Ara":1730,"Aus":1437,"Bah":2306,"Bal":1493,"Ban":5008,"Bar":4958,"Bat":1605,"Agu":1534,"Ame":4085,"Ang":1356,"Ber":2116,"Ben":1538,"Bel":2649,"Bri":1173,"Des":2124,"Dal":2000,"Cha":1289,"Dia":1644,"Neg":1421,"Nov":1144,"Per":8357,"Pem":1634,"Pen":3260,"Pas":1289,"Par":2408,"Pad":2035,"Pan":1988,"Pul":1756,"Pro":7786,"Pre":1253,"Pol":1113,"Ita":1993,"Isl":1234,"Jan":1394,"Jak":1458,"Jep":2416,"Jer":2596,"Jaw":4820,"Jul":1161,"Kab":8059,"Kal":5302,"Kan":1250,"Kat":1399,"Kar":2399,"Ker":1724,"Kep":1438,"Kel":1127,"Kec":4407,"Kon":1421,"Kom":1142,"Kor":1136,"Kot":4453,"Lau":1252,"Men":2499,"Mer":1254,"Man":2093,"Mal":2386,"Mar":3752,"Mas":1454,"Mus":1132,"Yun":1506,"一一":1315,"Sur":1395,"Sum":1934,"Sul":1400,"Sun":2471,"Sta":2065,"Ten":4508,"Tan":2075,"Sin":1312,"Ser":4461,"Sep":1413,"Sel":5571,"Sem":1247,"Seb":1132,"Rus":1267,"Sam":1292,"San":2004,"Rep":1522,"Rom":1304,"Uni":2252,"Uta":2484,"Ter":1521,"The":1859,"Tim":5598,"bis":1615,"bit":1641,"bil":2430,"bin":3990,"bih":2863,"bli":2292,"bol":2158,"ban":15219,"bak":1477,"bal":2565,"bai":2045,"bag":18039,"bah":11866,"bad":2660,"baw":2021,"bat":5802,"bas":2119,"bar":6280,"beb":2207,"bed":1156,"ber":44372,"ben":5468,"bel":6123,"bek":1139,"bes":5623,"bia":3415,"bid":1129,"ca ":1339,"car":5372,"can":2310,"cam":11583,"ce ":2031,"bu ":2451,"bua":22699,"bup":9962,"bur":1967,"bul":1642,"buk":3601,"bun":4326,"bum":1823,"buh":2607,"but":6527,"aka":36392,"am ":27176,"aki":3783,"akh":2140,"al ":32136,"aja":7514,"aik":1989,"ain":9801,"air":2314,"ais":1591,"ait":2311,"ak ":26644,"ahk":1403,"ahi":5150,"ahu":13410,"aha":23711,"agi":8426,"agu":2010,"anu":3628,"any":11108,"ano":1219,"ann":4127,"ant":18067,"ans":3241,"ane":1770,"ang":133187,"ani":10218,"anj":5111,"ank":1863,"ap ":5145,"ana":17620,"anc":4861,"and":16448,"amu":2163,"amp":6554,"ami":2968,"ame":2230,"amb":4802,"ama":38387,"alu":3924,"alo":1291,"all":1612,"ali":20004,"ale":3013,"ala":101814,"an ":259764,"aks":3177,"aku":4198,"akt":4736,"aba":6181,"abi":1719,"abu":12017,"ae 
":1556,"aca":2365,"aan":12880,"aat":2166,"ad ":3820,"ab ":3122,"afi":1142,"ai ":28231,"aga":15818,"age":1445,"aer":3528,"ah ":131776,"adi":10057,"ade":2164,"ach":1161,"ace":1304,"ada":91074,"ayu":1260,"aya":17559,"ba ":1212,"at ":42706,"arg":2616,"are":4682,"ard":2694,"ara":50200,"aro":1477,"arn":2327,"arl":1195,"ark":4616,"ari":39797,"aru":4864,"ars":1240,"art":10017,"au ":21880,"asa":28749,"ary":2056,"asi":28272,"ase":1504,"ask":2058,"ar ":23543,"apa":12599,"api":3283,"apu":2004,"as ":18260,"aut":2942,"ay ":1252,"awa":16458,"awi":1328,"ata":52964,"asu":4212,"ast":4229,"ass":1429,"asy":1183,"atk":1321,"ato":2873,"ate":14253,"ati":9780,"aua":1165,"atu":20191,"aup":1527,"aus":1167,"jen":2753,"jad":5570,"jaa":2051,"jab":1150,"jar":5491,"jal":2507,"jak":2749,"jan":3425,"jo ":1155,"ito":1897,"itu":7300,"ism":1635,"isu":1798,"ist":11957,"ita":13383,"ite":4451,"iti":3900,"iwa":1676,"ium":1147,"iun":2068,"ivi":1396,"ive":2266,"ipu":1250,"ipi":1541,"is ":23018,"ion":11007,"ipa":2509,"ipe":2501,"ir ":10536,"irk":1299,"iri":9503,"isi":11065,"ise":5218,"isa":7947,"ire":1684,"ira":4533,"it ":5451,"ja ":5112,"kim":1495,"kil":1164,"kin":2100,"kir":1475,"kis":1425,"kit":4346,"km ":1129,"ki ":8986,"khi":1575,"keb":2854,"kec":8512,"ked":2012,"kek":1191,"kem":4126,"kel":7010,"ken":6683,"kep":3726,"kes":1631,"ker":5150,"ket":3912,"ke ":5970,"ksa":1691,"ku ":4682,"kot":9086,"kon":4219,"kom":5415,"kol":2400,"koh":1184,"ks ":1167,"kny":1410,"kka":1328,"ko ":1260,"kla":1828,"juk":1809,"jun":1767,"jum":1474,"jua":1961,"jug":4990,"kaw":1325,"kat":13174,"kar":11177,"kas":4851,"kap":2757,"kan":62967,"kal":5122,"kam":1705,"kai":2738,"kad":1254,"kab":2943,"ka ":12884,"ha ":2059,"ham":2420,"han":14832,"hak":1134,"hal":1973,"har":6037,"has":12725,"hat":1841,"haa":1690,"had":1765,"he ":2996,"her":1241,"hi ":1844,"hid":1896,"hin":3929,"hir":6378,"hka":1709,"go ":1252,"gku":1576,"gor":1284,"got":2192,"gsa":3115,"gu ":1706,"gra":2940,"gri":3408,"gur":1410,"gus":2168,"gun":9840,"iam":1372,"ial":6166,"ian":16893,"iap":1176,"ias":3614,"iat":1370,"ibi":1510,"ibu":4263,"id ":2050,"iba":3456,"ibe":2500,"ia ":44404,"ier":1178,"ies":1321,"ifi":1731,"ih ":5574,"ich":1544,"ie ":1390,"ica":1838,"idu":1993,"idi":3417,"ide":3199,"ida":6661,"if ":2797,"il ":7886,"ija":1503,"im ":3888,"ika":23689,"iga":3238,"igu":3825,"iha":2592,"ik ":17419,"imp":2775,"ime":1812,"imi":1994,"ind":4767,"ina":9693,"imu":7350,"inn":1767,"ino":1354,"int":9752,"ins":11171,"ine":2698,"ing":27890,"ini":22825,"ink":1199,"iny":3792,"iko":1474,"iki":7614,"ike":5441,"ila":14804,"in ":16335,"iku":2328,"ill":2052,"ilm":5734,"ili":13211,"ile":1187,"ima":9934,"io ":1907,"hny":1246,"hub":1262,"hun":11971,"hus":1117,"fat":1257,"eta":14851,"ete":4068,"eti":4646,"est":3135,"esu":1622,"ess":1214,"etu":1676,"evi":1298,"ey ":1761,"ewa":3433,"epe":3912,"er ":19709,"epa":9668,"eor":7717,"es ":8727,"ept":1302,"epu":3274,"erk":7438,"erl":11827,"eri":25792,"erj":4499,"erg":3829,"erh":2837,"ere":6493,"erc":1371,"erd":7020,"era":35580,"erb":12574,"et ":5262,"esi":21745,"ese":3800,"esa":15867,"eru":21470,"ert":16167,"ers":10880,"ern":7132,"erm":8938,"erp":2943,"ero":2183,"eki":2304,"ekn":1342,"eko":2677,"eks":2683,"ekt":2574,"eku":2083,"en ":21818,"ela":27699,"ele":4883,"eli":4636,"ell":1619,"elo":2885,"elu":8272,"emb":14950,"ema":7624,"eme":9102,"emo":1229,"emi":9424,"emu":4943,"emp":7479,"ene":6504,"eng":44211,"ena":14758,"end":12541,"enc":3844,"eni":5826,"enj":6735,"enu":4939,"ens":2961,"ent":14866,"eny":6205,"ege":2018,"egi":2598,"ehi":1472,"ek ":3218,"ein":1306,"eja":5845,"el 
":6722,"eke":1577,"eka":8278,"em ":3699,"gka":11656,"git":1200,"gin":2402,"gio":1293,"gia":7505,"gha":1865,"ggu":3578,"ggr":3634,"ggo":2263,"ggi":3715,"gga":11266,"gi ":10809,"gen":4341,"ger":4633,"gem":1548,"gel":2369,"ge ":1412,"gah":5405,"gai":13025,"gas":1879,"gar":10391,"gat":2080,"gam":3520,"gal":5628,"gan":35620,"gap":1320,"ga ":16084,"for":2821,"fil":3873,"fik":2255,"fis":1267,"da ":32556,"de ":5057,"dak":4551,"dal":74713,"dah":2729,"dae":2768,"dat":2016,"das":2265,"dar":32215,"dap":6287,"dan":52470,"dam":1256,"day":1876,"ch ":1540,"cer":1324,"cha":1339,"ck ":1406,"che":1327,"chi":1177,"cil":1660,"cis":2553,"ed ":1578,"eba":14092,"ebe":8142,"ebi":3337,"ebr":1132,"ebu":26049,"ea ":1655,"ei ":1901,"ega":7504,"eh ":13927,"edi":3728,"ede":1589,"eda":4173,"edu":2208,"eci":1667,"eca":14054,"dus":1247,"don":16622,"dok":1141,"dun":4005,"dup":1272,"dul":1434,"duk":5272,"dua":3278,"dud":2657,"dra":1669,"du ":1319,"did":3706,"dia":10670,"dib":6007,"der":4755,"des":7802,"del":1249,"dek":2240,"den":19332,"dem":1173,"di ":63731,"do ":1904,"dim":2715,"din":3800,"dio":1486,"dip":4666,"dir":8308,"dis":11623,"dit":5425,"dig":4149,"dik":8070,"dil":3331,"rha":2148,"rga":5059,"ri ":42938,"rge":1577,"ret":2887,"res":4698,"rg ":1147,"rea":2388,"rej":1691,"ren":4953,"rek":3031,"rda":4163,"rdi":3109,"re ":2800,"rbu":1584,"raw":1318,"rd ":2133,"rap":3154,"rar":1749,"ras":8094,"rat":11399,"rbi":1817,"rba":6850,"rbe":3317,"raj":2995,"rai":2568,"rah":9741,"rag":1442,"ran":36962,"ram":3611,"ral":3814,"rak":7284,"rab":2762,"raa":1221,"raf":1467,"rad":6359,"rs ":1283,"rpe":1250,"ros":2826,"rot":1395,"rom":1135,"ron":3571,"rop":2512,"rov":9832,"rod":2363,"rog":1358,"rny":1495,"rna":5736,"rmu":1260,"ro ":1992,"rma":9740,"rle":9267,"rla":2281,"rn ":1378,"rki":1226,"rke":4730,"rka":5975,"rja":3509,"rip":1275,"rio":1652,"rit":4490,"ris":9168,"ril":2736,"rik":14926,"rin":9510,"rim":1709,"ria":5936,"rib":1211,"ric":1356,"rid":1193,"rie":1621,"rk ":1641,"rya":1832,"ruh":2011,"rup":16158,"run":3050,"rum":2131,"ruk":1365,"rus":4704,"rut":3005,"ry ":1765,"rsi":3955,"rsa":2467,"rse":3931,"rta":11752,"rte":3710,"rti":7667,"rua":2111,"rtu":2198,"rt ":1478,"ru ":3013,"saa":3516,"sah":3766,"sai":1253,"sak":1265,"sal":14489,"sam":5305,"san":11536,"sat":14608,"sas":2791,"sar":11521,"saw":1247,"sa ":26535,"shi":1399,"si ":37996,"sid":1884,"sia":23497,"sit":3002,"siu":2036,"sir":1424,"sis":6271,"sin":5946,"sio":5312,"sil":3164,"sim":1558,"sik":5796,"sih":1154,"sif":1869,"se ":1750,"ser":7000,"ses":3161,"set":3675,"seh":1426,"sed":1550,"sec":2609,"seb":38986,"sep":5282,"seo":6619,"sen":4315,"sem":4262,"sel":6726,"sek":6032,"sej":4109,"spe":1482,"son":1660,"st ":1847,"ss ":1345,"sli":1139,"sla":1693,"ska":2806,"smi":1231,"sme":1407,"sya":1486,"ssa":1575,"ste":6115,"sta":5899,"sto":1411,"sti":6848,"stu":1610,"str":8471,"sua":5405,"sum":1276,"suk":5658,"sun":2892,"sut":1776,"sus":2249,"sur":1552,"tai":3534,"tak":12396,"tal":4630,"tah":16201,"tab":1684,"tau":16309,"tat":1881,"tas":11773,"tar":15390,"tap":2003,"tan":48487,"tam":7817,"te ":2719,"ta ":30047,"pa ":5126,"par":4662,"pat":19772,"pas":2458,"pad":19421,"pak":16990,"pal":3741,"pai":3130,"pan":11608,"pi ":3245,"pen":20640,"pem":8687,"per":28237,"pes":2547,"pel":2885,"pil":1386,"pin":3678,"pis":1169,"por":1652,"pop":1746,"pos":1337,"pon":1531,"pok":1819,"pol":2768,"pua":1330,"pub":1942,"pte":1271,"pri":2203,"pre":1413,"pro":9219,"pur":2238,"pus":1843,"put":3676,"pun":4895,"pul":5693,"ra ":33891,"ngo":1466,"ngi":5434,"ngk":15500,"ngu":4341,"ngs":5639,"ni 
":27725,"nge":5034,"ngg":24626,"ngh":3138,"nga":45848,"neg":5658,"nen":1469,"ner":3870,"net":1984,"nes":16997,"ng ":124434,"nci":3629,"nce":1815,"nca":3309,"ne ":3872,"ndu":6040,"ndr":1343,"ndo":17880,"ndi":8610,"nde":4365,"nda":13200,"ncu":1191,"nak":8053,"nal":12498,"nam":10027,"nan":12711,"nar":2327,"nah":2814,"nai":1146,"nd ":3909,"nat":2083,"nas":5218,"na ":14205,"nya":32061,"nye":2679,"nyi":2079,"nul":1194,"nun":2161,"nus":2178,"nur":1832,"nua":1507,"nto":2680,"ntu":18006,"ntr":1617,"nti":5215,"nta":22900,"nte":7428,"nst":1796,"nse":1390,"nsi":12613,"nt ":2342,"ns ":1443,"nol":1247,"nom":2318,"nny":5115,"no ":2934,"nka":1820,"nja":9105,"nju":2843,"nia":6702,"niv":1591,"nis":7959,"nit":2077,"nin":2190,"nik":2713,"ogr":1789,"ogi":3408,"ok ":3549,"ol ":2833,"ode":3528,"of ":1858,"odu":2149,"oh ":1516,"obe":1842,"ote":1332,"oto":2549,"ota":16038,"osi":2975,"ose":2201,"oso":1148,"ovi":10349,"ove":2391,"oun":1267,"ope":1909,"opa":1279,"os ":1902,"opu":1750,"or ":7039,"orm":2622,"oro":1416,"ord":1550,"ore":2472,"org":2231,"ori":3198,"ort":1972,"ora":11685,"ola":5593,"on ":12856,"oli":4593,"ole":13717,"olo":5388,"oka":1673,"om ":1677,"oko":1983,"ona":7292,"ond":2230,"one":17406,"ong":6397,"oni":2889,"ono":3029,"ons":2868,"ont":3055,"oma":3069,"ome":2094,"omb":1474,"omi":2626,"omp":4769,"omo":1445,"omu":1988,"la ":8248,"le ":2787,"lah":85384,"lag":1642,"lai":7338,"lal":2146,"lak":4119,"lan":18336,"lam":23788,"lap":1829,"lar":2452,"lat":9794,"las":5560,"law":2074,"lau":6152,"lay":9124,"kut":2035,"kus":1523,"kur":1949,"kup":1223,"kun":2321,"kum":1981,"kul":1651,"kuk":1717,"kte":1325,"ksi":4460,"kua":1980,"ktr":1281,"ktu":2650,"kti":1994,"kto":2651,"lok":1231,"lon":2362,"lom":2722,"log":4305,"lny":1353,"lmu":1446,"lua":4860,"lta":1301,"lu ":3267,"li ":7633,"lev":1141,"les":2673,"let":9284,"ler":1546,"lem":2195,"len":2677,"lek":2289,"leh":12733,"leb":3217,"lla":1957,"lle":1326,"lli":1227,"lka":1991,"lm ":4360,"ll ":1780,"lit":5426,"lis":7079,"lir":1515,"lip":1370,"lin":6938,"lim":6542,"lia":5362,"lik":11324,"lih":1781,"ma ":21495,"mah":2164,"mai":3887,"mak":3420,"mad":1850,"mar":3160,"mas":10730,"mal":1793,"man":22564,"mat":17173,"mba":10647,"mbi":2114,"mbe":6808,"me ":2693,"mbu":4891,"med":1539,"met":2294,"mes":1768,"mer":26378,"mem":15519,"mel":4982,"men":35186,"luk":2200,"lui":1193,"lun":1652,"lum":2455,"lus":1551,"lur":4662,"mpi":3325,"mpe":2325,"mpo":2760,"mpu":6558,"mod":1185,"mon":1666,"mor":1409,"mpa":7594,"mu ":1626,"mud":2039,"mua":1660,"mur":7325,"mus":3149,"muk":2006,"mul":2774,"mum":2269,"mun":4790,"mi ":4545,"min":4316,"mil":8646,"mis":1673,"mit":1227,"mia":1700,"mik":1662,"mla":1505,"mny":1911,"zam":1194,"yu ":1225,"ya ":33584,"yat":1924,"yar":2043,"yan":64564,"yak":4682,"yah":7474,"yai":2823,"yi ":1127,"wi ":1323,"wil":6167,"wa ":9713,"wan":5957,"wal":2130,"wak":2165,"wat":1765,"war":3553,"was":1762,"wah":1890,"vin":9845,"vis":1830,"ver":3522,"usi":7004,"use":1166,"usa":6560,"usu":2654,"ust":3983,"uti":2003,"ute":2083,"uta":7869,"utu":1851,"utr":2712,"us ":12050,"ut ":11892,"ura":9315,"uri":1909,"urk":1245,"uru":7051,"uny":2077,"upa":26147,"ur ":13684,"upu":1725,"ump":1843,"umu":2414,"umi":1414,"uml":1462,"umn":1617,"uma":4069,"umb":3461,"ume":1307,"unt":12867,"unu":2087,"uni":6942,"unc":1388,"und":1967,"una":10560,"ung":21765,"une":1335,"up ":3437,"uks":1606,"uku":6493,"uko":1586,"uki":1626,"um ":7458,"uka":7728,"uju":3230,"ulu":3880,"ult":1653,"uli":4656,"ula":12496,"un ":20290,"uk ":22268,"ul ":3263,"ui ":2114,"uga":6118,"uha":3495,"uda":4275,"udi":2630,"ubu":2471,"uh 
":4643,"udu":3551,"ua ":5771,"uat":8034,"uas":4449,"uar":6223,"ual":1917,"uan":9121,"ubl":2137,"uba":1945,"uah":19677,"ty ":1866,"tur":4927,"tus":3183,"tuj":1345,"tul":1791,"tuk":15174,"tun":2681,"tum":1823,"tua":3005,"ts ":1124,"tra":8844,"tri":6158,"tru":1229,"tro":2826,"tu ":22458,"to ":3427,"tny":1162,"tob":1155,"ton":2911,"tok":1669,"tol":2048,"tor":4842,"til":2355,"tik":7507,"tif":2658,"tig":1707,"tit":1315,"tis":2474,"tin":8673,"tim":3584,"tio":3164,"tia":2861,"tid":3087,"tiv":1124,"tka":2458,"tem":9574,"ten":16531,"tek":2322,"tel":6089,"th ":1540,"tet":1414,"tes":1167,"ter":45895,"ti ":10357,"the":1811},"n_words":[11077227,12709440,9643042],"name":"id"}
\ No newline at end of file
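
Once these profile files (hu, id, it, and the rest listed in the change) are in place, they are what the detector draws its probabilities from. A minimal sketch of exercising them through the embedded package follows; it assumes nlp_resource_data is on sys.path so that `langdetect` resolves to the copy shipped in this tree, and that the copy keeps the upstream public API (`detect`, `DetectorFactory`). The sample sentences are illustrative.

    # Minimal usage sketch, assuming the upstream langdetect API.
    from langdetect import DetectorFactory, detect

    DetectorFactory.seed = 0  # detection is probabilistic; pin the seed for repeatable output
    print(detect('Ez egy magyar mondat.'))                  # expected: 'hu'
    print(detect('Ini adalah kalimat bahasa Indonesia.'))   # expected: 'id'
    print(detect('Questa è una frase italiana.'))           # expected: 'it'
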
diff --git a/nlp_resource_data/langdetect/profiles/it b/nlp_resource_data/langdetect/profiles/it
new file mode 100755 (executable)
index 0000000..4ba4b40
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"D":94947,"E":67243,"F":92398,"G":104535,"A":191905,"B":119758,"C":221153,"L":203562,"M":160653,"N":84931,"O":53123,"H":49392,"I":191375,"J":30427,"K":33649,"U":59673,"T":99599,"W":33788,"V":71164,"Q":12325,"P":151391,"S":231227,"R":109065,"Y":11206,"X":16332,"Z":10278,"f":518611,"g":920085,"d":2363397,"e":6056669,"b":494596,"c":2150346,"a":6155041,"n":4141668,"o":4513622,"l":3766393,"m":1338197,"j":22860,"k":110896,"h":452585,"i":5976536,"w":49843,"v":585518,"u":1754682,"t":3725316,"s":2419415,"r":3264659,"q":101625,"p":1281861,"z":475996,"y":114701,"x":30592,"È":18809,"ì":9651,"é":22484,"è":321778,"à":93588,"ù":30198,"ò":27447,"ó":6194," l":332142," m":240472," n":389032," o":184207," h":31309," i":549255," k":13980," d":1574321," e":446281," f":277424," g":138336," a":703335," b":75718," c":722915," z":6698," u":458470," t":223758," v":125009," q":64842," p":570240," s":706865," r":244819," J":29715," K":31685," H":46792," I":159850," N":78119," O":44164," L":197779," M":152789," B":113710," C":207226," A":159909," F":86857," G":100079," D":87417," E":59714," Z":9584," Y":10426," X":11662," S":213852," R":103083," Q":11829," P":141839," W":31372," V":61312," U":56839," T":91377," è":311053," È":18792,"A ":27808,"Da":15987,"Cu":5620,"Cl":7833,"Co":63300,"Cr":10676,"Ce":10902,"Ch":29704,"Ci":11685,"Du":6024,"Do":13966,"De":15564,"Di":21598,"Fe":11756,"Fa":10362,"Eu":7860,"Es":8695,"En":5820,"El":6080,"Ge":15850,"Ga":14081,"I ":31564,"Fu":8170,"Fr":19816,"Fo":12748,"Fi":15192,"C ":10682,"Au":9085,"Ar":21848,"As":8912,"D ":5691,"Ba":27783,"Am":10014,"An":18633,"Al":34245,"Bu":8230,"Br":18440,"Ca":56504,"Bi":8237,"Be":19580,"Bo":19802,"Le":25820,"Li":19778,"La":88640,"Lu":9671,"Lo":25268,"Me":19948,"Mi":23733,"Ma":59219,"Mu":9761,"Mo":30287,"Ni":7514,"Ne":23857,"Na":17893,"No":19720,"Gi":22185,"Gl":5829,"Gr":18092,"Go":9531,"Gu":8537,"Ha":17031,"He":8149,"II":13036,"Ho":8979,"In":30727,"Il":66084,"Is":9021,"It":12730,"Ja":8255,"L ":27703,"Jo":9534,"Ka":7944,"Un":28071,"Tr":15672,"To":14627,"Th":17010,"Ti":6315,"Te":15797,"Ta":10754,"UA":13360,"V ":7192,"St":32757,"Su":12977,"Wi":7930,"Wa":7819,"Vo":7254,"Vi":20021,"Va":13663,"Ve":14211,"Pu":5612,"Pr":25455,"S ":9118,"Pe":16282,"Pa":38366,"Po":20730,"Pi":22494,"Or":10767,"Se":20797,"Sc":24538,"Si":21736,"Sh":7322,"Sp":9077,"So":16973,"Ru":7444,"Sa":46373,"Re":30166,"Ri":14281,"Ro":32671,"Qu":10556,"Ra":11841,"b ":11012,"a ":2254795,"i ":1483483,"ge":88624,"ga":67666,"fl":8575,"ff":35291,"fi":134642,"fr":78002,"fu":41990,"fo":66122,"he":195889,"ha":63814,"gn":70158,"gl":120822,"gi":234393,"gh":24919,"gg":70910,"gu":55229,"gr":73387,"go":71768,"du":52294,"g ":29202,"ea":91863,"eb":24191,"ec":137759,"ed":128701,"de":782039,"dd":10103,"di":854362,"do":146993,"ds":9108,"dr":32135,"ew":7817,"ex":7461,"eu":18301,"ev":52077,"ey":12656,"ez":26509,"fa":71893,"h ":27064,"fe":57833,"eg":172770,"ef":19354,"ee":16955,"el":899715,"ei":89786,"ep":27874,"eo":41068,"en":625515,"em":132418,"et":253878,"es":434221,"er":718824,"eq":6537,"ca":399283,"e ":2094441,"br":59095,"bu":40562,"bo":29867,"bl":42370,"bi":141440,"bb":48744,"be":43765,"da":306546,"f ":15395,"cu":62362,"ct":11024,"cq":7195,"cr":59094,"co":609419,"ck":21526,"cl":55423,"ci":319342,"ch":241963,"ce":223557,"cc":104089,"c ":16295,"az":131617,"ay":15144,"ba":66599,"d 
":145378,"at":664795,"as":217940,"ar":494405,"av":77540,"au":53901,"ak":11813,"al":679169,"ai":64110,"ap":84421,"am":172413,"an":700812,"ac":102427,"ad":97495,"ab":100158,"ag":138596,"ah":7533,"ae":26244,"af":30789,"nu":38735,"nt":634979,"ns":87660,"nq":7056,"no":374486,"nn":92022,"nz":76063,"ny":7751,"nv":12211,"oe":12640,"of":30817,"oc":106098,"od":71766,"oa":9353,"ob":27798,"om":291133,"on":807692,"ok":6829,"ol":302572,"oi":49498,"og":89760,"oh":6209,"ot":113582,"os":173480,"ov":109964,"ou":37230,"op":117448,"oo":15155,"or":452213,"r ":177153,"ow":12656,"oz":6935,"oy":5914,"pe":242170,"pa":234524,"pl":24851,"po":213425,"ph":7578,"pi":128997,"lo":210541,"lm":44463,"ll":700194,"ls":12139,"lp":10018,"lv":11056,"lu":64908,"lt":90822,"ly":7335,"o ":1645460,"ma":263312,"mb":52388,"me":350394,"mi":170562,"mm":42029,"mp":106005,"mo":153835,"mu":105465,"p ":14900,"iù":27078,"na":431794,"nc":198192,"nd":220346,"ne":784634,"nf":25150,"ng":109071,"ni":335162,"nk":7990,"ki":12045,"ke":15323,"ka":11574,"m ":76534,"ko":6777,"km":10394,"li":478088,"le":450199,"ld":17670,"lg":10033,"lf":7624,"la":677771,"lc":22009,"lb":26576,"n ":671873,"hr":6559,"ht":9126,"hu":11107,"hi":93831,"hn":5725,"ho":18964,"id":103779,"ic":450726,"ib":38087,"ia":461965,"ig":105629,"if":52751,"ie":183937,"k ":32183,"ir":101293,"is":299704,"it":487446,"iu":44781,"iv":129892,"ik":7413,"il":319428,"im":199167,"in":697560,"io":541230,"ip":107291,"iz":100179,"l ":908043,"ja":6741,"z ":9054,"tà":85596,"wi":6023,"vv":10000,"y ":65822,"wa":12380,"we":6401,"vi":166809,"vo":78376,"uz":20664,"ve":175153,"va":133125,"x ":19811,"ui":79181,"ul":72491,"ue":88946,"uf":10300,"ug":23193,"ur":134766,"us":100555,"ut":118876,"um":71437,"un":561799,"uo":58100,"up":47527,"ty":10365,"tu":164890,"tt":381279,"ub":48232,"ua":156844,"ud":43002,"uc":34894,"w ":10150,"to":743121,"tl":8532,"ts":9120,"tr":295588,"te":580573,"ti":577841,"th":33664,"ta":698330,"su":137066,"sv":16162,"ss":213977,"st":447357,"sl":9152,"sk":10657,"sm":16619,"sp":87152,"so":218207,"sc":157523,"sf":9364,"se":363584,"sh":16912,"si":401810,"rz":16932,"u ":59520,"sa":125213,"rr":72120,"rs":82412,"rt":207644,"ru":63955,"rv":21724,"ry":12041,"rp":15121,"ro":352219,"rn":64837,"rm":75961,"rl":22967,"rk":10623,"ri":614338,"rg":44544,"rf":10278,"re":622304,"rd":75640,"rc":66009,"rb":30426,"ra":583297,"t ":103375,"qu":99430,"s ":156468,"pu":55098,"pp":84071,"pr":255723,"ps":6191,"zz":76375,"zi":221598,"ze":20937,"za":99515,"zo":37143,"ya":7485,"È ":18782,"à ":92132,"ò ":26518,"ì ":8878,"é ":9885,"è ":314074,"ù ":29830," Ga":14003," Ge":15739," I ":16475," Fo":12451," Fu":8160," Fr":19775," Fi":15103," Ha":16981," He":8121," Go":9484," Gr":17959," Gu":8468," Gi":22095," Gl":5798," Ho":8931," L ":24925," Ja":8214," Is":8984," It":12700," In":30290," Il":65842," Ka":7887," Jo":9477," La":88189," Le":25663," Li":19618," Ma":58913," Mi":23636," Me":19838," Lo":25207," Lu":9633," Ne":23724," Na":17829," Ni":7466," Mo":30189," Mu":9681," A ":5979," Am":9971," An":18493," Al":34065," Ba":27690," Au":9040," As":8840," Ar":21718," Be":19500," Bi":8121," Bo":19624," Br":18373," Bu":8165," Ca":56136," Ce":10868," Ci":11600," Ch":29611," Cl":7713," Cr":10596," Co":62928," Da":15272," Di":21504," De":15426," Do":13718," Du":5966," El":6042," Es":8654," En":5749," Eu":7835," Fe":11726," Fa":10272," Wi":7870," Wa":7761," a ":110030," Or":10710," Po":20608," Pi":22451," Pe":16220," Pa":38161," No":19629," Ra":11758," Qu":10458," Ro":32569," Re":30042," Ri":14247," Pr":25353," Su":12949," St":32098," 
Ta":10678," UA":13306," Th":16944," Ti":6253," Te":15675," Tr":15562," To":14480," Ru":7431," Sa":46303," Sh":7238," Si":21606," Sc":24393," Se":20659," So":16887," Sp":9003," Va":13628," Ve":14047," Vi":19885," Vo":7231," Un":27982," l ":52713," im":22658," in":295683," il":151179," is":14605," it":18185," ha":23853," gi":32407," gl":14007," gr":40025," go":5702," gu":9712," id":6931," ne":275511," na":25351," mu":20050," mo":56127," ol":7856," om":7514," og":8277," oc":9930," of":10734," nu":14604," no":68303," le":52423," li":35936," la":143866," km":9911," me":51316," mi":29028," o ":42065," ma":81024," lu":16900," lo":28809," af":7222," ag":11338," ab":64448," ac":16193," ad":25196," am":20786," an":81988," ap":29511," ai":8950," al":183776," av":17618," au":20712," ar":36060," at":28661," as":31346," d ":20488," ba":32317," bi":8838," be":7889," bo":6391," br":13522," ca":114268," e ":219384," er":25454," et":12895," es":52932," en":13079," ep":5911," el":14730," fe":18710," fa":56978," fu":35844," fr":63957," fo":40785," fi":57936," ge":26299," ga":8284," i ":33110," cl":12326," co":356836," cr":19933," ce":24853," ch":106561," ci":55235," da":211997," cu":28570," do":30811," de":613533," di":670575," ec":32925," ed":39893," du":23486," ru":12186," sa":17824," se":114506," sc":42709," si":147221," sp":37866," so":80656," qu":64678," ra":31475," re":103663," ri":75848," ro":21093," pu":36089," pr":197319," os":7051," ot":10340," ov":5976," op":14803," or":49602," pe":114303," pa":91709," po":66694," pi":52800," va":18746," ve":35685," vo":17500," vi":51221," tu":14428," us":12554," ut":9880," un":408903," ul":5818," ta":13154," st":115114," sv":15397," su":116897," tr":76246," to":12684," th":12060," ti":18079," te":75278," È ":18766," è ":310990,"Eur":6532,"Gio":7152,"Fra":14090,"For":6200,"II ":9095,"Gra":9265,"Int":6283,"In ":9661,"Il ":64107,"Bas":6130,"Alt":6625,"Cal":8462,"Cam":7263,"Cas":9838,"Car":10034,"Can":7902,"Chi":9585,"Cen":5742,"Cha":12533,"Cor":7549,"Com":13539,"Col":6543,"Con":19924,"Dis":7107,"Nel":12164,"Nor":11147,"Per":7527,"Par":12134,"Pro":9921,"Pre":7483,"Que":6463,"Ita":11783,"Le ":10050,"La ":72068,"Man":6976,"Mar":24416,"Mon":12589,"Sta":17125,"UA ":13253,"Si ":7499,"Sai":6635,"Sco":15601,"San":19570,"Reg":7644,"Rom":11444,"Ven":6082,"Val":6880,"Uni":19062,"The":12364,"Tra":6212,"bit":77836,"bil":24167,"bli":33195,"bor":7965,"bbl":30259,"bbe":6273,"be ":7531,"bbr":5663,"ban":14724,"bal":6018,"bat":9698,"bas":13208,"bar":7268,"ber":18766,"bia":7881,"ca ":152253,"car":46789,"cas":12364,"cat":57544,"can":40911,"cap":15975,"caz":8224,"cam":17670,"cal":28832,"ce ":48205,"bri":12930,"bro":7660,"bra":15040,"bre":21272,"bum":18883,"am ":7918,"al ":120078,"ain":14219,"aio":8047,"agl":13391,"agg":47904,"agi":14545,"agn":23437,"ago":14318,"anz":27062,"ano":80778,"ann":47068,"ant":158501,"ans":9496,"ane":24020,"ang":15547,"ani":57213,"ana":44733,"anc":100934,"and":75292,"amm":16889,"amo":10418,"amp":23753,"ami":24192,"ame":55568,"amb":14366,"ama":14741,"alt":24272,"alo":9405,"alm":19827,"all":157126,"ali":107974,"alc":17413,"ald":5979,"ale":157930,"ala":17702,"alb":21166,"an ":45440,"abb":6484,"abi":71766,"abo":7042,"ae ":8196,"ad ":23773,"aff":9278,"afi":13627,"ai ":19697,"aga":10288,"age":5874,"aes":7300,"ado":12933,"adr":12192,"adi":16689,"ade":11907,"acq":6540,"aco":8070,"aci":10043,"ach":7116,"ace":12782,"acc":33357,"ada":10503,"acr":5776,"azi":114771,"azz":11554,"at 
":6931,"arg":6155,"are":73118,"ard":28986,"arc":21226,"ara":44876,"aro":14834,"arn":6502,"arm":9201,"arl":10615,"ari":92056,"arr":17226,"ars":9758,"art":120284,"asa":12008,"asi":18399,"asc":31970,"ase":8737,"ar ":13924,"api":8043,"apo":15567,"app":40715,"as ":11094,"ava":20339,"aut":20581,"avo":14059,"avi":12649,"ave":19075,"ay ":7096,"avv":6341,"ata":122912,"ast":41964,"ass":66552,"atr":11363,"ato":251278,"ate":43614,"ati":96839,"att":98894,"atu":21242,"aur":8293,"aus":7205,"ito":68066,"itu":73079,"itt":66676,"ism":9170,"iso":15645,"isp":27507,"iss":18851,"ist":119446,"ita":148002,"ite":23677,"iti":31286,"ivo":23510,"ius":6126,"ium":7439,"iun":5847,"iut":11741,"iva":34645,"ivi":28519,"ive":42061,"ipo":9185,"ipi":8806,"is ":24623,"ion":300543,"ior":37852,"ios":7307,"ipa":67260,"ipe":7498,"iov":6740,"iro":6686,"iri":10262,"isi":24222,"ise":9930,"isc":25942,"isa":8063,"ire":38092,"ira":11739,"irc":16765,"ità":54075,"izz":49660,"izi":46425,"km ":6935,"ha ":20767,"ham":6367,"han":10716,"har":10403,"he ":154969,"het":5812,"her":11167,"hi ":17347,"hie":13497,"hia":17408,"hin":6740,"hil":8553,"hit":6455,"go ":24875,"gle":14019,"gli":101433,"gno":22948,"gni":13103,"gne":7169,"gna":23696,"gol":13409,"gon":12443,"gru":16613,"gra":38555,"gre":12522,"gui":11363,"gua":15610,"gue":16931,"gur":5768,"iam":18301,"ial":41395,"ian":67167,"ias":18220,"iar":13087,"iat":27828,"ic ":7518,"iac":6819,"ibi":9969,"ibr":7455,"ibu":6636,"iaz":6940,"ibe":7052,"ia ":244503,"iet":16601,"iem":9287,"ien":42565,"ier":26963,"ies":15028,"iff":8035,"ife":10392,"ifi":25651,"ico":88289,"ici":66289,"ich":34469,"icc":12587,"ice":31036,"ie ":52789,"ica":189725,"ido":7371,"idi":19231,"ide":49954,"ida":12753,"il ":154014,"igl":35255,"igh":7328,"igi":24518,"igu":7702,"ign":12535,"imo":36242,"imm":6781,"imp":19629,"ime":69205,"imi":17819,"inc":72864,"ind":26998,"ina":84297,"ino":48220,"int":56511,"ins":16076,"inf":11205,"ine":59357,"ing":57070,"ini":52386,"ioc":18011,"inv":7003,"ila":12536,"in ":183514,"ilo":8196,"ill":25371,"ilm":20062,"ili":45534,"ile":34997,"ima":41217,"io ":144991,"ilu":7192,"ffe":9164,"ffi":11476,"fes":8200,"fer":25051,"fia":6078,"fas":16462,"fat":6791,"far":5672,"fam":16618,"fan":7946,"età":10656,"ezz":10468,"ezi":12836,"eta":25147,"ete":11093,"eti":19239,"esp":10424,"eso":7266,"est":82383,"ess":82490,"eto":8361,"etr":19381,"ett":136219,"eve":12042,"eva":15707,"evo":6501,"evi":15209,"eur":6939,"ey ":9468,"er ":129403,"eor":7000,"es ":37108,"epu":6598,"eri":107300,"erg":9054,"ere":60624,"erf":7449,"erc":19250,"era":90400,"et ":14734,"equ":6340,"esi":40808,"esc":25003,"ese":123684,"esa":17222,"erz":5997,"erv":15974,"err":35138,"ert":35208,"ers":54254,"ern":33474,"erm":28661,"erp":7756,"ero":53515,"en ":21820,"ela":13747,"ele":33688,"eli":13565,"ell":470580,"elo":7068,"eo ":15585,"emb":16989,"ema":19885,"eme":23252,"emo":10570,"emi":32972,"emp":19288,"ene":64829,"eng":5768,"ena":16730,"end":39915,"enc":6461,"eno":18323,"enn":25781,"eni":17944,"enu":8919,"ens":28995,"ent":321381,"enz":41200,"egl":30932,"ego":7039,"egn":18128,"egg":11766,"egi":67882,"egu":13973,"el ":344686,"giu":12377,"gis":8322,"gin":21943,"gio":125636,"gic":7947,"gia":27457,"ght":6164,"ghi":6769,"ghe":8042,"ggi":57223,"gge":11031,"gi ":14154,"gen":34367,"get":10150,"ger":10735,"ge ":13833,"gar":8686,"gat":8038,"gan":16471,"ga ":14336,"fra":61392,"fu ":20614,"for":37768,"fon":16839,"fic":48822,"fig":8947,"fil":22511,"fin":27532,"da ":136024,"de 
":72497,"dal":94565,"dai":6473,"dat":26742,"dar":7604,"dan":11831,"cun":8420,"cul":6650,"cui":21314,"cur":6089,"cla":11488,"cli":30380,"co ":105436,"cog":6308,"con":172605,"col":60224,"com":159257,"cor":35975,"cos":24601,"cop":26258,"cqu":7001,"cre":13257,"cra":6453,"cri":26431,"cro":12281,"cci":18510,"cch":12126,"cco":24015,"cca":13415,"cce":29193,"ch ":7853,"cer":13344,"ces":73387,"cen":46057,"cel":12841,"ced":6856,"ci ":29747,"cha":6310,"cia":75274,"ck ":12816,"cie":22050,"cid":13163,"che":152251,"chi":60026,"cil":6171,"cir":15709,"cis":9575,"cit":48155,"ciu":9832,"cin":18966,"cio":18635,"cip":33626,"ed ":34167,"ebb":8622,"ebr":5973,"ean":5957,"eal":11897,"eat":17904,"ea ":31185,"efi":7167,"ei ":71643,"ega":13314,"edi":40474,"ede":36599,"ecl":13991,"eci":22204,"ece":13292,"ecc":23316,"eca":6015,"ee ":7306,"eco":39931,"dur":10435,"dut":6584,"duz":6137,"dor":7110,"dop":9690,"don":12153,"dov":9865,"dot":18765,"ds ":7341,"due":12229,"dri":5744,"dra":9112,"dre":9661,"dro":6212,"dic":30670,"dia":36200,"der":31418,"des":28656,"det":11258,"dec":5661,"def":6138,"deg":19555,"dei":52282,"del":486810,"den":37113,"deo":6268,"di ":549365,"do ":64324,"div":20925,"diz":13866,"din":21333,"dio":28106,"dip":45197,"dir":19963,"dis":42122,"dit":11525,"die":7033,"dif":12635,"rga":12245,"ri ":84427,"rgi":7768,"rge":8891,"rgo":7492,"ret":51197,"res":79309,"rev":9081,"rfi":5771,"rds":5842,"rea":31808,"rec":19082,"red":9839,"reg":68923,"rem":13549,"ren":40494,"rel":11321,"rda":7163,"rdo":8984,"rdi":19945,"rde":7719,"re ":253820,"rco":11417,"rci":11839,"rch":14217,"rca":18883,"raz":24666,"rd ":22088,"rap":12296,"rar":11138,"ras":20760,"rat":94270,"rav":8469,"rbi":17348,"rai":8283,"rag":15270,"ran":117955,"ram":19333,"ral":33868,"raf":16450,"rad":21432,"rac":15856,"rpr":6371,"rs ":8921,"ros":16960,"rot":12347,"rom":18807,"ron":34271,"rop":27028,"rov":41355,"rod":22675,"roc":16930,"roi":16969,"rol":9837,"rof":8044,"rog":13121,"rno":15812,"rna":22371,"rne":10164,"rni":10544,"ro ":93036,"rma":38511,"rme":10556,"rmi":17565,"rla":7860,"riz":22621,"rio":45747,"rit":48367,"ris":50562,"riv":16323,"rig":24728,"ril":9910,"rin":43294,"rim":44423,"ria":61665,"rib":8788,"ric":75415,"rid":11323,"rie":41936,"rif":9551,"rk ":5699,"rup":18084,"rus":6787,"rut":6215,"rva":7812,"rvi":6676,"rve":5812,"ry ":9246,"rsi":24369,"rso":28714,"rsa":7860,"rse":7118,"rta":21350,"rto":33655,"rte":47947,"rti":80543,"rt ":10303,"rro":11867,"rri":17046,"rre":16669,"rra":22088,"sal":9289,"san":12698,"sat":15313,"sar":8168,"sa ":55092,"rzo":7427,"si ":104255,"siv":14099,"sie":10542,"sid":14077,"sic":27928,"sia":21469,"sit":73146,"sis":21720,"sin":20546,"sio":35066,"sil":9310,"sim":20855,"sig":13260,"scr":20145,"se ":150356,"sca":15235,"sce":19663,"sci":47092,"sch":12701,"sco":34425,"ser":44878,"ses":5630,"set":14434,"seg":23827,"sed":9878,"sec":24038,"sen":42117,"sem":26599,"spo":16816,"spe":38890,"spi":8159,"spa":13861,"sot":9626,"sol":26000,"son":52029,"sop":6399,"sor":13321,"soc":11137,"su ":15907,"st ":20759,"smo":8359,"so ":74484,"sse":52141,"ssa":30610,"sso":53075,"ssi":64743,"ssu":6194,"ste":66836,"sta":130887,"sto":49939,"sti":78547,"stu":12684,"str":82216,"sua":16483,"sud":8747,"suc":8338,"sul":27351,"sup":11429,"suo":21188,"sur":6713,"svi":7639,"svo":5978,"tal":71028,"tag":17728,"taz":13579,"tav":10674,"tat":99866,"tas":10040,"tar":29860,"tan":110605,"tam":12199,"te ":231097,"ta ":297475,"pa ":11956,"pe ":6955,"par":126674,"pat":12618,"pas":6581,"pag":17667,"pal":29532,"pan":6881,"pi 
":12873,"pec":11403,"pen":11283,"per":160544,"pet":24231,"pes":8448,"pli":9280,"ple":7659,"pia":16405,"pic":12371,"pie":7531,"pin":8248,"pio":15425,"pir":5858,"pit":11446,"por":30452,"pop":13161,"pot":6913,"pos":33968,"poi":5808,"pon":18443,"pol":43083,"poc":5679,"ppr":9310,"ppi":6671,"ppo":29526,"ppa":24732,"ppe":7033,"po ":47633,"più":26636,"pub":29314,"pra":8794,"pri":70446,"pre":86755,"pro":88068,"put":6255,"pun":7305,"qua":45208,"que":36643,"qui":15702,"ra ":147867,"ngo":19829,"ngl":15051,"ngu":15413,"ni ":109423,"nge":12593,"ngh":7552,"nga":7243,"neg":12898,"nei":14330,"nel":251833,"nen":19286,"nem":6968,"ner":29386,"net":10607,"nes":22568,"ng ":19777,"nea":10437,"nfi":6288,"nco":13364,"nci":58066,"ncl":16907,"nce":63829,"nch":32511,"nca":8638,"ne ":381891,"ndu":5794,"ndr":9579,"ndo":51716,"ndi":44494,"nde":40952,"nda":41143,"nal":55844,"nam":6348,"nan":9718,"nar":20198,"nag":9973,"nd ":21375,"nat":56733,"nas":7852,"naz":16138,"na ":222844,"iù ":27049,"nve":7397,"num":9332,"nut":9355,"nto":104437,"ntr":54646,"nti":140368,"nta":97179,"nte":203922,"nso":6392,"nse":23034,"nsi":31575,"nt ":19040,"nqu":6994,"ns ":8742,"nol":12956,"nom":37789,"non":21278,"not":13854,"nos":16869,"nor":18667,"nov":9880,"nne":24481,"nna":15245,"nno":16579,"nni":28556,"no ":229968,"nif":7757,"nie":7902,"nic":33089,"nia":32535,"niz":16719,"niv":11731,"nis":28026,"nit":37389,"nio":10735,"nim":18502,"ogr":18460,"ogi":20692,"ogo":10374,"ogn":10473,"oge":8216,"ogg":8490,"oi ":11644,"oir":5807,"oid":15471,"ol ":6484,"oce":16535,"och":7177,"oci":18254,"ock":8603,"oco":10221,"oca":18569,"occ":17275,"ode":8882,"odi":13909,"odo":23349,"of ":9685,"oda":9043,"odu":9647,"obi":7836,"nza":33286,"nze":8647,"nzi":18694,"nzo":13784,"oti":8184,"ote":14816,"ott":46439,"oto":17159,"ost":54505,"ota":15242,"osi":22379,"ose":9147,"oss":25102,"oso":10625,"ovi":33581,"ova":26672,"ove":33842,"oun":6402,"our":8721,"opo":34311,"opp":9241,"ope":36127,"os ":8709,"opr":18087,"or ":15387,"orm":33668,"orn":18923,"oro":17981,"orr":13295,"ord":34705,"ore":83573,"org":19011,"ori":84933,"osa":10641,"osc":18331,"ort":43269,"ors":13700,"orb":16181,"ora":29450,"ola":56821,"on ":116999,"oli":48219,"oll":21894,"ole":18360,"olt":34437,"olo":80006,"olu":13657,"ona":73743,"ond":62359,"onc":11700,"onf":10972,"one":239015,"ong":9187,"oni":83803,"onn":10167,"ono":78642,"ons":27379,"ont":70151,"oma":36812,"ome":71129,"omb":8981,"omi":23159,"omm":10870,"omp":38351,"omo":18571,"omu":77250,"la ":502615,"le ":302428,"lci":6000,"lcu":7598,"lab":6177,"lac":10059,"lan":31109,"lam":6780,"lar":23739,"lat":27051,"las":21662,"lav":9746,"laz":14283,"ld ":6147,"lbu":19083,"lpi":7148,"lon":12253,"lom":6131,"lor":18602,"loc":10691,"log":22625,"los":5990,"lme":20869,"lti":14409,"lto":12474,"ltr":18612,"lta":21939,"lte":12298,"li ":126320,"lev":10337,"les":31571,"let":25767,"ler":10315,"lem":6948,"len":14630,"leg":16217,"lo ":116614,"lla":366876,"lle":95889,"lli":26880,"llo":51317,"lm ":18058,"ll ":147686,"lit":48006,"lis":18952,"lio":27570,"lin":48858,"lim":8378,"liz":23775,"liv":6284,"lic":53902,"lia":69213,"lib":8948,"lig":7530,"lie":14089,"ma ":77830,"mag":30019,"mar":19705,"mas":10652,"mal":8482,"man":52234,"maz":7709,"mat":39139,"mba":7166,"mbi":12940,"mbr":16743,"me ":86094,"med":14225,"met":29612,"mes":11952,"mer":34722,"mem":5945,"men":156332,"lup":7071,"luo":8412,"lun":8513,"lus":8431,"mpi":23542,"mpe":12689,"mpr":10131,"mpo":28943,"mpl":10446,"mod":11161,"mon":36802,"mol":12121,"mor":12412,"mos":9186,"mot":7018,"mpa":14258,"mus":15113,"mun":79273,"mi 
":19370,"min":44712,"mil":12857,"mis":11697,"mit":12512,"mic":16950,"mia":19829,"mig":18385,"mo ":49530,"mmi":13753,"mma":13393,"mme":10561,"zzo":8684,"zza":55596,"zi ":8253,"ze ":11813,"zaz":5784,"zat":34965,"zon":11265,"zo ":22095,"zia":26147,"zie":6922,"zio":172644,"za ":45565,"tà ":85470,"vve":6371,"via":17063,"vil":14796,"vin":31306,"vic":6909,"vid":11446,"vie":12590,"viz":5900,"vit":13527,"vis":26147,"vo ":24368,"vol":30564,"vor":8888,"vi ":11655,"ver":60261,"ves":9627,"ven":39876,"vel":9674,"ve ":30393,"val":16987,"van":19808,"vam":6468,"var":11852,"vat":11790,"va ":54959,"uzi":17041,"usi":20123,"use":8740,"usc":8066,"usa":11990,"ust":15108,"uss":11959,"uti":17848,"ute":8898,"uta":15229,"utt":27857,"uto":37177,"us ":15875,"ura":49253,"ure":16806,"urg":6288,"uri":11168,"uro":17633,"uog":8121,"uol":7555,"uov":6581,"ur ":9581,"upe":12180,"upp":25887,"umb":6348,"ume":24760,"uo ":15893,"unt":14471,"uni":39506,"uno":18306,"una":118143,"ung":13147,"une":69547,"um ":26907,"ult":16466,"ull":18007,"ula":8409,"un ":268027,"uin":7055,"uis":8264,"uit":22856,"ul ":12226,"ui ":25799,"udi":18251,"ue ":29020,"ucc":13459,"uer":10140,"ues":18932,"uff":8520,"uen":11155,"uel":14803,"ua ":25910,"uat":61583,"uar":12747,"ual":28454,"uan":11940,"ubi":6952,"ubb":30175,"ud ":7866,"uad":8171,"ty ":9152,"tur":35100,"tut":19514,"tui":7703,"tun":9180,"tua":67153,"tud":14358,"ttà":16625,"tre":43645,"tra":110475,"tri":61377,"tru":14883,"tro":62451,"tta":55228,"tte":61167,"tti":63839,"tto":137927,"ttr":17103,"ttu":21997,"to ":551865,"tog":6135,"tos":6930,"tom":8273,"ton":24892,"tol":23856,"tor":90775,"til":16701,"tif":7476,"tie":11090,"tig":6347,"tir":6554,"tit":35734,"tis":12185,"tin":33317,"tim":54555,"tip":9392,"tio":18064,"tia":7439,"tic":92757,"tiz":5615,"tiv":40919,"tem":30137,"ten":60350,"tel":26672,"tea":13828,"tec":11962,"ted":10497,"th ":7445,"tes":26748,"ter":139440,"ti ":212534,"the":11405},"n_words":[55820958,65476626,49460182],"name":"it"}
\ No newline at end of file
diff --git a/nlp_resource_data/langdetect/profiles/ja b/nlp_resource_data/langdetect/profiles/ja
new file mode 100755 (executable)
index 0000000..5a76401
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/ja
@@ -0,0 +1 @@
+{"freq":{"é":1545,"и":1279,"а":1241," 『":8564," 』":1624," 。":2126," 、":3623,"あ":3435630,"。":214195,"、":312995," ":2941,"々":3019,"』":21225,"『":21300,"」":29519,"「":29647,"〜":2758,"ア":2611969," あ":52272," ア":58543,"乱":13623,"九":1493,"乗":1644,"久":1407,"主":13646,"丼":69358,"中":24049,"両":2394,"並":1334,"丞":80623,"丕":5670,"世":14223,"丈":118136,"三":6461,"上":13976,"下":7225,"不":54571,"与":2254,"一":26873,"丁":71935,"万":1628,"任":2340,"以":6436,"令":1766,"代":19956,"他":3009,"付":3689,"人":34784,"交":21043,"京":8000,"五":1508,"井":2181,"争":3320,"予":2003,"事":34820,"二":5188,"使":59250,"住":2454,"位":7990,"作":23623,"何":10362,"体":13204,"佐":59544,"伊":1406,"企":2899,"会":27025,"伝":3991,"休":80474,"信":13921,"係":2046,"保":5439,"価":32733,"供":2235," 分":1406,"営":4802,"問":2693,"商":2606,"員":5415,"品":9370,"和":9812,"周":2042,"呼":9439,"命":2467,"味":3638,"含":3079,"名":22983,"同":12059,"吉":1498,"合":22154,"各":2831,"向":3485," 号":4482,"域":5560,"城":2545,"基":5538,"土":3313,"園":3175,"地":19745,"在":13117,"回":5170,"四":2207,"団":7524,"因":1258,"国":40277,"器":3147,"写":1418,"再":1714,"内":9686," 丞":3334," 世":5944," 丈":5457," 丁":2886,"処":1526,"優":2947,"共":7413,"具":1237,"入":4949,"全":10040,"八":1560,"公":10338,"児":1791,"党":2623,"元":7877,"光":3006,"先":1806,"催":4293,"倫":19778,"個":1623,"原":7882,"受":3764,"取":4325,"反":2686,"及":4976,"参":3018,"司":1601,"号":9329,"台":3682,"可":2330,"口":3431,"化":11592," 佐":1439,"区":8504,"医":2840,"南":6511,"協":4447,"博":1814,"単":3157,"千":2088,"十":1946,"半":2617,"劇":2223,"力":7128," 人":2934,"加":4595," 代":2394,"務":6021,"動":13267," 休":1652,"分":13117," 丼":3391,"初":7471,"別":4976,"利":4213,"制":7969,"則":1331,"前":8950,"創":2681,"崎":2237," 大":1534,"工":4508,"州":9754,"川":7871,"山":10018,"属":5936,"展":2319,"屋":2375,"局":4799,"少":2464,"小":30514,"導":2344,"将":1891,"専":2793,"島":7916,"岩":1342,"当":6576,"形":7101,"役":2410,"影":1589,"式":10389,"引":1979,"張":1336,"強":2073,"応":3170,"念":2270,"律":2571,"後":10708,"得":2357,"年":104829,"平":6377,"帝":2972,"布":1306,"常":2950,"師":2527,"建":3787,"店":2005,"庁":1327,"広":4783,"度":5771,"座":1332,"大":35199,"央":2180,"天":5593,"太":2851,"変":3794,"外":5603,"多":7893,"女":5796,"始":3033,"委":1623,"場":13501,"報":4192,"境":2223,"売":8204,"声":8316,"子":14064,"存":4270,"学":37448,"安":3733,"定":14058,"実":7367,"宗":1706,"宮":21279,"客":1436,"家":48873,"富":1327,"察":1679,"対":8681," 回":2984,"曲":6533,"書":6577,"曜":3792,"昭":3661,"映":4870,"星":2861,"時":14954,"果":2006,"査":2002,"木":3174,"本":36912,"朝":4495,"期":7726,"月":55829,"有":6525,"最":8063,"松":2561,"東":13912,"来":4168,"条":3471,"村":4202,"料":2840,"文":11450,"於":1575,"施":4063,"旅":1301,"族":3755,"旧":3190,"日":77379,"放":12785,"改":2906,"支":3198,"教":10820,"数":8554,"整":1308,"技":5374,"投":1259,"所":11171,"手":10767," 年":95958,"戦":13450,"戸":3149,"成":12592,"提":2781,"推":1511,"接":2250,"挙":1713,"持":4610,"指":8229,"情":3302,"急":1371,"感":1443,"愛":3173,"港":2061,"済":2489,"清":1263,"湾":1437,"源":1888,"演":3153,"気":4829,"民":7826,"水":5727,"江":2655,"決":2408,"河":1989,"治":7454,"波":1773,"派":2998,"活":6032,"流":4367,"浜":1603,"消":1291,"深":1258,"機":12320," 時":1629,"権":6128,"横":1498,"標":2070," 月":53298,"武":2753,"止":2016,"正":6725,"死":5205,"歌":3328,"次":4580,"欧":1451,"母":1932,"毎":2982,"比":1391,"殺":5325,"校":9373,"株":3423,"業":15026,"楽":6757,"植":1547,"検":1870,"構":5270," 
日":47110,"様":2976,"石":2879,"知":6637,"県":18156,"省":2816,"着":1339,"皇":3000,"的":19048,"目":10668,"直":2224,"白":1686,"発":20771,"登":4069,"病":1859,"症":1381,"町":8545,"画":10139,"田":7594,"由":2916,"用":19757,"産":5875,"生":16324,"番":8301,"略":5228,"界":7422,"環":2235,"理":11169,"球":4459,"現":13270,"王":7473,"独":3082,"状":2926,"物":13688,"特":7417,"照":1250,"然":1440,"無":2728,"点":4161,"火":1398,"置":8277,"美":2548,"群":2041,"義":5850,"習":1334,"素":3072,"約":3788,"紀":4911,"級":2880,"統":4477,"経":5810,"組":11006,"結":4672,"続":3548,"編":3445,"総":5306,"線":8126,"米":2827,"系":6844,"等":8344,"策":1400,"第":13434,"算":1686,"積":1366,"究":4684,"空":6782,"程":1685,"種":6006,"立":12201,"競":4491," 番":1554,"神":6975,"社":17675,"示":2628,"移":1880,"称":14999,"科":5836,"福":3372,"要":4830,"規":3792,"視":1426,"親":1689,"観":2376,"解":3594,"西":7073,"補":1253,"裁":1509,"製":5884,"衛":3099,"術":5314,"行":22080,"衆":5487,"表":10987,"警":1923,"議":4992,"護":2333,"調":2776,"読":1601,"説":4782,"語":21957,"認":2658,"論":4375,"設":10824,"記":10247,"計":5326,"言":6896,"話":3787,"評":1446,"路":5085,"超":1256,"足":1256,"起":2835,"賞":3546,"資":3066,"質":3136,"象":3259,"谷":1834,"近":4077,"農":1628,"載":2905,"転":2711,"車":9144,"身":6802,"自":10797,"者":18526,"聞":12087,"聖":2242,"聯":10502,"育":4279,"能":5243,"華":1394,"般":3860,"航":2825,"興":1400,"艦":4184,"色":1827,"英":9312,"信ああ":2064,"葉":3002,"著":2162,"風":1713,"食":1713,"領":3103,"項":2228,"類":3952,"馬":3385,"駅":2902,"館":2783,"高":11470,"連":10914,"造":5408,"進":3210,"送":12678,"通":11192,"速":1858,"遺":1795,"選":8114,"過":1669,"運":6072,"達":1651,"郡":4845,"部":17131,"都":8516,"郎":2090,"配":2845,"金":5425,"野":8402,"量":2343,"重":4451,"鉄":5783,"銀":1566,"録":3006,"関":12598,"間":11298,"開":13989,"門":4039,"降":1592,"限":2003,"院":3827,"陸":3351,"阪":2944,"防":2270,"離":1558,"電":7996,"隊":3468,"際":6340,"青":1618,"非":1816,"面":3571,"響":1740,"音":7019,"始ああ":1913,"殺ああ":2295,")":148109,"(":149030,":":12250,"=":3484,"~":3083,"交味あ":2597,"使ああ":9134,"使アア":1628," (":4296," )":9852,"価ああ":5083,"行ああ":10289,"組ああ":2099,"表ああ":2782,"一種あ":1572,"類ああ":1338,"手ああ":1245,"場合あ":2928,"世界大":1450,"大ああ":1950,"多あ。":1570,"多ああ":2150,"構成あ":1846,"倫ああ":2719,"成ああ":4644,"部ああ":1870,"戦ああ":1623,"続ああ":1414,"等学校":2276,"売ああ":5009,"声ああ":1329,"通ああ":1426,"送ああ":5822,"生ああ":4620,"ア語:":1902,"造ああ":1673,"ア連休":1985,"世紀あ":1284,"用アア":1501,"作品あ":2764,"ア選手":1921,"用ああ":9493,"佐売あ":1480,"。 ":13103,"、 ":31983,"』 ":1427,"不聯あ":1405,"あ ":66354,"地域あ":1988,"ア ":32799,"催ああ":3126,"あ連":2419,"あ通":3320,"あ選":1976,"あ運":3219,"あ都":1640,"あ金":1288,"あ重":1969,"ア語":8308,"あ電":1906,"あ際":1362,"あ音":2155,"あ関":6111,"あ開":8435,"あ間":2317,"あ認":1333,"あ記":3845,"あ設":5283,"ア系":1829,"あ製":2477,"あ表":4499,"あ行":10711,"あ規":1437,"あ言":4181,"あ解":1569,"あ西":1435,"あ近":1371,"あ起":2247," 『ア":2743,"ア連":2828,"あ高":3598,"ア郡":1249,"ア選":2094,"ア教":1868,"ア放":1489,"ア文":1270,"あ無":1446,"あ特":3612,"あ物":2014,"あ独":1271,"ア朝":1242,"、第":1727,"あ現":2680,"あ王":1453,"あ用":4828,"あ生":5118,"あ略":1610,"あ登":3137,"あ発":11033,"あ目":3554,"あ知":3796,"あ構":3048,"ア州":5899,"、特":1256,"あ機":2062,"あ毎":1746,"あ殺":2375,"あ死":2199,"あ正":1615,"。現":1855,"あ水":1343,"、現":2451,"あ活":3109,"あ流":1320,"ア番":2807,"あ自":4181,"あ聞":2418,"あ聯":1675,"あ者":1261,"))あ":1638,"あ著":1250,"ア社":1403,"あ移":1304,"あ称":1389,"あ神":1585,"あ社":1649," あ 
":1635,"あ第":4428,"、英":3619,"あ立":1390,"、自":1350,"あ続":1310,"あ総":2605,"あ結":2983,"あ経":2414,"あ組":1899,"あ統":1376,"ア王":2616,"あ置":2229,"あ国":6666,"あ基":3151,"あ地":4866,"あ呼":9152,"、小":1990,"あ含":2942,"あ同":5129,"あ名":6504,"あ合":3012,"あ各":1270,"あ加":1535,"あ務":1340,"あ動":1382,"あ初":2158,"あ分":4627,"あ制":2425,"あ利":1785,"あ前":1691,"あ創":1816,"あ原":2367,"あ参":1928,"あ取":2481,"あ受":2846,"あ単":1416,"あ南":1637,"あ倫":4845,"あ内":1663,"あ入":1803,"あ全":3374,"あ公":3342,"あ共":2253,"あ元":1846,"、大":2783,"あ乱":3962,"あ上":3029,"あ下":1378,"あ不":14639,"あ丈":32792,"あ三":1486,"あ丞":19819,"あ世":2331,"あ中":9465,"あ丼":18051,"あ主":4771,"あ他":2009,"あ付":1399,"あ代":2198,"あ企":1331,"あ事":5294,"あ交":5865,"あ人":7782,"あ作":6813,"あ何":3230,"あ佐":13423,"あ位":2504,"あ使":13500,"あ伝":1495,"あ会":1351,"、国":2207,"あ休":17591,"あ信":2085,"あ保":1720,"あ価":8783,"、同":2022,"あ一":16360,"あ丁":17347,"あ東":3287,"あ本":3519,"あ有":3083,"あ書":1761,"あ最":4488,"あ時":2327,"あ映":1487,"あ日":8737,"あ文":2466,"あ教":1976,"あ数":1984,"あ支":1653,"あ放":6620,"あ改":1615,"ア大":3211,"あ提":2042,"あ指":5951,"ア国":2219,"あ持":3443,"ア地":1445,"あ所":2803,"あ戦":3270,"あ成":1753,"あ手":1859,"。本":2143,"あ形":2406,"あ当":1618,"、本":1964,"ア合":4791,"、東":2694,"あ後":3036,"。日":2366,"、日":7073,"あ建":2122,"あ広":2188,"ア共":1242,"ア公":1517,"あ属":1897,"あ小":6851,"ア使":2442,"あ学":2960,"あ存":2964,"あ子":1693,"あ家":4670,"あ宮":4858,"あ定":2671,"あ実":3992,"あ対":5932,"ア事":1661,"ア人":2775,"ア休":2911,"ア佐":2436,"ア作":1731,"あ外":1294,"あ多":5332,"あ変":2304,"あ大":8249,"あ天":1336,"あ女":1848,"ア丁":1757,"ア丞":2294,"ア不":5250,"ア丈":4131,"ア丼":2746,"あ始":1317,"あ場":4272,"あ声":3268,"あア":156757,"ああ":1909463,"あ。":138896,"あ『":3993,"あ』":1518,"あ「":15609,"あ」":3632,"第二次":1255,"あ、":216446,"、あ":21490,"。「":2184,"、『":1828,"、「":4201,"。ア":19354,"、ア":64525,"々あ":2319,"。あ":13106,"『あ":1353,"」あ":19753,"「あ":2018,"」、":1525,"」。":1249,"『ア":7054,"』あ":7906,"「ア":7862,"、使":1676,"、価":1248,"、休":3597,"、佐":2726,"。休":1576,"、人":2001,"、丞":3869,"。丁":1388,"、不":3337,"、丁":3572,"、丈":7156,"、一":1931,"。丼":1300,"。丞":1247,"、主":1393,"、丼":4218,"、中":2406,"。丈":2918,"アア":2154092,"アあ":155866,"ア」":6771,"ア『":1584,"ア』":6210,"ア。":10635,"ア、":16488,"ア)":16388,"ア(":33336,"ア=":3214,"場ああ":3434,"入ああ":1917,"現在あ":4587,"、)":1834,"』(":8152,"」(":2340,"あ)":37153,"あ(":5782,"会社あ":2364," ああ":28860," あ、":7361," あア":3298," アア":57211,"次世界":1361,"日)あ":7258,"不身あ":2968,"日( ":2303,"一部あ":1349,"作曲家":1274,"基ああ":1599,"録ああ":1363,"対ああ":2786,"あ行あ":9249,"分ああ":1730,"設ああ":2171,"使究あ":1369,"記ああ":2072,"ア番組":2746,"位置あ":2308,"株式会":2868,"小ああ":3121,"言ああ":1584,"家ああ":5177,"あ置あ":2118,"あ総称":1606,"所属あ":1341,"主あ":3885,"丼ア":1464,"丼あ":19189,"使用あ":2638,"乱あ":3541,"与あ":1930,"不あ":8122,"丈ア":3173,"下あ":2803,"上あ":5913,"丈あ":27006,"丈、":1500,"丁ア":1890,"丁あ":18102,"一あ":5817,"丞あ":21257,"丕あ":1261,"世あ":2310,"不ア":1421,"中あ":5030,"丞ア":1844,"人あ":9122,"人。":1476,"人ア":1242,"他あ":1896,"付あ":2208,"代あ":6798,"代ア":1373,"争あ":1758,"事あ":9346,"交あ":4806,"不家":3233,"中国":2514,"作あ":7059,"何あ":2412,"体あ":6471,"位あ":2264,"佐あ":15860,"丼休":1592,"丼丞":2055,"丼丈":2151,"丼丁":1684,"丼丼":1418,"中使":3153,"会あ":6508,"丞佐":1535,"不使":1765,"休。":1523,"休あ":23904,"丞丈":2889,"丞丁":2038,"休ア":2507,"丞丞":1742,"不交":1422,"丈休":2694,"丈使":1243,"丈丈":5575,"丈丞":2374,"丈丼":3223,"丁休":1545,"一価":1575,"不不":1538,"丁丞":1826,"丁丈":3406,"丁丼":1794,"丈丁":3527,"信あ":4017,"交味":2982,"全 ":1304,"以下":1516,"、英語":1539,"係あ":1321,"供あ":1476,"価あ":9568,"使あ":19446,"中央":2121,"使ア":1968,"事交":2728,"丞家":1297,"一種":2197,"事業":2439,"二次":1389,"世界":5865,"佐丁":2480,"佐価":1425,"佐佐":1269,"倫あ":5771,"休丈":2771,"不治":2327,"休佐":1324,"休休":1921,"主義":2043,"人物":1891,"企業":2138,"一般":3547,"不聯":2198,"催あ":3689,"前 ":1371,"宮ああ":1909,"作家":1397,"作品":5289,"世紀":2728,"価使":2601,"佐売":2092,"あ目的":2533,"あ発表":1274,"あ第 ":2940,"内ああ":1292,"丞 ":1754,"丈 ":1322,"丁 ":1266,"丼 
":1638,"在あ":8533,"地あ":4409,"国あ":10614,"国ア":2658,"団あ":1724,"あ音楽":1319,"合衆":4737,"問宮":1556,"可能":1378,"名称":3513,"器あ":1492,"使(":1266,"動車":2200,"営あ":2139,"休(":1962,"和国":2518,"会(":2933,"分類":1336,"分野":1390,"多あ":4892,"大あ":3257,"あ開発":3310,"外あ":1908,"声あ":2287,"売あ":5710,"地区":1390,"地域":3263,"学ああ":2317,"国家":3119,"地不":3338,"在位":1282,"場あ":5556,"域あ":3413,"団体":2330,"あ開催":2692,"基あ":1982,"ア語あ":2144,"あ間あ":1859,"あ関あ":3802,"全国":1795,"加あ":2048,"力あ":3213,"共和":2791," 人あ":1389,"共同":1280,"化あ":4450,"和 ":3168,"動あ":4507,"務あ":2872,"前あ":3176,"制あ":1242,"定ああ":5359,"別あ":1751,"初あ":3667,"交通":1522,"代表":2575,"使用":3017,"使理":2476,"京都":3402,"佐県":1437," 世あ":1718,"会議":1333,"分あ":3773,"位置":2501,"ア語 ":2251,"使究":4609,"元あ":1489,"一部":2055,"不身":4258,"作曲":2528,"会社":6167,"内あ":4013,"語ああ":3397,"休画":2321,"入あ":2695,"全あ":1427,"品。":1238,"品あ":4556,"丈(":2884,"丁(":1789,"参加":1290,"員あ":2076,"丞(":2210,"丼(":1739,"含あ":3008,"協会":2160,"味あ":2957,"呼あ":8214,"化学":1687,"利用":1828," 世紀":2518,"制度":1248,"及あ":4790,"受あ":2030,"取あ":2208,"号あ":3953,"名あ":9335,"同あ":1953,"合あ":7017,"向あ":2543,"制作":1942,"区あ":2907,"家路":2125,"度あ":3326,"年ア":1913,"年あ":20990,"年、":1688,"広あ":1712,"帝国":1623,"形あ":1829,"年代":2422,"島県":1368,"専門":1642,"当あ":2190,"あ運営":1448,"小説":1987,"対象":1561,"式あ":3352,"川あ":1268,"州あ":2489,"州ア":2755,"常あ":1310,"小治":1760,"学者":3831," 年 ":49544,"成 ":1708,"当時":1631,"応あ":1648,"念あ":1283,"得あ":1713,"後あ":4649,"式会":2869,"平成":1905,"大学":6830,"あ設置":1507,"大戦":1559,"委員":1355,"女小":1372,"女子":1397,"子ア":1884,"子あ":3923,"あ起あ":1522,"場合":3557,"始あ":2180,"あ製造":1274,"子ああ":1318,"国際":4256,"子アア":1695,"あ設立":2139,"家 ":3003,"大会":3699,"物ああ":1862,"大阪":2112,"存在":3074,"家人":2908,"あ記休":1293,"年 ":51327,"島あ":1817,"あ表記":1710,"学校":6834,"家律":2208,"実施":1278,"天皇":1405,"学あ":6550,"定あ":7748,"宮あ":4230,"家ア":2140,"家、":2464,"家。":3423,"家あ":12300,"業ああ":1572,"あ言あ":2286,"小あ":7489,"対あ":3405,"属あ":3978,"局あ":2353,"東不":1238,"東京":5181,"本名":1618,"放送":10914,"映画":3085,"曲家":1330,"施設":1651,"曜日":2035,"教育":2974,"校あ":3206," 日 ":12276,"最初":1340,"楽あ":1408,"時間":1682,"業あ":4045," 月 ":44960," 日あ":15057,"株式":3061," 年(":5941," 年)":9287,"時あ":3451,"教会":1452,"教休":1250,"文使":1758,"文化":2253,"族あ":1560,"日あ":17278,"化ああ":2739,"昭和":3444,"時代":5531,"来あ":2732,"立ああ":3753,"あ放送":5748,"あ東京":1323,"果あ":1257,"日本":21674,"曲あ":2900,"あ日本":7358,"文学":1588,"月あ":5637,"有あ":1810,"最あ":1431,"書あ":2853,"選手権":2431,"本ア":2290,"本あ":10875,"期あ":4322,"日 ":12954,"手権":2431,"、東京":1480,"所属":2119,"月 ":45103,"数あ":3827,"あ戦あ":1312,"教あ":1272,"技術":2097,"務ああ":1643,"年(":6245,"年)":9425,"提供":1355,"指あ。":1899," 年あ":19179," 年、":1517," 年ア":1448,"家(":2915,"成あ":6012,"情報":2756,"戦あ":3394,"あ持あ":3321,"手あ":2666,"あ指あ":3829,"所あ":3296,"持ああ":1475,"ア合衆":4629,"戦争":2199," 年代":2242,"指あ":4364,"持あ":3953,"あ属あ":1826," 日)":9957," 日(":2924,"あ存在":2820,"点あ":3120,"ア不身":1594,"あ家律":1357,"江戸":1490,"あ広あ":1286,"活丞":1276,"活動":3023,"校(":1251,"動ああ":2193,"、日本":6502,"。日本":2210,"あ国家":1342,"あ地域":1419,"次世":1446,"構造":1383,"毎佐":1485,"日)":10122,"日(":3117,"加ああ":1583,"あ場合":3488,"派あ":1250,"流あ":1921,"正式":1441,"あ多あ":3964,"あ大あ":1447,"機能":1261,"機関":2979,"権あ":1708,"あ国際":1794,"機あ":2329,"共和国":2495," 月あ":5223,"あ対あ":3165,"殺あ":2872,"構成":2233,"止あ":1308,"称あ":9039,"称。":1304,"種あ":3083,"社会":2785,"発表":1469,"種ああ":1425,"第 
":9134,"目的":2889,"社あ":6402,"示あ":1874,"社ア":1383,"あ用あ":3710,"あ生あ":1801,"県家":2001,"発生":1334,"知あ":3526,"番組":5337,"発売":5037,"登場":2565,"略称":3022,"用語":1380,"目あ":4296,"あ知あ":3198,"あ登場":2375,"県あ":2733,"あ発売":3969,"界大":1455,"発あ":3815,"的あ":13077,"環境":1290,"理学":1713,"町あ":2460,"画あ":2581,"現在":6030,"用あ":12045,"用ア":1581,"自動車":2063,"界あ":1868,"独立":1445,"生あ":6030,"産あ":1671,"現ア":1355,"現あ":1628,"理あ":2327,"運営あ":1562,"特別":1260,"大学あ":1847,"衆国あ":2595,"ア放送":1421,"特あ":1759,"物あ":5383,"あ活丞":1241,"般あ":1349,"あ活動":1498,"艦あ":1387,"能あ":2381,"、現在":1773,"英 ":1379,"。現在":1490,"義あ":2621,"大会あ":2008,"総称":1629,"者。":2220,"者、":1458,"者あ":10349,"者ア":1305,"聯あ":3977,"聞あ":3281,"総合":1331,"置あ":6978,"経済":1991,"線あ":3253,"続あ":2112,"組佐":2309,"競馬":1452,"系使":1769,"等学":2286,"競技":1586,"組あ":3887,"組。":1569,"ア州あ":1713,"結あ":1657,"ア州ア":2558,"紀あ":1438,"系あ":2247,"素あ":1485,"あ殺あ":1580,"約 ":1619,"第二":1891,"立あ":5248,"究あ":1391,"空あ":1393,"等あ":3921,"あ構成":1941,"科学":1994,"象あ":2019,"設立":2500,"設置":1708,"賞あ":1603,"製造":1966,"計画":1610,"あ。 ":6469,"あ、 ":22576,"ああ ":39188,"論あ":1674,"記休":1374,"表記":2830,"身あ":4282,"車あ":2896,"路あ":1577,"象ああ":1255,"開ああ":1704,"起あ":1745,"設計":1269,"言語":1842,"質あ":1680,"郡あ":1437,"部あ":6774,"関ああ":4613,"。アア":19255,"日本ア":2092,"日本あ":9641,"、アア":64286,"路線":1359,"設立あ":2048,"送あ":7525,"通あ":2755,"造あ":2715,"連あ":1360,"進あ":1274,"『アア":7047,"近あ":1367,"』ああ":2158,"「アア":7790,"」ああ":7507,"間ああ":1397,"『ああ":1282,"載あ":2293,"称ああ":4464,"「ああ":1846,"通称":1887,"選手":4697,"、ああ":21181,"都事":2774,"。ああ":12750,"野あ":1575,"運営":1939,"運動":1644,"語:":3289,"連合":1965,"通信":1344,"連休":3247,"自治":1375,"(昭和":2128,"線(":1472,"航空":2161,"営ああ":1448,"自動":2405,"あ、)":1236,"ああ)":35187,"ああ(":4198,"英語":4067,"ああ自":1831,"ああ行":2651,"ああ表":1577,"ああ記":1483,"ああ言":2300,"ああ設":1654,"ああ開":2170,"あ、同":1707,"ああ丞":6562,"ああ一":3244,"ああ丁":5744,"ああ不":4855,"ああ丈":10564,"ああ丼":5871,"ああ主":1236,"ああ中":2062,"ああ価":2294,"ああ使":4600,"ああ作":2558,"ああ佐":5045,"ああ倫":1458,"ああ交":2092,"ああ人":3676,"ああ事":3171,"ああ休":6525,"あ、国":1738,"ああ他":1319,"あ、丁":2287,"あ、一":1711,"あ、不":2123,"あ、丈":4708,"あ、丞":2433,"あ、佐":1739,"あ、休":2616,"あ、人":1506,"あ、主":1290,"あ、丼":2884,"あ、中":1791,"あ。丈":1605,"ああ大":3222,"ああ多":2733,"ああ学":1481,"ああ家":2341,"ああ宮":1422,"ああ小":2540,"あ。日":1290,"あ、日":6150,"あ、東":1989,"ああ後":1790,"術あ":1484,"ああ分":1352,"ああ全":1444,"ああ公":1285,"あ、大":2131,"行あ":14000,"ああ名":2507,"ああ同":2111,"あ、小":1427,"ああ呼":3646,"ああ地":2280,"ああ国":2987,"ああ場":2933,"表あ":3761,"あ、現":2057,"ああ指":1536,"ああ戦":1238,"ああ最":1780,"ああ日":4356,"ああ放":1785,"あ、英":2463,"ああ第":1398,"ああ特":1792,"ああ現":1367,"あ、第":1316,"ああ目":1662,"ああ発":3602,"ああ知":2352,"ああ用":1755,"ああ生":1691,"設あ":3061,"記あ":3151,"言あ":3035,"製作":1647,"説あ":2100,"語あ":8268,"話あ":1899,"ああ。":128417,"ああ、":114147,"ああ『":1790,"ああ「":7669,"ああ」":2702,"行不":1963,"語 ":3303,"あ『ア":1268,"あ「ア":4056,"あ」あ":2473,"あ、ア":42684,"あ。ア":9708,"あ。あ":10112,"あ、あ":15664,"あ、「":2860,"あアア":151645,"あアあ":3890,"衆国":4735,"要あ":2289,"ああア":66491,"あああ":1201680,"時代あ":3504,"発表あ":1306,"高等":2629,"昭和 
":2986,"社アア":1256,"社ああ":2086,"丞(あ":1427,"本あア":1472,"本ああ":2442,"州アア":2688,"品ああ":1797,"本アア":2199,"分野あ":1258,"録あ":1741,"書ああ":1478,"鉄家":3515,"野球":2343,"設置あ":1442,"月ああ":2434,"間あ":6535,"開あ":1967,"関あ":5450,"有ああ":1305,"隊あ":1519,"関係":1758,"際あ":1785,"開催":3071,"面あ":1724,"項あ":1394,"開発":5246,"類あ":2486,"目的あ":2661,"電気":1300,"駅あ":1276,"音楽":3430,"曲ああ":1651,"高あ":1592,"属ああ":2285,"あ事あ":1426,"あ丼あ":5850,"あ丁あ":4319,"あ一あ":4453,"あ丈あ":8854,"あ上あ":1542,"あ不あ":2773,"あ中あ":2093,"あ丞あ":5276,"あ使あ":4014,"あ価あ":2254,"あ佐あ":3168,"あ作あ":1416,"利用あ":1328,"あ丈丁":1285,"あ丈丈":1864,"あ休あ":5479,"あ中使":2747,"あ他あ":1258,"あ丁丈":1300,"点ああ":1562,"あ利用":1457,"あ含あ":2902,"あ名あ":1668,"あ呼あ":8161,"記休あ":1285,"あ名称":2151,"高等学":2175,"丈(あ":1828,"存在あ":2940,"あ基あ":1594,"東京都":2018,"あ交味":2831,"呼ああ":6929,"あ不治":1330,"あ世界":2001,"あ一種":2121,"あ作品":1789,"あ価使":1553,"あ一般":1707,"あ人物":1374,"味ああ":1844,"あ一部":1521,"あ作曲":1773,"あ休画":1355,"あ代表":1400,"あ使用":2721,"あ位置":2381,"あ使究":1788,"あ務あ":1339,"表記あ":2011,"あ取あ":1899,"あ受あ":1927,"アあ作":1244,"アあ丼":1341,"アあ丁":1432,"アあ一":1999,"アあ不":1243,"アあ丈":2548,"アあ丞":1512,"アア、":16393,"アア。":10631,"アア」":6705,"アア』":6191,"アア『":1500,"アアア":1729078,"アアあ":151645,"アあア":19640,"アああ":55529,"アあ、":4837,"ア』あ":2640,"ア。ア":1791,"ア」あ":4452,"ア、ア":8332,"アあ開":1642,"アア語":8298,"アア系":1825,"アア社":1262,"アア番":2788,"受ああ":1416,"アア王":2579,"アア教":1744,"アア放":1421,"アア州":5894,"アア大":2784,"アア国":1719,"放送あ":6907,"アア合":4757,"アア公":1422,"アア休":2327,"アア事":1564,"アア人":2578,"アア作":1525,"アア佐":1706,"アア使":2109,"アア丼":1996,"アア丞":1556,"アア丈":2749,"アア不":4656,"アア ":31584,"ア。 ":1580,"アあ ":4215,"名ああ":2758,"最初あ":1276,"学校あ":2147,"番組。":1557,"番組あ":2493,"向ああ":1587,"知ああ":3322,"校ああ":1456,"含ああ":1792,"アア連":2243,"アア選":2079,"号ああ":1619,"ア』(":1980,"アア=":3088,"アア)":16080,"アア(":33305,"合ああ":3958,"=アア":3113,"年代あ":1428,"開発あ":3113,"登場あ":2088,"略称あ":1402,"発売あ":3963,")あ ":2592,"義ああ":1444,"機関あ":1580,"あ)あ":31163,"あ(あ":1574,"あ)、":1476,"あ)。":2164,")あア":7291,")ああ":30137,")あ、":47800,"(ああ":48329,")、ア":1251,"(アア":20979,"ア=ア":3175,"ア(ア":4269,"ア)あ":13245,"ア(あ":3617,"団体あ":1276,"ア( ":1748,"置ああ":4412,"合衆国":4735,"丁アア":1421,"上ああ":2005,"丈ああ":11397,"丈あア":1286,"丁ああ":8466,"一ああ":3370,"一あ。":1355,"平成 ":1642,"年) ":3918,"年( ":2247,"年)あ":3507,"式会社":2868,"国あア":1384,"国ああ":2054,"国アア":2374,"年(昭":1867,"載ああ":2044,"地ああ":1883,"者ああ":4317,"丼ああ":8612,"在ああ":4158,"中ああ":1478,"丞アア":1255,"目ああ":1258,"丞ああ":10116,"不ああ":3457,"車ああ":1306,"丈アア":2349,"与ああ":1754,"界大戦":1365,"名称あ":2696,"事ああ":3528,"起ああ":1658,"開催あ":2711,"年あア":3603,"年ああ":7456,"年アア":1814,"乱ああ":1859,"人ああ":3885,"活動あ":2178,"的ああ":3866,"発ああ":2687,"交ああ":1899,"組佐あ":1397,"会(あ":1757,"))":1846,"()":1332,"丈丁あ":1330,"丈丈あ":1676,"休アア":1927," )あ":8664,"会ああ":2436,"休ああ":11780,"(昭":2135,"代ああ":2253,"代アア":1273,"(現":2804,"(英":2015,"付ああ":1616,"=ア":3230,")ア":1792,"(ア":21128,")あ":114771,")」":1247,"(あ":48841,")。":7779,")、":5848,"対象あ":1241,"( ":12526,") ":6625,": ":1725,"~ ":2043,"作ああ":4059,"何ああ":1301,"佐ああ":8185,"体ああ":2330,"当ああ":1803,"日ああ":8218,"日あア":2005,"、)あ":1731,"』(あ":3555,"』(ア":2402,"中使あ":2391},"n_words":[10754229,8353071,5774482],"name":"ja"}
\ No newline at end of file
diff --git a/nlp_resource_data/langdetect/profiles/kn b/nlp_resource_data/langdetect/profiles/kn
new file mode 100755 (executable)
index 0000000..7a599ba
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/kn
@@ -0,0 +1 @@
+{"freq":{"ುರಾ":118,"ುರು":200,"ುರಿ":88,"ುರ್":91,"D":56,"E":62,"A":159,"B":71,"C":144,"M":84,"N":67,"O":52,"I":126,"ುವಂ":99,"T":67,"P":98,"S":150,"R":60,"ುಳ್":57,"f":152,"g":241,"d":295,"e":941,"b":240,"c":337,"a":826,"n":692,"o":625,"l":414,"m":349,"k":67,"h":439,"i":767,"w":97,"v":70,"u":387,"t":822,"ುಲೈ":89,"s":460,"r":646,"p":275,"y":151,"x":98,"ುಮಾ":253,"ುಹಿ":52,"ುಸ್":88,"ುವೆ":67,"ುವು":360,"ುವಿ":157,"ುವಾ":127,"ುವರ":100,"ೂಕಿ":129,"ೂಕು":69,"ುಷ್":56,"ುಟು":84,"ುಟ್":157,"ುಡಿ":58,"ುನಾ":71,"ುನಿ":57,"ುತಿ":69,"ುತ್":1508,"ುದು":441,"ುದಾ":117,"ುದರ":81,"ುದ್":264,"ುದೇ":91," o":90," i":63," a":111," c":81," t":207," p":123," s":73," r":80,"ೀರಿ":62,"ೀರ್":102," I":78," M":62," B":59," C":95," A":102," S":86," P":57,"ುಖ್":182,"ುಗಳ":475,"ೀವಿ":81,"ೀವನ":57,"ುಕೊ":61,"ುಕ್":140,"ುಂಬ":175,"ುಂದ":93,"ೀಡಿ":58,"ೀಟರ":52,"ಿಸಿ":1001,"ಿಸು":679,"ಿಸೆ":100,"ಿಸ್":325,"ಿಹಾ":108,"ೀನ್":58,"ೀತಿ":89,"ೀಡು":53,"ೃಷ್":171,"ೆಯ ":906,"ೃತಿ":133,"ೃತ್":89,"ೇಕ ":98,"b ":108,"a ":86,"ೂಲಧ":77,"ೇ":5578,"ೆ":15840,"ು":25709,"ೂಲಕ":96,"ೀ":3205,"ೃ":757,"ೂ":3992,"್":52148,"ೌ":422,"ೈ":1240,"ೋ":2829,"ೊ":2928,"ೂರು":339,"ೂರಿ":178,"he":101,"ೂರ್":274,"೧":1776,"೦":1187,"gh":67,"೯":1054,"೮":516,"೭":491,"೬":491,"೫":520,"೪":472,"೩":669,"೨":1050,"ಐ":161,"ಒ":2005,"ಓ":71,"ಕ":16773,"ಖ":1270,"ಗ":16945,"ಘ":299,"g ":53,"ಚ":3279,"ಛ":94,"ೂರದ":77,"ಜ":4576,"ಞ":287,"ೂರನ":53,"ಟ":6023,"ಃ":62,"ಂ":14035,"ಅ":4390,"ಇ":2785,"ಆ":1877,"ಉ":1108,"ಈ":873,"ಊ":90,"ಏ":267,"ಎ":1847,"ಲ":19400,"ಳ":7999,"ರ":33568,"ಶ":3961,"ೂಮಿ":116,"ಷ":3572,"ವ":18565,"ಸ":14830,"ಹ":6186,"ಾ":27485,"ಿ":36903,"ಣ":3561,"ಡ":6448,"ಠ":200,"ಧ":2658,"ದ":26224,"ಥ":2061,"ತ":20028,"ಫ":753,"ಪ":9255,"en":84,"ನ":22644,"ಯ":14816,"ಮ":11705,"es":68,"ಭ":2607,"er":150,"ಬ":6283,"e ":261,"f ":64,"co":54,"ce":55,"d ":123,"at":112,"ar":92,"al":103,"an":163,"nt":61,"of":64,"on":129,"or":85,"r ":107,"mb":124,"na":66,"nd":87,"ng":73,"li":80,"le":62,"n ":194,"ht":76,"hu":109,"ic":76,"ia":57,"ig":78,"is":75,"it":67,"ೆದ ":99,"in":127,"io":72,"l ":78,"y ":108,"x ":86,"um":122,"to":56,"te":89,"ti":95,"th":193,"ta":55,"st":72,"ri":121,"re":74,"ra":68,"t ":164,"ೂಡಿ":63,"s ":195,"px":81,"ೂನ್":106," ಇ":2754," ಆ":1860," ಅ":4360," ಏ":265,"ುಖ ":181," ಎ":1695," ಊ":90," ಉ":1101," ಈ":872," ಖ":225," ಗ":2051," ಕ":5524," ಒ":1995," ಓ":65," ಐ":147," ಟ":281," ಜ":2387," ಚ":1161," ಘ":128,"ೀಯ ":460," ನ":3476," ಪ":4927," ಫ":377," ಬ":2936," ಭ":1657," ಮ":5657," ಯ":687," ಡ":389," ತ":2162," ದ":2528," ಧ":353," ಹ":3705," ಸ":5985," ರ":2258," ಲ":607," ವ":3839," ಶ":1153," ೬":68," ೭":68," ೮":53," ೯":53," ೨":675," ೩":313," ೪":88," ೫":77," ೧":1407,"ಾಂಶ":59,"ಾಂತ":277,"ಾಂಕ":60,"ಾಂಗ":132,"ಾಂಡ":169,"ೀತ ":103,"ಿಯ ":653,"ಿಮ ":103,"ಿನ ":1775,"ಿಧ ":52,"ಿದ ":402,"ಿತ ":279,"ಿಣ ":191,"ಿರಾ":52,"ಿರಿ":113,"ಿರ್":396,"ಿರು":1115,"ಂದ ":957,"ಿಲಿ":76,"ಿಲ್":730,"ಿಳಿ":79,"ಿಳು":52,"ೀಕರ":77,"ೀಕ್":78,"ಿವರ":110,"ಿವೃ":57,"ಿವಿ":95,"ಿವಾ":109,"ಿವೆ":147,"ಿಶ್":268,"ಿಸಬ":56,"ಂಬ ":229,"ಿಷ್":256,"ಿಸಲ":564,"ೃತ 
":73,"ಿನಿ":232,"ಿನಾ":81,"ಿನಲ":259,"ಿನ್":254,"ಿಪ್":89,"ಿಭಾ":100,"ಿಯಂ":74,"ಿಮೆ":73,"ಿಯನ":554,"ಿಯಮ":111,"ಿಮಾ":95,"ಿಯು":246,"ಿಯೆ":150,"ಿಯೇ":59,"ಿಯೊ":67,"ಿಯರ":85,"ಿಯಲ":402,"ಿಯವ":134,"ಿಯಿ":76,"ಿಯಾ":494,"ಿಡಿ":78,"ಿಟ್":81,"ಿತ್":753,"ಿತರ":53,"ಿತವ":177,"ಿತಾ":52,"ಿತಿ":175,"ಿತು":240,"ಿದ್":1341,"ಿನಗ":88,"ಿಧಾ":82,"ಿನದ":58,"ಿದವ":67,"ಿದರ":307,"ಿದಾ":54,"ಿದೆ":1016,"ಿದು":62,"ಾಷ್":299,"ಾಸದ":54,"ಾಷೆ":295,"ಾಸನ":52,"ಾಷಾ":53,"ಿಗೆ":716,"ಿಗಾ":54,"ಾಶಿ":63,"ಾಹಿ":266,"ಾಸ್":461,"ಾಸಾ":72,"ಾಸಿ":149,"ಿಜ್":143,"ಿಟಿ":66,"ಾರವ":174,"ಾರಾ":172,"ಾರಿ":438,"ಾರು":321,"ಾರೆ":385,"ಾರ್":898,"ಾಲದ":109,"ಾಲಯ":138,"ಾಯಕ":132,"ಾಮಾ":287,"ಾಮಿ":92,"ಾಯನ":84,"ಾರಂ":66,"ಾಮ್":142,"ಾರಗ":62,"ಾರಕ":63,"ಾಯಿ":381,"ಾರದ":175,"ಾರತ":711,"ಾರಣ":117,"ಾಯು":52,"ಾರರ":107,"ಿಕಿ":78,"ಿಕಾ":233,"ಾವಣ":88,"ಾವಳ":55,"ಿಕೆ":452,"ಿಕೊ":215,"ಿಕ್":429,"ಾವು":111,"ಾವಿ":144,"ಿಗಳ":606,"ಾವ್":94,"ಾಲು":54,"ಾಲೂ":145,"ಾಲಿ":154,"ಾಲ್":298,"ಾಲೆ":294,"ಾಳಿ":56,"ಿಕವ":96,"ಿಕರ":56,"ಿಕದ":55,"ಾನಿ":272,"ಾನೆ":71,"ಾಧ್":78,"ಾನವ":177,"ಾನ್":541,"ಾಪಕ":91,"ಾದಿ":124,"ಾಧನ":72,"ಾದವ":55,"ಾದರ":121,"ಾನದ":188,"ಾದ್":141,"ಾನಗ":87,"ಾಮದ":98,"ಾಪು":93,"ಾಪಿ":108,"ುವ ":1824,"ಾಪ್":60,"ಿಂದ":1207,"ಿಂತ":88,"ಾಡಲ":88,"ಂಗ ":59,"ಾಡು":222,"ಾಡಿ":260,"ಾಟ್":67,"ಿಂಗ":466,"ಾಟಿ":63,"ಾಟಕ":338,"ಾತ್":294,"ುರ ":111,"ಾತನ":55,"ಾತಿ":122,"ಾತು":101,"ಾಣದ":83,"ಾಣಿ":182,"ಾಣು":83,"ಾಗ್":60,"ಾಗಿ":2495,"ಾಗು":715,"ಾಗೂ":350,"ಾಗರ":116,"ಾಗಲ":56,"ಾಗವ":78,"ಾಗದ":149,"ಾಕ್":170,"ಾಗಗ":52,"ಾಕಾ":66,"ಾಕಿ":70,"ಂತ ":350,"ಾಜ್":424,"ಾಜಿ":104,"ಾಜಧ":76,"ಾಜಕ":69,"ಾಚಾ":64,"ಂಡ ":134,"ಸರಾ":57,"ಸರಿ":110,"ಸರು":177,"ಸರ್":187,"ಸಮಾ":89,"ಸಮು":96,"ಷ್ಣ":158,"ಷ್ಟ":615,"ಷ್ಯ":148,"ಸಸ್":84,"ಸಲ್":204,"ಸಲು":129,"ಸಲಾ":407,"ಸೂರ":160,"ಸುಮ":185,"ಸುವ":474,"ಸುತ":345,"ಸಿನ":84,"ಸಿಯ":74,"ಸಿರ":109,"ಸಿಸ":62,"ಸಾರ":89,"ಸಾಯ":93,"ಸಾಮ":345,"ಸಿಕ":210,"ಸಾಲ":56,"ಸಾಧ":119,"ಸಿದ":904,"ಸಾಹ":178,"ಸಿಂ":68,"ಸಾಗ":104,"ಸಾಂ":87,"ಸೇವ":115,"ಸೇರ":192,"ಸೆಪ":83,"ಹದಿ":73,"ಸೆಂ":112,"ಹತ್":113,"ಶ್ಚ":149,"ಶ್ರ":321,"ಶ್ವ":281," ದ ":69,"ಶೇಷ":56," ನ ":221,"ಶೋಧ":52," ರ ":72,"ಷಿಣ":245,"ಷೇತ":113,"ಷೆಯ":163,"ಷೆಗ":96,"ಸತ್":52," ದೂ":111," ದಾ":111," ದಿ":826," ನಂ":173,"ಾಜ ":54," ತೆ":86," ದಕ":243," ತಾ":436," ತಿ":392," ತೀ":71," ತು":140," ತನ":105," ತಯ":72," ತಮ":160," ತರ":68," ತಂ":275," ಡಿ":173," ಡಾ":64," ಟ್":64," ಟೆ":75," ಮಲ":65," ಮರ":148," ಮಹ":275," ಮೂ":466," ರಂ":117," ಮೀ":91," ಮು":503," ಮಾ":930," ಮಿ":112," ಮ್":74," ಮೊ":287," ಮೋ":54," ಮೈ":149," ಮೆ":105," ಮೇ":303," ಬ್":257," ಭೂ":150," ಮತ":1392,"ಾದ ":915," ಭಾ":1269," ಭೌ":68," ಮಧ":122," ಮನ":105," ಫ್":115," ಬಳ":309," ಬಲ":53," ಬರ":237," ಬಹ":140," ಬಾ":244,"ಾಣ ":79," ಬಿ":202," ಬೀ":65," ಮಂ":165," ಬು":58," ಬೆ":486," ಬೇ":140," ಪೋ":52," ಪ್":2223," ಬಗ":131,"ಾತ ":80," ಬಣ":53," ಬದ":90," ಫೆ":69," ಪಶ":122," ಪರ":473," ಪೂ":178," ಪು":317," ಬಂ":203," ಪಿ":89," ಪಾ":318," ಪಕ":77," ನ್":129," ನೈ":64," ನೋ":69," ಪದ":311," ಪತ":98," ಪಟ":103," ಪಡ":148," ನಲ":117," ನವ":140," ನೀ":205," ಪಂ":148," ನೇ":153," ನೆ":157," ನಿ":719," ನಾ":379," ಧರ":127," ನಗ":232," ದೇ":439," ದೊ":195," ದ್":175," ನಡ":222," ನದ":175,"ಾನ ":293," ಧಾ":127," ನಟ":71," ವೇ":103," ವೈ":156," ವೆ":106," ವ್":384,"ಾಮ ":87," ವಸ":135," ವಿ":1423," ವಾ":355," ಶತ":99," ಶ್":256," ಶಿ":219," ಶಾ":196," ಸಂ":1268," ವರ":800,"ಾಲ ":93," ಲೋ":89," ಲ್":70," ಲೇ":112," ಲಿ":71," ಯು":227," ಯಾ":161," ರಚ":137,"ಾರ ":382," ರೋ":82," ರೇ":54," ರೀ":66," ರೂ":111," ರಾ":953," ರಿ":89," ರಸ":87," ರವ":64,"ಾಯ ":86," ರಲ":134,"ಿಕ ":878,"ಾಳ ":52," ಹು":167," ಹೆ":576," ಹೇ":108," ಹಾ":768," ಹಿ":369," ಹೊ":508," ಹೋ":156,"ಾಸ ":73," ಸದ":81," ಹಂ":55," ಸಣ":52," ಸರ":313," ಸಲ":64," ಸಮ":357," ಸೆ":143," ಸೇ":325," ಹದ":68," ಹಣ":89," ಸೂ":152," ಹತ":79," ಸು":425," ಸಾ":912," ಸಿ":289," ಸಸ":67," ಸಹ":107," ಹಳ":93," ಹಲ":156," ಹರ":112," ಸ್":892," ಸೌ":54," ಸೋ":68," ಸೈ":56," 
ಅಂ":408,"ಸ್ಥ":756,"ಸ್ಪ":98,"ಸ್ಯ":165,"ಸ್ಕ":258,"ಸ್ಟ":445,"ಸ್ತ":931," ಆಟ":96," ಇಂ":246," ಆಡ":86," ಆಚ":60," ಅವ":421," ಆಗ":261," ಅಸ":94," ಅಲ":154," ಆಕ":73," ಅಮ":216," ಅಭ":111,"ಸ್ವ":337,"ಸ್ಸ":91," ಅರ":279," ಅಪ":94," ಅದ":274," ಅಥ":645," ಅನ":330," ಅಧ":417," ಅತ":346," ಅಡ":73," ಆಂ":116," ಅಗ":62," ಅಕ":181,"ಹರಿ":90,"ಹಲವ":148,"ಹಳ್":97," ಊರ":63," ಎನ":126," ಎತ":68," ಎಂ":869," ಇಪ":84," ಇನ":85," ಇಲ":141," ಇರ":180," ಇವ":618," ಉಂ":56," ಆದ":131," ಆಧ":97," ಆಫ":103," ಆಯ":65," ಆರ":223," ಆಲ":68," ಆಳ":54," ಆವ":57," ಆಸ":93," ಇತ":184," ಇದ":1044," ಉಪ":269," ಉದ":185," ಉತ":320,"ಹಾಸ":180,"ಹಿತ":251,"ಹಿಡ":63,"ಹಾರ":180," ಒಳ":146,"ಹಾಕ":59,"ಹಾಗ":543,"ಹಿಂ":230," ಕಂ":258," ಕರ":597,"ಹುಟ":85," ಕಲ":170," ಕನ":397," ಕಥ":54,"ಹುದ":165," ಕಟ":67," ಕಣ":68," ಖಂ":56," ಕಡ":102," ಕ್":901,"ಹಿಸ":111," ಕೆ":308," ಕೇ":216," ಕೊ":245," ಕೋ":98," ಕಾ":889," ಗಂ":57," ಕಿ":200," ಕೂ":99," ಕು":307," ಕೃ":156," ಕವ":70," ಏಪ":62," ಎಲ":152," ಎರ":162," ಏಷ":55," ಒಂ":1369," ಒಬ":182," ಒಟ":63," ಚೆ":63," ಚಿ":486," ಚಾ":99,"ಹೆಸ":279," ಜಗ":78," ಜನ":678,"ಹೆಚ":200," ಜಲ":69," ಜೋ":71," ಜೊ":56," ಜಿ":489," ಜಾ":210," ಜು":99," ಜೀ":215," ಜೂ":94,"ಹೊಂ":267," ಜ್":80,"ಹೇಳ":76," ಗಣ":150,"ಹೋಗ":60,"ಹೊಸ":67,"ಾಗ ":150," ಖ್":54," ಗಳ":238," ಘಟ":60," ಗಾ":155," ಗು":365," ಗಿ":82,"ಹೊರ":79," ಗ್":584," ಗೋ":85," ಚಂ":64,"ಹ್ಯ":58," ಚಲ":126," ೨೦":198," ೧೯":643," ೧೨":59," ೧೧":53," ೧೪":61," ೧೬":69," ೧೫":80," ೧೮":164," ೧೭":110," ೩೦":63," ೨೫":63,"ಳ್ಳ":382,"ಳೆಯ":131," ೧೦":72,"ಶ್ ":57,"ಳೂರ":208,"ಳುವ":137,"ವಣೆ":61,"ಷ್ ":79,"ಷೆ ":91,"ಲ್ಯ":182,"ಲ್ಲ":5639,"ಲ್ಪ":335,"ಲ್ಕ":111,"ಲೋಹ":64,"ಲೊಂ":55,"ಳನ್":731,"ಲೆಂ":287,"ಲೇಖ":144,"ಲೆಕ":53,"ಲೆಗ":94,"ಲೆಯ":429,"ಲಿಯ":252,"ಲಿರ":303,"ಲಿಸ":109,"ವ್ ":88,"ಲೂಕ":200,"ಳಿಂ":159,"ಳಾಗ":68,"ಳಿನ":117,"ಳಿತ":74,"ಳಿದ":77,"ಳಿಕ":53,"ಳಿಗ":310,"ವಂತ":117,"ಳಿಸ":102,"ಳಿಯ":166,"ಷದ ":67,"ಳಸಲ":88,"ಳಲ್":724,"ಷಗಳ":63,"ವೇದ":77,"ಶದಲ":55,"ವ್ಯ":542,"ಸಂಘ":68,"ಶಿಷ":59,"ಶಿಸ":74,"ಸಂಖ":127,"ಶಿವ":89,"ಸಂಗ":227,"ಸಂಕ":77,"ಶಿಯ":58,"ಸಂದ":52,"ಸಂಸ":304,"ಸಂಯ":79,"ಸಂಪ":131,"ಸಂಬ":98,"ಷದಲ":459,"ಶಸ್":149,"ಶಾಸ":280,"ಶಾಲ":53,"ಶಿಕ":64,"ವರನ":53,"ವರೆ":141,"ವರಾ":53,"ವರು":761,"ವರಿ":316,"ವರ್":898,"ವಳಿ":93,"ಸಿ ":303,"ವಲ್":60,"ವತ್":64,"ವಧಿ":53,"ವದಲ":53,"ವನ್":720,"ವಾಗ":1342,"ವಾದ":543,"ವಾತ":70,"ವಾಣ":65,"ವಾಡ":79,"ವಾಮ":73,"ವಾಯ":90,"ವಾರ":197,"ವಿಗ":77,"ವಾಸ":169,"ವಾಲ":67,"ವಿಕ":218,"ವಾಹ":85,"ವಿಜ":170,"ವಿಧ":170,"ವಿದ":296,"ವಿನ":163,"ವಿತ":66,"ವಿಭ":117,"ವಿರ":130,"ವಿಮ":65,"ವಿಯ":106,"ವಿವ":124,"ವಿಲ":62,"ವಿಸ":121,"ವಿಶ":329,"ವಿಷ":94,"ವೀಪ":59,"ವುಗ":98,"ವುದ":425,"ಸ್ ":791,"ವೃತ":84,"ಶತಮ":88,"ವೆಂ":214,"ವವಿ":62,"ಶಕ್":84,"ಸು ":82,"ಶಗಳ":199,"ವಹಿ":89,"ವಸ್":273,"ಮೊದ":199,"ಯನಿ":63,"ಯನ್":821,"ಮೈಸ":89,"ಯಮ್":96,"ಮ್ಮ":313,"ಮ್ರ":69,"ಮ್ಯ":104,"ಲಿ ":3648,"ರಕಾ":170,"ಲಾ ":127,"ಯಲ್":813,"ಯಲಾ":117,"ಯರು":78,"ರಚನ":78,"ಲೂ ":94,"ಲು ":400,"ರಗಳ":381,"ಯವಾ":336,"ಯವು":63,"ಯವಸ":110,"ರಕ್":198,"ಯವರ":158,"ಯವನ":100,"ಮಹಾ":203,"ಲದ ":112,"ಮಾಜ":102,"ಮಾನ":565,"ಮಾಣ":151,"ಮಾಡ":381,"ಮಾತ":89,"ಮಾಹ":71,"ಮಾರ":439,"ಮಾಲ":61,"ಮಿಕ":75,"ಮಿಯ":113,"ಮಿತ":67,"ಮೂರ":118,"ಮೂಲ":335,"ರಂದ":193,"ರಂಭ":108,"ರಂಗ":139,"ಮಿಸ":75,"ಮಿಳ":54,"ಮುಂ":173,"ಮುದ":121,"ಮುಖ":451,"ಮೆರ":66,"ಮೇಲ":169,"ಮೇರ":140,"ಯತೆ":52,"ಲಯ ":63,"ಯದಲ":149,"ಯೋಗ":170,"ಯ್ಯ":64,"ರರಾ":68,"ರರಂ":62,"ರಮು":225,"ರಮಾ":162,"ರರು":63,"ಳಿ ":203,"ರಲ್":544,"ರಶಸ":112,"ಳು ":1195,"ರವು":117,"ರವಾ":329,"ರವಿ":59,"ರವರ":144,"ರವನ":89,"ರಸಿ":199,"ರಸಾ":67,"ರಸ್":141,"ಯಸ್":73,"ರಚಿ":80,"ಲೆ ":252,"ಲೇ ":70,"ರಜ್":65,"ಯಾಂ":162,"ಯಾಕ":83,"ಲೈ ":93,"ಯಾಗ":387,"ಯಾದ":236,"ಯಾತ":133,"ಯಿಂ":142,"ಯಾಟ":89,"ಯಾಪ":150,"ಯಾನ":165,"ಯಾವ":134,"ಯಾಯ":90,"ಯಾರ":155,"ಯಾಲ":340,"ಯಾಸ":109,"ಯಿತ":172,"ಯಿಸ":93,"ರಡನ":86,"ಯುಕ":79,"ರಡು":80,"ಯುತ":201,"ಯುದ":79,"ಯುರ":59,"ಯುವ":150,"ರತದ":423,"ರಣೆ":97,"ರಣಿ":54,"ಲ್ 
":636,"ರಣವ":59,"ರತ್":73,"ರತಿ":180,"ರತೀ":142,"ರದರ":55,"ರದಲ":281,"ರದೇ":250,"ಯೆಯ":95,"ಯೆಗ":53,"ರದಾ":69,"ರದಿ":71,"ರನ್":145,"ರಪಂ":68,"೨೦೦":138,"ರ್ಮ":390,"ರ್ಯ":406,"ರ್ವ":378,"ರ್ಶ":130,"ರ್ಷ":704,"ರ್ಸ":72,"ರ್ಟ":71,"ರ್ಡ":92,"ರ್ತ":167,"ರ್ಣ":195,"ರ್ದ":177,"ರ್ಥ":249,"ರ್ನ":341,"ರ್ಧ":61,"ರ್ಪ":57,"ರ್ಗ":212,"ರ್ಕ":216,"ರ್ಜ":96,"ರ್ಚ":113,"ವನ ":77,"ವದ ":73,"ಲಕ್":112,"ರೂಪ":165,"ರುವ":1148,"ರೀಯ":191,"ರೀತ":68,"ರೀಡ":60,"ರಿಸ":509,"ರಿವ":56,"ರೀಕ":90,"ರಿಲ":102,"ರಿಯ":766,"ರುದ":52,"ರುತ":402,"ರುಗ":103,"ರಿಂ":256,"ರಾಟ":71,"ರಾತ":76,"ರಾಣ":153,"ರಾನ":117,"ರಾದ":125,"ರಾಕ":64,"ರಾಗ":268,"ರಾಜ":696,"ರಾಚ":61,"ರಿಟ":72,"ರಿತ":118,"ರಿದ":175,"ರಿನ":273,"ರಾಯ":125,"ರಾಮ":296,"ರಾರ":74,"ರಾವ":128,"ರಿಕ":602,"ರಾಷ":292,"ರಿಗ":296,"ರಾಸ":60,"೧೯೭":59,"೧೯೬":77,"ಳೆ ":85,"೧೯೯":64,"೧೯೩":78,"೧೯೨":62,"೧೯೫":67,"೧೯೪":74,"ರಾಂ":137,"ರೋಗ":76,"ರೋಪ":59,"ರೆಗ":378,"ರೆದ":71,"ರೆಯ":331,"ವಿ ":195,"ವಾ ":561,"ವು ":807,"ಲವು":161,"ಲವಾ":103,"ಳಕೆ":83,"ಲಯದ":79,"ವೆ ":451,"ಲಾಯ":82,"ಲಿಗ":61,"ಲಿಕ":80,"ಲಾವ":62,"ಲಿನ":461,"ಲಿದ":161,"ಲಾಗ":564,"ಲಿಂ":75,"ಲಾದ":129,"ಳಗೊ":121,"ವೂ ":55,"ಶದ ":217,"ಲದಲ":69,"ವರ ":374,"ಲಧಾ":75,"ಲನಚ":87,"ಪ್ಟ":118,"ಪ್ರ":2498,"ಪ್ಯ":75,"ಪ್ಪ":318,"ಪೂರ":201,"ಪಿಸ":139,"೬ನೇ":54,"ಬಂಧ":106,"ಬಂದ":116,"ಪುಟ":63,"ಪುರ":302,"ಮದ ":92,"ಪಶ್":122,"ಪಿಯ":85,"ಪಾರ":115,"ಪಾಲ":72,"ಪಾತ":114,"ಪಾದ":107,"ಪರ್":164,"ಪಯೋ":105,"ಪರಮ":64,"ಪರಿ":273,"ಫ್ರ":149,"ಬರಿ":53,"ಬರು":198,"ಬರೆ":81,"ಬರ್":426,"ಬಳಕ":77,"ಮ್ ":312,"ಬಣ್":56,"ಬದಲ":69,"ಫೆಬ":56,"ಯದ ":342,"ಮೇ ":71,"ಯನ ":58,"ಮೆ ":97,"ಮಿ ":137,"ಬಗ್":95,"ರಾ ":66,"ರಿ ":689,"ಬ್ದ":64,"ಬ್ಬ":413,"ಬ್ಯ":92,"ಬ್ರ":245,"ಬೇಕ":58,"ಬೆಳ":196,"ಬೆಲ":56,"ಬೆಟ":70,"ಬೇರ":65,"ಬೆಂ":161,"ಬುದ":201,"ಮಂಡ":85,"ಮಂತ":82,"ಬಾಲ":61,"ಬಾರ":62,"ಯೇ ":65,"ಯೆ ":129,"ರದ ":417,"ಬಹು":249,"ರತ ":132,"ರಣ ":118,"ಯೂ ":79,"ಯು ":397,"ಯಿ ":133,"ಬಳಸ":168,"ಯಾ ":166,"ಬಳಿ":64,"ಬಲ್":56,"ಯಕರ":55,"ಯಕ್":233,"ಯಗಳ":256,"ಮರಾ":59,"ಮರ್":71,"ಮಧ್":119,"ಲಕ ":102,"ಮನ್":73,"ಮನೆ":57,"ರ್ ":1282,"ಭೂಮ":114,"ಮದಲ":87,"ಮತ್":1394,"ಭಾಷ":355,"ಭಾವ":93,"ಭಿನ":73,"ಭಾಗ":330,"ಭಾರ":739,"ಯಂತ":301,"ರೆ ":804,"ರು ":2581,"ಮಗಳ":92,"ಭವಾ":52,"ರೀ ":91,"ಮಕ್":73,"ರೂ ":136,"ಥೆಯ":123,"ದನೆ":56,"ದನ್":281,"ದರಿ":133,"ದರು":453,"ದರೆ":272,"ದರೂ":102,"ದರಲ":91,"ದರ್":155,"ದಲಾ":57,"ದಲು":62,"ದಲ್":2055,"ದವನ":56,"ದವು":75,"ದವರ":138,"ದಸ್":52,"ಪದ ":80,"ದಾಗ":201,"ದಿನ":874,"ದಿದ":212,"ದಿಸ":72,"ದಿರ":178,"ದಿಯ":160,"ದಾದ":115,"ದಿಂ":295,"ದಿಕ":52,"ದಿಗ":178,"ದಾರ":359,"ದಾಯ":84,"ದಾನ":52,"ದುರ":81,"ದುಕ":71,"ನಂತ":150,"ನಂದ":72,"ದೂರ":133,"ಧತಿ":57,"ದೇಶ":726,"ದೇವ":195,"ಧನೆ":64,"ದೊಡ":174,"ದ್ದ":1224,"ದ್ಧ":445,"ದ್ವ":154,"ದ್ಯ":488,"ದ್ರ":521,"ಪಿ ":59,"ಧರ್":147,"ನಕ್":67,"ನಗಳ":229,"ನಗರ":304,"ಪು ":78,"ನಚಿ":88,"ನಡೆ":173,"ನಡು":55,"ಧಿಸ":90,"ನಡದ":107,"ಧಿಯ":80,"ಧಾತ":90,"ಧಾನ":211,"ಧಾರ":211,"ಧಿಕ":334,"ನದಲ":85,"ಪ್ ":121,"ನನ್":63,"ನದಿ":214,"ನಪ್":62,"ಧ್ಯ":309,"ನಲ್":436,"ನವನ":120,"ನವರ":180,"ನವಾ":90,"ನವು":53,"ನವೆ":88,"ನುವ":99,"ನೀರ":61,"ನೀಡ":120,"ಪಂದ":55,"ನಿಸ":295,"ಪಂಚ":105,"ಫ್ ":93,"ನಿವ":80,"ನಿಲ":70,"ನಿಯ":276,"ನಿರ":415,"ನಿಗ":99,"ನಾದ":62,"ನಾಲ":99,"ನಿಕ":181,"ನಾಮ":59,"ನಾಯ":110,"ನಾರ":70,"ನಾಗ":147,"ನಾಥ":61,"ನಾಟ":338,"ನಿಂ":150,"ನಾಡ":89,"ನೆಗ":163,"ನೇಕ":77,"ನೆಲ":86,"ನೆಯ":329,"ಪಕರ":55,"ಬಿ ":56,"ನ್ಸ":171,"ನ್ಯ":464,"ನ್ಮ":57,"ನ್ನ":3456,"ಪಕ್":91,"ಪದವ":93,"ಪತ್":245,"ಪತಿ":53,"ಬ್ ":71,"ಪಡೆ":154,"ಪಡು":141,"ಪಡಿ":52,"ಪಟ್":240,"ಪನ್":57,"ಪನಿ":89,"ಪದ್":89,"ಡೆಸ":85,"ಡೆದ":156,"ಡೆಯ":184,"ಥವ ":102,"ಡ್ಡ":232,"ಡಿತ":54,"ಡಿನ":68,"ಡಿದ":293,"ಡಿರ":86,"ಡಿಯ":235,"ಡಿಸ":241,"ಡಿಕ":76,"ಡಿಗ":58,"ಡುತ":146,"ಡುವ":315,"ಡುಗ":71,"ತ್ ":173,"ಡೆಗ":53,"ಥೆ ":99,"ಣಗಳ":143,"ಣರಾ":65,"ತಂದ":116,"ತಂತ":159,"ತಂಡ":77,"ದಲ ":85,"ಣದಲ":85,"ದರ ":265,"ದೇ ":231,"ದೆ ":1948,"ಣಿಗ":82,"ಣಿಯ":81,"ಣಿಸ":84,"ಣಿತ":57,"ದಿ ":230,"ದೂ ":160,"ಣವಾ":87,"ತಗಳ":66,"ದು ":3600,"ತರಾ":116,"ತಮ್":117,"ತರದ":65,"ತಯಾ":73,"ಣ್ಯ":74,"ತಮಾ":89,"ಣ್ಣ":250,"ಣೆಯ":103,"ತದೆ":688,"ತನ್":101,"ದ್ 
":83,"ತದಲ":113,"ಣೆಗ":103,"ದಂತ":81,"ತುಗ":83,"ತೀಯ":160,"ತೀರ":77,"ತುವ":100,"ನನ ":106,"ತಿಂ":268,"ತಾನ":117,"ತಾದ":59,"ತಾರ":223,"ತಾಯ":91,"ತಿಗ":244,"ತಿಕ":121,"ತಾಲ":214,"ತಿನ":126,"ತಿತ":52,"ತಿದ":136,"ತಿರ":166,"ತಿಯ":448,"ತಿಸ":85,"ತಿಹ":93,"ತಿಳ":56,"ನದ ":230,"ಧಿ ":63,"ತವೆ":173,"ನಡ ":231,"ತವಾ":246,"ನಿ ":257,"ದಕ್":378,"ನಾ ":82,"ತ್ವ":161,"ತ್ಸ":90,"ತ್ಮ":97,"ತ್ಯ":494,"ತ್ರ":1536,"ತ್ಪ":98,"ತ್ನ":61,"ತ್ತ":3794,"ತೆಯ":160,"ತೆಗ":110,"ಥೆಗ":52,"ನ್ ":1307,"ಥಾನ":142,"ಥಾಪ":134,"ಥಿತ":69,"ನೆ ":323,"ನೇ ":743,"ನೂ ":58,"ಥವಾ":557,"ದಗಳ":58,"ನು ":2464,"ಟಿ ":147,"೧೦ ":57,"ಟು ":213,"ಟೆ ":76,"ಜಿನ":61,"ಜಿಲ":446,"ಜಾಲ":52,"ಜಾತ":92,"ಜುಲ":91,"ಜೀವ":240,"ಟ್ ":637,"ಜೂನ":90,"ೆನ":244,"ೆದ":362,"ೆಬ":114,"ೆಪ":117,"ೆರ":365,"ೆಯ":2599,"ೆಮ":88,"ೇಖ":200,"ೆವ":58,"ೇಕ":249,"ೆಳ":254,"ೆಲ":491,"ೆಸ":577,"ೆಹ":62,"ೇಗ":75,"ೇಜ":68,"ೇಟ":120,"ೇದ":127,"ೇತ":222,"ೆಕ":157,"ೆಗ":1392,"ೆಚ":219,"ೆಟ":300,"ೆಡ":111,"ೇಂ":180,"ೈನ":129,"ೈವ":57,"ೈಲ":128,"ೈಸ":184,"ೊಂ":945,"ೇಮ":53,"ೇನ":121,"ೈಕ":103,"ೇಳ":166,"ೇಶ":901,"ೇವ":379,"ೇರ":613,"ೇಯ":97,"ೇಲ":240,"ೈಟ":55,"ೇಷ":160,"ೇಸ":99,"ೈದ":86,"ೂಪ":194,"ೂರ":1041,"ೂಮ":136,"ೂತ":53,"ೂನ":169,"ೂಲ":387,"ion":67,"ುಪ":145,"ುಬ":131,"ುಭ":63,"ುಮ":379,"ುರ":798,"ುಡ":140,"ುಣ":155,"ುತ":1697,"ುದ":1164,"ುನ":256,"ೂಚ":55,"ುಹ":87,"ೂಟ":104,"ೂಡ":144,"ುಲ":232,"ೂಕ":272,"ುಳ":152,"ುವ":2954,"ುಸ":149,"ುಷ":80,"ೆಂ":1198,"ೃತ":380,"ೃದ":63,"ೃಷ":194,"ಜ್ಯ":449,"ಜ್ಞ":280,"್ರ":7384,"್ಯ":5185,"್ಮ":1098,"್ಭ":55,"್ಳ":385,"್ಲ":6122,"್ಸ":715,"್ಷ":1648,"್ಶ":149,"್ವ":1581,"ೋತ":79,"ೋದ":91,"ೋನ":101,"ೋಧ":91,"ೋಪ":132,"ೋಜ":72,"ೋಟ":142,"ೋಡ":153,"ೊಲ":98,"ೊಳ":303,"ೋಕ":95,"ೋಗ":340,"ೊಸ":100,"ೊಬ":113,"ೊಮ":81,"ೊರ":184,"ೊತ":77,"ೊನ":118,"ೊದ":217,"ೊಡ":288,"ೊಟ":69,"ೊಗ":56,"್ಪ":943,"್ಬ":531,"್ಫ":53,"್ದ":1537,"್ಥ":1015,"್ನ":4079,"್ಧ":516,"್ಡ":394,"್ತ":5341,"್ಣ":606,"್ಞ":286,"್ಠ":60,"್ಟ":2345,"್ಚ":591,"್ಜ":168,"್ಕ":1695,"್ಗ":432,"ೌರ":85,"ೌಲ":59,"ೌತ":60,"ೋಹ":109,"ೋಷ":61,"ೋಸ":106,"ೋವ":84,"ೋಶ":58,"ೋಳ":74,"ೋಲ":103,"ೋರ":455,"ೋಮ":87,"ೋಬ":137,"೦೦ ":84,"ಜಗತ":55,"ಟದ ":65,"೧೯":659,"೧೮":176,"೧೭":128,"೧೬":85,"೧೫":91,"೧೪":72,"೧೨":74,"೧೧":70,"೪ನ":53,"೨೦":221,"೨೧":53,"೦೦":269,"೧೦":100,"೦ರ":61,"೦ದ":80,"ಜಧಾ":74,"ಜನಪ":57,"ಜನನ":108,"ಜನರ":79,"ಜನಸ":53,"ಜನವ":106,"ಜನಿ":197,"೯೯":86,"೯೭":74,"೯೮":65,"೯೫":81,"೯೬":94,"೯೩":91,"೯೪":81,"೯೧":62,"೯೨":81,"೯೦":57,"೯ರ":54,"೮ರ":60,"೫೦":53,"೬ರ":52,"೬ನ":59,"೫ರ":52,"೫ನ":55,"೩೦":80,"೨೪":61,"೨೫":72,"೨೨":62,"೨೩":65,"೨೯":62,"೨೬":59,"೨೭":59,"ಏಷ":56,"ಒಂ":1369,"ಒಬ":183,"ಒಟ":63,"ಒಳ":146,"ಕಂ":287,"ಚರಿ":67,"ಕಪ":63,"ಕನ":521,"ಕಥ":62,"ಕದ":288,"ಕಳ":92,"ಖಕ":55,"ಕಲ":280,"ಕರ":1275,"ಕಮ":65,"ಕಗ":118,"ಕಕ":54,"ಕತ":77,"ಕಣ":99,"ಕಡ":111,"ಖಂ":114,"ಕಟ":188,"ಕೆ":1476,"ಕೇ":322,"ಖನ":70,"ಕೈ":57,"ಕೊ":554,"ಕೋ":259,"ಕ್":4088,"ಖರ":77,"ಕವ":314,"he ":68,"ಕಾ":1989,"ಗಂ":72,"ಕೀ":159,"ಕಿ":814,"ಕೂ":238,"ಕು":604,"ಕೃ":380,"ಗನ":75,"ಖ್":433,"ಗಮ":88,"ಚಲನ":114,"ಗಲ":148,"ಗರ":505,"ಗವ":163,"ಗಳ":4635,"ಗಗ":111,"ಗಡ":160,"ಖಾ":80,"ಗದ":413,"ಗತ":118,"ಗಣ":242,"ಗೆ":1418,"ಗು":1317,"ಗೂ":445,"ಗಿ":3142,"ಗೀ":235,"ಘಟ":107,"ಗಾ":676,"ಗಸ":140,"ಗ್":1370,"ಗೌ":65,"ಗೋ":407,"ಗೊ":342,"ಚಂ":92,"ಚಕ":52,"ಚನ":130,"ಚದ":87,"ಚಿಮ":122,"ಚಿನ":103,"ಚಲ":161,"ಚಿಸ":104,"ಚರ":168,"ಚಾರ":154,"ಚೆ":127,"ಚಾ":329,"ಚಿ":1078,"ಚೀ":83,"ಚು":203,"ಜಕ":77,"ಜಗ":96,"ಚಿತ":435,"ಚ್":491,"ಚಿಕ":171,"ಜನ":825,"ಜಧ":76,"ಜಲ":84,"ಜರ":102,"ಜಯ":76,"ಜು":167,"ಜೀ":273,"ಜೂ":99,"ಜಿ":789,"ಜಾ":390,"ಜೋ":82,"ಜೊ":58,"ಜೆ":97,"ಜ್":917,"ಟಕ":392,"ಟಗ":141,"ಞಾ":215,"ಟಣ":83,"ಟದ":127,"ಟನ":129,"ಟರ":243,"ಟವ":113,"ಟಾ":225,"೨ ":199,"೧ ":173,"೪ ":159,"ಂಸ":383,"ಂಶ":181,"ಂವ":61,"ಂಯ":79,"ಂಬ":1236,"ಂಭ":148,"ಂಪ":432,"ಂದ":5006,"ಂಧ":211,"ಂತ":1631,"ಂಥ":91,"ಂಟ":300,"ಂಡ":1562,"ಂಚ":262,"ಂಜ":146,"ಂಗ":1536,"ಂಖ":130,"೩ ":171,"ಂಘ":78,"ಂಕ":337,"ಅಗ":63,"ಅಕ":181,"೬ ":165,"ಅತ":348,"ಅಡ":73,"ಆಂ":116,"ಚೀನ":76,"೫ ":171,"ಅಂ":414,"ಆಸ":93,"೮ 
":158,"ಇದ":1048,"ಇತ":185,"ಆಫ":104,"ಆಧ":97,"ಆದ":131,"ಆವ":58,"ಆಳ":54,"ಆಲ":68,"ಆರ":228,"ಆಯ":65,"ಆಚ":60,"ಅವ":422,"ಆಗ":261,"೭ ":172,"ಅಸ":94,"ಆಟ":97,"ಇಂ":249,"ಆಡ":86,"ಅಪ":94,"ಅದ":275,"ಅಥ":647,"ಅನ":333,"ಅಧ":417,"ಅಲ":154,"ಆಕ":75,"ಅಮ":219,"ಅಭ":111,"ಅರ":281,"ಚ್ಚ":307,"ಉತ":320,"ಉದ":186,"ಇಲ":141,"ಇರ":180,"ಇವ":619,"ಇಪ":84,"ಇನ":88,"ಉಂ":56,"೯ ":167,"ಊರ":63,"ಉಪ":269,"ಟಕ ":183,"ಎಂ":888,"ಎಲ":159,"ಎರ":162,"ಎಸ":63,"ಏಪ":62,"ಎಎ":79,"ಎತ":68,"ಎನ":130,"ಲೂ":332,"ಳದ":98,"ಲೇ":336,"ಳನ":786,"ಲೆ":1225,"ಲೈ":134,"ಲೋ":199,"ಲೊ":116,"ಲ್":7197,"ಳಲ":778,"ಳಕ":140,"ಲವ":387,"ಳಗ":277,"ಲಸ":91,"ಲಾ":1226,"ಲಿ":5377,"ಲೀ":90,"ಲು":562,"ಳೆ":375,"ಳ್":432,"ಜಿ ":102,"ಳವ":134,"ಳಸ":184,"ಳಾ":158,"ಳು":1449,"ಳೂ":229,"ಳಿ":1334,"ವಂ":199,"ರೇ":335,"ರೆ":1812,"ರು":4505,"ರೀ":683,"ಲಂ":108,"ರೂ":345,"ರ್":5601,"ರೈ":117,"ರೋ":413,"ರೊ":96,"ರವ":930,"ರಶ":156,"ರರ":275,"ರಲ":578,"ರಳ":88,"ರಾ":3110,"ರಿ":4229,"ರಸ":502,"ರಹ":213,"ಲನ":192,"ಲಧ":79,"ಲದ":305,"ಲತ":56,"ಲರ":69,"ಲಯ":223,"ಲಗ":86,"ಲಕ":273,"ಷರ":95,"ಸಕ":90,"ಷವ":55,"ಸಗ":60,"ಷನ":59,"ಶೇ":96,"ಶೈ":56,"ಶೋ":71,"ಶ್":926,"ಶಿ":491,"ಶಾ":465,"ಶು":62,"ಶೀ":53,"ಸಂ":1411,"ಷತ":59,"ಷಣ":159,"ಷದ":539,"ಷಗ":72,"ಶಸ":149,"ಸಲ":766,"ಸರ":765,"ಹಗ":69,"ಹಕ":66,"ಸವ":133,"ಷೇ":140,"ಸನ":125," of":57,"ಸಮ":419,"ಷ್":1158,"ಸಬ":77,"ಸಣ":55,"ಹಂ":68,"ಷಿ":475,"ಷೆ":352,"ಸದ":188,"ಸತ":71,"ಷಾ":85,"ವಲ":178,"ವಳ":120,"ಶಕ":218,"ವಯ":77,"ವರ":2811,"ವದ":177,"ವಧ":55,"ವನ":1004,"ವತ":235,"ವಣ":147,"ವಜ":56,"ವಕ":121,"ವಗ":66,"ಷಕ":61,"ಶವ":121,"ಶರ":52,"ವ್":725,"ಶಬ":54,"ವೊ":56,"ವೇ":345,"ಶನ":143,"ವೈ":184,"ಶದ":301,"ವೆ":860,"ಶತ":100,"ವೃ":137,"ವೂ":60,"ವು":1384,"ವೀ":183,"ವಿ":2666,"ವಾ":3557,"ವಹ":172,"ವಸ":327,"ಶಗ":201,"ವವ":155,"ಸಾ":1235,"ಸಿ":2058,"ಸಸ":92,"ಸಹ":114,"ಸೆ":366,"ಹನ":99,"ಸೇ":360,"ಹದ":135,"igh":65,"ಸೂ":287,"ಹಣ":147,"ಹತ":133,"ಸೀ":80,"ಸು":1321,"ಹರ":198,"ಸ್":4080,"ಸೌ":55,"ಸೋ":90,"ಸೈ":69,"ಹಸ":55,"ಹವ":83,"ಹಳ":189,"ಹಲ":179,"ಹಾ":1242,"ಹಿ":907,"ಹೀ":57,"ಹೆ":605,"ಹೇ":123,"ಹು":431,"ಹೂ":52,"ಹ್":175,"ಹೊ":532,"ಹೋ":187,"ಿಪ":236,"ಿನ":2977,"ಿಧ":213,"ಿಮ":515,"ಿಭ":159,"ಿಬ":92,"ಿಡ":230,"ಿಟ":228,"ಿದ":3371,"ಿತ":1841,"ಿಣ":314,"ಿಹ":188,"ಿಷ":392,"ಿಸ":2818,"ೀಟ":93,"ಿಲ":986,"ಿಯ":3289,"ಿರ":1992,"ಿವ":655,"ಿಶ":439,"ಿಳ":170,"ೀಕ":215,"ೀನ":225,"ೀಪ":138,"ೀಯ":562,"ೀಮ":67,"ೀಡ":224,"ುಂ":466,"ೀತ":316,"ೀಸ":69,"ುಚ":64,"ುಟ":277,"ೀರ":351,"ೀಲ":114,"ುಕ":335,"ೀವ":282,"ುಖ":454,"ುಗ":730,"ಾಂ":1000,"ಾಜ":887,"ಾಚ":176,"ಾಖ":102,"ಾಗ":4356,"ಾಕ":469,"ಾಪ":496,"ಾಭ":74,"ಾಬ":132,"ಾಧ":258,"ಾದ":1706,"ಾನ":1975,"ಾಣ":559,"ಾಥ":81,"ಾತ":838,"ಾಟ":601,"ಾಡ":732,"ಿಂ":1911,"ಿಜ":269,"ಾಶ":207,"ಿಗ":1656,"ಾಷ":669,"ಾಸ":1090,"ಿಚ":104,"ಾಹ":451,"ಾಲ":1507,"ಾಳ":273,"ಿಕ":2705,"ಿಖ":52,"ಾವ":819,"ಾಮ":995,"ಾಯ":1047,"ಾರ":4215,"ತಗ":107,"ಣವ":210,"ತಕ":168,"ಣು":184,"ಣಿ":524,"ಣಾ":151,"ಣನ":52,"ಣದ":261,"ಣರ":86,"ಣಗ":184,"ಣಕ":131,"ತಂ":382,"ಡೆ":640,"ಡೇ":56,"ಡು":959,"ಡ್":778,"ಡಳ":71,"ಡಲ":204,"ಡವ":88,"ಡಿ":1466,"ಡಾ":222,"ಡನ":199,"ಡದ":306,"ಜ್ ":103,"ಡರ":323,"ಡಕ":55,"ಡಗ":107,"ಟಿ":799,"ಟೀ":72,"ಟು":403,"ಟೆ":354,"ಟೇ":63,"ಟೋ":167,"ಟ್":2145,"ನದ":576,"ನತ":67,"ಧಿ":651,"ನಡ":623,"ಧಾ":588,"ನಟ":88,"ನಚ":96,"ನಗ":604,"ಧವ":68,"ನಕ":147,"ಧರ":218,"ದ್":2979,"ದೊ":278,"ಧನ":157,"ದೇ":1188,"ದೆ":2081,"ದೂ":324,"ಧತ":62,"ದಾ":1139,"ದಿ":2445,"ದೀ":64,"ನಂ":266,"ದು":3947,"ದಸ":55,"ದಲ":2370,"ದವ":372,"ಥ್":61,"ದರ":1541,"ಥೆ":287,"ದದ":103,"ದನ":456,"ಥಿ":158,"ಥಾ":314,"ಥವ":694,"ದಗ":126,"ತ್":6565,"ಥಮ":57,"ದಕ":447,"ಥಳ":111,"ತೇ":59,"ತೆ":663,"ತೋ":73,"ತೊ":91,"ತಾ":1049,"ತಿ":2648,"ತೀ":321,"ದಂ":194,"ತು":2278,"ತವ":560,"ತಶ":56,"ಥಗ":54,"ht ":63,"ತಹ":98,"ತಯ":76,"ಣ್":346,"ತಮ":315,"ತರ":927,"ತಲ":96,"ತತ":62,"ಣೆ":345,"ತದ":1260,"ತನ":354,"ತಪ":55,"ಪೋ":69,"ಪ್":3265,"ಬಗ":149,"ಫಿ":80,"ಬಣ":56,"ಫೆ":85,"ಬದ":151,"ಪರ":638,"ಪಯ":118,"ಪಶ":129,"ಪವ":80,"ಪು":628,"ಬಂ":329,"ಪೀ":61,"ಪಿ":494,"ಪಾ":718," 
ri":59,"ಪೇ":72,"ಪೆ":86,"ಪೂ":243,"ನೈ":115,"ನೊ":113,"ನೋ":140,"ನ್":5580,"ಪಕ":225,"ಪಗ":72," px":81,"ಪಟ":267,"ಪಡ":351,"ಪತ":304,"ಪದ":446,"ಪನ":246,"ನಪ":122,"hum":105,"ನನ":201,"ಧ್":402,"ನಮ":102,"ನಲ":470,"ನರ":234,"ನವ":655,"ನಸ":130,"ನಿ":2138,"ನಾ":1363,"ನೂ":140,"ನು":2795,"ನೀ":276,"ಪಂ":242,"ನೇ":1015,"ನೆ":1144,"ರಜ":113,"ಯಾ":2655,"ಯಸ":104,"ರಚ":194,"ರಕ":548,"ರಗ":490,"ಯಶ":64,"ಯವ":894,"ಯರ":301,"ಯಯ":55,"ಯಲ":992,"ರಬ":144,"ಯೋ":318,"ರಭ":83,"ಯ್":184,"ರಮ":614,"ರಯ":73,"ಯೇ":137,"ರನ":308,"ರಪ":191,"ಯೊ":109,"ರತ":1050,"ರದ":1229,"ರಧ":52,"ಯೆ":362,"ಯಿ":713,"ರಡ":219,"ಯು":1152,"ಯೂ":247,"ರಣ":519,"ಮಹ":299,"ಮಸ":60,"ಯಗ":270,"ಮವ":111,"ಯಕ":439,"ಮಲ":102,"ಮರ":288,"ಮಯ":65,"ಭ್":65,"ಯಮ":248,"ಮ್":861,"ಮೊ":334,"ಮೋ":102,"ಮೈ":166,"ಮೆ":345,"ಯನ":1088,"ಮೇ":483,"ಯದ":586,"ಮೂ":556,"ಯತ":120,"ರಂ":561,"ಮೀ":223,"ಮು":926,"ಮಾ":2202,"ಮಿ":718,"ಮಗ":156,"ಭವ":125,"ಬ್":957,"ಮಕ":218,"ಮದ":290,"ಮಧ":132,"ಮನ":323,"ಭೌ":76,"ಮಟ":57,"ಭಾ":1580,"ಭಿ":188,"ಯಂ":366,"ಮಣ":82,"ಭೂ":208,"ಮತ":1458,"ಬಹ":299,"ಬಸ":64,"ಫ್":310,"ಫೋ":52,"ಬಳ":371,"ಬಲ":150,"ಬರ":893,"ಬೆ":594,"ಬೇ":205,"ಭದ":68,"ಬೈ":80,"ಚನೆ":74," th":169,"ಬಾ":388,"ಬಿ":415,"ಬೀ":91,"ಮಂ":262,"ಬು":293,"ತರ ":414,"ೂ ":1096,"ಡದಲ":57,"ಡನೆ":57,"ಡನೇ":52,"ಾ ":1624,"ಡನ್":62,"ು ":14575,"ೀ ":229,"ಿ ":8907,"ತನ ":68,"ಹ ":251,"ಶ ":222,"ವ ":2270,"ಸ ":186,"ಷ ":133,"ತು ":1867,"ಲ ":511,"ಳ ":1320,"ತೆ ":302,"er ":60,"ಮ ":601,"ಯ ":3177,"ರ ":2974,"ತಹ ":86,"ಪ ":166,"ಡರ್":274,"ಬ ":384,"ಡಲಾ":92,"ಥ ":133,"ದ ":5745,"ತಿ ":665,"ಧ ":252,"ಡಳಿ":71,"ತಾ ":62,"ನ ":3185,"೦ ":330,"ಟೆಂ":84,"ght":64,"ಣದ ":121,"ಟುಂ":86,"ಟಿಕ":121,"ಟಿದ":82,"ಟಿನ":67,"ಟಿಯ":94,"ಟಿಸ":83,"ಣು ":78,"ಣಿ ":100,"ಣೆ ":124,"ತದ ":400,"ಟ್ರ":464,"ಟ್ಟ":861,"್ ":7021,"ಟೋಬ":94,"ೊ ":102,"ೋ ":134,"ೈ ":170,"ೆ ":6915,"ೇ ":1293,"ಟನೆ":65,"ಡ್ ":396,"ಃ ":52,"ಂ ":134,"ಞಾನ":208,"ಡೆ ":91,"ಟವಾ":66,"ಟರ್":142,"ಟ ":308,"ಜ ":97,"ತ ":1468,"ಣ ":786,"ಠ ":61,"ಡ ":654,"ಗ ":398,"ಖ ":205,"ಡದ ":222,"ಕ ":1763,"ಡು ":260,"ಟಗಾ":53,"ಟಗಳ":71,"ಡಿ ":212,"ಡಾ ":70,"ಟಕದ":131,"ಆ ":74,"ಈ ":714,"ಕಗಳ":105,"ಗು ":199,"ಗೂ ":397,"ಗಿ ":1320,"ಗೆ ":1195,"ಖಂಡ":114,"ಕಟ್":111,"ಗ್ ":190,"ಕತೆ":52,"rig":61,"ಗರ ":186,"ಗಳ ":1047,"ಗದ ":146,"ಕಂಡ":123,"ಕಂಪ":101,"ಕ್ ":357,"ಕು ":163,"ಕಿ ":188,"ಒಳಗ":120,"ಕಾ ":103,"ಕೆ ":697,"ಕರ ":75,"ಒಟ್":62,"px ":80,"ಒಬ್":183,"ಕದ ":188,"ಒಂದ":1351,"ಚಂದ":71,"ಗ್ಗ":161,"ಗ್ಲ":159,"ಗ್ಯ":52,"ಗ್ರ":702,"ಗೊಳ":102,"ಗೋರ":246,"ಗೋಳ":55,"ಚ್ ":122,"ಗೆಯ":68,"of ":53,"ಗೊಂ":193,"ಗಾಗ":97,"ಗಾಂ":64,"ಗಿನ":109,"ಗಿರ":313,"ಗಿಸ":155,"ಗೀತ":186,"ಗುಂ":62,"ಗಾರ":173,"ಗಾಲ":60,"ಗಿತ":64,"ಗಿದ":943,"ಗುರ":169,"ಗುವ":283,"ಗುಣ":80,"ಗುತ":373,"ಗಳಲ":710,"ಗಳನ":727,"ಗಳೂ":224,"ಗಳು":1084,"ಗಳಾ":97,"ಗಳಿ":589,"ಚು ":103,"ಗವಾ":67,"ಗಸ್":126,"on ":65,"ಖ್ಯ":409,"ಗರದ":116,"ಗದಲ":154,"ಗತ್":77,"ಗಣಿ":86,"ಗಡಿ":73,"ಗಗಳ":101,"ಕ್ಷ":939,"ಕ್ರ":668,"ಕ್ಯ":360,"ಕ್ತ":356,"ಕ್ಟ":186,"ಕ್ಸ":118,"ಕೋಟ":60,"ಕ್ಕ":985,"ಕೊಂ":167,"ಕೊಳ":121,"ಕೊಡ":66,"ಕೆಲ":163,"ಕೃಷ":124,"mb 
":102,"ಕೃತ":250,"ಕೆಗ":100,"ಕೆಟ":113,"ಕೇಂ":134,"ಕೆಯ":206,"ಕಿಸ":52,"ಕಿರ":95,"ಕಾಸ":55,"ಕಾವ":68,"ಕಾಶ":87,"ಕಾಲ":240,"ಕಾಯ":81,"ಕಾರ":767,"ಕಿನ":176,"ಕಿತ":62,"ಕುರ":58,"ಕೂಟ":53,"ಕೂಡ":86,"ಕೀಯ":82,"ಕುಟ":89,"ಕಾಣ":65,"ಕಿಂ":74,"ಕಾನ":57,"ಕಾದ":114,"ಕಾಗ":98,"ಕಾಂ":80,"ಕಲಾ":56,"ಕಲ್":114,"ಕವಾ":159,"ಕವಿ":72,"ಕರಣ":88,"ಕರೆ":221,"ಕರಿ":97,"ಕರು":101,"ಕರಾ":121,"ಕರ್":467,"ಕದಲ":61,"ಕನ್":423,"ಉತ್":319,"ಉದ್":135,"ಉಪಯ":103,"ಎಸ್":57,"ಏಪ್":62,"್ಥೆ":228,"್ಥಾ":284,"್ಥಿ":119,"್ದವ":56,"್ದರ":183,"್ದೇ":157,"್ದು":363,"್ಧತ":61,"್ದಾ":350,"್ದಿ":75,"ಎಂಬ":368,"ಎಂದ":448,"್ಣು":59,"್ಣಿ":64,"್ತದ":721,"್ತರ":338,"್ತವ":225,"್ತನ":109,"್ತಿ":826,"್ತು":1699,"್ತೆ":63,"್ತಾ":369,"್ಥಳ":111,"್ತ್":311,"್ಪತ":108,"್ಪಡ":151,"್ಪಟ":116,"್ಪನ":87,"್ಪಾ":63,"್ಪಿ":65,"್ನಡ":386,"್ಧಿ":63,"್ಧಾ":55,"್ನಾ":402,"್ನಿ":122,"್ನೂ":64,"್ನು":2402,"್ನೆ":94,"್ನೊ":55,"್ಮಾ":125,"್ಮಿ":138,"್ಯಗ":175,"್ಯಕ":269,"್ಮದ":87,"್ಮನ":56,"್ಯಂ":192,"್ಯು":176,"್ಯಾ":1532,"್ರಜ":97,"್ರಗ":251,"್ಯವ":564,"್ರಕ":297,"್ಯರ":131,"್ಯಮ":116,"್ಯಯ":54,"್ಯನ":119,"್ಯದ":366,"್ಮೆ":64,"್ಯತ":58,"್ಬರ":145,"್ಮಕ":73,"್ಲೂ":97,"್ಲಾ":202,"್ಲಿ":4561,"್ಲದ":116,"್ಳು":109,"್ಳಿ":142,"್ಲೊ":95,"್ಲೇ":124,"್ಲೆ":484,"್ಲ್":60,"್ರವ":349,"್ರಶ":142,"್ರಸ":283,"್ರಹ":151,"್ರಾ":846,"್ರಿ":942,"್ರೀ":518,"್ರು":70,"್ರತ":249,"್ಯೂ":155,"್ರಣ":55,"್ರದ":625,"್ಯೆ":160,"್ರಪ":133,"್ರಮ":468,"್ರಭ":71,"್ರರ":86,"್ರಯ":66,"್ರೆ":357,"್ರೈ":58,"್ರೇ":163,"್ರ್":78,"್ರೋ":120,"್ಕ್":92,"್ಕರ":72,"್ಕೃ":119,"್ಕೂ":68,"್ಕು":73,"್ಕೆ":541,"್ಕಿ":142,"್ಕಾ":226,"್ಚಾ":65,"್ಚಿ":204,"್ಚು":144,"್ಗೆ":104,"್ಟರ":54,"್ಟಣ":72,"್ಟದ":57,"umb":108,"್ಞಾ":215,"್ಚ್":78,"್ತಕ":66,"್ಣವ":52,"್ಡ್":103,"ಎಎಎ":72,"್ಟೆ":135,"್ಟೋ":119,"್ಟ್":671,"್ಟವ":59,"್ಟಿ":399,"್ಟಾ":78,"್ಟು":217,"ಎತ್":68,"್ವವ":82,"್ವರ":147,"್ವಿ":108,"್ವಾ":248,"್ವಹ":80,"್ಶನ":54,"್ವೀ":99,"್ವದ":124,"್ವತ":116,"್ಷರ":65,"ಎಲ್":119,"್ಷಿ":380,"್ಷೇ":121,"್ವ್":52,"್ಷಗ":60,"್ಷಣ":115,"್ಷದ":530,"್ಸ್":339,"್ಷ್":53,"್ಸೆ":62,"್ಸಿ":119,"ಎನ್":124,"thu":103,"ಎರಡ":156,"the":64," ಆ ":74," ಈ ":713,"ೋಗಿ":116,"ೊಳಿ":52,"ೊಳ್":156,"ಂಸ್":307,"ೊಮ್":67,"ಂತೆ":162,"ಂತ್":364,"ಂತರ":298,"ೋಬರ":94,"ಂತಹ":96,"ಂತಿ":112,"ಂತಾ":62,"ಂದೆ":162,"ಂದೇ":82,"ಂದ್":319,"ಂಧಿ":104,"ಂದರ":313,"ಂದಿ":541,"ಂದಾ":143,"ಂದು":2122,"ಂದೂ":183,"ಂಡದ":109,"ಂಡರ":290,"ಂಟ್":69,"ಂಡ್":203,"ಂಡಿ":293,"ಂಡು":195,"ೋತ್":57,"ಂಭವ":62,"ಂಬು":192,"ಂಬಿ":59,"ಂಬಾ":53,"ಂಬರ":342,"ಂಯು":54,"ಂಪ್":93,"ಂಬಂ":87,"ಂಪು":57,"ಂಪಿ":53,"ೋಡಿ":64,"ಂಪನ":65,"ಅಂದ":78,"ಅಂತ":160,"ೋರಾ":52,"ೋರಿ":296,"ೋರ್":74,"ೈಸೂ":88,"ೈಸ್":52,"್ಕ ":65,"್ಗ ":60,"ೊಂಡ":342,"ೊಂದ":551,"್ಣ ":181,"್ಡ ":146,"ಂಕ್":62,"್ದ ":187,"ಂಗಳ":447,"ಂಖ್":129,"ಂಗದ":91,"ಂಗಡ":60,"್ಧ ":174,"್ತ ":281,"ಂಕರ":53,"್ಥ ":67,"ಂಚದ":64,"ಂಗೀ":161,"ಂಗಾ":120,"ಂಗ್":337,"್ಟ ":206,"್ಲ ":152,"ೊದಲ":197,"ೊಬ್":70,"್ಷ ":91,"್ವ ":146,"್ಪ ":91,"ೊಟ್":56,"್ನ ":174,"್ರ ":555,"್ಯ ":846,"್ಮ ":242,"್ಬ ":139,"ೊಡ್":169,"ಇದು":518,"ಇದೆ":55,"ೆಪ್":108,"ಇದರ":166,"ಇದನ":181,"ೆಬ್":91,"ಇನ್":84,"ೆದು":87,"ೆದಿ":70,"ೆನ್":134,"ಇತಿ":65,"ಇತರ":85,"ೆಲ್":157,"ೆಳೆ":86,"ೆಳಗ":64,"ೆಲವ":94,"ೆಲೆ":75,"ೇಖಕ":54,"ೇಖನ":56,"ೆಯಲ":526,"ೆಯು":360,"ೆಯಾ":224,"ೆಯಿ":119,"ೆಯನ":236,"ಆಸ್":78,"�":222,"ೆರೆ":114,"ೆರಿ":98,"ೆಸರ":280,"ಆರ್":99,"ೆಸ್":116,"ೆಸಿ":79,"ಆಫ್":99,"ೇತ್":125,"ೇಶವ":74,"ೇಶ್":84,"ೇಶಿ":67,"ೇಶಗ":141,"ೇಶಕ":79,"ಉಂಟ":54,"ೇವೆ":92,"ೇಶದ":265,"ೇವಾ":53,"ೇವಿ":64,"ೇಲಿ":68,"ೇಲೆ":114,"ೇರಿ":374,"ೇರೆ":53,"ೇರು":61,"ಇವರ":508,"ಇಲ್":125,"ಇರು":142,"ಇಪ್":83,"ಅತಿ":137,"ಅತ್":191,"ಅಥವ":638,"ೇಶ 
":126,"ಅಕ್":155,"ೆಂಗ":155,"ೆಂಬ":309,"ೆಂಟ":57,"ೆಂಡ":313,"ೆಂದ":240,"ಇಂಡ":64,"ಇಂದ":55,"ಇಂಗ":69,"ಆದರ":63,"ಅಸ್":52,"ಆಗಸ":106,"ಆಗಿ":88,"ೆಗಳ":621,"ೆಗಾ":62,"ೆಗೆ":326,"ೆಗೊ":53,"ೆಗೋ":241,"ಅರ್":136,"ಅಮೇ":112,"ಅಮೆ":58,"ೆಕ್":125,"ಅವರ":214,"ಅವು":57,"ಅಲ್":103,"ೆಚ್":218,"ೆಟ್":262,"ಅನು":103,"ಅಧ್":96,"ಅನೇ":75,"ಅನ್":71,"ೇಂದ":169,"ಅದರ":105,"ಅಧಿ":313,"ಅದು":55,"ಅಭಿ":89},"n_words":[514512,567770,395860],"name":"kn"}
\ No newline at end of file
diff --git a/nlp_resource_data/langdetect/profiles/ko b/nlp_resource_data/langdetect/profiles/ko
new file mode 100755 (executable)
index 0000000..1f30ef9
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/ko
@@ -0,0 +1 @@
+{"freq":{"정리이":69,"바뀌었":102,"²":131,"·":6405,"×":65,"í":141,"é":560,"è":77,"ç":72,"젤레스":103,"ä":74,"á":169,"ü":183,"ö":136,"ó":126,"참여했":93,"ā":193,"ī":75,"참여하":278,"방글라":64,"ˈ":80,"́":127,"μ":101,"ν":231,"ο":310,"ι":218,"κ":112,"λ":169,"δ":75,"ε":146,"η":94,"α":368,"β":61,"γ":80,"ά":91,"ί":112,"ω":66,"ό":88,"σ":105,"ς":279,"ρ":207,"개발 ":675,"π":78,"υ":78,"τ":170,"ь":58,"я":64,"ы":64,"ч":103,"р":321,"с":201,"т":190,"у":110,"К":57,"С":60,"л":238,"к":231,"й":118,"и":464,"о":480,"н":337,"м":118,"г":82,"в":283,"а":488,"е":318,"д":121," ·":941,"장이었":82,"람이 ":326,"و":102,"ي":163,"ل":186,"م":122,"ن":119,"د":84,"ب":85,"ا":266,"س":58,"ر":100,"람의 ":253,"사상가":98,"산에 ":201,"았다 ":914,"미리 ":99," К":57,"포트 ":65,"놓고 ":101," С":60,"사와 ":395,"산업 ":379,"미를 ":234," ا":105,"람은 ":91,"래에 ":110,"람을 ":297,"물질을":114,"로그 ":136,"물질이":123,"물질의":91,"미르 ":149,"농구 ":70,"생명과":67,"장인물":96,"정리로":90,"장으로":586,"나에서":75,"야기 ":98,"전문적":89,"전송 ":81,"크기가":57,"이팅 ":102,"아들 ":170,"설로 ":135,"나오는":326,"인트 ":75,"· ":502,"의회 ":231,"장이다":475,"랑을 ":65,"사원 ":75,"라질 ":121,"롭게 ":172,"방검찰":64,"사용 ":140,"렸다 ":419,"생산 ":224,"안된 ":100,"é ":165,"란시스":115,"키고 ":206,"서대문":75,"가로서":123,"ể":108,"∼":121,"사이 ":125,"정서 ":66,"찰청 ":197,"사인 ":233,"사일 ":104,"프가니":112,"날에는":70,"따른 ":260,"바라 ":72," ∼":97,"사의 ":834,"가리킨":657,"가리키":646,"서만 ":126,"가마쿠":67,"종교적":110,"딸로 ":77,"사상의":73,"사상이":110,"사상을":91,"표에 ":65,"차이를":67,"야구 ":394,"러드는":78,"생명체":80,"서를 ":347,"성남시":83,"거는 ":139,"코프 ":86,"소가 ":256,"ああ ":309,"밀도는":453,"련된 ":456," 《":4309," 》":621," 〈":549," 『":98," 「":147,"㎞":62,"사적 ":294,"㎢":2033,"㎡":66,"초대 ":388,"가리아":156,"생물학":287,"키가 ":87,"애나 ":67,"ㆍ":388,"あ":1627,"》":5027,"《":5040,"〉":601,"〈":607,"』":119,"『":118,"」":186,"「":184,"가르침":80,"사전 ":487,"ア":1597," ㎞":62," ㎢":2029,"산의 ":346,"알고리":206,"장치 ":174,"아동 ":67,"산이 ":146,"전시 ":66,"크다 ":72," あ":325,"정상 ":60," ア":198,"산은 ":120,"않다 ":121,"백과사":636,"전반에":79,"드시 ":89,"사소송":106,"산을 ":359,"제르바":102,"크는 ":102,"전반적":63,"로구 ":380,"생물의":77,"사자 ":61,"처리하":131,"론과 ":129,"배경으":124,"사장 ":72,"세네갈":68,"남쪽 ":131,"않는 ":497,"바로 ":210,"민들의":77,"이토 ":77,"성된 ":636,"랑스에":91,"랑스어":228,"랑스와":71,"민들이":87,"나이며":210,"남아시":146,"들어 ":501,"악단 ":108,"》에서":140,"내지 ":84,"乱":136,"乩":112,"乞":60,"乙":3650,"乘":609,"乎":59,"么":100,"之":14530,"丹":2134,"丼":110,"临":105,"丫":150,"中":433,"両":683,"並":12874,"丙":894,"丘":5494,"丛":58,"丟":765,"丞":224,"丑":412,"专":172,"丕":293,"且":380,"丈":1178,"三":20984,"성동 ":77,"세대 ":267,"一":284,"丁":10402,"万":311,"仕":271,"仗":58,"亳":204,"人":513,"亥":108,"亨":114,"프는 ":76,"亞":4771,"亟":76,"亀":254,"亂":4287,"于":245,"성공회":174,"佇":187,"佛":194,"佐":64,"미디어":463,"조건을":75,"俘":194,"侶":558,"侃":89,"侏":196,"세계의":336,"자이자":420,"세계적":310,"자이언":74,"세계에":387,"나이다":1203,"남아메":109," 冲":247,"럭비 ":120,"이터 ":437," 兌":119,"아닌 ":431,"년대부":66,"것과 ":178," 僅":104,"咀":213," 傭":293,"개를 ":145,"吾":169,"圓":737,"國":1227,"在":61,"차원의":57,"国":85,"차이다":148,"조는 ":153,"冲":466," 丙":282," 丘":1641," 丟":262," 丞":90," 丑":173," 丕":104," 丈":484," 三":5820," 一":101," 丁":2901," 万":65,"남아프":161,"작용하":94,"兌":396,"傭":628,"傀":77,"무총리":119,"약간 ":80,"僅":349,"倣":128,"倉":3517,"倖":139,"드와 ":234," 侶":151,"차이나":64," 俘":77,"厥":67," 倉":1228," 佛":82," 佇":65,"정부 ":492,"라에서":173,"자유주":102," 侏":88," 人":94," 亳":67," 仕":88," 両":199," 並":3333," 中":269," 丹":1249," 之":4377," 乘":159," 乙":1383,"자전거":100," 亂":1372," 亀":76," 于":69," 亞":903,"유하는":80,"로나 ":78,"감독하":72," 大":668,"드에 ":180,"尙":86,"彦":59,"차이가":126,"자이며":181,"年":139,"랑스의":605,"남아있":64,"大":1261,"세기경":68," 吾":77," 咀":127,"쟁이 ":102,"세계화":76,"작이다":110,"유하고":203,"때로 ":69,"아는 ":196," 圓":305," 國":341,"발로 ":61,"세기부":84,"月":80,"인천광":247,"이트 ":656,"반대로":62,"이틀 
":64,"디렉터":60,"日":322,"敎":382,"拼":71,"바르 ":85,"바른 ":114,"셔널 ":194,"너먼트":71,"드어 ":66,"체되었":67,"표적 ":60,"센고쿠":141,"전문대":61,"바니아":130,"렸던 ":58,"년대에":224,"제로는":86,"거나 ":1548,"淸":143,"포함 ":105,"받는다":110,"나이티":86,"편하여":83,"나이트":65,"재연구":60,"상스 ":59,"나치 ":142," 敎":71," 日":116,"있지만":276,"사에 ":412,"ος":126,"眞":108,"的":166,"개되었":111,"사업 ":385,"노동 ":64,"키기 ":205,"상북도":406,"ς ":268,"전문가":188,"랑수아":66,"물체의":76,"서로 ":648,"α ":114,"爲":79,"의할 ":57," 淸":90,"전사 ":94,"임진왜":116,"의해 ":1904,"남으로":73,"아노 ":115,"크기의":78,"세기에":313,"의학 ":91,"의한 ":415,"장에서":426,"세기의":90,"유형문":67,"창시자":86,"밴드 ":173,"래의 ":272,"장애인":188,"자치 ":104," 眞":65,"자적인":76,"제부 ":75,"硏":108,"인텔 ":106,"의하고":60,"자치도":165,"자치단":144,"응하여":78,"가문 ":83,"ск":57,"람에게":86,"입한 ":113,"정부의":282,"개념이":228,"개념을":110,"개념으":199,"때는 ":184,"저우 ":125,"라이다":188,"라이더":86,"산식품":614,"표준 ":235,"라이나":257,"녀는 ":98," · ":456,"갖는다":62,"나의 ":717,"а ":76,"절에 ":78,"라이브":286,"й ":90,"의하는":116,"н ":64,"성되었":222,"성되어":523,"내부에":78,"в ":66,"내부의":59,"크로 ":125,"라이버":78,"상적 ":74,"어가 ":375,"남아 ":134,"가들의":84,"아를 ":247,"가들이":99,"ч ":81,"라이며":60,"아르 ":58,"ко":62,"ич":83,"세를 ":145,"ор":58,"ов":121,"но":66,"악단이":78,"ви":93,"ан":66,"첫번째":86,"반대하":127,"정부를":59,"색의 ":117,"재지이":107,"적에 ":98,"驿":62,"라이언":123,"라이온":80,"鬪":67," 靑":75,"라이스":87,"민법 ":58,"색을 ":142,"나인 ":170,"색은 ":70,"성모 ":93,"나이 ":96,"라이선":82,"아니라":536,"로는 ":3890,"정보통":139,"처에 ":89,"간되었":75,"전에 ":1190,"드리아":129,"청북도":246,"아니아":146,"표시하":91,"이터베":227,"최근 ":95,"년대 ":557,"이터를":186,"서부 ":369,"아니스":70,"鎭":93,"않는다":348,"송국 ":85,"초로 ":510,"인터내":76,"정보화":76,"인터넷":668,"난을 ":59,"정부에":116,"정부와":65,"임한 ":86,"靑":123,"자치구":154,"인터페":232,"감독을":67,"아누스":72,"감독의":83,"감독이":123,"타고 ":84,"아로 ":63,"적인 ":4808,"적이 ":256,"총괄하":84,"꾼":109,"꾸":368," 갈":698," 각":2309," 가":18345," 간":2103," 같":2403," 개":12530," 객":258," 값":267," 감":1783," 갑":208," 갖":887," 강":4617,"꿈":119,"꿀":77,"꼬":171,"꼭":124,"꼴":114,"꼽":179,"저장 ":80,"꽃":296,"꺼":75,"께":1579,"껍":85,"긴":797,"기":57951,"길":1653,"글":2723,"긍":81,"긋":58,"금":4364,"급":4129,"깥":84,"깨":247,"깔":123,"깝":68,"깃":99,"김":2072,"깊":200,"까":5090,"권":5693,"궁":939,"궐":79,"궤":302,"귀":1039,"전은 ":325,"규":2589,"균":820,"근":3205,"그":19235,"극":2043,"관":22357,"괄":288,"광":6130,"괴":686,"이트로":57,"교":18288,"구":31163,"국":45098,"굴":660,"굳":64,"군":9174,"굿":62,"굽":77,"겐":407,"겔":142,"겉":64,"게":11654,"것":8442,"검":1618,"겸":469,"겹":91,"겼":193,"경":17015,"결":4818,"격":2956,"겨":1106,"겪":88,"견":1448,"겠":90,"물학 ":86,"계":15438,"과":26837,"곽":179,"공":20325,"곰":87,"곱":284,"곳":1654,"골":1102,"곤":459,"곧":180,"곡":2975,"고":47107,"값":459,"갑":443,"감":2792,"갖":908,"강":6713,"갔":159,"같":2537,"객":963,"개":15756,"가":55413,"각":5239,"간":9024,"갈":1359,"갱":78,"갤":95,"걀":59,"건":6543,"거":8987,"걸":1105,"아니다":144,"라운드":96,"날의 ":126,"점에 ":197,"전을 ":820,"버그 ":126,"이트를":58,"반드시":88,"드리드":67,"램을 ":192,"이티드":93,"전의 ":495,"발되었":131,"작한 ":541,"라오스":74,"져서 ":63,"널리 ":525,"사연구":82,"전이 ":205,"전인 ":79,"의회는":86," 뛰":288,"콘텐츠":192,"몇":457,"명":15332,"몰":624,"몽":572,"몸":435,"못":710,"몬":786,"모":11378,"목":6926,"멘":418,"멜":262,"메":5400,"멕":373,"멍":84,"멀":284,"멸":484," 뚜":58,"며":20876,"면":9744,"멤":213,"먼":1045,"머":1633,"먹":424,"므":521," 러":1462," 럭":156," 런":332," 레":2242,"산업기":64," 렌":152,"발달하":67,"믹":261,"미":17448,"믿":187,"민":16451,"흑해 ":60,"뮌":153," 랭":89," 랩":96," 랴":57,"뮤":598,"뮬":208," 띠":142," 라":5313," 락":86,"의하면":109," 람":83," 란":706," 래":122," 랜":122," 랑":74," 뜨":65,"묘":914," 뜻":2081," 띄":61,"물":10422,"묻":74,"문":17304,"묶":127,"무":13084,"묵":131," 디":2243,"룰":113,"룽":97,"룹":837,"룸":158,"룡":457,"룬":460,"루":6886,"룩":203," 등":10121," 들":1303,"료":2691,"뢰":281,"조를 ":342," 득":75," 드":1545," 
듀":145," 뒷":121," 뒤":936,"롱":332,"롭":295,"롬":294,"롯":651,"롤":575,"론":3597,"로":96384,"록":4253," 둥":104,"례":1168," 둘":552,"렸":708,"령":3201," 두":3055,"렴":153," 둔":309,"렵":166,"련":2464,"렬":396,"력":5587,"려":5019,"렛":83,"렘":205,"렐":151,"렉":716,"레":10369,"렌":918,"렀":295," 될":193,"렇":142,"매":4904,"맥":997,"맡":729,"맨":507,"맷":142,"맵":77," 또":8172,"맺":226,"맹":992,"링":1187,"만":12443,"많":2444,"작품 ":175,"마":15987,"막":1783,"말":8244,"망":1651,"맞":936,"맛":168,"리":49017,"릭":1201,"린":3639,"릴":663,"릿":184,"립":9031,"림":3191," 떠":221,"름":4869,"릇":64," 떨":873,"릉":452,"륭":84,"륨":139,"률":1185,"를":36449,"른":4259,"반도체":99,"르":15981,"륵":60,"륜":185,"류":4914,"륙":763," 때":4328,"발매 ":81," 땅":251,"뤼":174,"자치시":91,"뤄":108," 딸":364," 따":3445," 딴":80," 몽":316," 못":580," 몸":407," 몰":372," 몬":267," 모":6818," 목":4311,"뻗":112," 묘":385,"블":1922,"븐":182," 먼":230," 머":458," 먹":353,"브":4958," 메":2219," 멕":332," 멜":173," 멘":88," 멀":265,"빗":71," 면":2578,"빛":405,"반도에":120," 멸":242,"빙":357,"빅":263," 멤":202,"비":15033,"빈":832,"빌":1411,"빵":82,"빼":105,"빠":443," 명":7874," 몇":341,"빨":181," 뮤":321,"전용 ":96,"뿌":188," 미":9777,"최고 ":196," 민":2788," 믿":184,"뿐":444,"뿔":93,"램의 ":106," 및":5809," 밀":1074," 받":1853," 반":3359," 밝":338," 발":8680," 밑":142," 밖":243," 박":1386," 바":4874,"뼈":195," 무":4302," 묵":81," 묶":123," 물":2886," 문":7777," 묻":68,"뽑":108," 뮌":99,"래스카":63,"벽":406,"벼":128,"벳":78,"벨":1138,"베":5169,"벡":239,"벤":610,"벗":142,"법":11136,"범":2511,"번":6501,"벌":1869,"버":5133,"볼":1272,"본":10004,"복":3536,"보":17044,"반도의":107,"병":2570," 루":1582,"별":5261,"변":3473,"배":4005,"백":2909,"밴":506,"밸":82," 로":5189," 록":326,"밥":125,"밤":208,"방":12878,"밭":81,"밑":146,"밖":309,"바":9518,"박":2316,"받":2627,"반":9065,"발":14073,"밝":341,"밀":2017,"및":5896,"밍":463," 롱":64,"뱅":123," 롤":179,"뱀":171," 론":58," 롯":72," 마":7115," 막":816," 많":2225," 만":6286," 링":189," 맛":124," 맞":858," 망":371," 말":7151,"불":5804,"붉":133," 리":3407,"부":33314,"북":7080,"분":9424,"붙":797," 린":107,"붓":60,"붕":269," 릴":78,"문학 ":248,"뷰":194," 맨":215," 매":2433," 맥":407," 맡":725," 맹":113,"코틀랜":227," 맺":219,"뷔":356," 류":171,"봄":175,"봇":228,"봉":1368," 뤼":59,"미로는":60," 르":285," 를":2001," 깨":169,"뉘":227,"뉜":158,"뉴":2112,"눌":108,"눈":377,"눅":106,"누":1558," 길":1008," 긴":363,"눔":57," 기":21288," 까":271," 깊":185," 김":1910," 깃":81,"니":10368,"닉":426,"닌":1054,"닐":137,"닛":171,"님":381,"닝":259,"살았던":73,"다":127946,"닥":218,"단":12729,"닫":65,"달":3219,"닭":75,"닮":58,"답":174,"담":2045,"닷":134,"당":8583,"닿":168,"늄":159,"는":119185,"느":753,"늘":901,"능":2952,"늦":65,"늬":121,"던":5053,"덜":472,"더":3069,"덕":1078,"델":977,"데":8452,"덱":63,"덴":904,"덩":135,"덮":120,"덧":84,"덤":174," 꼽":102,"댐":73,"자치주":85," 꼭":115," 꼬":116,"댄":184,"대":50380," 꽃":178,"돕":110,"돔":57,"동":26013,"도":42958,"독":6183,"돈":384,"돌":1249,"돼":177,"상시키":58," 꾸":91,"전세계":81," 꿈":78," 게":2860," 겉":64," 검":993," 것":7525,"끼":482," 겐":93,"끌":362,"끊":85,"끈":119,"끄":190,"끝":732,"뒤에 ":105," 걸":1004," 건":2744,"끔":84," 거":3157,"뀌":174,"사업에":67," 갤":90,"냥":141," 관":8286," 광":2185," 곧":169," 곤":198," 고":8430," 곡":823,"냈":722," 골":477,"냉":198," 공":10814," 곳":1375," 곱":95,"냐":427," 곰":60," 과":4291,"날":1685," 계":3522,"남":10366,"납":236,"났":694,"낭":223,"낮":351,"낱":236,"낳":117,"내":7694,"낸":788,"낼":133," 겪":88," 겨":197," 격":403," 견":241,"낌":58," 결":2608,"낚":60," 겹":79,"낙":450," 겸":396,"나":29545," 경":10290,"난":2693,"논":1278,"노":7620,"녹":472," 권":1787," 궁":394," 굴":151,"라우저":117,"녕":139," 군":3102,"념":1682," 굳":62," 국":11641," 구":9126,"년":50612,"녁":59,"녀":836," 교":6257,"넷":970,"넨":74,"넬":161,"넣":275,"넥":130,"네":5012,"넘":422,"널":1956,"넓":520," 괴":232,"너":1765,"넌":123," 글":778," 긍":64,"뇨":62," 금":1723," 
급":1151," 근":1629," 극":746," 그":13695," 균":198,"뇌":397," 규":1420," 귀":590,"의해서":178,"사업이":73,"놓":470,"높":1411,"사업으":67,"사업을":303,"놀":416," 궤":202,"놈":67,"사업의":104,"농":2894,"또":8222," 뉴":1506," 느":260," 는":22679," 늘":155," 늦":61,"적은 ":2122," 능":471,"뚜":90," 닌":150," 니":852," 닛":81,"자치체":176,"의하여":356," 단":4273," 닮":58," 달":1640," 닭":57," 다":9191," 닥":60," 당":2886," 닿":61," 담":1163," 대":30352," 댄":143,"뛰":296,"개로 ":124," 더":1674," 덕":284,"적을 ":427,"뜻":2127," 던":104,"뜨":161," 데":3003," 덴":317," 델":206,"띄":63," 덧":76," 덩":92," 덮":105,"라":32052,"락":940,"띠":194,"랜":2761,"랙":360,"래":5947,"랑":3926,"랐":156,"람":3445,"랍":361,"랄":216,"란":5893," 동":9304," 돕":101,"략":655,"랴":119," 돌":648,"랭":236,"랫":313," 돈":178,"램":1320,"랩":147," 도":8642," 독":3941,"드로이":60," 돼":72,"량":2327," 되":3560,"럿":63,"럽":1862," 된":1374,"럼":922,"런":983,"럴":253,"러":8043,"럭":336,"될":754," 끈":60,"됨":160," 끊":84," 끌":148," 끝":706,"됐":304,"되":20943,"된":15250,"둥":406," 난":587," 나":7418," 낙":348,"둔":452,"두":5194,"둑":155," 끼":191,"둘":641," 냉":157,"사에서":413,"뒤":1008,"반면 ":75,"뒷":122," 남":6428," 납":167," 낮":336," 낭":128," 날":631," 낸":132," 낳":116," 낱":228," 내":4309,"듬":206,"듭":58,"듯":149," 너":176,"들":14979,"든":2442," 넘":356,"등":12499," 널":543," 넓":459,"듈":72,"듀":325,"득":672,"드":16369,"따":3543,"딱":94,"딴":97,"딸":420," 년":48504,"딩":848,"디":7187,"딕":142," 넷":115,"딘":221,"딜":107," 넣":235," 넥":74," 네":2034," 높":1360," 놓":344," 놀":187," 농":2185," 논":982," 노":3844," 녹":420,"땅":275,"때":4902,"떻":123,"너무 ":59,"떨":893,"적의 ":167," 뇌":233,"떠":388,"떡":75,"떤":684," 눈":259," 누":647,"최":4895," 즉":844," 즐":195," 증":1529,"촌":871,"초":5803,"촉":534,"총":3611,"촬":196," 쥐":90," 짧":228,"취":1270,"책으로":129,"번길 ":62,"출":6229,"춘":568,"추":3536,"축":3493,"충":2231," 질":809,"춤":162," 진":3320," 직":1757," 지":18217," 짜":63," 징":255," 짐":132,"춰":72," 집":1925," 짓":61," 족":223," 조":9648,"챔":331," 존":1937,"품에 ":127," 졸":350,"채":1834,"책":2191," 좀":85," 좁":161," 종":5121," 좋":260," 좌":466," 젠":66,"찌":102,"찍":137,"찰":1432,"찾":410,"창":3609,"참":2285,"찬":711,"차":8220,"착":921," 주":18427," 죽":642,"쳐":1531," 중":15239," 준":801,"쳤":211," 줄":1147,"천":5793,"처":4102,"척":639,"첼":120,"첸":85,"체":12407,"청":5830,"첫":1006,"첩":200,"첨":248," 죄":242,"철":3109,"콤":255,"콥":103,"콩":581,"콰":92,"콕":119,"코":6706,"콜":1028,"가루 ":61,"콘":1127,"켰":242,"케":2788,"켄":203,"켈":258,"켓":371,"켜":465," 찍":69,"쿨":196,"쿤":100,"쿠":2769,"쿼":162,"쿄":510,"뮤직 ":109,"차지했":173,"쾌":99,"차지하":297,"차지한":93,"카":9618,"칼":897,"칸":851,"칠":510,"칭":3334,"침":1007,"칩":118,"친":1771,"칙":994,"치":17866,"층":1482,"츠":2145,"측":1172,"컷":121,"컴":2082,"컵":693,"컬":664,"컫":637,"컨":597,"커":1445,"하게 ":1996,"캡":77,"캠":218,"캘":291," 쪽":266,"캐":1331,"캔":130,"캄":196," 씨":235,"줄":1396,"준":5183,"중":19356," 알":4152," 안":3009," 않":2884,"상업 ":66," 아":13909," 악":593," 앞":452," 앙":302," 압":445," 암":698," 앨":831," 애":1646," 액":406," 앤":321,"남서부":201," 약":2469," 야":1729,"쥐":185," 앵":118," 얇":61," 양":2789,"즌":623,"즉":849,"즈":4475," 어":3828," 억":453,"즐":322," 얼":375,"증":2825," 얻":540," 언":2186,"즘":584," 업":1023," 엄":237," 없":1907," 엔":883," 에":9796," 엑":221," 엘":468,"직":4986,"지":53776," 엠":95,"진":12339,"질":3981," 역":3833," 여":5825,"짓":174," 연":8997,"짐":242,"집":3847," 열":2188,"징":1361," 염":317,"짜":260,"짝":66," 영":8578," 였":167,"짧":228," 옆":106," 예":3334,"째":2610," 옛":404,"젝":447,"제":29441,"정":32064,"접":3114,"점":4065,"절":1922,"젊":125,"전":32225,"저":3863,"적":24870,"져":2433,"젤":356,"젠":397,"졌":1373,"남은 ":81,"졸":392,"족":4176,"설립 ":259,"조":17636,"존":3442,"종":10038,"좁":164,"좀":88,"좌":857,"좋":275," 쓰":2084," 쓴":417," 쓸":74,"죄":910,"주":32946,"죽":799,"제목으":67,"때까지":243," 음":3514," 
읍":148," 은":18917," 을":2254," 의":13217," 응":652," 율":199," 유":10518," 육":1054," 윤":588," 으":1482," 융":126," 잎":227," 있":30440," 잉":461,"내어 ":68," 임":1668," 입":1910," 잇":347," 잃":123," 잘":752," 잔":258," 작":5027," 자":10494," 읽":214," 일":35680," 인":14925," 익":323," 이":43056,"가니스":126," 잡":554," 잠":484," 장":5226," 잭":118," 재":4073,"쯤":58," 전":16035," 적":1958," 저":2089," 절":733," 젊":120," 정":13855," 점":1342," 접":1777," 제":14773,"록된 ":160," 왜":181," 왕":3468," 왔":270," 와":1869," 완":998," 옹":146,"이하 ":136," 옵":60," 옷":75," 온":1154," 올":1715," 옮":203," 오":7708," 옥":336," 욕":102," 요":2831," 왼":84," 외":2265," 원":4962," 월":24581,"쪽":5565," 움":282," 웅":75," 워":458," 우":4532," 운":3277," 울":664,"이한 ":104," 용":2153,"킨다 ":728," 위":16296," 윌":374,"쟁에서":112," 윈":737," 웰":69,"쫓":57," 웹":637," 웨":474,"엘":1142," 쇼":552,"에":103036,"엑":271,"엔":1496,"었":13976," 쇠":160,"엇":117,"없":2196,"업":9251,"엄":815," 수":15278," 숙":319," 순":1149,"염":794,"엽":300,"연":14124,"열":3577,"역":14028,"여":21379,"엠":137,"인한 ":217,"양":9401,"얇":61,"얄":66," 송":804,"얀":299,"얼":821,"언":4279,"남의 ":128,"얻":540,"어":34822,"억":684," 셰":135,"앗":139,"압":954,"암":1495,"앙":2107," 셸":66,"았":1705,"앞":482,"아":34158,"악":3379,"않":2900,"안":8284,"알":4503," 손":892,"앵":203," 솔":307,"약":5189,"야":6719,"애":2683,"액":892,"앤":486,"앨":898," 소":11517," 속":3191," 성":6301," 센":729," 섹":60," 세":12291," 설":6591," 선":5904," 섬":1862," 섭":109,"살인 ":59," 셀":192,"씬":77,"씨":1169,"씩":216," 셋":131," 샤":410,"쓸":88," 섞":117," 서":10967," 석":713,"ال":101,"쓰":3096,"쓴":426," 삭":82," 사":27351," 삼":1735," 삽":115," 살":1329," 삶":242," 산":3915," 샌":184," 색":449," 새":1434," 상":6949," 생":4620," 샘":83,"·기":95,"쑤":93,"하고 ":9739,"천시 ":225,"내에 ":438,"쟁":2447,"잭":163,"재":12501,"잡":1143,"잠":605,"장":20415,"잘":775,"자":37700,"작":11270,"잔":750,"잉":715,"있":31718,"잎":326,"잃":128,"잇":387,"입":4066,"임":6953,"익":1489,"이":165423,"읽":225,"일":45548,"인":47192,"응":1476," 썼":110,"의":128021,"을":57826,"은":55316,"음":8005,"읍":818," 써":106,"융":702,"으":50795,"유":15783,"육":5384,"윤":825,"율":1139,"윗":76,"위":24705,"윈":893,"윌":390," 쌍":317,"웨":2585,"웠":203," 쌀":75,"웹":654," 쌓":108,"웰":170," 십":304,"워":2364," 심":1298,"웅":321," 실":3412,"움":1073," 신":6386,"웃":221," 싸":282,"월":25278,"원":21744," 싱":605,"용":15333,"울":4973," 시":17424," 식":1730,"우":13058,"욱":203,"운":8938," 슬":354," 스":7245,"상에 ":382," 승":1448,"요":6682,"욕":736," 습":160,"아나톨":89," 슈":770,"왼":87,"외":4313,"완":1509,"와":17969," 쉬":131,"왈":71,"왕":5728,"왔":670," 쉽":226,"왜":332,"오":14295,"옥":684,"온":2639,"옮":206,"올":2277,"옷":147,"옵":67,"옴":62,"옹":454," 술":278,"였":9350,"영":13611,"옆":120," 숨":146,"예":5314," 숫":155,"옌":106," 숭":142," 숲":99,"옛":409,"솔":474,"손":1670,"속":9079,"소":20178,"하계 ":200,"송":3868," 뷰":70,"쇄":396," 블":858,"아래 ":167,"쇼":889,"쇠":250," 브":1829,"섞":131,"석":3098,"서":56814,"센":1955,"섹":86,"세":16513,"성":22377,"섬":2116,"섭":330,"섯":386,"설":10723,"선":15146,"션":2286,"셜":299,"셔":528,"셋":215,"셈":160,"셉":104,"셀":498," 붙":702,"셸":100,"셰":320," 붕":186," 불":4668," 붉":127," 부":9201," 북":4515," 분":5145," 벤":222," 벡":158," 베":2451," 벨":486,"전부터":113," 벼":76," 벽":136," 별":990," 변":2214,"삭":178,"사":52637,"삶의 ":107,"삶":243,"살":2436,"산":15857," 병":912,"삼":2361,"삽":121,"상":21682,"색":2020,"새":1818,"샌":195," 보":8766," 복":1706," 본":2950,"샘":118," 볼":809,"생":10052," 봄":131,"샤":806," 봉":588,"샹":63," 방":5542," 밤":177," 밥":73," 밴":412," 백":1337," 배":2328," 뱀":75,"청사 ":99,"아랍 ":69," 번":5470," 벌":737,"은행이":86," 버":1308," 벗":132," 법":2859," 범":953," 뿐":232,"일컫는":617," 뿌":163,"선되었":89,"따로 ":85,"쏘":63," 뽑":105,"·관":58,"·공":88,"쌓":116,"쌍":397,"쌀":100,"법과 
":132,"썼":127,"·교":69,"써":1331,"스":47455,"슨":692,"슭":65,"슬":1694," 뻗":111,"슷":450,"책이다":168,"슴":118,"습":1034,"승":3768,"슈":1775,"슐":101,"ن ":67,"·경":73,"실":7842,"신":14225,"십":781,"심":4418," 뼈":65,"싱":1106,"싼":122,"싸":555,"삶을 ":67,"시":47707,"식":10614,"숫":164,"숨":182," 빈":391," 빌":529,"숭":331,"술":6046," 비":6448," 빅":231," 빛":315," 빙":124,"숲":113,"숙":641,"수":29032,"순":2152,"쉬":296,"입출력":62,"쉽":270," 빨":157," 빠":372," 빼":98," 빵":61,"이프 ":175,"·연":85,"남서쪽":252,"·유":61,"·일":61,"·의":63,"·이":73,"아날로":71,"·정":112,"·전":81,"따라 ":1673,"남자 ":113,"·조":86,"문화 ":681,"학과 ":472,"·중":79," 표":2914,"양군 ":70," 풋":113," 품":311," 풍":535," 풀":299," 푸":531,"·수":74," 프":8266," 플":1603," 퓨":70," 필":1793," 핀":310," 한":15848," 하":15057," 학":3148,"·시":65," 피":2290," 픽":92," 행":4241," 했":855," 핫":66," 합":1642," 함":2804," 항":2107," 할":1260," 핸":86," 해":6139," 핵":492," 헌":637," 허":1093," 헝":262," 향":878," 혁":595," 현":7804," 혈":203," 협":1285," 혐":108," 형":3039," 혜":164," 헤":908," 헨":184," 헬":298," 호":4483," 혹":1415," 혼":618," 환":1277," 활":2664," 화":2857," 확":1802," 홋":133," 홈":481," 홍":939," 홀":264," 황":1758," 획":175," 회":3401," 횡":97," 효":1076," 후":4257," 훈":284,"년까지":1214," 휘":398," 휴":496," 흉":74," 흘":204," 흙":65," 흐":416," 흑":305," 흔":541," 희":405," 흥":265," 흡":176," 흰":126," 힌":195," 히":1017," 힘":385," 힙":241," 힐":117,"퓨":1797," 탈":393," 탄":758," 타":2520," 탁":97," 택":119," 태":2959," 탕":57," 탑":249," 탐":420," 킬":163,"풍":1318,"품":4217,"풋":127," 키":961,"풀":607," 킹":119,"·비":57,"푸":1058," 크":2443," 큰":1228," 클":1230,"표":6870," 큐":81,"퐁":58," 퀴":87," 퀸":110,"헨":491,"헬":461,"헤":1267,"험":1933,"헝":266," 퇴":255,"허":1930,"헌":1331,"향":3265," 통":6217," 톰":97,"했":6623,"행":10972," 톤":103,"햄":124," 토":1698,"해":24304,"핵":714,"핸":99,"할":7046,"학교 ":1462,"합":7081,"함":6807,"핫":70,"항":4366,"핑":263,"하":77112,"학":20317,"한":70049,"핀":882,"필":2371," 텔":530," 텐":79,"피":5014,"픽":1356," 텍":216," 테":1319," 터":808,"례로 ":60," 털":121,"프":15322,"책임을":92,"플":2945,"픈":291,"훌":63,"훈":752,"후":8266,"·사":104," 티":587," 파":4031," 팀":912," 팝":144,"훼":98," 팔":592," 판":1566,"회":16026,"획":1122," 특":3708," 트":1409,"효":1679," 틀":69,"횡":121,"프라 ":59,"로드 ":165,"홀":471,"홍":1228,"홉":95,"홈":507,"홋":134,"화":22028,"확":2379,"활":4194,"환":3339," 튀":144,"황":3546," 튜":103,"혁":1204,"혀":399,"현":10322,"혈":413," 투":1343,"혐":136,"협":3198,"혔":85," 툴":85,"형":7186,"혜":431,"호":10715,"혹":1549,"혼":1062," 폴":656," 폰":261," 포":4550," 폭":878,"힙":246," 편":1366,"힘":444," 펼":268,"힐":206,"히":4751,"힌":432," 펠":137," 폐":695,"·소":58," 평":2091,"흔":563,"흐":924,"흑":351," 퍼":552,"흘":226,"흙":94,"·서":57,"흉":89,"흰":134," 펑":85," 펜":287," 페":1515,"흥":1435," 펄":70,"흡":255," 펀":62,"희":960," 펌":73,"휘":984," 팽":80," 팩":61," 패":1010," 팬":166,"나지 ":80,"휴":649,"키":6923,"킬":397,"킨":1373,"킷":112,"킴":109,"킹":435," 철":1617," 청":1846," 첨":172," 첫":1002," 체":2408,"탠":112," 초":3045," 촉":322," 촌":94,"탱":122," 총":2665,"타":10431,"탁":475,"탄":2414,"탈":2257,"탑":422,"탐":512,"탕":687,"택":1124,"태":6303," 책":837," 채":1344,"퀸":127," 챔":308,"퀴":217," 찬":190," 착":270," 차":4515," 창":2458," 찾":380," 참":2071,"퀘":136," 찰":178,"크":10562," 처":2216," 척":276,"큼":124," 천":2298,"큰":1271,"러스 ":211,"클":2167,"가능성":147,"큐":382,"통":12139,"톱":213,"톰":149,"톤":509,"토":6975,"톡":100,"톨":856," 춤":75," 충":1498,"퇴":541,"낮은 ":181," 취":867," 최":3844,"털":869,"턴":933,"터":15066,"턱":68," 촬":156," 춘":234," 출":3768," 추":2201," 축":1679,"·문":85,"템":1444,"텐":677,"텔":1214,"텍":454,"테":4250," 캠":159,"튀":186," 캡":60,"설되었":185,"튜":408,"특":6554,"트":19866,"튼":328," 커":696,"틀":1105," 컬":177," 컨":381," 컵":92," 컴":1913," 켄":86," 케":706," 
켈":106,"틴":890,"틱":267,"티":5222,"틸":257," 측":539," 층":841,"투":4120," 치":1335," 친":965,"툼":59,"툰":121,"툴":145," 칭":340," 칩":93," 침":590,"퉁":60," 칠":288," 칼":602," 칸":291," 카":3225," 캄":163," 캘":285," 캔":83," 캐":1108,"펑":115,"내외 ":104,"펙":119,"페":3911,"펜":502,"펀":122,"펄":94,"펌":98,"펴":73,"편":2910,"펼":268,"펠":315,"폐":1040," 쿼":94," 쿠":694," 쿨":64,"평":3725,"폴":1296,"폰":647,"폼":238,"강남구":284," 퀘":74,"포":9485,"폭":1205,"팜":57,"·보":96,"팝":166,"판":4057,"팔":900,"파":8561,"팀":1526,"팅":740," 콘":727," 콜":444,"팽":145," 코":2601,"팩":183,"패":1550,"팬":246,"팡":90," 콩":260," 콤":104,"퍼":1823," 쾌":61,"정부가":179,"쟁이다":79,"라이터":66,"쟁으로":65,"사실상":154,"창원시":139,"키는 ":1035,"정부기":114,"라이트":130,"정보를":393,"미상 ":70,"정신 ":68,"라이프":136,"건강 ":58,"양과 ":150,"체로서":85,"네덜란":393,"세로 ":103,"사실을":90,"태가 ":128,"이후 ":1284,"이탈리":1091,"정보의":92,"조로 ":180,"정보원":60,"간단한":90,"응하는":87,"한국 ":743,"간단히":170,"초기에":174,"·철":281,"내셔널":212,"정부는":58,"이타마":68,"크를 ":175,"가며 ":64,"상위 ":110,"사전》":334,"초기의":150,"드리히":131,"각류 ":61,")":98,"(":82,"-":78,",":74,":":84,"정보교":91,"�":268,"가능한":325,"가능하":380,"정보기":60,"천문학":192,"자체를":63,"품의 ":269,"가를 ":483,"하기 ":3613,"프로 ":559,"사진 ":92,"상은 ":241,"거가 ":72,"상을 ":1272,"등록되":68,"내의 ":242,"로도 ":664,"산스크":81,"배를 ":124,"가리 ":150,"인해 ":467,"품을 ":440,"았던 ":129,"품은 ":147,"암동 ":90,"상의 ":1371,"나아가":97,"재지는":153,"로그래":368,"로그램":1142,"일한 ":393,"성되는":97,"정식 ":393,"상이 ":314,"이크로":588,"성된다":104,"상인 ":81,"가대표":141,"람으로":164,"량에 ":62,"품이 ":87,"람이다":230,"서버 ":134,"흔히 ":416,"클럽 ":144,"거리 ":240,"들에게":459,"곡가 ":133,"장한 ":247,"크바 ":154,"亂三並":62,"亂三三":97,"개봉한":58,"가의 ":726,"들어가":153,"고가 ":108,"처를 ":60,"초가 ":61,"정적 ":84,"가상의":121,"주가 ":198,"주간 ":66,"력과 ":185,"《삼국":90,"전시키":66,"거를 ":112,"낱말이":57,"전직 ":79,"차원 ":181,"쿠오카":77,"애를 ":72,"었고 ":892,"런던 ":156,"디아 ":131,"드에서":276,"널드 ":60,"들어서":94,"제이 ":61,"제인 ":105,"간을 ":392,"제일 ":77," �":76,"일하게":128,"려고 ":142,"미술 ":75,"디오 ":872,"간의 ":1177,"제작 ":170,"가장 ":2202,"청남도":310,"남성 ":89,"간은 ":169,"아라비":118,"청나라":162,"타공공":108,"라서 ":346,"박람회":78,"드워드":102,"사람에":109,"사람으":156,"사람의":207,"사람을":267,"사람은":77,"에게 ":2107,"각이 ":58,"산부 ":97,"사람이":649,"제의 ":613,"디언 ":96,"디어 ":334,"쓰인 ":79,"각을 ":142,"차와 ":63,"가사키":57,"사령관":124,"》에 ":263,"가인 ":211,"각의 ":236,"거대한":110,"들었다":123,"이하의":127,"풀로 ":63,"미스 ":121,"제정 ":71,"강에 ":127,"남시 ":83,"제적 ":226,"주고 ":119,"애니메":619,"결과를":92,"밴드이":74,"이해하":86,"방법 ":120,"갈의 ":77,"따서 ":170,"결과로":66,"타나 ":60,"재한 ":154,"》은 ":258,"》을 ":87,"려가 ":67,"타낸 ":57,"간인 ":57," ال":89,"간이 ":309,"차에 ":82,"》의 ":292,"재해 ":65,"인하여":149,"주군 ":164,"라마이":104,"양경찰":118,"러가지":95,"처럼 ":457,"업과 ":165,"세기 ":1033,"없고 ":95,"때로는":64,"전에는":239,"각종 ":375,"랫동안":75,"가수이":107,"주교 ":119,"제조 ":126,"정식으":79,"정신적":68,"정신을":88,"믹스 ":77,"가족 ":81,"이후로":131,"키나파":72,"亂之 ":165,"섬과 ":107,"개발과":79,"경기 ":281,"페테르":78,"포유류":74,"사법 ":61,"전적 ":75,"거래 ":75,"채무자":67,"젊은 ":80,"並國三":63,"들이며":112,"나에 ":58,"남부에":295,"정에 ":426,"무함마":66,"레고리":71,"크라이":244,"이트에":84,"절을 ":58,"亂亞 ":92,"전쟁 ":655,"이트의":60,"이트이":90,"전자 ":409,"아들로":224,"작해 ":65,"들이다":337,"키나와":91,"야기를":124,"상당한":67,"상대적":107,"크로아":167,"들이었":59,"발매되":369,"들이자":64,"발매된":203,"드컵 ":186,"개발사":89,"크로소":373,"라북도":260,"크로스":85,"박사 ":73,"네그로":106,"등에서":329,"亂丁 ":125,"산물 ":91,"크래프":177,"렀다 ":170,"개발된":147,"개발되":172,"안드레":67,"亂三 ":194,"서기 ":77,"새로 ":127,"안드로":92,"아들인":70,"아들이":502,"亂丘 ":69,"亂並 ":148,"체계에":64,"석기 ":98,"의회에":71,"저수지":79,"폐지되":230,"체계이":67,"체계적":153,"의회의":78,"문학의":69,"라스 ":165,"문학자":106,"이풀로":78,"포츠 ":283,"전주 ":94,"성군 ":116,"체고비":106,"성구 ":108,"вич":79,"드웨어":181,"문학에":61,"세계 ":1954,"점성술":118,"점이 ":245,"문화교":73,"간에 ":460,"가와 ":443,"프가 ":107,"장치를":95,"문학상":112,"개발에":94,"점의 
":91,"개발원":63,"개발을":139,"거로 ":203,"가에 ":200,"제에 ":283,"점을 ":418,"들에서":74,"체계를":103,"점은 ":106,"들어진":480,"들어지":82,"드이다":163,"찰스 ":128,"제어 ":96,"들어졌":219,"개발자":101,"무형문":79,"타는 ":77,"사람들":653,"박물관":464,"물학적":58,"문화를":243,"정이 ":195,"체는 ":221,"내버스":159,"성과 ":408,"나오 ":65,"나온 ":159,"물학자":103,"체결된":76,"정의 ":333,"야구장":186,"발생 ":60,"정을 ":810,"발매하":71,"발매한":190,"야구의":96,"나와 ":213,"잡한 ":94,"문화방":94,"정상적":60,"장치이":90,"정은 ":136,"성경 ":87,"개발하":315,"개발한":476,"남부의":85,"제와 ":152,"세가 ":207,"변경 ":58,"ич ":79,"얀마 ":62,"책을 ":239,"조사 ":371,"정안전":64,"책의 ":157,"아서 ":221,"작하여":246,"작하였":243,"문화예":200,"설된 ":121,"사스 ":116,"문화에":102,"크리스":321,"문화와":78,"결국 ":113,"미아 ":71,"앨범 ":191,"문화사":76,"따라서":342,"얻는 ":71,"라에 ":165,"레드 ":122,"전자의":59,"들을 ":1666,"중국 ":1103,"태는 ":61,"중구 ":288,"라엘 ":114,"들은 ":1170,"어도 ":114,"작했으":59,"들의 ":1830,"저장하":84,"문화체":1127,"사실 ":77,"문화의":338,"문화유":176,"정에서":302,"미야 ":93,"남부 ":379,"남북 ":100,"전쟁에":156,"문화재":744,"문화적":145,"전쟁이":130,"전쟁의":144,"전쟁을":114,"조선 ":921,"조성 ":57,"포인트":69,"참조 ":61,"크리트":128,"정책 ":233,"양력 ":94,"체를 ":702,"역과 ":276,"타나는":152,"클리드":62,"등에 ":576,"끝에 ":190,"亞之三":65,"표면에":74,"크립트":86,"들이 ":2342,"버드 ":86,"미에 ":63,"어권에":64,"타난다":90,"들인 ":119,"디미르":96,"타낸다":124,"뜻으로":193,"것도 ":92,"타내는":318,"박스 ":90,"뮤지컬":91,"미와 ":58,"계가 ":265,"뜻이다":146,"재판소":104,"랑스 ":981,"날로그":71,"게는 ":216,"새로운":612,"亞三並":82,"점으로":208,"亞三三":101,"亞並三":64,"자회사":108,"책은 ":69,"제였다":63,"점이다":97,"클라우":75,"클라이":98,"라비아":197,"중기 ":60,"클래식":69,"경과 ":111,"클래스":59,"반민족":66,"성공적":59,"자협회":65,"제에서":125,"세는 ":97,"들에 ":340,"세계대":169,"종류로":101,"아메리":675,"아르헨":188,"세계사":60,"성공하":63,"포지션":88,"야를 ":102,"정의된":86,"정의되":69,"정으로":195,"세계를":63,"어느 ":223,"힙합 ":161,"어는 ":592,"크로프":81,"책에 ":96,"삼성 ":130,"체로 ":467,"초기 ":285,"족문화":157,"클럽이":60,"야마 ":336,"성당 ":102,"정이다":317,"드인 ":58,"미생물":76,"여객 ":65,"장하기":60,"아마추":89,"장하고":143,"령과 ":60,"드의 ":963,"종류의":195,"종류이":108,"장한다":129,"장하는":775,"산시 ":253,"창설되":77,"성경의":62,"미사일":222,"개선 ":77,"떻게 ":119,"쳤다 ":134,"성격을":73,"잔티움":169,"임했다":113,"라인 ":429,"미의 ":189,"등의 ":1883,"등이 ":948,"가상 ":104,"선거로":128,"민사소":63,"생명 ":64,"철도의":57,"이후에":218,"입하는":64,"이후의":91,"람에 ":59,"란을 ":119,"방송 ":364,"전에서":232,"야로 ":92,"런던에":68,"랍어 ":64,"년경 ":243,"적용된":77,"적용되":101,"갈리아":61,"건담 ":64,"전차 ":64,"란의 ":103,"사망하":111,"사망한":67,"사망했":57,"알렉산":236,"미술관":126,"사무관":181,"사상 ":967,"민을 ":66,"란이 ":83,"아름다":121,"가서 ":69,"어가는":73,"산림청":142,"입하여":105,"입하였":66,"품부 ":541,"일환으":83,"민의 ":363,"타로 ":84,"나머지":129,"아르메":80,"러를 ":61,"크스 ":85,"년간 ":196,"로가 ":240,"주기 ":97,"풋볼 ":85,"민이 ":110,"히틀러":64,"거구에":103,"내부 ":106,"임하였":204,"알려졌":143,"알려져":958,"거구제":58,"중간 ":77,"반부터":67,"러리 ":73,"알려진":400,"알려지":101,"亞亂 ":111,"선거구":247,"적이고":205,"철도역":58,"제시하":89,"제시한":63,"전철 ":57,"서는 ":3203,"강력한":114,"선거는":100,"나서 ":76,"亞亞 ":112,"라의 ":696,"라이 ":172,"초구 ":243,"샤를 ":93,"가문의":115,"철도공":82,"亞之 ":255,"사무소":268,"사무실":1870,"적으로":6876,"새롭게":76,"적이다":168,"선교사":68,"아랍어":113,"저작권":130,"전체 ":252,"아래에":87,"亞三 ":239,"작하는":103,"적용하":90,"어난 ":785,"었기 ":131,"전으로":231,"전자기":148,"어나 ":278,"라운 ":61,"亞丁 ":83,"산성 ":74,"적이며":61,"방식 ":87,"려는 ":293,"키며 ":125,"작품은":98,"작품을":152,"작품으":141,"작품의":71,"작품이":311,"가수 ":210,"어날 ":66,"작하고":60,"작품에":75,"평화 ":62,"亞並 ":263,"년과 ":135,"방법이":266,"방법으":220,"설가이":93,"방법을":165,"민족 ":187,"결과 ":148,"방법원":75,"선거인":76,"따르는":60,"차의 ":172,"작했다":157,"선거에":114,"설계되":58,"설계된":59,"처리 ":166,"전이다":158,"래시 ":66,"라와 ":158,"넘는 ":95,"래식 ":58,"사무를":174,"생물 ":118,"점차 ":76,"따르면":285,"종로구":365,"등을 ":1775,"서기관":301,"서도 ":661,"듯이 ":69,"あああ":724,"점에서":270,"생들이":69,"가스 ":119,"등은 ":87,"유일하":126,"작사 ":62,"유일한":211,"반의 ":260,"일본식":166,"남동부":196,"유전자":195,"라트비":70,"터가 ":163,"반이 
":63,"이비드":96,"루고 ":252,"과는 ":512,"의자 ":89,"보고 ":146,"뒤를 ":85,"일반화":65,"익보호":63,"이브러":106,"의의 ":313,"타르 ":110,"밖의 ":84,"타를 ":73,"가타 ":92,"래픽 ":199,"드니 ":63,"흥을 ":81,"반은 ":58,"반을 ":232,"체가 ":464,"래한 ":106,"내로 ":72,"라틴어":370,"게르만":68,"남동쪽":276,"했다 ":3529,"공과대":62,"변경되":110,"바의 ":87,"일반직":142,"로레슬":124,"드는 ":685,"하다는":80,"체계 ":105,"바일 ":130,"사람 ":176,"일스 ":70,"바이 ":61,"이벤트":59,"자리에":188,"하다고":79,"의원 ":442,"가지의":94,"흥원 ":58,"검사 ":107,"하다가":230,"흐스탄":57,"강점기":429,"일본군":83,"자리잡":146,"일반적":1125,"일반의":63,"공공기":200,"일반인":59,"검색 ":91,"갖춘 ":115,"하느님":102,"보가 ":128,"동차가":80,"하로 ":57,"하는데":508,"용하지":112,"삼국지":82,"민지 ":138,"플리케":102,"용하였":172,"삼동 ":85,"해도 ":118,"발에 ":103,"공간을":116,"계로 ":314,"롤라이":60,"용해서":66,"공간의":61,"공간이":89,"공간에":115,"임무를":62,"필름 ":57,"용하여":659,"응을 ":89,"한나라":81,"자리를":100,"나들목":286,"이베이":60,"하나의":423,"하나이":1414,"하나인":156,"돌프 ":87,"뀌었다":80,"건물이":107,"되었는":98,"강으로":96,"레슬링":130,"가지로":201,"해결하":97,"하나였":68,"상당 ":305,"원후 ":60,"벨기에":181,"레슬러":60,"보》 ":158,"표한 ":280,"의와 ":140,"삼국시":89,"용하며":100,"결되어":107,"되었다":5986,"강이다":118,"되었던":228,"도쿠가":148,"용했다":68,"반에 ":227,"원회 ":556,"이스 ":652,"레스타":99,"도한 ":102,"차로이":131,"음이 ":93,"되었고":629,"이베리":109,"육청 ":61,"공개되":85,"이바지":269,"차례의":95,"용한다":393,"가지는":177,"음의 ":177,"미지 ":78,"용하는":892,"가진다":98,"흥에 ":88,"하라 ":165,"건설 ":140,"민주 ":136,"이미지":182,"워크에":63,"밖에 ":144,"음을 ":323,"응용 ":195,"의에 ":179,"곡동 ":69,"음은 ":101,"강원도":332,"용하기":171,"동차의":79,"동차이":57,"력에 ":158,"나무 ":204,"가지가":83,"든다 ":84,"견되었":130,"용하고":267,"가지고":705,"임명되":96,"타마 ":101,"해군의":95,"받아 ":360,"방송되":109,"방송된":59,"인민공":988,"울특별":2233,"방이 ":101,"크에 ":114,"워크를":60,"려운 ":66,"배우 ":114,"고도 ":1655,"고고학":81,"나라의":375,"나라에":236,"태로 ":448,"자료를":144,"인물이":326,"나라와":67,"방의 ":527,"삼각형":57,"나라이":90,"창단되":64,"원형 ":65,"산된 ":75,"제국 ":423,"방송국":244,"방송공":57,"방은 ":58,"미이다":94,"채널이":57,"방을 ":182,"타디움":74,"민운동":60,"로리다":73,"들도 ":99,"학대학":68,"행된 ":137,"자로서":148,"학명 ":83,"인먼트":266,"원한 ":71,"고구려":194,"쟁력 ":98,"나로서":116,"가치 ":92,"되었을":105,"되었으":1444,"합뉴스":77,"방사성":62,"이므로":99,"방사선":62,"됐으며":61,"되었지":87,"인물로":179,"이센 ":70,"로마의":132,"하면 ":687,"보기 ":63,"되어있":155,"하며 ":3509,"핀란드":235,"제공 ":74,"로마에":76,"천구 ":60,"이션 ":833,"정규 ":302,"고대 ":811,"보급 ":119,"천군 ":115,"이묘로":166,"법률 ":166,"정권 ":76,"한다는":384,"산광역":497,"했던 ":888,"련을 ":63,"나라가":92,"음에 ":134,"일리아":483,"발전 ":299,"광고 ":99,"사도 ":73,"력이 ":338,"인사 ":61,"가이자":300,"인리히":70,"쓰고 ":113,"점기 ":106,"력의 ":179,"동한 ":119,"베네치":57,"위치 ":109,"히브리":114,"하루 ":70,"인류학":75,"력을 ":1046,"곡가이":164,"고는 ":197,"간이다":99,"발생하":361,"발생한":413,"이상 ":271,"려의 ":141,"원칙으":57,"간으로":125,"럽에서":169,"거리에":131,"력은 ":109,"정구 ":74,"원칙이":63,"미에서":158,"한다고":160,"산당 ":105,"본과 ":65,"려져 ":976,"나라를":93,"정과 ":186,"민에게":63,"발이 ":61,"변경하":100,"가이며":162,"산동 ":114,"자라고":70,"이면서":108,"이사 ":68,"나라로":69,"살던 ":66,"방에 ":408,"베네수":63,"자라는":57,"행동 ":67,"하마 ":73,"밝은 ":60,"하도록":282,"미어리":61,"발의 ":57,"발음 ":82,"생긴 ":128,"인류의":69,"발을 ":206,"재를 ":220,"이를테":61,"련이 ":125,"거리를":101,"가이다":1033,"나라는":63,"허가 ":79,"로서 ":2731,"장되어":72,"음악 ":533,"크와 ":130,"받을 ":101,"발생시":61,"받은 ":355,"미얀마":91,"련의 ":320,"관급 ":76,"제가 ":421,"하를 ":73,"킬로미":80,"제도 ":358,"간에서":92,"석과 ":60,"제국에":76,"나라 ":614,"령에 ":106,"있어 ":371,"방식을":150,"인인 ":98,"방식으":206,"인이 ":565,"이전 ":225,"방식의":98,"방식이":202,"이저 ":232,"민주당":259,"받았다":382,"산되었":75,"받아들":112,"일의 ":934,"산면 ":79,"음으로":564,"민족주":86,"선과 ":211,"일이 ":179,"서구 ":260,"려시대":128,"민족이":83,"민족의":109,"일은 ":171,"일을 ":555,"원회가":79,"제공한":140,"제공항":114,"해당되":71,"제공하":545,"제국이":104,"제국의":612,"바에서":60,"제국을":60,"동일한":145,"가에서":199,"도지사":58,"유통 ":76,"이사회":105,"이자 ":2852,"표하는":228,"정된 ":457,"산드로":63,"관광 ":81,"사들이":79,"태를 
":373,"천광역":247,"함되어":129,"설계 ":121,"점기에":78,"산드리":57,"장소 ":58,"체나 ":63,"재산 ":60,"정권을":71,"합되어":80,"둘러싸":178,"나뉜다":143,"점기의":205,"위한 ":2048,"인의 ":1094,"천동 ":70,"상대로":69,"해를 ":391,"원하여":159,"설과 ":84,"위해 ":2768,"위치한":3432,"제거하":73,"위치하":1705,"례에 ":90,"위치해":340,"나로 ":1168,"민족문":161,"상대방":151,"크의 ":348,"후쿠오":77,"가운데":1210,"인은 ":257,"히로시":66,"정교회":151,"인을 ":537,"임에 ":72,"이션의":58,"이션을":57,"나누어":113,"위하 ":73,"이션이":144,"사립 ":60,"로스 ":346,"이사장":71,"터널 ":62,"원하기":109,"경기장":259,"경기이":114,"원하고":76,"졌고 ":82,"베리 ":112,"사망 ":84,"로미터":82,"제는 ":260,"이선스":78,"익을 ":208,"학문 ":67,"제국과":91,"계된 ":80,"정동 ":164,"이의 ":892,"개신교":188,"방송통":78,"공기 ":138,"계기로":63,"처드 ":102,"되자 ":84,"가였다":74,"관계 ":273,"생기는":109,"하드웨":180,"정구역":186,"로바키":85,"원하는":219,"정도 ":616,"정기관":402,"원한다":72,"령을 ":243,"터넷 ":430,"령은 ":67,"백작 ":70,"려진 ":441,"관과 ":150,"이상의":447,"방송하":72,"철도 ":369,"동조합":172,"생된 ":67,"이상이":81,"낸다 ":189,"인용 ":127,"위치에":98,"들기 ":69,"경기를":148,"레이더":57,"객이 ":71,"레이드":84,"령의 ":138,"유지하":156,"택되었":69,"산맥 ":122,"레일리":489,"정당 ":75,"반직 ":94,"령이 ":148,"공군 ":73,"키백과":58,"음악을":113,"음악의":121,"선거 ":137,"자본주":105,"음악이":74,"제강점":66,"로써 ":1184,"레이블":111,"위치를":101,"일에 ":1924,"쓰는 ":227,"로버트":170,"음악에":71,"베르 ":73,"함된다":101,"방송사":59,"후한 ":94,"루는 ":422,"경기에":89,"레이션":267,"등과 ":260,"방송의":67,"음에는":128,"해로 ":77,"방송이":79,"레이스":190,"방송을":83,"받지 ":116,"래했다":68,"레이시":146,"설가 ":74,"이용 ":83,"서관 ":112,"일어 ":137,"바이잔":107,"레임워":62,"레이저":82,"바이올":96,"바이오":87,"생각하":93,"강제 ":60,"음악가":117,"로부스":65,"래픽스":66,"자발적":68,"익에 ":75,"나를 ":121,"필리프":61,"필리핀":358,"낙동강":59,"같이 ":509,"보는 ":244,"반이다":146,"바이에":102,"과가 ":110,"레이오":85,"공고 ":59,"미터 ":147,"전기의":97,"했는데":97,"레이어":194,"전기적":63,"너가 ":60,"래하였":76,"개인 ":152,"이온 ":68,"레이크":71,"상남도":478,"이오 ":93,"례의 ":123,"경기도":912,"사단법":1681,"전까지":207,"래프트":205,"레이트":129,"과거 ":210,"레이터":99,"했다고":109,"인어 ":69,"개의 ":1769,"표현한":83,"표현하":178,"자베스":57,"인에 ":164,"보다 ":794,"이완 ":109,"이와 ":277,"했다는":112,"경기는":58,"생대 ":149,"크스주":60,"로부터":1174,"바이러":161,"룬다 ":177,"저기압":67,"강이 ":84,"백질 ":70,"공격 ":71,"반으로":349,"등급 ":68,"서가 ":115,"보건복":107,"별로 ":156,"이언 ":67,"나며 ":96,"이어 ":549,"개월 ":84,"계는 ":211,"장르의":62,"이에 ":1223,"키보드":63,"사를 ":1011,"같은 ":1766,"흑인 ":59,"제나 ":71,"경계를":120,"값이 ":80,"전광역":266,"겼다 ":99,"일부이":124,"동쪽에":157,"동쪽은":73,"동쪽으":984,"적도 ":70,"경계로":137,"프의 ":110,"결된 ":163,"값을 ":97,"강의 ":289,"두번째":73,"운행하":134,"표하였":66,"되지 ":512,"강을 ":180,"해당하":270,"해당한":149,"가진 ":547,"가지 ":694,"탈리아":1095,"강은 ":65,"이안 ":61,"공개 ":76,"이아 ":80,"냈다 ":524,"로비치":78,"공간 ":182,"나뉘어":85,"민주화":70,"유주의":100,"율적인":84,"율적으":85,"일부터":916,"타리오":112,"일본제":58,"일부로":118,"일본어":214,"일본에":462,"방지 ":78,"일본의":1226,"일본이":57,"일본인":94,"음식을":58,"유전체":77,"견된 ":134,"일부를":110,"음식이":66,"학부 ":102,"터는 ":441,"받았으":82,"임스 ":235,"민주주":682,"하려고":72,"고기 ":102,"의적 ":151,"하려는":180,"건물 ":86,"나다의":116,"표현이":66,"강서구":79,"임시 ":75,"민족행":60,"유전학":62,"사로 ":539,"감을 ":118,"고급 ":121,"이에서":435,"보급을":72,"관리 ":537,"이어서":70,"전동차":102,"강화 ":81,"방으로":94,"인에게":123,"례를 ":88,"이에른":91,"레마이":67,"발전에":578,"게임 ":779,"법률에":63,"벨라루":58,"장에 ":303,"보교환":61,"과대학":155,"포함해":70,"포함한":517,"포함하":554,"발전시":125,"량이 ":162,"계산 ":77,"두었다":75,"별도의":74,"족과 ":94,"작성하":88,"법률의":65,"법률이":85,"이었던":250,"이었다":1368,"발전을":350,"량을 ":270,"이언트":66,"계속 ":137,"량은 ":105,"인에서":57,"항구 ":67,"탄소 ":65,"히어로":65,"래이다":109,"일에는":57,"조금 ":68,"아가 ":238,"표적인":377,"보급하":100,"상가 ":71,"이용되":115,"국과 ":645,"크에서":153,"개혁 ":71,"자주 ":141,"이오스":98,"키아 ":114,"버스 ":485,"량의 ":196,"일어난":375,"일어나":250,"일어났":108,"이언츠":61,"난다 ":199,"유클리":61,"발전하":100,"참가했":85,"참가한":160,"참가하":322,"국계 ":80,"이었지":81,"이었으":387,"국경 ":65,"등록 ":59,"꼽힌다":72,"이어지":79,"보드 ":159,"거에는":64,"있지 
":161,"포항시":66,"이올린":74,"자신들":66,"고려대":68,"라크 ":131,"개최하":103,"동하는":170,"가톨릭":535,"해가 ":130,"항공 ":158,"방에서":152,"작이 ":58,"발전과":240,"본거지":83,"의정부":87,"발원하":120,"사기 ":58,"작은 ":650,"작을 ":162,"키에 ":57,"자인 ":552,"작의 ":72,"본격적":88,"동했다":59,"프를 ":82,"들면 ":66,"보고서":76,"반응을":70,"공립 ":63,"포함된":182,"바이트":101,"포함되":256,"자의 ":1228,"살고 ":126,"포하는":115,"나다 ":436,"동하고":132,"백악기":102,"산군 ":87,"합과 ":58,"조가 ":151,"자유 ":190,"산구 ":126,"방영된":79,"방영되":154,"들목 ":196,"음주의":62,"본관은":510,"함과 ":91,"나는 ":720,"국가 ":668,"작전 ":71,"크이다":81,"동하였":160,"키와 ":57,"이었고":95,"발전소":109,"강한 ":187,"유한 ":186,"전달하":101,"표이다":71,"공동으":122,"임시정":57,"의적인":85,"한글 ":77,"반적으":847,"나가와":69,"반적인":257,"과를 ":382,"이야기":454,"아과 ":60,"공동체":220,"구가 ":427,"산과 ":144,"의원이":92,"의원을":113,"필드 ":75,"이징 ":127,"이진 ":64,"뜻한다":497,"이지 ":498,"끼리 ":67,"인슈타":59,"뜻하는":380,"검은 ":78,"하는 ":18164,"품안전":65,"보내는":62,"으킨 ":120,"사관 ":76,"전력 ":61,"이즈 ":185,"개최된":194,"개최되":218,"입이 ":60,"하거나":693,"이스하":64,"랑크 ":135,"사고 ":119,"라틴 ":103,"위탁집":59,"내는 ":646,"있을 ":239,"자와 ":463,"쓴다 ":92,"입자 ":81,"내각의":69,"내각이":64,"드라마":492,"것이 ":936,"키의 ":170,"휘하는":77,"라트 ":74,"일시적":63,"찬가지":135,"것의 ":67,"남도 ":1094,"관련 ":511,"잎은 ":109,"것을 ":2109,"것은 ":829,"차는 ":139,"백제의":59,"램으로":79,"차관급":64,"램이다":179,"사건 ":437,"의자들":60,"고등학":392,"자원 ":102,"인지 ":119,"과로 ":135,"의인민":505,"작용 ":98,"교관 ":61,"이스볼":109,"것에 ":138,"드라이":156,"러운 ":114,"났다 ":378,"표준화":112,"방자치":117,"피드 ":77,"사가 ":831,"럼비아":66,"터내셔":76,"채널 ":133,"인조 ":92,"바지하":107,"바지함":89,"원회는":112,"베를린":147,"각한 ":85,"건이 ":129,"이스라":226,"배우이":125,"건은 ":246,"이스를":85,"건을 ":482,"이슬란":101,"이슬람":294,"발족하":147,"가한 ":185,"건의 ":226,"법상 ":135,"의에서":83,"베르트":104,"이시아":128,"해군 ":184,"각형 ":66,"임의 ":182,"작업 ":64,"입을 ":113,"자연 ":134,"입은 ":73,"표준이":73,"교구 ":85,"레스 ":275,"임이 ":89,"이스테":112,"베르크":152,"이스트":150,"뜻하며":87,"의약품":99,"로로 ":225,"법무부":66,"베리아":142,"하나 ":294,"인종 ":68,"이스에":70,"일정 ":91,"원회에":81,"이스의":66,"이중 ":62,"일제 ":401,"갖추고":82,"원회의":88,"전략 ":114,"위키백":58,"임은 ":107,"함께 ":1469,"임을 ":405,"자에 ":270,"방정식":221,"한국인":156,"한국전":116,"일종의":150,"보로 ":117,"일종으":266,"가치를":121,"한국어":281,"한국에":288,"로마 ":867,"러의 ":97,"프레임":130,"로만 ":108,"거운 ":93,"등교육":72,"한국의":765,"텐도 ":71,"이지만":374,"베스 ":74,"자연적":68,"있으면":59,"있으며":2859,"한국시":112,"임의의":76,"작업을":98,"프로이":170,"고를 ":213,"프로젝":410,"플라스":132,"프로야":90,"풍으로":73,"발트 ":82,"자체 ":74,"학기술":709,"번에 ":61,"품의약":58,"한국철":84,"일정한":279,"일종이":264,"사고이":102,"교가 ":168,"차를 ":208,"프로세":418,"보다는":163,"프로스":78,"학교이":155,"학교의":215,"이크 ":192,"번역 ":68,"학교와":105,"고리 ":71,"프로그":1505,"학교에":231,"사거리":82,"프라이":147,"건설되":84,"사고는":93,"로를 ":314,"남구 ":380,"내각 ":119,"사는 ":696,"프랑스":2191,"거의 ":466,"프레드":80,"한국농":75,"자역학":77,"프랑수":66,"프레스":87,"쟁의 ":196,"프로레":124,"한국사":95,"작으로":431,"자원의":107,"자원을":67,"프랑크":230,"작용을":104,"이집트":409,"료가 ":69,"거인 ":70,"자이다":1129,"쟁은 ":76,"자유민":74,"한국불":125,"쟁을 ":247,"품이다":304,"프로듀":150,"사건이":570,"사건의":71,"했기 ":58,"품으로":223,"사단 ":91,"사각형":73,"자이너":75,"하기도":550,"한국방":93,"사건으":140,"사건을":260,"사건은":231,"키스탄":273,"한국문":122,"사고로":88,"자유를":60,"사건에":107,"자유롭":67,"자유로":59,"터로 ":147,"이클 ":128,"정보 ":449,"자에게":301,"고려 ":339,"키지 ":73,"택시 ":63,"하고있":61,"하고자":204,"하던 ":520,"제명 ":69,"인정받":89,"《한국":70,"거에 ":119,"로듀서":129,"러싸고":66,"학교는":93,"아나 ":116,"한국기":62,"전라북":260,"고로 ":237,"은하 ":58,"조나 ":59,"임으로":241,"프란시":116,"쟁에 ":89,"자연과":58,"학교로":108,"한다 ":13903,"일으킨":137,"일으키":165,"전부 ":107,"학교를":131,"한국과":105,"한국교":74,"독특한":83,"려서 ":64,"나기 ":67,"임위원":62,"건에 ":136,"있으나":300,"가치가":81,"잡지 ":125,"타운 ":91,"자였다":159,"임이다":401,"되지만":116,"타워 ":79,"은행 ":219,"했고 ":365,"록되었":91,"방지하":84,"록되어":194,"레비전":409,"학과를":67,"본래 ":178,"으키는":87,"행과 ":57,"인정하":121,"임워크":62,"하다 
":1598,"인접해":357,"로몬 ":72,"자에서":67,"학교가":70,"터를 ":399,"인으로":806,"정도의":111,"인이다":2750,"정도이":95,"레옹 ":96,"레오 ":64,"보면 ":81,"인이나":74,"이전까":77,"안과 ":124,"터리 ":112,"재위 ":590,"타이 ":170,"이익에":60,"제를 ":765,"하나다":64,"공기업":62,"해당 ":227,"하나님":89,"럽의 ":196,"타의 ":138,"드로 ":276,"나가는":87,"정동으":58,"정동이":148,"있었고":57,"인이며":112,"하나로":1278,"일요일":80,"임에서":89,"있었다":516,"정되었":453,"정되어":273,"버스는":71,"이익을":123,"반한 ":62,"타인 ":187,"장소에":66,"재산을":59,"타임 ":62,"크톱 ":64,"제도로":95,"않고 ":518,"테네그":93,"러진 ":169,"제도를":150,"있었던":279,"종과 ":71,"제리 ":77,"탄의 ":170,"힘을 ":86,"프린스":68,"일이다":180,"버스를":60,"한때 ":76,"일원으":71,"본명 ":192,"탄을 ":70,"위하여":1507,"재의 ":594,"위해서":284,"항공사":85,"이치 ":126,"있어서":333,"안구 ":63,"량이다":57,"안군 ":67,"인이었":103,"량으로":84,"잠수함":77,"벌인 ":97,"악기 ":166,"제도에":143,"들로 ":446,"이전의":127,"함된 ":80,"법원 ":127,"이전에":294,"종교·":271,"보물 ":70,"해는 ":66,"인정되":99,"인이자":288,"종교 ":464,"발한 ":578,"전라남":362,"있었으":214,"일제강":66,"탄생하":60,"도하는":89,"프리카":740,"합된 ":92,"내기 ":85,"러시아":1259,"제도의":156,"제도이":151,"사나 ":68,"과거에":114,"재에 ":70,"개인이":78,"드를 ":373,"관계가":119,"로봇 ":68,"플레잉":80,"차례 ":100,"전문 ":280,"보를 ":504,"플레이":676,"플랫폼":184,"경되었":98,"제로 ":572,"자신의":370,"프리드":163,"자신이":152,"공격하":60,"계를 ":823,"차로 ":128,"계대전":148,"개인용":83,"개인의":86,"드르 ":75,"루가 ":76,"이외에":93,"레이 ":338,"이유는":64,"레인 ":73,"항공모":59,"정당이":122,"이외의":104,"레잉 ":75,"관광객":71,"프리미":129,"일에서":154,"공기관":197,"이오프":76,"레의 ":65,"공동 ":225,"이유로":136,"재산권":68,"건복지":107,"졌다 ":803,"플라이":63,"이용자":68,"인천 ":74,"플래시":78,"강조하":64,"상과 ":312,"장은 ":771,"장을 ":940,"차량 ":86,"정도로":127,"로벌 ":412,"정리 ":264,"정도를":60,"이이다":59,"렉산드":154,"관계를":280,"내고 ":139,"태양 ":74,"삼국 ":84,"둘째 ":87,"장의 ":441,"렉산더":61,"하드 ":77,"장이 ":399,"정된다":136,"게서 ":71,"프로토":225,"정되는":135,"관계에":153,"법에 ":402,"번의 ":336,"았고 ":97,"관광부":1127,"장인 ":112,"태에 ":107,"항공기":266,"이용해":188,"이용한":219,"이용하":540,"이용할":86,"플로리":82,"쓰다이":179," 권 ":193,"약으로":104,"외전화":76,"노 ":946,"류는 ":92,"야이다":152,"논 ":119,"본부 ":174,"송의 ":131,"독을 ":96," 국 ":173," 구 ":910," 군 ":386,"념 ":104,"도의 ":1235,"아카데":151,"약이다":130,"치해 ":314," 교 ":197,"편되면":82,"넷 ":554,"도인 ":130,"독의 ":116,"울의 ":64,"녀 ":215,"우수성":57,"이나 ":3015,"서태평":66,"페라이":66,"년 ":40320,"있기 ":171,"나게 ":58,"너 ":308,"수도권":102,"분과 ":75,"널 ":639,"성한 ":265,"독일 ":770,"도자 ":82,"독이 ":70,"수단이":125,"술로 ":87,"수단으":59,"수단은":92,"네 ":732,"수도는":71,"올라 ":62,"도서관":240,"영화제":129,"차가 ":177,"수를 ":679,"영화이":249,"영화의":83,"왕가의":62,"냐 ":176," 곳 ":60," 공 ":72,"영화에":74," 곧 ":122," 과 ":1102,"내 ":913,"왔다 ":379,"어와 ":330,"남 ":651,"치한 ":2915,"수립 ":59," 곡 ":60," 고 ":93,"낼 ":127,"언어 ":291,"낸 ":543,"나고 ":122,"날 ":645," 계 ":88," 경 ":182,"난 ":1514," 겸 ":249,"나 ":11468,"코나미":58,"우승을":222," 것 ":328,"어야 ":159,"르기 ":103,"우수한":99,"도와 ":396,"우주 ":186,"여러해":77,"끈 ":67,"우스의":77,"수도원":123,"불교 ":309,"동부에":445,"수도이":101,"수도인":69,"베스트":178,"세한 ":63,"열렸다":111,"포드 ":79,"움을 ":181,"어에 ":175,"성화 ":93,"로크 ":62,"오늘날":414,"으로 ":33705,"자기 ":286," 개 ":1973,"트페테":64,"북구 ":168,"월부터":252,"루마니":186,"바티칸":62,"자군 ":57,"오페라":314," 강 ":510,"페르디":70," 각 ":779," 가 ":1852,"도어 ":73,"퍼스 ":60," 간 ":172,"속씨식":90,"외교 ":73,"의된 ":59,"수도회":61,"도에 ":499,"번역되":60,"영등포":159,"파크 ":92,"기》":151,"오랜 ":123,"아트 ":64,"오래 ":72,"야에서":263,"나급 ":176,"이너 ":95,"잡고 ":153,"속에서":155,"련한 ":84,"대회의":76,"대회이":310,"로켓 ":60,"코드 ":238,"법인 ":238,"월에 ":731,"법이 ":149,"대회에":121,"소에서":153,"판에서":87,"바키아":85,"번주 ":58,"이는 ":2155,"박테리":63,"양으로":132,"연속 ":101,"수들이":80,"바탕으":333,"버지 ":158,"대회로":103,"올린 ":87,"어져 ":580,"대회를":64,"우승팀":125,"되면 ":130,"얻은 ":80,"원에 ":338,"께 ":1488,"보디아":75,"대항하":93,"대회는":163,"되며 ":566,"법의 ":384,"버스터":121,"와라 ":58,"파이다":83,"법을 ":510,"법은 ":260,"관ㆍ":62,"법상의":86,"안한 ":113,"범을 ":60,"범은 
":63,"이거나":117,"동물의":147,"동물이":83,"음력 ":492,"별시 ":2065,"러피언":110,"이곳에":70,"이더 ":126,"방향 ":69,"원산지":59,"커뮤니":173,"특히 ":652,"길 ":492,"연맹이":57,"기·":85,"이던 ":135,"긴 ":534,"러해살":77,"범죄 ":90,"김 ":69,"유럽과":60,"원은 ":642,"인다 ":433,"장군 ":139,"대한제":216,"어인 ":140,"원을 ":808,"르가 ":94,"대학을":73,"인구가":178,"대학으":61,"대학의":118,"윌리엄":254,"대학이":119,"부가 ":501,"원의 ":690,"역사 ":258,"판으로":77,"속이다":111,"글 ":394,"아케이":78,"용어이":331,"업에 ":215,"금 ":663,"급 ":1684,"여서 ":467,"원이 ":411,"미합중":60,"원인 ":98,"위성 ":107,"극 ":392,"치체를":60,"그 ":5244,"여섯 ":109,"여성 ":225,"으며 ":7980,"근 ":449,"으면 ":156,"파이어":96,"대회가":57,"소이다":165,"인구는":2448,"원자 ":102,"언을 ":82,"센터는":143,"약칭 ":297,"기 ":12721,"양에서":166,"속으로":235,"이도 ":156,"∼ ":117,"베이 ":167,"파일을":68,"원장 ":135,"요가 ":84,"어사전":65,"대한민":7350,"재가 ":151,"소재로":63,"귀 ":64,"용어는":192,"미한다":599,"카자흐":73,"대한불":58,"열리는":184,"렉트로":73,"미하는":189,"오피스":122,"손으로":97,"석하는":74,"이것이":59,"두는 ":58,"장관 ":152,"이것을":92,"이것은":282,"인간이":102,"인간의":257,"르게 ":297,"장과 ":256,"오피아":86,"이다 ":44553,"균 ":216,"규 ":410,"법적 ":132,"오를 ":79,"스가 ":772,"용어로":228,"어의 ":700,"센터에":60,"왕국이":67,"대해서":237,"왕국의":244,"파이터":67,"궁 ":201,"대하여":393,"대학에":115,"로젝트":389,"나가 ":143,"굴 ":183,"번째 ":1634,"국 ":12549,"대학원":221,"데이터":858,"구 ":8696,"구·":91,"국·":107,"용어를":67,"미하며":79,"델이다":57,"군 ":3195,"력한 ":146,"대학생":58,"번지 ":352,"르고 ":392,"권 ":1513,"유래되":83,"유래된":87,"되도록":63,"용자들":92,"에서의":511,"록한 ":134,"대하는":92,"법인이":2000,"법인의":65,"법인으":81,"껍질":59,"때 ":2169,"원숭이":76,"웨어 ":581,"애플 ":68,"련하여":63,"법정동":157,"된다는":107,"번주이":218,"딸 ":107,"병원 ":128,"딴 ":82,"손자이":109,"대표하":220,"인기를":112,"양주시":57,"오랫동":75,"따 ":83,"끄는 ":125,"꼬리":79,"용이다":73,"딩 ":636,"유러피":111,"리고 ":1415,"데이비":123,"용으로":300,"인된 ":59,"대표팀":149,"오래된":193,"용과 ":88,"리게 ":92,"대표적":477,"운영되":116,"외교관":176,"소장은":80," 개국":359," 개교":77," 가나":171," 객관":69,"용자가":142," 개관":128," 강남":325,"에서부":139," 간단":314,"대표이":78," 가능":764,"떤 ":678,"소유하":90,"파트 ":89," 가는":110," 가공":207," 각국":128,"대학교":1503,"꾸는":62,"소재지":313," 각각":403,"발행하":79," 가격":195," 개가":76," 강과":184,"당하였":79," 갖고":277,"른다 ":763,"에의 ":102," 감각":84,"유래했":74," 가깝":67," 가까":264,"인구의":90,"기초":552,"까지":4684,"기체":184,"도시 ":600,"유럽의":184,"인도 ":427,"균형":133,"유래하":101,"유래한":138,"득 ":112,"르기도":285,"드 ":7173,"포로 ":81,"근하":60,"유럽에":197,"김천":58,"안토니":87,"들 ":1322,"든 ":2179,"역번호":77,"우가 ":328,"기준":2706,"코드를":100,"길을":64,"장기 ":92,"길이":553,"센트럴":124,"기존":438,"기종":131,"에서만":99,"기지":219,"김정":94,"까운":158,"김일":103,"원전 ":1040,"까이":70,"버지는":99,"깊은":85,"버지니":97,"스과 ":57,"자나 ":108,"이기도":812,"르기까":96,"왕후 ":114,"대표로":57,"법으로":406,"기하":531,"기한":151,"디 ":655,"딕 ":78,"인데 ":220,"기획":281,"발하였":89,"기회":128,"기화":84,"발하여":147,"딘 ":125,"기호":268,"당한다":198,"되는데":137,"범으로":68,"기후":219,"본선 ":60," 년 ":39188,"김해":119,"당하는":446,"영상 ":190,"범이다":175,"인구밀":163,"속적인":79,"소재하":88,"속적으":84,"급한":91,"급하":294,"기타":646,"에서는":2546,"당했다":95,"코드는":96,"등 ":2664,"에서도":483,"유롭게":61,"법원에":72,"우에는":147,"도스 ":66,"법원이":60,"창군 ":59," 네 ":279,"이동 ":119,"법이다":392,"있던 ":494,"방행정":150,"일까지":745,"아키텍":133,"근에":408,"그와":139,"국화":64,"구회":87,"국회":507,"둘 ":109,"그에":296,"배하는":77,"극에":58,"구현":195,"구협":65,"둔 ":252,"규정":490,"국해":88,"국하":80,"국한":79,"급시":77,"두 ":2443,"규제":83,"급수":72,"구할":69,"구한":114,"구하":801,"기부":230,"기병":61,"당하고":80,"금속":226,"기본":591,"기보":62,"기법":290,"연방의":172,"기반":832,"당하기":83,"운영하":421,"교황":497,"부는 ":372,"구팀":94,"기물":83,"교환":215,"교회":1206,"술부 ":260,"귀족":311,"방향으":165,"교향":157,"르는 ":1003,"교하":132,"국토":367,"기리":116,"기름":86,"기를":1419,"베이스":456,"기록":1098,"기로":698,"될 ":726,"기의":1771,"기이":523,"기인":192,"기자":720,"기장":468,"기재":80,"김영":83,"기적":272,"기전":79,"기점":130,"뒤 ":432,"기와":342,"근처":198," 내 ":198,"업인 ":92," 낸 
":84,"기원":1347,"병의 ":61,"김씨":66,"우이다":123,"없이 ":416,"기압":128,"기아":138,"수록된":81,"수록되":127,"기에":2090,"기여":1114,"기어":60,"기억":172,"기업":1321,"금지":206," 남 ":62,"었을 ":154," 난 ":119,"급제":92,"양자역":76,"규칙":225," 날 ":143,"급을":163,"급으":81,"금의":358,"급은":66,"기슭":65,"기스":70,"금이":141,"급의":120,"워싱턴":116,"방해양":72,"금을":242,"금은":170,"금으":71,"금융":485,"급이":114,"기시":64," 나 ":149,"글이":148,"글을":102,"본식 ":161,"기소":84,"권한":204,"극적":138,"역시 ":1264,"작가로":60,"기수":80,"기술":2933,"어진 ":1828,"다》":68,"글자":184,"그이":134,"기사":297,"용자의":81,"의료 ":61,"극을":92,"그의":1129,"둥 ":134,"금액":59,"급에":136,"기서":167,"워진 ":158,"근의":142,"극장":238,"금에":66,"극작":92,"기생":113,"기상":277,"극이":61,"국외":69,"법칙 ":74,"국왕":350,"예루살":108,"구원":461,"국역":58,"국영":89,"국에":2267,"기기":316,"구역":938,"기까":238,"군에":440,"구와":486,"구장":488,"업을 ":671,"국인":683,"국이":927,"구자":155,"구인":96,"구이":427,"국의":7823,"공화":2272,"군의":841,"국장":157,"과하":139,"과학":2295,"군은":187,"군을":273,"군으":160,"공했":63,"업은 ":103,"급되":106,"공해":80,"공항":327,"공학":446,"공한":214,"공하":712,"국은":420,"국을":477,"공헌":123,"동 ":5208,"료의 ":83,"국으":324,"구의":1030,"국유":104,"이때 ":93,"기경":136,"근대":297,"곡한":152,"곡하":125,"기계":386,"기고":122,"교적":271,"벤처 ":66,"교정":87,"도 ":19093,"기관":3227,"독 ":328,"교의":1054,"그대":267,"교이":272,"교인":98,"교장":78,"고한":57,"고학":98,"고하":102,"구약":91,"공포":158,"기금":119,"돌 ":128,"포를 ":110,"그들":327,"국어":699,"구에":986,"골프":78,"있다 ":14608,"기구":741,"돈 ":121,"성하고":147,"도·":76,"국악":57,"교와":220,"구시":81,"성하기":80,"군수":86,"구스":198,"교에":493,"교역":61,"국수":106,"그녀":205,"계획":601,"위스 ":122,"로프 ":73,"업의 ":668,"계화":120,"운영에":81,"기갑":60,"기가":643,"교육":2560,"기간":605,"아테네":86,"그는":921,"교원":80,"병을 ":93,"데에서":64,"공통":193,"국식":78,"국시":294,"구사":107,"구상":120,"국사":242,"국산":101,"베이니":80,"경향":176,"용인시":67,"경험":178,"번지에":231,"경하":130,"업이 ":195,"국소":69,"구속":114,"그나":76,"구소":547,"금까":92,"구성":2022,"군산":96,"국세":95,"계한":64,"계학":71,"계하":113,"군사":580,"관측":213,"극복":58,"근본":126,"교통":744,"구치":82,"교토":91,"괴하":71,"포니아":254,"오디오":134,"의로 ":155,"기라":71,"버지이":112,"교파":61,"된 ":12222,"있는 ":7557,"리가 ":556,"기둥":76,"근무":149,"기들":73,"국철":110,"구체":168,"군청":66,"구축":271,"도모하":352,"도모할":62,"기능":861,"광학":98,"기는":959,"글로":653,"규범":69,"그룹":828,"권위":161,"그마":69,"기대":121,"그림":313,"그리":2785,"기다":78,"그린":273,"궁전":122,"기니":172,"그를":132,"인근에":94,"권이":270,"권익":212,"이드 ":468,"권의":270,"권을":623,"권은":116,"권으":128,"긴다":73,"기되":97,"돼 ":80,"글리":111,"기동":137,"권자":142,"기도":3690,"기독":739,"잇는 ":257,"관한":1484,"관하":360,"국제":2470,"규모":583,"국정":128,"관해":84,"이들 ":222,"기나":120,"관할":563,"구적":61,"군인":461,"군이":624,"구제":159,"국적":195,"국전":133,"공회":208,"규명":89,"괄하":139,"교차":238,"르다 ":174,"그러":521,"그런":74,"그랑":78,"그라":318,"구조":960,"그램":1195,"그래":891,"군정":60,"그랜":101,"교체":59,"그로":269,"글라":119,"번째로":469,"그렇":75,"관현":159,"그레":393,"국주":73,"그려":165,"기념":619,"권에":285,"구지":95,"근로":136,"글러":60,"국지":159,"그루":101,"군주":362,"글랜":378,"계약":484,"여왕 ":58,"결정":751,"계열":387,"광범":88,"교들":65,"경우":1823,"계에":1064,"고시":148,"계와":142,"역사가":122,"국농":82,"고스":110,"겸임":57,"고슬":67,"육상 ":106,"고서":106,"격자":62,"倉丘 ":68,"견을":57,"격적":116,"교도":126,"것처":82,"광물":81,"곡선":132,"고성":119,"구나":78,"고속":845,"결의":140,"경에":408,"교동":80,"결을":132,"국내":616,"경영":484,"걸친":86,"권과":106,"결이":61,"경연":59,"이끌어":63,"계의":805,"구대":120,"계이":169,"과사":639,"국대":144,"계인":107,"계자":130,"군대":278,"언어학":160,"계적":562,"倉三 ":204,"경주":361,"국도":235,"구동":102,"경유":80,"겨지":139,"겨진":136,"경으":205,"경은":67,"경을":557,"경의":245,"공산":474,"경인":98,"공사":628,"경이":223,"광부":1133,"경쟁":408,"구는":2663,"계유":60,"倉丁 ":151,"경전":188,"경제":1825,"구단":288,"고와":65,"국들":75,"곡에":64,"교로":205,"고원":88,"관심":174,"율리우":65,"고용":223,"광산":147,"관습":89,"아프가":115,"고안":197,"군도":75,"구되":67,"관성":72,"관세":134,"료에 ":73,"구들":66,"외한 
":214,"고에":169,"경지":70,"고양":226,"공식":660,"공신":113,"더 ":1406,"공업":378,"공에":64,"덕 ":151,"경찰":561,"고정":166,"곳에":821,"고전":364,"던 ":4707,"교문":57,"공연":355,"구라":68,"고조":104,"공여":69,"권도":62,"공예":112,"공영":61,"고종":145,"고유":256,"교리":113,"곡은":123,"곡으":120,"고의":311,"교류":510,"고위":328,"교를":330,"거하":226,"곡을":163,"고인":82,"고이":182,"곡의":107,"고있":175,"곡이":253,"고자":344,"공적":170,"공전":79,"구르":74,"구를":602,"구름":70,"계층":171,"국립":960,"과이":162,"구매":106,"공정":235,"구리":185,"공제":58,"과의":611,"구마":87,"공익":127,"공인":185,"덤 ":58,"공작":278,"공자":115,"공의":311,"공이":148,"에스페":64,"교법":60,"공장":150,"곳을":106,"잉글랜":374,"곳은":97,"곳으":142,"고지":182,"곳이":283,"공으":88,"에스파":105,"공을":239,"관여":73,"관에":348,"과와":60,"곳의":66,"공유":261,"구려":197,"공원":601,"구로":546,"계청":60,"국령":72,"과에":332,"공용":107,"관악":84,"격파":66,"델 ":199,"고체":92,"관적":103,"에스토":68,"관점":111,"덴 ":238,"공직":57,"국문":157,"국무":264,"과정":1002,"과적":120,"데 ":4107,"공중":115,"격투":81,"벗어나":80,"과제":59,"궤도":196,"광양":81,"공주":233,"관장":263,"광역":1560,"번이다":102,"세포의":69,"과일":57,"관위":96,"관으":1430,"교부":78,"倉之 ":137,"관의":2174,"규격":148,"관을":436,"관은":726,"관인":90,"관이":1515,"구멍":69,"근거":264,"격하":210,"격한":92,"권리":429,"국불":143,"그것":304,"구분":457,"계통":173,"곤충":101,"교사":363,"국보":135,"우치 ":58,"그가":220,"군벌":62,"구별":243,"관직":120,"광장":110,"역에 ":645,"국방":338,"국민":1323,"구밀":163,"권력":210,"구미":92,"성한다":77,"급격":65,"고타":71,"달하는":148,"결혼":181,"페르시":179,"고쿠":186,"금강":116,"성하는":565,"과천":96,"관차":147,"관찰":160,"근교":78,"광진":58,"광지":58,"교수":610,"발표된":92,"발표되":107,"결합":346,"결하":438,"결한":100,"웨이 ":207,"倉並 ":110,"견해":92,"군부":82,"광주":542,"견한":104,"견하":126,"는 ":115688,"언어에":73,"관광":1592,"관구":95,"늘 ":65,"관급":89,"광객":71,"가축":73,"언어의":102,"고는":199,"언어이":170,"광고":267,"고대":997,"능 ":323,"가치":461,"고도":1752,"경로":111,"건설":619,"곡동":96,"경력":62,"의도동":78,"연안 ":64,"입된 ":104,"개척":110,"갖춘":117,"검색":157,"쟁과 ":63,"고등":647,"계로":351,"개체":157,"곡들":64,"검사":367,"객체":131,"거스":89,"갖추":158,"개최":745,"가타":160,"골든":61,"공단":148,"경마":75,"과는":512,"계를":823,"개칭":76,"공되":110,"과대":160,"공동":868,"가톨":538,"도메인":70,"닉 ":179,"계몽":70,"과도":95,"니 ":1434,"독립하":59,"게서":76,"닌 ":658,"고래":67,"고라":68,"고로":318,"거에":255,"고려":896,"관되":68,"가포":95,"건에":176,"경보":91,"게시":63,"골라":80,"고르":135,"고를":213,"작가이":229,"교가":179,"작곡가":402,"고리":445,"닝 ":121,"거의":471,"예술 ":247,"님 ":88,"거운":99,"도사 ":75,"경부":199,"경북":127,"건이":681,"가했":110,"거점":73,"것에":206,"교과":83,"거제":96,"교관":186,"곡면":66,"다 ":111349,"간하":67,"닥 ":58,"간한":58,"자는 ":1091,"번역하":66,"거인":110,"거이":70,"개통":175,"경비":85,"건으":192,"건은":246,"가하":646,"가한":227,"료와 ":63,"도모를":115,"건을":482,"검역":179,"건의":278,"각한":97,"가해":91,"각하":172,"간호":75,"달 ":337,"성하여":117,"성하였":153,"결성":349,"벌어진":238,"거주":408,"공룡":180,"과로":156,"검은":129,"단 ":1654,"각형":159,"교구":291,"건전":253,"콜라 ":66,"간행":103,"공로":98,"건조":153,"고문":120,"고무":61,"경상":955,"담 ":279,"검정":63,"강한":188,"강하":127,"관료":227,"관련":1378,"것을":2110,"아파트":117,"것의":67,"것으":1506,"게오":79,"것은":830,"개편":325,"공립":95,"예수 ":112,"여야 ":87,"것인":86,"것이":2686,"거지":151,"계선":74,"강화":343,"공무":742,"구경":84,"개한":63,"국경":557,"국계":88,"개하":180,"관리":2036,"고분":99,"공모":103,"언어를":125,"당 ":2124,"게임":2275,"게이":356,"계사":104,"계산":303,"경성":115,"구가":459,"구간":259,"과를":382,"결승":176,"국가":3195,"구개":146,"거창":63,"계수":70," ∼ ":97,"군과":252,"국교":130,"관목":62,"구권":57,"국군":183,"개화":66,"계속":320,"괴되":57,"고비":130,"개혁":328,"구관":145,"국고":62,"구광":218,"대 ":7820,"과목":73,"국과":776,"국공":136,"국관":64,"의를 ":445,"언어로":146,"계승":345,"국기":252,"거쳐":379,"구기":227,"격에":68,"공법":70,"관문":69,"구글":106,"루스 ":303,"거치":92,"격이":190,"격의":79,"고사":138,"건축":557,"격을":325,"검찰":214,"격으":151,"변에 
":94,"교단":121,"교는":208,"걸쳐":621,"결에":59,"공부":145,"이끄는":118,"겨울":101,"개미":141,"건너":96,"언어들":71,"개발":2877,"개방":130,"독립적":105,"거나":1557,"우주의":58,"본명은":189,"으로부":448,"개막":77,"각본":92," 길 ":123,"간부":65,"개명":89,"어에서":253,"강릉":107," 기 ":194,"개를":146,"가보":96,"것과":204," 긴 ":204,"감소":107,"게는":217,"자금을":57,"계가":271,"것도":92,"강서":89,"경계":516,"경과":169,"가수":461,"으로서":1111,"결과":527,"감사":119,"가스":350,"격기":81,"결국":131,"가슴":62,"가시":211,"올랐다":64,"울주군":82,"가설":126,"가서":87,"건담":81,"언어는":67,"간섭":83,"간선":71,"가속":96,"온라인":297,"거두":93,"갈색":63,"거는":139,"개별":97,"독립을":81,"개봉":161,"가사":265,"가산":81,"으로써":965,"우즈베":76,"거대":185,"가상":319,"독립운":225,"간을":392,"건립":125,"간의":1187,"감염":98,"간인":100,"동부 ":460,"간이":504,"가입":188,"경남":138,"가자":80,"각이":151,"가장":2280,"각자":67,"간으":125,"간은":170,"가을":69,"가의":732,"각을":142,"가인":226,"각의":239,"가이":1724,"거를":112,"고가":145,"곡가":419,"거리":759,"고객":146,"아티아":132,"가요":88,"가우":57,"가운":1240,"갈어":69,"발행되":71,"간에":601,"겠다":82,"개신":194,"가와":549,"개시":97,"가였":116,"가오":62,"각에":105,"거로":219,"계기":156,"계급":233,"가에":444,"왜란 ":94,"가야":126,"개수":58,"이란 ":1323,"거래":321,"경기":2213,"겨난":61,"이라 ":885,"것들":114,"감시":118,"계관":69,"개성":108,"계곡":114,"개선":303,"개설":104,"우주선":91,"느 ":258,"아티스":65,"이래 ":159,"공급":250,"관계":1290,"계된":88,"공기":688,"개조":143,"계되":101,"대통령":1050,"관과":175,"건복":108,"발표하":134,"발표한":276,"개장":97,"거부":132,"공국":116,"공군":148,"발표했":57,"으로만":64,"개정":217,"강점":455,"공고":79,"과가":110,"개의":1771,"계대":179,"공공":459,"공과":122,"강제":215,"개인":627,"강조":121,"경되":110,"과거":402,"객이":82,"오브 ":217,"개입":58,"변의 ":108,"늄 ":94,"감정":123,"개원":65,"개월":142,"간지":80,"공격":404,"계는":211,"같은":1786,"외국인":172,"같이":515,"게르":135,"공간":766,"겼다":112,"값이":110,"강의":343,"강이":240,"강은":66,"강을":182,"가지":2292,"가진":654,"공개":410,"강으":96,"각지":87,"고급":165,"감을":118,"결되":247,"결된":216,"값을":97,"수많은":217,"간주":126,"건물":382,"고기":355,"강원":385,"각종":393,"작된 ":349,"격되":57,"고구":210,"간접":88,"견된":179,"견되":279,"가져":156,"각적":77,"뉴 ":162,"가정":341,"가적":80,"고고":132,"고교":84,"가족":289,"간적":65,"갈의":77,"강에":164,"가졌":76,"각기":59,"가까":268,"가깝":67,"가기":142,"개가":139,"발하는":83,"강과":253,"갖고":277,"외국어":63,"가가":352,"각각":410,"유명하":406,"유명한":333,"가고":100,"유명해":61,"가공":315,"변을 ":61,"가게":78,"가격":229,"자동 ":86,"가경":58,"간고":62,"간과":147,"가구":64,"음반 ":194,"간격":58,"감각":102,"도마뱀":70,"각국":139,"작곡한":149,"작곡하":118,"가능":974,"가는":573,"가다":72,"가니":157,"강남":335,"가대":160,"영미법":57,"간단":335,"간다":177,"독립된":72,"으로는":2876,"우고 ":113,"뇌 ":72,"가도":58,"개관":130,"객관":70,"가나":261,"개교":78,"개국":380,"가노":67,"으로도":374,"강동":69,"강도":90,"가량":79,"개된":94,"》는 ":359,"가락":137,"개되":181,"가라":155," 급 ":610,"가들":383,"간되":110,"간된":75,"개념":759,"가된":598,"가되":103,"각되":72,"같다":132,"갈등":59,"감독":690," 그 ":3864,"갔다":90,"갖는":232,"발하고":100,"강력":160,"갈리":113,"누 ":64,"거구":248,"법에서":118,"건강":248,"가면":128,"가며":64,"감리":65,"가미":121,"개로":133,"개량":160,"가문":317,"수사 ":66,"눈 ":92,"건국":190,"갈래":67,"가루":139,"가로":847,"갈라":146,"객들":72,"거가":75,"가리":1920,"육성 ":90,"가마":120,"각류":82,"가르":365,"가를":483,"성으로":264,"역을 ":746,"역의 ":612,"성이다":196,"여자 ":201,"로스트":70,"역이 ":202,"사회학":136,"이런 ":124,"보에 ":94,"자기장":84,"여성의":88,"역인 ":105,"으로의":77,"포가 ":68,"이로 ":257,"역사학":74,"상품의":60,"상품을":66,"칭하는":246,"웨스턴":61,"세웠다":74,"연을 ":133,"동체 ":85,"이론 ":210,"연의 ":81,"웹사이":130,"분기점":81,"세청 ":72,"원에서":238,"유민주":67,"자들 ":77,"본어 ":107,"위에 ":582,"불구하":86,"섬이다":96,"의미 ":59,"약한 ":189,"보와 ":132,"어졌으":85,"소송 ":68,"업체 ":73,"본에 ":135,"위와 ":92,"렸으며":77,"특징이":235,"특징을":85,"칭한다":172,"설치 ":74,"파와 ":67,"웨스트":211,"월간 ":75,"령으로":158,"사회에":170,"령이다":76,"케스트":70,"연장 ":111,"판에 ":66,"사회의":194,"사회적":296,"로스앤":97,"사회주":304,"영어 ":539,"들고 ":146,"일러 ":99,"도쿄 ":329,"동이다":346,"열을 ":131,"동으로":429,"베타 ":78,"예비치":64,"열의 ":161,"되어 
":4088,"올림픽":756,"영에 ":111,"소수 ":73,"셋째 ":57,"콜롬비":70,"들과 ":470,"판이 ":87,"어이다":784,"상호간":218,"이니아":108,"상하이":159,"드가 ":238,"상하였":80,"역사는":67,"동아시":126,"판의 ":77,"본사는":86,"로소프":372,"도자이":65,"어있다":103,"독일어":182,"독일에":106,"독일의":688,"판을 ":143,"어있는":86,"성장하":60,"동안에":69,"어이며":75,"독자적":97,"아프리":752,"역삼동":82,"장관이":63,"원이다":371,"재는 ":561,"장관을":107,"론적 ":99,"성적을":87,"엔진 ":111,"일로 ":158,"동양사":141,"원으로":777,"업연구":87,"선진화":57,"서쪽으":883,"서쪽은":82,"벡터 ":62,"본사를":78,"동아일":75,"위원 ":99,"서쪽에":179,"역사를":147,"친환경":81,"수는 ":371,"재단 ":121,"상했다":89,"오리지":76,"보의 ":153,"파인 ":61,"러한 ":557,"소스 ":199,"도입하":78,"원자력":149,"론을 ":213,"론은 ":142,"원자로":90,"론이 ":58,"파의 ":209,"동에서":90,"보이 ":67,"역사상":171,"이를 ":1065,"론의 ":239,"이름 ":428,"복을 ":86,"려져있":78,"원이었":68,"역사적":299,"인도네":219,"판은 ":76,"동차 ":460,"속버스":59,"역사와":60,"위인 ":74,"원의원":81,"역사에":123,"으면서":126,"장군이":119,"세워졌":73,"역은 ":410,"세워진":147,"별자리":112,"본은 ":113,"인류 ":64,"파일 ":247,"어졌다":315,"위의 ":333,"업에서":66,"동영상":97,"역상으":142,"장된 ":102,"원인이":59,"치한다":628,"로의 ":620,"베이징":158,"록은 ":122,"본부는":115,"치하는":172,"업적을":72,"였을 ":64,"평균 ":104,"록을 ":144,"패션 ":61,"상황에":103,"원작으":116,"려졌다":93,"요한 ":1062,"원장은":99,"불가능":81,"상황을":61,"도이다":379,"영의 ":109,"특정한":270,"치하며":277,"일대에":115,"동쪽 ":179,"도입된":60,"도입되":74,"소비에":317,"독으로":61,"로세스":161,"도이기":66,"불가리":157,"여진 ":140,"로이 ":96,"영상을":69,"록의 ":66,"움직임":86,"움직이":119,"로세서":244,"영을 ":122,"올바른":82,"로서의":214,"원칙 ":76,"이며 ":7170,"부대 ":58,"도움을":80,"도자로":59,"도이며":276,"우체국":67,"수다 ":66,"로서는":102,"앙행정":163,"수단 ":74,"인도양":80,"치하여":88,"샌프란":83,"치하였":59,"영이 ":63,"인도에":68,"인도의":191,"로운 ":749,"독이다":59,"케인 ":80,"애플리":100,"웨어를":99,"이동하":97,"있는데":320,"상호작":138,"세인트":105,"술개발":68,"있다고":197,"으므로":160,"세츠 ":90,"우는 ":262,"료를 ":341,"양한 ":767,"요구하":87,"팀은 ":106,"북도 ":876,"없으며":74,"있다는":230,"원과 ":225,"었으나":628,"의료기":64,"론에 ":135,"어지고":78,"로와 ":177,"부기관":105,"업이다":206,"서초동":58,"동시에":375,"돌아가":62,"영역 ":58,"원주민":78,"업으로":181,"웨어이":75,"웨어의":74,"파에 ":57,"팀을 ":101,"열이 ":57,"티스트":94,"버트 ":241,"팀인 ":106,"었으며":1637,"팀이 ":193,"끌고 ":65,"이들의":125,"이들은":168,"이들을":80,"컵의 ":85,"이들이":88,"롬비아":69,"력으로":173,"있는지":77,"어진다":212,"일명 ":77,"어지는":358,"도에서":324,"레지스":65,"업인이":86,"부근에":142,"자력 ":84,"재까지":179,"로에 ":104,"인들에":65,"로어 ":69,"자로 ":938,"원군 ":162,"원구 ":57,"보안 ":111,"수도 ":546,"소비자":202,"인물 ":119,"치하고":1297,"세종특":81,"역시의":94,"성직자":104,"오스 ":205,"성질을":132,"불교의":418,"인들은":69,"인들을":66,"이듬해":68,"인들의":133,"인들이":149,"도체 ":99,"서초구":253,"불교사":82,"인디언":128,"록에 ":92,"이미 ":149,"료로 ":215,"론으로":87,"생하였":144,"로이센":147,"인민 ":73,"생하여":71,"이버 ":210,"입되었":102,"론이다":163,"입되어":66,"특한 ":81,"겐 ":159,"자니아":79,"이번 ":130,"게 ":7497,"걸 ":59,"건 ":812,"예수의":57,"검 ":84,"것 ":343,"소설가":209,"음성 ":63,"예술의":117,"어트 ":59,"업진흥":67,"케이드":87,"거 ":803,"》 ":2725,"《 ":66,"외하고":65,"〉 ":274,"소와 ":115,"자료 ":141,"곡 ":443,"고 ":30643,"곤 ":148,"되면서":411,"복지 ":67,"티칸 ":59,"운데 ":1085,"겸 ":297,"경 ":1590,"워크 ":327,"계 ":4339,"겨 ":191,"あ ":337,"격 ":421,"별지방":128,"있도록":441,"견 ":139,"결 ":234,"패의 ":75,"일반 ":275,"자리 ":281,"로이다":326,"광 ":234,"이라고":1978,"쿠니 ":89,"장남이":65,"관·":148,"로이드":134,"용해 ":222,"자를 ":1000,"관 ":2189,"과 ":18300,"곳 ":78,"록이다":99,"공 ":940,"웨이의":60,"곱 ":113,"분류 ":142,"골 ":227,"곧 ":133,"운동 ":385,"케이블":113,"교·":290,"용할 ":375,"선총독":94,"부를 ":654,"연안에":133,"케이션":201,"웨일스":98,"역에서":460,"용한 ":474,"소에 ":141,"교 ":3766,"유성구":90,"생하는":237,"괴 ":95,"쿠가와":148,"속에 ":300,"생한다":87,"생활을":151,"생활의":64,"이렇게":62,"소장 ":152,"이라면":58,"생활에":67,"동인 ":74,"설치된":235,"설치되":170,"동이 ":214,"소설이":157,"속이 ":83,"분된다":62,"용된 ":167,"소송법":99,"유사하":81,"유사한":151,"의사 ":123,"동생이":135,"소의 ":297,"소속된":73,"소속되":78,"육의 ":116,"속은 ":58,"동을 ":1013,"속을 ":100,"동은 ":194,"퍼레이":61,"육을 
":217,"이라는":1497,"속의 ":605,"코미디":122,"북동부":261,"유의 ":224,"센터 ":405,"선출하":127,"동의 ":429,"론에서":229,"부동산":93,"었지만":266,"갈 ":248,"이블 ":130,"가·":97,"간 ":2583,"이븐 ":57,"율을 ":155,"각 ":1502,"가 ":25131,"소설로":59,"이브 ":214,"개 ":2483,"객 ":182,"육성하":58,"와서 ":62,"강 ":1090,"코스 ":158,"감 ":193,"팀이다":183,"갑 ":72,"작되었":341,"값 ":73,"우도 ":215,"작되어":105,"팀으로":74,"이론과":70,"일본 ":2149,"일보 ":109,"ア ":202,"우구스":84,"소속기":439,"유적 ":73,"북동쪽":269,"션으로":78,"예수교":59,"의무를":57,"자동차":777,"예술가":65,"용될 ":69,"로아티":131,"로에서":97,"음반이":168,"션이다":124,"일랜드":482,"전과 ":393,"요리 ":109,"설한 ":86,"위에서":153,"이루고":127,"일부 ":510,"여졌다":78,"영어권":66,"보전 ":62,"의미로":213,"의미를":182,"위가 ":174,"설치하":82,"설치한":64,"도중 ":60,"우크라":193,"역이었":65,"의미가":67,"도시를":74,"이라크":126,"본이 ":103,"이러스":164,"평가를":70,"본을 ":178,"역이며":57,"운행 ":58,"의미는":59,"본의 ":1263,"도시로":1022,"역임하":186,"역임한":62,"역임했":134,"포구 ":295,"온의 ":60,"의미이":69,"의미의":91,"의미에":122,"이르기":105,"업체이":81,"베트 ":83,"소송을":60,"로야구":89,"외부 ":93,"틴어로":69,"재로 ":182,"이룬다":93,"소속의":361,"소속으":82,"이루는":173,"자들에":142,"보스니":124,"별자치":252,"이론에":160,"보존 ":138,"자들이":347,"레코드":185,"자들의":199,"자들은":141,"소속이":111,"자들을":122,"이론이":166,"용노동":84,"페라 ":140,"이론은":70,"이론을":75,"이러한":453,"율적 ":57,"이론의":70,"도시에":84,"오의 ":93,"오사카":137,"전국 ":139,"도시이":464,"도시의":101,"영어로":117,"이론적":57,"자바 ":87,"일보》":157,"의식 ":75,"송을 ":189,"서태지":87,"오와 ":60,"코리아":162,"쉽게 ":173,"세포 ":173,"의미한":603,"의미하":328,"전기 ":326,"위이다":181,"번호 ":230,"이른바":70,"일련의":181,"유지 ":127,"이른다":109,"이르는":307,"류가 ":152,"동안 ":746,"요로 ":75,"루를 ":71,"돈을 ":58,"이르렀":73,"이르러":58,"이루어":990,"우드 ":107,"수로 ":513,"수록 ":68,"요하다":73,"티오피":85,"범한 ":75,"도시는":118,"이름을":619,"이름은":905,"이름으":571,"이름의":95,"이름이":757,"이름인":68,"약품안":57,"위원회":1083,"이맥스":73,"평가하":59,"재단은":166,"역이다":379,"엔진이":58,"재단법":390,"역으로":485,"도시가":58,"엔진을":65,"센트 ":61,"선형 ":74,"위원장":186,"동에 ":602,"보스턴":85,"이름에":98,"점과 ":81,"여의도":98,"열차 ":88,"일렉트":97,"운드 ":160,"부로 ":338," 본 ":154,"생 ":830," 볼 ":404,"면의":154,"면으":111,"면은":81,"면을":202,"목사":132,"명에":192,"오스트":938,"샤 ":153,"면이":257,"없다 ":290,"새 ":176,"무공":61,"무관":282,"색 ":724,"아스 ":84,"선수가":86,"속되어":95,"속되었":68,"문과":177,"명시":68,"불렀다":77,"맨해":63,"무국":71,"물건":208,"물관":507,"물과":183," 또한":784,"무기":312,"물고":100,"문구":184,"면에":519,"쪽에 ":528,"면역":75,"문기":69,"선수권":182,"명제":72,"선이 ":276,"무대":221,"무덤":62,"명종":57,"대자동":69,"문대":131,"모아":148,"모양":499,"먼트":452,"모야":73,"면적":2044,"목소":65,"명예":174,"무네":57,"별한 ":132,"모스":327,"모습":277,"멸종":98," 라고":1124,"모시":69,"명을":535,"명은":636,"명으":376,"몬스":58,"명의":990,"명이":1234,"명인":142,"무는":91,"무늬":117,"선수단":172,"웠다 ":98,"메스":59,"목록":269,"모로":214,"쓰이고":88,"본주의":134,"마틴":68,"마티":65," 별 ":66,"었다 ":8595,"모를":206,"모르":159,"불렸다":153,"립허":405,"성에 ":315,"립했":99,"리히":298,"립한":356,"립하":482,"마포":172,"멕시":360,"림픽":766,"메시":170,"선수들":135,"마토":82,"리프":182,"몇몇":116,"립트":86,"명명":152,"마드 ":89,"만큼":116,"모라":68,"부르고":80,"마트":201,"모래":91,"복지부":89,"명문":66,"설이 ":126,"리눅스":103,"리핀":364,"리한":242,"리학":967,"리하":714,"매년 ":363,"머지":135,"삼 ":80,"명사":136,"만화":639,"산·":76,"모바":156,"용되고":286,"설을 ":221,"무거":57,"메인":168,"메이":1331,"메일":86,"매하":265,"매한":223,"무게":90,"용되기":81,"용도로":77,"문가":203,"설은 ":94,"망한":94,"망하":162,"상 ":4129,"망했":69,"명성":146,"무가":118,"대전광":266,"업한 ":64,"마하":117,"산한 ":76,"사 ":5233,"모리":571,"산하 ":318,"사·":132,"말하":599,"말한":3393,"말해":114,"알바니":106,"설의 ":132,"살 ":233,"만한":100,"모모":86,"면서":1898,"먼저":128,"산 ":2364,"연극 
":67,"림축":110,"마찬":135,"리코":146,"모노":103,"매주":81,"매장":77,"리카":1441,"리칸":149,"림청":142,"르프":78,"매일":93,"명된":68,"명되":255,"리치":164,"메모":272,"리케":256,"모나":69,"맹에":60,"르헨":201,"였으며":1303,"맡은":78,"매우":478,"명단":130,"망을":100,"리츠":81,"몽골":207,"메리":794,"메르":162,"멜로":63,"리처":147,"마주":117,"만족":167,"마지":479,"만주":184,"망원":114,"메로":64,"맥에":68,"리청":92,"리로 ":428,"마쿠":85,"몰도":68,"모듈":69,"모든":1119,"모드":98,"《글":317,"매체":185,"머스":152,"맞추":113,"더스 ":76,"마크":376,"몬드":71,"리포":321,"리투":109,"마케":253,"목동":57,"명령":365," 법 ":85,"《고":72,"멸망":116,"리튼":79,"리트":432,"멤버":200,"모두":692,"리티":185,"링크":113,"모니":182,"릭터":187,"리토":63,"마카":106,"선수를":64,"마치":202,"양하고":63,"쇼군 ":68,"모델":475,"리킨":658,"리키":700,"루지야":62,"선수로":140,"리크":106," 번 ":635,"리타":117,"맹이":84,"맺은":57,"모는":61,"마추":91,"리적":466,"린이":257,"르카":91,"립에":85,"린의":68,"리이":430,"매사":94,"리인":91,"리잡":147,"르침":80,"르치":74,"리자":271,"명과":183,"메달":133,"대이다":77,"리의":878,"리위":69,"메니":147,"리우":347,"간과 ":140,"리원":62,"연주자":75,"없는 ":605,"리올":65,"리와":436,"마시":117," 배 ":81,"명가":99,"린에":77,"동당 ":72,"리온":117,"리오":529,"류큐":59,"마스":330,"르체":115,"리엔":67,"리에":1417,"리엄":263,"리얼":106,"리어":346,"리언":69,"리야":146,"리아":4106,"리안":159,"면과":138,"선조 ":80,"만든 ":839,"워드 ":182,"부로부":57," 밤 ":74,"만들 ":66,"른쪽":57,"맡았":269,"アアア":910,"맡아":66,"말의":116,"말이":844,"메라":241,"르트":563,"르티":105,"만의":176,"류한":91,"류하":347,"류학":112,"르투":301,"많은":1276,"많으":66,"만이":234,"를테":61,"말은":132,"목과":78,"말을":157,"멀리":64,"많이":591,"마음":149,"만원":63,"마의":295,"막으":95,"막을":84,"막의":105,"마이":1302,"르토":132,"마일":89,"명나":114,"마인":99,"마자":90,"만으":117,"만을":299,"말에":148,"모가":149,"마을":459,"머리":313,"르키":132,"리지":401,"만에":262,"립적":128,"마운":61,"마우":104,"르타":206,"림이":95,"만약":57,"많아":58,"립을":177,"림의":63,"마에":203,"림을":114,"르크":924,"마와":68,"립자":112,"리즘":285,"맥스":130,"리즈":1369,"마오":77,"메디":104,"맞는 ":58,"립운":266,"르쿠":64,"릴적":114,"마쓰":345,"르케":65,"르코":77,"연구 ":736,"리조":116,"미디":615,"무총":163,"민들":272,"물체":288," 《 ":64,"대주교":92,"바니":158,"바다":409,"바닥":88,"바닷":63,"에게서":75,"받는":351,"덴마크":199,"반대":455," 》 ":203,"말기의":111,"발달":319,"바둑":111,"반도":781,"미래":237,"소를 ":405,"복잡한":88,"였고 ":681,"미로":269,"발되":197,"밀라":58,"니콜라":139,"발된":170,"바디":62," 러브":58,"반드":90,"미르":208,"에트 ":311," 럭비":135,"보장하":64,"였으나":406,"미를":235,"미리":145,"대적인":85,"북부 ":315,"원도 ":290,"바라":259,"바람":163,"말기에":91,"대적으":83," 뜻하":527," 뜻한":499,"쇼 ":167,"밀리":163,"반란":144,"묘호":73,"박람":78,"부분 ":310,"발라":85,"바로":295,"대전에":70,"발레":115,"발렌":61,"밴드":389," 로도":98,"바르":450,"바른":136,"발로":78,"에는 ":3701,"미법":59,"민과":66,"손 ":238,"씨의 ":120,"본적은":117,"본적으":80,"㎢ ":1457,"민국":7568,"본적인":119,"소 ":1996,"모험":72,"보컬 ":70,"속 ":1432,"민경":63,"모형":163,"미국":5159,"리대신":59,"미군":124,"민공":1009,"문에":1412,"르메니":81," 런던":288,"무와":74,"뮬레":141,"못하":264,"무용":185,"무엇":78,"무에":108,"석을 ":172,"무역":348,"속도로":609,"무의":163,"오스만":137,"송 ":796,"되고 ":1458,"무장":362," 라스":58,"무이":87,"무인":90,"무자":113,"못했":96,"못한":122,"물에":348,"무원":831,"무위":67,"물인":66,"물이":1091,"미나":192,"물의":738,"물을":636,"물은":155,"어나는":167," 레닌":99,"문제":1052,"받게":60,"서의 ":942,"물자":101,"문적":150,"문이":527,"문의":337,"문으":196,"문을":285,"문은":98,"번호는":213,"무제":84,"발간":137,"문장":149,"문자":743,"문인":134,"바깥":84,"발굴":181,"미노":144,"므로":460,"소리 ":122,"가고 ":59,"반구":82,"바그":84,"발과":99,"미널":258,"발견":763,"미네":81,"받고":279,"미니":258," 레드":100,"방과":160,"방공":70,"미는":124,"방검":64,"받기":77,"반기":87,"무지":87,"백과":766,"니케이":87,"방글":64," 라우":71,"쇄 ":106,"배경":222,"대중교":58,"방국":87,"바뀌":172,"민당":124," 라오":74,"바꾸":197,"데서 ":76,"물질":647,"미드":124,"바노":62,"미들":57,"배급":90,"밀도":557," 라인":139," 라이":756,"배구":85,"바나":90,"쪽의 ":187,"셔 ":93,"어떤 ":676,"션 ":1251,"면허":59,"문법":143,"더불어":242,"묘사":175,"무부":172,"각각 ":274," 분 ":172,"석이 
":101,"명하":918,"명한":593,"명해":88,"모터":117,"뮤니":177,"명했":79," 불 ":168,"모토":300,"무사":118,"리를 ":1377,"셜 ":158,"쪽은 ":318," 부 ":168,"모티":57,"쓰인다":281,"영역에":73,"무스":102,"무슬":67,"무성":57,"문사":91,"명확":97,"무선":162,"아버지":563,"몬테":144,"무상":72,"문서":608,"무소":302,"쓰이는":420,"보유하":119,"미가":155,"가가 ":278,"모함":111,"서인 ":81," 뜻은":75," 뜻으":193,"오전 ":100," 뜻을":185,"민간":303,"석의 ":89," 뜻의":103,"모하":434,"모할":70," 뜻이":249,"목포":121,"무실":1878,"무신":111,"목표":383,"속도를":65,"문신":350,"목을":213,"목으":232,"모의":222,"목은":112,"모음":164,"목이":209,"모임":172,"모이":107,"목의":117,"물들":160,"멀티":173,"메타":96,"메탈":156,"모에":58,"모어":73,"목에":288,"묘로":172,"모여":147,"무렵":74,"무로":175,"선 ":3009,"무력":69,"설 ":998,"멘트":73,"서 ":35405,"석 ":585," 라는":949,"무라":196,"명체":83,"선의 ":720,"메트":127,"영역을":65,"사할 ":58,"목적":2924,"물리":852,"세 ":2296,"었던 ":886,"사한 ":256,"물류":61," 라디":307,"성 ":4445," 뛰어":203,"대응하":136,"무리":134,"무르":110,"무를":833,"섯 ":281,"물로":548,"물론":141,"명칭":965,"코의 ":222,"섭 ":81,"섬 ":744,"선을 ":465,"섬에 ":188,"물러":57,"선은 ":236,"무료":103,"에게는":108,"베트남":406,"셀 ":135,"문명":266,"센 ":259,"성인 ":111," 때는":150,"성이 ":557,"류군":65,"운동에":85,"로즈":93,"른바 ":70,"로지":186," 디렉":63,"루를":71,"류가":157," 뒤에":143,"루마":207,"레프":79,"세운 ":205,"론의":239,"러한":558,"론을":213,"론은":142,"론으":87,"러피":111,"록이":179,"로자":119," 때까":219,"로이":916,"록의":66,"어났으":91,"록은":122,"록을":144,"로의":623,"렉터":64," 들면":62,"료를":341,"데뷔 ":105,"렉트":128,"론적":143,"로젝":410,"포시 ":85,"러해":78,"론이":279,"론인":135,"로어":89," 둘째":114,"에도 ":1237,"로에":225," 드림":64,"로야":96,"레코":236,"로아":218,"성장 ":64,"로운":756,"로우":91,"복음서":69," 등록":235,"레타":68,"로와":192,"론에":369,"본으로":94,"록에":122,"료로":247,"마는 ":64,"령을":243,"령으":158,"려진":458,"령은":67,"령의":138,"대에는":126,"로시":141,"렸으":99,"려지":128,"루이스":129," 드라":584," 드래":82,"령이":322,"루는":434," 드러":122,"로써":1210,"례의":129,"룬다":192,"력자":59,"련을":64,"련의":320,"련이":137,"로서":3082,"로세":432,"려져":1058,"령어":129,"려졌":161,"령에":131,"로소":400,"액션 ":92,"료되":58," 동해":190,"상한 ":61,"려주":69,"독교의":109,"례에":105," 동화":66,"로스":851,"도동 ":93,"운동으":60,"운동을":238,"운동은":65,"력에":199,"운동이":163,"운동의":127,"운동장":67,"렌즈":95,"레지":140,"려오":65,"롬비":71,"려운":69,"력이":427,"력의":188,"로사":75,"력은":109,"력을":1047,"려의":143,"력으":173,"였기 ":60," 두었":68,"레의":65,"래프":324,"레이":2571,"레인":196,"레일":572,"레임":136,"복음주":62,"루가":93,"로봇":217," 되지":86,"래하":136,"래한":155,"루고":261,"립된 ":2847,"래했":80,"람회":84,"레잉":82,"로부":1256,"래픽":327,"울대학":150," 뒤를":83,"로비":221,"보》":174,"랫폼":184,"레우":79,"로버":215,"로벌":433," 독특":101,"성전 ":62,"캠퍼스":89,"로베":142,"랑하":95,"성적 ":116,"로미":125,"레오":247,"럽이":82,"레온":77,"려시":129,"로바":151,"레옹":150,"어내는":57,"러진":185," 등급":161,"러지":66," 등기":66,"마다 ":357," 몇 ":197,"럽의":198,"료기":76,"수가 ":625,"어났다":277,"레아":111," 명 ":3052," 등과":227,"페이스":345,"로몬":89,"라프":68,"로망":63," 돼지":64,"려서":69,"락하":73,"럽에":218,"라하":106,"료가":69," 도쿠":173," 되어":644," 도쿄":425,"로를":314,"로만":123,"빙 ":74,"로마":1459,"러의":97," 되었":1574,"로리":196,"편집 ":64,"업기술":80,"송되었":59,"렌스":99,"롤러":79,"러운":114," 면 ":100,"예정이":232,"레시":93,"로로":266,"롤라":89,"레스":841,"레슬":216,"렉스":73,"빌 ":184,"러에":64,"라파":96,"로레":146,"로렌":92,"란트":70,"빈 ":332,"도공사":78,"랑크":259,"라틴":522,"라티":92,"라트":181,"로라":126,"비 ":1194," 두번":84,"란토":57," 동쪽":939,"렉산":237,"라톤":92,"라토":75,"라테":80,"서쪽 ":159,"세의 ":523,"례를":88," 도착":65,"랑카":81,"롯데":72,"량이":250,"런스":86,"량의":196,"라키":103,"량을":270,"량은":105,"량으":84,"러싸":178,"러시":1282,"라크":271,"로디":91,"러스":472,"예전에":64,"례로":68,"로드":367,"보이는":153,"레비":475,"량에":67,"록된":166,"록되":319," 동적":58," 동전":120,"로듀":151,"라쿠":58,"개가 ":134," 동원":67," 동위":64,"라카":115,"페인의":159," 둘러":256," 두루":60," 동의":119," 동인":124," 동이":75," 동일":295,"략적":66,"로도":729,"로동":69," 동작":222,"레바":60," 독점":68,"뜻하":537," 
동영":96,"로는":3896,"뜻한":519,"로닉":61," 도중":97,"로니":159,"럼비":69,"븐 ":128," 도입":355," 독일":2033,"로네":72,"였지만":104,"영국 ":794,"세이 ":81," 독재":77," 동안":690," 동아":274,"램의":106,"페이지":237,"블 ":323," 독자":160,"램으":79,"램을":192," 동양":311,"보인다":123,"램이":268," 도요":85," 돌아":168," 도와":123,"럭비":137,"케이팅":98," 도움":142,"브 ":1116,"로나":143,"러브":90," 동시":463,"웨덴 ":161,"레르":65,"강과 ":246,"랑이":109,"렸던":58,"래의":273,"레마":102,"래이":139,"갖고 ":272,"운동량":60," 되며":91," 되면":122,"람이":690,"람의":253,"람은":92,"람으":164,"래에":165,"람을":297,"로그":1803,"랑을":65,"라진":68,"당에서":58,"라질":223,"라지":147,"롭게":172,"렸다":446,"케이프":74,"라즈":65,"로교":66," 동생":173,"운데에":81,"뷰 ":75,"페인어":121,"련되":65,"련된":461," 동서":118,"론과":135," 동성":75,"로구":410,"막부":166,"름에":203,"우리 ":186,"르자":62,"리사":226,"르이":139,"매매":71,"망명":76,"대원군":58,"르의":306,"매를":90,"메가":77,"말부":57,"리소":182,"당이다":139," 떨어":835,"린스":117,"사회 ":554,"르지":156,"리시":222," 바 ":139,"당으로":105,"릭스":107,"리스":2842,"름인":68,"름이":849,"름의":136,"름을":721,"름으":597,"름은":938,"보여주":101," 및 ":5806,"르주":61,"마사":281,"마산":169,"림수":541,"머니":414,"먹는":147,"선수이":268,"용되어":68,"용되었":289," 반 ":121,"말로":462,"마를":97,"마르":609,"리보":100,"리본":58,"마리":562,"말레":213,"ああ":1092,"마루":87,"리버":108,"률에":74,"리베":90,"세에 ":126,"사후 ":103,"섬을 ":86,"말리":164,"리브":160,"마모":59,"률을":89,"률의":82,"유가 ":79,"류의":468,"류이":218,"루프":57,"륙의":69,"류인":86,"우를 ":179,"선시대":218,"본에서":359,"르에":154,"만명":119,"률이":149,"리비":231,"뿐 ":113,"르웨":217,"마법":87,"표기법":77,"아어 ":389,"마뱀":70,"르와":82," 미 ":80,"마디":57,"맞는":62,"마드":193,"리로":509,"맞닿":106,"맞대":170,"만든":919,"만드":289,"만들":1730,"리를":1378,"루터":61,"매니":102,"리마":98,"베키스":65,"리만":74,"어로 ":1139,"리모":80,"리면":59,"르세":70,"리며":220,"리메":102,"료하":61,"많다 ":288,"아에 ":243,"루트":166," 또는":7044,"매되":436,"마라":171,"매된":213,"운동가":315,"류에":196,"마련":128,"와이 ":67,"르시":280,"류와":152,"리바":71,"섬의 ":133,"리미":169,"르스":205,"말라":199,"마로":110,"륙에":126,"와의 ":292,"만나":155,"않아 ":81,"리들":59,"마노":70,"리드":424,"리듬":79,"립대":87,"만년":233,"르미":76,"마누":86,"르바":197," 딸이":114,"른바":71,"르베":87,"마는":64,"코어 ":106,"립되":628,"립된":2855," 때에":297,"마니":282,"마다":425,"막대":84,"르부":80,"동가이":124,"많다":296,"리랑":80,"리라":82,"르비":233,"용되지":64," 때의":151,"매년":365,"루지":119,"리눅":103," 디젤":62,"룹이":172,"룹의":106,"리노":143,"마그":94,"르렀":74," 디자":437,"르러":58,"많고":58,"만과":68,"마구":74,"월까지":163,"리나":257,"마나":106,"리되":201," 따위":129,"리된":63,"르면":329,"리도":58,"르모":65,"를린":149,"르몬":72," 디지":563," 따왔":76,"르메":125,"르며":194," 따온":72,"맡고":199," 디즈":98,"리더":98,"상품 ":73,"린다":701,"르마":127,"리대":73,"르만":187,"리니":123,"르를":106,"리다":169,"매개":71,"리단":71,"리는":1356,"말기":395,"를로":97,"로회":57,"루에":65,"르드":132,"름다":121,"루었":69,"리그":1570,"루어":1049," 때부":83,"리교":75,"롤플":72,"릭교":272,"루아":114,"리고":1473," 등의":1711,"록한":139,"록하":240," 등을":1621," 등은":71," 등으":490,"용된다":504," 등장":882,"르도":108,"리공":66," 등이":930," 디스":332,"로프":248,"료이":131,"료의":83,"르데":87,"로하":57,"리게":121,"로피":98,"른다":783,"루이":519,"용되는":630," 등지":163,"류를":301,"세와 ":160,"마가":184,"립국":88,"루의":66,"룡이":113,"립과":102,"립공":193,"롯한":246,"롯하":108,"롯해":85,"리기":520,"류로":226," 따서":167,"르디":177," 들어":754,"론토":100,"로트":65,"류되":96,"류된":67,"르나":166,"성을 ":1294,"로테":59,"アア":1184,"류는":92,"령하":64,"니티 ":64,"로토":241,"루살":113," 때로":79," 물 ":110," 떠나":66," 등에":874,"부등식":78," 들이":58,"리가":609,"르니":86,"르단":65,"르다":274,"부문 ":76,"성은 ":337,"대에서":249," 때문":1317,"르는":1030,"룹이다":116,"서식한":105,"서식하":112,"로파":60,"르네":183,"료와":64,"루시":73,"료에":110,"르노":173,"로페":81,"루스":522,"르가":186,"씨식물":94," 따로":85,"려하":124,"르게":347,"르고":464,"력하":175,"력한":156,"포스 ":94,"루미":72," 따라":2041,"표기하":113,"르기":616,"로크":88,"용되며":85," 디비":65,"로키":66,"름과":57,"로타":76,"뼈 ":57,"련하":119,"련한":87,"로켓":139," 딸로":65," 따르":395," 따른":279,"로코":155,"성의 
":571,"월드 ":285,"비즈":153," 면이":112,"술관 ":61,"서울대":188," 목사":116,"비주":93,"술과 ":112,"비전":604,"비정":124," 면역":63,"비잔":183,"비자":254," 면에":75,"비의":141," 무기":218,"비이":110,"비유":59," 물고":76,"비율":126," 물건":200,"비용":150,"어를 ":713," 색 ":76," 맨해":60,"선에서":160,"비오":69," 새 ":102,"비에":454,"비영":173," 모아":120,"포스트":82,"다음과":165," 무대":132," 명제":57,"빼앗":60," 명의":647," 명이":852," 모스":252," 모습":262," 멸종":93,"데미 ":77," 명으":136," 명을":174," 명에":64," 목소":63," 면적":1943," 명예":164,"빛을":75,"빛의":78,"빛이":59," 선 ":178," 메트":66,"빅토":130,"류와 ":147," 무렵":70," 무력":62,"상태에":175," 서 ":81,"비크":81," 목적":2802," 메타":86," 메탈":133,"쓰 ":219," 모음":140," 모임":130,"양부 ":86,"쓴 ":323," 모여":146," 모양":452," 멀티":170,"비치":319,"상태이":66,"비해":227,"비하":168,"씨 ":474,"비행":326,"립대학":73," 물리":729,"수권 ":164,"비평":114," 세 ":1630," 문명":202,"단어이":100," 무리":83," 성 ":737,"비판":182," 무료":100,"위는 ":290," 섬 ":655,"소련의":131," 명칭":907,"대신을":63," 물론":123,"비트":348," 메모":266,"왕의 ":270,"비로":129,"비롯":537,"빌라":66," 매장":60,"비례":72,"마르 ":85," 매일":60,"대중 ":103," 마찬":135,"부터":5834,"비를":209," 매주":80,"서울시":173,"마를 ":96,"서유럽":60," 만주":134," 망원":101," 리처":111," 만족":131,"빌딩":554," 마주":96,"단에서":85," 메리":77,"스》":261," 몽골":174," 메르":63," 맡은":78," 매우":478," 명단":126," 마지":456,"대수학":86," 멸망":115,"르부르":74," 모두":685," 멤버":192," 링크":71,"분포":314,"왕이 ":131,"영토 ":78,"분파":90,"브스":61," 리투":93,"비밀":157," 마케":230," 명령":322," 맞추":106,"부하":241,"부학":61,"부한":79," 마크":121,"북한":235,"말로 ":363,"빠르":149,"빠른":101,"부품":104," 몰도":67," 《글":315," 모드":84," 모든":1118," 모듈":58," 매체":153,"빌리":113,"부통":99,"다양하":99,"다양한":769,"솔로몬":76," 마치":120,"부패":65," 모델":414," 마카":90," 모니":75,"브웨":59," 마틴":63," 모래":84," 모로":109," 목록":249,"비상":121,"비사":81,"률에 ":68," 멕시":326," 마포":164,"비서":71," 메시":150,"블이":59," 모르":86,"아와 ":370,"분해":122,"분할":143,"분한":71,"분하":183," 만큼":59,"빌보":67,"부활":98," 명명":148,"부호":224," 명문":58,"분화":119," 몇몇":116,"부흥":81,"안에 ":523," 만화":581," 모바":147," 명성":112,"비아":1009," 상 ":162," 메이":433," 메인":91," 무거":57," 무게":59,"비어":60," 《대":64,"왕을 ":61," 모리":222," 》는":90," 마하":64,"비슷":450,"다양체":72,"비스":1259," 사 ":101,"비시":110," 먼저":127," 말하":595," 말한":3392,"마니아":199," 산 ":314,"립되어":101," 말해":112,"립되었":460," 바닷":63," 받는":232,"얼 ":291," 물체":225,"억 ":318," 바다":321," 바닥":59,"언 ":512,"어 ":13255," 미드":74," 미디":308,"뿐만":235," 반드":89,"요르단":59,"뿌리":157,"업 ":1759," 미래":216," 반도":513," 바둑":89,"엄 ":340,"오고 ":79," 반대":427," 발달":304,"않을 ":58,"류인 ":79,"않은 ":396,"외버스":131,"엘 ":358,"코에서":61,"엔 ":200," 미리":101,"왕위 ":75,"에 ":60715,"륙의 ":69,"역 ":2192,"여 ":11527," 발렌":60," 발레":96," 밴드":336," 바로":243," 반란":141,"연구기":185," 묘호":66,"㎢이며":85," 바라":115,"단위이":64," 바람":140," 수 ":4761,"연대 ":75,"안의 ":228," 문학":506,"연구관":133," 발매":1040," 반면":95," 무형":61,"염 ":118," 문헌":108,"엽 ":65,"류의 ":468," 박물":283,"다이아":64," 바르":206," 민법":129,"연 ":555,"연구가":59,"연구개":126,"열 ":422," 미분":130," 무함":67," 무한":103,"표되었":92," 반발":61,"예 ":316,"컨트롤":65,"상호 ":183," 방면":68," 반복":102," 미사":275," 백만":60," 문화":3374,"영 ":1172," 발명":156," 뮤직":150," 뮤지":132,"안을 ":183," 번길":63,"마리 ":98," 미술":327,"옛 ":293,"단이다":349,"도는 ":1874," 민속":126," 미스":154," 방법":913," 민사":90," 방문":137,"능한 ":349," 미생":74," 바빌":60,"다음으":96,"올 ":130," 박사":165,"연고로":70," 반사":111,"오 ":2121,"꾸는 ":60,"단으로":253," 미시":127,"온 ":1060," 벨기":181,"옥 ":109," 묘사":165,"씩 ":192," 문법":92,"씬 ":58,"분석 ":120,"뻗어":76,"비히":129,"빌헬":89,"서에서":124,"서양에":67," 모터":106,"안 ":2071,"다이묘":287,"부속 ":57,"아 ":8561,"여겨진":95,"여겨지":132,"악 ":699," 무선":135," 명확":90," 몬테":130,"알 ":228,"원래 ":383," 모토":78," 무사":83,"앙 ":331,"아인 ":67,"역대 ":70,"악의 ":190,"압 ":126,"세종 ":61,"암 ":229,"안산시":60,"아이 ":66," 문서":489,"단위로":101,"르비아":166," 문신":338,"애 ":160,"액 ":125," 
목표":359," 목포":112,"앨범이":199,"상태를":182," 무신":103,"아의 ":1952," 무슬":66," 민간":289,"다이라":201,"악을 ":149,"앤 ":156,"우디아":57,"오에서":59," 소 ":58," 속 ":64," 모험":70," 미국":5128," 미군":118," 모형":148,"펼친 ":88," 무엇":78,"연결된":112,"연결되":159," 무역":240,"상태로":92,"쿠데타":107,"육군 ":150," 못하":260," 무용":112," 물에":69,"야 ":1639," 못한":122," 못했":94,"약 ":1839," 송 ":74,"얀 ":94," 무인":69," 무장":257," 문인":57," 문자":616,"어린 ":76," 문장":142," 발간":125," 물을":74," 미나":115," 물이":74,"쪽에는":101," 받고":185," 미네":57," 발견":731," 문제":899," 발굴":168," 바깥":84,"양 ":1169," 바그":80,"단체 ":215," 미니":154," 바꾸":196," 물질":502," 바뀌":168,"대신하":67," 배경":215," 배구":64," 백과":71," 방글":64,"오가 ":92," 밀도":372," 배급":88,"㎢이다":412,"바타":69,"바키":98,"배출":125,"바탕":457,"박테":79,"케팅 ":75," 루마":188,"보드":262,"별명":122,"포의 ":126,"버스":1087,"보디":132,"바퀴":74,"법사":70,"외에 ":210,"방출":95,"《한":112,"법상":248,"버시":57,"여기에":152,"만나는":64,"대서양":169,"보도":136,"병력":63,"대인 ":123,"더라도":75,"미트":96,"습 ":140,"별로":160,"보는":250,"미터":286,"보니":57,"보다":1010,"앞에 ":58,"승 ":599,"미토":66,"아주 ":103,"미크":58,"백질":181,"서》":62,"슨 ":398,"봉기":75,"보내":172,"미코":72,"상트페":66,"보나":64,"션은 ":114,"백제":182,"업단지":59,"벨리":83,"방지":261,"션을 ":118,"미카":113,"미치":144,"백인":67,"션의 ":92,"방정":311,"베리":361,"연구하":376,"받지":126,"법무":134,"스 ":13453,"백작":129,"병대":98,"방의":550,"방이":189,"방인":58," 레코":213,"성에서":91,"アア ":190,"바지":303,"방을":182,"방은":68,"방으":94,"베르":797,"베를":164,"방전":97,"방자":121,"배우":453,"반직":145,"본부":489,"분과":114,"연구자":69,"배하":244,"밝히":70,"사하였":95,"부고":63,"사하여":75,"부과":108,"불가":326,"불리며":153,"배했":65,"범죄":410,"별시":2275,"방향":410,"부가":553,"보보":58,"방행":151,"연구원":419,"쪽에서":93,"범주":67,"베이":1145,"번지":621,"법정":263,"연구와":136,"대상이":87,"법적":239,"발효":94,"법전":58,"번째":2258,"법조":75,"방해":160," 르네":101,"대상으":305,"보병":102,"방하":123,"여기서":132,"법인":2460,"밝혀":97,"버지":661,"배포":144,"바흐":101,"병사":95,"베어":68,"분류하":120,"범은":64,"범으":68,"범을":60,"사학자":68,"반화":65,"법으":406,"법은":260,"법을":510,"변수":210,"범이":273,"법의":401,"범인":79,"발현":63,"번주":320,"법이":695,"본명":404,"발하":523,"싼 ":93,"발해":78,"법원":385,"복무":64,"보물":134,"발한":597,"범위":305,"발했":61,"발행":355,"버전":329,"보면":93,"볼리":68,"설이다":223,"반하":117,"반한":76,"사학위":59,"부산 ":108,"벌이":122,"벌인":102,"번이":145,"동과 ":314,"벗어":103,"별히 ":78,"발표":789,"번의":336,"법에":547,"범에":61,"싱 ":188,"복리":93,"보리":111,"보를":504,"보르":185,"사항을":108,"반포":61,"십 ":167,"심 ":452,"벌어":349,"쿠라 ":120,"보통 ":571,"번역":488," 로켓":129,"번에":76,"발트":112,"민회":89,"베스":344,"버에":60,"보로":179,"선정되":73,"본래":267,"실 ":537,"신 ":2121,"시·":101,"바티":117,"표를 ":238,"보라":58,"대의 ":1668,"바트":73,"아시리":63,"시 ":11102,"식 ":2547,"어떻게":123,"별법":78,"미합":62,"미하":416,"미한":620,"배치":148,"민스":65,"방부":88,"벨기":186,"연결하":272,"소셜 ":67,"었다가":169,"박사":214," 레오":139,"델로 ":62,"불리는":292,"반사":163," 로버":203,"변경":400,"미쓰":125," 로베":67,"불린다":546,"방문":160,"사회를":100,"민사":140,"었다고":153,"반부":115,"바빌":62,"미소":59,"콘서트":79,"사》":106,"대성당":95,"미술":456,"민속":159," 로마":1229,"안전 ":96,"미스":278,"어드벤":57,"방법":1110,"미시":173,"에너지":555,"믹스":156,"연구소":525,"술 ":1167,"문화":5237," 라트":76,"반민":78," 빈 ":109,"뮤지":139,"뮤직":175,"사회민":64,"번길":63,"동계 ":168," 로렌":78,"배를":124,"방면":99," 라틴":503,"법과":157,"반발":61,"반복":104," 레슬":75,"미상":126,"미생":82,"법규":59,"미사":365,"백만":74," 레스":71,"사회복":58,"윈도 ":523," 로드":134,"리드리":128,"민법":149,"바리":82,"섬으로":166,"물품":90,"무하":135,"무한":119,"무함":67,"미분":165,"소속 ":308," 러시":1253,"수나 ":68,"무형":102,"반면":95,"발매":1060,"업하고":81,"숙 ":91,"문하":118,"문학":996,"수 ":7565," 롯데":68,"버그":188,"발명":165,"박물":478,"었다는":78,"문헌":145,"물학":358,"순 ":180,"연구에":78,"슈 
":248,"받을":102,"보관":133,"받으":61,"받은":358,"발의":72,"발음":233,"본격":104,"방어":137,"발을":206,"방언":181,"박정":58,"보교":91,"발이":87,"방에":589,"밝은":61,"반정":76,"방영":378,"발자":127,"반적":1188,"보건":323,"밖의":84,"발원":228,"반응":325,"방안":91,"반으":349,"반은":59,"반을":232,"반이":289,"대용 ":70,"법령":101,"반의":263,"본거":86,"반인":88,"보고":314,"법론":58,"별도":116,"산하의":96,"보급":496,"배에":64,"보기":184,"배열":85,"별된":62,"벨라":135,"발족":210,"방위":167,"본과":71,"본관":553,"변동":70,"백악":117,"발적":105,"발전":2168,"법률":748,"본국":57,"본군":86,"반에":291,"리며 ":216,"받았":547,"민중":107,"바오":74,"받아":531,"연고지":92,"민주":1442,"벌레":89,"반영":96,"생활 ":136,"미지":217,"바에":127,"베드":57,"소설 ":296,"밀접":73,"미주":57,"밖에":180," 로서":72,"부르크":380,"버로":72,"버리":89,"부르키":72,"리되어":97,"보가":134," 로스":253,"바일":167,"바이":1137,"박이":57,"바의":87,"밀집":72,"연구를":213,"발에":122,"평양에":104,"바위":87,"민지":365,"민의":388," 레지":112,"민을":66,"민이":137,"배상":72,"방송":1655,"불리고":64,"병과":64,"미이":133,"연한 ":80,"미의":191,"민운":60,"베니":113,"방식":893,"밑에":68,"뮌헨":76,"버러":63,"민족":1125,"연합 ":418,"민정":107," 렌즈":75,"불리기":218,"생태계":74,"쉬 ":68,"버드":123,"미에":254," 로부":64,"미어":138," ㎢ ":1456,"미야":182,"베네":199,"단지 ":89," 레이":539,"미얀":97," 로봇":182,"미아":149,"발생":1139,"열대 ":90,"발사":358,"방사":203,"미우":66,"밀양":61,"었는데":175,"미와":61,"에티오":85,"바실":66,"민에":93,"박스":126,"앙에 ":58,"바스":196,"부시":91,"보편":89,"《동":63,"부스":118," 메가":61,"《대":78," 망명":76,"부속":120,"분석":483," 막부":162,"당시의":130,"보험":439,"비공":80,"복합":220,"보행":57,"복하":136,"북아":337,"보화":79,"부아":90,"보호":994," 먹는":127,"비교":324,"덜란드":394,"동기 ":74,"비가":127,"사하고":71,"보한":539,"보하":152," 리스":112,"별한":142,"별하":152," 마리":480,"변형":136,"분비":61,"변호":218,"콘스탄":176,"변화":514,"변환":239," 말리":67,"불법":161," ああ":304,"부분":1658,"부부":74,"북부":794," 말레":203,"보컬":139," 리버":60,"변하":108," 말로":338," 마르":422,"다에서":81,"부산":804,"서울특":2233,"부사":121,"별히":78,"부상":150,"보통":876,"북서":597,"부선":57,"부설":70,"본토":85,"분산":123,"부서":74,"였던 ":503,"단어는":61,"》는":360," 마법":66,"사하는":206,"《뉴":90,"어떠한":155," 리비":141,"쪽으로":3077," 뿐 ":75," 만명":106,"병하":65,"병합":73,"분리되":151,"소리를":144," 만에":155,"블루":171," 많아":58,"북쪽":976," 메디":66," 마오":62,"브리":671,"분류되":81,"비는":93,"분류된":61,"블로":244,"블록":95,"붙은":60,"붙이":74,"붙인":86,"브르":78,"아직 ":93,"분지":84,"브뤼":58,"브루":161,"부지":240," 마쓰":270,"블레":66," アア":171,"블랙":152," 말이":732," 말의":90," 맡아":66," 맡았":269," 멀리":60," 많이":589,"부총":58," 말은":95," 말을":102,"원대학":60," 많으":66,"비디":498," 많은":1059,"부처":108,"비드":137,"부천":98,"역학 ":84," 마일":63," 명나":109," 막의":74," 마이":939,"역한 ":65,"부착":63," 마음":145," 마을":343,"매된 ":209," 머리":222,"비되":67," 말에":106,"블리":126," 마우":75,"부위":94,"북위":64,"북유":57,"부의":976,"북으":70,"부이":295,"부인":395," 마스":139,"불어":280,"영국군":62,"부원":64,"봉한":65,"부였":63,"영국과":68,"불안":74,"부와":395,"분에":266,"분열":122,"당시에":146," 마산":139," 마사":186,"비극":79,"부에":2420,"보훈":113,"석으로":73,"분야":1297,"부여":260,"여할 ":326,"블라":186,"브로":228,"붙어":138,"북조":69,"브랜":239," 매사":94,"브러":125,"브레":203,"붙여":262,"여한 ":90,"붙었":64,"》를":132,"분쟁":215,"부정":220,"부제":59,"빨간":59,"부적":74,"분자":304,"브란":69,"부족":250,"브라":793,"부조":64,"불이":70,"붉은":121,"비나":140,"분으":162,"분은":160,"서이다":247,"부장":172," 류큐":57,"부자":61,"부작":96,"서울지":61,"분이":366,"분의":455,"표로 ":284,"분을":409,"법칙":261,"코스트":84,"보시":70,"벤처":112,"대전 ":499,"보스":329,"병원":361,"본식":170," 많고":58,"대적 ":85,"봉사":97,"병이":85,"르바이":113,"병의":61,"편으로":96,"병을":93,"부담":95,"별지":141," 마그":88,"편이다":60,"섬에서":72,"부는":373,"분류군":63," 리눅":99,"마로 ":78,"분당":74,"북대":84,"륙에 ":70,"북단":60,"부대":314,"부도":71,"부동":151,"북도":1000,"북동":589,"《아":100,"법학자":59," 매개":67,"보안":263,"보아":63," 말기":330,"버트":260,"보에":135,"》에":434," 리더":83,"부등":87,"보여":239,"오카 ":175,"분되":78,"분된":74," 맡고":199,"본에":513,"보와":133,"보였":72,"본어":215,"북구":223,"부근":226,"불과":67,"백화":66,"부기":151,"왕조 
":222,"북극":86,"불구":99,"불교":1054,"분기":161,"변에":132," 롤플":72,"《삼":93,"《사":79,"였다 ":5818," 리그":1028,"붕괴":176,"보상":91,"보살":73,"복사":130,"부르며":106,"변을":61,"변의":108,"본사":255,"변이":82,"보성":57,"복선":58,"산하는":104,"마누엘":65,"병역":76,"별이":83,"복소":98,"본선":122,"별자":389,"교·철":266,"변조":64,"《스":59,"보수":235," 루이":396,"복수":116," 만들":1714," 만든":918," 만드":275,"엔터테":273,"분리":365," 맞는":58,"아시아":1039,"아시안":64,"분류":756," 마드":70,"부리":81," 매년":364,"생한 ":427,"부문":240," 매니":71," 루터":58,"불리":906,"선이다":194,"불린":592,"부모":121," 맞닿":106," 맞대":170,"북면":63," 마라":96,"북방":63,"연구회":81," 루트":124,"북미":98," 리메":64,"동구 ":190,"부르기":284," 말라":128,"대장 ":64,"선으로":205," 마련":126,"변화 ":64,"니카 ":85,"위공무":227,"보유":232,"벡터":159,"》이":131,"》은":258,"복음":190,"》을":87," 리듬":66,"보의":154,"복을":86,"》의":292,"베타":109,"베키":68,"복원":94," 만나":155,"보원":72,"보정":59,"써 ":1207,"본인":182,"보전":215,"보조":194,"복제":80,"보장":241,"산하고":57,"보인":174,"보이":571,"보자":82," 만년":216,"보잉":66,"본의":1265,"위대 ":64,"본이":148,"본은":114,"본을":178,"복잡":213,"본으":94,"류에 ":144,"벤트":72,"번호":560,"부로":440," 많다":296,"베트":571,"보좌":71,"본적":361,"보존":422," 마다":65,"별칭":85,"본제":60," 막대":80,"불렸":246,"부르는":270,"본질":90,"부르":1396,"부를":656,"부른":614,"본주":135,"벨트":79,"보증":59,"범한":75,"범하":94,"복지":390,"법학":195,"불러":105,"불렀":138,"부른다":562," 기름":70,"사자의":62,"에른 ":64,"된다 ":2822,"레 ":617," 국토":265," 기리":96,"렌 ":169," 기록":1008,"높이":655," 될 ":180,"누리":83,"놓은":145,"에르 ":123,"육관광":1111,"지휘관":62," 교회":451,"렘 ":99," 교황":491,"노출":59," 교환":135,"노카":116,"는가":88,"영한 ":59," 교향":137,"사이자":85,"사이의":380,"되던 ":89,"뉴기":78," 귀족":241,"소니 ":75,"농촌":268,"렬 ":143,"노쿠":69," 구하":94," 두 ":1616," 규제":69," 규정":454," 기반":744,"뉜다":143,"려 ":970,"영국령":58," 금속":188,"사이에":1050," 기법":166,"력 ":1905," 기본":560,"련 ":727,"령 ":1058,"의가 ":145," 그와":88,"페인 ":307,"갔다 ":76,"례 ":297,"에피소":110,"느끼":75,"념하":189,"페이 ":59," 구현":181,"분에 ":187," 둔 ":199," 국회":467," 둘 ":80," 그에":142," 군주":331,"네치":59," 그로":78,"영국에":154,"량 ":637,"사이트":392," 기념":453," 그려":163," 관현":152," 그렇":75," 그레":286," 그러":510," 그런":74," 교차":215," 그랜":97," 그래":457,"농산":152," 그라":116," 구조":750," 관해":73," 관할":541," 관한":1386," 관하":68," 국제":2240," 규모":317," 국적":83," 구제":77,"두고 ":439," 군이":85,"념이":269," 군인":411,"념일":61,"노스":368," 공화":1170," 군의":89,"크게 ":346," 기동":58,"대해 ":649," 권익":198,"진행하":112,"노예":123," 기독":683," 기도":110,"사이타":67,"노에":90,"사이클":132," 권위":133," 그린":258," 그리":2688," 궁전":116," 기대":78,"불어 ":253," 그림":284," 기능":764," 광학":73," 기니":64," 그를":80," 글로":188," 그루":89," 근로":105,"감독 ":133,"동물 ":185," 그룹":609,"농수":73,"러 ":2371,"오토 ":64,"덴의 ":130,"놀이":209,"높아":61,"농어":139,"논쟁":109," 구축":257,"농업":544,"웨덴의":119,"노인":60,"논의":106,"녹음":73,"노이":170,"노의":75," 구체":160," 근무":128,"양성 ":103,"설립되":451,"설립된":2612," 된 ":602,"럼 ":613,"럽 ":802,"높은":480,"누르":181,"대학 ":618," 근본":125," 교통":460,"대한 ":2759,"놓여":60,"럴 ":180,"독부 ":84,"네트":660,"외의 ":195,"런 ":372,"네팔":64," 교토":88,"단기":81," 길이":462," 기존":437,"담고":149," 기종":88,"사이를":61," 뒤 ":412,"니는":215,"루이 ":211," 기준":2663," 김일":102,"의거 ":83,"사이며":98,"니다":213,"대개":179,"대가":323,"당과":113,"보통신":129,"대공":201," 김정":92,"대고":184,"당구":102," 기지":96,"대구":662,"대국":94,"대군":106,"대교":257," 깊은":74,"능력":467,"다나":107,"생이 ":83,"판타지":99,"대규":180,"유고슬":66,"애인 ":68,"대까":71,"대기":340,"사위원":70,"봉한 ":64,"액을 ":97,"다는":1787,"루의 ":66," 균형":110,"다니":145,"다다":78,"같다 ":114," 기체":135,"당나":184," 기초":518," 까지":200,"니라":554,"업무 ":115,"단독":80,"로토콜":205,"크고 ":90,"도로가":62,"단된":60,"단되":119,"다드":63," 김천":57,"뉘어":86,"달되":59,"뉴스":481,"담당":568," 드 ":245,"당대":81,"영국의":714,"생의 ":215,"사이드":109,"대는":204,"에만 ":119," 극작":78," 기생":98," 극장":162," 기사":214,"산이다":72," 그의":924," 기상":201,"소는 ":346,"노트":95,"대형 ":188,"도로로":74," 
글자":146," 글을":71," 권한":175," 기소":61,"노폴":62,"늘날":414,"누스":263,"눅스":105,"는다":1118,"노프":93,"느님":102," 금융":393,"니가":164," 기술":1634,"부산지":60,"론 ":1033,"산으로":200,"록 ":1898,"로 ":67654," 급제":82,"는데":1842,"니고":130," 금지":145,"눈에":57," 규칙":165,"롤 ":88,"누어":127,"누엘":65,"영화 ":632,"다가":955,"성부 ":74," 기억":140," 기업":903,"롱 ":111," 기여":1107,"롬 ":79," 기아":103,"가나가":58,"단계":343," 기원":1309," 근처":198,"농협":62,"생산한":69,"다고":1243,"생산하":159,"되기도":259,"사용해":88,"사용할":205," 기자":613,"단군":64," 기장":60," 기재":70,"사용했":107," 김영":81,"육과정":69,"사이버":92," 김씨":58,"다국":61,"단과":143,"영된 ":101,"사용하":1482,"사용한":491,"우루스":147,"대비":160,"니아":1877,"다수":306,"단속":75,"달성":181,"니어":177," 딸 ":100,"대부":1072,"다섯":195,"징하는":66,"대백":608,"능을":413,"대방":188," 따 ":65,"니시":132,"능이":137,"는지":249,"사우스":167,"대법":96," 딴 ":76,"대본":115,"대변":64," 꼬리":58,"변화에":59,"대문":243,"퍼시픽":98,"닉스":176,"니스":713,"다에":144,"대수":239,"당시":1347,"다양":1016,"니즘":123,"대시":62,"다와":77,"단어":494,"니지":109,"대신":635,"단에":340,"님이":60,"님의":97,"대승":104,"덜란":394,"보호 ":293,"업하였":82,"대서":179,"당수":72,"대성":249,"더라":88,"당선":166,"《글로":315,"대상":709,"니이":111,"대사":358,"니의":167,"니온":70,"다시":502,"니와":67," 때 ":1805,"다스":163,"니오":102,"단순":280,"니에":124,"니우":69,"당사":230,"단말":68,"도로와":66,"다른":2127,"다르":492,"다를":71,"능성":166,"아어로":59,"다리":327,"룡 ":75,"다마":68,"다만":82,"집합이":107,"도로이":159,"다룬":160,"도로의":196,"다루":444,"달러":146,"니버":155,"달려":57,"집합을":57,"뉴욕":478,"가까운":155,"달라":143,"가까이":69,"사이다":504,"대회 ":303,"지휘하":108,"대되":108,"다란":88," 등 ":2330," 기타":617,"니며":81,"니메":636,"대동":97,"료 ":646,"생을 ":121,"대도":73,"어로는":135,"당되":95,"요시 ":90,"대대":66,"대덕":70,"당된":101,"다비":69,"담배":65," 김해":111,"대만":114," 기후":202,"대립":98,"대리":167,"단법":2075,"대륙":468,"단백":191,"룹 ":318,"대를":566," 기회":105," 기획":174,"늘어":87," 기호":241,"룸 ":70,"뉴질":239,"사용자":630,"간된 ":72,"동방 ":109," 기하":177,"대로":1018,"대량":103,"대략":109,"룬 ":185,"생산자":98,"다목":60,"달린":72,"달리":502,"루 ":753,"倉三三":66,"다면":173,"동단":59,"동당":117,"동대":173,"대출":63,"다테":75,"대칭":171,"되거":99,"데서":86,"리 ":8468,"소년 ":186,"되고":1487,"닌텐":124,"도덕":82,"다카":195,"분야 ":184,"데뷔":278,"다케":114,"도동":111,"생산되":114,"생산된":72,"니티":109,"대책":66,"대체":365,"더스":149,"도리":106,"르네상":84,"링 ":477,"릿 ":94,"사업자":63,"도를":839,"도르":185,"립 ":1307,"더이":67,"대통":1071,"도마":97,"립·":64,"독립":1230,"단하":227,"막 ":535,"단한":192,"동력":113,"도모":729,"마 ":2884,"부에 ":2000,"도메":83,"동량":72,"달한":116,"달하":316,"만 ":5041,"리나 ":165,"표기 ":66,"파키스":153,"린 ":1716,"릭 ":421,"도라":117,"릴 ":239,"리·":102,"음과 ":230,"던에":74,"데시":125,"도록":1047,"도로":1920,"되기":565,"않았다":134,"데스":339,"월드컵":315,"단편":181,"더욱":101,"림 ":455,"다중":115,"어로서":101,"단조":57,"대역":76,"대에":1339,"담은":79,"률 ":308,"닿아":111,"도군":69,"도구":218,"독교":722,"도교":185,"데라":73,"대안":86,"도권":125,"대용":96,"륨 ":85,"대우":165,"당이":352,"델로":83,"단지":227,"대원":153,"사우루":135,"당으":106,"당은":139,"당을":126,"많고 ":57,"대외":71,"당의":341,"대와":275,"동가":358,"대왕":208,"동강":89,"도가":518,"산업의":264,"산업을":71,"다의":253,"다음":663,"댄스":109,"다운":233,"양시 ":144,"달에":108,"양식 ":64,"단이":638,"단일":159,"갖는 ":164,"단장":102,"컴퓨터":1613,"도공":91,"류 ":1253,"달을":122,"달의":59,"륙 ":150,"부와 ":386,"당에":141,"다이":1163,"에드워":105,"단위":503,"단으":253,"단을":267,"단은":441,"단의":275,"부산광":308,"國三三":57,"대지":106,"단체":1326,"니크":67,"데미":171,"데스크":83,"를 ":35910,"대중":546,"사용된":476,"사용되":1174,"동남":213,"아에서":342,"산에서":114,"도달":64,"릉 ":110,"도니":163,"만과 ":66,"컴퓨팅":102,"니터":63,"도는":1874,"능하":401,"능한":354,"름 ":818,"지휘자":123,"덴마":200,"니치":57,"대자":83,"동구":281,"동국":64,"더불":242,"동군":61,"동굴":86,"대장":219,"니카":153,"대의":1701,"대응":252,"동과":336,"대인":254,"대이":136,"사운드":156,"동계":192,"동경":61,"데르":128,"데리":57,"도네":236,"대주":148,"니코":105,"니콘":62,"어려운":65,"니콜":165,"른 ":3003,"더블":67,"도나":71,"유는 
":91,"동기":179,"대조":88,"대적":271,"대전":1323,"르 ":2357,"당주":83,"니케":131,"델리":75,"설명하":178," 가속":74," 간섭":72," 건담":75," 가설":110," 거대":183,"산주의":205," 개봉":160," 가상":296,"안양시":65," 가사":108,"또 ":312," 개별":96," 감소":102,"영토를":59," 강서":86," 경계":464," 것도":74,"꼽힌":72,"꼽히":75,"아우구":84,"나고":178,"날개":106," 감사":72," 가스":186," 결국":129,"여러 ":1385,"나게":61," 가시":92," 거두":92," 가수":406," 결과":470,"나간":63,"나가":628,"않았으":59," 개막":77," 각본":88," 개를":59," 것과":188,"류로 ":186,"리기 ":163," 강릉":100," 가미":74,"끝난":64,"끝나":75," 개방":107," 개발":2267,"개된 ":81," 개미":68," 건너":96,"끄는":125," 개명":89,"복합 ":62,"우승 ":162," 가르":250," 가마":103," 가리":1469,"선수 ":168,"데에 ":162," 가로":154," 갈래":64," 갈라":132,"야에 ":151," 개량":110," 가문":299," 건국":186," 개로":82,"동대문":97,"끌고":66," 건강":182," 가면":65," 강력":160," 갈리":98,"안에서":172,"우스 ":629," 갖는":232,"산지방":80," 감독":595,"약에 ":90," 개념":737,"위로 ":281," 가량":60," 가락":78," 강도":61," 같다":128," 강동":61,"언론인":132,"나머":129,"루아 ":64," 경마":60,"어리그":61,"나며":97,"나면":62," 개최":744," 개척":103,"나리":168," 개체":141," 고등":369,"나마":61," 검사":233," 갖추":158," 객체":126," 검색":137," 갖춘":117,"나무":576," 공동":773,"넓게":76," 가톨":515,"내려":193," 개칭":76,"대체하":94," 골든":60,"나모":58,"낭만":64," 계몽":59,"진흥원":106,"끌어":118,"끌었":93,"남면":62,"내로":74,"나미":178,"나바":64,"속된 ":167,"낱말":206,"내륙":89,"내리":117,"네갈":68," 고려":852,"진흥을":64,"세력을":62," 관구":68,"끼리":100,"내는":669,"남도":1277," 는 ":22667," 관광":330,"남동":614,"진흥에":83,"나라":1940," 가축":63,"낸다":205,"나로":1319," 가치":372," 고대":982," 광고":196,"너가":67,"나를":121,"나르":103,"날로":118,"왕이었":76,"보험 ":71," 건설":517," 경로":93,"냈다":532," 고도":101," 감염":75," 가정":260," 고고":115," 가져":155," 뉴 ":66," 강에":91,"나눈":68," 가졌":75,"나누":202," 가족":215,"여하기":269," 간접":70," 고구":194,"나뉘":181,"성립된":61," 각종":385,"나뉜":158,"변화를":114,"나는":746," 고급":147,"나다":684,"나님":90," 건물":348," 강원":365," 고기":109," 값을":58," 간주":115,"남녀":66," 값이":70,"난다":238," 공간":529," 가지":1815," 가진":647," 공개":359," 강을":88," 각지":78," 감정":108," 강이":197," 강의":193,"됐다 ":189," 공격":386," 같은":1691,"낙동":61," 개월":137," 개원":65," 강점":383,"드》":58,"도바 ":94," 같이":483," 게르":105," 개인":584," 개의":1532," 공과":57," 강제":192," 공공":336," 개장":93," 공국":74," 공군":133,"뀌었":102," 거부":106,"여하고":161," 강조":120," 과거":399,"나들":306,"남단":82,"남대":69,"났다":419," 개정":186," 관계":890," 개조":140," 공기":186," 공급":227,"상이다":365,"가로 ":579,"나급":179," 감시":80," 것들":82,"나기":90," 계곡":100," 개성":96,"변호사":207," 개설":79," 개선":238," 거래":171,"남겼":58,"집행형":59,"리그 ":853,"루어 ":63," 경기":2030,"남군":59,"내각":453,"남구":433," 계급":193," 가야":70," 개신":191," 가와":79," 개시":84," 간에":91,"남극":119,"소기업":149," 계기":111," 가운":1193,"내고":151," 가을":65," 고객":140," 거리":295,"나나":64,"상으로":821," 건립":120,"나노":70,"내기":127," 간의":259," 각자":58," 가장":2276," 경남":74," 가입":175," 가이":129," 공예":66,"립과 ":82," 고종":129," 공업":167,"되거나":99,"내지":117," 고조":84," 공여":67," 공연":289," 곳에":683," 고정":158," 고전":325,"냈으":120,"네바":84," 더 ":942,"남쪽":977," 경찰":296,"롯해 ":76," 《한":67," 곡이":150," 곡은":71," 곡을":68," 고위":306," 고유":247,"아이들":108," 교리":89," 관심":162,"아있는":62,"남지":88," 고원":69," 교류":254,"아이디":58,"육기관":94," 고용":157,"내전":134," 관습":82," 광산":108,"아일랜":478,"놓고":102,"농구":128,"내장":79,"내이":69,"날짜":67,"내의":242,"네마":59," 고양":209,"네르":65," 공식":585," 고안":196," 군도":61,"내용":752,"내외":188," 관세":105,"낮은":183,"나지":103," 국도":176,"농가":60,"났으":129,"남은":84,"내에":828,"남으":73,"내어":79,"남의":129," 군대":254,"남인":67,"남이":161," 경주":328,"남자":215,"남작":57," 구단":114," 경제":1255,"년도":117," 경전":153,"남원":74,"너무":67,"나중":141,"나주":65,"원래는":145,"널리":610,"날의":126," 경쟁":314,"날이":62," 고시":62," 경유":69,"남에":64,"난을":59,"너먼":74,"년대":962," 공사":132," 공산":365," 경인":69," 경우":1814,"남아":651," 결정":636,"남양":87," 겸임":57,"나인":191,"나일":67,"나이":2105," 
계열":315,"나의":720," 광범":84,"날에":128,"속기관":448,"동맹 ":104,"나우":78,"속도 ":68," 경영":338," 걸친":86," 국내":570,"기회를":64,"녀는":98," 계약":327,"널로":65,"나와":256,"나왔":84,"나오":543,"낙엽":63," 것처":82,"나온":207,"소규모":61," 결의":95," 고속":345,"나에":147," 거치":92,"나였":70," 광물":68," 고성":109," 곡선":103," 공부":118,"아일보":76," 걸쳐":620,"내셔":215," 겨울":100,"남시":112," 건축":501," 검찰":106,"류를 ":300,"나아":108," 교단":70," 관문":58,"어머니":289," 구글":105," 계승":325," 국기":175," 거쳐":379,"네덜":393," 계속":310,"설립하":135,"남서":500," 대 ":2047,"나시":87,"설립한":221,"나스":130,"남산":76,"마가 ":85," 국경":502," 거창":60,"설립허":405," 국군":59,"널드":63," 개화":61,"안으로":82,"약성경":63," 개혁":224,"대표 ":142,"년까":1214,"남성":246,"넘는":103," 당 ":135," 국가":2932," 구간":223,"년과":139," 경성":105," 결승":157," 강한":138," 강하":79," 공무":329," 강화":298," 고분":81," 관리":1066," 게임":2058," 계산":280," 게이":242," 게오":73," 것으":1465," 개편":301," 것은":475," 것을":1889,"년간":256,"내부":331," 관련":1271,"오는 ":541," 것인":82," 것이":2545," 공립":83,"나서":96,"아이슬":98,"아이스":118,"상청 ":78," 경상":930,"어릴적":114," 관료":212,"년경":335," 건전":249," 교구":126," 간행":78," 공로":79,"어린이":195,"내버":165," 단 ":185," 고문":87," 건조":136,"끝이":62," 거주":394," 공룡":99," 결성":335," 간호":57," 달 ":74," 검은":120,"당한 ":273," 개통":168,"내무":67," 거의":351,"끝에":210,"살이풀":102,"네그":122," 검역":63," 건의":89,"되는 ":3192," 교과":67," 거제":77," 것에":181," 거점":62,"남북":337,"남부":952," 다 ":148,"아인슈":58,"남미":85,"내면":60," 고르":57,"나비":119," 경북":70,"으나 ":1984," 경부":97,"육과학":285," 고리":78," 》은":86," 《아":76," 국왕":338,"노선":512,"년이":209,"년을":193,"념에":64,"년의":324,"년으":59," 구와":86,"녹색":141," 기기":139," 구역":567,"노사":86," 그들":324," 국어":100," 구에":103,"녀의":96,"념을":152,"비가 ":106," 과학":1159," 국장":81,"념으":242,"원래의":69,"평론가":63," 구이":74," 공헌":115," 동 ":488," 구의":119,"여를 ":144," 공항":153," 공학":187,"완전히":129,"략 ":236,"넷을":59," 기관":864," 도 ":370," 근대":279," 기계":313,"농민":122,"랜 ":157," 기갑":57," 그대":262," 기금":68," 공포":138," 구약":88," 기구":396,"단히 ":190,"램 ":396,"컴파일":63,"년에":3786," 골프":64,"랑 ":252,"네의":71,"네임":79,"네이":376,"기호는":57,"노벨":252,"랍 ":91,"람 ":490," 《사":76," 그녀":204," 계획":467," 《삼":92,"노보":62," 그는":841,"랙 ":101,"래 ":1320," 공통":185," 기간":439," 교육":1812,"누구":78,"사카 ":146,"세부 ":62,"노비":106,"사적으":114,"않으며":102,"노부":168,"사적인":86,"넷에":63,"넓은":250," 경향":146,"란 ":2827,"노무":59,"넓이":80," 경험":163,"부분에":132,"넣어":89,"남편":86,"라 ":7177,"널을":66,"널은":82,"락 ":201,"널이":171,"㎢이":539,"노바":67,"노미":136,"네오":95,"너지":586,"남한":59,"남해":125,"농림":676," 구속":101,"왕과 ":60,"랄 ":61,"논문":137,"부분은":122,"부분으":81,"부분을":239,"완전한":90," 군사":494," 관측":187,"부분의":331," 군산":83," 구성":1965,"부분이":237," 견해":86,"나폴":191,"왔으며":92,"네시":339," 광주":506," 근거":256,"네스":338," 근교":75,"노르":399," 교수":480,"소관의":1872," 결합":308,"널에":59,"아제르":103," 과천":67," 관찰":132," 금강":82,"노리":119,"북부에":293," 급격":65,"왕으로":142,"상에서":235," 결혼":171,"논리":281,"넘어":123," 국민":1106," 국방":227,"네상":85,"야의 ":314,"나톨":89,"나토":79," 권력":168," 구미":71," 광장":76," 구별":240," 관직":109,"네소":58,"년부":1645,"약은 ":85,"왕이다":132," 국보":88," 곤충":91," 교사":123,"네수":64,"나파":113,"노란":66," 그가":158,"노래":459,"나트":57," 군벌":59," 권리":410,"약을 ":246,"논란":123," 계통":136,"노력":131," 그것":304," 구분":433,"루살렘":108," 관위":91," 계층":133," 국립":899," 구매":95,"노드":76," 규격":131," 과정":755,"기하학":185,"너스":57," 공중":106," 데 ":719," 공주":130," 관장":217," 궤도":157," 광양":65,"나타":1198," 관점":110,"크가 ":117,"나키":64," 광역":123,"설립자":69," 국무":229,"념물":120," 고체":87,"개국이":87,"롯한 ":236,"약의 ":117,"뜻 ":134," 공용":96," 관악":81," 공원":222," 구로":190,"나카":127," 곳을":76," 곳으":130,"나치":255," 공유":240,"년마":62," 곳이":274," 관여":73," 공을":98," 공의":61," 공익":122," 공인":132,"데이 ":96," 공작":263,"노동":651," 공장":112," 공적":69," 공전":62," 구리":102," 공정":172," 구마":64," 과의":71,"북쪽 ":122,"룡이다":97," 높이":619," 
놓은":85," 누르":169," 높은":470,"연맹 ":267,"연합의":90,"유나이":86,"안전성":62," 뉴기":71,"이고 ":1432,"민 ":686," 노출":57,"미 ":1244,"동남아":110,"밀 ":162,"밍 ":299,"및 ":5874," 농촌":225,"떨어":849,"어서 ":645,"바 ":1223,"박 ":148,"루어졌":77,"《뉴스":59,"표된 ":94,"름다운":96,"루어지":209,"루어진":357,"발 ":1144,"반 ":1275,"안전부":66,"도를 ":838," 노트":65,"밤 ":79,"도르 ":69," 느끼":75,"서비스":980,"평양 ":255,"루어져":306,"방 ":1689,"배 ":369,"리그의":168,"리그이":116,"백 ":172,"오케스":66,"약자 ":66,"릭교회":264,"리그에":124," 로 ":1417,"왕국 ":201," 록 ":195,"세서 ":65,"온다 ":69,"붙인 ":79,"약성서":58," 단계":303,"약이 ":77,"인공 ":105," 대가":98," 대개":167," 담고":149,"북아메":178,"서부터":164,"떠한":155,"편의 ":84," 대기":219," 대규":162,"송된 ":57," 대구":602,"연합회":169," 능력":290,"보험자":61,"였다고":86," 대공":148,"였다가":68,"기하는":100," 단독":80,"벌 ":585,"번 ":888,"기한다":67,"르디난":61," 다니":91,"리기도":228," 다다":76,"보호를":80," 당나":182,"버 ":622,"베 ":197,"벤 ":68,"양에 ":120,"였는데":120," 담당":539," 뉴스":181,"법 ":1604,"범 ":366,"편이 ":71,"인과 ":273,"알제리":72,"벨 ":321," 대덕":65,"를린 ":83," 대대":58," 대도":60," 대동":68," 다리":217,"변 ":297,"독립 ":282," 다른":2061," 다르":415,"열린 ":403,"기하고":59,"용어 ":87,"벽 ":98,"문》":144,"별 ":439," 다만":62,"외에도":132," 달라":106," 뉴욕":473," 다루":441," 달러":131," 다룬":160,"병 ":351,"서부의":58,"편을 ":77," 뉴질":235," 단말":62,"서부에":508," 달린":64," 다목":59," 달리":473," 대략":109," 대량":101,"유네스":90,"폴리스":134," 늘어":84," 단백":169," 대륙":393,"르면 ":308," 대만":99,"용에 ":191," 대리":110,"르며 ":194," 대립":97,"볼 ":700," 대부":829,"도모 ":96," 다섯":189," 대본":114,"본 ":2748," 다수":252," 달성":171,"봇 ":74,"여했다":68,"쿠스 ":111," 대비":138,"단한 ":186,"보 ":1353," 대법":85,"가나 ":114," 대변":60,"복 ":311," 니시":68,"또한":786," 대백":573,"람과":66," 대성":143,"위를 ":1049,"라기":109," 대서":110," 대승":78," 단어":482,"람교":71," 다양":968,"래가":61," 당시":1308," 대수":150," 당사":200,"라고":4349,"이가 ":440," 다시":472," 다스":119," 단순":279,"았으며":218,"봉 ":181,"라가":293," 대상":627,"라그":59," 대사":173," 당선":156,"란과":57,"연합군":116,"라과":75," 도교":68,"상적인":119,"블랙 ":57," 도구":194," 대안":79,"라는":3492,"롯하여":102,"라니":63," 다중":111,"포르투":247,"라다":70," 담은":65,"라데":146," 대왕":88,"란다":72," 대외":65," 대우":121,"라도":290," 대원":80," 단지":69," 대신":337,"립공원":182," 다운":92," 댄스":86,"라남":362,"라나":128," 단위":404," 다이":676," 다음":656,"라노":165,"여하는":229," 단일":156," 를 ":1994,"량과":69,"았으나":73," 대주":65," 대중":469,"람들":653," 단체":752," 대지":68," 도달":62,"래된":291,"래되":137,"라라":69," 대응":233," 대의":90," 동계":163,"라드":158," 덴마":189,"뛰어":204," 대장":118," 동굴":66," 더불":242," 동구":140," 더블":62,"랐다":87,"란드":1305," 당주":81," 대전":1030,"라디":509,"래는":286,"상적으":63," 니콜":153," 대조":67," 동기":77," 대칭":110,"여함을":171,"불 ":296," 되고":141,"요일 ":146,"라미":123,"래로":202,"비는 ":92,"분 ":768,"오픈 ":155,"욕의 ":65,"북 ":391,"붙은 ":60,"부 ":7536,"위대한":96,"성립하":64,"라면":117," 동대":115,"라를":184,"송공사":57,"랫동":75,"러가":176,"라리":104,"라마":640," 대체":348,"달한 ":64,"요소를":93,"라르":90," 데뷔":275," 다케":102,"만년 ":186,"랜드":2035,"라루":63," 동남":210,"역학에":87," 도덕":66," 다카":170," 닌텐":122,"라로":150," 만 ":771," 동력":61," 도모":650,"표는 ":100,"사키 ":99,"뜻이":250,"뜻으":193,"뜻은":77,"뜻을":186,"래밍":301,"뜻의":103," 도메":76,"역할을":484," 대통":1037," 독립":1158," 도마":78,"보한다":522,"라비":303,"러나":429,"라브":117,"레고":91," 더욱":100," 단편":179,"라북":263,"래머":71," 데스":111,"렇게":90,"색으로":87," 도로":444," 되기":85,"라보":58,"래를":142,"어스 ":86,"럽과":65,"양사상":138,"인간 ":162,"아즈치":65," 리 ":87,"라바":138,"성리학":82,"럽게":67,"울시 ":92," 대형":216,"란스":62," 대화":129,"란시":124,"왕조의":225," 대회":1040," 동방":166,"선민주":511,"리드 ":197,"령과":63,"령관":126,"레니":104,"레닌":105," 데이":1061,"레네":63," 된다":760,"개국 ":83," 대학":1552," 대하":366," 대한":10310,"러드":105," 동물":531,"라스":508," 두고":421,"상임위":62," 대항":179," 대해":872,"대체로":110,"라시":210,"렀다":179," 맥 ":79,"예로 ":72,"려고":144,"라서":389,"란색":79," 대표":1228,"력과":186,"런던":299," 
동맹":199,"레나":119," 되는":598," 동명":90," 두개":57," 달하":98,"왕족으":63,"려가":88,"러다":58," 말 ":208," 데에":168,"융기관":72,"세르비":167," 도미":66,"라이":2502,"라인":656,"려대":69,"라의":696,"러리":161,"로가":256,"러를":61,"라위":80,"란이":133,"란의":103," 도심":76,"란을":119,"람에":161,"랍어":117," 도시":2717,"론가":102,"라자":84," 도스":70,"래시":96,"라와":192,"래식":78,"란에":60,"연합뉴":77,"라오":172,"래스":164,"려는":295,"라운":208,"라우":429,"랜스":139,"랑스":2207,"라엘":229,"라에":374,"레드":242," 도서":212,"라야":78,"뷔 ":108,"여하여":59,"여하였":106," 동북":136," 동부":560,"랑수":68," 또 ":308,"서울 ":354,"동북":139,"동부":1119,"도사":155,"도상":81,"맹 ":398,"도서":352,"독성":71," 나가":269,"도네시":218,"양의 ":498,"도스":130," 나고":67,"도시":3144," 날개":87,"도심":95,"동산":156,"동사":97,"데에":273,"말 ":452,"리는 ":1318,"편에 ":81,"담하":105,"도바":121,"됐다":193,"있게 ":311,"도미":81,"용자 ":266,"동류":60,"되는":3370,"단체가":70,"당하":866,"동맹":284,"대표":1493,"망 ":267,"유니온":66,"동명":97,"두개":63,"원시 ":244,"단히":190,"당했":130,"양자 ":62,"두가":58,"동면":71,"당한":473,"대한":10786,"대하":654,"대학":3157,"유닉스":93,"독부":116,"된다":2991,"설에 ":116,"되던":89,"대해":906,"대항":184,"덴의":130,"데일":69,"데이":1328,"도부":57,"양이 ":144,"붙어 ":106,"맥 ":265,"델이":112,"두고":457,"동물":811,"대행":62,"매 ":354,"되도":63,"록》":64,"대화":239,"임과 ":71,"동방":208,"도비":86,"두교":119,"업무를":406,"대형":256,"맨 ":157,"말기 ":110," 끝난":64," 끝나":75,"대회":1337,"론》":70,"도적":126," 나누":200,"동안":920,"동아":323,"독재":105,"않지만":68," 나눈":68,"독점":82,"머 ":157," 나뉘":181," 나뉜":158,"폴레옹":142,"도전":67,"동양":321,"패키지":72,"도인":180,"독의":116,"도이":817,"도의":1257,"독으":61,"독을":96,"독자":172,"돈을":58,"도입":373,"도자":409,"독이":170,"독일":2061,"도주":98," 남녀":64,"용이 ":226,"도중":100,"돌이":75,"동에":723," 나는":112,"먼 ":217,"동영":106,"동을":1013,"동으":429,"동은":199,"두루":66,"동의":526,"둘러":259,"동이":750,"동인":172,"붉은 ":71,"동일":302,"동작":235,"동자":198,"동장":81,"동원":102,"동위":88,"용의 ":212,"도지":99,"동조":206,"동전":186,"동적":145," 나들":286,"양을 ":259,"페스티":62," 나급":177,"동서":126,"동성":165,"두는":59,"동생":282,"펜실베":90," 남구":129," 내각":396,"운영 ":417,"되며":571,"되면":541," 남겼":58,"분석하":113,"도와":470,"돌아":199,"도에":874,"영향력":151,"도역":63,"도였":83,"도양":84,"도어":105," 남극":113,"가공의":109,"동시":546,"간다 ":126,"로프로":79,"영리 ":121,"유니버":126,"용을 ":583,"도원":152,"도요":90,"데타":127,"도움":144,"용은 ":157,"됐으":67,"돼지":84,"보편적":59," 나무":232,"서와 ":94,"들기":109,"사추세":98,"우의 ":106,"면 ":3592,"예를 ":240,"들고":156,"도쿠":187,"되어":4394,"들과":539,"도쿄":436,"가된 ":579,"되었":8669,"단체로":153," 나머":129,"등급":177," 내륙":67," 낱말":204,"등기":113," 내리":81,"독특":101,"명 ":4495,"등과":273," 내려":165,"등교":82,"몇 ":303,"되자":92,"두산":85," 낭만":59,"료이다":66,"울산광":189,"도청":90,"도체":168,"동진":82,"동지":68,"작가 ":277,"양성하":72,"동중":64," 남동":554,"능하게":61,"메 ":160,"영되었":153,"도착":72," 내는":102,"동차":918,"선에 ":199,"동천":62,"동청":79,"단체를":65,"동체":258,"며 ":20765,"두번":86," 나라":893,"동쪽":1485,"드가":250,"동화":155," 남서":495,"붙여 ":64," 남성":230," 년까":1209,"자가 ":1294,"몽 ":74,"영화를":78," 남산":66,"연방 ":286," 넘는":98,"동하":683,"동한":158,"동해":218,"동했":140,"못 ":69,"언십 ":93,"영화로":104," 년경":335," 년과":115,"몸 ":59,"룹의 ":106,"둥이":68,"드루":67,"들로":549,"소로 ":203,"양성을":74," 나아":105,"드로":556,"유를 ":151," 내셔":136,"드레":194,"있고 ":893,"드러":189," 네덜":389,"드라":851,"드래":93,"디나":115,"디난":62,"유대인":130," 남부":768," 남북":317,"뒤를":89,"드니":128,"드는":706," 끝에":201," 내무":66,"되지":653,"각국의":60," 남미":66,"와는 ":313,"두에":64,"두었":122," 내부":320,"두어":97," 년간":235,"들도":100,"도회":85,"의는 ":204,"도화":58,"세스 ":128,"몬 ":213,"영화는":78," 끝이":59,"돌프":99,"목 ":781,"모 ":847,"영향을":487,"독하":86,"든다":93,"도한":130,"도하":306,"영하고":172,"성북구":63,"때는":184,"데스 ":135," 남쪽":933,"인권 ":61," 내지":88," 낮은":182,"듀서":131,"각각의":117," 남자":211," 내용":717,"상징하":77,"디렉":63,"오후 ":120,"우에 ":227," 내의":129," 날짜":60,"단체이":367," 내전":128," 놓고":87,"단체의":76,"뒤에":153,"소련 ":87," 농구":95," 내장":76,"들면":80," 
남양":79," 년대":956,"들목":286,"때까":243," 널리":529,"여러가":94,"가는 ":557," 년도":79," 너무":67," 남원":73,"드벤":58,"분을 ":409,"일곱 ":76," 나중":141," 남은":57," 내에":577,"인구 ":628,"분은 ":160,"드를":373,"드르":92,"드림":70,"서에 ":154,"드리":485," 나온":189," 낙엽":63,"둘째":114," 나오":450,"북서쪽":240," 나왔":80," 나와":88,"일과 ":143,"상태 ":130," 남아":516,"등록":321," 나이":323,"디션":57,"들은":1170,"들을":1666,"등에":962,"떠나":66,"들의":1832,"폴란드":330," 나폴":174,"드이":229,"드인":87,"떻게":123,"록하였":99,"부의 ":953," 논리":244,"성시 ":68,"북서부":257," 넘어":117," 노리":66,"때문":1343,"들이":3059,"들인":129," 노르":359,"드와":268," 년부":1632,"유럽 ":636,"때로":134,"물 ":1678,"드에":481,"드어":129,"등식":95,"드의":964," 노력":128,"대중화":66," 논란":122,"드웨":238,"들여":122,"드워":117,"들었":219,"들에":883,"들어":2018," 노래":446," 노란":64,"롤플레":72," 노드":65,"디부":57,"맡고 ":193,"딸로":82,"능하다":188,"따른":285,"따르":402,"부인 ":190,"디비":83,"리》":59,"오키나":93,"문 ":1427," 나타":1132,"무 ":858,"원소 ":72,"양수산":143," 나치":207,"서양 ":162," 나카":79,"생으로":111,"묘 ":112," 뜻 ":130,"따라":2057,"생이다":99,"린다 ":670,"드시":125," 노동":422,"도록 ":1040,"되기 ":267,"도로 ":981,"따로":85," 년마":59,"용산구":85,"디미":126,"드스":58,"온타리":105,"따왔":76,"디지":599,"따위":130,"따온":72,"디즈":107," 년이":145,"우와 ":60," 노선":481," 년의":255," 년을":99," 녹색":130,"리다 ":103," 년에":3712,"드컵":316,"디젤":71,"디우":62," 누구":77,"디움":88," 농민":87,"디자":541," 노부":118,"디어":563,"디언":175,"디에":99,"유대교":67,"닌텐도":124,"디오":1235,"단편 ":96,"더욱 ":87," 노벨":241,"디아":393,"르만 ":85," 네이":195,"등지":173," 넣어":82," 논문":122,"비나 ":82,"도니아":125," 네오":57,"때부":94," 남한":57," 농림":654," 남해":111,"따서":170,"디스":467," 남편":86," ㎢이":537," 라 ":285,"등을":1777,"등으":538,"등은":87,"듯이":70,"등의":1888,"등이":1018," 넓은":226,"등장":884," 란 ":635," 넓이":75,"데시 ":86," 놓여":58,"분의 ":454,"등한":57,"등학":492,"또는":7080,"앤젤레":101," 네팔":63," 네트":572,"듬해":68," 높아":61," 놀이":123," 농업":387," 농어":113," 논쟁":99,"》를 ":132," 논의":77,"엘리자":71," 녹음":67,"등포":159,"분이 ":204,"르를 ":106,"득하":114," 노예":114,"영하는":250,"때의":171,"에선 ":72,"에서 ":25246,"때에":310,"북서태":59,"원수 ":69,"분자 ":68,"업발전":65,"딸이":133," 농산":100,"이기 ":244,"유로 ":235," 노스":140," 질서":67," 지수":62,"출신":1014,"출시":497,"치가":564," 지시":73," 지식":486,"활용하":92,"폭 ":101,"리미어":109,"치고":191,"포 ":705,"말라위":74,"회장 ":94,"치권":60,"폰 ":320,"치구":161,"협회 ":346,"치기":91,"축에":59,"폴 ":159,"추어":204,"친구":113,"최초":1069," 증진":256,"출연":224,"폼 ":65,"추원":63,"침공":92,"달린 ":60," 지어":146,"카가":116," 진압":74,"달리 ":397," 지었":64," 지역":3302,"축을":74," 직업":172,"축이":59,"축일":68,"카고":81,"추적":79,"총회":148,"추정":272,"출을":84,"축제":166," 지원":890," 직원":79,"출이":71,"출입":201," 지위":222,"출자":57,"출장":65," 지은":141," 집안":60,"출전":170,"홍콩 ":113," 지점":194," 지정":819," 지적":134," 진입":80,"트는 ":279,"추진":396," 직접":507," 직전":63,"치는":396," 지중":131,"츠를":86,"치단":151," 진주":131,"충류":73," 지명":221,"축물":164," 지리":258,"츠가":74,"신성 ":109," 주파":92," 직무":110,"최신":58,"등학교":448,"회적 ":239,"름이 ":380,"름의 ":136,"충무":57," 지방":2170," 지배":541,"출범":212,"단되었":80,"출발":166,"취득":125,"시뮬레":108," 진보":117,"편 ":523,"르시아":205,"총칭":263,"충분":86,"충북":60," 질병":153,"축산":260,"추상":117,"평 ":133,"최종":194," 즉위":118," 중학":70,"축소":72,"출생":339,"최적":97,"추세":127,"폐 ":75," 직선":63," 중화":785,"름은 ":937," 지속":294," 지상":157,"름을 ":720,"리시 ":78,"카마":74,"카리":164,"카를":321,"카르":370,"카로":94,"출판":563,"취지":64,"김일성":79,"말레이":208,"칼라":126,"출한":142,"출할":58,"호텔 ":84,"출하":502,"측에":78,"캐롤":76,"풀 ":129," 지하":383,"츠에":83,"축하":152,"카메":240,"시민들":57,"릭스 ":81," 지표":99,"칼리":211,"푸 ":69,"츠의":100," 진화":199,"풍 ":312,"캐릭":191,"식물의":97," 지휘":383," 직후":72," 질환":60," 집필":59,"회의 ":1094,"스스로":208,"치세":62," 집행":129,"대략 ":95," 집합":390,"츠와":84," 키 ":71,"카미":146," 지향":127," 지형":89," 진행":628,"품 ":726,"기준이":62,"실상 ":131,"치스":72,"치시":115,"비트 
":196,"층을":58,"층의":89,"기준으":386,"층이":72,"캘리":254,"측정":243,"층에":486,"리스 ":961,"흥과 ":58," 진흥":250,"름인 ":66," 킹 ":69,"캄보":77,"화이트":83," 집중":134,"치된":254,"치되":225,"카나":127,"취소":59,"치도":175,"충주":70,"측면":113," 집적":64," 지지":202," 지진":67," 지질":87,"친다":61,"류이다":187,"최하":230,"카데":164,"추측":97,"카니":70,"춘추":91,"추출":65,"획을 ":70," 짧은":155,"춘천":122,"칠레":106," 큰 ":1157,"치로":193,"치료":253,"치러":226,"캐논":62,"최후":86,"카드":366," 지칭":299,"충청":623,"캐나":550," 진출":298,"르지 ":83,"트》":61,"카라":247,"친목":162,"치면":57," 지키":86,"칠리":66,"표 ":478,"취임":66,"치를":548,"침략":84,"다리 ":68,"청소":381,"총독":192," 주민":210,"체성":73," 중립":59,"순히 ":106,"티 ":784,"최고":599," 주변":342,"틱 ":146,"틴 ":360,"차트":146," 정해":170," 정하":119," 정한":85," 제품":480," 좋은":135," 종전":57," 주를":95,"촌동":62," 종족":57," 중랑":60," 종종":129,"창출":92,"초등":129," 제한":242," 정확":274," 주목":103," 중력":134,"청사":253,"천시":356,"초명":74,"천이":116,"착한":88,"착하":100,"쳐서":143,"천의":92,"천을":85," 중복":60,"천으":103," 증가":212,"청에":126," 즐기":62,"최남":61,"철의":57," 중부":577,"파 ":1076," 증거":174,"시리즈":1281,"판 ":692,"천주":138," 주석":95,"다를 ":70,"청원":71," 증권":93,"팔 ":66,"체에":420,"초로":518,"체스":170," 조치":94,"형태이":106,"처에":159,"형태의":265,"천안":97,"최근":299," 조카":75,"천연":139,"등포구":133," 중반":138," 준비":90,"초를":88,"채택":233,"팀 ":383,"처의":89,"처음":1265,"팅 ":469,"창의":108,"쳤다":142,"차지":589,"채용":84,"창작":170,"처벌":87," 전투":1219,"참전":80," 주된":89,"참의":100,"초기":696,"체로":624,"책에":132,"창원":207," 준다":87,"천문":324," 주도":1756," 전통":766,"책의":157," 전파":198,"투스 ":105,"책을":239,"책은":69,"책으":129,"총괄":131," 조에":97,"처분":130,"마르크":179,"책임":299,"창조":274,"책이":259,"스의 ":2137,"참조":109," 조약":551,"체르":60,"체를":704,"천리":76,"천만":67,"참여":643,"찾아":159,"차적":58,"초고":65," 존스":65," 종사":119,"초과":70,"처리":574,"차의":173,"차이":655,"차장":60," 중남":73," 정치":2113," 주는":346," 제출":84,"창업":76,"차종":112,"찰이":62,"초구":256," 주니":71," 전화":182," 전환":128,"초대":489," 좁은":114," 전형":79," 전혀":90,"카이도":140,"마리아":248,"찰청":251,"다만 ":77,"청북":251," 조조":61," 조종":81," 전해":249,"초동":59," 좌우":58," 접해":75," 종이":322,"천사":64," 종의":95," 접하":505," 접한":376," 종을":67," 조지":462," 조직":785," 종으":82," 전후":94," 주로":2214,"튼 ":196," 졸업":314,"트 ":6929," 정통":81," 중단":103," 중대":70," 존재":1242," 조정":209," 조제":62," 전하":166," 전한":60," 저항":140," 조절":103," 적합":91," 조작":128,"틀 ":149," 중동":117,"첫번":86,"칭이다":298,"충돌":185,"초의":700," 집단":362," 주체":106,"초이":60,"초자":71,"출된":104," 지도":570,"체코":190," 지니":158," 지닌":190," 중점":61," 지대":176," 중종":67,"초연":81,"출되":164,"초에":140," 주지":67," 중이":151," 중인":117," 중의":484," 중장":65,"후속 ":76,"체하":137,"체학":60," 질량":159," 주택":112,"페 ":146,"체험":108,"최소":174," 지류":128,"촬영":165,"청하":95,"출력":193,"총재":61,"총장":195," 중추":105,"최상":86,"먹는 ":101,"추리":70,"천황":417,"취급":141,"형태로":346,"촉진":252,"체포":86,"처형":64,"총연":66,"추락":82," 주축":65,"천하":69,"퍼 ":383,"초점":102,"머니 ":100," 주최":206,"철학":918," 증명":166,"천체":227,"컬럼비":62," 지구":803,"추기":89," 조합":232," 종파":171,"챔피":311," 중소":133," 중생":144,"형태를":152,"슨의 ":72,"최되":220,"최된":194," 중서":84,"추구":133," 중성":88," 중세":272,"팬 ":60,"축구":1084," 주식":300,"슨이 ":59,"패 ":76,"청주":162,"출간":124,"체제":657,"다면 ":148,"체적":348,"추고":107,"최대":550,"체의":621,"축가":82,"시설 ":150,"체인":196,"체이":649,"팝 ":104,"천지":81,"창한":58,"청자":73,"청장":193,"체육":1359,"추가":282,"청으":59,"총리":508," 주소":166,"청은":176,"체와":186,"초반":121,"청이":131,"청의":110," 증기":82," 중요":799,"추는":61,"총선":155," 주주":79," 지낸":214," 지내":96," 줄이":91," 준정":80," 지냈":558," 주제":250,"초식":67,"쳐진":69," 지나":284," 지난":146," 중에":696," 중앙":1159," 주장":603," 죽음":141," 죽은":130," 주인":242," 주이":140,"기타 ":345," 주자":62," 주의":999," 집권":84,"률이다":60," 종합":466," 주위":126," 줄여":450," 좌표":69,"쳐져":57," 주요":741," 주와":621," 주연":98," 주었":87," 지금":536," 지급":139," 주에":722," 
주어":275,"충격":69," 중시":60," 조화":72," 중심":1728,"척추":104,"형 ":1793,"코토":79,"콘텐":200,"카에서":101,"협 ":123," 차별":95,"코트":155,"코틀":235," 창립":241," 체계":535,"코퍼":63," 체결":226,"쿠시":66,"쿠스":228," 청구":123,"크는":102," 채무":154,"크다":86,"키가":93," 차량":299,"징이다":166,"말리아":59,"쿠바":94,"크고":102,"크게":351,"황제로":69,"크가":121," 차례":291,"현 ":2291,"혁 ":104,"혀 ":208,"코타":86,"징으로":70,"크기":368,"대륙 ":89,"케팅":130,"컴퓨":1719,"험 ":357," 채널":298,"쿠르":111,"스와 ":613,"칭으로":286," 찾는":68," 창달":98," 창단":225," 창당":93,"대를 ":566,"컨트":120,"코어":137,"코에":118,"리비아":152," 채권":172," 차남":67,"지중해":127,"커피":96,"코이":72,"코의":222,"컴파":63,"허 ":115," 차단":59,"헌 ":139,"쿠라":165,"태는":61,"킬로":125,"대만 ":63," 첫번":85,"키르":123," 처분":74,"타디":124,"타로":108,"크스":277,"쿼크":68," 초대":479,"타라":76,"택되":100,"키며":125,"효 ":88,"타는":77," 차종":107," 창업":69,"크바":228," 초고":57," 찾아":157," 천만":60," 참여":607," 차이":388," 처리":445," 책이":152," 책임":221," 창조":192," 총괄":120," 책으":80," 참조":105," 채용":77,"회 ":3813," 창작":141," 처벌":80,"획 ":208," 창의":90," 참전":79," 차지":583," 창원":193," 참의":90," 초기":674,"타니":105," 천문":270,"크레":166,"크로":1143,"활 ":172,"클래":142,"클라":364,"크루":160,"타고":165,"타공":116,"지질학":58,"통칭은":57,"키나":206,"화 ":3719," 창설":237,"크래":193," 찰스":137,"크라":447,"화·":76," 청동":62,"환 ":439,"지지하":61,"킨다":751,"크메":76,"태계":75,"클리":189,"클린":94,"니메이":622,"타난":144,"타나":395," 차원":250,"타내":438,"타낸":182,"클럽":362,"클레":211," 창시":139,"클론":67,"클로":182,"키는":1068,"크를":175,"탐구":65,"태가":129,"크리":684,"황 ":673,"크림":80,"크립":110,"혼 ":107,"홀 ":76," 청나":165,"코프":157,"호 ":2171,"키고":232,"코하":57," 철도":572,"타가":60," 첨단":85,"쿠오":85," 청년":105,"키기":265," 차세":58,"홈 ":112,"시리아":214,"면과 ":135,"르에서":63,"순한 ":97,"플 ":168,"리아 ":2068,"코노":61,"까지를":69,"코네":66,"리안 ":121,"픈 ":178,"콩고":162,"커버":63,"프 ":1611,"코나":75,"측하":71,"담당한":59,"담당하":386,"대로 ":879,"켰다":167,"카지":59,"커뮤":174,"치체":194,"칭으":286,"치지":83,"칭을":179,"칭은":479,"카와":268,"리야 ":76,"카오":100,"침입":91,"캐스":57,"카운":74,"칭이":493,"카우":76,"컬러":63,"츠키":72,"컬럼":77,"카의":270,"코가":63,"카자":100,"카이":616,"카인":59,"신라의":85,"취하":134,"칙을":131,"칙으":83,"치의":242,"치이":165,"친위":80,"치인":905,"칙이":149,"케도":119,"친일":133,"치적":263,"카에":232,"지털 ":477,"카야":118,"치주":107,"치에":313,"카스":306,"칙에":97,"치와":131,"카시":93,"치원":65,"후부터":65,"케다":64,"커다":69,"카사":70,"리어 ":186,"컫는":618,"치안":73,"치아":129," 참고":71," 창건":80," 창간":97,"쿠데":107,"향 ":296,"리엄 ":215,"쿠니":115," 참가":722,"콘스":196,"코시":67,"리얼 ":67,"술한 ":81," 차관":105,"코스":429,"치인이":484,"치인으":223,"콘서":84,"케도니":118,"리에 ":892,"행 ":946,"황제이":83,"해 ":11775,"핵 ":98,"캠퍼":91,"쿠가":151,"케이":822,"케인":158,"항 ":503,"마스 ":131,"컵의":85,"코미":140,"코믹":87,"합 ":1352,"함 ":501,"현하는":126,"한 ":36846,"학·":96,"할 ":5085,"컵에":64,"칭호":106,"카프":65,"학 ":3682,"하 ":1115,"침해":108,"핑 ":85,"스어 ":438,"코리":211,"코르":181,"콜로":109,"칭한":198,"칭하":436,"콜롬":91,"필 ":124,"친환":81,"코로":88,"까지는":225,"리오 ":289,"스에 ":397,"콜라":224,"케스":103,"카페":78,"치하":2020,"치학":71,"치한":3549,"치해":358,"치했":87,"현하기":80,"콩고 ":134,"피 ":342,"픽 ":726,"카트":104,"핀 ":359,"까지도":76,"기초로":124,"기초를":61,"코드":673,"커스":115,"코딩":85,"카타":138," 언론":305,"중종":67," 알아":113,"진다":650,"진단":77," 안에":305," 않아":113,"황제 ":355,"지대":330," 않았":277," 안양":112," 에디":95,"중점":70," 어려":133,"중적":79,"지니":319,"지닌":190," 에드":185," 어렵":70," 예금":57,"지는":2060,"지능":93,"즈를":119,"중이":356,"중인":249,"중장":67,"중재":63,"주지":232,"중을":67,"중의":646," 에도":489," 액션":106,"주최":210," 안정":204," 안전":386," 얼마":78," 열대":176,"호작용":139," 아제":99,"즈미":73," 않을":60," 않으":216," 않은":394,"주체":150,"집단":449," 연대":172," 안의":57," 어머":279," 야생":70,"중지":63," 아이":1133," 역대":77," 아인":68," 아일":388," 어릴":143,"진동":81," 어린":266,"직된":74,"지되":322,"지정된":110,"지정되":305," 아우":255,"지도":837,"만들기":94,"지동":62," 아울":60,"지라":98," 
않지":68,"만들고":94," 에르":95," 양성":269," 아직":134," 였다":83," 양산":93,"증명":181,"다르게":104," 야스":61," 아즈":91,"즈베":96,"주축":66," 앞에":89," 아주":105," 옮겨":85," 열도":76," 알제":77,"지를":1015,"지르":60,"비주얼":58," 여러":1622,"지류":134,"질로":116," 양식":231,"진료":73,"지리":373,"때에는":117,"지름":79," 영등":165,"질량":179,"주택":185," 완공":67,"중추":123,"마드리":66,"지로":949," 엘리":185," 엄밀":69,"질랜":239," 업무":536," 옛날":72,"률이 ":74," 영동":66," 오다":74," 오늘":444," 여름":113,"주특":163,"리랑카":66," 아침":58," 약자":169," 아카":249," 왕국":656,"친위대":72,"직무":110,"주파":110,"비즈니":135,"치이다":129," 왕가":158,"진리":60,"지원하":329,"지막":450,"지마":92,"지만":2443,"쿄 ":369,"지명":245,"진이다":61," 연료":112,"신문》":123," 열렸":185,"지며":169,"지면":175," 예로":96,"진으로":71," 압축":113,"주한":124,"지부":157,"휘는 ":146,"주하":399,"진보":157,"률의 ":82," 액체":118," 오디":174,"지배":563,"지방":3019," 열리":279," 아케":110," 열린":474,"지바":88," 여명":78," 연맹":288," 온도":178," 여부":76,"쿨 ":70,"즈에":211,"준화":118,"째로":481," 외계":79," 에서":1916," 영문":107," 안토":103," 양주":57,"즈와":76," 외교":366," 아키":249," 예루":104," 양의":74,"질병":167," 양자":305,"률을 ":89,"칙이다":82," 연방":722," 양재":71,"쿠 ":497," 예를":204," 아테":125,"리브 ":93,"주화":87," 아토":61," 왔다":158," 올랐":110,"지상":185," 엑스":155," 언어":1410," 알파":121,"지선":61,"중화":873,"즌이":138," 양쪽":62," 얻어":102,"직선":85," 예방":102," 아폴":70,"다루는":196," 어원":87," 얻었":76,"지션":130," 오르":201," 오른":92,"지속":297," 억원":58," 오류":88," 영미":82," 오랫":76," 완도":63," 오래":309," 아트":82," 외국":347," 오랜":130," 온라":297,"중학":110," 아파":129," 아티":99," 어업":104,"칙으로":83,"즉위":118,"다룬다":88,"중해":135," 올라":148," 에스":395,"즈이":155,"지사":210,"즈의":362," 안티":91," 여성":671,"지스":206," 여섯":126," 없었":105," 연산":188," 역설":57,"즘의":66," 없어":111,"진수":64,"즘이":67," 약칭":414," 역삼":92," 아프":573," 역사":1519,"지수":116,"질서":165," 오리":271," 연쇄":64," 여수":102," 연속":241,"진시":95," 언제":60," 요구":281," 예비":61," 알프":80,"증을":98," 올림":799," 얼음":64," 올리":108," 차 ":1230,"지식":562,"지시":133,"능을 ":412," 얻은":80," 업적":116,"통적인":164," 없이":247," 여신":124," 역시":116," 없으":149," 에어":174,"지점에":130,"증진":435," 에이":242," 예산":87," 애플":267,"지어":207," 암호":144,"지아":300," 올바":113,"지야":80," 오버":65," 영상":317," 에우":58,"진영":77," 오브":259,"진에":166,"지와":387," 책 ":74,"직업":192,"직에":115,"지역":3831,"지연":62," 예선":234,"지었":74,"진압":79,"지에":1070,"지오":59," 채 ":79,"지였":87,"지인":168,"지이":710,"직의":93,"직을":212,"직으":106,"지의":930," 연애":57,"집안":61,"지은":150,"진은":66,"진으":71,"직자":136,"직임":89,"직이":307,"즈치":65,"직접적":93,"지자":110," 예수":291," 예술":486,"직원":124,"지원":1263,"진왜":117,"질에":101," 연안":287,"직위":91,"지위":252," 엔진":327,"질의":280,"질이":274,"징역":61,"켰다 ":152,"진정":61,"진을":324," 여왕":102,"진의":113,"집에":83,"투르크":97,"진이":164," 연예":76,"지적":171,"진입":88,"지점":270,"지정":864,"직전":64,"직접":511,"질은":90,"질을":426,"집을":67,"진주":148," 오세":84," 업체":175," 연재":197," 영양":101," 월간":68," 연장":152,"집이":101," 원격":57," 영어":930," 역을":64," 역은":61,"뉴욕의":60," 여의":113,"지주":81,"질적":241," 오사":151,"지중":143," 역임":393," 역이":108," 여자":305,"치체 ":79,"집적":68," 외래":64,"징이":263," 영역":323,"지질":112,"징을":98,"지지":438,"지진":90," 영업":143,"징으":70," 예언":71,"호이다":121," 여주":68,"니버스":58," 옆에":57,"집중":149," 왕복":62,"닉스 ":124," 연주":271," 오스":1097," 영웅":113,"황의 ":111,"큐 ":96,"쪽과":149,"수행 ":60," 용도":92," 월까":163,"짧은":155,"지청":89,"맞대고":167," 영조":60," 왕비":129,"황을 ":121," 예전":119," 예정":308," 영주":76," 운동":905," 외무":89," 영지":58," 완성":200,"수한 ":277," 오염":68,"진출":340,"지칭":300," 열차":200,"지컬":96,"크 ":3097,"니스 ":252,"통적으":123," 요리":491,"황이 ":66,"큰 ":1169," 요르":61," 엔터":268," 오이":81," 연출":58,"다르다":167,"클 ":181," 외부":231," 어항":59,"만든다":72,"만드는":271," 오전":103," 우라":85,"지키":149," 천 ":124,"큼 ":96," 에티":97,"까지의":267," 왕실":92,"지털":569,"수학 ":153,"통치 ":72," 
위기":77,"딸이다":74,"주광":202,"주관":327,"정주":71,"주국":154,"준공":69,"주군":206,"지정하":64,"지정한":88,"주교":442,"제조":448," 쓰는":224,"재한":349,"재학":69,"재하":725,"조부":69,"제정":539,"주공":78,"제적":455,"주고":191,"재했":212,"재해":152,"족보":60,"제의":631,"주가":248,"장했":136,"정전":83,"정적":273,"주간":164,"제일":134,"제임":217,"제작":1329,"제자":224,"주거":121,"정조":73,"종목":228,"제이":524,"제인":138,"전지":147,"전직":85,"정일":58,"정인":93,"맞닿아":98,"정자":73,"접전":71,"접적":132,"장해":59,"장하":1342,"장한":379,"장학":77,"현한 ":91,"제와":162,"제외":408,"적지":100,"잡한":94,"정으":195,"저지":133,"자흐":77,"정을":810,"정은":146,"정이":736,"정의":966,"제어":265,"점유":74,"점은":112,"제에":429,"점을":418,"점으":208,"칼 ":114,"정원":147,"제연":82,"점의":91,"전주":277,"종류":648,"제였":72,"점이":420,"자협":82,"전제":91,"칸 ":384,"제안":211,"장편":92,"르와 ":76,"조물":104,"재판":339,"족문":173,"종료":106,"제약":76,"자회":144,"전자":1283,"작해":97,"작했":259,"조명":72,"카 ":2034,"적절":91,"절을":58,"전쟁":1436,"정에":737,"절이":80,"전적":194,"젊은":111,"종로":421,"기한 ":75,"주년":107,"존속":102," 아가":58,"정체":133,"중기":285,"정청":74,"주나":75,"접촉":83,"조세":77,"조성":326,"정책":908,"정착":170,"조선":2884,"중구":359,"중국":2218,"져있":131,"조상":99,"중교":61,"조사":870,"점차":84,"중계":60,"줄기":211,"중간":227,"전철":111,"전체":673," 쪽 ":114,"절차":197,"전청":59," 쓴다":97,"전차":161,"주기":300,"정직":60,"정지":148,"제주":546,"주권":127,"재화":72,"재활":72,"조약":652,"조에":276,"족에":140,"조어":71,"조업":97,"전파":285,"중단":108," 아나":248," 아날":71,"조와":122,"존에":63,"전통":832,"주도":1871," 않기":58,"주된":119,"전투":1249,"만들었":181,"주되":65,"만들어":1188,"제출":94,"주는":689,"죄를":91," 알고":262,"주니":101,"주당":264," 악기":149,"중남":73,"준다":217,"제천":67,"졌으":236,"존스":69,"종사":195," 않고":534,"정치":2288,"접하":574,"접한":492,"토콜 ":80,"종의":527,"전후":101,"조지":513,"조직":975,"종으":458,"종은":82,"종을":142," 아돌":57," 아동":142,"정할":108,"제품":561,"정하":1154,"정한":1132,"정해":250,"컵 ":410,"종자":67,"좌우":61," 안동":112,"종이":791,"접해":436," 야구":716,"적화":77,"전혀":91,"절하":80,"절한":80,"르에 ":62," 약간":124,"젝트":419," 아니":980," 아닌":443,"조종":82,"제트":78,"전했":92,"조조":79," 않다":148,"주로":2465,"좁은":114,"전화":322,"전환":157,"컬 ":194,"주력":61," 않는":847,"족주":88,"전형":115,"존의":272,"저항":165,"적하":72,"헨티나":186,"조이":169,"족의":411,"조인":125,"족이":405,"조작":149,"족인":57,"전할":66,"전한":509,"전학":81,"전하":663,"종에":80,"전함":71,"전해":286,"적합":98,"조절":138,"조적":99,"조제":92,"조정":332,"존재":1248,"커 ":224,"정통":92,"졸업":318," 아내":168,"중대":89,"중도":73,"족을":151,"족은":109,"중동":130,"조의":562,"족으":212,"또는 ":7073,"주민":357,"제휴":62,"중립":69," 아라":177," 아랍":266," 아래":375," 양국":63,"제후":83,"조치":133,"주변":352,"즈가":129,"켓 ":162,"중랑":61,"종종":130," 안드":171,"제하":242,"제학":350,"종족":62,"정확":287,"제한":327,"정화":73," 아드":72,"주를":291,"전히":177,"종전":66,"좋은":148,"제프":122," 아들":897,"주문":90,"켄 ":72," 애니":636,"제회":58,"중력":144,"주목":108,"케 ":324,"중부":615," 아무":102,"즐기":62,"증거":195,"중복":64,"증가":223," 알리":168," 없고":94," 아버":489," 앙리":58,"증권":184,"치에서":60,"주선":105,"주석":120," 아미":102," 얼굴":67," 언급":141," 아바":62,"죄의":58," 쓰여":83," 쓰였":116,"주사":146," 아름":145," 아르":573,"켜 ":348," 알려":1676," 알렉":314," 알레":71,"조카":85," 앤드":80," 아메":328," 압력":91,"준비":133,"중반":143," 아마":238,"식별 ":61,"다루고":124," 아리":130,"즈는":180,"중서":88,"지개":84,"지가":490,"중성":112,"즈니":258,"지각":64,"중세":273,"중소":176,"종특":81,"지게":172,"조합":512,"조항":62,"조하":271,"조한":146,"족한":81,"족하":252,"종파":173,"지고":1085,"족행":61,"코 ":1075,"지관":81,"직공":120,"존하":237," 어느":236,"지구":953,"진공":59,"진과":118,"콘 ":142,"스부르":91," 여개":62," 여객":163,"조화":108,"중시":81," 야마":192,"중심":1840,"증기":111,"주소":193," 알바":142," 쓰이":700," 쓰인":357," 아부":59," 에게":145," 아브":60,"주시":655,"주식":354," 아비":83,"중생":157,"집이다":61,"준의":154,"준을":139,"준이":188,"준인":62," 연기":182,"중앙":1401," 암석":66,"주자":204,"주일":76," 암살":118," 아스":164," 어디":65,"주재":60,"준으":443," 아시":766,"주장":609," 
연극":159,"지나":378,"중에":1098,"지난":146,"지낸":215,"줄이":100,"지내":104," 에는":69," 어떤":681," 어떠":156,"주전":67,"주적":67," 어떻":123,"주제":335,"지널":67,"지냈":559,"준정":82,"즈로":101,"주주":795," 영국":2073,"중요":802,"지노":57,"주연":137,"지기":214," 아서":83,"주에":945," 여겨":280," 얻는":95,"콜 ":173,"주얼":72,"주었":136," 앨범":615,"지금":541,"지급":150,"질과":85,"주어":317,"진구":106,"좌파":64," 아사":106," 아산":69," 연계":84," 연결":746," 양력":106,"준에":93,"주와":720," 언덕":79," 안산":100,"주였":57,"줄여":450,"종하":70,"좌표":82," 에너":453," 연관":129,"증대":108," 연고":210,"주요":747,"질적인":82,"죽음":150,"죽을":61," 여기":344,"죽은":134," 없다":375,"주인":347,"주이":578," 어드":76,"주의":4317,"집권":101,"콩 ":169," 없는":546,"주위":137,"종합":594," 연구":2349,"통 ":1329," 이오":58,"찬가":157," 이온":105,"지에서":319," 자신":748," 이와":206," 이외":199," 정당":373,"차관":122," 일어":792," 이용":1530," 일에":1863," 이웃":64," 적도":79," 절대":86," 의존":77," 전도":66,"톰 ":68," 전동":120,"톱 ":100," 이야":437," 제네":68,"차가":214," 이어":589," 이에":430," 이었":171," 이전":899," 장수":124," 위해":3054," 위험":192,"창간":101," 있었":1191," 일으":392," 일을":310," 인재":83," 잠시":62,"짧은 ":150," 이정":57," 있어":761," 이제":64," 제도":983," 인접":485,"지역번":77," 인정":430," 이종":59," 일자":88,"참고":72,"급하는":96," 위협":73," 일이":219," 장식":99,"창건":81,"증진을":113," 정도":1045," 이유":297," 인용":64," 이익":269," 장소":242," 재산":268," 일요":83," 이자":142," 유통":251," 이의":67," 제대":57," 잠수":90," 위한":1962," 일원":183," 위하":1580,"참가":738," 재생":127," 이수":74," 전남":74,"다섯 ":151," 인쇄":158," 원형":97,"스미스":64," 일생":60," 저널":82," 재무":111," 이스":433," 이슬":305," 인수":172," 이승":76," 익스":106," 응용":360," 제기":143," 유체":74," 원활":65," 이성":104," 인사":173," 인상":58," 제고":88," 제공":975," 접근":267," 정권":212," 정기":77," 일상":122," 제국":1197," 정규":377,"증진에":72," 원하":87," 의장":114," 자생":67," 장비":172," 작사":165," 유치":74," 자살":101," 자산":102," 일시":89," 임시":200," 작성":314," 작센":73," 의정":119," 자세":74,"트남 ":220,"다목적":58," 전달":242," 인스":61," 위키":130," 위탁":113,"식물 ":196," 재배":113," 이시":95,"톤 ":252," 잠비":60,"토 ":1338," 인식":217," 의원":243," 유출":60," 종교":1217," 이치":73," 적분":68,"급하고":62," 장점":70,"회에 ":290,"채널":346," 재임":96,"기호 ":76," 전반":229," 일체":59," 재위":670," 정리":561," 인체":78," 인천":505," 장의":150," 정립":87," 정복":93," 재직":117," 정보":1417," 제반":57," 이탈":1113," 일컫":636," 일컬":82," 재즈":82," 자체":292," 입출":70," 은하":175," 제목":256,"찾는":69," 전부":159," 전북":64," 재정":153," 일치":69," 은행":189,"창당":94," 잡지":202,"창달":122,"창단":248,"기타공":108,"채권":223," 잡아":60," 작은":643," 전류":87,"차남":68," 잎은":103,"매를 ":89," 조각":203," 자유":815," 작위":102,"찾기":70," 자율":74," 인증":117," 작용":292," 인지":119," 자원":213,"비이다":74,"창군":88," 있으":3188," 전례":131," 있을":254," 전력":146," 입자":250," 이진":78," 이집":402," 입장":142," 있음":85," 이지":96," 자연":687," 이즈":59," 잭슨":67," 일종":701," 전략":271," 일제":542," 임의":128," 작업":288,"쪽과 ":149," 자에":59," 인조":152," 전라":664," 이중":112," 일정":464," 인종":139," 이주":143," 조기":59,"차단":60," 전문":804," 유효":82," 조금":94," 자주":201,"차는":139," 있지":409," 유형":175," 임진":126," 작전":169," 저명":67," 자전":132," 유행":117," 장애":251,"대부 ":69," 장안":62," 장악":76," 유한":96," 유학":142," 점령":153," 조건":224," 자이":125," 제사":70,"천국":78,"천구":94,"천군":186,"첨가":59," 이해":327," 이행":101,"화예술":214," 이하":307," 정수":130,"창립":248," 인해":475," 인한":143," 인하":140,"참모":66," 일하":99," 인형":68,"차별":151," 정신":352," 정식":444," 이후":1760," 전압":65," 적어":66,"청과":62,"체가":469,"청구":177," 제시":244," 전에":217," 일환":93,"체결":240," 전역":168,"체계":709," 입학":80,"체고":109," 적용":417," 조류":83," 작품":1113,"체국":70," 조리":61," 전용":143,"채무":169," 적은":107," 전원":79,"신들의":66," 저자":149," 저작":244," 적이":69," 저장":326," 전의":63," 점에":156," 정부":1251," 자치":673," 정비":99," 인텔":209," 인터":1102,"차량":357," 층 ":246,"막부 ":71," 전사":114," 전산":106," 장착":83," 저서":110," 의해":2065,"차로":318," 의한":371," 의하":461," 
의학":175,"차례":334,"시민 ":74," 전세":104," 전설":193,"름에 ":102,"차르":58," 저술":147,"차를":208," 의회":263,"착륙":65," 전송":205,"지역으":204,"지역을":380,"지역은":111,"카이 ":111,"지역의":387,"지역이":326,"지역인":66," 정사":64," 전승":126," 점성":173," 전신":162," 전시":258,"투 ":527," 장치":478," 정상":182,"천광":247," 접속":105,"스박스":60,"창설":240," 제주":484," 주기":199,"청동":74," 전차":91," 주권":84," 준공":68,"찰스":137,"체는":221," 정지":85," 재화":72,"화에서":184,"체되":102,"채소":57," 줄기":140," 중간":217," 전철":72,"지역에":787," 절차":120," 전체":567," 중구":353," 중국":2024,"체들":133,"차에":128,"르의 ":305," 정착":154," 조선":2644," 정책":576," 조세":64," 조성":287," 점차":84,"창시":143," 조사":590," 조상":79,"처럼":463,"차원":316," 존속":100," 주년":103,"처를":60,"초가":64,"카의 ":270,"차와":65," 접촉":74," 정체":105," 중기":271," 장편":91,"리메이":69," 칸 ":70," 제안":202," 전제":81," 젊은":107," 종로":405,"신문 ":168,"지어졌":61," 전쟁":1259," 적절":82," 전자":881," 저지":85,"차선":57," 제외":403," 점이":100,"청나":170," 종류":641,"청남":319," 전주":245," 정원":86," 칼 ":70," 점을":70," 제어":219," 점유":66," 종료":104," 재판":231," 제약":63," 자회":101," 제이":81," 종목":202,"찰서":86,"천동":110," 주가":75," 주간":132," 전직":83,"체나":63,"비에트":309,"차세":59," 정의":672," 접전":68,"청년":158," 제조":389," 주교":176," 주고":100," 제정":501," 주관":308," 재학":57,"처드":110,"첨단":94," 재해":66," 제임":214," 제일":119," 제작":1250," 제자":112," 주거":104,"철도":871,"탁 ":81,"타 ":1864," 용산":140," 오키":113," 영향":784,"탄 ":632," 유니":282," 유닉":93," 온타":105," 연호":83," 외에":213,"진흥":630,"징하":82," 유네":90," 요시":226,"지위를":75,"킹 ":248,"회사의":138,"지휘":406," 오케":84,"비잔티":171,"진화":280," 역할":582,"회사인":94," 여행":159,"회사이":255,"직후":76," 역학":81,"집행":234," 윈도":635," 연합":775," 유나":92," 요소":324,"질환":102,"집필":60,"집합":497,"집하":97,"진행":633,"코가 ":60,"지형":108,"킨 ":405," 운반":62,"진하":250,"진한":63,"진해":69,"비치 ":261,"는지 ":96," 왕조":583,"회사에":86," 왕족":171," 요새":71," 원리":213," 오카":66,"킬 ":155," 위대":99,"질학":58," 유기":176,"집트":409," 왕위":174,"직하":135,"직한":63,"직할":115,"지할":102," 왕으":89,"지함":108,"지하":1289,"지한":165,"키 ":1509," 왕자":123,"지향":165," 왕이":223," 영토":326," 왕의":63,"지했":204,"환으로":121," 완전":315," 육군":269,"지표":121," 원래":602," 우리":336," 유가":72," 예측":113," 와이":160," 에피":148," 유고":73,"리소 ":81," 월드":600," 우르":62," 첫 ":814,"루트비":99," 자국":94," 운전":103," 있기":213," 이나":121," 율리":81," 잉글":370," 의도":111," 유명":895," 우정":89," 작곡":845," 이끄":118," 이끌":202," 임기":78," 임금":138,"진왜란":116," 윤리":120," 자격":186," 우익":62," 총 ":839," 유리":168," 작가":611," 입구":69," 자가":81," 운용":154," 일까":724," 있고":843,"화인민":476," 있게":303," 원시":110," 용인":92," 워싱":115," 인기":260," 운영":1278," 인근":199," 위반":87," 우에":75," 오후":126," 이기":80," 유로":181," 원수":138,"화재로":72," 인구":3560," 일곱":88," 인권":151," 원소":289," 유럽":1117," 유러":115," 인공":238," 유래":578," 오호":60," 윌리":269," 용어":934," 이곳":157," 원산":126," 이고":250,"회원 ":168," 초 ":138," 일간":93," 이것":480," 인간":752," 오피":88," 오프":61," 오픈":259," 요인":73," 왼쪽":64," 오하":61,"회사를":67,"매되었":349," 위를":178,"능이 ":86,"택 ":158,"태 ":322," 우승":675," 오페":308,"회사로":118," 운송":76," 우수":237,"탕 ":82," 위로":69," 울산":328," 월부":254,"탑 ":86," 영화":1571," 우선":79," 유동":58,"컫는다":300," 유도":157,"화재단":118," 오토":147," 유대":297," 의거":134,"탈 ":268," 의견":96," 육성":259," 음반":525," 자동":798," 유성":164," 이래":213," 의무":187," 작동":93," 이라":1915," 이란":1010," 위에":358," 이러":460," 이런":125," 이렇":64," 우크":191," 인력":88," 의미":1870," 유신":72," 이로":113," 이론":891," 요크":70," 이때":111," 있다":14657,"칭이 ":130," 원주":160,"쪽은":318," 있는":7503,"활약한":126,"활약하":64,"쪽의":188,"화유산":156," 잇는":267," 이들":580,"쪽으":3081," 이듬":68," 있던":474,"터 ":8679," 인디":276," 의료":229,"턴 ":549," 있도":441," 자는":478,"털 ":563,"활약했":89," 육상":128," 장남":113," 웨일":79," 유산":69," 웨이":59," 유사":324," 장관":274," 이다":3516,"칭은 ":476," 위상":193," 장교":115," 음력":494," 장군":295,"칭을 
":179,"쪽에":725," 위성":197," 원인":186," 읽는":58," 원작":227," 원자":518," 이동":421," 음료":60," 움직":247," 일대":282," 장기":194," 유비":58," 인도":1080," 원제":62," 원정":83," 요코":84," 자녀":61," 원조":65," 유물":109," 우주":613," 이념":113," 요청":100," 자극":96," 자금":128," 웹사":127," 우즈":92," 으로":1449," 자기":443," 웨스":246,"지이며":179," 울주":85,"화이다":369," 월에":788,"화재가":70," 이는":558," 육지":73,"시되었":188," 전기":671," 재료":157," 유지":523," 전까":100,"회사가":85," 장르":269," 자발":83," 의식":186," 자바":143," 잘못":97,"지원을":145," 전국":513," 적극":94,"시되어":63," 이산":64," 이사":315," 이상":1002," 익산":65," 정교":161," 위치":5975," 제거":141," 자본":218," 장면":76,"회와 ":238," 음악":1252," 전개":206," 유입":69," 자매":61," 의사":412," 유인":76," 유일":468,"직으로":106," 임무":131," 일반":1861," 자리":455," 이복":68,"신도시":92," 자를":83," 자문":64," 입법":102," 일부":2106," 전공":90,"직이는":74," 장로":61," 운행":339," 운항":58,"커다란":64," 음식":283,"직이다":99," 유전":388," 유적":194," 일본":4755," 일명":82," 유엔":66," 인물":662," 인문":59," 유역":93," 자란":59," 자라":124," 인명":72," 자랑":66,"템 ":507," 음성":147,"기후 ":77," 임명":212," 인민":184," 유용":63," 이번":134," 이베":69," 이벤":57,"시드니":63," 이민":78," 우편":108," 이미":324," 이바":343," 워크":74," 자료":390," 이름":3638," 이른":205," 이르":661," 이를":700," 일로":82," 위의":84,"텐 ":76," 재단":468," 이뤄":69," 일렉":96," 이룬":137," 이루":1409," 위원":421,"테 ":299," 일련":190," 원칙":300," 이며":915,"지이다":426," 일리":90," 위임":72," 인류":278," 요한":197," 입력":181,"카와 ":236,"텔 ":308,"우가":342,"앙행":163,"용과":94,"암호":151,"올바":113," 수술":59," 수신":86," 순수":111,"仕 ":99,"에의":112,"창 ":189,"예상":58,"애플":275,"예산":117,"에이":482,"염색":75,"업이":475,"업인":321,"기의 ":1762,"업자":222,"없이":431,"여신":129,"었으":2299,"었을":161,"업으":181,"업을":673,"업은":134,"연수":157,"없으":154,"업의":671,"영사":61,"업적":191,"오버":82,"영상":486,"에우":80,"리아는":57,"어지":702,"역시":1578,"현존하":104," 수소":95,"어진":2055," 수송":124,"압하":71," 순서":114,"찬 ":123,"언제":60,"요구":295,"양체":81,"연세":57," 승격":131,"알프":91,"열사":59,"예비":145,"와라":117,"어족":218,"역소":73,"양천":58,"안하":96,"올림":844,"얼음":64,"착 ":90,"안한":115,"어졌":507,"올리":188,"올린":122,"어져":637,"찰 ":199,"에어":186,"연쇄":70," 수상":426," 수산":175," 수사":159,"여수":112,"에야":80,"단법인":2067,"연속":265,"연소":68,"기상청":109,"업용":63,"요건":96,"역삼":95,"역상":157,"어인":161,"어이":939,"아프":916,"어있":255,"글이다":95,"역사":1691,"어의":702,"오리":350,"얻은":80,"차 ":2442,"업연":100,"여성":713,"연산":224,"人 ":246,"없었":109,"여섯":133,"역설":57,"업에":293,"없어":112," 승객":70,"언이":107,"악하":95,"여서":479,"언을":82,"약칭":420,"아하":63,"역의":628," 수영":134,"오사":185,"역임":393,"역인":112,"여자":435,"역이":801,"스타 ":243,"연을":133," 싱가":97,"화에 ":525,"약하":110,"약한":190,"언츠":61,"연의":96,"오세":131," 수요":109,"연이":74,"업체":378,"업청":73,"연장":227," 수용":208,"월간":108,"영양":129,"연재":221,"약했":96,"영어":955,"열을":131,"스탄 ":326,"원격":59," 수원":209," 좀 ":59,"여졌":92,"열의":161,"영업":166,"원경":141,"용노":84,"영에":132," 손해":107,"연예":123," 소형":113," 존 ":356,"열에":85,"엔진":374,"약품":127," 심각":57," 수업":58,"역으":485,"여의":138,"역은":412,"역을":746," 수여":189," 시기":380," 소프":898,"예술":941,"예수":361,"여야":90,"연애":59,"연안":304," 소피":59," 시공":71," 속해":183," 송파":169," 소행":123,"역에":1144,"우기":66," 속하":1012," 시계":86," 속한":669," 신경":196,"여왕":122," 조 ":468,"어촌":137,"우고":127,"예선":249,"오브":295,"책 ":401,"채 ":187,"앙회":62,"용기":122,"왜란":118,"우구":91,"업지":61,"업진":68,"었지":266," 시가":100," 시각":159," 시간":720,"영리":199,"양으":132,"양을":259,"언스":130,"아키":326,"예루":109,"후로 ":162,"양의":505,"아타":75,"아크":78," 제 ":4289,"예로":129,"만이 ":196,"기자 ":326,"어스":174,"오디":190,"액체":118,"于 ":68," 수많":217," 정 ":183," 수립":238,"압축":117,"온도":198," 수를":81," 수리":128," 성향":66,"와나":75,"양에":292,"여명":94,"연맹":487," 점 ":68,"아케":128,"열린":478,"열리":280,"오도":95,"어선":74,"어서":782," 쉽게":163,"亀 ":88," 세포":385,"아침":62,"亂 ":1513,"왕국":706,"온다":87,"아카":282,"약자":201,"많은 ":1265,"약이":262," 
수록":335,"약을":246,"약은":85,"어사":104,"야이":202,"약의":117,"왕과":64,"스턴 ":192,"오다":102,"약으":104,"야의":315,"오니":86," 수렴":58,"여를":144," 선형":142,"여름":122," 센트":165,"오늘":448,"오는":554,"기장 ":96,"연료":154,"열렸":185,"스터 ":402," 전 ":1060,"연령":60,"왕가":164,"에밀":59," 손자":147,"에미":96,"어원":140,"亳 ":89,"오류":90,"올로":60,"억원":67," 숭배":60,"오르":366,"오른":100,"지하기":153,"오를":110,"요가":94,"예보":65,"어와":363,"언에":58,"왔다":418,"예방":135,"티드 ":85,"아폴":74," 수비":88,"양쪽":63,"알파":123,"얻어":102,"얻었":76,"올려":58,"어업":225,"어에":445,"만을 ":299,"오로":122,"안티":93,"올라":211,"에스":534,"엑스":201,"어오":76,"어온":57,"언어":1505,"올랐":113,"영미":83,"지하고":206,"오랫":76,"완도":69,"아티":246,"온라":299,"염병":62,"아파":201,"어야":172,"양지":59,"기인 ":130,"외곽":60,"양주":113,"외국":418,"오래":309,"아트":145,"리아를":70,"오랜":130,"외교":422,"오라":68," 수반":64,"亞 ":1949,"여부":101,"외계":90,"에서":29088,"안토":110,"영문":119,"에선":73,"아테":131,"아토":68,"양재":76,"역번":77,"연방":786,"언십":167,"양자":317,"만의 ":176,"양이":303,"예를":245,"와는":313,"어리":167,"어린":285,"어를":713,"모가 ":127,"오가":148,"어류":77," 소수":212," 셋째":71,"악원":61,"양부":117,"협의회":158,"야생":75,"안을":183,"안으":82,"아자":83,"악이":91,"아있":148," 소스":183,"아일":569,"아인":232,"악의":207,"역대":111,"아이":1321,"어링":63,"어릴":143,"악을":150,"아의":1952," 서태":96,"약성":129,"아제":124,"않은":399,"않으":216,"오고":97,"않을":60,"안이":117,"연대":266,"안의":235,"어머":290,"앙아":113," 수단":217," 선택":204,"연도":64,"역되":83,"안정":303,"안전":674,"여되":60,"에라":85," 수는":94,"앙에":77,"얼마":78,"열대":208,"어라":65,"권한을":66,"액션":116,"였기":64,"때의 ":171,"실시되":64,"실시된":74,"어려":133,"아어":558,"스크 ":410," 선출":244,"아야":80,"에디":101,"않았":277,"안양":115," 소셜":77,"아와":401,"알아":118,"아오":102,"예금":73,"어렵":70,"에드":190,"아에":642," 소설":770,"않아":117," 생활":496,"어로":1502,"악에":82,"佇 ":72,"화와 ":320," 설치":696,"아웃":101,"아울":62,"아우":283,"안에":740," 소송":210,"언론":352," 소속":1527,"영된":120,"영되":313,"애자":58,"업무":727,"액을":97," 속에":242,"애인":203,"글자를":64,"양수":197,"기술과":58,"엄밀":70," 선포":61,"업발":66,"오나":76,"엘리":216," 소유":309,"완공":68,"여래":59,"영등":168,"디자이":77,"디자인":463,"양시":233,"양식":325,"앤젤":102," 소장":152," 속이":74," 센터":199,"기술개":69,"여러":1623," 속의":125,"약에":115," 서해":142,"카에 ":110,"야에":433," 소재":587,"연되":60,"옮겨":85,"아주":160,"앞에":101,"열도":82,"알제":77,"오기":78,"에로":64,"였는":131,"엘라":90,"야스":153,"아즈":110," 수도":1216,"았으":301,"였다":6029,"야시":57,"아지":116,"아직":137,"양사":193,"양산":105," 속씨":88,"였던":503," 소아":74,"어버":79,"양상":57,"다시 ":463,"니와 ":61,"않지":68,"에르":318,"시즌 ":224,"에른":140,"에리":86,"앙정":74,"양성":389,"에만":120,"영동":84,"옛날":72," 산하":461,"씨식":95," 자 ":216," 소멸":81," 서적":93,"막을 ":84,"야로":143," 사항":165," 선의":80," 섬에":254," 사하":60," 상트":68,"아사":140," 섬이":161,"아산":86,"스키 ":291,"야마":570," 섬의":117,"기술로":69," 산화":93," 상표":91,"대륙의":64," 잘 ":546," 성우":86," 생태":163," 세에":95," 사후":156," 섬을":79," 섬으":146,"여개":65,"여객":267," 선종":67," 사회":2075,"야를":102," 섬유":63,"여가":61," 설정":173," 살해":171,"어는":597," 선조":133,"리아가":58,"단백질":180,"어느":238," 선전":63," 사형":76," 선정":248," 쇼군":110,"알베":57," 선양":102,"쓰인":360,"쓰이":716," 선언":226,"아부":73,"에게":2384,"안보":81,"양도":65," 상태":747," 일 ":16662,"알바":146,"르토 ":64,"양동":61," 서양":207,"었고":896,"애를":72," 소련":332,"막의 ":105," 석유":97," 소리":267," 소말":61,"어내":101,"아비":128,"어낸":69," 서유":66,"아브":61,"어나":672,"어날":73,"어난":835,"었기":138," 솔로":142,"어났":456,"업기":117," 서울":3544,"머리 ":75,"안시":66,"었던":889,"어떻":123,"투에서":133,"였고":684," 성직":123," 성질":259,"에는":3705,"었다":9042,"연금":63,"아시":1288,"금융기":75,"연극":189," 성주":57,"어떤":681,"어떠":156," 세종":300," 소비":642," 세조":70,"암석":66,"연기":258,"염기":67,"마을 ":150," 서초":356,"영국":2099,"에도":1265,"씨의":120,"영과":63,"에다":73,"어도":167," 세우":109," 세운":200,"언덕":81," 성인":182," 세워":312,"양력":120," 성장":260,"역과":296,"연계":123," 상하":185," 선진":141,"연결":758," 세웠":103,"연고":218,"안성":60," 
상품":255,"얻는":95," 소방":94,"여겨":282," 세와":139,"앨범":671,"아서":237," 성은":94,"얀마":118,"안산":110," 성의":77,"없는":620,"여금":57,"에노":70,"어드":104,"어들":198," 상호":668,"여기":352,"업단":115,"기적 ":104,"었는":201,"없다":384," 샌프":82,"암살":118,"아스":346,"어디":66," 상황":261," 수가":104,"마의 ":282,"연과":73," 성전":89,"연관":142," 성적":226,"에너":561," 장 ":243," 세의":435," 서쪽":905,"대륙에":112," 세이":184," 세인":106,"연구":3821," 사찰":82,"았던":129," 은 ":18128,"암동":115," 을 ":2159,"애니":672," 소녀":88," 상징":242,"양계":70," 소년":140,"양경":128,"양과":174,"양군":100,"양국":72,"양구":62,"비아의":150,"아들":1116,"야기":519," 서북":81," 서부":519,"아드":146,"때에 ":170," 상주":72,"아디":57,"았다":1000," 서브":58,"안드":183,"길이 ":175," 서비":842,"금으로":71," 서사":93,"실제 ":183,"俘 ":76," 생존":104,"아레":72," 석사":65," 사카":84," 세부":97,"아로":119,"알라":103,"앤드":86," 속도":241," 의 ":5102,"치인 ":144,"기술서":144," 성분":85," 읍 ":79," 성북":85,"디오에":59,"아라":308,"아랍":275," 소니":105,"아래":399,"압력":96," 서식":301,"리에서":329,"승용차":65," 선수":1292,"아마":286,"아목":89,"알리":180,"아몬":59," 사태":68,"아메":712,"알려":1678,"알레":75,"알렉":317,"아리":224," 소득":100,"아름":151,"어가":540,"어간":77," 서술":85,"아르":723,"아를":253,"직할시":78,"아바":91,"언급":141," 이 ":6364,"앙리":61,"쓰였":116,"얼굴":69,"아미":126,"지하였":117,"업과":191,"업관":61,"없고":95,"아버":569," 인 ":395," 생체":57,"업계":106," 센서":58,"양대":68,"지》":67,"르트 ":344,"어권":118,"니어 ":84,"아무":106,"아문":73," 성서":95,"기술부":292,"길을 ":64," 산타":90,"업가":98,"쓰여":83,"형적인":66," 세상":123," 위 ":236,"악기":342," 상수":101,"안구":75,"안군":96," 상속":67,"알고":264,"아기":73,"시스템":1332,"심이 ":111," 섬들":62,"안과":128,"아군":73,"니오 ":65," 성동":57," 세대":321,"실시간":101,"않고":536,"악구":59,"아과":121," 산스":84," 삼성":325," 성당":140," 세는":67," 웹 ":345,"악과":60," 사용":5046," 생산":1061," 사운":155," 사우":191,"기술적":76," 사원":114,"아날":73,"아나":434,"아내":271," 산업":792," 살아":158," 살았":134,"시스코":114,"았고":97," 사업":732," 산악":85,"지함을":85,"슈퍼 ":89," 서로":499,"진행된":77,"진행되":197," 상승":71,"않기":58,"심의 ":122," 소규":60," 생식":58,"아는":197,"기술연":67," 삶의":108," 삶을":67," 살인":124," 상에":106," 상업":212,"아누":93," 설립":4416," 서명":79," 사정":82," 사제":64," 소관":1915," 사적":136," 사전":183,"기술을":263," 산이":64,"기술의":232,"아노":202," 사장":102," 사자":61," 사이":2278," 소개":178,"기술이":179,"기술인":74," 사유":70," 생성":251,"기술자":91,"애국":68,"급으로":81," 손꼽":71,"안되":84,"안된":112,"기존 ":131," 선보":66,"즈》":67,"안동":130," 성모":102," 색을":60,"侶 ":207," 세르":235,"야구":985," 서버":233,"아돌":57," 상임":95," 선발":106,"심을 ":205," 선박":128,"아동":169,"앵글":61,"안데":62,"애나":90," 산지":91," 상의":104,"않다":149," 상을":58," 성립":280," 상이":114," 상인":70," 상위":92,"않는":850," 삽입":61," 사진":217," 상으":64," 성리":78,"지하철":230," 상원":72,"기술에":58,"악단":271," 세로":127," 소금":58," 세력":190,"아닌":450,"아니":1250," 설명":374," 상용":62,"약간":124,"아다":74,"말은 ":132," 사무":2313," 사물":92," 설계":434," 선교":139," 선구":63,"지한다":100," 서기":255," 새로":744," 새롭":78," 상륙":76," 상류":70,"리아어":250,"리아에":188," 섬과":100,"목과 ":70," 사법":159," 성격":204,"말을 ":157,"스주의":80," 성경":194," 세가":201,"통틀어":103,"트리 ":144," 성공":441," 성과":102," 세계":3678,"스코 ":228,"치적 ":191," 서남":105," 원 ":160," 센고":139,"지하는":209,"쓰는":237,"쓰다":206,"니아 ":1248," 월 ":22165,"많이 ":589," 세균":89,"쓴다":98," 세기":1685," 생리":73," 생명":334,"쪽 ":908," 샤를":130," 서대":93," 성남":123," 사상":928,"르체고":106,"캄보디":75," 생물":516," 산소":61," 선도":76," 산성":63,"집트의":91,"악가":125,"실이 ":60,"비영리":167,"아가":452," 세네":79,"화의 ":697," 사실":480," 성능":96," 사도":134," 외 ":86,"슈팅 ":70,"진하고":72," 생기":166," 생긴":156," 살던":66,"倉 ":985,"토해양":108," 상당":502," 상대":501,"캘리포":248,"말이 ":72,"회사 ":324,"지했다":169,"뉴질랜":239,"측정하":101,"메라 ":83," 사람":2374," 사랑":216," 사라":147,"쓰고":114,"화재 ":280," 사령":113," 사례":65,"기준 ":2089," 사망":418,"기술하":75," 용 ":77,"리아와":111," 사립":104," 사마":97," 사막":116," 사르":73,"확인 ":117," 선고":68," 서구":220,"리아의":735,"투아니":93," 
우 ":320," 선거":866,"쓰기":124," 산맥":264," 산림":264,"말의 ":116,"적이":892,"저작":265,"저자":156,"적인":4818,"亂並":388,"전위":69,"亂丘":135,"전으":231,"전은":327,"亂三":545,"저장":344,"적자":77,"전의":505,"정안":94,"亂丁":320,"점에":478,"전을":821,"적재":58,"자하":69,"전인":95,"전이":516,"져서":63,"작하":761,"작한":580,"작품":1198,"조를":342,"조르":59,"조리":96,"전용":189,"적은":2124,"적으":6892,"칭 ":682,"전원":97,"적의":174," 쓰기":94,"적을":427,"임했":179,"저우":183,"당시 ":1027,"침 ":97,"따왔다":64,"절에":116,"적용":431,"환의 ":69,"조류":112," 쓰고":102,"리우스":221,"전에":1678,"제시":329,"일환":97,"임한":108,"임하":316,"전역":201,"입한":122,"입학":90,"입하":329,"인형":87,"조로":212,"적에":131,"적어":75,"전압":72,"이후":1816,"잔티":172,"並國":285,"칠 ":70,"정식":676,"정신":481,"이화":58,"일한":420,"정시":83,"일하":307,"친 ":511,"정수":173,"乙倉":109,"이해":346,"인프":61,"이행":148,"인하":333,"칙 ":237,"인한":241,"인해":494,"치 ":2632,"신인 ":126,"이프":354,"제사":194,"이한":134,"이하":446,"三大":142,"신이 ":258,"접속":115,"이풀":105,"정성":106,"정서":149,"장치":620,"전신":167,"환을 ":105,"전시":500,"乙乙":117,"정상":286,"乙之":271,"之倉":297,"일파":102,"자키":58,"종대":66,"乙亞":103,"전승":143,"정사":185,"점성":174,"乙亂":105,"자크":65,"乘三":61,"이퍼":110,"乙並":232,"재청":67,"전술":58,"족들":119,"乙丁":307,"시절 ":80,"저스":73,"乙丘":178,"乙三":410,"三國":301,"인트":215,"전소":133,"이팅":121,"저수":94,"이파":70,"전송":216,"저술":150,"의회":639,"이티":216,"之亂":405,"명단 ":58,"之亞":451,"이틀":169,"이트":1360,"전세":108,"신장 ":61,"조된":63,"전성":138,"조되":83,"전설":222,"전선":133,"之丁":1040,"之三":2090,"之丈":89,"망을 ":100,"之両":59,"之並":1657,"之丘":553,"之丙":209,"之且":109,"장착":83,"전산":136,"之丹":143,"전사":228,"저서":111,"의해":2089,"之之":1269,"의한":468,"의학":321,"의하":736,"之乙":257,"의할":59,"제부":107,"자치":1144,"정비":150,"확장 ":96,"인텔":213,"이토":133,"인터":1169,"층 ":466,"丹倉":60,"조달":91,"응하":231,"조는":153,"이터":1189,"정부":1875,"정벌":59,"정법":122,"자체":347,"丹乙":84,"정변":61,"재즈":85,"丹之":209,"丹並":154,"제반":61,"정보":2086,"이크":941,"이클":251,"정복":111,"재직":118,"재지":325,"쟁으":65,"쟁은":76,"僅 ":60,"쟁을":248,"쟁의":208,"이타":167,"이탈":1138,"쟁이":241,"丹亞":73,"제법":87,"은하":190,"측 ":145,"제목":268,"이케":58,"츠 ":974,"리이다":317,"잡지":250,"화제 ":72,"조나":82,"쟁에":206,"일치":88,"은행":538,"이코":123,"리잡고":88,"丹三":207,"丹丘":102,"丹丁":217,"입출":70,"정받":96,"비스를":298,"並倉":230,"일컬":82,"일컫":636,"재의":598,"並人":80,"장점":74,"中三":72,"종과":75,"실을 ":195,"제리":117,"종교":1302,"장조":63,"이치":249,"재자":79,"並亂":444,"전보":64,"傭 ":180,"재이":59,"재임":98,"並亞":687,"재일":59,"적분":127,"정무":57,"이카":88,"제명":84,"재정":215,"이커":104,"전북":82,"전부":292,"中並":57,"中之":85,"장을":941,"인체":87,"장은":773,"장으":586,"정리":695,"両之":103,"인천":533,"졌다":865,"장인":245,"丟之":69,"両三":99,"장이":1191,"젤레":109,"장의":447,"정립":97,"両並":87,"並丹":89,"일체":116,"並並":1269,"並乙":240,"전반":248,"丘倉":118,"스케 ":67,"실은 ":1893,"並之":1226,"장자":107,"並丁":729,"제를":766,"並丘":363,"융합":89,"재위":678,"제르":165,"並両":95,"並三":1954,"並丈":68,"종결":57,"三倉":449,"丘之":421,"丘丘":278,"자주":224,"丙丁":90,"丘並":322,"丙三":80,"화적 ":138,"丘亂":138,"僅丁 ":58,"丘乙":185,"丙之":212,"있지":448,"재연":67,"제로":721,"조기":91,"丘亞":147,"실의 
":102,"잡이":62,"유효":82,"丟並":76,"조금":122,"이천":64,"재에":108,"丟丁":65,"전문":1050,"유해":57,"장애":383,"장악":78,"장안":65,"丁倉":187,"자전":175,"자적":159,"유행":121,"자재":65,"작자":143,"자자":71,"작이":256,"조건":316,"유하":391,"유학":156,"유한":279,"점령":156,"장에":759,"임진":131,"유형":215,"전면":62,"제라":64,"작전":209,"丘三":684,"丘丁":429,"임즈":102,"저명":67,"정렬":62,"은퇴":57,"三侶":71,"족과":104,"丈三":117,"잎은":109,"三丈":107,"丈並":101,"三万":69,"三丁":1471,"자원":529,"三三":4055,"三丘":745,"인지":345,"三丟":89,"三丙":67,"三並":2154,"三両":110,"인증":178,"丈之":100,"三丹":134,"작용":550,"三之":2117,"三乘":139,"三乙":351,"자인":870,"자이":2068,"작의":72,"三亂":531,"작으":448,"자의":1247,"작은":702,"잡아":77,"작을":166,"三亞":989,"전류":94,"조개":72,"자음":68,"三人":96,"자율":111,"작위":138,"조각":242,"조가":162,"자유":977,"자였":253,"임이":597,"자연":787,"리위원":62,"자열":60,"丁丘":425,"丁且":87,"임의":297,"작업":333,"입은":77,"자역":86,"입을":113,"작에":70,"丁丈":69,"임을":405,"자에":659,"丁丁":864,"제들":91,"丁三":1278,"전략":331,"丁丹":114,"임은":109,"임으":241,"자어":57,"丁並":785,"일제":561,"이중":126,"丁乙":272,"일정":485,"임위":64,"인종":158,"丁之":1054,"이주":163,"으키":165,"으킨":137,"이징":189,"丁亞":282,"이집":416,"이진":103,"이지":1231,"입장":148,"있음":88,"丁亂":313,"丁人":80,"전례":156,"있을":258,"있으":3281,"자와":521,"입자":336,"전력":195,"丈丁":89,"잭슨":68,"이즈":350,"임자":75,"입이":70,"일종":722,"정되":1028,"이잔":109,"정된":594,"식을 ":714,"재생":142,"이재":61,"이익":326,"이이":176,"재산":389,"이인":77,"장소":328,"황에 ":67,"유통":346,"일요":87,"이자":2950,"인의":1113,"위해":3093,"인이":4001,"인으":806,"인을":537,"임에":183,"인은":257,"일원":193,"위하":1705,"위한":2059,"식은 ":203,"장성":72,"이유":337,"인용":176,"졌고":82,"익을":208,"제대":96,"정동":479,"이의":934,"잠수":99,"이은":66,"제단":60,"정도":1147,"인원":74,"이종":60,"일자":116,"인접":487,"인정":511,"인적":116,"위협":78,"인조":157,"전라":669,"장시":86,"장식":115,"임워":63,"취 ":74,"잠시":62,"기지 ":76,"인재":122,"이제":109,"제도":1306,"있어":778,"이정":63,"인자":89,"이점":63,"장수":156,"이전":987,"통합하":160,"이저":329,"인인":120,"이족":65,"이조":77,"제되":75,"일인":105,"일이":470,"일의":940,"위험":212,"일을":556,"일은":174,"일으":424,"있었":1211,"자수":84,"이야":497,"제네":71,"전되":64,"절도":60,"이어":1308,"이언":277,"이었":2244,"이에":1970,"익에":79,"신설되":74,"의존":91,"전도":170,"이아":356,"전동":176,"이안":76,"캐릭터":185,"이외":210,"정당":438,"유키":92,"이용":1588,"이우":80,"일어":991,"장사":59,"시인 ":178,"일에":2143,"이웃":70,"제는":261,"이였":67,"인어":137,"이올":125,"이온":234,"이오":499,"식의 ":484,"인에":349,"이완":207,"자신":773,"유클":61,"이와":378,"자식":84,"의지":73,"일신":97,"일시":120,"일스":114,"잠비":104,"파는 ":76,"인시":91,"인식":243,"유출":66,"의원":853,"활을 ":172,"위키":154,"인스":118,"위탁":115,"재배":144,"인슈":60,"응을":89,"제나":96,"절대":92,"적도":104,"임시":210,"작센":85,"의제":61,"작성":335,"의정":233,"의적":274,"자세":81,"자성":62,"임스":290,"전당":60,"전대":71,"전달":281,"의장":161,"자생":114,"음주":74,"의자":305,"유치":96,"작사":221,"장비":214,"의인":546,"자산":166,"자살":104,"자사":95,"의의":352,"일상":129,"시작 ":73,"제국":1585,"일생":67,"시설이":123,"원한":148," 집 ":170,"원하":678,"쟁력":155,"식이 ":173,"이소":58," 신흥":72,"비스이":77,"정기":592,"제교":76,"을이":57,"이세":85,"제고":109,"이센":174,"인상":83,"제곱":73,"제공":1150,"정규":416,"이션":1407,"음에":319,"접근":281,"정권":313,"점기":433,"이선":105,"시설을":69,"인사":262,"兌 ":199,"이성":133,"익스":143,"활의 ":71,"입사":58,"이승":80,"원후":60,"이식":83,"이시":344,"의와":146,"이슈":60,"제기":237,"유체":83,"음의":177,"원활":70,"원회":1095,"임상":62,"이슬":422,"이스":2082,"仕三":62,"육청":92,"음이":197,"인수":210,"원형":177,"인쇄":185,"음으":564,"음은":101,"의에":273,"음을":324,"응용":379,"저널":109,"음원":65,"재무":115,"일성":102,"이수":87,"전남":92,"의약":116,"이순":57,"재로":240,"저기":73,"대백과":604,"잘못":97,"적극":95,"출 ":296,"치의 ":225,"점검":69,"장르":283," 시행":352,"육지":116,"재료":225,"유지":641,"시장 ":205,"전기":888," 신학":305," 시험":291," 지 ":95,"의식":267,"자바":152,"자발":97," 시호":361," 진 ":93,"점과":85,"人三":93,"전까":209,"을에":58,"자베":74,"장면":96,"자본":288,"제가":451,"제강":74,"음악":1495," 질 ":78," 신호":310,"정계":61,"정경":76," 실행":293,"칙을 
":131,"재를":220,"이사":565,"이산":85,"정과":206,"스카 ":184,"정관":93,"충 ":115," 신화":415," 실험":263,"정구":335," 실현":162,"이상":1108,"위치":6047,"제거":152,"익산":65,"정교":193,"일반":1994,"자리":1002,"익보":67,"이보":83,"이복":70,"유의":257,"임무":144,"일방":62,"유인":88,"之大":89,"유일":471,"입문":63,"자매":75,"의사":510,"육의":120,"마스터":68,"육을":218,"인보":77,"유자":93,"의상":66,"유입":79,"까운 ":156,"전개":214,"이부":78,"육자":126,"의성":63,"일보":361,"일본":4937,"유적":246," 실패":143,"유전":485,"전거":106,"추 ":144,"이븐":59,"율을":155,"이브":495,"음식":352,"축 ":318,"이블":337,"운항":73,"율이":70,"亞倉":61,"이비":217,"장로":122,"운행":351," 식품":226,"전과":411,"전공":113,"일부":2152,"전광":273,"우호":59,"춘 ":196,"신은 ":98,"자문":126,"입법":132," 심판":95,"작물":136,"율적":242,"유주":119,"전국":582,"위주":70,"심에 ":61,"신을 ":369,"之國":69,"之圓":61,"이묘":288,"자랑":69,"인명":89,"유역":120,"이므":100,"자로":1208,"인물":819,"인문":93,"유에":59,"유엔":67,"자력":167,"일명":83,"음성":162,"亞丁":182,"亞三":599,"음서":80,"亞丘":99,"자루":62,"워크":714,"이바":376,"유와":57,"亞並":511,"자료":546,"울특":2233,"이미":360,"亞之":492,"이민":84,"우편":143,"자를":1003,"용해":337,"자르":81,"용했":139,"육원":57,"용하":2610,"亞亂":216,"용할":380,"용한":877,"이베":186,"亞亞":176,"이벤":65," 시티":84,"유용":65,"임명":214,"인민":1273,"이버":362,"이번":163,"위원":1545,"亂乙":88,"의병":59,"일로":231,"재는":563,"亂之":408,"이뤄":69,"일렉":99,"亂亞":162,"亂亂":108,"일련":190,"장동":67,"이마":124,"위의":342,"신의 ":935,"위이":229,"이맥":82,"위인":79,"인류":317,"이름":3683,"이를":1128,"이른":205,"이르":700,"재단":793,"이리":104,"입력":182,"요하":229,"이머":65,"요한":1121,"이먼":60,"이메":59,"디지털":568,"이며":7197,"위임":83,"위자":61,"장된":112,"장되":154,"인리":94,"인먼":269,"비스타":71,"위조":60,"자라":247,"재되":117,"자란":69,"원칙":340,"은색":62,"이면":165,"이명":60,"亂倉":143,"일리":688," 신체":155,"외하":104,"외한":225," 스포":456," 신청":73,"매사추":92,"육상":138," 시초":66,"장남":121,"우치":86," 스펙":67," 스페":757,"웨일":116,"장난":59,"유산":310,"유사":363,"웨이":486,"자대":65," 시청":126,"쟁과":63," 丘 ":87,"있도":442,"자니":97,"입된":112,"입되":233,"마쓰다":198,"의를":445," 스파":150,"자는":1092,"일드":67," 스티":145,"원지":101,"있던":494," 스트":337,"인디":287," 스튜":250,"인들":571,"워진":166,"의료":334,"요크":80,"이때":111,"있다":15164,"원주":176,"위스":240,"재까":179,"이디":118,"있는":7991,"우체":68,"의로":182,"으므":160," 즉 ":662,"메리 ":61,"유신":98,"이루":1453,"일러":233,"이룬":139,"우크":207,"위와":98,"의미":1879,"일랜":486,"인력":158,"자들":1135,"위에":756,"이론":1040,"이로":415,"이렇":64," 스프":81," 시카":80," 스피":206," 스핀":57,"의무":269,"의문":59,"이런":131,"이러":633,"통하여":478,"이라":4666,"이란":1468,"이래":223,"작되":526,"작된":385," 並 ":184,"음반":546,"자동":1079,"최 ":64," 실천":136,"작동":101,"유성":172,"육성":326,"장경":65," 신조":74,"이는":2179," 슈팅":70,"장관":476,"이다":44715,"재개":63,"장과":268," 실제":432,"이니":195,"재가":173," 시조":152,"원예":81,"우지":75,"원에":608," 싱어":64,"장갑":60,"월에":803,"장거":57," 스코":288,"잡고":156,"울주":88,"으로":39730,"자기":738,"웨스":286," 신인":71,"움이":79," 신이":79,"움을":181,"이노":91,"요청":103," 신의":77," 신장":97,"비슷한":239,"유발":57," 스케":164,"비슷하":194," 시절":186,"우즈":100,"유민":79,"웹사":130," 신자":91," 시점":66,"이네":70," 시인":309," 시이":157,"유무":68,"이너":235,"통해서":112,"유물":138," 시의":190,"작권":131," 실용":67," 시장":413,"자극":108," 스카":114,"자금":194,"시상식":63,"우주":734," 시작":1908,"이념":153," 스톡":57," 스토":195,"투이다":152," 실질":132,"자녀":65,"인되":81,"원조":101,"인된":61,"웨어":1107,"이듬":68,"이들":778,"잇는":270,"이드":728,"움직":247,"워졌":78,"음료":81,"일대":304,"깊은 ":79,"자나":141,"왕후":204," 三 ":313,"원전":1057,"유비":60,"장기":260,"원정":112,"요코":91,"인도":1254,"원제":94," 스테":207,"원의":775,"용차":86,"원인":277,"위성":291," 스키":157,"원이":1024,"으면":282," 스타":592,"으며":8023,"읽는":62,"원자":600,"원작":230," 스탠":103,"원장":376,"이도":263," 싸움":60,"이동":495,"인데":229,"울지":66," 심의":80," 슈퍼":229," 丁 ":138," 신주":59,"위상":207,"장교":142,"재건":66," 시즌":547,"음력":497," 심장":68,"위생":71,"이던":150,"이더":241," 십자":179,"원으":778,"인다":456," 
스크":137,"장군":389,"원을":809,"원은":649,"이데":100,"육부":79,"형태 ":63,"일군":61,"원수":199,"유롭":69,"원숭":77,"三爲":60,"우에":529,"오후":131,"이기":1185,"유로":412,"이그":62,"일과":149,"인권":206,"용이":398,"용인":137,"임과":78,"운영":1427,"용의":233,"워싱":117,"용은":159,"용으":300,"용을":584,"인기":282,"인근":202,"위반":100,"우와":70,"유래":580,"인공":464,"용에":213,"인격":64,"용어":1083,"윌리":271,"초 ":560,"원생":66,"요제":75,"일간":105,"일가":59,"일곱":97,"인구":3663,"유럽":1343,"冲 ":93,"유러":115,"원소":347,"오호":62,"촌 ":246," 실시":523,"인과":324,"이끌":202,"자고":60,"우저":126,"이끄":118,"의도":237,"유명":908,"우정":123,"작곡":859," 썼다":61,"윤리":162," 시와":120,"임금":151,"자격":236,"옹호":75,"임기":171," 시외":158,"의된":97,"대성 ":58,"이내":59,"의되":83," 시위":129,"운전":120,"자국":131,"자군":113," 신용":132," 실업":107,"율리":85,"작과":77,"잉글":396,"울의":68,"이나":3192,"있기":222,"이남":57,"유를":151,"있고":900,"일까":746,"자간":58,"운용":176,"자가":1369,"우유":59,"울에":84,"위법":67,"원시":421,"용자":756,"있게":320,"유류":81,"의는":204,"일기":79,"우이":219,"우익":68,"우자":69," 시에":386," 신앙":144,"입구":92,"입국":115,"치에 ":239,"작가":798,"유리":178," 신약":73,"총 ":953,"우의":129,"월부":255," 스위":277,"우수":283,"운송":99," 시사":57," 중 ":2659," 시상":122," 시설":392," 신사":98,"운수":104,"시의 ":1040,"오페":337,"우스":1158,"리칸 ":115,"위로":336,"의거":140,"용소":61,"우선":90,"영화":1822,"활성화":218,"의견":100,"리카 ":773,"유동":91,"원부":65,"유도":196,"리적으":85,"용수":83,"외전":91,"울산":345," 스웨":362,"리적인":105,"으나":1988,"이가":479,"울시":173," 승용":68,"오픈":264,"요인":96,"욕의":65," 심사":67,"오프":166," 시스":1156,"요일":287,"이거":210,"인가":135,"인간":782,"이것":484,"오피":231,"이게":62,"오하":85,"왼쪽":64,"원사":88," 실수":85,"이고":1474,"원산":135,"이곳":157," 승인":135,"원상":70,"우승":695,"우시":59,"오폴":57," 신성":183," 신설":178," 신속":77,"위를":1049," 시민":365,"오카":257," 식민":417,"원리":261,"여하":917,"여한":139,"여할":326,"여함":202," 스스":212,"요새":77,"왕족":171,"왕조":659," 심리":249," 신문":265,"위대":223," 시뮬":113," 식물":383,"위는":290,"스이다":194,"융기":85,"윈도":643," 식별":121,"오케":99,"쳐 ":1031,"연합":1184,"요소":392,"유나":103,"연하":111,"연한":93,"여행":192,"요성":113,"여했":135,"역할":607,"역학":339,"역한":78,"역하":120," 亞 ":77," 신분":107," 주 ":4010,"매우 ":477,"통합되":76,"요시":396,"오클":62,"리이며":65," 수행":872," 수학":787,"연호":88,"메니아":86,"외에":406,"유네":90,"倉亂":100,"육대":57,"오토":172,"예프":66,"외의":195,"유대":321,"倉亞":80," 순환":84,"영향":799," 줄 ":89,"유닉":93,"용성":58,"유니":298,"의가":147,"온타":106," 신비":72,"倉丁":326,"영한":96,"영하":577,"유는":91,"倉並":271,"倉丘":124,"倉三":482," 수호":59," 준 ":71,"음과":236,"용산":154,"오타":75,"倉乙":106,"오키":160,"倉之":304,"월드":684,"첩 ":70," 식량":97," 신라":319,"우를":179,"우르":146,"첫 ":815,"업협":85,"유가":172,"우리":506,"웠다":116,"원들":183,"오지":91,"맡은 ":78,"울러":57,"업한":67,"업하":195,"대사 ":83,"우루":219," 시라":58," 亂 ":146,"웨덴":355,"유고":84,"육과":405,"육관":1166," 신뢰":95,"체 ":1944,"청 ":1596,"예측":116,"와의":292,"와이":323,"와인":57," 시로":87,"업화":69,"빛의 ":78,"카스 ":79,"에피":155," 시마":63,"회민주":65," 쇼핑":60,"원래":605,"빛을 ":75," 술탄":78,"위나":89,"유권":118,"육군":281," 시리":1297," 시를":88,"완전":346,"칙에 ":86,"유구":70,"영토":332,"왕이":422,"왕의":270,"스위스":188,"원로":84,"왕을":61,"왕으":142,"운반":68,"왕자":127,"리자베":66,"육기":102,"유기":217,"온천":62," 시모":72,"왔으":124,"니의 ":167,"왕위":174,"오염":119,"요로":94,"우디":104,"메르 ":60,"언트":67,"우드":176,"오에":131,"오와":86," 시도":132,"열차":260," 스몰":57,"운드":310," 승려":122,"디즈니":97,"요르":77," 乙 ":95,"위가":175,"왕성":75,"스이며":63,"侶三":70,"외버":131," 신도":90,"오이":168,"오일":61,"엔터":311," 스미":84,"요리":555," 시드":98,"연출":61,"대상 ":99,"오의":95,"위공":239,"오전":108,"왜성":61,"위계":74,"용량":87,"어항":124,"원대":91,"온의":63," 승리":259," 순천":93,"어하":99,"어학":198,"후반 ":84,"외부":238,"원도":356,"에트":351,"위구":57,"우라":154," 수출":183,"척 ":91,"처 ":416," 수치":83,"우려":58,"왕실":105,"에티":108,"원동":67,"천 ":830,"요미":57,"어휘":60," 죄 ":83,"우로":109,"위기":195,"철 ":398,"워드":249,"연적":84,"원고":58,"치와 
":115,"열이":89,"영역":375,"언컵":95,"외래":67,"원과":233," 종 ":214,"화시키":67," 수익":76," 수의":96,"실에 ":74,"원구":83,"원국":135,"여주":182," 수입":181,"옆에":62," 순위":121,"예언":76," 수장":78,"원군":252,"영원":61,"영웅":130,"오스":1498,"연주":311," 수정":210,"옥스":58,"몽골 ":105,"였으":1734,"오시":78," 丹 ":77,"왕복":96,"였을":67," 싱글":369,"여지":119,"여진":185," 시나":74,"영을":122,"영의":146," 시내":231,"영이":82,"영자":87," 숙종":91,"영장":73,"양한":780,"양학":62,"양하":218,"우는":273,"월까":163,"양항":61,"용도":148," 수준":219," 슬라":77,"예인":65," 실내":91,"예의":63,"용된":671,"용될":69,"영조":63,"왕비":137,"용되":1573,"예전":122," 숫자":146,"와서":75," 수집":187," 수직":64," 之 ":340,"운데":1232,"예정":321,"우도":237,"영주":89,"완성":213,"울대":190," 슬로":136," 스리":80,"운동":1815,"외무":92,"어트":94,"대문구":151," 스마":121," 시대":2063,"였지":104,"영지":83,"어파":69,"소시":75," 분쟁":188," 붉은":115,"소스":310," 부정":201," 분자":245,"세청":93," 부인":212,"셋째":71,"소수":273," 부작":91,"수다":87,"수단":483,"세츠":103,"시와 ":384," 브로":98,"티나 ":145," 붙여":234," 브레":111,"선택":248,"름에서":87,"서태":158," 붙어":133,"손실":63,"단의 ":266,"수는":372," 브라":642," 부족":243,"송사":95," 브랜":223,"션으":79," 불안":63,"션을":118,"션은":114,"소사":75," 분야":1053," 부여":249,"쟁 ":829,"선출":248,"썼다 ":65," 비극":78,"생했":81,"생하":591,"생한":515," 북아":293," 북위":59,"성체":67," 부위":68,"설치":714,"수나":99,"소속":1553,"소송":393,"신에 ":117,"명과 ":133,"속성":78,"소셜":82,"션의":93,"소설":947,"생활":791,"션이":221," 분열":80,"생화":64,"따온 ":72," 부착":61,"선포":63,"소위":58,"단은 ":440,"소와":122,"트라 ":199,"속인":60,"속이":290,"소자":103,"소장":374,"손으":98,"속작":76,"소재":625,"석하":208,"서해":154,"석학":150,"손이":173,"소유":337," 부천":92," 부처":87,"속으":235,"소의":301,"속은":58,"진화 ":77,"속을":100,"기에는":229,"소이":246,"속의":608,"소인":65," 비디":406,"센터":804,"수도":1289," 블로":143," 블록":76,"술대":62," 분지":73,"술단":60,"수동":96," 블랙":148," 블라":159,"시오 ":93," 브루":131," 부지":65,"단을 ":267,"속에":468,"수들":238,"소에":318,"소아":88,"속씨":93,"수되":71," 붙인":76,"서트":83," 블루":149," 북쪽":939," 브리":239,"성학":57,"성한":344,"성하":1173,"수를":679,"성했":93,"점 ":566,"컫는 ":311,"접 ":423,"술로":114," 비롯":537,"트남의":86,"수리":211,"성향":92," 비례":62,"수립":264,"정 ":3475,"수많":217,"수면":90,"성화":282,"정·":70,"세한":64," 부터":293,"수목":78,"제 ":8681,"수라":68," 빌딩":89,"송에":109,"저 ":691,"선하":86,"적 ":6488,"손자":185,"디스플":113,"속적":207,"선형":177,"센트":222,"적·":123,"화민국":208,"설하":120,"설한":95,"세트":80,"전 ":5853,"대부터":165,"수론":65,"수록":388,"수로":616,"절 ":333,"수렴":66,"송이":144,"송의":131,"송을":189,"설화":59,"세포":573,"선후":58,"쉽게":176,"형식의":79," 분파":90,"형식으":115," 비밀":142,"시에 ":1035,"다이 ":78,"스과":112,"져 ":1995," 분포":280," 빠르":144," 빠른":99,"수비":112," 부품":80,"술부":294,"숭배":68," 부하":57,"형식이":60,"젠 ":160,"젤 ":96," 부통":85,"디스크":171,"술문":89,"수반":73,"수법":94,"스웨덴":355,"단위 ":102,"식에 ":147,"스가":801," 본질":83,"다음 ":231,"의 ":113266,"기업의":151," 불렸":246,"기업이":229,"속도":891,"선생":91," 법학":135," 불러":104,"서서":57,"선사":107," 복지":133," 불렀":138,"르이며":70,"사카":280,"세부":108,"기업인":182,"석사":85,"행하고":204,"생존":118,"서사":106,"응 ":113," 번호":380,"소니":139,"소닉":71,"소는":346,"성분":130,"읍 ":503,"성북":87,"사추":98,"성부":135,"다의 ":251,"생자":65,"음 ":1500," 별칭":84," 베트":372," 보좌":58,"생이":317," 본적":119," 보존":342," 보조":161,"생의":222,"생을":123,"행하기":192," 복제":75,"사촌":66,"등장한":143,"생으":111,"등장하":542,"치아 ":63," 본인":73,"설비":81," 보전":117,"을 ":57193," 보장":178," 복잡":211,"사천":61,"사찰":89,"화방송":96," 복음":136," 보이":420," 보인":139,"소년":531," 보잉":63,"소녀":133,"상징":246," 보유":223," 벡터":143,"은 ":53620,"생에":69," 복원":82,"소나":91,"서비":984,"상주":112," 베타":95,"서브":58,"리사무":72,"성문":74,"인 ":17705,"서양":389,"성시":131,"이 ":37893,"성수":74,"성술":118,"센서":66,"생체":62,"익 ":246," 북미":97,"송된":69,"송되":126,"세상":147,"세서":262,"산타":97,"성서":193,"사토":62,"호에 ":574,"선스":81,"성사":84,"성산":69,"선시":239,"사태":109," 부문":169,"서스":92," 불리":901,"서식":307," 불린":584," 
부모":106,"행하는":526,"기업에":73,"사키":131,"르웨이":214,"서술":88,"사쿠":75,"소들":74,"소드":129,"소득":173," 분리":348,"선수":1423," 부르":974," 부른":608,"상청":115,"소되":63,"속되":249,"속된":176," 분류":697,"설정":185,"섬유":95,"사회":2727," 부상":120," 보통":720,"선종":75,"설적":60,"선조":162," 부산":765,"살해":175,"투자 ":68,"자·":93,"세아":63,"잔 ":168,"상품":332," 북서":591," 부설":61," 본토":83,"섬이":172," 분산":111," 부서":58,"성원":193,"상표":111,"산화":189,"성운":59,"섬의":133,"섬으":166,"생태":212,"성우":110,"세에":183,"사후":160,"섬을":86,"성요":67,"잘 ":553,"소멸":82,"서적":136,"성애":87,"사항":240,"선인":123,"사했":72,"선이":566,"사하":732,"사학":226,"선을":465,"섬에":271,"선은":242,"선의":737,"사할":112,"사한":280,"서장":59,"선으":205,"상트":78,"선전":130,"사형":99,"쇼군":122,"선정":259,"설이":415," 병합":69,"성어":114,"설을":222,"작 ":890,"산한":104,"산학":93,"산하":736,"설의":132,"성에":448,"자 ":9649,"설은":95,"솔로":157," 변형":122,"소를":405,"임 ":1301,"서울":3628,"입 ":284,"선왕":86,"서원":70,"설에":180,"도가 ":497,"서인":123,"석의":89,"서이":286,"석을":172,"소말":61,"석이":169," 변호":184,"서유":74," 변화":441,"잉 ":170," 변환":220,"서의":944,"석으":73,"소리":483,"석유":108,"서열":68,"소로":258,"석에":88,"세스":264,"서예":61,"출하였":98,"선양":152,"소련":350,"출하여":68," 불법":159," 부분":688,"일 ":19527,"상태":852,"인·":87," 북부":662,"서에":300,"기에서":318,"선언":288," 보컬":132,"서와":99,"선에":397," 변하":73," 복합":146,"술교":71,"송법":108,"수기":70," 보한":520,"션에":87,"서초":359," 비공":72," 비교":314," 보험":236,"선총":96," 보호":689,"등장인":96,"소비":693,"세조":81,"수교":103,"후기의":231,"리와 ":419,"수권":213,"세종":317,"성지":76,"성질":285,"성직":128,"술관":133,"술과":123,"장 ":5509,"세이":380,"세의":524,"서쪽":1399,"세자":103,"세인":151,"성종":71," 보편":88,"상호":734,"샌프":83,"상화":104,"상황":295,"술개":70,"재 ":2451,"성주":100,"상회":64,"술가":133,"세와":166,"캐롤라":68,"소방":114,"성으":264,"성은":337," 부속":114,"성을":1294," 분석":366,"성의":584,"술회관":61,"사히":61,"성인":258,"성이":936,"성자":131,"세운":225,"세우":158,"세워":323,"성장":335,"속버":59,"세웠":108,"선진":160,"상한":79,"상학":60,"상하":394,"대부분":788,"상해":61,"기여를":92,"수가":629,"소보":57,"상했":97,"성전":187,"성적":290,"석기":155,"기여하":360,"상류":71,"상륙":81,"새롭":78,"스에서":445," 미합":61,"서기":467,"새로":765,"기여함":193," 미하":64,"기여할":296,"산문":69,"산물":335,"호와 ":151," 시 ":1062,"성가":64,"사바":60,"선교":176,"설계":483,"사물":97,"사무":2780,"울 ":554,"설과":91,"선구":92," 배치":124,"성경":276,"성계":81,"웃 ":62,"성격":206,"성공":519,"시안 ":86,"성과":536,"웅 ":112," 본래":266,"사본":71,"세가":270,"사보":77,"시아 ":1643,"사법":297,"움 ":309," 신 ":151," 바티":91,"섬과":108," 번역":439,"세계":3906,"산부":140,"워 ":326," 벌어":349," 발트":81," 베스":190,"성구":134,"성군":196,"사부":61,"세균":108,"월 ":22349," 발표":766," 번의":314,"달에 ":101,"상법":81,"세기":1777,"서남":114,"성기":125,"센고":142," 보르":102,"카메라":190,"원 ":4524," 복리":66,"현청 ":126,"서는":3206," 법원":151," 보물":126,"상북":422,"상부":76," 본명":394,"근처에":117," 벌이":59," 벌인":98,"생리":85," 볼리":57,"진흥 ":62," 버전":294," 보면":65," 벗어":103," 번이":131,"선대":74,"생물":828,"산사":62,"사선":118,"사서":100," 버지":75," 배포":140," 법인":125," 밝혀":97," 발현":62,"사사":122," 법의":69," 법이":73,"사상":1612," 번주":318,"성남":125,"서대":130," 변수":131,"생명":423," 베어":58," 발행":329," 범위":193,"샤를":135,"산소":74," 범죄":367,"선되":118," 베이":535,"세네":84," 법조":68," 번째":1969," 방해":63,"산성":191," 방향":321,"스앤젤":97,"선도":157," 번지":619," 법정":244," 보병":96,"사소":200," 병사":60,"다운 ":138,"사성":112," 법적":136," 발효":85,"서도":675,"성단":63,"성당":248,"성대":78,"삼성":344,"산스":90,"사슬":60," 부과":61," 불가":312,"성능":133," 본부":184,"사실":538,"사시":168,"설되":275,"설된":124,"사스":150," 범주":66," 밝히":70," 북구":110,"산식":630,"산시":419,"리적 ":251,"웹 ":348,"세는":97,"성도":116," 북극":76," 불과":64," 부근":222,"상속":91," 불교":744,"성동":166,"세대":442,"섬들":63,"성된":741," 불구":96,"성되":956,"상수":192," 분기":121,"위 ":2320,"성들":103,"상스":90,"윈 ":68,"상승":79,"셔널":254,"상식":83,"상시":144,"상실":57,"린이 ":101," 붕괴":168," 보상":64," 복사":102,"산악":97,"사업":1551,"사에":878,"행한다":126,"통합 
":160,"서로":775,"사연":96," 본사":229,"사였":94,"사와":428,"산업":1568,"살아":160,"선로":99,"통해 ":1561,"집합 ":96,"살았":134,"산에":325," 병역":76," 복소":94," 본선":116,"사용":5124,"생산":1237,"때문이":153," 별자":112,"사우":425,"사운":168,"서류":82,"설로":162,"사원":206,"생생":58,"사위":127," 보수":175,"댄스 ":74,"소가":271,"사유":116,"생성":266," 복수":110,"서를":347,"사자":349,"사장":277,"사인":290,"사이":3510,"립에 ":79," 보스":263,"사일":246,"사의":845,"서만":132," 병원":123,"서리":63,"소개":184,"사정":140,"서명":92,"산자":217," 봉사":58,"산재":60,"소관":1922,"사제":90,"산인":85,"산이":328,"사절":60,"사전":1026,"서면":90,"사적":555,"산의":351,"산으":200," 법칙":215,"스어로":96,"산은":127,"산을":359,"윤 ":93,"카르타":63,"삶의":108,"살인":141,"사조":84,"세라":64,"살이":176,"삶을":67,"상업":234,"상에":642,"상어":65,"설립":4441,"때문에":1177,"육 ":627,"유 ":819,"소금":62,"율 ":154,"세력":252,"설명":378,"소규":61," 부대":178,"생식":87,"생시":83," 부담":83,"속기":527,"산주":225,"상위":159,"삽입":62," 부동":119,"상용":93,"기업청":60," 분당":67,"소기":175,"세로":166,"상원":88,"선민":529,"상이":873,"상인":149,"서버":242,"상자":197,"상임":117,"선발":112,"선박":143," 보안":191,"상은":243,"상으":822,"성리":100,"사진":268," 북동":561,"사지":81,"성립":287,"파가 ":58,"상을":1272,"상의":1384,"세를":145,"세리":64,"송공":59,"상적":276,"산지":293,"상장":64,"비의 ":135,"세르":275,"선보":70,"색이":139,"송국":244,"린의 ":67,"서부":1232,"서북":82," 보여":231,"색을":143,"성모":113,"색은":70," 부등":76,"색으":87,"융 ":140,"색의":117,"손꼽":71,"뿐이":69," 밀양":57,"사고":593," 방사":170,"사관":271,"사과":105,"와 ":15664,"옹 ":166," 바스":71,"사건":1923,"사거":86,"사격":73," 바실":65," 미야":70," 베네":192," 미얀":92,"사가":957,"사각":96,"통치하":66," 미쓰":88," 변경":370,"대법원":73," 발사":235,"출하기":61," 발생":1103,"삼고":63,"산기":92,"기이다":399,"대신 ":324," 방식":744," 밑에":65," 뮌헨":76,"사기":229,"수행한":79,"상가":172,"수행하":444,"산구":164," 방송":1132,"살고":132,"단에 ":234,"산권":82,"산군":133,"삼각":154,"기원전":1034,"산광":498,"완 ":175,"산과":197," 밀집":64,"니즘 ":74," 바위":64,"사냥":61,"사나":120,"상급":60," 반영":94,"상국":58,"출하는":157," 밖에":125," 받았":408," 민중":86," 받아":431," 민주":647,"상공":144,"상관":120,"상과":358,"왕 ":1238," 민족":808,"상경":57,"다와 ":69,"삼국":365," 밀접":73,"통한 ":448,"상계":62," 배열":80," 벨라":91," 발족":205," 보기":59,"사대":81," 별도":115,"생과":60,"사단":1910,"사다":97," 보급":405,"통칭하":65," 발전":1652," 법률":682,"생겨":142,"사는":701,"생겼":60,"현으로":89," 백악":117," 본관":550," 변동":57," 방영":369," 밝은":60," 발음":225," 본격":104," 방어":109," 방언":173,"생각":316," 보관":104," 받을":65," 받은":244," 본거":85," 보고":251," 법령":81," 발원":164," 반응":269," 방안":58," 보건":205," 밖의":74,"단어 ":74,"까지 ":3943," 바이":697,"산동":187," 방정":183,"살던":66," 받지":79," 법무":126,"산된":107,"산되":209," 미카":72," 미치":120," 백인":62,"사들":245," 백작":125,"기초 ":68,"상남":484," 방위":104,"행하였":97,"행하여":85,"산당":204,"산대":62,"사도":176," 배우":373,"산더":62,"외 ":526,"사동":76,"사되":110," 베르":258,"생긴":160,"사된":69,"생기":186," 베를":141,"상동":61,"상도":117,"등으로":538,"비전 ":429,"산드":170," 백제":157," 보내":142," 봉기":67,"상당":532,"상대":588,"상담":89,"니스트":163,"삼동":102,"는지를":90," 방지":187,"살라":75," 보도":103,"사례":68,"사령":195,"사로":662,"생대":183,"살렘":109,"사료":78," 보는":142,"사라":234," 미터":113,"사랑":297,"사람":2438,"술하는":60,"요 ":955,"욕 ":325," 미토":58,"리의 ":865," 보다":233,"니스탄":150," 승 ":122,"우 ":2451,"살리":87,"석과":66,"사모":61," 바탕":449,"서관":272,"님의 ":89,"선거":1124,"설가":221,"산맥":321,"산림":320,"운 ":2130,"선과":230,"서구":357,"선고":72,"산면":109,"상록":72,"생들":178," 배출":120,"욱 ":152," 별명":121,"서가":119,"사르":183,"사를":1012,"산리":59," 버스":279,"생되":69,"생된":72,"사망":432," 방출":92," 보드":68,"사리":79,"사마":121,"사막":121,"용 ":1999,"메달 ":69,"사립":113,"혹은 ":1399,"당은 ":139,"실베이":80,"호의 ":145," 온 ":216,"수학자":299,"수학의":99,"집트 ":224,"수학적":106,"째 ":1900,"니이다":60," 옛 ":292,"카를로":81,"당을 ":126," 영 ":87,"마에 ":93,"림을 ":114,"당의 ":340,"기원후":57,"집 ":514," 연 ":60,"짐 ":75,"트를 ":420,"당이 ":130,"림의 ":63,"징 ":234," 열 ":74,"수행을":57,"신흥":75,"립을 
":177,"확보하":72,"짜 ":61," 생각":309,"단순히":106," 생겼":60," 생겨":141," 사는":188,"현재는":456," 사단":1779," 왕 ":308,"출판사":141," 상관":89," 삼국":254," 살고":132,"당사자":199," 삼각":141,"릴적 ":113,"동가 ":129," 와 ":1259," 삼고":59,"현재까":179,"대와 ":262," 사기":64," 사가":107,"리스에":76,"리스어":391,"대왕 ":100,"수학에":150," 사고":438,"리스의":233," 사건":1669,"스하":127,"시칠":59,"시카":195,"스피":342,"스핀":66,"승팀":125,"즉 ":662,"즈 ":1704,"시코":429,"시켜":325,"시켰":170,"실천":166,"길이는":187,"스플":201,"스프":195,"시청":223,"스퍼":73,"스펙":70,"스페":888,"시초":70,"스포":707,"신청":95,"스폰":57,"신체":167,"스투":75,"심지":489,"스튜":256,"스트":3813,"스틴":60,"스틱":113,"스티":592," 양 ":88,"스파":404,"실질":136,"스톡":64,"스토":637,"스테":623,"스템":1334,"스텔":113,"스터":863,"스턴":325,"스톤":109,"시지":134," 약 ":1207,"스크":1207,"현재에":63,"십자":249,"심장":74,"시즘":60,"시즌":581,"시즈":65,"신주":76,"심이":158,"싸움":90,"스탠":109,"스탄":838,"스탈":89,"심적":69,"층의 ":88,"스타":1367,"스키":554,"슈티":64,"슈팅":76,"카리브":78,"시조":180,"실이":117,"신전":61,"신적":108,"신정":80,"스코":803,"슈트":99,"스쿨":82,"심으":693,"심을":206,"심의":204,"슈퍼":245,"싸여":64,"신조":82,"실적":80,"스쿠":71,"실제":435,"스칼":83,"신으":438,"스카":534,"스칸":67,"시장":721,"실용":73,"신의":975,"당선되":96,"식재":59,"스캐":63,"신은":100,"단순한":93,"심에":93,"신을":369,"시인":445,"시이":748,"식의":488,"식을":715,"식은":204,"식인":80,"식이":641,"시작":1913,"시자":111,"현재의":485,"스케":361,"실은":1894,"신장":127,"식적":310,"실의":105,"싱어":70,"실을":195,"신이":763,"신인":167,"시점":75,"신자":135,"시정":103,"시적":140,"시절":216,"름으로":597,"실현":165," 역 ":190," 여 ":231,"실행":302,"신화":440,"실험":314,"신호":329,"질 ":802,"식회":208,"신해":58,"실하":58,"시호":371,"진 ":4958,"시했":60,"시행":387,"등지에":113,"시한":333,"식하":203,"식한":134,"마쓰 ":58,"지 ":12819,"직 ":904,"신한":74,"시험":488,"신학":397,"신하":183,"심판":171,"달성하":68,"시하":616,"시픽":100,"식품":996,"실패":146,"다중 ":70,"름이다":364," 뿐만":102,"싱턴":122," 에 ":3968,"층을 ":58,"시판":68,"시티":207," 뿌리":139,"시트":81,"기장에":74,"치시 ":58,"리스도":198,"증 ":222,"승한":65,"기장을":58,"신탁":73,"슷한":239,"슷하":194,"즘 ":268,"승하":152,"시킬":102,"시키":987,"시킨":292,"시타":80,"담은 ":79," 어 ":61,"기장이":86," 억 ":226,"전》":368,"대에 ":921,"호선의":73,"즌 ":240,"시뮬":120,"신문":591,"심리":312," 빅토":126,"식민":428,"말에 ":88,"시바":74,"시민":455,"시미":57,"스스":259," 비판":173,"시베":62," 비트":267,"식별":124,"수함":92,"수한":297,"수학":984,"수하":240,"수행":909,"실베":102," 비평":80,"시부":72,"수필":57,"순한":104,"수형":93,"신부":105,"신분":132,"주 ":6421,"스앤":101,"신비":78,"머니이":60,"이》":70,"수호":86,"준 ":2588,"술하":154,"술한":115,"수화":58," 비해":209," 비행":271,"승을 ":297,"줄 ":149,"스어":694,"스에":917,"순화":57,"순환":168,"기존의":250,"시라":107,"신라":377,"식량":124,"시로":1329,"승부":68,"실록":97,"실로":87," 쓴 ":317,"신뢰":98,"시를":363,"시리":1561,"시마":348,"시립":76,"술탄":80,"쇼핑":78,"림수산":539,"신망":70,"시모":119,"실리":112,"스속":105,"층에 ":431,"식물":650,"식문":64,"독교 ":326,"승전":168," 알 ":152,"실시":535,"시아":3207,"시안":117,"다수의":139,"트로 ":289,"닿아 ":81,"시온":61,"시오":180,"썼다":75,"시와":405,"신에":201,"신약":75,"식에":220,"비아 ":592,"시어":65,"기적인":70,"시에":1534,"신앙":167,"기적으":76,"신용":161,"식으":607,"시의":1064,"시위":145,"시우":69," 앤 ":149,"실업":129,"실에":130,"시외":223,"슈타":211,"스위":294,"순히":106,"스웨":430,"스오":59,"스와":739,"심부":90,"술회":66,"슨의":72,"직한 ":59," 뻗어":76,"시설":711,"슨이":67,"신사":180,"스의":2139,"중 ":3559,"도권 ":73,"식사":62,"시상":135,"스이":308,"시사":106,"스인":78," 빌헬":87,"신속":78,"지할 ":97,"스주":85,"승에":60,"지한 ":61,"신설":186,"신선":60,"실상":172,"신성":225,"시시":71,"실수":92,"승을":297,"승인":162,"승자":99,"신시":65," 안 ":111,"승용":69,"습을":167,"습이":87,"시스":1777,"심사":156,"丙 ":228,"소하":122,"소한":93,"속하":1183,"시계":147,"丘 ":1912,"속한":740,"소행":127," 비스":147," 비슷":450,"시게":81,"소피":62,"지하 ":71,"족 ":952,"신경":249,"손해":112,"니아와":58,"송파":188,"만에 ":232,"속해":225,"丟 ":282,"식경":89,"시공":102,"丞 ":58,"튜디오":194,"스도":241,"식과":187,"식공":81,"조 ":2368,"丑 ":151,"스는":657,"달의 ":59,"지칭한":98,"지칭하":168,"호주 
":71,"송통":78,"형이다":110,"丕 ":102,"소프":1375,"且 ":206,"니아의":174,"습을 ":166,"시간":1006,"시각":172,"시가":359,"스니":152,"수요":138,"심리학":178,"신기":142,"수와":199,"싱가":102,"丫 ":62,"르크 ":449,"마와 ":59,"송하":166,"술연":115,"좀 ":61,"수원":294,"술에":154,"형으로":179,"수용":253,"신교":225,"両 ":166,"소화":114,"시권":58,"수아":82,"존 ":728,"시국":65,"니아에":65,"신과":128,"신고":75,"소형":129,"수영":176,"수였":72,"並 ":3956,"시기":468,"수여":200,"수엘":64,"수에":273,"수업":92,"리즘 ":90,"심각":57,"달을 ":122,"승격":131," 분화":88,"수사":246,"수산":1050,"수상":476,"소통":90,"트럴 ":117," 북한":208,"스기":80," 빌보":65," 분할":95," 분해":71,"승객":82," 부호":199," 부활":95,"수술":79,"수수":82," 비상":102,"丈 ":353,"순수":115,"스노":76,"三 ":5345,"출판되":65,"시스 ":159,"리지 ":202,"소포":62,"수시":75,"수신":110,"수성":148," 부흥":64,"수소":134,"스나":102,"丁 ":3345,"식민지":360,"万 ":91,"순서":137,"수송":152,"술사":85,"술서":149,"당에 ":75,"리스트":199,"스미":145,"스바":64,"스박":60,"리스티":104,"시드":120,"시된":222,"식되":65,"신도":198,"스몰":61,"승려":122,"乙 ":1163,"슬링":138,"슬림":73,"乘 ":269,"시되":344,"리스토":113,"시도":203," 빛을":61," 빛의":72,"식문화":60,"죄 ":281,"단이 ":202,"비에 ":63,"스부":93,"수치":89,"단일 ":104,"스볼":110,"乩 ":76,"수출":221,"스베":93," 빼앗":59,"진출하":105,"순천":97,"신들":162,"승리":273," 비율":114,"숙종":94,"스럽":73,"스러":103," 비잔":182,"술적":188,"스레":59,"수주":83,"수준":267,"좌 ":100," 비용":109,"시나":175,"술의":455,"싱글":400,"술을":417,"술은":76,"술인":130,"시내":295,"스라":342,"술이":263,"술자":98,"丹 ":433,"수장":102,"술원":71,"리즈 ":556,"수적":127,"싸고":92,"수정":259,"맥스 ":80,"종 ":2005,"수의":777," 비영":167,"수익":107,"수이":764,"순위":140,"수입":226,"수인":135,"수자":76,"스마":263,"시대":2951,"스만":162,"머니는":107,"스리":201,"시다":76," 비즈":134,"시는":274,"슬로":221,"때부터":94,"스를":979," 비주":67,"之 ":4501,"수직":77,"수지":162,"수집":212,"숭이":78,"숫자":148,"슬러":133,"파구 ":139,"슬레":59,"스로":529,"시노":86," 비전":65,"슬람":295," 비정":120,"슬란":124,"슬라":250,"실내":104,"빌보드":64,"그의 ":1128,"즘의 ":66,"수인 ":81,"마크 ":189,"극을 ":92,"는다 ":996,"기사 ":106,"수상하":96,"수상했":86,"술서기":141,"지방에":438,"교향악":94,"지방을":71,"지방으":60,"특수 ":78,"지방자":116,"지방이":59,"지방의":262,"국토해":108,"니고 ":121,"카미 ":64,"힌두교":117,"증을 ":98,"노쿠니":65,"만큼 ":89,"수적 ":58,"싸고 ":90,"파로 ":96,"지식 ":88,"는데 ":1814,"주하고":96,"술은 ":76,"분포하":147,"술을 ":417,"술의 ":445,"지배하":161,"싱글 ":166,"지방행":148,"지방해":72,"메리카":560,"메리칸":121,"근에 ":191,"리튼 ":69,"칠리아":60,"리트 ":176,"특별시":2267,"그와 ":129,"념하기":58,"술이 ":63,"모두 ":601,"특별법":73,"국토관":71,"지방과":88,"지방검":64,"숙종 ":62,"리티 ":77,"특별지":138,"주파수":89,"마지막":447,"특별자":254,"떠한 ":154,"술적 ":124,"파를 ":146,"누어 ":109,"모든 ":1110,"빠르게":93,"圓 ":266,"그에 ":214,"지방국":73,"國 ":227,"지방법":71,"기록하":192,"기록한":116,"파리 ":176,"비스 ":391,"특별히":77,"머스 ":117,"질서 ":65,"츠의 ":100,"특별한":118,"스로 ":388,"지막으":64,"중해 ":78,"르헨티":187,"신교 ":109,"급이 ":64,"림축산":107,"급을 ":162,"기반의":108,"기반을":124,"기반으":258,"급은 ":65,"금이 ":60,"급의 ":111,"기법이":65,"지리학":71,"즈의 ":361,"지사 ":62,"지면서":85,"리키는":517,"기술 ":470,"충청남":311,"리킨다":654,"지상 ":78,"판매 ":90,"수영 ":75,"트비아":70,"수에 ":166,"시기 ":87,"금은 ":161,"수와 ":188,"리키며":63,"금을 ":242,"주특별":162,"다가 ":862,"티브 ":148,"금의 ":358,"중화 ":71,"충청북":246,"금융 ":121,"國之":115,"國亞":57,"리카의":183,"國丁":85,"國三":298,"國並":202,"구팀이":62,"圓三":73,"리카에":152,"트비히":100,"립트 ":57,"명되었":154,"술에 ":87,"수산부":104,"수원 ":77,"수산물":73,"즈와 ":65,"마찬가":135,"교회의":291,"리프 ":97,"교회와":67,"교회에":135,"몇몇 ":106,"메모리":266,"리케인":119,"리케이":107,"지류인":60,"급에 ":117,"기서 ":119,"리한 ":169,"수산식":513,"리학 ":218,"치러진":143,"근의 ":141,"금에 ":58,"트어 ":82,"리핀 ":247,"극장 ":74,"수의 ":736,"글을 ":102,"트에 ":193,"최후의":62,"출할 ":58,"지리적":89,"패로 ":61,"다고 ":1194,"출한 ":110," 피부":85,"스는 ":657,"부터의":66," 행동":333," 허가":248," 하류":58," 했던":64," 하루":107," 했다":544,"리즈에":127," 표현":714," 필립":80," 필리":448," 필름":96,"부통령":87,"단과 ":106,"지로서":70," 해당":773," 한때":92," 합동":61,"리즈의":173,"리즈이":119," 
하드":309,"르크스":130,"冲之":64," 함대":59,"질랜드":239," 필라":63," 핀란":212," 할당":112,"시가 ":252," 한다":3550," 하다":597,"트로닉":61," 하늘":148," 하는":2411," 하느":107," 丹並":115," 丹丁":139," 한성":74," 丹三":149," 丹丘":70," 丹之":113," 현과":87," 피아":207,"르키나":72," 학생":355," 학살":107," 학사":67,"트라이":124," 並倉":88," 並之":464," 並乙":106," 필수":58," 丘倉":66," 항복":59," 해발":149," 해방":139," 並亂":164," 並亞":126," 並丁":313," 並丘":167," 합병":171," 並三":570," 並並":376,"단계 ":74," 丘乙":71," 三倉":199," 丘之":178," 丘並":131," 하부":75," 丘丘":138," 丘丁":182," 丘三":312," 행렬":75," 항만":73," 丁倉":83," 한반":214," 하버":79," 한민":80," 해리":88," 한번":60," 항목":84," 三丘":352," 합류":148," 三三":1459," 三並":676," 학문":432,"매주 ":79,"규정하":103," 丈三":77," 三丁":609," 三亞":219," 합리":91," 三之":750," 헝가":248," 三乙":140,"트랜스":130," 三亂":164," 하며":678," 하면":179," 丁丘":176," 丁三":472," 丁丁":355," 丁並":314," 학명":174," 할리":61," 丁亞":83," 丁亂":112," 丁乙":148," 丁之":391," 해에":93," 함으":81," 해양":438," 해안":258," 한정":80,"비롯한":246,"비롯하":108,"화하여":111,"비롯해":85," 해운":73," 향상":440," 해외":278," 하지":445," 합작":67," 해와":61," 합의":125," 하위":142," 학위":90," 행성":260," 하원":92,"화학자":72," 행사":339," 하우":120," 하와":84," 피지":78," 핵심":157,"트로이":97," 한자":374," 한일":97,"화학적":86," 현대":825," 프톨":67," 하인":90," 함양":74," 학자":250,"째로 ":479,"리크 ":66," 허리":151," 하이":338,"환하는":65," 해서":99," 해석":322," 하여":729," 하에":107," 하였":775," 하얀":72,"립적인":63," 학습":168," 행복":66," 학술":377," 피에":140,"즈에 ":111," 항성":196," 함수":391," 해산":87," 필요":634," 해상":198," 항상":88," 합성":230,"르크의":62,"시간 ":432,"중추원":62,"화합물":130," 확대":221," 피해":312," 회계":74,"트레이":130,"트레일":500," 협력":288," 황금":95," 혁명":479," 합쳐":178," 헤비":59," 혼동":64," 해체":141," 해주":60,"늘날에":73," 하천":129,"기부터":151," 행위":568,"부터는":174," 했으":125," 호는":364," 피터":73," 행정":2058," 협동":102," 헤르":204," 화가":164," 홈구":160,"노폴리":62," 헌법":409,"규정에":101,"늘날의":100," 헨리":126,"틀랜드":245," 倉 ":57," 해전":102," 허브":61," 헬리":60," 환경":782," 호남":99,"僅丁":77," 현실":117,"僅三":105,"캐나다":542," 형성":527," 확률":198," 항해":59,"속해 ":143," 혁신":98," 형상":83," 형사":105," 협상":60," 학회":76," 형식":446," 화물":115," 해협":157," 후계":64," 화면":108,"속한 ":330,"지부 ":95," 확립":183," 헤이":198," 한편":88,"구하여":62," 호로":200," 형법":118," 활동":1538," 학파":104," 한해":58," 현상":583,"화하는":131," 효과":357," 하프":69,"티노폴":57,"티누스":77," 황도":62,"傭三":85," 호르":78," 허용":108," 홀로":66," 확산":127," 화석":129," 화성":179,"림청 ":130,"확하게":91,"카를 ":215," 홍수":68,"카데미":148," 형이":88," 향하":60," 현지":86," 협정":151,"립운동":226," 화산":224," 형제":214," 현청":128," 확실":69,"화하기":61," 활성":221," 효력":75," 호에":514," 활발":97," 확보":146," 행해":125,"화하고":69," 행하":123," 현의":154," 협약":108," 혈액":72," 현에":71," 홍보":159," 획득":133," 후기":424," 혐의":64," 회로":165," 호수":187," 현존":112," 협의":101," 호스":85," 현재":2800," 현장":98," 호선":451," 호세":57," 확정":107,"최하는":107," 확장":327," 환자":90," 홈즈":59," 형태":1027," 후반":207,"구하는":390,"소형 ":88," 휘는":138," 황제":690," 후보":221,"리즈는":127,"는다는":74,"리코 ":60," 호칭":116," 혼인":73," 혹은":1388,"부호 ":101," 회복":73,"리즈로":71," 호주":106," 호조":68,"倉倉":133,"수아 ":63," 활약":340," 훈련":135," 화재":138," 확인":346," 활용":295,"신과 ":111," 회사":809," 화이":94,"기본적":189,"마에서":71,"리지널":66,"식과 ":176,"분할 ":73," 효율":259," 홈페":62," 혼합":144,"丘驿":62," 호환":88," 후에":392,"출판 ":83," 효소":84," 회원":584," 홋카":126," 회의":273," 휴대":208," 호텔":130,"주한 ":63,"담고 ":141," 홍콩":226," 회장":211," 협회":248," 후속":211," 후손":130,"구하고":183," 회전":155," 희곡":57," 후지":163," 흘러":172,"수소 ":62,"스나 ":83,"모델 ":116," 화합":132," 화학":486," 화폐":85," 후원":123," 倉丁":148,"주최하":113," 倉並":114," 倉三":242," 倉丘":63," 倉亂":57," 倉之":119," 힌두":143,"니다 ":190,"대개 ":139," 히데":85,"즈베키":65," 황후":99," 흐르":180," 황해":115," 흐름":133,"당과 ":104,"마치 ":141,"기원 ":60,"북한 ":74," 후쿠":159,"트리스":63,"지배 ":66," 히로":126," 倉倉":67," 
회화":73,"지방 ":781,"대가 ":240," 희망":65," 히브":111,"트리아":442," 흰색":64," 희생":96,"니는 ":214," 후한":109," 흡수":98,"부한 ":73," 흑인":105,"명령 ":61,"흐르는":105,"그이다":93,"리처드":110,"당구 ":75,"치를 ":548,"·연구":60," 히스":59,"치단체":145,"극작가":87," 힘을":80,"기와 ":326,"망원경":111,"트리트":107," 휘하":73,"맡았으":76,"집단이":82,"흘러드":76,"만으로":117," 흑해":77,"대교 ":83,"빠른 ":100," 흔히":425,"류하는":80,"류하다":77,"르투갈":235,"능력 ":85," 히트":60," 히틀":63,"기여 ":85,"명나라":109,"기에 ":1427," 힙합":219,"근에서":110,"류하고":65,"기업 ":244,"막으로":89,"대고 ":174,"기억 ":70,"마이다":107,"맡았다":163," 之三":810,"말이다":630," 之丁":474," 之丹":65," 之並":680," 之丘":236,"마이클":102," 之亂":157,"마이크":612," 之之":450," 之乙":108," 之亞":129,"술문화":87," 三國":102," 乙丁":159," 乙三":188," 乙丘":109," 乙並":99," 之倉":147," 乙之":125," 乙乙":65,"를테면":61," 乙倉":62," 並國":241,"치되어":106,"치되었":68," 亂丁":147," 亂丘":69," 亂三":239,"국회의":347,"수법인":62," 亂之":174," 亂並":156," 亂倉":78,"그에서":63,"구협회":61,"근에는":98,"마이오":85," 亞之":103," 亞亂":72," 亞三":197," 亞丁":62," 亞並":142,"대구 ":108,"구현하":65,"모는 ":61,"학자":2335,"한의":189,"현대":875,"함에":145,"한을":120,"학의":956,"하이":710,"학을":385,"학으":130,"하의":429,"학은":215,"하자":244,"학이":346,"함양":88,"하임":58,"하인":110,"하일":70,"프톨":69," 통상":134,"학적":904,"기념일":61,"할을":489," 터키":401,"한이":100,"합에":109,"한자":411,"한일":104,"행사":447,"하우":219,"하원":98,"피지":85,"핵심":159,"호간":219,"호가":153,"행성":432,"학위":158,"허리":158,"학원":440,"하위":156,"항을":212,"하지":1949,"항으":97,"해왔":62,"혔다":58,"합작":74,"해와":166,"그래픽":309,"합이":240,"합의":328,"함이":112,"한지":68,"협동":167," 통신":479,"해운":88,"그레이":201,"그래프":117,"무관 ":85,"향상":599,"합적":124,"항이":169,"헬레":69,"해외":283,"항의":92,"한제":218,"해안":395,"현동":109,"한정":88,"프트":1666,"호국":61,"항에":121,"합으":111,"합은":87,"합을":202,"함으":765,"함은":90,"함을":479,"해에":225,"해야":257,"해양":758,"현된":68," 태화":59,"퓨터":1617,"부천시":57,"할시":83,"항성":207,"피의":87,"해산":95,"함수":560,"필요":647,"해상":245,"해살":124,"피에":185,"행법":63,"픽에":161,"피오":64,"학습":221,"행복":77,"하시":85,"슬로바":89,"합성":289,"항상":88,"해수":92,"그려져":89,"하얀":74,"퓨팅":102,"항시":79,"하에":362,"헤드":71,"학연":123,"하였":7094,"학에":1132,"하여":10590,"한에":57,"하와":112,"해시":144,"할아":61,"해석":435,"해서":1485,"핀의":74,"해병":58,"그러한":104," 털이":65,"필수":60,"픽스":83,"항복":61,"해발":152,"해방":192,"피스":241,"합병":195," 토머":69,"합법":65," 토마":98,"확히 ":61,"지역과":116,"학술":418,"피어":124,"피언":426,"하수":77," 테이":155,"피아":520,"현과":109," 태풍":365,"한성":103," 탐험":69,"향력":154,"호하기":67,"한사":58,"한산":83,"학생":582,"해부":71," 태평":280,"학상":204,"학살":146," 테오":77,"학사":169,"프와":64,"헝가":249,"항만":156,"합류":148," 텍사":129,"문과 ":115,"프에":98,"한문":65,"함마":70,"합리":119,"해로":105,"하므":81,"학문":497,"하며":3518,"하면":1454," 타타":70,"호하고":60,"마포구":144,"할린":58,"하모":62,"학명":176,"할리":69,"권에서":136,"한불":58,"행렬":102,"피소":115,"하부":85," 텍스":81,"구지방":63,"학부":176,"스리랑":67,"해리":93,"한번":67,"해를":394," 테스":96,"합물":165," 토론":158,"항목":94,"프의":110,"한반":216,"한민":7437,"하버":84,"표하":378,"해당":812,"필리":478,"필립":81,"필름":113,"해도":129,"표했":61,"표한":304,"파르트":58,"신라 ":149,"표현":736,"하라":255,"해되":60,"하려":361," 토대":84,"했다":3819,"했는":102,"하로":70,"프스":170,"했던":889,"비디오":466,"하루":140,"하리":63,"판매되":66,"히는 ":130,"행동":388,"하마":145,"하르":159,"하류":64,"허가":742,"하를":74,"문가 ":75,"피부":86,"기념하":175,"행된":216,"행되":505,"하느":121,"하는":18789,"필드":112,"효율적":195," 태어":821," 테러":91,"하늘":174," 태양":417,"했고":365,"하다":2005,"행과":61," 터미":91,"학대":110," 타지":75," 탑재":107,"하던":522,"한다":14560,"하도":295," 텔레":470,"매한 ":208,"해남":60,"했기":61,"할당":116,"한데":57," 통계":268," 테르":57,"행기":130," 통과":189," 테마":80,"표팀":149,"합뉴":77,"기록 ":133,"핀란":235,"하드":336,"호하는":60,"함대":93,"피로":70,"필라":72,"기로 ":619," 태조":124,"파리에":66," 태종":83,"한때":92,"합동":105," 
토너":70,"티베트":129,"함된":182,"함되":256,"해는":67,"합된":94,"합되":136,"품종":97,"프를":84,"플로":271,"합과":70,"함과":93,"풍이":93,"플레":847,"플러":120,"합격":100,"함경":91,"풍으":73," 탑승":57,"한글":194,"플랫":190,"플랜":61,"플래":146,"해결":279,"합금":58,"항구":135,"플루":90,"항공":848,"프리":1512,"프린":151,"합군":122,"대기 ":87,"해가":156," 타이":816," 타인":112," 타자":72," 타임":92,"한나":105,"하나":4082,"품질":176,"해군":428,"함께":1510,"근로자":82," 타원":89,"플리":188," 탄자":80,"학년":63,"그리고":1096,"하노":64,"피드":122," 탄생":197,"표지":57,"표준":828,"푸에":75," 탐사":100,"품안":66," 탄소":90,"시로 ":1233,"하고":10067,"학계":119,"그룹의":105,"학과":608,"학관":65,"프라":535,"프란":258,"풀이":93,"한계":93,"글로벌":433,"하거":696,"하게":2062,"그룹이":170,"한강":205,"품에":167,"그루지":65,"하계":214,"프레":585,"한국":5662," 태생":107,"품을":440,"품은":147,"품으":223,"하기":4401,"프로":4269,"품의":327,"품이":502,"플라":427,"학기":729,"프랑":2551,"학교":2885,"하구":63," 테니":98,"프랜":72,"프랭":62,"후부":73,"호칭":116,"그랜드":67," 팔라":87," 팔레":125,"군인으":72,"효소":123," 티베":152," 투표":249,"현하":338,"현한":123,"군인이":191,"황이":118,"황의":113," 파란":59," 파라":163,"시대의":878,"휘는":146,"후보":250," 특수":377,"황제":796,"구조를":175,"형태":1105," 특성":245,"후반":218,"황을":121,"환의":69,"환이":100,"화적":211,"환자":123,"화점":66,"화제":218,"화정":132,"홈즈":59,"시를 ":363,"활을":172,"확정":119,"활의":75,"황에":133,"구조물":73,"무가 ":97,"화유":179,"확장":333,"괄하는":86,"환을":105,"환으":121,"화재":932,"화장":67,"활용":342,"확인":373,"화인":552,"회사":1398,"화이":520,"화의":717,"회주의":311,"훈련":219," 파동":83,"화와":331,"그레고":75,"떨어져":174,"화예":218,"그래머":64,"헨티":187,"화연":58,"후로":193,"화원":71,"활에":69,"파소 ":63,"활약":341," 특별":638,"호조":94,"호주":113,"화에":724," 판단":139,"황실":61,"그러나":356,"호작":139,"호의":161,"혹은":1401,"호이":192,"떨어지":62,"떨어진":577,"그래밍":301,"휘관":62,"혼인":75,"회복":162,"확실":95," 파나":61,"현청":128,"규모의":148,"관하여":88,"호와":168,"화시":180,"관하였":59,"호에":620,"회민":65,"국제적":222," 트리":201,"효력":77,"리핀의":59,"형질":59,"활성":267,"화소":62,"스마트":110,"교차로":174,"화석":141,"화성":207,"리학의":82,"홍수":94,"시마 ":242,"형적":101,"형제":253,"화산":351,"리학에":160,"화사":143,"리하여":91," 트로":120,"확산":137,"화상":154,"형인":57," 트랙":87," 트랜":212,"현지":87,"협정":206,"향하":118,"협조":69,"회를":455," 트레":142,"형의":233," 트라":148,"형이":378," 파괴":164,"형은":85,"형을":227," 판결":111,"형으":179,"협의":275,"호스":99," 파견":97,"혐의":73,"회로":482," 통화":141," 투쟁":64," 투자":238,"현종":57," 투입":65,"현존":113,"호수":201,"리학자":290," 통합":613," 통해":1669," 통하":497,"구조에":62,"메인 ":87," 통한":430,"형에":86,"현장":128,"현재":2814,"카드 ":151,"호세":67,"현을":152,"현의":222,"호선":457,"현이":155,"협약":154,"행형":63,"치로 ":163,"현으":89,"활발":97,"호사":248,"행해":164,"행했":102," 투어":114,"확보":154,"티아 ":137,"행할":139,"행한":306,"행하":1296,"트에서":297,"화방":117,"혈액":74,"홍보":199,"티안 ":64,"현에":128,"화민":221,"확립":194,"헤이":247,"합회":170,"해한":72,"해하":221,"후계":66,"판사 ":69,"화면":122,"해협":171,"형식":491,"화물":206,"리하고":119,"현악":203," 통틀":103,"획득":137,"후기":490,"시대를":133,"리하기":81,"험을":187,"학회":466,"험자":75,"험의":59,"화로":327,"험이":169,"함한":517,"함하":555,"혁신":147,"협상":84,"형사":139,"합한":173,"함해":71,"합하":407,"현실":137,"형상":96,"형성":570,"항해":93,"확률":208,"화를":1063,"회는":1632,"항하":173,"판매하":166,"회당":58,"회담":74,"호를":436,"황도":75,"홀로":69,"호르":135,"험에":93,"하프":78,"효과":428,"홀름":57," 통칭":251,"한하":72," 통치":299,"그램이":267,"학하":70,"그램의":101,"하학":186,"현상":661,"그램을":190,"그램으":79,"헌정":58,"해튼":63,"할하":144,"할한":336,"한해":67,"리하는":212,"화되":254,"화된":293,"화동":78,"시대부":79,"활동":1928,"학파":157,"필하":71,"형법":133,"호로":363,"리한다":63,"형벌":57,"허용":114,"한편":89,"기념물":120,"추진하":98,"글라데":64,"화는":234,"회가":354,"향을":646,"다는 ":1779,"향으":228,"향의":75,"하키":105,"향이":127,"피하":88,"피해":356,"회계":133,"형문":163,"화도":69,"회관":224,"화당":76,"확대":271,"관할한":325,"관할하":116,"황금":99,"혁명":621,"행중":61," 통일":365,"합쳐":178,"피트":60,"헤비":60," 투르":130,"화나":80,"향에":72,"협력":477,"합체":119," 토지":194,"향악":104,"해체":142," 
통제":86,"했지":130,"혼동":64,"행을":299,"행으":73,"해진":265,"해지":305,"행의":136,"행위":930,"헤미":57,"했을":154,"했으":992,"화기":123,"하천":174,"하철":238,"망한 ":84,"지지 ":191,"해주":158,"호대":71,"행정":2568,"징을 ":98,"호는":1131,"관현악":157,"피터":77,"실로 ":59," 통영":63,"치료 ":66,"행이":179,"행자":83,"시대에":456,"합중":92,"헨리":128," 테크":122,"해자":124,"해있":127,"해의":187,"화강":75,"헌법":468,"홈구":160,"화가":521," 토양":84,"헤르":222,"향신":67,"화권":68," 大丁":58," 大三":77," 大並":205,"화국":2106,"행에":142,"환경":1224,"글랜드":374,"호남":102,"화교":105,"허브":67,"해져":64,"했었":83,"헬리":65,"해졌":152,"해제":83,"화관":60,"해적":58,"해전":114," 토요":69,"헬름":103," 파티":73," 폭력":128," 파트":83,"황후 ":99," 포맷":119,"최초의":614," 포르":340," 패턴":71,"흥에":88," 펜실":91," 폴란":330," 페스":58," 포로":65,"술이다":157,"흑인":107,"질적 ":91," 폭발":156," 표기":452," 페어":66," 편성":112," 폴리":129," 퍼져":67,"틴어 ":259,"흥을":81,"흥원":108,"히스":77,"히어":69," 페이":330," 평생":57," 페리":59,"회화":87," 패의":57,"후쿠":167,"히로":187,"회학":141," 페루":67,"시내버":159,"집을 ":67," 페르":414,"평가 ":122,"휘자":123,"지식을":78," 평등":60," 특허":114,"브리어":86,"흐스":60," 포도":92," 편리":65," 특히":654,"트이다":286,"흰색":64,"니라 ":509," 파키":145," 퍼시":104," 파크":139,"후한":126," 평면":83," 판타":104,"히브":114,"희생":100,"흡수":105," 포드":63,"지스터":88,"후원":128,"호흡":62,"확한":140,"확하":150,"화합":170,"화한":180,"화학":716,"화하":463,"훈장":68,"후인":57,"후이":122," 파스":91,"후의":318," 평가":365,"환하":122,"화협":57,"활하":69,"흘러":172," 파악":100,"확히":75," 평균":159,"북쪽은":67," 패션":82,"북쪽으":560,"후지":177,"활화":61,"히데":106,"힌다":88," 팀이":267," 팀인":83," 팀은":59," 팀을":65,"부터 ":5567,"황해":119,"흐르":181,"흐름":139,"히는":131,"북쪽에":130,"황후":208," 파워":88,"비를 ":208," 파울":70,"브리지":101," 포괄":107,"힌두":145," 폭격":62," 파이":306," 파일":459," 파장":70,"희망":76,"멕시코":351,"면에 ":269," 판정":70," 파르":82," 파리":324,"호쿠":61," 판매":505,"획에":68,"화체":1138,"회에":827,"회와":251,"호크":61,"홋카":126," 패러":89,"티에 ":60,"퍼드 ":93,"회원":649,"호텔":166,"스볼 ":93,"흥과":58,"획을":70,"회의":1793,"휴대":208,"회이":521,"회장":306,"협회":1182,"획이":86,"홍콩":229,"흥군":61,"회전":178,"희곡":60,"회적":302,"후손":130,"후속":211," 특유":59," 특이":78,"회주":311,"수이자":73," 특정":747,"단계에":81,"효율":286," 특징":490,"히고":71,"홈페":65," 패배":83,"호하":246,"호협":62,"혼합":151," 파생":136,"호화":96,"호환":91,"화폐":97,"후에":722,"황태":64,"브리튼":77," 하기":275," 프로":3860,"스러운":81,"지적 ":62," 한국":5417," 프레":355,"지속적":117,"슬란드":102," 학교":399," 프랜":67," 프랑":2466," 프라":205," 프란":161," 한계":75," 하고":784," 하계":203," 하게":69," 한강":168," 하거":107,"부지방":64,"메시지":94,"무기 ":82,"뉴스 ":176," 표준":763," 푸에":67," 학년":60," 함께":1504," 해군":342," 한나":93,"지정 ":159,"출연하":62," 하나":3925," 품질":108," 항구":132,"모로코":75," 해결":238,"직접 ":361,"추정된":67," 품종":76,"추정되":104," 항공":685," 프리":592," 프린":124," 플루":58," 플러":58," 합격":78," 플레":499," 플로":151," 플라":322," 플래":123," 플랫":188," 한글":177," 함경":88,"림픽에":158,"질을 ":426,"슬라브":70," 평화":277,"슬라비":71," 포크":98," 표시":264," 포털":64,"질은 ":90," 포터":82," 풍부":90," 폭탄":71,"홈페이":65,"그리스":1462,"질의 ":262," 풋볼":90,"티의 ":76,"힙합":222," 포항":151,"출입국":71," 포함":1756," 피고":86,"수주의":71,"지식경":85,"블로그":77,"기능이":91,"수준의":104," 포트":118,"립하여":100,"질이 ":162,"립하였":147,"힌다 ":87,"기능을":346,"그림을":66,"흔히":426," 펼친":93,"싱글이":108,"트웨어":808,"기독교":722," 포지":92,"히토":70,"직의 ":93,"지인 ":141,"히코":61," 포유":87," 펼쳐":96,"흑해":78,"립하는":77," 폐지":323,"스라엘":216," 표면":218," 포인":78,"마트폰":75,"비로 ":77,"중화인":472," 평택":82,"립하고":77,"권으로":128," 푸른":60,"히틀":64,"히트":99,"트워크":561,"립했다":63,"티움 ":158," 편이":112," 편의":114," 편입":97,"뉘어 ":79," 평안":87,"힘을":86," 평양":179,"립허가":405,"진은 ":66,"문구 ":138,"물과 ":174,"진을 ":324,"지션은":60,"슬람교":71,"휘하":198,"권익보":58,"물관 ":160,"목록이":78,"후에는":149,"진의 ":108," 편찬":115,"기를 ":1415," 포스":152," 평원":63," 편집":205,"진이 ":64," 철학":533," 처형":61,"키타":74,"직을 ":211," 추락":80,"브라이":58,"브러리":89,"태지":103," 초점":101," 
체포":79,"탈출":64," 촉진":190,"타카":106," 취급":129,"흥 ":176,"토대":96," 천황":402,"키텍":134,"토니":210,"기반 ":179," 촬영":150,"타케":96," 총장":96,"희 ":216," 출력":112,"브라우":133,"브라운":81,"지의 ":915," 체험":72," 최소":174,"타크":164,"터스":138,"시기에":129," 최상":86,"터미":267,"후에 ":504,"탑재":111,"지은 ":141,"테레":67,"태양":433,"테러":113,"태어":822,"태에":207,"테르":345,"통계":363,"술연구":104,"통과":270,"테리":169," 초에":92,"태의":337,"테마":94,"타지":184,"탕으":335,"탁집":59,"텔레":541," 체코":189,"터베":257,"중화민":210,"테면":61,"브라질":214,"흐 ":135,"대교구":66,"태자":84,"태이":177," 초연":67,"토노":77,"태종":105,"태조":138,"토너":74," 충돌":170,"터이":208,"터장":63,"토리":469," 취득":122,"터의":386,"토마":125,"대는 ":203,"토머":72,"턴의":58,"통로":60," 총칭":263," 충분":75,"톨릭":545,"톨리":110,"토모":60,"모델이":91,"털이":89,"통령":1141," 축산":110," 추상":113,"태평":463," 최적":94,"토미":114,"테오":100," 출생":337," 최종":192,"회원국":118,"출시한":118," 축소":69,"투갈":235,"태풍":371,"탐험":74,"템에":113,"테일":72,"테인":310,"테이":563,"타타":84,"화체육":1124,"키프":58,"텍사":130,"통되":70,"틀어 ":87," 최신":57,"히 ":2784,"리투아":94," 출발":163,"즌이다":79,"힌 ":149,"터에":367,"탄티":148,"토로":115,"힐 ":59," 출범":202,"텍스":122,"토론":166,"테스":290,"터와":150,"톨레":83," 편 ":84,"힘 ":69,"토르":148,"토를":102,"터널":153," 처음":1260,"터내":78," 팀 ":177,"스》 ":220,"시된 ":202," 채택":232,"붙여졌":67,"회원의":67," 천연":128,"크이":135,"크의":348," 최근":299," 천안":91,"태를":373," 청원":63,"회이다":450,"지원 ":253,"터는":441,"치도 ":124," 판 ":94,"수엘라":63," 최남":61," 초명":73,"모니터":62,"키스":403,"질에 ":69,"터넷":668," 청사":77,"탈리":1166,"탈린":65,"후 ":3176,"타마":167," 초등":93," 창출":78,"타르":237,"타를":77,"타리":335,"터가":166,"키보":78,"키백":58,"금속 ":93," 차트":125,"태로":481," 최고":577,"타미":69,"크와":139,"타바":73,"구축하":85," 청소":291,"붙여진":80,"크어":57,"크에":279,"마케도":118,"모리 ":253,"훈 ":123," 총독":86,"터로":173,"타에":65,"타운":164,"타우":94,"홋카이":126,"택시":93,"키지":131,"탄에":62,"타와":67,"텐도":125," 척추":95,"휘 ":79,"탑승":57,"트와 ":207,"탄자":83,"토관":72,"탄의":170,"탄을":70," 총선":154,"터를":399,"토가":60,"터리":174,"타워":123,"타원":94,"타입":74,"크톱":69,"타임":197,"타자":78,"시도 ":60," 초식":67,"타의":139,"타일":144,"타인":408,"타이":1114,"키아":237," 추가":264," 체육":101,"테네":224,"탄소":127," 최대":527,"키에":115,"타스":72,"탐사":115," 초반":121," 천주":117,"블라디":122,"슬링 ":88," 총리":306,"모를 ":205,"탄생":209," 팝 ":93,"테나":69," 추기":61,"즈이다":127,"키의":171,"태생":110," 챔피":295," 천체":219," 청주":153," 출간":122," 체제":509,"키와":61,"모델로":73," 추구":126,"키우":65," 축구":957,"테니":137,"트는":279,"리히 ":247," 측정":234," 캘리":251," 층에":435,"통틀":103,"토해":114," 캄보":74,"트니":100,"토프":60,"진에 ":121,"퇴적":60,"대규모":179,"트남":408,"투스":187,"튜디":195," 캐릭":178,"빌딩 ":490," 칼리":148,"토크":67," 카메":218," 풀 ":69,"수이며":80,"통치":346,"통칭":251,"대구지":67,"트가":392," 카리":88,"지와 ":314," 카르":178," 카를":260,"교환 ":84,"토콜":205," 출판":450,"히고 ":66,"특성을":91," 침입":81,"능력이":74,"능력을":155," 카운":60,"국철도":85,"파나":80,"파냐":96," 카자":80," 카이":174,"티드":127,"귀족 ":88,"트럭":83,"트럴":128,"트럼":73," 친위":73,"트레":777,"수원시":104,"티노":156," 취하":75,"트롤":86,"트로":845,"싱가포":89," 친일":132,"틀란":68,"틀랜":270,"마추어":90,"티누":78,"트루":135,"틀러":85,"트르":93,"트를":421,"치된 ":243,"트릭":104,"트리":1178,"통행":63,"파가":72,"통하":582,"통한":472,"립한 ":329,"통해":1678,"통합":684,"투자":326,"투이":162," 카스":160,"투입":66,"통화":178,"파견":103,"분으로":162,"투쟁":105,"판관":57,"파구":159,"트랙":134,"트랜":257,"지역 ":878,"트라":796,"트란":76,"트랄":57,"니며 ":81,"판결":150,"파괴":182,"티나":260,"직에 ":77,"분이다":91,"수이다":549,"투아":129," 커다":65,"투어":153,"투에":168,"통신":860,"토양":96,"수여하":88," 추진":344,"텐츠":220,"트인 ":57,"터테":274," 출전":165,"통상":199," 출입":97,"투는":161,"브랜드":212," 추정":261," 총회":98,"토시":94,"토스":227,"터키":426," 축제":131,"텍처":131,"지에 ":707," 출연":182,"토성":62,"림픽 ":413," 침공":92," 최초":1056," 폴 ":107," 친구":98," 폰 ":187,"수용소":59,"통부":57,"트의 ":617,"회에서":510,"중학교":87,"투기":161,"템이":180,"템을":168,"템은":65,"템의":139,"술에서":64,"태화":59," 
출신":971," 출시":471,"지어 ":59,"통문":80,"택하":130,"근무하":84,"증진 ":102,"교황 ":314," 카라":126," 친목":158,"대구광":218," 침략":81,"토착":58," 취임":66,"통제":163," 최후":85," 카드":233," 캐논":60,"모로 ":86," 치료":200,"투명":71," 칠레":103,"토지":226," 캐나":542,"통은":64,"통을":190,"통일":475," 충청":610,"통의":266,"통이":170," 치러":226,"통적":314,"투르":163,"투를":79," 춘추":67," 추측":96,"교회 ":398,"송하는":91,"통용":58,"구체적":132,"투로":114,"大之":149," 춘천":113,"大亞":171,"통영":82," 추출":59,"통에":86,"大丁":104,"大丘":81,"大並":265,"토의":195,"大三":116,"터페":234,"토에":143,"토어":58,"토요":82,"테크":171,"뉴스》":133," 충주":65," 측면":110,"최초로":360,"판서":64,"파소":76,"티안":75,"티아":321,"판소":126," 항 ":96," 케이":278,"파수":93,"티야":82," 캠퍼":62,"퍼드":118,"티에":140,"파스":130,"티오":173," 해 ":409,"틴어":370,"파시":83,"평가":559,"스를 ":979,"확한 ":138,"티움":195,"티의":76,"팀에":70,"평군":70,"평구":64,"패션":100,"평균":190,"파악":100,"퍼런":57,"부여하":82,"팀은":106,"파에":127,"팀을":101,"팀이":414,"팀인":106,"패스":68,"폐기":68,"머지 ":99,"퍼레":61,"특수한":100,"팀으":74,"속하는":814,"판에":167," 콘서":79,"파와":71,"파워":96,"속한다":406,"파울":88,"파의":210,"파이":647,"파인":105,"파일":554," 코스":197,"포가":72,"시는 ":274,"판이":166," 콘스":180,"폭격":74,"판의":82,"판은":76,"판을":143,"기록되":116,"판으":77,"파장":76,"페란":60,"페라":351,"포괄":108,"즈에서":95,"편되":118,"지아 ":174,"포구":341,"판정":86,"송파구":154,"트페":72," 쿠데":103,"후속작":75,"페르":483," 컨트":101,"대까지":71,"트폰":76,"페루":70,"시간에":90,"시간이":97,"티칸":62,"특한":83,"시간을":87,"명명되":59,"페리":114," 코어":59,"평동":57,"패의":75," 컴파":63," 커피":78,"특허":119,"비드 ":116,"평등":101,"특히":654,"스니아":124,"편리":65," 컴퓨":1671,"출생하":68,"포니":304,"화학 ":222," 쿠르":73,"평론":89,"포되":112,"포도":114,"화한 ":163,"포동":68,"퍼시":110,"파크":221,"퍼스":194,"포드":131,"분에서":63,"평면":121,"판타":113,"파키":163,"특별":3191,"파는":76," 커뮤":155,"퇴하":73,"판단":150,"부이며":90,"판되":77,"기록을":88,"기록은":61,"기록이":62," 코나":66,"파도":65,"파동":100,"트비":181," 콩고":150,"수집 ":85,"파라":208,"파란":62,"티모":92,"틸리":74,"티미디":76,"만화 ":211,"특성":272,"트사":71,"팔레":130,"티미":122,"북으로":70,"파르":244,"파를":146,"티벌":59,"티베":160,"투표":309,"특수":392,"파로":128,"팔라":113,"특정 ":405,"트어":154,"패러":91,"티븐":77,"티브":220,"부이다":121," 카타":66,"판매":556," 코드":377,"파리":365," 친환":78,"특유":61,"트이":390,"트의":618,"특이":82," 카페":68,"트인":71,"트워":614,"트웨":825,"송통신":78,"트와":228," 카트":59,"틀어":112,"트에":514,"패로":64,"맞추어":57,"티시":93,"티스":235," 칭호":106,"다란 ":85," 코리":125," 코르":132," 칭하":80," 콜로":74,"트족":57," 콜롬":89,"틀을":58,"특정":766," 침해":88," 함 ":107,"판사":210,"슬람 ":141,"파생":140," 코미":128," 코믹":74,"특수법":62," 할 ":828,"틀을 ":58," 한 ":3539,"패배":83,"특징":490,"펙트":82,"페트":58,"펼친":93,"포지":121," 회 ":430,"푸르":142,"풀로":95,"다른 ":2094,"말해 ":85,"분야로":104,"포의":126,"펼쳐":96,"페테":81,"포유":91,"표를":238,"품들":89,"비교하":63,"포인":100,"포이":65,"포자":65,"표면":243,"평택":84,"휴대용":67,"프가":223,"포츠":546,"회의에":88,"푸른":64,"회의원":323," 킬로":101," 키르":89,"리포니":249,"포터":121,"포털":72,"풍부":92,"폭탄":88,"교통의":80,"포크":112,"표시":333,"기본 ":174,"스도의":64,"평화":337," 키보":71,"편하":134," 쿼크":66,"풋볼":91,"품부":596," 후 ":926,"협회는":600,"출시된":71,"출시되":154,"풍속":64,"표이":142,"치가로":67,"포하":234,"포함":1770,"포한":64," 터널":91,"프는":77,"표의":64,"표율":67,"뉴욕 ":303,"푸아":98,"비교적":115,"표적":496,"피고":92,"포항":188,"식경제":82,"표에":93,"포트":180,"폭포":71,"먼저 ":125,"푸스":101,"당나라":170,"추진 ":78," 크기":356,"후의 ":318,"포르":483,"패턴":74,"포를":110,"표가":57,"팔레스":104,"분야이":162,"파하":69,"분야의":248,"만한 ":99,"포맷":122,"치는 ":382,"패키":72,"분야에":384,"티스 ":63," 크고":91,"폴란":333,"페스":133,"펜실":91,"폴로":79,"소프트":1324,"폴레":162," 현 ":1849,"면서 ":1800,"파티":108,"다룬 ":72,"파트":241,"몰도바":68,"포로":194," 크게":351,"폭력":166,"포럼":115," 쿠바":79," 國三":150," 國並":101,"팽창":61,"스만 ":143,"패하":101,"후인 ":57,"북아프":61," 코트":97,"리트어":69,"평생":67,"츠를 ":86,"구할 ":67,"시대 ":1109,"페이":806," 코퍼":58,"페인":767,"소행성":117,"페어":82," 
콘텐":143,"마케팅":128,"표기":489,"마쿠라":73,"폭발":174,"퍼져":67,"폴리":363,"편성":139,"식공룡":68,"판하":95,"팩트":77," 형 ":90,"시다 ":61,"편적":65,"표된":100,"표되":128,"편입":100,"평야":69,"평양":609," 홈 ":107,"편으":97,"편을":77,"평안":88,"편의":170,"편이":180,"치가이":174,"표는":100,"품과":60,"편에":104," 호 ":622,"주하는":151,"보호하":179," 크다":72,"표로":306," 클리":72,"폐지":328,"편찬":136,"명령어":128,"포에":131," 클로":116," 탐구":62," 크리":494,"펑크":69," 크루":115," 클럽":310,"분야를":77," 클레":98," 클라":269," 크로":292," 클래":126,"출신의":366," 크레":128,"출신이":151,"편집":254,"투표를":67,"평의":64," 크라":154,"부에서":328,"출신으":183,"평원":75,"속하며":146,"포스":282,"포시":139," 타고":80,"폭스":71,"줄여 ":63,"테리아":58,"국시리":95,"최초 ":67,"탁집행":59,"현대자":68,"현대의":80,"주요 ":619,"한제국":216,"현대에":59,"프트가":61,"터베이":229,"국시대":174,"학자이":807,"학자인":136,"면으로":111,"람과 ":57,"면이다":122,"태이다":143,"테르부":65,"행정 ":844,"중부에":219,"죽을 ":57,"목이 ":67,"허리케":119,"죽은 ":131,"주의 ":2545,"구조 ":121,"목의 ":117,"교에서":366,"그녀의":76,"계획을":62,"계획이":79,"피터 ":60,"목소리":65,"통계학":58,"계획에":64,"추어 ":149,"호는 ":1131,"그램 ":342,"관한 ":1431,"학자로":168,"규모 ":208,"호간의":155,"라기 ":58,"국제 ":1037,"행성이":82,"종합 ":162,"념이다":176,"학자들":180,"행사하":73,"목은 ":112,"기나 ":97,"목을 ":213,"농업 ":98,"모의 ":211,"념으로":242,"통계청":58,"하인리":70,"준의 ":153,"년으로":59,"중앙 ":206,"교육자":122,"면적은":1793,"목적 ":106,"교육을":199,"교육의":108,"치구 ":58,"탕으로":335,"합이다":98,"준을 ":139,"함으로":764,"향에 ":59,"교이다":212,"해양수":137,"합으로":111,"그대로":248,"주인 ":62,"해안에":90,"해양부":103,"그룹 ":318,"화나 ":70,"중에 ":471,"군주 ":60,"지나 ":148,"지난 ":127,"혁명 ":188,"노이 ":65,"프트의":82,"프트웨":808,"노선을":63,"노선의":63,"노선이":141,"권에 ":140,"터스 ":74,"치고 ":176,"그로 ":130,"교육과":397,"출범하":69,"교육기":98,"해양경":119,"준인 ":60,"구스타":61,"준이 ":89,"텔레비":412,"무라 ":94,"학적으":169,"노의 ":74,"협력 ":130,"학적인":134,"지널 ":58,"태어났":310,"태어난":296,"태어나":175,"학에서":938,"敎之":110,"공항 ":89,"구성된":325,"구성되":572,"맨해튼":63,"하여야":74,"공한 ":83,"공학 ":155,"출이 ":60,"할아버":61,"라고 ":2752,"태양계":61,"구원 ":202,"지낸 ":207,"년에서":224,"카고 ":57,"물건을":96,"화국 ":933,"출을 ":84,"하에서":122,"최적화":72,"환경 ":270,"먼트 ":231,"하였는":106,"행에 ":116,"즈로 ":87,"터미널":258,"경하였":58,"학연구":99,"태에서":98,"국왕 ":169,"하였다":4731,"하였던":143,"추세츠":96,"물관이":96,"헬름 ":83,"구와 ":470,"군에 ":299,"모아 ":105,"해석학":116,"해석하":67,"하였고":606,"중생대":120,"중요 ":61,"중서부":70,"라가 ":204,"행을 ":299,"구성하":492,"주식회":205,"중을 ":67,"국적 ":77,"행의 ":135,"학으로":130,"공회 ":118,"군인 ":129,"그녀는":86,"묘로 ":171,"모여 ":116,"중의 ":536,"목에 ":210,"탄자니":77,"축을 ":74,"학이다":126,"군이 ":366,"국세청":71,"타이완":189,"행이 ":113,"프톨레":68,"타인의":82,"행위 ":176,"군의 ":818,"하우스":121,"했을 ":143,"축산식":105,"타이베":77,"금까지":91,"카가 ":78,"면에서":220,"과학 ":369,"중소기":146,"학위를":104,"국장 ":61,"군을 ":273,"해진 ":173,"군은 ":187,"지는 ":2020,"메탈 ":76,"국이 ":616,"구성요":67,"하와이":79,"국인 ":365,"군사적":69,"추원 ":57,"구성원":169,"구장 ":97,"타이틀":140,"즈를 ":119,"종특별":81,"국의 ":7793,"구인 ":59,"日三":105,"구소는":106,"하였으":1353,"중인 ":239,"즈니스":133,"총칭이":130,"구의 ":1000,"국을 ":477,"년에는":218,"국은 ":385,"중성자":58,"조하는":89,"하철 ":179,"토관리":71,"퓨터의":102,"향상 ":127,"항이 ":78,"퓨터에":131,"국왕이":80,"신하여":63,"해외 ":140,"항의 ":71,"출생 ":118,"해서는":187,"신학자":96,"시험이":69,"국으로":324,"명을 ":535,"명은 ":635,"공하여":84,"공하였":61,"구원은":154,"구이다":327,"필요로":66,"시행하":82,"합이 ":96,"구역은":158,"구역으":111,"타이거":62,"구역이":121,"명의 ":949,"주시 ":444,"기까지":238,"공하기":71,"합의 ":217,"공하고":126,"군에서":97,"합은 ":87,"명이 ":605,"명인 ":101,"합을 ":202,"신호를":88,"공하는":288,"敎 ":92,"최종 ":111,"항을 ":212,"식회사":205,"주변의":60,"하지 ":1309,"매하는":108,"해와 ":164,"무는 ":91,"주변에":81,"공한다":129,"합성어":91,"메이션":621,"퓨터를":66,"필요하":96,"필요한":311,"즈는 ":180,"헨리 ":107,"식품의":90,"농수산":67,"메이저":161,"시한다":59,"관하는":129,"시하는":199,"식하는":102,"구장이":90,"구장은":60,"구장으":111,"테면 ":61,"메이지":127,"지가 ":477,"즈니 ":71,"과학에":60,"태조 ":87,"과학원":110,"시행되":95,"과학적":147,"교토 
":66,"과학의":105,"과학자":114,"중세 ":186,"그라드":91,"국제명":67,"메이크":70,"교통 ":183,"공화정":72,"구치 ":63,"과학기":696,"화가 ":316,"국이다":140,"해살이":124,"국인들":59,"공화국":2091,"구축 ":59,"시하였":95,"함수의":80,"또한 ":786,"해의 ":186,"무관으":57,"물고기":88,"지게 ":159,"군이다":165,"공화당":68,"군으로":160,"헌법 ":117,"규모가":68,"시호는":324,"국제교":61,"국제공":130,"출발하":59,"합에 ":76,"테르 ":63,"한의 ":141,"현대 ":369,"조한 ":117,"곡하였":67,"해부학":57,"면서도":79,"족한 ":72,"한이 ":81,"조합 ":90,"통계 ":83,"향력을":65,"통과 ":82,"학적 ":572,"권의 ":269,"태생의":62,"말하는":177,"면은 ":81,"면을 ":202,"말한다":3380,"종파 ":71,"기관을":79,"기관으":991,"기관의":141,"기관이":951,"지고 ":1032,"한자 ":153,"기구로":81,"권은 ":114,"기관에":108,"권을 ":623,"할을 ":488,"태의 ":331,"만화가":103,"학은 ":215,"그를 ":132,"하의 ":427,"높은 ":477,"면이 ":103,"학상을":72,"말하면":59,"말하며":226,"기능 ":112,"하이 ":171,"학의 ":951,"교육청":88,"기는 ":947,"글로 ":110,"모모야":63,"학을 ":385,"면의 ":154,"학이 ":90,"피아노":101,"진과 ":113,"그림 ":81,"지구 ":303,"그린 ":169,"높이 ":126,"하자 ":138,"출신 ":245,"네치아":57,"함에 ":123,"한을 ":120,"피아니":63,"치가 ":290,"놓은 ":145,"학자 ":830,"기니 ":77,"면적 ":89,"신화에":220,"중심 ":210,"해야 ":228,"구에서":361,"신화의":72,"명에 ":142,"해양 ":65,"피언컵":95,"진구 ":92,"주얼 ":63,"그들의":150,"그들은":74,"질과 ":83,"함은 ":90,"함을 ":479,"해에 ":167,"타지 ":89,"구역상":152,"실행하":64,"국에서":1555,"주에 ":640,"지기 ":88,"기도 ":3458,"항에 ":88,"피에르":97,"권이 ":170,"기관차":146,"만화이":72,"농산물":130,"테니스":103,"프트 ":483,"구약성":69,"국에는":66,"국어로":84,"피언십":167,"모바일":151,"주와 ":707,"현동 ":88,"해안 ":100,"기구이":161,"준에 ":58,"증가하":70,"초점을":59,"뿐만 ":208,"퓨팅 ":60,"터에서":152,"중요성":72,"주주의":702,"관으로":1430,"관이다":925,"관이었":166,"탄티노":86,"국사 ":69,"과적으":61,"지냈으":100,"과정이":135,"과정을":292,"높이는":372,"과정에":270,"중앙행":163,"중앙회":62,"하여 ":10358,"학에 ":189,"누르고":122,"준정부":80,"하에 ":228,"관장하":182,"명칭으":130,"명칭은":335,"명칭을":111,"명칭이":208,"핵심 ":64,"군사 ":206,"중의원":106,"광역시":1449,"구성 ":124,"해시 ":75,"구밀도":162,"호가 ":138,"행성 ":102,"지니고":81,"투갈 ":67,"지로 ":816,"중요하":63,"중요한":537,"츠가 ":73,"국무총":119,"하위 ":107,"국민당":74,"텍사스":130,"학원 ":257,"네트워":601,"태풍 ":215,"구소 ":210,"주지방":89,"농어촌":104,"중이다":171,"학생들":151,"국민경":58,"지만 ":2429,"지마 ":65,"농업인":62,"지막 ":351,"한불교":58,"퓨터 ":987,"뉜다 ":141,"견한 ":100,"항상 ":79,"지리 ":83,"광주 ":85,"목으로":232,"공으로":88,"목이다":93,"피소드":110,"공원이":117,"질로 ":92,"지대에":91,"함수 ":148,"토미 ":72,"고지로":66,"교수 ":144,"지를 ":1014,"곳으로":142,"공용어":70,"촉진하":68,"결한 ":67,"구려의":58,"곳이다":155,"지니아":101,"해서 ":1198,"핀의 ":74,"철학자":224,"목적이":96,"목적으":2374,"목적은":73,"목적을":89,"관에서":127,"메트로":85,"타크래":128,"무부 ":116,"고쿠 ":157,"털이 ":79,"문》 ":124,"지되었":235,"관위는":85,"지면 ":61,"지며 ":169,"과이다":71,"톨릭 ":174,"천황의":64,"공유하":89,"지도자":328,"통령 ":409,"국립공":185,"턴의 ":58,"학명은":70,"모야마":65,"니가 ":122,"족행위":60,"진다 ":599,"출력 ":84,"물리 ":92,"결하는":245,"지도 ":237,"구분하":130,"지구에":121,"함마드":68,"근대 ":136,"노프 ":58,"곡한 ":151,"지금까":89,"기계 ":100,"토리 ":100,"기고 ":87,"교적 ":222,"현과 ":107,"교수로":84,"학문이":118,"학문적":57,"지관리":66,"기관 ":423,"지닌 ":149,"근거하":73,"존하는":140,"하므로":81,"한민국":7350,"피아 ":143,"터의 ":386,"문대학":65,"족하였":143,"직공무":113,"노카미":82,"누스 ":182,"합류하":76,"기구 ":245,"주어지":59,"주어진":139,"교수이":104,"지금의":259,"토니아":73,"늘날 ":214,"멀티미":78,"학술 ":98,"합리적":59,"지금은":121,"눅스 ":72,"헝가리":248,"토르 ":81,"픽에 ":123,"토를 ":102,"키텍처":131,"천황 ":220,"하버드":67,"한반도":215,"터와 ":146,"학습 ":59,"문명 ":78,"지기도":75,"구에 ":582,"국어 ":241,"·철학":275,"지구의":118,"모양을":68,"모양으":92,"테스 ":76,"중심부":67,"주었다":103,"모양이":89,"모양의":161,"주에서":249,"기기 ":100,"구역 ":205,"국에 ":586,"토대로":59,"목에서":71,"중심지":446,"직된 ":71,"촉진 ":60,"중심의":73,"중심으":672,"중심이":78,"한민족":78,"주의는":98,"줄여서":375,"계획 ":143,"주이다":406,"뉴기니":78,"무로 ":104,"모스크":232,"프스키":67,"주의를":168,"축물 ":62,"피스 ":81,"교에 ":115,"국방부":74,"터에 ":207,"종합적":78,"국민에":61,"권력을":67,"주이며":69,"허가된":536,"주요한":65,"토로 ":68,"국문화":83,"해발 ":137,"명으로":376,"해방 ":60,"집단 
":101,"명이다":422,"행되고":89,"국방송":84,"국민의":235,"행된다":79,"행되는":98,"명이며":67,"모습을":123,"교와 ":209,"주인공":208,"토너먼":71,"국불교":141,"중앙아":113,"무를 ":833,"중앙정":74,"하르트":89,"지냈다":412,"행동을":107,"권리를":141,"교사상":96,"행되었":179,"중에서":493,"주장한":106,"주장하":234,"구분된":68,"구분되":71,"기간 ":297,"주의에":98,"교육 ":543,"기가 ":609,"주의와":98,"준이다":67,"준으로":442,"통과하":60,"구별하":80,"주의자":203,"주의인":510,"그는 ":921,"주의적":180,"광주광":201,"하면서":761,"주의의":142,"물론 ":117,"물로 ":459,"무대신":61,"학살 ":67,"교의 ":1022,"명칭 ":96,"중에는":97,"그것은":65,"그것을":99,"주제로":77,"학생 ":143,"근거로":57,"丘乙 ":101,"실제로":191,"문을 ":285,"문은 ":97,"丙之 ":191,"현악단":131,"념물 ":77,"전한 ":445,"구를 ":601,"전할 ":64,"문서를":72,"三倉 ":159,"초에 ":110,"과사전":636,"챔피언":309,"丘之 ":172,"무장 ":114,"승팀 ":85,"경제자":60,"丘亞 ":97,"경제의":97,"경제적":182,"군대를":57,"三丁三":180,"三丁丁":115,"템이다":123,"무소는":58,"丘亂 ":62,"무의 ":132,"문서는":80,"화성 ":63,"스케이":155,"중기의":177,"조정 ":98,"젝트 ":155,"물을 ":636,"三並亞":103,"물은 ":154,"三並亂":75,"三並並":197,"화상 ":61,"三並之":157,"三並三":347,"三並丁":93,"확립하":58,"체코 ":66,"공작 ":150,"러나 ":375,"토에 ":57,"三三並":435,"三三之":337,"三三亂":104,"三三亞":275,"절한 ":64,"三三丘":91,"三三三":746,"三三丁":204,"三丁並":115,"三丁之":138,"정치가":329,"공이 ":75,"경제부":95,"三丘三":78,"추구하":83,"三三倉":73,"문서에":61,"무소장":62,"통신 ":286,"문이 ":99,"공을 ":239,"계이다":139,"스코틀":228,"문서이":58,"문자 ":261,"시코 ":197,"공의 ":248,"래되었":95,"三之並":215,"三之之":161,"문의 ":316,"三之亞":63,"공유 ":71,"三三國":62,"관에 ":201,"심으로":693,"시켜 ":262,"三之丁":129,"三之三":285,"공산주":198,"십자군":103,"화산 ":89,"졌으나":61,"과와 ":58,"공사의":74,"곳의 ":66,"못한 ":89,"노래 ":87,"조선민":527,"곳을 ":106,"곳은 ":97,"회를 ":455,"丁倉 ":81,"중국어":136,"스타리":76,"중국에":167,"시즌이":135,"형성하":139,"경쟁력":153,"명하였":122,"전》 ":217,"뜻의 ":103,"몬테네":93,"공원 ":236,"래밍 ":212,"국령 ":69,"협정 ":78,"구로 ":407,"해하는":84,"스타디":113,"뜻은 ":77,"뜻을 ":186,"스크바":214,"과에 ":283,"조사연":59,"무와 ":69,"스크리":94,"스크립":98,"硏之":94,"문에 ":1313,"결정하":131,"구려 ":72,"라루스":57,"丁並之":61,"정책을":139,"丁並並":65,"랜드의":362,"스토니":68,"정책의":92,"丁並三":108,"정책이":65,"정책에":70,"랜드에":119,"형의 ":232,"뮤니티":87,"존의 ":270,"스테르":112,"남쪽으":574,"남쪽은":77,"丁三亞":60,"丁三之":151,"경으로":205,"丁三並":143,"丁三三":197,"공산당":201,"丁三丁":74,"계열의":107,"스터미":113,"丁丁之":67,"丁丁三":77,"丁丁丁":59,"광범위":81,"합회는":84,"조선총":95,"스크톱":69,"스타일":126,"스타인":141,"형이 ":118,"스탄의":136,"냈으며":111,"남쪽에":112,"족의 ":403,"경우에":367,"뮤니케":86,"丘並 ":120,"조선시":218,"丘丘 ":120,"겨진다":96,"족을 ":151,"족은 ":109,"중국인":65,"중국의":313,"조의 ":556,"丘三 ":214,"丁之之":73,"중교통":58,"족이 ":130,"조사하":74,"형을 ":227,"丘丁 ":189,"조선의":232,"조선인":72,"丁之並":120,"丁之三":108,"출된 ":86,"조선왕":60,"조선에":59,"형은 ":84,"무원 ":354,"계에서":550,"물에 ":244,"나중에":137,"내에서":352,"정할 ":104,"스템에":108,"제품 ":138,"스테이":248,"형성된":99,"형성되":118,"並亞 ":397,"정한 ":1015,"태풍으":59,"태풍이":71,"공연 ":73,"경우를":91,"통상 ":58,"스템의":137,"스템이":178,"스템을":165,"스템은":65,"계약을":87,"정치학":71,"고종 ":91,"민국 ":4370,"터키 ":238,"중남부":58,"고슬라":66,"합하여":263,"럽과 ":63,"명하고":104,"시킨 ":207,"국내에":69,"시킬 ":95,"스타크":130,"곳에 ":630,"럽게 ":67,"종사하":78,"남이다":68,"丘三三":90,"주니어":77,"丘三之":57,"丘三並":57,"회로 ":295,"함하여":108,"경우는":71,"국내외":132,"스탄티":147,"접해 ":411,"경우도":213,"스토리":150,"三倉三":63,"종이 ":160,"슷한 ":237,"공업 ":91,"전투는":153,"심지는":190,"제프 ":99,"템에서":59,"경찰 ":105,"초의 ":683,"명하며":58,"호수 ":60,"경우가":270,"전투기":110,"고전 ":139,"스튜디":195,"결정되":69,"실질적":115,"났으며":113,"함한다":221,"고속버":59,"란드에":64,"란드어":81,"주도로":122,"전통문":78,"투는 ":161,"무에 ":90,"테이션":199,"명하다":316,"투갈어":68,"주도는":958,"명하는":145,"란드의":272,"주를 ":291,"래로 ":181,"전히 ":176,"테인먼":266,"함하는":169,"경에서":129,"테이블":95,"三丘驿":62,"좋은 ":139,"무역 ":89,"三亞三":128,"三亞並":101,"三亞之":123,"내용은":87,"내용을":238,"내용으":63,"스파냐":94,"내용의":62,"내용이":162,"곡을 ":163,"곡은 ":123,"좁은 ":114,"함하고":175,"고속도":557,"주로 ":2407,"고의 ":280,"철학 ":391,"고자 ":296,"곡이 ":62,"전화 ":176,"형에 ":66,"곡의 
":107,"심지이":98,"험이다":74,"람들이":226,"람들의":90,"검찰청":131,"람들은":61,"람들을":90,"고유 ":58,"민간 ":90,"람들에":76,"현재 ":1569,"스트로":176,"並丁 ":256,"스트를":69,"並三 ":685,"렇게 ":88,"것처럼":82,"모할 ":63,"스트리":632,"졌으며":172,"학회는":214,"스트라":255,"전혀 ":90,"스트레":553,"並並 ":551,"종의 ":525,"호세 ":58,"ος ":121,"교류 ":167,"접한 ":137,"공부하":58,"스티안":65,"並丘 ":162,"종을 ":142,"현이 ":70,"종은 ":81,"스파이":76,"호선 ":244,"정치적":249,"민과 ":61,"並之 ":482,"조지 ":208,"조직 ":160,"정치인":881,"교를 ":330,"미국 ":2327,"통문화":79,"현의 ":217,"효과적":73,"스티븐":77,"할한다":328,"현을 ":152,"할하는":82,"並乙 ":122,"스티벌":59,"스파르":60,"행형 ":62,"현상이":157,"래를 ":142,"현상을":242,"텐츠 ":92,"並亂 ":230,"스트이":111,"스트의":82,"협약 ":74,"실시하":164,"제작자":87,"권리 ":62,"문신 ":143,"제자유":65,"경찰청":120,"곳에서":166,"스템 ":494,"노동자":160,"스텔 ":62,"노동조":80,"주당 ":171,"천주교":125,"격한 ":87,"톨리아":98,"효과를":107,"라면 ":101,"관악구":58,"재하였":61,"널은 ":82,"스톤 ":59,"뛰어난":130,"널을 ":66,"호사 ":85,"재하지":105,"통령을":141,"통령의":87,"통령으":73,"통령이":192,"제적인":115,"제적으":88,"그가 ":220,"제작하":177,"제작한":315,"라데시":106,"준다 ":198,"라마 ":280,"주광역":200,"러가 ":71,"미가 ":133,"초명은":71,"경찰서":86,"거하여":108,"제주도":96,"교사 ":69,"제조업":81,"전통 ":182,"넓은 ":249,"행한 ":180,"주도 ":249,"행할 ":135,"정이었":81,"장하며":59,"시아의":577,"시아와":80,"시아어":195,"넣어 ":72,"장했다":79,"시아에":205,"的 ":81,"접전을":65,"나타나":260,"나타난":144,"접적으":61,"나타내":437,"나타낸":182,"곡이다":130,"호르몬":66,"시에는":136,"라디미":108,"곡으로":120,"고있다":120,"제이다":258,"스트 ":1046,"체육관":1157,"현에 ":78,"홍보 ":87,"라디오":334,"재하고":117,"제외한":225,"정의한":63,"정의하":162,"제작되":129,"제작된":176,"장하였":134,"제외하":103,"장하여":85,"고유의":69,"정치 ":296,"식에서":64,"스틱 ":69,"교류를":138,"정적인":117,"재하는":271,"태평양":418,"재한다":193,"시에서":270,"체에서":108,"주는 ":678,"토스 ":122,"활동하":431,"활동한":114,"재했던":158,"고이다":107,"관심을":87,"종목에":75,"너지 ":209,"총리대":58,"청으로":59,"총리를":116,"제정되":152,"제정된":104,"국민 ":136,"제임스":214,"활동했":136,"후기 ":150,"죄를 ":91,"무선 ":97,"해협 ":57,"물리적":166,"시외버":130,"족에 ":107,"三之 ":901,"고용노":84,"三乙 ":170,"三乘 ":99,"조에 ":202,"시청 ":67,"줄기는":57,"활동을":468,"형식 ":59,"화물 ":67,"활동에":80,"총선 ":59,"三三 ":1345,"투기 ":72,"네스 ":139,"라로 ":118,"템의 ":139,"三丁 ":581,"고위공":230,"활동이":95,"조와 ":117,"토론토":96,"물리학":446,"전체의":66,"三丘 ":326,"시외전":76,"템은 ":65,"템을 ":168,"문서 ":147,"슈타인":136,"톨레마":65,"무소 ":98,"최대의":181,"三並 ":896,"三人 ":61,"화되어":83,"래된 ":286,"형문화":146,"텍스트":85,"져있다":81,"화되었":81,"쳐진 ":60,"고양시":85,"공식적":231,"시이다":491,"테스트":133,"활동과":61,"식으로":605,"식이다":352,"三亂 ":228,"고양이":81,"시작된":195,"시작되":382,"터이다":170,"三亞 ":422,"체이다":555,"청장은":97,"시이며":121,"스카르":62,"명한 ":534,"신으로":438,"년마다":62,"丁丹 ":61,"피해자":88,"관의 ":2170,"丁並 ":336,"丁乙 ":118,"주관하":162,"체제의":76,"관인 ":84,"제정한":62,"관이 ":275,"丁之 ":446,"신이다":266,"체제에":91,"공주 ":71,"시인이":151,"추기경":60,"과정 ":135,"궤도 ":71,"토마스":59,"최되었":107,"시장에":144,"제주시":75,"전투 ":320,"토리아":108,"丁丘 ":202,"라를 ":184,"주된 ":92,"시작으":155,"丁丁 ":409,"신이며":66,"丁三 ":412,"노동당":62,"丁人 ":68,"조약 ":210,"확률 ":66,"최되는":68,"경제학":303,"화를 ":1060,"모토 ":158,"과의 ":595,"시장을":66,"시장이":77,"체제를":101,"스카이":59,"회는 ":1632,"랜드 ":1148,"톨릭교":272,"구대회":70,"국립 ":152,"청주시":71,"계적인":187,"계적으":250,"시작하":474,"시작한":212,"시작해":80,"시작했":235,"경주시":80,"축구단":96,"시적으":65,"체적인":105,"丁亂 ":154,"체적으":123,"노동부":61,"신이자":75,"국대학":78,"식적으":162,"전체를":82,"제주특":160,"丁亞 ":110,"논리 ":71,"관을 ":436,"식적인":111,"피해를":133,"관은 ":726,"토머스":70,"大之 ":76,"중부 ":251,"공룡이":97,"제하는":65,"정해진":101,"개편되":114,"국내 ":265,"트가 ":377,"관련된":425,"건전한":206,"권과 ":96,"걸친 ":83,"경영 ":97,"大丘 ":63,"之之三":178,"之之丁":97,"之之並":118,"之之之":104,"네스코":91,"네시아":285,"험의 ":58,"정하여":167,"정하였":105,"험을 ":187,"구나 ":75,"고속 ":76,"경에 ":256,"발된 ":162,"결을 ":132,"험이 ":59,"정하지":57,"미로 ":169,"제품이":89,"제품을":86,"화로 ":269,"之丁並":63,"之丁之":91,"之丁三":123,"之丁丁":66,"량과 ":57,"나폴레":142,"거주하":225,"결정 
":73,"결성한":67,"경상남":479,"것이며":93,"최고의":169,"大亞 ":63,"乙亞 ":60,"조직의":65,"조직으":61,"조직을":68,"조직이":140,"것이라":157,"계약 ":89,"최대 ":229,"체의 ":618,"之並之":139,"之並並":158,"之並三":216,"之並丁":69,"之並亞":199,"함한 ":296,"之丙之":157,"논란이":57,"발견한":90,"발견하":88,"체인 ":128,"게오르":73,"之三丘":62,"것이다":1291,"之三三":426,"之三丁":121,"之三之":190,"之三並":196,"합한 ":160,"것으로":1506,"之三亞":92,"래는 ":286,"정하고":231,"격이 ":107,"것에서":61,"체제 ":143,"乙之 ":116,"정하기":94,"乙丘 ":89,"정하는":363,"정한다":104,"격의 ":76,"조지아":231,"반도 ":306,"乙並 ":132,"건축 ":91,"접한다":355,"검역소":69,"종으로":458,"미널이":72,"가하였":118,"가하여":102,"격을 ":325,"乙乙 ":66,"종이다":450,"라남도":362,"之倉 ":121,"체적 ":96,"추고 ":107,"접하며":306,"해체되":61,"향으로":228,"접하고":189,"노르웨":212,"문제에":105,"처에서":63,"전형적":62,"결성되":75,"결성된":112,"축구 ":742,"최근에":153,"족주의":88,"학회 ":138,"조직된":71,"년에 ":3322,"乙丁 ":151,"乙三 ":190,"문제로":78,"최된 ":172,"받는 ":236,"넷을 ":59,"견을 ":57,"문제를":243,"발견되":256,"발견된":167,"교관이":68,"반대 ":73,"거이다":64,"네의 ":71,"가하고":96,"채택되":89,"라도 ":249,"협력을":97,"물이나":96,"네상스":84,"문자를":85,"계승 ":114,"바다 ":113,"주민들":79,"통신사":68,"물이다":498,"문제가":87,"노벨 ":164,"걸쳐 ":536,"효과 ":100,"고리즘":197,"가했다":70,"건이다":462,"교는 ":208,"현상 ":62,"大並三":80,"건으로":192,"주소 ":90,"통신망":69,"했지만":129,"빌헬름":85,"험에 ":74,"나톨리":87,"체스터":79,"大亞之":73,"문이다":346,"정확하":95,"정확한":116,"문으로":196,"가하는":151,"식품부":594,"고려의":79,"무장관":67,"무원이":83,"무원으":224,"행정부":79,"나파소":72,"처음에":146,"천으로":103,"라드 ":121,"네수엘":63,"채택하":69,"가포르":89,"증권 ":60,"시하고":125,"행정동":192,"물에서":66,"뮬레이":141,"년부터":1634,"군과 ":241,"고려시":129,"거에서":68,"행정기":403,"비행기":106,"국군 ":60,"노래이":97,"제하여":61,"터페이":229,"노래를":60,"호를 ":436,"제학자":107,"미디 ":96,"격에 ":63,"거쳐 ":338,"랐다 ":84,"란드 ":706,"죄의 ":57,"못하는":67,"향악단":90,"문에서":86,"행정안":64,"처음으":371,"구글 ":68,"한편 ":73,"라노 ":78,"터키의":77,"민공화":988,"화된 ":280,"민국의":2300,"기》 ":71,"민국에":562,"했으나":251,"스페인":683,"스페이":72,"해진다":88,"행위를":267,"통을 ":190,"교로 ":167,"통은 ":64,"해지는":107,"못하고":66,"네이버":142,"했으며":734,"행으로":73,"통이 ":84,"스펙트":61,"통의 ":259,"고에 ":101,"주민 ":65,"고와 ":63,"並亞三":75,"並亞並":57,"시티 ":116,"학파 ":61,"미국의":1693,"미국이":72,"미국인":172,"노스 ":128,"계승하":58,"농림수":538,"미국에":434,"활동 ":273,"행위에":64,"건축가":79,"스페란":59,"형법 ":64,"호로 ":311,"並三國":84,"건축물":157,"투로 ":104,"並之之":99,"격으로":151,"並之並":167,"並之三":173,"並之丁":59,"농림축":109,"행위이":57,"행정구":264,"並並之":99,"並並亞":75,"並並並":118,"並並三":168,"전투에":151,"並丁三":78,"並丁並":60,"並丁之":93,"계적 ":102,"전투이":155,"강화하":64,"並三丁":85,"초를 ":88,"並三三":352,"並三並":166,"구기관":173,"並三之":182,"확대 ":72,"並三亞":95,"丹之 ":65,"스피드":93,"주도하":91,"화교류":59,"시칠리":59,"식품 ":76,"처음 ":669,"처의 ":89,"丹丁 ":93,"민경제":59,"국과학":73,"라는 ":3491,"민당 ":69,"조약으":67,"경주 ":115,"조약이":138,"민국과":113,"국도 ":142,"라고도":1545,"환경에":149,"並倉 ":90,"해주는":74,"미니 ":62,"통일 ":78,"전통적":268,"전투를":62,"종종 ":128,"환경을":119,"전투로":106,"환경의":58,"비해 ":202,"스포츠":528,"국기는":74,"싱턴 ":94,"터테인":265,"화국에":206,"제학 ":83,"제한 ":74,"천을 ":85,"주도이":258,"회관 ":166,"공식 ":315,"천의 ":86,"공신 ":59,"스프레":81,"화국이":115,"화국의":592,"쳐서 ":141,"스플레":121,"투를 ":79,"민간인":66,"방과 ":134,"착한 ":79,"시카고":77,"물질 ":102,"미국과":130,"논리학":71,"시한 ":262,"三國 ":87,"향이 ":94,"모하고":162,"구는 ":2663,"관리를":117,"국가의":292,"국가이":133,"년을 ":193,"경쟁 ":61,"향의 ":74,"협동조":95,"합중국":79,"국가와":63,"전하였":96,"년이 ":104,"노선 ":97,"하키 ":77,"모하기":104,"래가 ":59,"향을 ":645,"년의 ":319,"식한 ":96,"존재했":183,"존재하":508,"존재한":228,"헤르체":107,"헌법에":75,"관리소":120,"홈구장":160,"화가이":84,"관리사":94,"게임은":64,"게임을":126,"게임으":157,"게임이":455,"게임의":112,"게임에":95,"미는 ":124,"계속되":62,"고비나":106,"시코의":88,"계의 ":803,"경제 ":436,"철의 ":57,"전해지":88,"청에 ":72,"구단 ":75,"목표로":175,"회가 ":352,"게이트":71,"공모함":59,"전했다":59,"之亂 ":185,"스피어":79,"관리자":123,"논리적":58,"널이다":102,"발굴 ":71,"시켰다":116,"화는 ":234,"공무원":718,"之亞 ":205,"관리청":88,"녀의 ":96,"之之 ":481,"화국과":90,"토의 
":187,"국경을":303,"해졌다":113,"스하키":63,"관리하":241,"시픽 ":95,"문신이":114,"구관으":65,"之乙 ":132,"구광역":218,"체에 ":291,"향상과":135,"받고 ":270,"관련이":94,"之丁 ":427,"경우 ":678,"주변 ":114,"계에 ":486,"之三 ":666,"시키는":366,"항으로":97,"시킨다":73,"슷하게":59,"하지만":557,"之丘 ":282,"之且 ":96,"것이었":89,"계열 ":117,"실행 ":66,"졸업하":144,"之並 ":569,"너지를":107,"발과 ":93,"고스 ":58,"국가가":96,"경상북":406,"국가기":78,"노미야":60,"신호 ":86,"전하는":152,"국가대":143,"항이다":64,"개편하":94,"계와 ":138,"초등학":97,"전하고":83,"전하게":65,"관련하":58,"관련한":66,"국가들":151,"무실은":1845,"므로 ":457,"즈가 ":125,"념을 ":152,"실험 ":62,"시험 ":132,"게임기":80,"총리 ":123,"물의 ":727,"경을 ":557,"국가로":150,"경은 ":67,"물이 ":315,"청은 ":173,"조이다":111,"국가를":123,"청소년":337,"체와 ":151,"구개발":124,"관료이":65,"경의 ":225,"국가보":77,"비히 ":100,"받게 ":57,"향상에":76,"공사 ":223,"경이 ":113,"청이 ":90,"통에 ":62,"총독부":113,"문제 ":176,"향상을":130,"무슬림":67,"시키기":187,"청의 ":110,"빅토리":101,"족으로":212,"결승전":133,"시키고":183,"미널 ":87,"구간이":72,"광부 ":1095,"족이다":137,"국가에":207,"향상시":75},"n_words":[4985886,5984351,3260836],"name":"ko"}
\ No newline at end of file
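The hunk above closes the new ko profile; each langdetect profile is a single JSON object per language. As a minimal sketch of what these opaque blobs contain, the Python 3 snippet below loads and inspects such a file. load_profile is a hypothetical helper written for illustration, not langdetect's own API; the key layout ("freq", "n_words", "name") is taken from the file contents shown in this diff.

    # Minimal sketch: inspecting a langdetect profile file with Python 3.
    # Each profile is one JSON object with three keys:
    #   "freq"    -- 1- to 3-character n-grams mapped to corpus counts
    #   "n_words" -- totals of 1-, 2- and 3-grams, in that order
    #   "name"    -- language code of the profile ("ko", "lt", ...)
    import json

    def load_profile(path):
        # Python 3's json.load expects text, so the file is opened in
        # text mode with an explicit UTF-8 encoding.
        with open(path, encoding='utf-8') as f:
            return json.load(f)

    profile = load_profile('nlp_resource_data/langdetect/profiles/lt')
    print(profile['name'])             # 'lt'
    print(profile['n_words'])          # [unigram, bigram, trigram] totals
    print(profile['freq'].get('ūdu'))  # count of one trigram from this diff: 237

Opening the profiles as UTF-8 text rather than bytes is the idiomatic Python 3 form; under Python 2 the same read could silently yield byte strings, which is the kind of difference a 2-to-3 conversion of this code has to account for.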
diff --git a/nlp_resource_data/langdetect/profiles/lt b/nlp_resource_data/langdetect/profiles/lt
new file mode 100755 (executable)
index 0000000..3f3e052
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"ūdu":237,"ūdo":102,"ūdi":671,"ūda":331,"D":8684,"E":5566,"F":3594,"G":7812,"A":19441,"B":11003,"C":5201,"L":15923,"M":12432,"N":7515,"O":2898,"H":3427,"I":6943,"J":8588,"K":17851,"U":3005,"T":11425,"W":951,"V":11358,"Q":106,"P":20146,"S":16914,"R":10122,"Y":441,"X":1264,"Z":1079,"f":17928,"g":102928,"d":143421,"e":363311,"b":73275,"c":39025,"Fed":229,"a":700709,"n":341806,"o":414082,"l":214716,"m":192290,"j":140065,"k":242084,"h":15703,"i":803156,"w":1450,"v":137211,"Fer":122,"u":266546,"t":305542,"s":495974,"r":352720,"q":282,"p":146221,"z":20490,"y":92511,"x":937,"ūgn":67,"ūgi":136,"­":175,"²":445,"Fil":257,"Fin":83,"í":134,"é":265,"ä":96,"á":197,"à":65,"ü":86,"ú":64,"ö":70,"ó":158,"ė":94567,"ę":9237,"ā":164,"ą":20880,"Č":1561,"č":23545,"ī":70,"Į":732,"į":24134,"ō":147,"ų":90375,"Ž":2451,"ž":44691,"Š":7077,"š":70424,"Ū":76,"ū":25506,"Fal":71,"Far":66,"ūbr":365,"Fab":73,"Eri":121,"Est":230,"Eti":120,"Eur":1489,"Ekv":92,"Ele":243,"Eko":63,"μ":79,"ν":128,"ο":198,"ι":119,"κ":65,"λ":109,"ε":95,"α":204,"Ent":76,"σ":88,"ς":168,"ρ":114,"π":65,"τ":94,"ь":90," l":18168," m":39544," n":26364,"я":147," o":7927," h":2325," i":50039," j":10300,"ы":100," k":69096," d":33767," e":11412,"ц":67," f":7201,"ч":71," g":24899,"р":400,"с":408," a":48847,"т":349," b":21159,"у":184," c":5176," y":9991," x":101," z":358," u":9812," t":37056," w":140," v":43820," p":74653," s":59667," r":27511," J":8568," K":17825," H":3382,"Gel":228," I":6930," N":7489," O":2834,"Geg":68," L":15878," M":12380," B":10949," C":5111,"Ged":147,"С":93," A":19385," F":3572," G":7762," D":8642," E":5548,"л":327,"к":381," Z":1072,"й":180," Y":439,"и":579," X":1253,"п":122,"о":721,"н":471,"м":156,"г":113," S":16766,"в":327,"Ger":157," R":10064,"б":111," Q":106,"а":757," P":20081,"з":104,"Geo":114," W":927,"Gen":295," V":11305,"е":519," U":2992,"д":172," T":11383," ą":118," č":811," Č":1561," ė":303,"Gla":64,"Gib":75,"Gin":94,"Gil":90,"Gir":131," ž":11466," Ž":2446," ų":74," Ū":76," ū":618," Š":7067," š":19822," Į":722," į":17189,"šų ":222,"Gan":127,"Gal":319,"Gam":189,"Gaj":89,"Gau":103,"Gar":244,"Gai":76,"Gab":77,"ي":99,"ل":106,"م":65,"ن":73,"ب":63,"ا":149,"ر":65,"šūn":177,"Flo":97,"Fra":186,"Fri":67,"Fre":105,"A ":1074,"For":525,"F ":250,"Da":2187,"Cu":139,"Cy":71,"Cl":158,"Co":831,"Cr":212,"Ce":857,"Ch":1267,"Ci":314,"G ":220,"Ed":144,"Dv":187,"Du":605,"Dz":157,"Dy":151,"Do":667,"Dn":85,"Dr":666,"De":900,"Di":2059,"Fe":543,"H ":197,"Fa":428,"Eu":1613,"Ev":98,"Ex":73,"Er":363,"Et":189,"Es":386,"En":323,"Em":143,"Ep":87,"Ei":125,"El":573,"Ek":234,"Eg":301,"Ge":1133,"Ga":1414,"I ":1579,"Fu":220,"Fr":454,"Fo":751,"Fl":218,"Fi":559,"B ":521," С":93,"II ":866,"C ":1065,"Av":215,"Au":2682,"Ar":1944,"At":910,"As":631,"D ":380,"Ba":3819,"Az":731,"Af":806,"Ag":282,"Ab":409,"Ac":198,"Ad":552,"Am":1804,"An":2621,"Ap":788,"Ai":520,"Aj":72,"Ak":583,"Al":1985,"Bu":1123,"Br":1448,"Ca":774,"E ":242,"Bi":838,"Be":1548,"Bo":1206,"Bl":293,"Him":100,"Hip":99,"Kv":191,"Ku":1466,"Ky":76,"Kn":86,"Kl":830,"Kr":1957,"Ko":2668,"Le":2735,"Li":7587,"N ":208,"La":2652,"Lu":489,"Ly":250,"Lo":724,"Dū":73,"Me":1902,"Dž":420,"Mi":1792,"O ":471,"Ma":4741,"My":151,"Mu":533,"Mo":1998,"Ni":720,"Ež":273,"Ne":1803,"Na":1866,"P ":639,"Hel":151,"Ny":218,"Nr":561,"Nu":901,"No":779,"Ok":221,"Ol":265,"Om":89,"On":141,"Ją":106,"Oc":104,"Od":117,"Of":81,"Hen":73,"Ob":214,"Her":326,"Gi":594,"Gl":239,"Gr":2248,"Go":422,"Gu":450,"Gv":487,"Gy":327,"J ":126,"Ha":879,"He":851,"Hi":517,"Ho":543,"Hu":227,"Hy":110,"K 
":521,"Ib":64,"Id":102,"ा":68,"Ig":304,"Im":227,"In":1584,"Ik":112,"Il":691,"Aš":111,"्":69,"Iv":75,"Is":624,"It":425,"Ir":401,"Ja":1783,"L ":366,"Dė":112,"Iz":149,"Ji":1102,"Je":435,"Jo":1434,"Bū":168,"Ju":1787,"Hal":105,"Ka":7199,"M ":415,"Han":93,"Ham":121,"Har":160,"Ki":1470,"Hav":88,"Ke":841,"Us":82,"Mū":111,"Ut":266,"Ur":330,"Uo":118,"Up":176,"Un":265,"Uk":462,"Ul":82,"Ug":88,"W ":71,"Ty":98,"Tv":72,"Gyv":237,"Tu":954,"Tr":1269,"To":1061,"Th":580,"Ti":954,"Te":1633,"Ta":3988,"V ":1768,"Sy":113,"St":1651,"Kū":349,"Sv":277,"Su":2454,"Wo":154,"Wi":274,"Wh":63,"Wa":211,"We":128,"Vy":520,"Vo":1251,"Vi":4324,"Vl":71,"X ":603,"Va":2935,"Ve":1368,"Uz":69,"Gva":257,"Gvi":230,"Iš":781,"Jį":73,"Pu":558,"Pr":4041,"Ps":108,"Kė":213,"S ":1115,"Pe":1585,"Gua":94,"ėžt":110,"Pa":7638,"Gud":68,"Pl":1104,"Po":1535,"Pi":2755,"Ph":112,"ėži":636,"Os":252,"Ot":63,"Op":146,"Or":562,"Jė":115,"R ":1502,"Se":1883,"Sc":300,"Si":1578,"Sh":180,"Sn":82,"Sm":286,"Sl":299,"Sk":1024,"Sr":63,"Sp":553,"So":1804,"Ru":2114,"Jū":181,"Ry":867,"Mė":156,"U ":260,"Jų":127,"Sa":3106,"Re":2289,"Ri":771,"Ro":1443,"Lė":71,"T ":426,"Ra":1932,"Gre":184,"Gri":532,"Gra":983,"Vė":216,"b ":435,"Už":349,"Gru":297,"Gro":162,"a ":85927,"Sė":67,"Yo":68,"Yr":183,"Z ":96,"Są":373,"Gol":68,"Sū":95,"Tė":65,"Za":514,"Ze":220,"Zi":68,"Zo":74,"Rū":230,"aė":112,"i ":72374,"fy":103,"gd":980,"ge":8770,"ga":24984,"Inf":65,"fl":318,"fg":67,"ff":102,"fi":4585,"fr":1236,"ač":2434,"fu":1782,"ft":398,"fo":4629,"Int":267,"Ins":77,"j ":778,"bę":413,"bė":6516,"gy":5384,"gz":596,"he":3549,"ha":2884,"gn":1795,"gm":391,"gl":2484,"bą":398,"gi":19462,"gh":277,"gv":1056,"gu":4898,"gt":1721,"gs":555,"gr":11904,"gp":278,"go":7511,"du":8502,"dv":2782,"dy":7581,"dz":385,"g ":2674,"ea":3623,"eb":2049,"ec":2366,"ed":9131,"de":15402,"dg":238,"Ilg":538,"di":31560,"dh":99,"dk":208,"dm":1790,"dl":210,"do":13968,"dn":150,"ds":320,"dr":6349,"ew":210,"ex":138,"eu":1022,"ev":4073,"ey":228,"ez":1754,"fa":1478,"h ":675,"Ind":876,"fe":2397,"eh":155,"eg":6969,"ef":1143,"ee":377,"el":20835,"ek":14743,"ej":2777,"ei":27949,"ep":4431,"Imp":87,"eo":2521,"en":55954,"em":13103,"et":36257,"es":24051,"er":37980,"ca":1365,"bz":182,"e ":76709,"by":382,"bs":381,"br":2910,"bu":7186,"bt":432,"bn":67,"bo":6948,"bj":1259,"bl":3193,"bi":8863,"bd":129,"be":9419,"da":31040,"f ":470,"cy":129,"cu":428,"ct":550,"cr":288,"co":1164,"cm":472,"ck":632,"cl":149,"ci":18836,"ch":6035,"ce":6724,"cc":125,"Iki":75,"c ":600,"az":2975,"ay":393,"ba":18685,"d ":4075,"at":26947,"as":91731,"ar":60917,"ax":94,"aw":113,"av":20318,"au":60068,"ak":21549,"al":70175,"ai":71518,"aj":9247,"ao":400,"ap":19804,"am":34880,"an":55907,"ac":11205,"ad":17624,"aa":279,"ab":6312,"ag":11672,"ah":654,"ae":3599,"af":1358,"nu":16862,"nt":40253,"ns":5838,"ič":2386,"nr":387,"np":87,"no":25328,"nn":299,"iė":81,"fų":79,"nz":353,"ny":7794,"nv":1496,"oe":521,"of":2043,"oc":3010,"od":8826,"oa":712,"ob":3847,"om":18799,"on":30734,"ok":15352,"ol":18423,"gš":230,"oi":681,"oj":37495,"ją":2263,"og":8954,"oh":445,"ot":12743,"m²":434,"os":91367,"gū":892,"ov":9611,"ou":825,"op":6292,"oo":622,"or":24858,"jė":931,"gų":1741,"ję":663,"r ":33992,"ox":93,"ow":356,"oz":1228,"oy":145,"pd":423,"pe":8966,"Ign":235,"gž":880,"pg":158,"pa":38606,"pc":152,"pl":7261,"pm":79,"pn":207,"po":11055,"ph":742,"pi":24691,"ką":1329,"pj":353,"pk":273,"lo":17284,"ln":7253,"lm":1772,"ll":1349,"ls":6461,"dū":239,"lp":828,"lv":2437,"lu":5702,"lt":6648,"dų":1603,"lz":63,"ly":8935,"gę":183,"gė":2321,"o 
":113444,"md":246,"ma":41541,"mb":3140,"mg":95,"dž":9497,"me":27128,"mf":234,"mk":64,"ml":224,"mi":27912,"eš":4905,"mn":423,"mm":415,"mp":5761,"mo":36514,"mt":1706,"ms":5086,"mu":8277,"gį":95,"mz":185,"my":1666,"hė":71,"p ":5826,"na":39063,"nb":286,"nc":4749,"nd":18751,"ne":20163,"nf":1054,"ež":4639,"ng":14841,"nh":168,"ni":75394,"ią":1524,"nj":255,"nk":20248,"nl":177,"nm":383,"jy":130,"dį":460,"ju":6480,"eč":2839,"bū":3463,"jo":44031,"ki":36060,"kh":180,"kg":90,"ke":8017,"kd":1358,"kc":1366,"ka":55987,"m ":17519,"bų":1576,"ky":6899,"ks":8217,"cū":948,"kt":10687,"ku":20625,"eį":147,"kv":1386,"ko":29827,"kr":12212,"kl":14764,"km":7611,"kn":695,"li":63975,"lh":105,"lk":4623,"lj":376,"gą":598,"le":15356,"ld":6869,"lg":3321,"lf":324,"la":33178,"lc":275,"lb":5099,"fė":82,"cų":73,"n ":4899,"hr":575,"hs":179,"ht":431,"hu":755,"hi":2459,"hn":698,"ho":2226,"hl":151,"hm":207,"id":19607,"ic":4825,"ib":3663,"ia":66700,"ih":166,"ig":7460,"if":2042,"ie":61163,"hy":403,"k ":2911,"cė":232,"ir":43336,"is":76128,"it":24630,"iu":21162,"iv":9558,"ix":156,"aš":6163,"ii":607,"dą":519,"ij":49065,"ik":38607,"il":20049,"im":33709,"in":120546,"io":45938,"ip":8649,"je":37765,"až":6015,"ji":8060,"iz":7107,"dė":5783,"dę":233,"l ":5566,"ja":28492,"są":2757,"xi":80,"pš":499,"sč":2453,"pū":264,"rį":1236,"ww":153,"rę":1878,"z ":658,"rė":10366,"ož":891,"šį ":141,"nž":291,"oš":750,"wi":168,"rą":1320,"wn":67,"wo":98,"rč":361,"ws":118,"vy":6021,"nų":7287,"vz":413,"y ":1490,"wa":302,"we":130,"vl":94,"vk":143,"nš":207,"vi":38809,"mž":705,"vu":2039,"pį":89,"vr":219,"vs":237,"vn":98,"vo":18294,"pė":6097,"mų":3539,"uz":1745,"ux":118,"uv":14825,"ve":15404,"va":45949,"pę":314,"x ":613,"mš":101,"ui":3816,"uj":5784,"pą":197,"uk":12269,"ul":12572,"ue":864,"uf":303,"ėšr":96,"ug":10980,"lž":345,"uh":117,"ur":34041,"pč":97,"us":51555,"mū":487,"ut":11996,"um":9287,"un":10381,"uo":35316,"up":8302,"ty":13326,"lų":4078,"tz":73,"nį":1610,"tu":28297,"tt":447,"tw":77,"tv":4512,"ub":4404,"ua":1611,"ud":10289,"uc":1769,"w ":260,"to":36142,"tn":718,"tm":1109,"tl":1570,"ts":3686,"lū":569,"tr":20341,"oč":849,"tp":243,"tg":277,"tf":256,"te":29101,"tk":518,"tj":104,"lš":275,"ti":64800,"th":1088,"v ":492,"nę":1600,"kų":5621,"nė":25365,"tb":1204,"tc":76,"ta":61331,"su":18269,"sv":2479,"kū":2551,"ss":694,"st":45024,"sy":1283,"sz":67,"sl":4978,"sk":13287,"sn":3503,"sm":4814,"sp":7479,"so":8499,"sr":2349,"nč":4435,"sd":318,"sc":661,"sf":460,"se":19656,"sh":403,"sj":177,"ną":1594,"kš":7202,"si":47811,"rz":154,"jų":7476,"mė":4474,"u ":18088,"mę":301,"sa":23030,"sb":164,"mč":71,"rr":385,"jū":2283,"rs":5538,"rt":15475,"lį":874,"ru":13317,"rv":2004,"ry":13171,"rp":4617,"ro":31644,"rn":3965,"rm":7673,"rl":1174,"rk":4790,"mą":3705,"rj":224,"ri":79105,"rh":115,"rg":5620,"iž":716,"rf":267,"re":18683,"rd":4525,"rc":1831,"rb":9032,"ra":68430,"t ":8619,"lę":789,"ių":36065,"lė":8350,"kį":293,"qu":225,"iū":2482,"lč":267,"lą":1161,"iš":22230,"kė":5197,"s ":278368,"kę":593,"px":100,"py":2379,"pt":3572,"pu":6072,"jį":548,"pv":639,"pp":160,"kč":96,"pr":22301,"ps":4103,"IX ":219,"zę":74,"zė":693,"už":4062,"yč":2372,"vū":491,"vų":1508,"uš":1079,"tž":178,"IV 
":137,"tš":139,"sų":1592,"vė":5454,"vę":713,"tų":12691,"tū":4346,"vą":321,"rž":1586,"rų":3959,"sū":452,"vč":186,"Hor":71,"zg":207,"zi":7116,"rš":2441,"zb":85,"zd":1810,"ze":985,"tę":1071,"za":3048,"Hom":79,"yz":303,"Hon":129,"tė":5769,"Hol":87,"zy":69,"rū":3551,"zr":101,"uč":735,"zu":1165,"tį":886,"zo":1897,"zn":153,"zk":67,"zm":1647,"zl":136,"pž":94,"yg":3318,"ye":94,"yc":258,"yd":2201,"ya":257,"yb":11044,"sę":282,"sė":3998,"pų":330,"yv":6788,"sį":230,"yt":11083,"ys":9392,"yr":12733,"yp":1504,"yo":203,"yn":5647,"ym":5531,"yl":2089,"yk":7720,"yj":6842,"tą":2285,"yi":98,"Arg":584,"Are":73,"Arc":130,"Ara":258,"Arm":184,"Ark":155,"Ari":111,"šči":2647,"Apo":92,"Atl":385,"Asu":66,"Ast":264,"Art":193,"Avi":81,"Aut":259,"Aus":646,"Auk":724,"Aug":852,"Ats":131,"yž":529,"Aze":82,"zū":96,"Azi":595,"zų":230,"yš":830,"šė ":158,"šės":100,"šėj":66,"Bag":85,"Bah":87,"Bai":88,"Bak":129,"Bal":1866,"Ban":272,"Bab":93,"Bad":64,"Bar":458,"Bat":90,"Bas":130,"Bau":99,"Abi":168,"Ada":67,"Ach":87,"Adm":203,"Adr":71,"Aga":104,"Afr":719,"šą ":73,"Air":332,"Aka":93,"Akm":194,"Ala":187,"Alb":193,"Alg":65,"Ali":208,"Ale":201,"Alt":143,"Aly":220,"Alk":99,"Alp":174,"Ame":1274,"Ama":241,"Ang":426,"Ank":145,"Ana":165,"And":549,"Any":154,"Ant":913,"Api":205,"Apa":86,"Buv":78,"But":90,"Bul":113,"Bur":231,"Bud":102,"Bue":112,"Bui":103,"Bru":71,"² ":443,"Cac":84,"Cad":64,"Cal":67,"Cam":66,"Car":189,"Can":84,"Cap":73,"Bet":72,"Ber":374,"Ben":312,"Bel":409,"Baž":77,"Bil":78,"Bir":299,"Bla":117,"Bre":103,"Bra":609,"Bro":169,"Bri":429,"Bol":312,"Bon":72,"Bor":174,"Bos":142,"Bot":70,"Dei":68,"Del":74,"Dem":131,"Dep":64,"Deg":63,"Dam":137,"Dan":260,"Dar":403,"Dau":478,"Dab":105,"Dai":135,"Dal":128,"Cho":100,"Chr":185,"Che":286,"Chi":117,"Chu":119,"Cit":67,"Cen":563,"Cer":118,"Cha":378,"Cro":65,"DP ":99,"Cor":127,"Com":218,"Col":154,"Con":133,"アアア":149,"FA ":185,"ón":71,"Dze":98,"Egi":189,"é ":79,"Dni":78,"Daž":276,"ą ":16360,"Dia":66,"Did":1104,"Dis":102,"Dir":98,"Din":76,"Die":275,"Dub":146,"Dun":110,"Dvi":80,"Dus":63,"Dva":68,"Dru":132,"Dri":158,"Dra":241,"Dod":83,"ES ":63,"Don":122,"Dom":99,"Nem":374,"Nev":213,"Neu":81,"Net":114,"ęs":3467,"Ner":231,"Nep":196,"ėz":158,"ę ":5692,"ėc":117,"ėd":1595,"ėl":4129,"ėk":704,"ėn":2911,"ėm":2033,"ėg":1127,"ėj":12119,"ėt":2531,"ės":37968,"ėv":406,"ėp":181,"ėr":671,"čų":69,"Nat":155,"Nau":859,"−":522,"Nig":114,"Niu":165,"ūšy":191,"Nik":109,"ūši":1752,"ėč":63,"Eže":263,"New":68,"ėž":954,"ėš":258,"Nar":128,"Nam":132,"Nag":68,"Nac":163,"Nuo":622,"ąl":250,"ąj":1470,"ąm":118,"ąn":83,"ąs":1224,"ąr":696,"ąv":409,"Nyd":143,"Či":814,"Če":428,"Ča":214,"či":21375,"čk":154,"če":803,"ča":249,"č ":409,"ąž":190,"čo":121,"ču":90,"ė ":26547,"ųjų":1526,"OS ":98,"Nr ":561,"Nov":113,"Nor":386,"Not":108,"čė":116,"įž":184,"Ją ":106,"Oke":81,"Och":90,"Obe":83,"Į ":110,"į ":15271,"Įe":85,"Įk":141,"įg":465,"įl":469,"įk":1035,"įm":672,"įd":140,"įe":122,"Oli":102,"Įs":221,"įr":905,"įp":182,"įv":1510,"įs":2207,"įt":836,"Ori":91,"Org":75,"Oro":67,"Ost":83," −":501,"Po ":75,"Plu":194,"š 
":7298,"Plo":267,"Pli":66,"Ple":71,"Pla":439,"Pin":159,"Pil":450,"Paš":81,"Pir":463,"Pie":1313,"Pic":67,"šį":142,"Per":807,"Pet":236,"Pen":132,"Pel":183,"šč":2693,"šą":98,"šė":408,"Pat":292,"Pas":767,"Par":1014,"Pav":466,"Pau":206,"Pag":484,"Pab":107,"Pad":139,"Pan":697,"Pam":122,"Pap":991,"Paj":94,"Pal":1658,"Pak":297,"šg":221,"še":5219,"šd":199,"ša":5331,"šb":74,"šo":2052,"šp":366,"šm":981,"šn":613,"šk":10936,"šl":1712,"ši":16792,"šv":1932,"šu":1367,"št":7877,"šs":1233,"šr":983,"šy":1194,"Še":341,"Ša":886,"Šl":99,"Šo":70,"Ši":3391,"Šk":111,"Jėz":95,"Šu":220,"Št":155,"Šv":1543,"Šr":75,"Prū":117,"Kėd":210,"ō ":76,"Pun":96,"Pus":118,"Pue":76,"Pro":479,"Pri":882,"Pre":234,"Jį ":73,"Pra":2269,"Pod":91,"Pol":415,"Pon":120,"Pot":100,"Pos":67,"Pop":119,"Por":237,"žr":232,"žs":417,"žt":693,"žu":2236,"žn":2332,"žo":2063,"žp":171,"žv":1312,"žy":1527,"žb":100,"že":6838,"žd":1624,"ža":3699,"žk":289,"žm":1670,"žl":158,"ži":16869,"Žm":91,"Žy":116,"Žu":122,"Žv":178,"Ža":482,"Ži":399,"Že":955,"RS ":294,"Išs":132,"Išt":197,"ž ":1080,"žų":376,"Iš ":185,"žį":113,"žą":94,"žė":568,"SA ":111,"ūči":238,"Rad":254,"Rai":78,"Raj":66,"Rag":90,"Ram":470,"Ran":143,"ū ":138,"šų":238,"šū":280,"ūg":553,"ūd":1733,"ūb":446,"ūz":1060,"ūs":1429,"ūt":1461,"ūv":180,"ūp":172,"ūr":9049,"ūk":1535,"ūl":283,"ūm":598,"ūn":4467,"ų ":88718,"ūč":245,"ųj":1568,"ūš":2032,"ūž":93,"Ita":399,"Isp":323,"Isl":135,"įpr":139,"įma":63,"įmo":598,"Ira":230,"įsi":807,"įsk":80,"įkū":191,"įst":1175,"įta":351,"įte":278,"įtr":77,"įtv":117,"įra":525,"įre":301,"įro":74,"Izr":81,"įga":117,"įgy":242,"Dėl":85,"įei":119,"įdu":68,"įku":691,"Jav":80,"Jau":108,"Jas":71,"įla":456,"Jap":569,"Jan":213,"Jam":221,"Jak":178,"Jel":102,"Jer":74,"Jis":502,"Jie":135,"Ji ":425,"あ":83,"Jo ":148,"Įst":114,"Įsi":88,"Jos":270,"Jon":566,"ア":227,"Joh":97,"Būd":72,"Įku":124,"Jug":66,"Jup":99,"Juo":530,"Jur":232,"Juk":63,"Jun":554,"Įei":85,"Kad":98,"Kab":68,"Kai":1116,"Kam":412,"Kal":1098,"Kap":211,"Kan":625,"Kau":985,"Kat":242,"Kas":171,"Kar":1509,"Kaz":340,"国 ":82,"Ker":168,"Ket":93,"Ken":88,"Kel":250,"Kir":183,"Kit":124,"Kin":537,"Kip":63,"Kie":93,"Kil":126,"Kli":79,"Kle":64,"Kla":523,"Klu":106,"Kon":597,"Kom":332,"Kol":450,"Kos":140,"Kor":624,"Kop":80,"Kov":100,"LR ":167,"Kre":575,"Kra":414,"Kri":437,"Kro":173,"Kru":166,"Kry":125,"Kub":74,"Kul":193,"Kun":174,"Kup":188,"Kur":371,"Kva":100,"Les":78,"Lep":69,"Leo":93,"Len":1971,"Lei":123,"Lau":223,"Laz":170,"MC ":504,"Lai":279,"Lat":571,"Lao":67,"Lap":196,"Lam":138,"Lan":215,"Lab":226,"Kuž":106,"La ":159,"Lib":176,"Lia":138,"Lie":6085,"Lig":81,"Lim":100,"Lin":276,"Lit":89,"Liu":198,"Liv":74,"Luk":93,"Lua":75,"Los":103,"Lot":96,"Lon":143,"ūgš":200,"ūs ":568,"ūpi":97,"ūra":1708,"ūse":158,"ūry":183,"ūru":361,"ūro":3121,"ūri":1981,"ūks":471,"ūka":130,"ūki":544,"ūkl":137,"ūma":254,"ūna":1137,"ūmi":118,"ūdž":157,"Lyg":63,"ūnu":353,"ūni":1251,"ūne":67,"ūno":452,"ūrė":579,"ūrę":259,"Mek":335,"Men":159,"Mel":164,"Mes":186,"Mer":401,"Met":158,"Med":222,"Mez":64,"ūzi":829,"Džo":137,"ūrų":466,"Dža":76,"Dži":72,"Dže":84,"ūkš":76,"ūsi":141,"ūną":68,"ūst":200,"ūnė":216,"ūta":74,"ūte":79,"Man":463,"Mal":406,"ūti":914,"Mar":1387,"Mas":288,"Mag":384,"Mad":147,"Maj":79,"Mak":226,"Mah":72,"Mai":142,"ūty":109,"Mac":86,"ūmų":111,"ūvi":114,"Mau":95,"Mat":219,"ūnų":861,"ūrą":276,"Mok":227,"Mol":370,"Mon":426,"Mos":65,"Mor":209,"Mot":313,"Moz":127,"ūzų":144,"NR ":79,"Mik":194,"Mie":142,"Mia":99,"Mic":168,"Mit":95,"Mir":79,"Mis":99,"Mil":131,"Min":395,"ūtų":160,"ūsų":232,"Maž":500,"Mur":145,"Mus":128,"Miš":117,"Sąj":308,"XX ":204,"XV 
":71,"кий":66,"Wor":95,"Wil":72,"Win":96,"XI ":73,"War":82,"Vyr":158,"Vyt":150,"Viž":91,"Vok":795,"Vol":265,"Viš":70,"Vis":391,"Vit":100,"čkų":69,"Zar":256,"Zel":141,"之":194,"並":122,"三":326,"丁":115,"Yra":183,"三三":72,"Sve":90,"Sva":109,"Sur":111,"Sus":251,"Suv":802,"Sum":113,"Suk":74,"Sup":79,"Suo":198,"Sun":92,"Sud":274,"Suc":101,"Sub":64,"Str":249,"Stu":148,"Sti":184,"Sto":157,"Sta":653,"Ste":219,"Ten":81,"Tei":320,"Tel":387,"Tek":128,"Tam":147,"Tan":110,"Tar":749,"Tau":535,"Tai":1790,"Tak":96,"Tal":105,"Tad":89,"Ski":160,"Skr":105,"Sku":184,"Ska":362,"Sim":140,"Sil":118,"Sir":117,"Sin":192,"Sid":156,"Sie":159,"Sib":270,"Sic":64,"Ser":315,"Sep":68,"Sen":642,"Sel":68,"Sem":92,"Sei":374,"TV ":98,"Kūn":234,"国":86,"Spa":162,"TS ":71,"Spi":75,"Spe":132,"Spo":74,"Sof":72,"Sok":963,"Soc":156,"Sol":99,"Son":84,"Sos":108,"Sky":69,"Slo":158,"Smi":70,"Sma":84," 三":86,"Ryt":687,"Ryg":105,"Jų ":127,"Jūr":171,"Rus":1573,"Rug":83,"Rud":101,"Rum":121,"Sak":119,"Sam":191,"Sal":563,"Sac":85,"Sco":107,"Sav":191,"Sat":80,"Sau":518,"Sar":175,"San":807,"Mėn":80,"Rač":79,"SI ":77,"Res":1390,"Rio":84,"Rin":77,"Rib":131,"Rie":98,"Ras":159,"Rau":279,"Rec":68,"Red":71,"Rei":156,"Reg":116,"Ren":82,"Rok":173,"Rob":64,"Rod":82,"SR ":568,"Ros":182,"Rom":573,"SS ":87,"čų ":69," 之":78,"SO ":120,"Vai":313,"Vad":65,"Vel":220,"Ven":367,"Vei":167,"ски":73,"Vas":102,"Van":212,"Val":695,"Vak":682,"Var":578,"Vaš":70,"Vid":634,"Vie":542,"Vir":184,"Vil":1774,"Vik":201,"Vin":109,"Ver":344,"Ves":109,"Ukr":322,"Ukm":131,"Uni":208,"Uru":89,"Ura":110,"Ute":210,"Upė":66,"VD ":82,"VI ":165,"Ter":267,"Tet":74,"Tes":113,"The":467,"Tib":113,"Tie":67,"Tik":149,"Til":74,"Tim":101,"Tin":67,"Tir":87,"Tiu":72,"Tor":126,"Tok":232,"Tol":111,"Tom":115,"Ton":98,"Tru":94,"Tro":116,"Tri":330,"Tre":156,"Tra":520,"Tur":537,"Tun":69,"šga":146,"ši ":168,"šel":123,"šer":281,"šei":4330,"ša ":145,"še ":72,"šas":283,"šar":241,"šau":451,"šac":68,"šai":175,"šak":893,"šal":2584,"šam":125,"šan":259,"Švč":176,"Šve":932,"Švi":88,"što":898,"štr":99,"šte":875,"šti":1747,"škų":474,"šta":1289,"šun":139,"šuo":95,"šul":170,"šum":70,"šus":224,"šuj":65,"šuk":112,"štu":1243,"šty":912,"švi":810,"šve":671,"šva":235,"šut":236,"šuv":83,"švy":117,"špa":109,"šką":187,"špl":90,"šon":165,"šor":222,"šos":190,"šiš":128,"škė":457,"šre":107,"šra":138,"šri":577,"šiū":137,"šių":982,"šsi":699,"šsp":63,"šsk":346,"šru":91,"šsa":81,"šmė":100,"šku":531,"šky":77,"ško":2173,"šle":715,"šli":309,"šla":532,"šme":157,"šeš":251,"šmi":172,"šo ":946,"šma":81,"šmo":366,"šni":181,"šią":67,"šne":70,"šna":94,"šny":138,"šno":80,"šoj":128,"šok":166,"šom":149,"šia":7811,"šie":268,"šin":2480,"šio":1156,"šil":344,"šim":653,"šik":111,"šiu":646,"šir":121,"šis":1523,"šdė":102,"ška":2905,"ški":3659,"ške":337,"štų":188,"štė":332,"štį":112,"šyt":666,"šys":236,"šym":121,"štą":94,"ęs ":3114,"ęsi":184,"ęst":139,"bje":1237,"baž":702,"bis":206,"bit":262,"biu":413,"bio":445,"bip":90,"bir":575,"bik":90,"bil":1165,"bim":181,"bin":2312,"bij":590,"bo ":831,"blo":199,"ble":254,"bli":2467,"bla":154,"bod":66,"bok":148,"bol":1634,"boj":747,"bež":91,"biš":132,"bon":209,"bom":266,"bor":294,"bot":239,"bos":2270,"be ":431,"bam":151,"ban":1542,"bak":168,"bal":1952,"bai":1573,"baj":127,"bac":144,"bad":71,"baz":180,"bau":383,"bat":206,"bas":1718,"bar":1644,"bda":77,"bi ":151,"bei":2412,"ber":929,"ben":2915,"bel":329,"bek":139,"bev":245,"bes":726,"bet":875,"bia":875,"bib":123,"ėdo":412,"bie":263,"ėdi":261,"ėda":433,"buč":70,"bzd":176,"− ":502,"ca ":297,"buž":82,"car":324,"cas":107,"cat":74,"can":137,"cac":140,"cal":88,"ce 
":305,"bių":787,"bri":867,"bro":454,"bra":835,"bre":106,"bry":134,"bu ":188,"bru":157,"bso":103,"bse":75,"bta":77,"bst":94,"bti":224,"btr":68,"buo":284,"bur":719,"bul":328,"buk":109,"bum":669,"bui":94,"bud":276,"buv":3564,"but":190,"bus":430,"brė":285,"byl":146,"bys":99,"abų":128,"aka":10783,"am ":1782,"ake":779,"akc":437,"aki":1290,"aji":120,"ėza":80,"ajo":4851,"aju":423,"aiz":904,"al ":2864,"adė":694,"aja":2169,"aje":126,"aik":6012,"ail":788,"aim":3610,"ain":2341,"aip":4816,"air":2272,"ais":9853,"ait":1788,"aiv":2833,"ėsč":100,"ak ":92,"aig":1833,"aid":2977,"aib":279,"ahi":72,"aho":88,"ėvi":102,"aj ":639,"abė":174,"agv":331,"agy":177,"ėva":79,"aha":210,"agl":74,"agm":87,"agi":657,"ėnų":1107,"agr":3158,"agu":296,"agn":741,"ago":977,"anu":591,"anz":118,"any":235,"ano":3129,"ann":125,"anm":118,"ant":17439,"ans":1524,"aič":1223,"anr":199,"ane":1374,"ang":3590,"ani":4762,"anj":123,"ank":4229,"ap ":76,"ana":4088,"anc":1441,"and":6903,"amu":630,"ėtų":311,"amt":459,"amy":484,"amz":131,"amm":86,"amo":3569,"amp":1185,"ams":2382,"ami":5086,"adž":530,"ame":5511,"amb":1223,"ama":10174,"ao ":130,"adų":80,"agė":263,"aly":3612,"alv":1945,"alu":1766,"alt":2672,"als":5664,"alp":263,"alo":3517,"aln":4883,"alm":300,"all":289,"alk":1267,"alg":429,"ali":17593,"alc":176,"ald":5080,"ale":3230,"Šan":75,"ala":5633,"alb":4521,"Šal":318,"Šak":157,"an ":1340,"Šau":91,"ėtį":118,"akv":76,"aky":230,"aks":329,"akr":1125,"Šar":99,"aku":724,"akt":1972,"ako":1771,"akn":187,"akm":413,"akl":356,"aba":2740,"abe":207,"abi":1011,"abl":158,"abo":330,"abr":414,"abs":143,"abu":396,"abz":181,"ae ":3025,"aca":72,"ad ":1165,"ac ":144,"ab ":182,"afo":102,"afr":63,"aft":132,"afi":672,"ai ":28957,"aga":3563,"age":494,"afy":94,"aen":65,"ael":136,"aei":134,"afa":115,"ado":1953,"adr":251,"adm":1262,"adi":7918,"ade":803,"ady":144,"adu":285,"adv":445,"ack":137,"aci":7737,"ach":900,"ace":1807,"ada":1767,"act":213,"azm":131,"azo":425,"azi":1049,"arš":379,"azl":78,"auč":212,"arū":95,"atė":313,"aze":155,"azg":73,"aza":391,"azd":314,"ėcė":111,"arų":1990,"arž":560,"apš":64,"asč":85,"arė":673,"arę":105,"atą":166,"ėgo":219,"ėgi":261,"apų":68,"asė":868,"ėga":161,"avų":296,"ėkt":249,"Šta":114,"ėn ":149,"Šv ":293,"ėla":197,"ėli":1870,"auž":306,"ba ":7831,"azė":203,"ėly":383,"ėgė":107,"Šud":88,"ėme":99,"ėdž":122,"ėmi":1028,"ėms":611,"atū":905,"ėja":1671,"ėl ":1089,"ėdė":151,"ėji":1633,"ėje":5476,"atš":113,"asų":302,"ėjo":1675,"avė":225,"avę":277,"ėju":616,"atž":107,"ėki":72,"auš":245,"atų":809,"ėkl":135,"ėkm":115,"alč":210,"alė":817,"at ":1471,"ėpa":71,"arg":513,"aiž":139,"are":1358,"ard":2315,"arc":907,"arb":6774,"ara":6684,"arp":3635,"aro":3645,"arn":1263,"ėgų":151,"arm":594,"arl":418,"ark":2565,"amą":414,"ėję":513,"arj":115,"ari":8416,"alį":375,"aru":2631,"arv":357,"Šie":85,"Šia":1607,"arr":83,"ars":1153,"ajū":107,"art":6116,"au ":2801,"asa":3470,"ary":2268,"ajų":380,"akš":168,"asi":8082,"ash":79,"ase":867,"asd":82,"aso":710,"asn":256,"asp":307,"ask":1323,"aną":115,"asm":1404,"ės ":35616,"asl":491,"ėnu":240,"ėno":201,"aos":78,"agū":453,"ajė":352,"ar ":4730,"ėni":505,"agų":187,"apa":1300,"ėne":311,"ape":335,"ėna":175,"apd":357,"aką":119,"api":6467,"apg":138,"apl":1752,"apk":227,"apo":1183,"apr":1671,"aps":2502,"apt":510,"apu":404,"apv":201,"apy":1793,"akė":76,"as ":68022,"Ši ":192,"aiš":684,"alą":244,"ėny":160,"ėto":359,"ava":4842,"apė":321,"amų":755,"auz":79,"ėty":82,"aut":4104,"Ško":96,"avo":3989,"ėta":441,"avi":8394,"amž":667,"ėti":999,"ave":644,"ay 
":151,"Šeš":127,"avy":790,"anų":720,"avu":579,"arč":154,"arą":182,"anž":85,"anė":190,"akų":454,"ata":3198,"asu":274,"ast":4236,"ass":235,"anč":3200,"asy":280,"asv":107,"Šil":407,"atm":389,"atn":97,"atk":211,"atl":931,"ėra":344,"Šip":64,"atr":904,"Šio":192,"ato":2724,"Šim":69,"atp":71,"ėlė":390,"ate":2024,"atf":194,"Šis":287,"Šir":132,"ėri":244,"ati":4429,"atg":129,"Šiu":193,"ath":131,"alų":2328,"ėsn":115,"aub":219,"att":75,"ats":3002,"alū":263,"atv":1480,"atu":978,"ėst":385,"aty":1953,"aul":3204,"aum":246,"aun":1743,"auo":93,"aup":243,"ėmė":214,"aur":9854,"aus":15793,"ėjų":416,"aud":4737,"ėsi":184,"aug":7354,"ėse":1427,"auj":2881,"auk":5802,"ος":93,"ος ":93,"Vėl":110,"ς ":168,"ėči":63,"Rūd":99,"α ":85,"Sūd":82,"アア":188,"ий ":71,"jei":143,"jer":295,"jek":1582,"jen":63,"jet":69,"jev":166,"ji ":3840,"aža":639,"ažd":237,"aže":494,"aži":1113,"ažn":2141,"ažo":516,"ažu":326,"ažy":107,"jad":82,"izė":124,"jas":2376,"jav":90,"jau":1901,"jap":205,"jar":157,"jak":120,"jan":2657,"jam":5082,"jag":120,"jai":1573,"je ":35197,"ažį":104,"ažų":112,"jog":127,"jok":177,"joj":10950,"jon":4897,"jom":502,"jot":90,"jos":20733,"jor":274,"ск":151,"jis":383,"jim":2557,"jin":736,"jie":392,"ст":86,"jo ":6159,"itn":63,"itm":290,"itk":77,"ioč":215,"itr":489,"ito":5645,"itv":127,"inį":1158,"itu":2883,"itt":74,"ity":2401,"ilų":166,"iub":80,"iuc":232,"ilž":195,"iud":174,"iuj":1926,"iuk":566,"iui":902,"isk":1115,"iną":194,"ism":716,"isl":291,"iso":747,"isp":344,"iss":63,"ikū":653,"inč":492,"isu":1429,"ist":10402,"isv":554,"isy":283,"ikų":1702,"inė":20322,"inę":1356,"ita":3101,"ite":2150,"ith":115,"iti":4567,"ivo":225,"inų":1093,"ivy":313,"ivu":89,"irą":66,"inž":95,"ius":6964,"iur":510,"ium":511,"iul":202,"iuo":4507,"iun":199,"imų":1088,"iuz":146,"iut":1006,"iuv":85,"iva":5713,"ipė":458,"ix ":136,"ivi":1601,"ive":1142,"ipr":458,"ipo":522,"ipu":252,"ips":676,"ipt":461,"ipi":440,"iką":480,"igž":771,"ipl":309,"ikę":172,"is ":52601,"ikė":859,"ion":5845,"iop":388,"ior":133,"ios":8346,"igū":296,"iot":445,"iog":457,"iją":1411,"ioj":3109,"iok":271,"iol":667,"iom":700,"iję":103,"ipa":679,"ipe":332,"iov":183,"igų":264,"ioz":68,"ir ":24988,"iru":470,"irv":355,"irs":602,"ijū":154,"irt":3182,"iro":972,"irp":210,"irm":1827,"irn":109,"irk":393,"iri":3779,"imą":1476,"ikš":1311,"isi":2798,"ish":114,"ise":370,"isd":71,"isc":198,"isa":1339,"iu ":2871,"imė":266,"ijų":2456,"iry":540,"ire":616,"irg":245,"irb":684,"ira":1224,"ird":228,"it ":169,"ilė":771,"ilę":379,"ašų":113,"iuš":73,"dėt":870,"dės":969,"itų":1031,"dėj":1386,"dėm":219,"dėn":263,"dėl":1132,"dę ":83,"ivė":295,"isų":290,"ivų":75,"dęs":150,"ja ":13854,"itą":217,"ipų":79,"isė":1814,"isę":115,"isą":156,"irę":81,"irė":261,"irž":446,"irų":76,"izu":821,"itį":88,"irū":86,"izo":451,"izn":83,"izm":1490,"izi":1251,"irš":1690,"izg":80,"ize":116,"izd":791,"iza":1796,"itę":494,"itė":412,"dė ":795,"kaš":89,"kik":107,"kij":2691,"kim":1590,"kil":1486,"kia":4293,"kie":2272,"kiv":63,"kin":5088,"kio":2376,"kip":128,"kir":3853,"kis":920,"kit":2704,"kiu":661,"kaž":63,"km ":5939,"ki ":3521,"kdy":223,"kg ":77,"kea":110,"ked":124,"kei":560,"kel":3590,"ken":372,"kep":138,"kes":439,"ker":661,"ket":958,"ke ":844,"kci":1331,"kda":895,"kdo":201,"kra":4663,"kre":1055,"klė":428,"kt ":364,"kių":2205,"kiš":1820,"klą":309,"ksa":796,"kse":134,"kry":1180,"kmė":162,"ku ":438,"kro":983,"kru":318,"kri":3232,"kov":891,"km²":423,"kot":816,"kos":8611,"kor":653,"kop":332,"koo":136,"kon":3138,"kom":3814,"kol":1241,"kok":423,"koj":2216,"koh":148,"kod":1418,"ks 
":271,"kmu":184,"kme":718,"kmi":91,"kny":405,"kni":155,"kią":80,"klu":945,"ko ":5671,"kly":141,"kle":472,"kla":7712,"klo":1474,"kli":2387,"bų ":1576,"jyb":83,"jus":1760,"jun":1716,"juo":1236,"jur":161,"juj":263,"jui":107,"jud":642,"būn":290,"būk":89,"būr":626,"būs":338,"būt":996,"eči":2678,"dį ":451,"ju ":392,"būd":1044,"kaz":236,"kav":389,"kat":1246,"kau":876,"kar":10769,"kas":6028,"kap":1244,"kan":2442,"kal":11413,"kam":1843,"kaj":116,"kak":332,"kai":11174,"kag":106,"kad":1450,"kac":439,"kab":221,"ka ":5403," Ga":1407,"bėt":104,"bės":4168,"bėn":81,"bėm":94," Ge":1126,"bėj":670," I ":269,"bėg":144,"bėc":110," Fo":747," Fu":218,"bę ":399," Fr":454," Fi":557," Fl":215," Ha":877," He":844," Gy":327," J ":93," Go":421," Gr":2243," Gu":448," Gv":487," Gi":589," Gl":237," Ig":304," Id":102," Ib":64,"guž":295," K ":135," Hy":110," Hu":227," Ho":536,"ha ":309," Hi":515," Ji":1102," Je":431," L ":82," Ja":1776," Iz":149," Dė":110," Iv":74," Ir":401," Is":623," It":425," Im":226," In":1579," Aš":111," Ik":112," Il":691,"ham":166," M ":181,"han":534,"hai":104," Ka":7194,"haj":70,"hal":255,"hau":98," Ke":839," Ki":1466,"har":550,"has":169,"hat":88," Jo":1429," Ju":1786," Bū":168,"hab":63,"had":70," La":2647," Le":2732," Li":7563," Kl":830," Kn":86," Ko":2662," Kr":1957," Kv":191," Ku":1464," Ky":76," Ma":4719," O ":103," Mi":1782," Dž":420," Me":1896," Dū":73,"he ":653," Lo":721," Ly":249," Lu":488," Ne":1796,"а ":158," P ":142," Na":1859," Ež":273," Ni":719," Mo":1991," My":150," Mu":530,"hel":177,"hei":83,"hev":142," A ":384,"het":95,"hes":72,"her":871,"heo":356,"hen":157,"hem":647,"hi ":89," B ":160," C ":148," Ap":776," Am":1804," An":2616," Ak":583," Al":1977," Ai":520," Aj":72," Ag":281," Af":804," Ac":198," Ad":550," Ab":408," Ba":3796," D ":97," Az":731," Av":213," Au":2682," At":910," As":629," Ar":1939," Be":1541,"hie":74,"hid":370," Bi":835,"hia":94,"hip":216,"hin":342," Bl":291,"him":102,"hil":142," Bo":1202,"hij":135," Br":1443," Bu":1122,"his":79,"hit":343,"hir":118," E ":80," Ca":760," Ce":856," Ci":314," Ch":1252," Cl":153," Cr":197," Co":818," Cu":130," Cy":71," Da":2178," Di":2044," De":898," Dr":663," Do":658," Dn":85," Dy":151," Dz":157," Du":604," Dv":187," Ed":144," G ":65," El":573," Ek":234," Ei":124," Eg":301," Et":188," Es":385,"hlo":65," Er":362," Ep":87," En":317," Em":142," Ex":72," Eu":1612," Ev":98," Fe":541,"ho ":213,"hma":111," Fa":424," H ":80,"gma":128,"go ":1512,"gme":171," Są":373,"glo":300,"gle":181,"gli":959,"gla":443," Wo":146," Wi":267," We":123," Wa":210,"й ":139," Vy":520," Rū":228,"gog":103," Zo":73," Ze":220,"gno":577," Zi":68," Tė":65,"gni":373," Za":514," Yr":183,"gne":245,"gna":449," Yo":68," Sė":67,"giš":189,"gpj":199," Vė":216,"о ":63,"gr ":135," Sū":95,"goj":625,"gom":579,"gol":244,"gon":466,"gos":3067,"gor":332,"got":295,"gov":127,"gu ":264," a ":1580,"gro":326," Už":349,"gry":429,"gru":2484,"gra":4017,"gių":416,"glė":83,"gri":3118,"gre":877," R ":90," Jė":115,"gty":102,"glų":187,"gto":154," Os":251," Or":560," Op":146," Po":1526,"gui":113," Pl":1099," Pi":2753,"gum":1158," Ph":102,"gul":484," Pe":1581,"gua":121,"gub":128," Pa":7618,"gue":64,"gst":140," Ny":218," Nr":561," Nu":899," No":770," Ol":265," Ok":221," On":138," Om":89," Ją":106,"gti":868," Od":117," Oc":104," Of":78," Ob":214,"gta":465," Lė":70," Ra":1919,"gvo":218," Ro":1436,"grą":89," Re":2283," Ri":769," Kė":213," S ":144,"guv":99,"gut":67,"gur":151," Pr":4036," Ps":108,"gus":1460," Jį":73," Pu":558,"gun":99,"guo":261," Iš":781,"gvi":98,"gva":685," Sy":113," Sv":277," 
Su":2443,"grį":216," St":1586," Kū":349," Ta":3980,"gsė":181," V ":128,"gyb":293,"gyd":154," Th":577," Ti":951," Te":1623," Tr":1262,"gyl":77,"gyn":197," To":1060," Ry":866," Jū":181," Ru":2112," Sa":3099," Jų":127,"е ":76," Mė":156," Sh":178," Si":1574," Sc":292," Se":1876," So":1791," Sp":549," Sr":63," Sk":1023," Sl":298," Sm":286," Sn":82,"grū":76," Uz":68," Va":2931," X ":111,"и ":92," Ve":1358," Vi":4303," Vl":71," Vo":1248," Tu":948,"gyv":4200," Tv":72," Ty":93,"gys":254,"bė ":1092,"gzi":495," Ug":88," Uk":462," Ul":82," Un":258," Uo":118," Up":172," Ur":330," Us":82," Mū":111," Ut":265," ja":1311,"iai":12600,"iak":2489,"iaj":841,"iam":6822," dė":1157,"ial":2617," iz":109,"ian":3835," ji":819,"iap":102,"ias":3810,"iar":592,"iau":21521," je":150,"iat":599,"iav":1428," im":762," in":5586," ik":3172," il":1403,"ic ":202," aš":299,"iab":113,"iac":1098," is":1471,"iad":246," it":186," ir":24988,"iag":1048,"ibl":199," ka":22789,"ibi":638,"ibo":741," m ":8180," kg":76,"ibr":360," ki":5307," ke":3545,"ibu":210," jo":2067,"id ":126,"iba":407," bū":2814,"ibe":354," ju":2553," gy":4128," bė":99," ha":363," he":768," gi":1781," gl":388," gr":5317," go":132,"ia ":6457," gu":260," gv":148," k ":116," id":308," ie":140," hi":591," ho":365," hu":138,"iet":17789,"iev":1199," ni":198," ež":2821,"iel":363,"iem":1768," ne":7202,"ien":11441,"iep":530," na":5031,"ier":804,"ies":9563,"ied":842,"ieg":235," my":64,"iek":3632,"iej":2087," mu":1417," mo":7083," mm":210,"ieb":372," ok":189," ol":297," ją":256," oj":106,"ifo":912," od":155," of":644," ob":1166,"ife":169," ny":106,"ifi":598," nu":12007," no":1368,"ifa":104," le":2689,"icr":108,"icu":77," li":4372,"ico":274,"ick":163," la":7451," kv":492," ku":12047,"ici":1931," kt":309,"ich":922," ky":193,"ice":275," kn":370,"ie ":6151," km":6372," kl":2310,"ica":672," kr":4858," ko":8974," me":9020,"idy":563," dž":138," eš":161,"idu":2683," mi":7229,"idv":63," ml":158," gė":342,"я ":102," o ":1212,"idr":494,"ido":937,"idm":185," ma":4864," lu":92,"idi":2189," ly":1967,"ide":3721,"ida":3570," lo":631," ag":258,"aša":470," ab":508,"iid":108," ac":88,"aše":79," ad":1540," am":912,"idą":63,"ašk":334," an":3700," ap":10764,"aši":756," ai":453,"ašo":756," ak":1627,"ašl":96,"ašm":98,"iim":317," al":1456," av":222," au":6951," ar":9760," at":6438,"ašu":283,"ašt":1920," as":2056," d ":1323,"ašy":781," ba":4469,"il ":133,"idė":1297,"ija":13519," bi":992,"ije":95," be":6829,"iaž":208,"iji":627," bo":424," bl":292,"ijo":29798,"ieč":1593," by":148,"ibū":258," bu":4118,"iju":808," br":918," ca":122,"ibų":249," e ":82,"im ":74,"ika":5325,"ige":301,"iga":1803,"ii ":82,"igl":111,"igm":118,"igh":167,"igi":1406,"igu":322,"igt":307,"igr":206,"igo":701,"ign":206,"dą ":495,"ibė":80,"igy":112,"ik ":1008,"imo":10288," er":623,"imn":310," et":431," es":1712," en":778,"ims":264," em":226," ep":239,"imp":910,"idž":3127," ei":683,"imf":152,"ime":1400," el":1508,"imd":91," ek":915," ef":150,"imi":2582,"ieš":2115," eg":645," fe":604,"ip ":3767,"inc":1582,"ind":4737,"ina":9499," fa":728,"imt":970,"imu":2073," eu":161," ev":173,"imy":291," fu":1594,"inm":71,"ino":4836," fr":209,"ašč":176," fo":1905,"int":7918,"ins":1597,"inf":613," fl":132,"ine":2681,"iež":504,"ing":5394," fi":1973,"ini":42857,"ink":9182," ge":4705," ga":6719,"ioc":68,"iod":332,"inu":851,"inv":260,"iny":2814,"ašė":95,"iko":8237," cm":466,"ikm":102,"ikl":4355," co":131,"iki":8010," ce":2733,"ike":332," ch":821," ci":727,"ila":1055,"in ":677," da":11917,"iky":490," cu":64,"ikt":1187,"iku":935,"ikr":1885,"iks":1325," 
do":606,"ilp":157,"ilo":1045,"ill":367," dr":1386,"ilk":807,"iln":1654,"ilm":942,"ilg":1557,"ilj":190," de":4787,"ili":7113,"ild":935," di":7861,"ile":510,"ima":10774,"imb":476," g ":911,"idų":320,"igė":179,"io ":24537," dv":1810,"ily":138," du":1631,"ils":105,"ilt":617,"ilu":734," dy":1060,"ilv":228," sū":347," zo":229," rū":2140,"hol":535," tė":174,"hom":130,"hon":117,"ко":78," tę":151,"hos":152,"hot":89,"hov":154,"hop":73,"hor":425," yp":378,"ка":91,"ки":93," yr":9482," sė":250,"hni":331,"hno":260," są":2199," pū":86,"ин":66," ww":64,"ий":86," ož":66,"ра":115,"hua":109,"htt":75," už":2934,"hst":134,"ол":71,"ов":122,"hry":109,"но":71,"hro":216,"ни":79,"hri":103," tū":466,"ht ":128,"на":78,"hra":72," vė":1295," ru":1285," jū":1646," ry":5467," mė":866," jų":977," sa":11017," sf":110," se":4482," sc":162," si":4889," sn":168," sm":866," sl":1116," sk":5183," sr":2054,"hyl":73," sp":3493," so":1538,"ве":64," lė":404," t ":238," ra":8074," re":7211," ri":2152," mą":77," ro":1078," pv":390," pu":2293," jį":257," pr":18161," ps":398," s ":108," px":100," lą":374," iš":11701," os":113,"hum":232," ov":65,"hun":68," op":537," or":3230," jė":239,"ан":71,"ал":63," pe":3678," pa":29839,"ар":72," pl":4167," po":4807," pi":10334," pj":71," vy":3074," y ":87," x ":92," va":20321," ve":4247," pė":140," vo":600,"cėl":111," vu":113," vi":14126,"ер":72,"ен":65," ty":567," tv":701," tu":2991," mū":318," ur":214," uo":741," up":3500," un":1322," ul":71," ug":910," ta":10594," nė":349,"hyt":188," st":5898," kū":1165," sv":1124," su":14551," tr":3585," lū":96," to":1975," th":302," ti":5451," te":9733,"fes":435,"fer":701,"fed":364,"fen":78,"fek":526,"fel":82," Ča":214," Či":814," Če":428,"fga":67,"faz":69,"fas":187,"far":115,"žų ":324,"fan":250,"fak":286,"fal":119,"fai":102,"ezė":113,"fe ":71,"etų":4160,"esų":111,"evė":459,"evų":124,"fa ":109,"esė":92,"erį":83,"etą":266,"erė":268,"erč":118," če":566,"esč":286," či":215,"epš":367,"esą":106,"ezu":126,"erų":398,"erž":348,"etė":1071,"eza":64,"esį":127,"ezo":319,"eze":135,"erš":197,"ezi":795,"eta":5454,"enę":98,"ete":1059,"elš":173,"eti":4554,"etn":281,"esp":1704,"esn":1467,"eso":815,"est":4994,"esu":549,"enč":626,"ess":157,"esy":202,"enė":1439,"ekų":115,"eud":63,"euk":102,"eto":2887,"etr":2354,"ett":92,"enį":206,"etu":10440,"etv":1625,"ety":134,"ew ":79,"eve":542,"eva":902,"evo":715,"evi":699,"eut":158,"eur":366,"eus":130,"epė":63,"emų":168,"erą":156,"evr":154,"ey ":158,"evy":281,"enų":1350,"epe":177,"epc":101,"epi":513,"eph":64,"er ":2219,"epa":1258,"eot":67,"eor":538,"eom":87,"eol":734,"eop":149,"eon":218,"eiš":1009,"ekė":269,"es ":8000,"ekę":92,"ept":436,"eps":96,"epu":165,"epo":412,"epr":642,"erk":587,"erl":418,"eri":10880,"emą":219,"erg":1284,"ere":1184,"erf":77,"erc":545,"erd":659,"era":5900,"erb":981," ėj":280,"elę":124,"elė":2221,"et ":1493,"ekį":64,"esj":92,"eną":643,"esk":317,"esm":259,"ekš":67,"esi":2143,"ese":290,"esa":2125,"ery":234,"emė":1363,"ejų":738,"erv":903,"elį":206,"eru":488,"err":173,"ert":1937,"ers":2707,"ern":1320,"erm":1142,"erp":330,"ero":2197,"eki":1989,"ekl":411,"ekm":65,"eko":1042,"ekr":192,"eks":1896,"ekt":4207,"eku":345,"ekv":315,"eky":288,"en ":710,"elb":215,"ela":746,"eld":365,"elf":88,"ele":3424,"eli":8265,"elj":72,"elg":477,"elm":261,"eln":456,"elk":671,"ell":316,"elo":518,"elu":87,"elv":84,"els":353,"elt":670,"ely":474,"eo 
":170,"egė":73,"edų":106,"emb":301,"ema":2008,"edž":1549,"eme":1016,"emd":90,"emo":2200,"emi":2274,"emt":191,"emu":703,"emp":946,"ems":853,"emy":348,"ene":2754,"eng":1816,"enb":111,"ena":6485,"end":4440,"enc":1042,"eno":4885,"enm":89,"enk":5159,"enl":92,"eni":6797,"enu":670,"env":1006,"ens":1444,"ent":11928,"enr":107,"eič":280,"enz":118,"eny":1624,"eog":245,"eod":89,"egl":182,"ego":396,"ege":449,"egi":3283,"egz":550,"ebė":292,"egr":181,"egu":769,"egy":91,"ek ":622,"eic":241,"eip":170,"eis":5132,"eir":124,"eim":4571,"eil":568,"ein":1861,"eik":5224,"eid":1499,"eig":1358,"eja":503,"edė":140,"el ":389,"eiz":76,"eit":977,"eiv":749,"ejo":153,"eji":683,"eje":65,"eke":125,"ekc":170,"eka":2352,"em ":150,"ejy":83,"eju":497," į ":8903,"gl ":220,"giu":292,"git":144,"gis":1794,"gir":399," Į ":86,"gil":204,"gim":1321,"gij":3190,"gik":123,"gip":251,"gin":4262,"gio":2880,"gie":143,"gia":3193,"ght":134,"bą ":394,"gi ":457,"gen":3944,"geo":416,"get":168,"ger":1023,"ges":352,"geb":197,"gei":69,"geg":248,"gem":66,"gel":1714,"gdo":74,"gdy":714,"gda":73,"ge ":442,"gab":134,"gac":108,"gad":87,"gai":1374,"gas":1974,"gar":1729,"gau":1337,"gat":335,"gav":268,"gam":2232,"gal":9014,"gan":3227,"ga ":2878," įv":1493," įt":810," įs":1931," įr":895," įp":146," Įs":221," įd":139," įe":113," įg":452," įk":1033," įl":469," įm":645,"fys":93," Įe":85," Įk":141,"fut":1079," įž":65,"fun":535,"fto":75,"ft ":191,"ača":64,"fra":263,"ačk":85,"ači":1928,"fri":745,"fro":133,"for":3388,"fos":117,"fot":172,"fon":490,"fol":145,"ač ":218,"fla":74,"fli":110,"fo ":188,"fic":473,"fig":118,"fij":466,"fil":952,"fik":605,"fin":1043,"fir":67,"fit":88,"fiz":526,"da ":3356,"de ":955,"dac":195,"dad":96,"dab":1199,"dak":155,"dal":5226,"dai":1928,"dag":146,"dae":1242,"dat":189,"das":3048,"dar":5510,"dan":2672,"dam":1100,"dav":1026,"dau":2555,"cul":112,"cto":80,"cti":127,"cta":124,"cus":68,"cur":66,"cko":72,"co ":242,"con":95,"col":102,"com":76,"cor":163,"cos":201,"cop":83,"ciš":133,"cro":177,"cea":1680,"ch ":208,"cer":417,"ces":658,"cet":70,"cen":3061,"cep":179,"cel":113,"ceg":78,"cha":955,"chu":89,"cia":2358,"ck ":279,"cie":95,"cid":198,"che":1211,"chl":85,"chi":1035,"cho":966,"chm":91,"chn":602,"chs":167,"cht":194,"chr":188,"civ":350,"cij":9068,"cik":433,"cil":163,"cif":247,"cis":123,"cit":220,"cin":3236,"cio":1497,"cip":355,"cm ":459,"ed ":292,"eba":313,"ebe":309,"ebi":367,"ebo":120,"ebr":205,"ebu":212,"eag":73,"eae":1640,"eak":217,"ean":215,"eal":365,"ear":134,"eap":89,"eat":373,"eau":64,"ea ":188,"efi":76,"efo":293,"efa":70,"efe":515,"ei ":3954,"ega":737,"eed":76,"edi":2343,"ede":757,"eda":2475,"edy":257,"edu":236,"edo":437,"edr":226,"eck":65,"ech":882,"eci":846,"ece":95,"eca":89,"ect":147,"eco":118,"dyg":104,"dyk":422,"dym":1573,"dyn":853,"dys":311,"dyt":634,"dyd":692,"dyb":2813,"drė":138,"drą":94,"dvy":133,"dvi":1481,"dve":133,"dvo":211,"duv":159,"dur":2035,"dut":428,"dus":534,"dva":622,"dvė":188,"dzi":233,"dor":1140,"don":1347,"dom":1366,"dol":160,"dok":455,"dow":94,"dov":1045,"dot":521,"dos":2310,"ds ":126,"diš":216,"deš":1558,"dmi":1463,"dni":66,"dob":88,"doe":78,"doj":2119,"dum":210,"duo":2102,"duj":255,"dui":167,"dul":493,"duk":700,"dug":90,"dub":205,"dua":83,"dri":926,"dra":1708,"dre":173,"dry":71,"du ":892,"dro":2362,"dru":787,"dge":113,"dic":606,"did":3151,"dia":454,"der":1524,"des":572,"det":124,"dev":201,"deb":268,"dea":123,"ded":1735,"deg":595,"dei":369,"del":1942,"dek":567,"den":3068,"dem":709,"dep":764,"deo":162,"di ":260,"dme":216,"do 
":2940,"div":255,"diu":103,"diz":157,"dim":1824,"din":11534,"dio":390,"dip":85,"dir":773,"dis":1529,"dit":234,"die":4136,"dif":150,"dij":4732,"dik":613,"dil":259,"dka":165,"daž":1194,"ižy":171,"rbė":133,"rgu":182,"mą ":3290,"rga":2638,"iža":147,"ri ":2109,"rgl":72,"iži":186,"rgi":812,"rbą":117,"rge":1017,"rgo":391,"ret":1342,"res":1263,"rev":180,"rez":564,"rfi":80,"rfo":87,"rač":122,"rdu":515,"rds":64,"rdv":219,"reb":105,"rea":608,"ree":109,"ref":432,"rec":183,"red":291,"rei":3237,"lėš":194,"rej":92,"reg":2937,"rem":808,"ren":2189,"rek":801,"rel":806,"rer":127,"reo":63,"rep":489,"rda":868,"rcu":85,"rdo":569,"rdi":1074,"rde":192,"re ":1146,"rby":63,"rbt":207,"rbu":464,"rco":114,"rci":353,"rch":1033,"rce":143,"raz":577,"rd ":418,"rao":69,"rap":372,"rar":206,"ras":10157,"rat":2588,"rau":2880,"rav":918,"rbi":1182,"rbl":168,"rbo":606,"rba":5655,"rbe":227,"raj":5239,"rai":3447,"rah":84,"rag":1079,"ran":8258,"ram":2837,"ral":3029,"rak":1364,"rab":639,"raf":875,"rae":172,"rad":3322,"rac":2843,"rpt":814,"rpu":483,"rpr":64,"rps":98,"rpo":225,"rkė":212,"rs ":981,"rpe":180,"rpa":257,"rką":115,"rpi":679,"rgž":70,"ror":154,"ros":5272,"rot":753,"rom":1205,"ron":1539,"rop":2336,"roz":106,"rgų":68,"rou":123,"rov":2454,"row":67,"rob":326,"roa":194,"rod":1480,"roc":897,"roj":2662,"roi":152,"rol":1077,"rok":757,"rof":604,"roe":102,"rog":1825,"rno":408,"rič":321,"rnu":106,"rny":471,"rp ":1398,"rna":1385,"rež":195,"rne":498,"rią":492,"rni":813,"rmo":1239,"rmu":712,"rdų":72,"ro ":7225,"rgė":150,"rma":2602,"rme":740,"rdž":190,"rmi":1316,"rly":64,"rlo":116,"rli":270,"rld":79,"rle":65,"rla":454,"rn ":72,"rky":100,"rkv":63,"rku":184,"rkt":264,"rks":74,"rko":615,"rki":550,"rkl":319,"rke":407,"rka":1300,"rbų":71,"mąs":197,"reč":569,"mąj":193,"raž":304,"rje":132,"riz":580,"rix":85,"rdė":104,"rip":333,"rio":6643,"rit":7580,"ris":4114,"riv":457,"riu":3503,"rig":767,"rij":7031,"rdą":70,"raš":3717,"rii":272,"ril":196,"rik":6018,"rin":15816,"rim":1853,"ria":7346,"rib":1061,"ric":629,"rid":769,"rie":6574,"rif":330,"rk ":142,"rož":108,"jūč":213,"rsė":70,"jų ":7448,"ryb":1588,"ryd":120,"rui":138,"rug":582,"rud":269,"rur":65,"rup":2436,"ruo":2554,"run":372,"rum":1117,"ruk":710,"ruz":216,"ruv":66,"rus":3390,"rut":405,"rva":500,"rmų":317,"rvi":416,"rve":467,"rvo":162,"rnų":71,"rvu":153,"ry ":248,"rsk":222,"rsl":371,"rkš":158,"rsi":1214,"rso":508,"rsm":221,"jūn":185,"rsa":416,"rse":236,"rkų":131,"rta":4621,"rst":803,"jūr":1765,"rsu":138,"rtm":86,"rtn":140,"rto":2230,"rte":485,"rth":95,"rti":4648,"rub":105,"rtr":75,"rtu":1225,"rty":412,"riš":698,"rt ":258,"rių":3254,"mči":71,"rmą":324,"rre":91,"riž":94,"rra":105,"lį ":872,"ru ":632,"rmė":253,"sac":151,"sad":119,"sag":124,"sai":543,"saj":93,"sak":1089,"sal":3421,"sam":580,"ryš":526,"sap":78,"san":3811,"sau":3166,"sat":201,"sas":1914,"sar":840,"sav":6083,"sa ":609,"ryč":1308,"rvų":140,"mėg":132,"rvė":96,"rsų":124,"mę ":264,"rtų":277,"mėt":142,"mės":1624,"mėl":387,"mėm":110,"mėj":427,"mėn":862,"rtė":217,"mė ":740,"rys":1185,"ryt":5807,"ryo":78,"ryp":869,"ryk":138,"ryl":68,"rym":161,"ryn":188,"ryj":340,"rtą":376,"rtį":75,"ną ":1490,"shi":127,"si ":5878," 가":71,"siv":352,"sjo":73,"nąj":98,"sie":2665,"sid":3080,"sic":542,"sib":166,"sia":6343,"kšt":3974,"sit":1385,"siu":976,"sir":1172,"sis":6798,"sip":235,"sin":3201,"kšn":209,"sio":4893,"kšo":166,"kšl":78,"sil":885,"kšm":294,"sim":839,"sij":3056,"sik":1642,"sii":91,"sif":333,"sig":288,"sda":106,"sdi":133,"se ":12437,"ryž":454,"sce":120,"sci":144,"sch":242,"ser":955,"ses":253,"set":159,"seu":67,"sez":169,"sh 
":92,"sfe":280,"sfo":80,"sei":720,"sed":68,"sep":203,"sen":3202,"sem":199,"sel":216,"sek":790,"spu":1602,"skė":68,"spy":159,"spo":957,"spr":582,"spe":1156,"spi":670,"sjė":92,"spa":2100,"sot":100,"sov":81,"sol":244,"som":660,"son":563,"sop":70,"sor":222,"sos":1164,"sod":242,"sof":371,"soj":156,"soc":1068,"su ":4472,"smė":297,"sru":176,"sro":178,"sri":1777,"nči":4255,"siž":207,"sra":172,"sių":1944,"slė":668,"st ":506,"siū":210,"siš":352,"ss ":140,"sli":978,"slo":811,"slu":296,"sky":900,"sla":1643,"sle":271,"ski":4050,"skl":344,"sko":1593,"skr":2283,"sku":545,"skv":165,"ska":2455,"ske":580,"kšč":2245,"sno":220,"sny":134,"snu":80,"sna":277,"sp ":78,"sni":2039,"sią":193,"sne":141,"smo":495,"smu":706,"so ":3418,"sma":829,"smi":538,"sme":1637,"syb":334,"kų ":5583,"syn":63,"syt":71,"sys":136,"syk":221,"stą":248,"stę":193,"syv":319,"stė":1387,"nė ":11072,"kūg":75,"sse":159,"ssa":96,"kūn":731,"sso":72,"ssi":140,"kūs":84,"kūr":1568,"snė":370,"skų":182,"ste":4835,"sta":11594,"sto":6918,"sti":7618,"snį":163,"stu":1797,"str":5446,"sua":240,"sty":3413,"slų":200,"sud":2168,"sue":135,"sub":489,"sui":78,"suf":148,"sug":205,"sul":523,"sum":815,"suj":169,"suk":1852,"sup":534,"sun":407,"suo":1175,"sut":1183,"sus":2952,"sur":378,"suv":210,"spė":63,"smų":124,"sva":1159,"sve":370,"svi":243,"svo":286,"svu":88,"svy":123,"tai":9707,"taj":152,"tak":2532,"tal":2568,"tag":213,"tab":401,"tac":699,"tad":469,"tav":1620,"tau":2680,"tat":2848,"tas":15287,"tar":6407,"tap":484,"tan":2917,"tam":3320,"te ":2806,"tbo":1115,"nę ":1530,"svė":126,"가":170,"stų":690,"nėm":645,"nėn":104,"nėl":246,"nėj":3435,"stū":94,"nės":9359,"nėt":145,"nėr":303,"ta ":7677,"suž":77,"nęs":68,"ovų":204,"jęs":545," št":72," šv":1206," šu":230," ši":9378,"pa ":504," šl":369," šo":352," ša":3536," še":4400," Šv":1543," Šu":220," Št":147," Šr":75," Šo":70," Šl":99," Šk":111,"otų":425," Ši":3390," Še":340,"jės":91," Ša":886,"jėg":559,"ovę":68,"ję ":118,"ovė":1571,"pdo":296,"pci":139,"pe ":192,"par":5103,"pat":3653,"pas":6180,"pav":2954,"pau":1209,"pac":165,"pad":1171,"paa":84,"pab":319,"pag":4995,"pak":1745,"pal":2163,"pai":635,"paj":498,"pap":2360,"pam":953,"pan":2096,"ozė":194,"pha":125,"ką ":1267,"pho":90,"phi":136,"gžd":845,"pač":638,"pec":747,"ped":487,"pen":679,"per":4439,"pet":193,"pes":135,"pei":362,"pel":1215,"pek":276,"pla":1821,"pli":2030,"ple":626,"plo":1805,"ply":260,"plu":196,"pkr":163,"paž":694,"phy":268,"pib":499,"pia":510,"pid":180,"pie":9064,"pig":92,"paš":357,"pij":468,"pik":263,"pil":4079,"pim":702,"pin":2920,"pio":1030,"pir":1825,"pis":533,"pit":515,"piu":812,"poz":347,"pr ":158,"por":1711,"pop":634,"pov":195,"pot":290,"pos":1432,"poj":445,"pog":134,"pom":76,"pon":986,"pok":315,"pol":1919,"pob":138,"poe":104,"poc":74,"pod":139,"po ":1517,"psu":123,"pst":87,"pta":1134,"pse":76,"psi":725,"psn":657,"psk":1854,"ptu":124,"pty":174,"pua":68,"pub":1670,"pte":200,"pti":1385,"pto":309,"plū":91,"pra":5043,"pių":716,"plė":383,"piš":241,"pjū":231,"pru":94,"psa":387,"pu ":180,"jį ":548,"kči":82,"pri":9679,"pre":1608,"pro":5447,"poš":212,"pož":275,"pyg":410,"pur":225,"pus":2091,"put":192,"pun":96,"puo":365,"pup":123,"puk":97,"pul":563,"px ":100,"pva":210,"pvz":390,"kę ":172,"kėj":853,"kėn":99,"kėl":172,"kės":2221,"ptū":85,"kėt":269,"puš":167,"pyn":242,"pyj":119,"pyl":1154,"pyk":93,"pyv":70,"pyr":87,"pyt":97,"kė ":1457,"prū":156,"puč":92,"kęs":421,"lą ":721,"iš ":6273,"ląs":439,"išd":131,"iše":155,"išg":193,"iša":318,"išm":207,"išl":1019,"išo":261,"išn":213,"iši":468,"išk":9640,"išt":673,"išv":411,"išp":207,"išs":1093,"išr":686," Ži":399," Že":954," 
Ža":482," Žy":115," Žu":120," Žv":178," Žm":91," ži":2342," žm":1523," ža":1939," že":2052," žy":592," žv":1024," žo":907," žu":959,"iū ":112," ųj":70,"kį ":292,"lči":255,"iūt":153,"iūr":499,"iūn":1343,"iūl":184,"que":92,"qui":80," šį":69,"ių ":35485,"lė ":2188," ūk":511,"iųj":519,"lėg":91,"lėj":539,"lėd":122,"lę ":354,"lėv":65,"lėt":585,"lės":3311,"lėn":507,"lėm":235,"lėl":106,"lėk":280,"ra ":13206,"lęs":405,"ngo":2538,"ngi":2530,"eži":1071,"ngl":1130,"ngv":440,"ngu":1035,"ežu":89,"ngr":450,"ežt":146,"ngt":1224,"ngs":123,"ni ":526,"eže":2899,"nge":541,"eža":160,"nga":2804,"ežd":87,"nha":74,"ią ":1321,"neg":409,"nei":1033,"nel":523,"nek":262,"nen":435,"nem":377,"nep":854,"neo":246,"ner":1909,"net":3370,"nes":2353,"nev":652,"neu":225,"ndy":107,"ng ":636,"nea":242,"neb":233,"nec":126,"ned":530,"nef":72,"nfo":543,"nfl":92,"nfr":63,"nez":420,"nfe":266,"nco":109,"nci":2521,"nce":603,"nch":181,"ne ":5137,"nbu":79,"ndu":1568,"ndr":3362,"nds":72,"ndo":1993,"ndi":4736,"nde":2551,"nda":1898,"nak":285,"nal":3006,"nam":4510,"nan":2181,"nap":314,"nar":1177,"nac":1442,"nad":530,"nae":88,"naf":82,"nag":711,"nai":2792,"naj":138,"nc ":104,"nab":100,"nbe":88,"nd ":453,"nav":974,"nau":2982,"nat":1500,"nas":9068,"naz":237,"na ":6125,"muš":82,"myr":132,"myn":393,"myl":81,"가 ":71,"mzd":138,"myk":132,"myb":757,"fų ":79,"nyb":586,"ntą":186,"nyj":775,"nyi":66,"nyg":399,"ny ":199,"nvi":1009,"nux":72,"nve":361,"nva":85,"nuk":354,"nul":255,"num":449,"nun":66,"nug":200,"nui":231,"nus":1958,"nut":401,"nuv":180,"nuo":11505,"nur":161,"nty":1460,"ntv":111,"nto":3213,"ntu":1006,"nts":156,"ntr":4642,"nti":13882,"nth":149,"ntg":70,"nta":6698,"nte":3240,"nsu":225,"nkų":1281,"nsp":335,"nso":235,"nst":1864,"nse":182,"nkš":139,"nsi":456,"nsl":149,"nsk":533,"nsa":277,"nu ":681,"nro":72,"iči":2192,"nri":70,"nra":165,"nių":11248,"nt ":3145,"niū":1065,"niš":1026,"nką":172,"ns ":1187,"nkė":2543,"nod":224,"nog":120,"nok":84,"nol":827,"noi":70,"noj":1649,"nop":168,"nom":2167,"non":256,"not":507,"nos":4995,"nor":1222,"nov":1035,"ngų":481,"noz":151,"nne":81,"než":127,"nni":71,"nme":211,"nma":125,"neš":331,"ndž":626,"ngą":233,"nla":91,"ndų":534,"ngė":262,"no ":11621,"ndū":86,"nke":587,"nkl":1886,"nki":5054,"nkc":507,"nka":2908,"nku":626,"neį":137,"nky":223,"nko":1384,"nks":891,"ncū":939,"nkt":1190,"nkr":206,"iąj":181,"naž":94,"nja":76,"ndė":404,"njo":112,"ndą":97,"nij":5825,"naš":624,"nig":783,"nif":163,"nie":779,"nid":254,"nic":409,"nia":7994,"nk ":388,"niz":2108,"niu":3647,"niv":780,"nis":15940,"nit":387,"nir":72,"nio":7526,"nip":69,"nim":4778,"nin":8141,"nik":1554,"nil":147,"ogs":78,"ogr":2210,"ogu":324,"ogi":3408,"ogl":119,"ogo":286,"ogn":64,"oga":839,"oge":235,"oho":167,"oha":70,"ohe":76,"obė":124,"oj ":68,"ją ":2223,"gšt":167,"odą":88,"oid":309,"ok ":122,"oju":555,"obū":144,"ojo":4241,"oji":3887,"oje":21105,"oja":6655,"odė":378,"ol ":243,"oce":779,"och":261,"oci":1478,"ock":185,"obs":81,"obu":232,"odg":104,"ode":1122,"odk":163,"odi":1127,"odo":1175,"odr":68,"of ":299,"oda":2306,"oel":84,"oet":66,"oeu":79,"ody":417,"odu":589,"og ":261,"ofi":842,"oft":141,"ofo":158,"ofe":379,"ofa":95,"oa ":103,"nyč":782,"oac":66,"oba":242,"od ":72,"oar":69,"oat":135,"obo":229,"obl":199,"obj":1006,"obi":1248,"obe":189,"nyn":1169,"nyk":427,"nyr":226,"nyt":99,"nys":2919,"ntė":672,"nzi":126,"nzo":87,"ntį":195,"nsų":145,"ntū":104,"ntų":1010,"gų ":1699,"osė":119,"orė":417,"ows":92,"orą":78,"orų":130,"orū":115,"ozo":156,"ozi":309,"jė ":190,"otė":267,"oza":345,"otą":235,"olų":162,"oty":482,"otu":369,"ow 
":66,"oti":3180,"ote":1275,"ott":72,"otr":264,"oto":2263,"otn":65,"okų":103,"ost":1854,"osu":63,"osv":137,"ota":3370,"onė":1445,"osi":4557,"okš":697,"osk":117,"oną":313,"ose":10057,"osf":255,"osp":155,"oss":88,"gūr":160,"osm":327,"osl":114,"oso":811,"osn":165,"gūn":307,"ovy":172,"onų":1034,"ovi":2936,"onš":70,"ovo":1368,"ovu":118,"ovs":154,"omų":264,"opė":75,"ox ":72,"ova":2312,"ove":473,"oun":122,"oup":69,"ous":97,"our":160,"out":83,"opo":1809,"opi":1192,"opl":208,"ope":871,"oph":333,"opa":456,"os ":71850,"okė":208,"opu":410,"opr":78,"opt":255,"ops":178,"ool":67,"ood":82,"or ":364,"ojė":85,"ogų":154,"oor":180,"ork":297,"orl":173,"orm":3710,"orn":376,"oro":1427,"orp":329,"orc":113,"ord":1011,"ore":636,"orf":147,"org":2280,"ori":8425,"omą":125,"omė":233,"ojų":757,"ou ":101,"osa":489,"gūb":364,"ort":1674,"ors":672,"orv":193,"oru":254,"ory":137,"olą":105,"m² ":432,"ot ":142,"olė":893,"orb":205,"ora":1549,"olę":89,"okį":63,"ola":1063,"old":223,"on ":1119,"oli":7420,"oll":172,"olk":163,"olf":87,"ole":1072,"olg":102,"olt":96,"olm":81,"oln":64,"olo":5241,"oly":304,"odų":160,"ogė":746,"olu":523,"oka":1176,"om ":163,"oki":2742,"oke":340,"okr":516,"oks":1989,"oko":845,"okl":512,"oky":4231,"okt":115,"oku":1484,"ona":5705,"ond":604,"onc":437,"onf":314,"one":3050,"ong":677,"oni":5683,"onk":450,"ono":5947,"ons":941,"ont":1350,"onu":771,"onv":131,"ony":229,"onz":95,"oma":4058,"ome":2921,"omb":322,"omi":4219,"odž":830,"omp":1868,"omo":2822,"omu":462,"oms":893,"omy":305,"op ":190,"la ":3891,"kyč":84,"ksų":107,"kvė":157,"ktų":516,"ktū":882,"le ":987,"lci":77,"lde":301,"lda":470,"ldo":1438,"ldi":380,"ldu":86,"lab":885,"lac":354,"lad":233,"lag":301,"laj":360,"lai":7095,"lal":283,"lak":544,"lan":3678,"lam":672,"lap":1033,"lar":290,"lat":1022,"las":4962,"lau":4662,"lav":1958,"lay":83,"laz":248,"lba":2195,"ld ":140,"lbe":120,"kyš":114,"lbi":260,"lbo":1044,"lbu":643,"kvi":414,"kve":103,"kva":547,"kuv":68,"kut":500,"kus":1457,"kur":11466,"kup":293,"kuo":1084,"kun":573,"kum":831,"kul":3442,"krą":130,"kvo":154,"kta":2002,"kte":286,"cūz":939,"ksp":237,"ksu":217,"kst":1638,"ksi":1031,"kso":492,"ksn":481,"ksm":470,"ksl":1822,"kub":70,"kui":142,"kty":770,"klų":780,"ktr":1113,"ktu":872,"kti":2012,"kto":1582,"kyt":491,"kyr":514,"kys":407,"ktė":87,"krū":445,"kuč":63,"krų":140,"cų ":73,"kyb":405,"kyd":213,"kyk":2742,"kyj":122,"ktą":149,"kym":1532,"kyl":177,"ksč":304,"lpo":78,"lpn":97,"lpi":224,"lkė":206,"ls ":120,"lpt":73,"lok":724,"lon":920,"lom":534,"lop":556,"lor":335,"lod":121,"loc":79,"log":3426,"loj":732,"loi":69,"lpa":143,"los":2610,"lot":1612,"lou":74,"lov":512,"lno":410,"lią":195,"lni":4011,"lež":588,"lne":64,"lob":276,"lny":212,"lnu":597,"lmo":217,"lmi":148,"lme":109,"ldž":460,"lma":561,"lna":1027,"lmu":116,"lti":1386,"lto":827,"ltr":83,"loč":162,"ltu":191,"lty":140,"lub":724,"lkš":220,"lsi":157,"lsk":87,"lso":116,"dūr":138,"lkū":97,"lst":3935,"lsv":208,"lnė":167,"lkų":772,"lta":1286,"lte":434,"lu ":327,"lmė":408,"lsa":135,"liš":786,"liū":227,"lių":4116,"lt ":76,"lbė":158,"gą ":583,"lgu":66,"lgy":105,"lgo":332,"lge":222,"lbą":82,"lgi":1731,"li ":2175,"lga":584,"lač":341,"lfi":69,"lfa":89,"lez":63,"leu":86,"lev":356,"les":733,"let":811,"ler":662,"leo":285,"lep":84,"lem":868,"len":3529,"lek":1990,"lel":445,"lei":2618,"leg":368,"lef":117,"led":360,"lec":77,"ldy":3239,"lls":89,"llu":67,"lo ":4298,"lla":282,"lle":203,"lli":227,"llo":104,"lko":1340,"lku":68,"lks":80,"ln ":170,"lka":889,"lke":67,"lki":640,"lkl":70,"lbų":491,"lje":178,"ll 
":250,"lja":125,"lit":2868,"lis":6123,"lip":320,"lio":6518,"lin":10945,"lim":1681,"ldė":95,"liz":1115,"liv":318,"liu":3368,"lic":390,"lid":423,"lia":9617,"lib":160,"lik":3451,"laš":117,"lij":2779,"lig":956,"lie":4974,"lif":303,"ma ":8966,"gęs":112,"mb ":104,"lvų":129,"mac":657,"mai":3495,"maj":238,"mak":310,"mad":111,"mag":648,"mar":995,"mas":16980,"mal":1077,"mam":360,"man":2648,"maz":236,"mav":409,"mau":158,"mat":2440,"mba":403,"mbl":299,"mbi":672,"mbe":160,"mbr":361,"mbo":656,"me ":4745,"mbu":328,"mdo":70,"mdi":69,"med":2275,"meg":289,"mec":166,"met":7146,"mes":916,"mer":2496,"mem":105,"mel":455,"men":7636,"mei":219,"mez":71,"mfo":93,"lmų":71,"lpė":80,"lva":657,"lve":135,"lvi":553,"luk":80,"lui":136,"lup":118,"luo":731,"lun":291,"lum":619,"lut":328,"lus":1933,"lur":73,"luv":66,"lnų":522,"ly ":90,"lvo":651,"lyb":79,"lyd":329,"dų ":1583,"ltą":68,"lyj":2459,"lyk":200,"lyg":1880,"lsč":1443,"gė ":262,"lyv":644,"lyp":129,"lym":88,"lyn":910,"lys":1218,"lyt":728,"gėl":263,"gėj":295,"gėg":66,"gę ":71,"lvė":163,"ltų":271,"gėr":187,"ltū":1732,"gės":1188,"mpi":2031,"mph":67,"mpe":889,"mpr":78,"mpo":545,"mpl":367,"mpu":163,"ms ":4671,"mog":880,"moc":111,"mob":923,"mod":511,"mon":3483,"mok":6158,"moj":1912,"mom":231,"mol":407,"mor":477,"mos":8683,"mot":735,"mpa":1170,"mu ":1142,"gį ":94,"miš":1246,"mių":389,"mt ":92,"mto":342,"mtm":156,"mti":479,"mso":66,"msi":145,"mta":393,"mur":124,"mus":1181,"mut":199,"mui":800,"mul":740,"mum":317,"mun":878,"muo":1470,"muz":1069,"mpė":146,"džo":105,"dža":265,"mga":68,"eš ":414,"mi ":2927,"dži":8825,"dže":173,"meč":426,"mbū":66,"maž":1405,"min":8866,"mio":695,"ešo":276,"mil":606,"mim":392,"ešm":321,"mir":512,"mis":3697,"ešp":103,"ešt":271,"mit":1456,"ešu":171,"miu":166,"mic":65,"mia":1046,"eša":283,"mig":185,"eše":206,"mie":3684,"mid":149,"mik":627,"ešk":262,"mij":1021,"eši":2201,"maš":150,"mo ":11787,"mln":124,"mm ":215,"mna":274,"meš":73,"vėž":564,"tša":98,"įve":191,"įva":943,"įvy":373,"tūr":3760,"tūn":64,"tūk":418,"sūn":301,"sūr":75,"sų ":1585,"vė ":1134,"Ček":141,"Čer":143,"vęs":499,"Čiu":96,"vėr":82,"vės":1539,"Čik":109,"vėd":103,"Čil":228,"vėm":70,"vėn":180,"vėp":105,"Čia":184,"vėj":505,"vėl":1093,"vę ":211,"rža":90,"rže":425,"rži":124,"ržo":186,"ržu":185,"ržy":332,"vą ":302,"ržų":128,"vč ":177,"zra":92,"zmą":73,"uči":685,"ršū":168,"čem":518,"rūd":121,"rūg":209,"tį ":848,"rūt":179,"rūs":316,"rūv":85,"rūk":126,"rūn":147,"rūm":473,"rūp":93,"zmų":92,"zuo":830,"zul":113,"čia":6305,"čiu":3945,"čin":247,"rų ":3880,"čio":4188,"rūš":1748,"rųj":78,"čią":252,"čių":6037,"čiū":111,"zga":86,"rš ":371,"zdu":307,"zdy":84,"zdo":187,"zeu":81,"zen":162,"zel":75,"zer":347,"ze ":133,"zda":224,"zdi":311,"zde":65,"zac":1511,"zai":89,"zam":101,"zan":208,"zal":69,"zar":93,"zau":206,"zav":191,"zas":215,"zos":160,"zot":82,"zon":766,"zol":127,"zo ":283,"zma":593,"zmo":623,"zme":95,"zdž":483,"zna":91,"zmu":127,"rša":115,"zia":212,"zie":245,"zid":291,"zic":125,"zij":2812,"rši":542,"zin":555,"zil":400,"zik":1298,"ršk":199,"zio":150,"zis":547,"ršt":245,"zit":198,"ršu":471,"yvu":368,"ynų":327,"yvy":146,"yvo":316,"yve":3277,"yvi":885,"yva":1082,"ymų":245,"ytu":3236,"yto":736,"yti":2477,"yta":2174,"ynė":548,"yst":1756,"yną":129,"ysk":104,"ysl":216,"ysi":1379,"ykš":318,"yse":638,"sį ":224,"ymė":134,"ymą":285,"yri":1443,"yro":395,"yru":239,"ylė":72,"yra":10061,"yre":153,"ys ":5011,"ykę":197,"ypt":748,"ygų":179,"ypa":451,"yop":72,"yny":165,"ynu":272,"tęs":608,"yvū":473,"yvų":140,"za ":200,"tėl":64,"tėn":94,"tėm":94,"tėj":863,"ytų":2089,"tės":2264,"tėv":185,"tę 
":460,"yzd":234,"yrų":212,"ytą":75,"tė ":2107,"ytė":146,"ysč":138,"yrė":91,"ybi":1941,"ybo":1219,"yda":247,"yde":256,"ydi":267,"ydo":238,"ydr":112,"ydy":152,"ya ":66,"sęs":72,"rįž":117,"ybe":375,"yba":1117,"ydį":315,"ybų":501,"yka":208,"ykd":1149,"yki":766,"ykl":3525,"yko":378,"yks":518,"yku":494,"yn ":82,"yla":244,"yli":1437,"ygą":95,"yll":79,"ylo":78,"yma":917,"ydų":119,"ymi":564,"ydž":370,"yme":67,"ymo":2861,"ymu":353,"yna":1411,"yni":762,"yne":693,"yno":1069,"ygi":962,"ygl":225,"ybą":107,"yga":678,"ybė":5361,"tą ":2180,"ygo":549,"ygu":328,"ybę":375,"yin":78,"tąj":82,"yje":6777,"sči":2451,"pūs":102,"rį ":999,"sė ":786,"sėd":208,"sėk":228,"sėj":581,"sės":2025,"sėt":101,"sę ":210,"rįs":115,"pų ":324,"są ":505,"ožy":124,"ože":207,"oža":97,"oži":226,"rėž":276,"sąv":390,"sąr":690,"sąs":356,"sąj":351,"sąm":110,"sąl":240,"sąn":78,"pši":366,"ręs":410,"rėg":75,"rėl":160,"rėj":1557,"rėn":386,"rėm":119,"rėt":168,"rės":6344,"rę ":1445,"rėd":146,"ww ":76,"rąž":93,"rė ":1022,"www":76,"ws ":91,"rči":327,"nžu":77,"rą ":1079,"nži":161,"rąj":82,"ošt":122,"oši":132,"oše":197,"oša":95,"vyl":67,"vyk":2515,"vyn":610,"vyr":1040,"vyd":92,"vyj":96,"vys":564,"vyt":65,"vyz":235,"nųj":77,"war":74,"viš":606,"vro":160,"vių":2220,"vsk":219,"vu ":109,"pį ":89,"vus":1133,"vuo":278,"vum":127,"vul":245,"vz ":411,"nų ":7207,"vyb":651,"vož":79,"via":688,"vio":508,"vir":3137,"vik":248,"vil":1104,"vim":2439,"vin":4831,"vig":137,"nši":88,"vij":1156,"vic":102,"vid":2656,"vie":10516,"viz":527,"viv":2783,"viu":186,"vit":264,"vis":3495,"važ":200,"vka":106,"vo ":7394,"vež":182,"vič":254,"voj":2810,"vol":423,"vok":960,"von":160,"vor":253,"vot":192,"vos":5796,"ąra":686,"vi ":818,"ąjį":197,"vač":96,"mži":691,"ąją":384,"ver":3346,"ves":601,"vet":199,"vej":358,"vei":3674,"veg":178,"ven":4988,"vel":635,"vek":95,"ved":568,"ąmo":118,"ąna":83," − ":501,"ve ":425,"ąly":225,"val":10598,"vak":4952,"van":3055,"vam":251,"vap":91,"var":5998,"vat":733,"vas":2439,"vav":423,"vau":1068,"vaz":104,"vab":435,"vac":68,"vad":6655,"vai":6631,"vaj":535,"ąju":613,"vag":166,"uvų":343,"ąja":230,"va ":1330,"utų":550,"pės":2048,"pėj":200,"pėm":214,"pėn":79,"uvę":282,"pėd":630,"usų":229,"pę ":262,"uvė":461,"urž":199,"mūš":172,"mųj":304,"urų":123,"urš":132,"uzi":1325,"usį":91,"uza":114,"uzd":74,"utė":482,"pė ":2903,"urį":786,"mų ":3232,"usę":91,"usė":442,"usą":115,"uož":251,"urė":7194,"urę":1318,"uoš":293,"umų":278,"upę":254,"ux ":100,"upė":4550,"uvi":2426,"uvk":94,"uvo":8973,"uva":1382,"uve":227,"uvy":103,"unų":111,"uvu":401,"usl":117,"usm":292,"usk":802,"ukš":4009,"usi":16156,"mūg":65,"usd":94,"use":242,"usa":1585,"unė":100,"ukų":215,"usy":348,"usv":157,"usu":325,"ust":2671,"uss":74,"mūs":116,"ukū":174,"mūr":92,"usr":197,"uso":1941,"uti":3065,"ute":1211,"uta":1298,"utb":1100,"ulų":95,"uty":105,"uts":108,"utu":465,"uto":2462,"utr":654,"uoč":243,"us ":25324,"ukė":207,"ut ":98,"ulė":940,"urb":304,"ura":1290,"urd":135,"ure":433,"urg":609,"umą":619,"uri":13437,"pči":95,"urk":677,"urm":174,"urn":819,"uro":2323,"urp":92,"urs":335,"urt":2267,"uru":270,"ulį":98,"urv":125,"ury":263,"ujų":216,"uog":115,"uod":1799,"uob":148,"uop":102,"uon":468,"uol":1850,"uom":2229,"uoj":2916,"ują":85,"uok":472,"uot":3875,"uos":9387,"upa":260,"ugų":171,"uoz":64,"ur ":478,"uką":82,"upi":1336,"upe":880,"upo":164,"upr":184,"upy":138,"upt":113,"upu":131,"ump":675,"umu":499,"umi":565,"umo":1874,"uma":2535,"umb":630,"ume":1015,"udž":589,"uly":692,"ugė":81,"uo ":11031,"ugę":74,"unt":286,"uns":125,"unu":78,"unk":1010,"uni":2717,"uno":1046,"unc":101,"und":514,"una":778,"ung":2769,"une":377,"up 
":104,"uks":488,"ukr":338,"uku":1424,"ukt":1044,"uko":768,"ukm":63,"ukl":276,"uki":835,"ukc":193,"uke":516,"um ":371,"uka":1363,"ubų":81,"uju":231,"ulv":76,"ulu":205,"ult":2376,"uls":117,"ulp":163,"ulo":319,"ulm":98,"ulk":1598,"uli":4261,"ulg":118,"ule":258,"ula":686,"un ":91,"uid":65,"uik":63,"uil":83,"uin":83,"uis":152,"uic":69,"ąvo":220,"uje":2229,"uji":238,"ujo":901,"ąve":118,"ąva":67,"uit":183,"uiz":104,"ul ":100,"udė":847,"uja":1817,"ugi":2043,"ąsi":447,"lži":190,"ugd":727,"uge":478,"ugn":281,"ugo":862,"ugp":256,"ugl":79,"ui ":2863,"uga":3449,"ugy":234,"ugv":81,"ugu":1147,"ugs":210,"ugr":96,"uha":63,"pą ":193,"ąst":536,"uda":2525,"ude":508,"udi":1843,"ubo":317,"ubt":123,"ubr":94,"ubu":247,"ue ":115,"uci":1337,"uch":229,"uer":154,"ufo":165,"udu":90,"udr":145,"udo":3114,"ug ":571,"udy":189,"udz":149,"uen":154,"uel":163,"uei":67,"tyč":85,"tuš":104,"ua ":181,"uau":101,"uar":229,"ual":389,"uan":260,"ubi":273,"ubj":203,"ubl":1778,"ube":224,"uba":739,"uac":101,"trų":261,"trū":89,"tyv":1223,"tyg":105,"tyj":2629,"tyk":592,"tyl":66,"tym":1125,"tyn":745,"tyr":732,"tys":1652,"tyt":1130,"tvė":152,"ty ":191,"tvy":70,"tve":795,"tvi":1569,"tva":1876,"tur":3389,"tus":4707,"tut":476,"tuv":8023,"tui":321,"tul":676,"tuk":242,"tun":200,"tum":1081,"tup":188,"tuo":4132,"tub":127,"tua":342,"tud":729,"tuc":1071,"tug":166,"tyb":2995,"lų ":4053,"trė":213,"trą":137,"ts ":284,"tiš":751,"tre":865,"tt ":65,"tra":6670,"tri":3774,"oči":767,"tru":2118,"tro":4397,"nį ":1595,"tu ":2200,"try":1680,"tsa":349,"lūd":70,"tsi":864,"lūn":117,"lūk":105,"tsk":627,"tsp":242,"tst":1087,"lūs":124,"tte":91,"ttp":75,"tme":480,"tma":182,"to ":9437,"tmo":148,"tmi":171,"tni":215,"tne":192,"tp ":76,"tna":162,"tno":94,"tod":622,"toc":126,"toj":3705,"tog":985,"tob":201,"tov":2012,"tos":4057,"tot":663,"toz":74,"tom":2117,"ton":1654,"tok":1091,"tol":2595,"tor":6343,"top":234,"tr ":100,"tpa":67,"tij":3726,"lši":179,"taš":256,"til":880,"tik":5456,"tif":144,"tie":4693,"tig":115,"tir":719,"tit":1932,"tis":9550,"tin":18689,"tim":2115,"tip":1038,"tio":733,"thu":146,"tia":180,"tib":66,"tic":399,"tid":346,"taž":90,"tiz":264,"tiu":112,"tiv":191,"tko":75,"tku":65,"tka":159,"tli":717,"tla":468,"tle":281,"tem":2567,"ten":1501,"teo":504,"tep":206,"tei":4906,"tek":2029,"tel":3046,"teg":305,"tea":212,"teb":306,"tec":578,"ted":149,"tfo":195,"th ":187,"tez":110,"tet":1080,"tes":640,"ter":7958,"ti ":12579,"tga":173,"tač":773,"tho":100,"the":357,"thi":68,"tha":124,"yži":382,"zūr":65,"žė ":79,"zų ":230,"žėj":93,"žės":302,"yšu":129,"yšk":285,"yši":281,"žįs":104,"AR ":460,"AT ":96,"AV ":1248,"zę ":74,"zės":255,"BA ":84,"AB ":129,"가가":99,"Žie":168,"Žmo":87,"Žal":202,"Žai":105,"Žem":792,"ža ":218,"vųj":76,"vų ":1432,"Žva":101,"žli":98,"žka":118,"žin":2776,"žim":518,"žik":242,"žir":69,"žio":3609,"žiu":1205,"žis":284,"ždė":292,"žia":4861,"žie":756,"三 ":108,"žpa":73,"žos":282,"zė ":373,"žny":789,"žoj":209,"žol":428,"žod":694,"žmo":1552,"žią":130,"žni":1016,"žna":456,"žo ":203,"ždž":217,"žde":109,"žda":511,"žas":486,"žba":64,"žai":1499,"žal":557,"žan":496,"žar":129,"ži ":73,"žer":3193,"žes":178,"žet":91,"žei":363,"žel":344,"žem":2279,"žen":275,"ždy":272,"ždu":70,"ždi":76,"už ":1010,"uža":101,"užd":391,"uže":68,"užs":382,"užt":281,"užr":186,"užu":95,"užk":126,"uži":622,"užp":129,"užn":124,"žys":63,"žym":728,"žyg":68,"žyd":166,"žyb":321,"žtv":87,"žud":129,"žuv":1113,"žut":66,"žur":382,"žuo":217,"žve":120,"žva":821,"žvi":225,"žvy":103,"žra":185,"žių":1603,"žiū":506,"žsi":337,"žta":167,"žte":70,"žti":234,"yči":2350,"vūn":458,"užė":257,"tųj":219,"tų ":12460,"ušė":81,"之 
":65,"ušt":63,"ušk":90,"uši":396,"uša":109,"tžv":91},"n_words":[6266541,7160065,6094403],"name":"lt"}
\ No newline at end of file
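Both the profile above and the one added below share the same layout: a single-line JSON object with a "freq" map of n-gram counts, an "n_words" list of totals for 1-, 2- and 3-grams, and a "name" holding the language code. As a minimal sketch of how such a file can be consumed under Python 3 — the path and helper name here are illustrative, not the library's own loader (that lives in detector_factory.py) — assuming the profiles are UTF-8, which these are:

    import json

    def load_profile(path):
        # Each profile is one JSON object:
        # {"freq": {...}, "n_words": [uni, bi, tri], "name": "lt"}
        # Python 3 needs the text decoding stated explicitly.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    profile = load_profile('nlp_resource_data/langdetect/profiles/lt')
    print(profile['name'], profile['n_words'])  # lt [6266541, 7160065, 6094403]
    print(profile['freq']['aus'])               # 15793 (a trigram count from this profile)

The explicit encoding argument is the part that differs between the two Python versions: Python 2 code could read these files as undecoded bytes, while Python 3 separates bytes from str and requires the decoding up front.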
diff --git a/nlp_resource_data/langdetect/profiles/lv b/nlp_resource_data/langdetect/profiles/lv
new file mode 100755 (executable)
index 0000000..ce5ff7e
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"ūci":182,"ūdu":26,"ūdr":44,"ūdo":28,"ūde":716,"ūda":90,"D":3059,"E":2166,"F":1831,"G":1835,"A":5495,"B":3355,"C":2145,"L":4567,"M":3974,"N":1829,"O":1507,"H":1820,"I":2327,"J":1801,"K":4616,"U":852,"T":4509,"W":365,"V":3715,"Q":38,"P":4759,"S":5447,"R":3339,"Y":65,"X":234,"Z":1747,"f":8161,"g":29438,"d":58295,"e":131519,"b":30435,"Fed":124,"c":22500,"a":237698,"n":95937,"o":88352,"l":73126,"m":69285,"j":43124,"k":78380,"Fel":22,"h":5764,"Fen":23,"i":199506,"w":553,"v":49552,"u":101354,"Fer":33,"t":118682,"s":178123,"r":127678,"q":293,"p":52161,"z":35062,"y":1297,"x":396,"²":54,"Fil":64,"í":35,"Fin":30,"Fir":23,"é":81,"ç":26,"ä":65,"â":42,"á":40,"ü":91,"ö":83,"ó":42,"Fiz":28,"ē":27728,"Ē":143,"Ā":409,"ā":76597,"Č":300,"č":1638,"ı":70,"ķ":3799,"Ķ":255,"Eze":42,"Ļ":56,"ļ":8048,"Ģ":128,"ģ":3732,"Ī":133,"ī":34346,"Ģer":28,"Ģeo":39,"Ģen":28,"ş":30,"ņ":7202,"Ņ":124,"ō":55,"Ž":134,"ž":4793,"Š":1043,"š":16459,"Ū":37,"ū":7903,"Equ":185,"Eri":30,"Ern":25,"Eur":70,"Eir":595,"́":38,"Ele":79,"Eks":36,"Eko":28,"μ":50,"ν":83,"End":31,"ο":116,"ι":60,"κ":31,"λ":61,"δ":42,"ε":43,"η":40,"α":118,"β":23,"γ":49,"ί":37,"Emi":22,"Eli":48,"ω":27,"Ent":23,"ό":36,"σ":41,"ς":97,"ρ":66,"π":29,"φ":23,"υ":22,"τ":47," l":9777,"ь":65," m":8646," n":13651," o":3224,"я":84," h":1222," i":26578," j":3868," k":23420,"ы":62," d":13444," e":3216,"х":31,"ц":23," f":3413," g":7521,"ч":56,"р":268," a":22507,"с":262," b":8086,"т":170," c":4249,"у":116," y":33," x":65," z":3907," u":13570," t":13126," w":93," v":19041," q":33," p":25406," s":20595," r":6772,"HK ":23,"Ūde":29,"Л":28,"К":30,"Н":52,"М":37,"О":27,"Б":29,"А":26,"В":29," J":1795," K":4607," H":1796," I":2309," N":1813," O":1473," L":4536," M":3956," B":3324," C":2058,"Р":35," A":5469,"С":193," F":1822," G":1815,"Ф":23," D":3031," E":2156,"л":187," Z":1739,"к":221,"й":81," Y":64," X":228,"и":382,"п":57,"о":387,"н":271,"м":85," S":5368,"г":53,"Ger":28," R":3325,"в":222,"б":51," Q":37," P":4729,"а":390,"Geo":50," W":350,"з":76," V":3691,"Gen":23," U":847,"е":335," T":4452,"д":133,"šža":24," ā":501," Ā":409," č":467," Č":299," Ē":143," ē":282,"HL ":22," ž":311," Ž":134," Ū":37," ū":565," Š":1042," š":2083," Ļ":56," ļ":329," ķ":663," Ķ":254," Ī":131," ī":702," Ģ":128," ģ":1094," Ņ":120," ņ":41,"י":25,"Gan":49,"Gal":115,"Gam":34,"Gau":47,"Gar":70,"Gai":64,"Fut":176,"و":37,"ي":53,"ل":68,"م":47,"ن":33,"د":35,"ب":27,"ة":22,"ا":101,"س":25,"ر":50,"Frī":41,"šūn":142,"Flo":40," А":25," Б":28," В":29,"Fra":343," К":29,"Fri":44," Л":27," М":37," Н":52," О":26,"Fre":87,"A ":493,"For":93," α":37,"F ":117,"Da":634,"Cu":37,"Cl":54,"Co":292,"Cr":43,"Ce":347,"Ch":135,"Ci":209,"G ":59,"Ed":95,"Eb":22,"Du":106,"Dz":229,"Do":208,"Dr":103,"De":400,"Di":679,"IP ":41,"Fe":227,"H ":468,"Fa":110,"Ez":43,"Eu":85,"Ev":42,"Ex":29,"Er":98,"Eq":185,"Et":48,"Es":44,"En":123,"Em":90,"Ei":677,"El":193,"Ek":93,"Eg":26,"Ge":158,"Bā":56,"Ga":460,"I ":434,"Fu":230,"Fr":536,"Fo":212,"Fl":70,"Fi":194," о":23,"B ":133," Р":34," С":176," Ф":23,"II ":185,"C ":377," с":29,"Av":74,"Au":759,"Ar":519,"At":405,"As":243,"D ":170,"Ba":1010,"Az":34,"Af":50,"Ag":51,"Ah":37,"Ab":147,"Ac":82,"Ad":148,"Am":328,"An":739,"Ap":260,"Ai":200,"Ak":154,"Al":535,"Hit":49,"His":22,"Bu":240,"Br":526,"Ca":279,"E ":106,"Bi":333,"Be":447,"Bo":341,"Hil":23,"Bl":116,"Him":40,"Hip":27,"Kv":49,"Ku":433,"Kl":267,"Kr":1045,"Ko":785,"Gā":24,"Le":274,"Li":1187,"N ":114,"La":2106,"Lu":187,"Ly":26,"Lo":258,"Me":656,"Hā":28,"Dž":311,"Mi":487,"O ":256,"Ma":1352,"My":39,"Hē":35,"Mu":257,"Mo":497,"Ni":163,"Ne":339,"Na":331,"P 
":315,"Hel":44,"Hei":53,"Nu":37,"No":488,"Ok":85,"Ol":289,"On":42,"Og":53,"Oh":28,"Od":50,"Jā":93,"Hen":52,"Of":29,"Ob":41,"Her":113,"Gi":68,"Gl":88,"Gr":518,"Go":173,"IA ":51,"Gu":165,"Gv":31,"Bē":72,"J ":48,"Ha":442,"He":346,"Hi":235,"Ho":305,"Bī":48,"Hu":55,"Cē":86,"K ":99,"Ib":23,"Id":25,"Dā":72,"Ie":161,"Ig":193,"Im":77,"In":597,"Ik":26,"Il":62,"Dē":23,"Iv":35,"Is":56,"It":130,"Ir":159,"Ja":606,"L ":193,"Iz":255,"Je":180,"Jo":238,"Hab":30,"Ju":235,"Hal":59,"Hai":24,"Ka":1308,"Han":77,"M ":140,"Ham":27,"Har":76,"Ki":234,"Hav":40,"Ke":175,"Mū":173,"Ur":133,"Up":46,"Un":191,"Uk":57,"Ul":29,"Ug":37,"Pā":146,"W ":36,"Tu":367,"Nī":78,"Tr":348,"Lū":27,"To":528,"Th":183,"Ti":330,"Tj":24,"Te":412,"Ta":1094,"V ":507,"Bēr":58,"Grī":41,"Sy":41,"St":811,"Sv":222,"Su":204,"Wo":58,"Wi":99,"Wa":68,"Rā":30,"We":47,"Vo":206,"Pī":38,"Vu":40,"Vi":1349,"Vl":38,"X ":140,"Va":795,"Ve":528,"Pē":222,"Uz":97,"Lā":40,"Pu":149,"Pr":574,"Ps":29,"Gus":31,"S ":509,"Pe":330,"Kā":157,"Pa":1460,"Gui":25,"Pl":246,"Po":522,"Pi":543,"Ph":56,"Gul":51,"Os":199,"Ot":132," ا":37,"Ov":28,"Op":65,"Or":179,"R ":242,"šņu":59,"Jē":211,"Nā":75,"Se":449,"Sc":140,"Si":396,"Sh":52,"Sn":22,"Sm":71,"Sl":112,"Sk":318,"Mī":29,"Sp":308,"So":292,"Ru":198,"Jū":171,"Mē":139,"U ":122,"Sa":1320,"Grā":24,"Re":684,"Mā":133,"Ri":309,"Rh":25,"Ro":567,"Lī":215,"Qu":25,"Lē":28,"T ":118,"Ra":377,"Gre":140,"Gri":112,"Gra":91,"Vī":58,"b ":1431,"Gru":54,"Gro":39,"a ":45978,"Sē":77,"Tā":889,"Yo":26,"Tē":78,"Sī":40,"Z ":47,"Rē":60,"Sā":102,"Pū":23,"Rī":858,"Gol":28,"Got":33,"Vā":356,"Vē":122,"Za":126,"Ze":604,"Zi":554,"Tī":49,"Zo":72,"Zu":34,"God":26,"Rū":43,"Zv":222,"šķā":29,"i ":20844,"aē":38,"gd":52,"ge":675,"bā":1878,"ga":11955,"gb":23,"Ing":34,"Inf":36,"fl":161,"fg":38,"ff":54,"aā":32,"fi":2006,"šķē":67,"fs":184,"fr":742,"ač":195,"fu":915,"ft":143,"Int":119,"fo":1630,"Ins":32,"j ":431,"aķ":196,"gz":413,"bē":351,"Ir ":43,"cā":282,"he":981,"aļ":2174,"ha":893,"gn":346,"gm":103,"gl":1302,"gk":23,"aģ":126,"gi":1023,"gh":75,"gg":39,"gv":152,"gu":2538,"gt":345,"gs":1978,"gr":3115,"šķī":179,"aī":165,"go":1269,"dt":34,"du":3460,"dv":153,"dy":39,"dz":8558,"g ":378,"Ima":40,"ea":831,"eb":2145,"ec":2362,"ed":5035,"de":6326,"dd":61,"dg":22,"di":7508,"dh":33,"dk":50,"dm":437,"dl":81,"do":4137,"dn":387,"dp":40,"ds":1737,"dr":2632,"ew":72,"ex":66,"eu":138,"ev":3202,"ey":101,"ez":2104,"fa":698,"h ":326,"Inc":26,"Ind":290,"fe":924,"eh":574,"eg":1917,"ef":579,"ee":260,"el":8559,"ek":9435,"ej":1682,"ei":6080,"ep":1928,"eo":1018,"Imp":27,"en":17231,"em":12435,"et":9035,"es":13667,"er":13266,"cb":46,"ca":1325,"e ":9104,"bv":92,"by":23,"bs":478,"br":1873,"bu":2780,"bn":90,"bo":2478,"bp":134,"bj":321,"bk":204,"bl":923,"bi":5030,"bb":37,"bd":41,"be":3600,"da":13240,"f ":227,"cy":80,"cu":688,"ct":320,"cs":106,"cr":118,"co":534,"cp":28,"cm":65,"ck":210,"cl":78,"ci":9940,"ch":496,"ce":3798,"cc":58,"c ":1617,"Zī":28,"az":1867,"ay":138,"ba":7096,"d 
":1260,"at":15301,"as":46560,"ar":22107,"šķe":37,"ax":44,"aw":25,"av":4016,"au":11606,"šķa":58,"ak":5105,"al":13877,"ai":16755,"aj":4377,"ao":99,"ap":6109,"šķi":792,"am":7193,"an":13591,"ac":1971,"ad":7331,"aa":123,"šķu":61,"ab":3333,"ag":2079,"ah":438,"ae":799,"af":317,"nu":3766,"nt":6816,"ns":6634,"ič":143,"nr":301,"hī":43,"np":87,"no":13676,"nn":366,"nz":252,"ny":68,"iē":45,"nv":1299,"oe":196,"jā":6443,"of":937,"Dān":37,"oc":1480,"od":5344,"oa":231,"ob":1666,"Dār":22,"om":4560,"on":9439,"ok":3012,"ol":6437,"oi":175,"gš":291,"iģ":203,"oj":3579,"og":1772,"oh":231,"Iga":176,"ot":8233,"m²":53,"gū":417,"os":7017,"ov":1717,"ou":258,"op":3592,"oo":225,"or":8642,"iķ":475,"r ":23056,"ox":43,"jē":253,"ow":104,"oz":1358,"oy":26,"pd":241,"pe":2624,"kā":9200,"pg":761,"gž":90,"pa":12196,"pb":92,"iļ":351,"pc":59,"pl":2446,"iņ":2232,"pm":337,"pn":123,"po":2922,"ph":171,"pi":8931,"pj":212,"pk":534,"lo":5141,"ln":1238,"eņ":1120,"lm":668,"ll":864,"ls":6050,"dū":151,"lr":53,"fī":68,"lp":774,"lv":2188,"lu":3183,"lt":2348,"lz":417,"ly":68,"gē":101,"o ":13523,"md":123,"ma":11255,"mb":1027,"dž":490,"mg":152,"me":8257,"mf":62,"hā":197,"mk":81,"ml":84,"mi":7529,"eš":2372,"mj":254,"mn":294,"mm":568,"mp":1937,"mo":3385,"mt":1499,"ms":4163,"mv":67,"Iek":27,"mu":4579,"hē":85,"mz":26,"Ied":23,"Ier":30,"p ":1187,"Ies":23,"na":11100,"nb":173,"nc":1308,"gļ":378,"nd":3610,"ne":6367,"nf":496,"iā":1355,"ež":1461,"ng":2033,"nh":159,"ni":10637,"nj":103,"nk":1409,"nl":178,"nm":119,"jv":54,"jt":24,"ju":6450,"jr":40,"eč":136,"bū":646,"js":1745,"jp":49,"dī":2457,"jn":108,"jo":2718,"jl":29,"jk":33,"ki":2861,"eģ":460,"kh":87,"kg":105,"fā":86,"ke":1430,"kd":52,"kc":519,"kb":27,"ka":21595,"m ":14604,"fē":229,"ky":24,"cū":51,"ks":6047,"kt":4783,"ku":9917,"kv":498,"ko":8251,"kp":53,"eī":44,"kr":3493,"kk":49,"kl":2204,"dņ":25,"km":1006,"kn":291,"li":12111,"lh":104,"lk":789,"lj":234,"le":5914,"ld":1879,"lg":521,"lf":344,"gā":2261,"la":14259,"eļ":2084,"lc":198,"lb":519,"n ":12333,"eķ":397,"hr":242,"hs":176,"bī":818,"ht":159,"hu":177,"hi":1019,"hn":345,"ho":773,"hl":97,"aņ":799,"hm":55,"id":6988,"ic":1972,"ib":840,"ia":987,"ih":374,"ig":1651,"if":662,"dā":3118,"ie":43851,"hy":49,"cē":786,"k ":3032,"cī":1751,"iq":24,"ir":20819,"is":22114,"it":6916,"iu":210,"iv":1914,"dē":1715,"ix":47,"ii":76,"aš":1551,"ij":18002,"ik":8262,"il":7272,"im":5227,"in":12037,"io":3152,"ip":1361,"je":2277,"jd":65,"až":1131,"eā":550,"ji":2430,"iz":8191,"l ":908,"ja":18785,"xi":24,"pš":285,"rģ":265,"pū":149,"rī":5569,"xt":23,"rē":1505,"ww":72,"z ":3167,"ož":188,"sā":1658,"nž":71,"wi":65,"oš":2487,"šīs":96,"šīr":28,"wo":40,"šīn":152,"ws":45,"vv":135,"vz":34,"y ":470,"wa":100,"pļ":34,"rā":7122,"we":66,"vl":36,"vm":57,"oņ":431,"vj":161,"vk":55,"vi":15879,"nš":154,"vg":67,"vu":1511,"vr":110,"vs":665,"nū":52,"vp":135,"pī":487,"vn":277,"vo":2198,"šīb":168,"uz":3239,"ux":47,"pē":3898,"uv":1138,"uu":163,"ve":6316,"vd":128,"vc":24,"oļ":231,"va":13478,"oķ":34,"x ":250,"oģ":891,"ui":330,"mš":154,"uj":549,"uk":2577,"ul":3970,"ue":159,"pā":3136,"uf":42,"ug":2904,"lž":37,"uh":49,"oī":103,"ur":8826,"mū":945,"us":8406,"ut":3554,"um":10369,"un":12883,"uo":66,"up":1299,"ty":109,"tz":181,"tu":9581,"tt":1263,"tw":38,"tv":2682,"ub":1129,"ua":219,"ud":1801,"uc":777,"w ":104,"to":9558,"tn":1900,"tm":416,"tl":676,"lū":587,"ts":7290,"tr":9995,"oč":89,"nī":2204,"tp":327,"tg":235,"tf":75,"te":8645,"td":205,"tk":771,"tj":107,"ti":16287,"lš":164,"th":431,"v ":977,"šī 
":103,"tb":1541,"tc":58,"ta":17648,"su":2355,"sv":864,"ss":1501,"kū":96,"st":22568,"sy":29,"sz":152,"sw":23,"nē":1546,"sl":2038,"sk":10907,"sn":958,"sm":2964,"sp":4072,"so":1970,"sr":95,"nč":276,"mī":785,"sd":451,"sc":379,"sf":321,"nā":8240,"se":3495,"sh":197,"sg":86,"sj":78,"si":5921,"kš":1455,"rz":732,"u ":34678,"sa":11922,"sb":260,"rr":239,"rs":5613,"jū":1021,"rt":4290,"ru":7404,"rv":1310,"Cēs":56,"mē":2975,"ry":160,"lī":4107,"rp":1821,"ro":10142,"rn":1729,"kņ":114,"rm":3278,"rl":568,"rk":1664,"rj":126,"jš":27,"ri":15479,"rh":439,"rg":2074,"iž":275,"mā":4269,"rf":127,"re":6968,"rd":2322,"rc":590,"kļ":994,"rb":2090,"ra":18562,"t ":6209,"lē":2235,"qu":266,"kī":37,"iš":513,"lā":5128,"s ":99264,"kē":55,"pz":438,"pt":1346,"pu":3008,"pv":476,"pp":139,"jī":54,"pr":4678,"ps":1123,"zī":3820,"zā":1002,"IX ":48,"šģi":28,"zē":1137,"už":113,"uš":785,"IV ":42,"tš":221,"uļ":295,"uņ":333,"vī":926,"tū":1454,"vā":2847,"tļ":184,"uģ":166,"rž":79,"zz":61,"sū":160,"Bīb":25,"vē":3474,"tņ":104,"uā":341,"Hor":54,"zg":379,"zi":7929,"rš":1224,"zb":206,"Hou":27,"zc":577,"zd":606,"ze":4296,"za":1506,"Hom":27,"Hon":23,"Hok":34,"Hol":58,"zv":1734,"uē":34,"rū":541,"zs":1624,"uč":23,"zr":375,"zu":648,"zt":460,"zo":976,"zn":964,"tī":5903,"zp":657,"zk":332,"zj":64,"zm":1200,"zl":791,"yg":24,"ye":62,"tā":11917,"rļ":24,"yc":58,"yd":22,"ya":64,"rķ":279,"sē":2104,"tē":2602,"yt":35,"ys":126,"yr":30,"sī":1152,"yp":31,"yo":86,"yn":43,"ym":26,"rņ":171,"yl":66,"yi":22,"Arg":44,"Arh":40,"Are":22,"Ard":29,"Ara":25,"Arm":42,"Ari":31,"Šīs":37,"Apv":89,"Apo":23,"Atk":35,"Atl":83,"Atr":91,"Ato":22,"Ast":53,"Ata":28,"Asi":33,"Aso":76,"Art":56,"Avo":25,"Aut":94,"Aus":451,"Aug":149,"Alū":46,"Arā":51,"šēj":249,"zš":91,"zū":24,"zņ":392,"Šķi":29,"Atš":29,"Bak":26,"Bal":501,"Ban":57,"Bab":25,"Baz":45,"Bar":119,"Bas":49,"Bau":40,"Abr":31,"Act":23,"Ada":38,"šā ":313,"Adr":25,"šād":125,"šāk":184,"šām":60,"šās":183,"šāv":22,"Afg":26,"Aiz":104,"Aka":34,"Ala":26,"Alb":79,"Ale":93,"Alf":40,"Alt":55,"All":36,"Alp":29,"Ame":197,"Ama":49,"Šī ":93,"Ang":135,"Ana":95,"And":123,"Ant":228,"Ann":55,"Ar ":101,"ОН ":57,"Bul":41,"Bur":79,"Blū":22,"Bru":49,"Brī":92,"² ":53,"Brā":30,"Cal":37,"Cam":27,"Car":39,"Cau":23,"Can":86,"Cap":22,"CH ":201,"Bez":37,"Ber":134,"Ben":73,"Bel":45,"Bie":67,"Biz":31,"Šād":55,"Bil":27,"Bir":42,"Bio":33,"Blo":34,"CP ":24,"CO ":32,"Bla":25,"Bre":53,"Bra":125,"Bro":55,"Bri":92,"Bol":40,"Bon":23,"Bor":50,"Bos":71,"Bov":25,"Der":41,"Des":26,"Dei":40,"Del":44,"Dem":45,"Den":34,"Deg":23,"Dan":52,"Dar":42,"Dat":63,"Dau":261,"Dab":33,"Chr":32,"Cit":38,"Cir":22,"Cil":47,"Cel":26,"Cen":195,"Cet":27,"Cer":41,"Cha":50,"Cor":106,"Com":69,"Col":30,"Con":48,"FA ":41,"ģip":59,"ģin":756,"ģio":352,"ģit":52,"ģis":449,"ģim":77,"ģij":982,"Dze":94,"Dzi":93,"ģi ":28,"ģer":123,"ģel":28,"ģeo":161,"ģen":236,"Edv":25,"Edu":28,"ģa ":65,"Dzī":27,"āg":215,"Daž":59,"āf":379,"āc":3184,"ād":3136,"āb":862,"āp":472,"ām":4331,"ān":2529,"āk":5743,"āl":5581,"āj":3661,"āv":1981,"āt":4781,"ās":7161,"ār":6698,"āz":697,"Dib":24,"Dis":48,"Din":26,"Āb":22,"Dig":22,"Ād":23,"Āg":35,"Āf":113,"Die":362,"Āz":143,"Ār":50,"Div":37,"ā ":23455,"Dub":30,"ür":23,"ая ":29,"ģu ":60,"СН ":64,"Dra":31,"Dob":31,"ES ":40,"Don":29,"Dom":61,"OH ":75,"Nea":33,"ēģ":190,"Nei":26,"Net":40,"Ner":49,"Nep":30,"Neo":28,"Ēģ":41,"Nat":30,"−":111,"īdi":166,"īdr":32,"īds":186,"īdu":170,"īda":185,"īde":154,"Nin":27,"Nik":51,"ēž":23,"īci":383,"ūšu":22,"īca":276,"ūša":102,"īce":148,"ībn":58,"ēš":773,"ībi":22,"ību":1222,"ībv":37,"īd 
":59,"ībe":36,"ība":3544,"ēļ":376,"New":31,"ēķ":56,"ēņ":27,"īcā":40,"ībā":960,"īgi":496,"īga":1676,"īgu":405,"īgo":298,"īgs":407,"īdz":1603,"īla":22,"Nam":26,"īgā":813,"īle":67,"īli":124,"Nac":121,"īka":42,"īkl":241,"īko":106,"īks":151,"īja":305,"ģu":81,"īji":84,"īju":700,"īdī":105,"īm ":246,"ģi":2798,"ģe":601,"ģa":66,"īdā":112,"īcī":70,"Ģe":106,"āļ":238,"āņ":597,"āč":113,"Či":48,"Če":124,"Ča":35,"Ču":25,"či":150,"če":516,"Īri":58,"ča":247,"āš":567,"āž":93,"No ":69,"ē ":2668,"Īsl":26,"čo":33,"čs":42,"ču":562,"ēz":349,"ēt":4896,"ēs":1279,"ēv":258,"čū":27,"Čī":26,"OS ":28,"ēm":2242,"ēn":867,"ēk":1496,"ēl":2212,"ēr":3544,"ēp":82,"Nov":61,"ēc":1136,"ēd":833,"Nor":163,"ēj":4209,"ēg":174,"Nos":50,"Not":22,"ēb":25,"Nok":22,"Ēr":64,"Nob":33,"ķe":541,"ķa":229,"ķu":510,"ķs":30,"ķo":78,"Odi":27,"ķi":1644,"ļ ":195,"Ogr":36,"Jān":51,"ļa":2291,"ļi":696,"ļk":39,"ļj":32,"ļe":71,"ļd":32,"ķā":31,"Ķī":170,"ļr":180,"ļs":48,"ļl":36,"ļo":646,"ķē":171,"ļu":2107,"ļv":154,"Ļe":28,"Oke":30,"ķī":541,"ļķ":37,"ļī":23,"ļģ":54,"ļē":91,"PA ":36,"ļā":656,"ļš":133,"ļū":169,"ļļ":89,"ļņ":176,"ņv":39,"ņi":716,"ņk":42,"ņj":25,"ņo":260,"ņn":28,"ņu":2226,"ņs":60,"ņr":139,"Ņu":43,"ņa":1766,"ņb":29,"ņd":47,"ņe":432,"Otr":101,"Ovi":22,"ī ":3304,"ģē":120,"ģī":39,"Jēk":140,"Īs":48,"Īr":60,"īb":5893,"Jēz":37,"īc":960,"īd":2819,"īj":1202,"īg":4125,"īl":317,"īk":663,"īn":1786,"īm":2002,"īp":453,"īr":757,"īt":3369,"īs":1871,"īv":3050,"īz":500,"ģīt":31,"Oli":250,"ı ":35,"Jāņ":29,"īģ":38,"īč":97,"īņ":368,"PS ":25,"īļ":122,"īš":592,"Ope":52,"Org":41,"Ost":30," −":67,"Osk":35,"Osm":120,"Ķe":55,"š ":1382,"Ple":55,"Pla":145,"Pil":83,"Paš":73,"Pit":22,"Pir":196,"QL ":27,"Pie":153,"Pha":28,"ģēt":22,"ģēr":24,"Kār":70,"ģēl":37,"šī":572,"šģ":28,"Per":109,"šļ":41,"Pet":30,"Pen":59,"Pek":25,"Pel":45,"šķ":1261,"Šī":134,"Kā ":42,"šā":893,"šē":273,"Šķ":45,"Šā":65,"Paz":31,"Pat":54,"Pas":386,"Par":468,"Pav":27,"Pau":41,"Pad":103,"Pan":44,"Pap":26,"Pal":61,"Pak":46,"šg":29,"še":180,"šd":202,"ša":5315,"šo":571,"šp":217,"šm":110,"šn":89,"šk":157,"šl":112,"ši":1623,"šv":170,"šu":2014,"št":354,"šs":357,"šr":148,"šz":36,"Še":104,"Ša":199,"Šo":132,"Ši":163,"Št":29,"Šv":81,"ņš":324,"Ņū":40,"ņģ":77,"Prū":54,"ņē":348,"ņķ":245,"ņā":297,"Prā":22,"Pro":228,"Pri":121,"Pre":78,"Pra":58,"Pol":274,"Pos":25,"Pop":24,"Por":59,"žr":80,"žs":96,"žu":691,"žn":24,"žo":773,"že":338,"ža":902,"žk":75,"žm":27,"ži":613,"Ža":46,"Ži":23,"Že":35,"RS ":104," ال":23,"ž ":41,"žū":26,"žņ":118,"žī":67,"žģ":48,"žā":790,"SA ":43,"Rad":110,"Rai":35,"Rag":24,"Rak":35,"ū ":140,"šņ":106,"Ūd":31,"šž":26,"šū":152,"ūg":56,"ūd":971,"ūc":206,"ūz":629,"ūs":815,"ūt":957,"ūv":256,"ūp":130,"ūr":2415,"ūk":417,"ūl":106,"ūm":112,"ūn":373,"ūš":146,"ūž":54,"Isl":22,"Irā":62,"Iva":31,"Izv":72,"Izr":29,"Itā":112,"Jac":27,"Jav":26,"Jau":217,"Jap":129,"Jan":40,"Jam":45,"Jag":28,"Ja ":27,"Jel":80,"Jer":41,"Jor":27,"Jon":25,"ア":23,"Joh":102,"KS ":29,"Jug":32,"Jup":83,"Jur":42,"Jum":22,"Kad":24,"Kab":22,"Kai":30,"Kam":79,"Kal":190,"Kap":52,"Kan":182,"Kau":72,"Kat":99,"Kas":57,"Kar":357,"Kaz":79,"Ker":28,"Ken":34,"Kem":29,"Kei":25,"Kir":46,"Kin":77,"Kij":27,"Kli":34,"Kle":26,"Kla":44,"Klu":132,"Kon":195,"Kom":112,"Kol":59,"Kos":58,"Kor":100,"Kop":106,"Kod":29,"Kok":40,"Kre":35,"Kra":70,"Kri":714,"Kro":35,"Kru":113,"Krā":28,"Kul":67,"Kur":259,"Kva":27,"Let":23,"Leo":37,"Lej":26,"Led":38,"Lau":64,"Lak":51,"Lai":103,"Lag":34,"Lat":1556,"Lar":29,"Lap":25,"Lam":24,"Lan":56,"Lab":56,"ML ":46,"Lib":28,"Lie":695,"Lim":30,"Lin":97,"Lit":29,"Liv":157,"Lut":24,"Luk":24,"Lud":28,"Lug":27,"Lor":22,"Lon":64,"Lok":22,"ūs 
":24,"ūpn":62,"ūra":1491,"ūt ":294,"ūsd":328,"ūnā":49,"ūrs":28,"ūru":316,"ūrv":45,"ūrm":32,"ūrn":45,"ūrg":22,"ūri":123,"ūks":153,"ūku":78,"ūka":36,"ūko":44,"ūma":25,"ūli":71,"ūmu":36,"ūna":154,"ūns":22,"ūnu":55,"ūni":45,"Mek":48,"Mei":30,"Men":40,"Mel":144,"Mer":74,"Met":75,"ūtā":37,"Med":58,"Mez":22,"ūzi":561,"Džo":145,"ūzu":22,"ūtī":135,"Džu":25,"Dže":88,"ūsi":58,"ūsm":68,"ūsu":37,"ūst":237,"ūta":126,"ūte":28,"Man":129,"Mal":86,"ūti":73,"Mam":22,"Mar":393,"ūto":32,"Mas":145,"ūtn":56,"Mag":45,"ūts":52,"Mad":99,"Mak":70,"Mai":65,"ūtu":111,"Mac":23,"īzā":35,"īzē":29,"ūvi":42,"Maz":108,"Mat":72,"ūve":114,"ūrā":242,"īvā":296,"ītī":76,"īzi":177,"īze":130,"ītē":23,"Mod":95,"Mol":50,"Mon":122,"Mos":23,"Mor":85,"Mot":31,"Moz":28,"NS ":43,"īvī":139,"Mež":62,"īvē":41,"Mih":26,"Mik":53,"Mie":24,"īrā":42,"Mic":50,"īvn":200,"īvm":36,"īvp":28,"īvo":1046,"īvu":146,"Mir":23,"īvs":154,"Mil":64,"Min":133,"īva":467,"īve":148,"īvi":298,"ūvē":84,"ītā":552,"NO ":49,"īz ":81,"īsā":29,"īlī":22,"īrs":118,"īru":139,"īri":268,"īsa":30,"īnā":148,"īmē":659,"Mur":22,"Mus":124,"īra":120,"īt ":399,"ītu":205,"īnī":56,"ītr":41,"īts":565,"ītn":23,"īto":115,"īti":392,"īst":852,"īss":36,"īmī":130,"īsl":28,"īsk":119,"īsi":181,"īkš":28,"īte":151,"īta":808,"īne":41,"īni":507,"īno":33,"īns":285,"īnu":169,"īme":446,"īmi":343,"īmj":45,"īmo":31,"īmu":34,"īms":40,"īna":462,"īkā":26,"īs ":468,"ījā":106,"īpa":426,"Sāk":57,"Rēz":54,"XX ":24,"кий":32,"Wor":39,"Wil":32,"Win":29,"War":28,"Vul":29,"Vor":36,"Vol":134,"Viņ":206,"Viļ":33,"Vis":230,"Vit":48,"Vla":36,"ču ":517,"Zaļ":27,"Zie":391,"Zin":62,"Zil":29,"Zel":41,"Zem":525,"之":22,"Tēr":50,"三":24,"на ":32,"Tā ":634,"Tāl":53,"Tās":150,"Sēr":28,"Sēl":24,"Rīg":820,"Svē":102,"Sys":26,"Sve":45,"Spē":56,"Sul":33,"Spā":87,"Str":115,"Stu":53,"Sti":60,"Sto":87,"Sta":349,"Ste":80,"Teh":22,"Tec":22,"Tem":25,"Teo":25,"Tei":33,"Tel":43,"Tam":51,"Tan":40,"Tas":583,"Tar":26,"Tau":65,"Tai":63,"Taj":81,"Tak":27,"Tal":71,"Sko":119,"Sku":25,"Ska":110,"Sim":41,"Sil":58,"Sig":27,"Sir":32,"Sin":63,"Sid":28,"Sie":22,"Sib":28,"Nāc":29,"Ser":100,"Sen":122,"Sel":26,"Sem":34,"Sek":22,"TV ":22,"Spa":42,"TS ":22,"Spi":27,"Spe":35,"Spo":27,"Soc":53,"Sol":25,"Som":114,"Son":25,"Slo":56,"Smi":29,"TP ":38,"Mēr":47,"Mēn":64,"Jūr":108,"Jūl":32,"SV ":351,"Run":31,"Rum":56,"Rub":29,"Sai":29,"Sah":30,"Sak":57,"Sam":35,"Sal":189,"Sac":23,"Sab":29,"Sae":27,"Sad":27,"Sco":41,"Sci":36,"Sch":39,"Sav":262,"Sat":49,"Sau":200,"Sar":143,"Sas":60,"San":93,"ови":22,"Mār":46,"Māk":28,"SI ":51,"Res":30,"Rie":159,"Rau":22,"Rec":23,"Red":33,"Rei":58,"Ren":27,"Rep":321,"Rob":99,"Rod":49,"SR ":106,"Ros":76,"Ron":23,"Rom":176,"SS ":24,"Līg":32,"Līd":101,"čūs":27,"Reģ":30,"SO ":60,"Pēc":116,"Pēt":62,"Vai":65,"Vel":97,"Ven":119,"Vei":37,"Vec":107,"ски":39,"ска":25,"ско":22,"Vas":82,"Van":50,"Val":406,"Var":89,"Vaš":22,"Vid":291,"Vie":179,"Vir":65,"Vil":141,"Vik":70,"Vin":38,"Ver":44,"Ves":70,"Ukr":55,"Ung":47,"Uni":125,"Urs":73,"Mūz":68,"Mūs":71,"Ērg":30,"Uz ":28,"Čīl":23,"ēcī":76,"ēdā":27,"Trī":27,"ēj ":73,"ēgu":33,"ēgt":73,"ēkl":37,"ēko":23,"ēki":215,"ēku":376,"ēkt":35,"ēks":180,"ēm ":819,"ēka":504,"ēji":561,"ēju":643,"ējs":400,"ējr":35,"ējp":24,"ējo":477,"ēl ":109,"ēdē":151,"ējd":44,"ēja":1223,"ēmu":365,"ēma":558,"ēmi":221,"ēli":303,"ēlo":171,"ēln":72,"ēls":126,"ēlu":113,"ēla":172,"ēle":379,"ēc ":930,"ēci":41,"ēdu":26,"ēdz":296,"Pāv":29,"Pār":113,"ēda":98,"VI 
":40,"ēde":49,"ēdi":142,"Ter":125,"Tet":23,"ēvs":32,"ērē":61,"ērā":124,"ērķ":215,"ēsē":144,"The":138,"ērī":91,"ēsā":29,"ēze":148,"Tib":26,"Tie":134,"ēsī":28,"Tim":41,"ētā":792,"Tir":25,"ēzi":135,"ērš":71,"ētī":178,"ēzu":41,"ēvē":143,"To ":233,"ēne":214,"ēni":174,"ēna":102,"ēns":137,"ēnu":91,"Top":34,"ējā":633,"Tor":80,"Tom":55,"Tos":31,"ēkā":56,"ēr ":112,"ējī":33,"ēpo":30,"ēlā":185,"ēs ":404,"ēt ":252,"ēlē":492,"Nīd":49,"ēra":721,"ērb":56,"ērd":31,"ēre":37,"ēmā":142,"ēri":669,"ērk":23,"Tro":39,"ērn":117,"ērp":23,"ēro":293,"ēlī":53,"ērt":389,"Tri":83,"ērs":217,"ērv":101,"ēru":97,"ēmē":83,"Tre":69,"ērz":31,"Tra":108,"ēnā":38,"ēkš":30,"ēsl":23,"ēsu":45,"ēst":566,"ēnē":31,"ēte":130,"ēta":1832,"ētn":71,"ēto":79,"ēti":598,"ētk":66,"ētu":414,"ētr":34,"ēts":396,"Tur":208,"Tuv":48,"Tuk":33,"Tul":26,"šdz":152,"šde":24,"ši ":834,"šel":77,"ša ":338,"šas":321,"šau":228,"šah":27,"šai":236,"šaj":205,"šam":126,"šan":3757,"ēģi":168,"Šve":65,"šte":79,"šta":234,"šst":29,"šum":55,"šus":57,"švi":22,"šve":23,"šva":72,"špa":34,"škā":87,"špi":125,"šos":132,"šot":31,"Ēģi":41,"špu":29,"šs ":266,"šre":66,"šru":52,"šsa":28,"šu ":1883,"šla":74,"šme":77,"вич":26,"šo ":346,"šni":32,"šno":38,"šie":334,"šif":27,"šin":270,"šim":31,"šis":80,"ēļa":42,"ēļ ":153,"ēķi":49,"švā":48,"šze":25,"ēļu":170,"bju":45,"bje":249,"biz":35,"bis":145,"bit":116,"bio":155,"bir":59,"bik":26,"bil":410,"bin":496,"bij":2367,"bo ":36,"blo":48,"ble":47,"bli":610,"bla":73,"bku":154,"bok":22,"bol":1514,"boj":283,"bni":61,"bež":545,"bs ":212,"biļ":23,"bpi":124,"biņ":28,"bon":217,"bor":108,"bot":84,"bos":35,"bov":23,"be ":419,"bam":32,"ban":81,"bak":69,"bal":1602,"bai":173,"baj":47,"bag":46,"bac":26,"bad":23,"baz":174,"bau":36,"bat":100,"bas":2875,"bar":192,"azī":447,"bi ":165,"bei":323,"bed":22,"ber":482,"ben":195,"bel":118,"bek":85,"bez":361,"bes":245,"bet":713,"bib":56,"bie":860,"brū":72,"− ":42,"buļ":79,"−C":44,"ca ":396,"car":55,"cas":201,"cat":29,"cau":337,"can":76,"cab":38,"cam":74,"cal":52,"ce ":431,"cba":35,"blē":52,"bri":385,"bro":61,"bra":298,"bre":171,"bu ":1529,"blī":48,"bru":342,"bsk":33,"bso":54,"bse":25,"bst":77,"bur":350,"bul":250,"bun":25,"bum":299,"bud":74,"but":64,"bus":88,"boļ":39,"bva":41,"brā":110,"brī":410,"aka":880,"am ":2221,"ake":96,"akc":136,"aki":53,"aji":783,"ajo":724,"ajs":38,"adī":860,"aju":73,"adē":277,"aiz":832,"al ":300,"aja":469,"aje":41,"aij":61,"aik":1200,"ail":137,"aim":532,"ain":809,"aip":45,"acī":139,"air":752,"ais":4315,"ait":1125,"aiv":51,"aig":508,"adā":819,"aie":25,"aid":426,"aic":101,"aib":25,"ahi":35,"ahs":67,"aht":36,"abī":81,"ahr":22,"abē":108,"aha":93,"agl":88,"abā":487,"agi":39,"agr":218,"ags":92,"agu":84,"agn":169,"ago":152,"ajā":2224,"anv":34,"anu":718,"anz":65,"ano":394,"ann":140,"ant":1775,"ans":674,"anr":123,"ane":123,"ang":611,"anh":30,"ani":1185,"anj":22,"ank":275,"anl":23,"ap ":332,"ana":3035,"anc":401,"and":1589,"amu":130,"amm":408,"amo":175,"amp":185,"ams":603,"ami":532,"ahā":45,"adž":91,"ame":775,"amb":217,"ama":1351,"ao ":25,"alv":1226,"alu":519,"alt":791,"als":1977,"alp":208,"alo":1383,"aln":628,"alm":121,"all":272,"alk":231,"alg":97,"ali":1208,"alc":61,"ald":1013,"ale":584,"alf":96,"agā":163,"ala":1330,"alb":172,"Šaj":103,"an ":784,"aks":1610,"akr":121,"Šar":24,"aku":213,"akt":789,"ako":151,"akn":91,"akm":124,"akl":47,"aba":1096,"abe":95,"abi":630,"abl":44,"abo":176,"abp":131,"abr":109,"abs":116,"abv":34,"abu":136,"ae ":655,"aca":38,"aau":77,"ad ":480,"ac ":37,"afr":25,"aft":53,"afi":105,"ai ":5352,"aga":835,"age":107,"ael":27,"aei":29,"ah 
":32,"afa":26,"ado":609,"adr":151,"adl":29,"adn":49,"adm":304,"adi":802,"ade":142,"adz":161,"ads":379,"adu":517,"ack":35,"aci":939,"ach":47,"ace":507,"ada":1586,"act":88,"acu":34,"azn":207,"azm":31,"azo":137,"azi":230,"arš":263,"azu":55,"atī":1487,"azs":68,"atē":140,"aze":46,"aza":217,"azd":25,"atņ":23,"avē":54,"avā":267,"arž":51,"arī":2864,"arē":195,"az ":62,"asā":83,"asī":143,"asē":82,"Šob":33,"atā":330,"aye":23,"auž":79,"ba ":1526,"azā":276,"atū":468,"auņ":57,"avī":79,"atš":186,"auš":105,"at ":261,"alē":137,"arh":312,"aiž":27,"arg":217,"amā":271,"are":686,"ard":512,"arc":143,"akļ":72,"arb":1417,"ara":2574,"arp":1342,"aro":671,"arn":121,"arm":206,"arl":146,"ark":821,"arj":66,"ari":685,"aru":580,"Šie":27,"arv":25,"amē":73,"alī":910,"arr":34,"ars":430,"art":883,"au ":159,"asa":1301,"ary":36,"anā":994,"asf":22,"akš":415,"asi":550,"ash":28,"asc":35,"ase":553,"aso":90,"asn":297,"amī":170,"asp":375,"ask":709,"asm":50,"asl":59,"ar ":6199,"apb":88,"apa":727,"Šep":43,"akā":218,"ape":72,"apd":220,"aiļ":41,"apj":56,"api":222,"aph":30,"apg":734,"aiņ":241,"apm":273,"Šei":27,"apl":157,"apk":491,"apo":44,"app":36,"apr":520,"aps":483,"apt":349,"apu":118,"apv":358,"apz":415,"as ":38125,"alā":492,"aiš":99,"ava":1146,"ax ":25,"auz":73,"auv":33,"aut":1629,"avs":157,"apī":65,"avr":23,"avo":299,"avp":76,"anš":45,"avi":843,"ave":232,"ay ":49,"Šo ":72,"avv":82,"avu":357,"arā":580,"av ":331,"atb":314,"ata":845,"asu":217,"ast":3489,"ass":194,"anč":183,"anē":182,"asv":32,"atm":195,"atn":153,"atk":493,"atl":205,"anī":82,"atr":1790,"ato":1405,"atp":124,"ate":661,"atf":33,"Šis":89,"atc":32,"atd":107,"ati":895,"atj":54,"atg":171,"ath":38,"auc":511,"att":979,"ats":614,"alū":32,"atv":2153,"atu":973,"atz":132,"aul":1187,"aum":53,"aun":922,"aup":26,"aur":568,"aus":1232,"aud":1114,"apā":234,"aug":1739,"auj":444,"auk":1608,"Vēs":43,"Vēr":24,"ος":59,"ος ":59,"ς ":97,"ν ":22,"Zvi":157,"Zva":55,"α ":62,"Vār":66,"Vāc":268,"ēša":752,"Н ":147,"ий ":35,"ич ":26,"jeb":1453,"jer":106,"jek":399,"jel":23,"jen":64,"jet":57,"jev":88,"eān":337,"eāl":99,"eāt":94,"ji ":637,"aža":89,"aži":139,"ažk":37,"ažo":204,"ažr":77,"ažu":75,"ļģi":26,"jad":67,"izē":509,"jas":8640,"jau":557,"jap":53,"jar":34,"jak":65,"jan":85,"jam":931,"dēš":109,"jai":593,"izņ":119,"izš":65,"jda":46,"jni":88,"jol":22,"jon":751,"jom":89,"jot":574,"jos":350,"jor":71,"js ":1627,"dīb":475,"dīc":65,"jiņ":29,"jpu":34,"ск":94," zī":183,"jis":272,"jie":1464,"то":24,"ст":46,"jko":24,"jo ":387,"ažā":482,"itm":69,"itl":241,"itr":232,"inī":182,"itp":32,"ito":983,"itu":694,"itt":30,"its":171,"ity":36,"imš":47,"isk":6070,"ism":951,"isl":163,"iso":147,"isn":115,"imī":127,"isp":195,"iss":145,"inč":41,"isu":253,"ist":3658,"isv":78,"inē":330,"ita":561,"ite":695,"ith":30,"iti":844,"ilš":47,"inū":22,"ivs":26,"ivr":48,"ipī":32,"ivp":30,"ivo":171,"ivv":36,"ivu":117,"irā":703,"inž":48,"ius":102,"iur":38,"ium":46,"ipē":33,"iva":204,"ivd":29,"dē ":186,"ix ":38,"ivi":487,"inš":34,"ivj":45,"ivk":27,"ive":291,"ipr":208,"ipo":63,"ipu":70,"ips":96,"ipt":115,"ipi":127,"igž":89,"ipl":132,"cīb":793,"cīg":288,"ilā":148,"is ":8439,"ion":1944,"iop":23,"ašī":283,"ior":31,"igū":45,"ios":84,"iot":138,"ijā":2772,"iog":37,"iok":51,"iol":177,"iom":40,"ipa":229,"ikā":1450,"ipe":167,"iov":23,"ir ":14672,"imē":165,"iru":216,"irs":599,"irt":236,"ilī":94,"irr":24,"iro":905,"irm":929,"irn":124,"irk":171,"iri":558,"isi":459,"ikš":84,"ish":36,"inā":3097,"ise":117,"isc":158,"isb":151,"isa":605,"irz":314,"cīt":112,"cīn":190,"cīj":23,"cīk":97,"imā":119,"ire":99,"irg":402,"irb":90,"ira":259,"ird":185,"irc":39,"it 
":228,"ilē":113,"cīz":63,"itū":160,"ivī":107,"izā":483,"dēļ":202,"ja ":7647,"itā":1332,"irī":150,"dēs":22,"dēt":124,"dēv":144,"dēj":602,"dēn":29,"dēm":188,"cīņ":106,"dēl":80,"isā":208,"irē":31,"iz ":254,"ivā":236,"itļ":164,"izz":43,"izu":55,"izv":798,"itī":222,"izr":274,"izs":752,"izt":200,"izp":558,"izo":166,"izn":131,"izm":1077,"izl":692,"izk":139,"izj":45,"irš":41,"izi":513,"izg":245,"ize":159,"izd":376,"izc":301,"izb":41,"iza":170,"itē":104,"isī":84,"eģi":394,"kij":77,"kil":129,"kia":33,"kie":620,"eģe":52,"kin":233,"ļās":38,"kip":24,"kir":36,"kis":66,"ļām":48,"keā":336,"km ":504,"kga":63,"ki ":1566,"kgr":28,"kaļ":69,"kaķ":31,"kho":57,"kaņ":374,"kej":204,"kel":71,"ken":50,"kes":27,"ker":86,"ket":457,"ļā ":507,"fāt":30,"ke ":111,"kci":508,"kdi":30,"kra":920,"kre":163,"klē":111,"kt ":222,"eīn":30,"klā":396,"ksa":250,"kse":83,"kmē":88,"ku ":2253,"kro":296,"klī":65,"kru":265,"kri":985,"koz":38,"kov":64,"kot":513,"km²":49,"kos":513,"kor":259,"kop":1110,"koo":61,"kon":1216,"kom":1199,"kol":578,"kok":286,"koj":143,"koh":29,"kog":34,"koe":27,"kod":306,"ks ":1859,"kme":260,"kne":183,"klu":183,"kls":75,"ko ":1627,"kle":136,"kla":494,"klo":153,"kli":573,"ļēj":87,"dīš":155,"još":397,"jus":320,"jum":2601,"jur":89,"jvi":27,"jsi":45,"būv":217,"būs":22,"būt":380,"eču":47,"ju ":3050,"dīg":518,"dīj":452,"dīn":38,"dīt":668,"dīz":28,"jra":36,"eče":56,"kaz":48,"kav":162,"kat":1010,"kau":385,"kar":1806,"kas":6913,"kap":301,"kan":621,"kal":1136,"kam":395,"kaj":1054,"kak":37,"kai":2823,"kad":430,"kab":198,"ka ":3723,"juš":280," Ga":455," Bā":54," Ge":158," I ":103,"guļ":52," Fo":211," Fu":230," Fr":535,"ļš ":124," Fi":190," Fl":70," Ha":441,"aķu":37," He":345," Bē":72,"aķi":22,"guš":31," J ":40," Go":172,"aķe":127," Gr":516," Gu":162," Gv":31," Gi":68," Gl":86," Ig":193," Dā":72," Ie":156," Id":25," Ib":23," K ":22," Cē":86," Hu":54," Bī":48," Ho":304,"ha ":116," Hi":232," Je":179," Ja":604," Iz":255," Iv":35," Dē":23," Ir":159," Is":56," It":130," Im":77," In":594," Ik":26," Il":59,"ham":85," M ":45,"han":159,"hai":42," Ka":1303,"hal":100," Ke":174,"hau":35," Ki":234,"har":152,"has":51,"hat":24,"aļ ":30," Jo":237," Ju":233,"hae":23," N ":55," La":2100," Le":272," Gā":24," Li":1175," Kl":266," Ko":784," Kr":1044," Kv":49," Ku":433," Ma":1344," O ":55,"aļu":335," Mi":483,"aļv":40," Dž":308," Me":654," Hā":28,"he ":281," Lo":254,"aļa":796," Ly":24," Lu":187,"aļi":184," Ne":334," P ":63,"а ":100," Na":328," Ni":163,"cā ":43," Mo":496," My":39," Mu":257," Hē":35,"hek":42,"hel":84,"hei":58," A ":102,"С ":41,"het":54,"her":196,"heo":49,"hen":64,"hem":51,"cāk":136,"cām":35,"hi ":25," B ":49,"cās":52," C ":203," Ap":260," Am":327," An":736," Ak":149," Al":533," Ai":198," Ag":50," Ah":36," Af":50," Ac":82," Ad":148,"aļā":556," Ab":144," Ba":1003,"aļē":89," D ":68," Az":34," Av":74," Au":758," At":403," As":243," Ar":519," Be":444,"hie":34,"hid":182,"hic":23,"hib":51," Bi":327,"hip":128,"hin":98," Bl":114,"him":44,"hil":44," Bo":338,"hij":70," Br":524," Bu":238,"his":79,"hit":143,"aļķ":22," E ":56," Ca":270," Ce":344," Ci":203," Ch":131," Cl":40," Cr":39," Co":292," Cu":37," F ":24," Da":629," Di":673," De":398," Dr":102," Do":202," Dz":227," Du":106,"hn ":22," Eb":22," Ed":95," El":193," Ek":92," Ei":676," Eg":26," Et":48," Es":44," Er":98,"hlo":75," Eq":185," En":121," Em":90," Ez":43," Ex":28," Eu":85," Ev":42," Fe":226,"aņa":180," Fa":110," H ":140,"aņd":30,"gma":49,"go ":413,"gme":45," Rī":858," Pū":23,"glu":49,"glo":94," Sā":102,"gls":48," Z ":22,"gle":241," Rē":60,"gli":344,"gla":305," Wo":56," Wi":95," We":44," 
Rā":30," Wa":65,"й ":60," Rū":43,"gog":33," Zu":31,"god":68," Zv":222," Zo":72,"aģē":26," Tī":49," Ze":604,"gno":38," Zi":551,"gni":35,"bāš":52," Za":124," Sī":40," Tē":78,"gna":34," Yo":26," Tā":889," Sē":77," Vī":58,"gs ":1150,"glā":76,"о ":42," Vē":121,"н ":27,"gol":71,"gon":119,"gos":60,"gor":256," Vā":354,"got":83,"gov":43,"gu ":870,"gnā":78," a ":72,"р ":25,"glī":122,"gro":67,"gru":596,"gra":755,"gt ":56,"gri":552,"gre":213,"aīs":153,"gtu":35," R ":37," Jē":211," Ov":27," Os":199,"gto":46," Ot":132," Or":178,"gts":50," Op":65," Po":516," Pl":245,"gum":401," Pi":541," Ph":55,"gul":224," Kā":157,"gua":28," Pe":329,"gub":86," Pa":1447,"gud":66,"gst":733,"gnē":109,"gsn":33," Nu":37," No":486," Ol":289," Ok":85," On":42," Oh":28," Og":53," Od":50," Jā":93," Of":27," Ob":41,"gta":111," Ra":371,"д ":27," Lē":28," Qu":23,"goš":45," Lī":215," Ro":565," Re":681,"grā":671," Ri":308," Rh":25," Mā":133," S ":101,"guv":148,"gur":196," Pr":570," Ps":28,"gus":188," Pu":147,"gun":194,"gvi":84," Lā":40,"gva":31,"bēr":112," Sy":39,"grī":79,"bēt":95," Sv":220," Su":203," St":793," Ta":1092," V ":48," Tj":24," Th":177," Ti":329," Te":399," Tr":346," Lū":27,"ļūt":29,"ļūs":33," Nī":78," To":521," Mē":139," Jū":171," Ru":196,"ļūd":92,"grē":114," Sa":1313," U ":30,"е ":61," Sh":52," Si":395," Sc":134," Se":442," Nā":75," So":290," Sp":307," Mī":29,"bēj":25," Sk":316," Sl":112," Sm":70,"bēm":82," Sn":22," Pē":221,"grū":45," Uz":96," Va":790," X ":41,"и ":54," Ve":526," Vi":1347," Vl":38," Pī":38," Vo":205," Vu":40," Tu":361," W ":26," Pā":146," Ug":37," Uk":57," Ul":29,"gzd":28," Un":191," Up":44," Ur":133,"gzn":376," Mū":173," ja":682,"cēš":42,"iak":26,"ь ":24,"iam":59,"ial":119," iz":4946,"ian":220,"ias":27," je":1511,"iat":108," im":336," in":1344," ik":136," il":190,"ic ":183," dē":302," iv":22," is":92," it":111," cī":105,"iag":25," ir":14600,"ibl":76," ka":7945,"ibi":285,"ibo":30," m ":158,"ibr":57," ki":241," fā":22,"ibu":58," ke":72,"iaz":26," jo":407," dī":55,"id ":25,"iba":66," bū":407,"ibe":125," ju":119," bē":90," ha":132," he":218," aģ":61," gl":287," gr":1494," go":110," gu":155,"ia ":270," gs":71," cē":148," id":175," dā":101," ie":4455," ig":94," hi":297," hl":48," ho":304," bī":86," hr":108," ht":33," hu":35,"iet":4005,"iev":1792," ni":63,"iez":227,"iel":3446,"iem":8120," ne":2383,"ien":7676,"iep":380," na":986,"ier":1197," p ":50,"ies":2658,"iee":100,"ied":2745,"ieg":992,"iek":4508," mu":445,"iej":39,"iea":35," mo":718,"iec":1164," mm":35,"ieb":72," ok":512," ol":76," on":40,"dāv":49," og":135,"ifo":180,"dām":160," of":307," jā":140,"dān":30,"ifs":22,"dās":334,"dāt":198,"dār":218,"ifr":46," ob":243,"dāf":28,"ife":75,"dāl":53,"dāj":251,"ifi":245," nu":108," no":9749," gā":212," le":458,"icr":40,"ics":32,"ict":31," li":2852,"icu":63," n ":74,"ico":88,"ick":37," eļ":37,"icl":22," la":3298," kv":261," ku":4521,"ici":543," cū":37,"ich":68,"ice":118,"ie ":1698," km":554,"ica":238," kl":531," kr":1914," ko":4523," me":1393,"idz":312," dž":28,"idu":626," mi":1711,"ids":336,"я ":56,"idr":632,"ido":1694,"idp":27,"idm":53," ma":1802,"idn":29," lu":115,"idi":317," lv":27,"ide":638,"idd":33,"ida":1167," lo":437,"dā ":1681," af":23," ag":155,"aša":197,"idā":226," ab":209," ac":105," ad":392," am":414," an":722,"aši":302," ap":4517,"ašn":47," ai":846," ak":503,"ašl":70," al":593," av":171,"ašs":53," au":2262,"ašr":63,"icī":260," ar":6236,"ašv":86," at":4655,"ašu":167," as":496," d ":35," ba":1482,"idē":338,"il ":50,"ija":12639," bi":2895,"ije":54," be":1390,"iji":165," bo":133," 
bl":146,"idī":348,"ijs":425," bu":382,"iju":1715," br":766," ca":410," e ":62,"im ":312,"ika":3320,"ige":58,"iga":284,"ii ":23,"igl":56,"igm":40,"igh":40,"igi":68,"igu":73,"igs":27,"igr":58,"igo":33,"ign":102,"ij ":163,"igz":407," b ":53,"iha":58,"ihi":54,"ihs":70," Zī":28,"iho":104,"ibī":64,"ik ":182,"icē":211," c ":63," er":23,"imo":88,"imn":195," et":118," es":227,"ims":83," en":251," em":85," ep":55,"imp":593,"idž":50,"imf":36," ei":45," el":849,"ime":522," ek":494," ef":77,"imi":811,"ieš":1802," fe":288,"ip ":24,"inc":281,"ind":539,"ina":1252," fa":282," ez":655,"imt":1282,"imu":237," ev":61," fu":829,"inn":34,"ino":729," fr":351,"ļļu":39,"inr":24,"int":1230," fo":740,"ins":796,"inf":318,"ašā":181,"ine":633," fl":69,"inh":61,"ing":589,"iež":570,"inj":23," fi":803,"dāš":37,"ini":1119,"ink":89," ge":43,"cī ":27," bā":233,"ioa":45," ga":5014,"iod":270,"ļļa":40,"inu":278,"inv":33," i ":44,"iko":225,"ikn":25," cm":50,"ikm":90,"ikl":215," co":68,"iki":144,"ikg":62," ce":962," ch":25,"ike":59,"ikd":24," ci":2295,"ila":297,"ilb":52," da":4272,"in ":197,"ieķ":378,"ikv":30," cu":46,"ikt":677,"iku":1045,"ikr":195,"iks":380," do":494,"ilp":256,"ilo":443,"ill":168," dr":234,"ilk":121,"iln":265,"ilm":237,"ieņ":198,"ilh":58,"ilg":175," de":1205,"ilj":175,"ili":579,"ild":409,"ilc":81,"ieļ":23,"igā":206," di":3087,"ile":125,"ima":242,"imb":308,"ч ":32," g ":36,"io ":204," eb":89," du":128,"ilz":47,"idū":60," dz":3456,"ils":2044,"ilt":346,"ilu":127,"ilv":752,"ль":27,"hs ":112,"bīg":65," sū":56,"bīd":30," vē":1059,"ми":22,"bīb":430,"ло":25," vā":1227," tī":340," zo":142,"ла":30," zu":23,"ле":23," rū":113,"ли":29," zv":544,"hok":186,"hol":226,"hom":35,"hon":30," za":193,"ко":45,"hos":37," ze":740,"hot":32," zi":2010,"hop":25,"hor":75,"ка":54," tē":247,"ки":52," sī":84,"hni":146," sē":228," tā":1796,"hno":151,"aņu":241," pū":63," rī":133,"aņe":35,"hme":29,"ин":34,"aņi":61," ww":36,"ий":38," sā":685,"ич":34,"aņo":46,"ри":31,"ро":53,"ра":42,"ре":36,"htt":39," zā":53,"hst":50,"ос":22,"ор":37,"ол":32,"aņģ":36,"ов":87,"hu ":43,"ое":22,"aņē":59,"нс":22,"но":31,"hro":158,"aņā":78,"ни":29,"hri":23," tū":103," vī":192,"ht ":30,"на":44,"bīr":30,"bīt":96,"bīs":120,"bīn":33,"cēt":118," ru":206," jū":849," mē":678," u ":116," sa":6813," nā":219," sf":46," se":1483," sc":42," si":1753," sh":33," sn":112," sm":289," sl":596," sk":1357," mī":111," sp":2239," so":351,"ве":29," qu":23,"ви":41," lē":170," t ":61,"во":24," ra":2342," kļ":152," re":2229," mā":826," ri":726,"cēn":32," kņ":49,"cēm":41,"cēl":179," ro":913,"cēj":228," lī":2107," pu":1266," pr":3156," ps":120," s ":86," lā":230,"ļņu":56,"ва":29," os":114," gū":38," ot":330,"ļņi":27,"hum":50," op":221,"ав":30," or":1147,"ан":46,"ļņa":76," jē":154,"ал":24," pe":651,"cē ":120," kā":2571," pa":8411,"ар":38," pc":24," pl":1389,"ая":29," po":1148," pi":5908," rā":73," x ":60," va":7044," ve":2127," pē":1285," uz":2903," vo":159," pī":83," vu":52," vi":7158,"ес":31,"ер":46,"ен":52," tv":38," tu":790," mū":750," ut":55," ur":52," up":232," un":10091," ug":66," pā":1765," ta":1745," st":3025," kū":39," sv":412," su":879," nī":29," tr":1203," lū":49," to":1082," th":155," ti":3365," te":2144,"aāt":29,"fi ":28,"fes":176,"fer":133,"feo":25,"fed":168,"ņi ":361,"feb":35,"fen":84,"fek":185,"fel":37," Ča":35,"ņin":63," Či":48,"ņie":244," Če":124,"fga":33,"faz":23,"fas":28,"ezī":69,"far":40,"fan":93,"fak":97,"fal":45,"fai":36,"ņbr":28,"fab":102,"ņem":387,"ņda":33,"fe ":34,"ņos":47,"ņot":124,"ņoj":49," Čī":26,"evē":231,"evī":174," ēz":45," ēt":31,"ņra":135,"fa ":173," Ēr":64,"ņs 
":24," ēd":44," ēr":63," ēk":65,"esē":107,"esī":292,"erņ":62,"etā":683," Ču":25,"ez ":111,"erē":156,"erī":393," če":400,"ņji":23,"erģ":206," ča":23,"esā":195,"ezv":29,"ezu":188,"evā":40,"eza":38,"ezd":35,"etē":183,"ezm":33,"ezn":141,"ezo":141,"ezp":31,"etī":89,"erū":27,"ezt":29,"eze":817,"ezg":53,"erš":24,"ezi":201,"ezk":125,"etb":351,"eta":734,"ete":624,"eti":845,"elš":89,"eth":26,"etn":170,"etl":29,"esp":338,"emī":84,"esn":43,"eso":286,"est":937,"ņoš":22,"esu":212,"enč":28,"esr":33,"ess":340,"enē":213,"esv":35,"ev ":24,"epā":130,"emš":50,"eto":1378,"enī":969,"etr":698,"ets":222,"ett":58,"etu":1584,"etv":168,"ew ":38,"eve":83,"eva":376,"evo":146,"evn":29,"evi":1543,"enš":59,"eut":31,"eus":24,"ex ":26,"erā":709,"evu":397,"evr":24,"evs":91,"ey ":55,"epe":71,"ekā":290,"epi":258,"epj":76,"eph":37,"er ":1104,"epa":258,"eot":23,"egū":271,"eos":34,"eor":322,"ņu ":2105,"eom":53,"eol":176,"eop":39,"eon":61,"elā":1125,"es ":8988,"ept":197,"eps":32,"epu":444,"epl":47,"epo":71,"epr":190,"erk":66,"erl":157,"eri":2539,"erg":167,"ere":536,"emā":318,"erf":37,"ekļ":537,"erc":178,"erd":99,"era":1626,"erb":238,"et ":1093,"elē":112,"esk":278,"esl":110,"esm":388,"enā":1113,"ņus":89,"esi":374,"ekš":656,"esc":24,"ese":289," Ēģ":41,"esa":337,"erz":36,"ery":29,"erv":331,"eru":391,"emē":508,"ņve":32,"err":36,"elī":103,"ert":417,"ers":1648,"ern":467,"erm":855,"erp":67,"ero":595,"eki":366,"ekl":427,"ekm":150,"ekn":62,"eko":416,"ekr":305,"eks":1397,"ekt":1750,"eku":811,"ekv":109,"en ":402,"elb":111,"ela":1449,"eld":81,"egā":92,"elf":85,"ele":1367,"eli":980,"elj":22,"elg":122,"elm":98,"eln":210,"elk":50," Ģe":106,"ell":209,"elo":182,"elp":198,"elu":464,"elv":59,"els":567,"elt":409,"elz":306,"eo ":63,"emb":176,"ema":412,"edž":46,"emg":143,"ehā":103,"eme":2607,"emd":28," ģe":323,"emn":25,"emo":348," ģi":768,"emi":389,"emj":71,"emt":133,"emu":60,"emp":493,"ems":89,"ep ":41,"ene":1004,"eng":203,"enb":97,"ena":2039,"end":388,"enc":397,"eno":1358,"enp":40,"enm":45,"enn":52,"enk":331,"enl":88,"eni":1040,"enu":734,"env":1058,"ens":2464,"ent":2847,"ehī":26,"enr":79,"enz":110,"egš":47,"eog":110,"eof":26,"ejā":89,"eod":60,"egl":203,"ego":210,"ege":31,"egi":29,"eaģ":26,"egr":83,"egs":36,"egt":159,"egu":482,"egv":44,"ehn":276,"eho":29,"ecā":200,"ehi":58,"ek ":1542,"eib":23,"eic":317,"ecē":53,"ecī":715,"eis":229,"eir":180,"eim":142,"eil":42,"ein":253,"eih":26,"eik":735,"eie":54,"eid":2544,"eig":207,"edā":218,"eja":808,"el ":113,"edē":86,"eiz":614,"eit":215,"eiv":56,"ejs":177,"ebū":23,"edī":123,"ejo":81,"ejn":76,"eji":80,"eke":31,"ekc":120,"eka":402,"em ":6412,"eju":260,"ejv":23,"gaž":22,"git":41,"gis":49," īs":256,"gin":40,"aģe":63,"gie":240," īp":415,"aģi":37,"ght":32," Īr":60," Īs":47,"gaļ":93,"bās":254,"bāt":57,"bāz":178,"gi ":519,"bāj":44,"bāl":44,"bāk":295,"bān":43,"bām":208,"gen":244,"ger":118,"ges":34,"gel":54,"gej":38,"bā ":646,"ge ":89,"gab":646,"gad":2505,"gai":779,"gas":1682,"gar":786,"gau":305,"gat":362,"gav":387,"gaj":108,"gam":98,"gal":1339,"gan":1451,"ga ":1326,"frī":26," ķē":68,"aēl":30," ļo":206," Ķī":169,"frē":39," ļa":118,"žņu":81," ķī":292,"Ņuj":43,"fut":631," Ļe":28,"fta":36,"fun":189,"ft ":61,"fra":303,"fre":65,"fri":213," Ķe":55,"fu ":37," ķi":28,"fro":69,"aču":132," ķe":273,"for":1089,"fos":41,"fot":77,"fon":280,"fol":75,"ņak":24,"ņam":33,"ņas":543,"ņaz":78," ņe":38," Ņu":39,"fs ":175,"ņai":51,"ņa ":1018,"fle":24,"fla":25,"fli":29,"flo":42,"fic":294,"fig":49,"fij":235,"fil":435,"fik":181,"fin":189,"fir":37,"fis":214,"fiz":287,"da ":3140,"de 
":574,"dac":25,"dab":346,"dak":25,"dal":823,"dai":150,"dag":58,"dae":523,"dat":711,"das":2562,"dar":1471,"dap":69,"dan":60,"dam":271,"dau":771,"dda":35,"cul":28,"cum":72,"cty":34,"cto":75,"cti":74,"cte":48,"cus":46,"cyo":60,"ceļ":571,"cle":31,"co ":36,"ciā":742,"cog":76,"con":50,"col":50,"com":55,"cor":52,"cop":28,"cot":97,"ciņ":28,"cs ":85,"ct ":32,"cro":67,"cu ":436,"cea":28,"ch ":124,"cer":171,"ces":699,"cet":115,"cen":1090,"cep":69,"cek":125,"cem":35,"cel":374,"ced":32,"ci ":164,"cha":72,"cia":105,"ck ":59,"cie":767,"che":97,"chi":53,"cho":24,"chn":24,"civ":69,"cij":3939,"cik":179,"cil":946,"cim":32,"cif":53,"cir":29,"cis":561,"cit":901,"ciu":34,"cin":303,"cio":768,"cip":250,"cm ":47,"cke":32,"cka":44,"ed ":130,"eba":49,"ebe":168,"ebi":87,"ebk":175,"ebo":22,"ebr":247,"ebs":44,"eak":105,"ean":79,"eal":83,"ear":51,"eas":36,"eap":54,"dzī":2001,"eat":203,"eau":46,"eb ":1292,"dzā":72,"dzē":373,"ea ":66,"efi":131,"efo":131,"efa":30,"efe":198,"ei ":338,"ega":166,"efs":31,"eej":99,"een":27,"edi":335,"ede":1042,"eda":646,"edz":1195,"eds":41,"edu":254,"edo":120,"edr":685,"eck":45,"ech":44,"eci":633,"ece":209,"eca":89,"ee ":59,"ecu":116,"ect":70,"ecp":24,"eco":62,"drī":349,"dz ":1238,"doš":285,"drā":113,"dy ":22,"dvi":33,"dve":34,"doņ":93,"dur":107,"dus":742,"dva":54,"duš":31,"dzv":49,"dzs":170,"dzu":124,"dzo":52,"dzn":78,"duā":33,"dzi":2913,"dze":1097,"dza":294,"dor":118,"don":402,"dom":421,"dol":232,"dok":188,"dow":29,"dov":34,"dot":762,"dos":455,"diņ":89,"dpo":22,"ds ":1372,"dmi":319,"dne":44,"dni":257,"diā":75,"dob":64,"dod":87,"doe":23,"doj":533,"dnī":68,"dun":24,"dum":173,"dul":78,"duk":158,"dug":33,"dub":73,"dua":23,"duc":94,"dri":640,"diž":52,"dra":350,"dre":139,"du ":1832,"dro":640,"drs":87,"dru":292,"dsi":206,"dsk":23,"dsm":48,"daļ":1548,"dic":163,"dia":195,"dib":246,"der":1382,"des":875,"det":70,"dev":375,"deb":147,"dea":22,"ded":49,"dec":38,"def":128,"deh":29,"deg":170,"dej":161,"dei":89,"del":244,"dek":100,"den":1101,"dem":139,"dep":138,"deo":95,"di ":661,"deļ":51,"dko":25,"dma":88,"do ":341,"deņ":285," Āz":143,"div":810,"diu":34," Ār":50,"dim":118,"din":621,"dio":398,"dip":35,"dir":59,"dis":577,"dit":40,"die":2511," Āf":113,"dif":53," Āg":35,"dig":41," Ād":23," Āb":22,"dij":481,"dik":129,"dil":24," ār":250," āt":154," āp":26,"daž":632," ād":29,"ižu":32,"rgu":304,"raļ":69,"rhe":62,"raķ":99,"rha":55,"rhi":273,"rbī":529,"māz":34,"raē":35,"māt":506,"iža":92,"rga":943,"jš ":26,"ri ":1063,"ižk":30,"rgi":83,"iže":26,"rge":111,"rbā":31,"rgs":168,"rgo":78,"ret":778,"res":696,"rev":103,"reu":25,"rez":384,"rfa":28,"māc":507,"māj":191,"māk":627,"māl":154,"mām":145,"mān":158,"rfo":45,"mār":102,"mās":220,"rdu":246,"rds":349,"rg ":40,"rdz":219,"reb":22,"rea":246,"ree":37,"ref":132,"rec":144,"red":582,"rei":822,"rej":271,"reg":233,"rem":185,"ren":387,"rek":208,"rel":259,"rer":29,"reo":23,"rep":144,"mā ":1508,"rda":365,"kļu":259,"rct":35,"rdo":96,"rdn":63,"rdi":228,"rde":305,"re ":399,"ņām":78,"ņās":22,"rbu":231,"rbs":88,"rco":80,"kļo":52,"kļi":136,"rci":226,"rch":63,"rce":59,"rca":34,"kļa":421,"rax":25,"raz":76,"rd ":116,"rap":69,"rar":28,"ras":4121,"rat":1049,"rau":810,"rav":242,"rbi":268,"rbo":531,"rba":232,"rbe":87,"raj":717,"lēš":123,"ņā ":197,"rai":643,"rah":53,"rag":283,"ran":1071,"ram":1238,"ral":520,"rak":1567,"rab":128,"raf":152,"rad":1141,"rac":100,"rpt":521,"rpu":97,"rpr":64,"rpp":23,"rpo":72,"rs 
":2853,"rpe":30,"rpa":43,"riķ":45,"riņ":229,"rpl":24,"rpi":90,"rkā":93,"ror":63,"ros":500,"rot":765,"rom":435,"ron":900,"roo":23,"rop":1052,"roz":140,"rou":41,"rov":266,"rob":624,"roa":30,"rod":1744,"roc":512,"lī ":122,"roj":521,"roi":35,"riģ":56,"rol":216,"rok":423,"rof":236,"roe":58,"rog":545,"rno":46,"rns":56,"rnu":187,"rp ":573,"rgļ":35,"rna":307,"rež":135,"riā":400,"rne":333,"rni":347,"māš":32,"rmk":26,"māņ":51,"rmo":304,"rms":203,"kņu":24,"rmu":139,"ro ":308,"kņa":79,"rma":773,"rme":418,"rdž":57,"rmi":499,"reš":174,"rls":24,"rlo":32,"rli":126,"rgā":217,"ižā":28,"rld":35,"rle":24,"rla":201,"rn ":84,"rku":265,"rkt":251,"rks":248,"kļū":96,"rkn":35,"rko":61,"rki":73,"reģ":361,"rkl":58,"rke":63,"rka":377,"rbū":24,"reč":45,"rdī":30,"rja":44,"reā":83,"raž":305,"rje":48,"riz":128,"rdē":22,"rip":74,"rio":330,"rit":1293,"ris":2568,"riv":81,"riu":29,"rih":80,"rig":98,"rij":2366,"raš":66,"ril":99,"rik":885,"rin":683,"rim":290,"ria":161,"rib":107,"ric":188,"rid":231,"rie":3740,"rif":114,"rdā":161,"rk ":63,"roš":448,"mēd":92,"rož":23,"rsā":44,"mēj":294,"mēm":78,"mēl":71,"mēn":131,"mēs":84,"mēr":1231,"mēt":199,"rsē":59,"rtā":193,"rpā":40,"rud":44,"ruc":33,"rur":31,"roī":36,"rup":644,"run":357,"rum":1852,"ruk":350,"ruz":42,"rpē":106,"rus":878,"rut":84,"rva":432,"mē ":626,"rvi":327,"rve":210,"rvs":22,"līč":64,"roņ":62,"rvu":63,"ry ":107,"rsk":108,"rsl":46,"jūl":25,"rsi":451,"rkš":28,"rso":354,"rsp":142,"rsm":215,"rsn":57,"jūd":35,"rsd":23,"rsa":148,"rse":69,"rnā":130,"rta":1012,"jūt":39,"rst":685,"rss":51,"jūr":881,"rnē":53,"rsv":73,"rsu":128,"rtl":32,"rtn":59,"rto":248,"rnī":124,"rte":168,"rth":30,"rti":792,"rts":333,"rtr":97,"roč":76,"rtu":328,"līm":186,"līn":460,"riš":83,"līj":172,"līg":392,"līc":164,"līd":1562,"līb":213,"rt ":437,"līz":66,"līv":121,"līt":444,"rri":36,"rre":54,"rmā":762,"rra":96,"ru ":2726,"rmē":77,"rlī":45,"sab":330,"sac":442,"sad":310,"sag":154,"sai":857,"mēš":100,"saj":138,"sak":466,"sal":1082,"sam":248,"sbi":144,"rzī":41,"sap":162,"san":223,"sau":2277,"sat":285,"sas":1660,"sar":747,"sav":1100,"saz":48,"sa ":1042,"mēģ":49,"rsū":33,"rvē":141,"ruš":59,"ruņ":155,"rze":250,"ruā":39,"rza":50,"rtē":197,"līš":88,"rvā":75,"rzs":23,"rtī":278,"rzi":284,"saņ":59,"sho":31,"shi":22,"si ":826,"kš ":48,"sga":35,"nāv":198,"nāt":1807,"nās":1024,"sgr":33,"saī":148,"siz":24,"saž":44,"sie":635,"sid":107,"kšd":149,"sic":34,"sib":36,"kša":264,"sia":56,"sk ":23,"kšt":50,"sit":335,"kšu":53,"sir":69,"sis":1127,"kšs":52,"kšp":164,"sip":27,"sin":756,"kšn":32,"sio":148,"sil":229,"kšl":23,"kšm":75,"sim":594,"sij":423,"sik":99,"kšk":106,"sih":129,"sif":65,"sig":89,"nā ":1513,"sda":48,"sdi":356,"sbu":40,"se ":542,"sca":22,"sce":66,"sci":117,"sch":88,"sco":39,"sev":271,"ser":334,"ses":362,"set":54,"sfa":26,"sez":117,"sh ":43,"nāj":705,"nād":289,"nāc":166,"sfo":68,"nār":126,"nāl":1149,"nāk":422,"nām":427,"sdz":22,"sei":104,"seg":72,"sed":36,"sec":62,"sep":119,"sen":630,"sem":62,"sel":189,"sek":324,"sej":83,"spu":61,"spo":584,"siņ":49,"kšņ":49,"spr":198,"skā":2459,"spe":377,"spl":61,"spi":604,"kšķ":38,"spa":151,"sot":106,"sov":22,"sol":116,"som":91,"son":503,"sor":227,"sos":35,"sod":41,"sof":59,"kšģ":28,"soj":40,"soc":383,"su ":834,"smē":44,"nču":194,"sjū":46,"slī":114,"kšž":25,"smā":173,"nča":33,"sra":53,"st ":664,"slē":278,"mīt":118,"mīn":84,"mīk":32,"mīl":44,"mīg":202,"slā":344,"mīb":243,"mīd":29,"ss 
":875,"sli":399,"slo":148,"slu":68,"sfē":192,"sla":592,"sle":81,"ski":1153,"sko":1104,"sks":400,"skr":170,"sku":481,"ska":4650,"ske":410,"sno":53,"kšē":181,"sns":27,"sna":41,"nāš":340,"sni":516,"sng":29,"sne":180,"kšā":24,"smo":229,"shē":40,"sms":242,"smu":340,"so ":75,"sma":1140,"smi":429,"seš":62,"sme":291,"soš":166,"nēt":549,"nēs":72,"nēm":160,"nēj":281,"nēz":87,"stā":3123,"sze":116,"suā":44,"stē":996,"stī":1710,"svā":52,"sse":108,"ssa":247,"sso":61,"ssk":34,"ssi":62,"ssv":26,"kūv":24,"smī":51,"ssp":22,"snē":32,"ste":1133,"sta":3898,"stm":49,"stn":212,"sto":1125,"stp":85,"sti":2606,"stl":22,"stv":62,"stu":1969,"str":3208,"snī":38,"sts":1496,"sud":63,"sub":69,"spā":202,"sug":619,"sul":139,"sum":128,"sup":49,"sun":42,"sus":127,"sur":82,"spē":1745,"suv":25,"nē ":308,"sva":373,"sve":113,"svi":110,"soņ":53,"spī":75,"nēš":36,"tai":828,"taj":702,"tak":124,"tal":256,"taf":30,"tag":177,"tab":171,"tac":309,"tad":222,"tbi":216,"tba":91,"tav":303,"tau":931,"tat":537,"tas":3364,"tar":1920,"tap":52,"tan":395,"tam":934,"tce":25,"te ":724,"tbo":1157,"tbr":38,"tdi":61,"tda":95,"svē":166,"suņ":88,"stū":113,"svī":41,"ta ":6165,"ozā":24," št":206," ši":160,"pa ":730," šo":253," ša":314,"iķu":54," še":87,"iķi":331," Šv":81,"iķa":30," Št":29,"iķe":39," Šo":131," Ši":163," Še":104,"ovī":25," Ša":199,"ovē":187,"osū":27," Šķ":45,"pdr":26," šā":147,"iļu":60," Šī":134,"kā ":4239,"iļi":22,"pci":27," Šā":65,"iļa":27,"pe ":174,"par":3884,"ozī":528,"pat":548,"pas":2092,"pav":354,"pau":149,"paz":430,"pba":51,"pac":124,"pad":181,"paa":70,"pab":25,"pag":415,"pak":786,"pal":445,"pai":59,"pap":138,"pam":682,"pan":195,"pc ":24,"iļā":27,"paļ":30,"pha":38,"paņ":40,"phi":54,"pga":664,"kāz":22,"pi ":142,"pgr":35,"kād":424,"kāb":641,"kāc":170,"kāp":196,"kān":152,"kāt":32,"kās":1168,"kār":1333,"paā":29,"kām":189,"kāl":251,"kāk":39,"kāj":89,"pea":50,"pec":223,"ped":72,"pdz":192,"pen":158,"per":1152,"pet":33,"pes":264,"pei":23,"pel":224,"pek":146,"peļ":35,"pla":1300,"pli":106,"pgā":32,"ple":241,"plo":77,"plu":29,"pka":321,"pko":110,"pja":69,"iļņ":141,"pjo":49,"pju":63,"pie":4300,"pig":22,"paš":762,"pij":105,"pil":2275,"pin":133,"pio":305,"pir":966,"pis":387,"pit":151,"poz":106,"por":630,"pop":247,"pot":153,"pos":221,"poj":154,"pog":70,"pon":210,"pok":25,"pol":767,"pod":51,"ps ":241,"ppu":25,"ppl":37,"piņ":42,"pkā":95,"iņu":475,"iņs":42,"iņo":51,"kāņ":216,"iņi":157,"iņj":22,"pme":52,"pma":68,"iņa":746,"po ":65,"gžņ":89,"pni":63,"pne":24,"psu":31,"pst":387,"iņķ":170,"pta":564,"pse":42,"ņķo":49,"ņķi":80,"psi":158,"psk":78,"ņķa":92,"pso":28,"ptu":237,"ptv":61,"pub":494,"pte":121,"pti":177,"pto":59,"pnī":27,"plū":201,"plē":184,"pra":501,"jīg":32,"plā":202,"pmē":120,"pru":34,"psa":72,"pu ":321,"pmā":37,"iņā":150,"pri":1091,"pre":942,"pro":1797,"plī":101,"poš":65,"pož":46,"prē":65,"prī":60,"psē":31,"pur":145,"pmū":26,"pus":720,"put":341,"pum":233,"pun":290,"ņģē":37,"iņš":323,"pul":349,"poļ":35,"pva":35,"pvi":404,"prā":125," Ņū":40,"ptū":22,"pzi":58,"ptī":28,"prū":42,"ņģe":27,"pzī":361,"lā ":1304,"lār":401,"lās":590,"lāt":209,"lām":367,"lān":215,"lāp":55,"lāj":219,"lāk":1056,"lāg":85,"lād":144,"lāc":180,"lāv":61,"lāz":36,"iša":70,"iši":31,"išu":56," Ži":23," Že":35," Ža":46,"lāč":85,"lāš":22," ža":153," že":29," žu":96,"lāņ":65,"išķ":223,"išņ":36,"ņēm":347,"qua":27,"lē ":420,"quu":124,"que":29,"qui":79," šī":215," šķ":525,"lēd":76,"lēc":25,"lēm":141,"lēn":178,"lēk":87,"lēl":48,"lēj":178,"lēg":138,"lēs":295,"lēt":383,"lēr":42,"lēp":64," šū":130," Ūd":31," ūd":556,"ra 
":3874,"ežo":374,"ngo":130,"ngi":47,"eži":331,"ngl":303,"ngv":61,"ežu":144,"ngu":183,"ngr":212,"ngt":36,"ngs":108,"ni ":1261,"nge":139,"eža":263,"nga":229,"ncē":85,"nha":38,"nhi":24,"nhe":54,"neg":80,"nej":92,"nei":201,"nel":264,"nek":293,"nen":247,"nem":97,"nep":336,"neo":76,"ner":698,"net":418,"nes":1590,"nev":218,"ndz":31,"ng ":184,"nea":224,"neb":55,"nec":44,"ned":153,"nfi":27,"nfo":273,"iān":89,"iāl":1004,"nfl":40,"nfr":48,"nez":35,"iāc":169,"nfe":88,"nci":571,"gļi":38,"nce":476,"nch":29,"nca":32,"ne ":1014,"nbu":52,"ndu":273,"ndr":359,"nds":68,"ndo":248,"ndi":727,"nde":437,"nda":780,"gļu":239,"nak":85,"nal":235,"nam":315,"nan":170,"nar":136,"nac":383,"nad":66,"nae":90,"naf":23,"nag":57,"nai":923,"naj":193,"nab":23,"nbe":61,"nd ":206,"nav":456,"nau":117,"nat":306,"nas":3073,"naz":22,"na ":4384,"muļ":23,"muš":43,"hēm":45,"mtā":193,"ntā":395,"nsē":48,"noš":193,"ny ":39,"nvi":1034,"nux":29,"nve":133,"nva":62,"nuk":32,"nul":25,"num":139,"nus":295,"noī":26,"nto":950,"ntu":416,"nts":925,"ntr":875,"nti":950,"nth":28,"nta":799,"nte":768,"nsu":59,"nsv":36,"nsp":162,"nso":88,"nst":540,"nss":27,"nkū":25,"nsf":27,"nse":131,"nsg":24,"nsi":192,"nsl":44,"nsk":554,"nsa":85,"nu ":3157,"nmē":32,"iču":24,"ičs":33,"nrs":66,"iči":25,"nri":78,"iča":38,"nra":85,"nt ":98,"hīd":23,"nkā":234,"niņ":82,"ns ":4213,"noc":58,"nod":505,"noa":58,"nob":42,"nog":200,"nof":35,"nok":292,"nol":424,"noj":304,"nop":85,"nom":610,"non":115,"not":1518,"nos":1301,"nor":435,"nov":804,"noz":696,"npa":23,"niķ":23,"nne":83,"nna":118,"ngļ":211,"nno":31,"nni":41,"nme":24,"nma":29,"iāņ":39,"ežģ":48,"neš":108,"ndž":62,"ežī":26,"ežā":198,"ngā":136,"nn ":32,"nla":94,"neļ":30,"nle":25,"no ":5915,"nke":26,"nki":45,"nkc":180,"nka":173,"nku":121,"nko":53,"gļū":52,"nks":81,"nkt":272,"nkr":126,"nja":39,"ndī":35,"njo":28,"nij":1586,"nig":62,"nif":36,"ndā":228,"nie":3147,"nid":123,"nic":83,"nia":73,"nk ":29,"niz":488,"ndē":98,"niv":242,"nis":2463,"nit":151,"nir":27,"nio":28,"nip":37,"nim":128,"nin":84,"nik":329,"nil":57,"ogs":183,"ogr":713,"ogu":116,"ogi":122,"ogl":96,"ogo":65,"ogn":46,"oga":163,"obā":56,"ņš ":323,"obī":23,"ocē":27,"oho":29,"ohn":25,"oha":113,"odē":120,"ois":41,"oin":31,"ocī":54,"odā":459,"iģi":165,"iģe":27,"ok ":31,"gša":114,"oju":1293,"odī":31,"ojo":586,"oji":109,"oje":183,"oja":1186,"ol ":51,"oce":578,"och":26,"oci":581,"ock":58,"oco":46,"obs":66,"obu":91,"oca":32,"ode":548,"odi":313,"odo":337,"odn":41,"ods":132,"odr":212,"ocy":51,"jā ":4204,"of ":167,"oda":2367,"oel":45,"oef":31,"oei":29,"oen":27,"odz":39,"odu":575,"jām":1042,"jān":27,"ofi":331,"jāj":22,"jās":923,"ofs":60,"oft":50,"ofo":72,"ofe":168,"jād":51,"jāb":32,"ofa":27,"nzē":40,"oal":24,"oak":87,"oba":54,"od ":93,"obo":33,"obr":164,"obl":72,"obj":208,"obi":154,"obe":704,"nsī":306,"ntē":146,"ة ":22,"nza":50,"nzi":41,"nzo":53,"ntī":340,"nvā":46,"ntū":58,"otā":1010,"orķ":25,"orī":50,"opš":220,"jēj":29,"osā":36,"jēd":148,"orē":91,"ows":32,"ovā":78,"gūš":56,"ozu":30,"otī":57,"ozo":189,"oze":115,"ozi":143,"oza":288,"otē":115,"orņ":38,"otu":305,"oti":1556,"oth":36,"ote":602,"ott":59,"olū":125,"ots":723,"otr":369,"onī":135,"oto":694,"otn":222,"onē":180,"ost":506,"gūt":168,"osu":40,"ota":947,"osi":149,"osk":156,"ose":88,"osf":203,"onā":1526,"osp":73,"oss":48,"gūs":114,"gūr":45,"osm":456,"osl":225,"oso":109,"orā":211,"owe":22,"ovi":389,"ovg":31,"opī":120,"ovs":143,"opē":176,"ox ":29,"jē ":37,"ova":643,"ove":137,"opā":322,"oun":55,"ous":25,"our":37,"out":44,"opm":30,"opo":347,"opi":253,"opl":68,"ope":301,"okā":278,"oph":40,"opa":835,"os 
":3856,"opu":475,"opr":71,"opt":86,"ops":93,"ojā":198,"or ":99,"oot":22,"oor":77,"ork":107,"orl":52,"orm":1086,"orn":222,"oro":248,"orp":128,"olī":129,"orr":39,"orc":49,"okļ":89,"ord":492,"ore":247,"omā":452,"orf":64,"org":872,"orh":26,"ori":1689,"osa":935,"ort":723,"ors":691,"orv":185,"oru":295,"omē":208,"orz":33,"ory":23,"olā":244,"ot ":1449,"m² ":52,"orb":148,"ora":653,"olē":98,"olb":29,"ola":1498,"old":135,"on ":659,"oli":1155,"oll":82,"olk":110,"olf":57,"ogā":71,"ole":441,"olg":32,"ols":417,"olt":46,"olm":107,"oln":31,"olo":1146,"olu":413,"ogē":50,"oka":339,"om ":55,"oki":85,"okh":49,"oke":542,"okr":192,"oks":506,"oko":208,"okl":145,"okm":47,"okt":89,"oku":309,"ona":1194,"ond":224,"ogļ":73,"onc":136,"onf":111,"one":173,"ong":158,"oni":1530,"onk":159,"onn":56,"ono":626,"onr":30,"ons":1271,"ont":491,"onu":567,"onv":60,"gšē":65,"ony":39,"onz":36,"oma":1047,"ome":546,"omb":95,"omi":730,"omm":51,"omj":98,"omp":487,"omo":191,"omt":26,"omu":284,"oms":200,"op ":56,"la ":3657,"eķu":303,"kuļ":50,"ktū":291,"kuš":110,"eķi":82,"le ":615,"eļd":25,"lce":40,"eļe":30,"eļa":635,"lci":93,"eļi":128,"eļv":32,"eļu":421,"eļo":286,"eļr":165,"gā ":654,"lde":303,"lda":256,"ldo":68,"ldn":139,"ldi":101,"ldv":22,"ldu":71,"lds":71,"ldr":27,"lab":686,"lac":74,"lad":63,"lah":38,"lag":32,"laj":474,"lai":2379,"lak":285,"lan":766,"lam":314,"lap":246,"lar":131,"lat":1524,"las":2361,"lau":462,"lav":247,"lay":44,"laz":38,"lba":32,"ld ":81,"lbe":128,"lbu":171,"lbr":107,"kvi":68,"kve":97,"kva":316,"kuu":25,"kut":36,"kmū":49,"kus":576,"kur":4124,"kup":73,"kun":181,"kum":1594,"kul":627,"kuk":32,"kuj":25,"koš":92,"krā":588,"koņ":106,"kta":560,"kte":51,"knē":45,"kss":117,"kmī":26,"ksp":156,"ksu":120,"kst":1947,"ksk":31,"cūk":37,"ksi":433,"kso":59,"ksn":205,"ksm":118,"ksl":373,"ktr":699,"kts":634,"ktu":492,"kti":662,"kto":408,"ksī":103,"ktē":73,"kuģ":144,"krū":64,"ktī":473,"ksā":78,"krē":127,"ktā":172,"krī":67,"fēr":208,"ksē":49,"lpo":192,"liņ":107,"lps":165,"lkā":113,"lpe":42,"lpi":52,"dū ":66,"ls ":2358,"lpu":76,"lok":186,"lon":250,"lom":194,"lop":98,"lor":215,"lod":1255,"loc":172,"log":309,"loj":39,"liģ":139,"lpa":98,"los":267,"lot":233,"lov":125,"loz":171,"lno":58,"lnk":23,"lni":256,"lne":55,"lob":58,"lnv":34,"lnu":164,"lns":198,"eņo":30,"lmi":75,"eņi":138,"lme":73,"eņe":150,"eņb":28,"lma":269,"eņa":350,"gāļ":23,"lna":214,"lmu":99,"eņu":143,"eņr":134,"lms":33,"ltk":80,"lti":424,"ltn":68,"lto":53,"ltr":29,"lts":217,"lnī":100,"ltu":221,"lud":54,"luc":32,"lub":111,"lug":36,"lpā":54,"lsi":33,"lkš":22,"lsk":140,"lsm":135,"lsn":30,"lso":131,"lsp":34,"lss":53,"lst":1735,"lsu":49,"eņķ":69,"lv ":26,"lta":371,"lte":188,"lkņ":24,"lmā":69,"lu ":1807,"eņē":35,"lnā":79,"lse":64,"dūd":48,"lsa":85,"lt ":35,"lhe":59,"eļā":42,"lgu":44,"lgs":36,"lgt":25,"lgr":44,"lgo":65,"lbā":28,"lge":25,"li ":1126,"gāz":197,"lga":178,"gār":144,"lfr":22,"gās":198,"gāt":113,"gān":110,"gāj":129,"gāk":229,"gāl":68,"gām":215,"gāc":40,"gād":124,"lfa":193,"lez":112,"lev":95,"les":1626,"let":176,"ler":213,"leo":70,"lep":67,"lem":358,"len":329,"lek":1222,"lei":141,"lej":190,"leg":36,"lef":47,"led":254,"lec":71,"lea":23,"lfī":26,"lls":27,"ldū":50,"llu":78,"lo ":480,"lla":140,"lle":160,"lgā":68,"lli":182,"leņ":73,"llo":82,"lko":184,"lku":31,"lks":97,"lka":119,"lke":27,"lki":33,"eļš":95,"leģ":37,"lkl":39,"leč":51,"ljo":123,"ldī":542,"eļļ":42,"lje":30,"ll 
":123,"lja":61,"eļņ":24,"lit":895,"lis":1779,"lir":22,"lip":94,"lio":99,"lin":517,"lim":573,"liz":308,"ldē":33,"liv":35,"lic":208,"lid":326,"lia":94,"lib":49,"lik":929,"laš":379,"eļģ":25,"lij":964,"lig":100,"lie":3537,"lif":131,"ldā":34,"ma ":2997,"mac":23,"mai":882,"maj":174,"mak":194,"mad":68,"mag":255,"map":23,"lzī":23,"mar":276,"mas":1459,"mal":305,"mam":184,"man":1880,"maz":508,"mat":1537,"mba":149,"mbl":29,"mbi":138,"mbe":47,"mbr":158,"mbo":287,"me ":570,"mbu":166,"mda":33,"mde":27,"mdo":23,"med":310,"meg":22,"met":1247,"mes":1625,"mer":791,"mem":29,"mel":157,"men":1391,"mei":147,"meh":105,"mek":209,"mez":28,"hār":41,"mfo":25,"hān":110,"lva":502,"lve":648,"lvi":41,"luk":23,"loģ":845,"lup":35,"luo":34,"lum":401,"lut":87,"lus":425,"lur":30,"loī":24,"ly ":28,"loš":29,"loņ":26,"lvo":84,"lvu":57,"lož":83,"ltā":300,"gēn":73,"lsē":1122,"lzi":33,"lza":40,"lzc":242,"lsī":24,"lvā":83,"ltī":58,"lzs":43,"lvē":725,"ltū":225,"mpi":549,"mpe":254,"mpr":55,"mpo":156,"miņ":109,"mpl":189,"mpu":71,"mps":29,"mpt":22,"ms ":3789,"mog":37,"moc":27,"mob":84,"mod":286,"mon":399,"mop":26,"mok":98,"moj":43,"mom":24,"mol":217,"mor":220,"mos":1307,"mot":190,"mpa":147,"ešķ":123,"miķ":31,"mmā":27,"mmē":68,"msa":67,"mu ":2868,"mt ":26,"mtu":58,"mts":51,"mti":48,"mug":77,"mpā":152,"mss":33,"mst":25,"msu":24,"msk":121,"mte":23,"mta":1054,"mvi":24,"moš":35,"mur":78,"mus":558,"mut":78,"mui":109,"mul":121,"mum":318,"mun":151,"mpē":250,"muz":106,"maņ":208,"džu":100,"džs":35,"mga":146,"dža":84,"mi ":937,"maģ":28,"dži":70,"dže":114,"mju":201,"ml ":23,"mje":32,"min":1418,"mio":29,"ešo":30,"mil":392,"mir":718,"ešs":32,"mis":1428,"ešv":29,"mit":475,"ešu":1242,"miz":31,"mic":35,"eša":400,"mig":64,"mie":997,"ešd":35,"mid":33,"mik":313,"mij":472,"maš":145,"eši":317,"mo ":301,"meņ":339,"meļ":1113,"džā":37,"mko":36,"mm ":35,"mni":192,"mna":27,"ešā":114,"mež":103,"mmu":74,"mma":291,"mme":33,"vī ":36,"tšķ":209,"uņi":48,"uņo":84,"uņu":131,"tūt":53,"tūr":1186,"tūk":66,"tūc":93,"vīn":52,"vīt":70,"vīr":237,"vīg":77,"vīd":56,"vīb":202,"vīz":153,"vē ":134,"īču":22,"īča":67,"sūt":95,"vēl":461,"vēk":693,"vēj":244,"vēc":27,"Čeh":54,"vēs":501,"vēt":425,"vēr":790,"vēn":31,"vēģ":76,"Čik":22,"uļo":37,"uļv":73,"uļu":71,"uļi":36,"uļa":32,"vēš":46,"tļa":51,"tļu":59,"tļi":52,"tļo":22,"vā ":578,"vāj":43,"vāc":471,"vāl":70,"vāk":140,"vān":38,"vām":138,"vār":1021,"vāt":107,"vās":151,"vāv":76,"ržu":43,"uģa":52,"uģi":59,"uģu":36,"tņu":72,"ča ":145,"zra":210,"zru":92,"zjū":39,"tīd":188,"zlā":29,"tīb":1094,"zs ":189,"tīt":1190,"tīs":433,"tīv":934,"tīr":77,"tīk":237,"tīn":159,"tīm":268,"tīg":275,"tīj":350,"zte":106,"zti":66,"ztu":134,"čem":249,"ztv":96,"čet":144,"znī":263,"čer":57,"zsa":231,"rūd":56,"znā":215,"zmē":101,"zu ":183,"rūt":48,"zst":360,"rūs":59,"zsv":33,"znē":22,"zsl":25,"zsk":600,"rūk":50,"rūn":90,"rūm":55,"zsp":47,"rūp":88,"zva":673,"zvi":325,"zve":584,"či ":69,"zul":164,"zum":137,"zpē":51,"zus":63,"čie":24,"zsā":72,"tīņ":237,"zrā":47,"tīģ":32,"zzi":39,"zuā":39,"īģe":36,"tīš":166,"zvē":121,"rūš":31,"čs ":40,"zgl":134,"zga":142,"rš ":626,"zi ":317,"uār":73,"uāl":181,"zaļ":79,"zgu":59,"zef":34,"zej":232,"zdz":26,"zeb":60,"zdo":186,"uāc":50,"zes":430,"zen":285,"zem":1268,"zel":450,"zek":170,"zer":903,"ze ":308,"zce":422,"zbr":68,"zda":107,"zde":234,"zci":73,"zab":39,"īļu":88,"zai":49,"tēš":90,"zah":57,"zam":111,"zan":24,"zak":23,"zar":328,"zau":128,"zav":27,"zas":275,"zat":43,"zod":22,"zob":42,"tī 
":235,"zos":43,"zot":69,"zor":28,"zop":22,"zom":26,"zon":281,"zol":154,"zof":144,"ziļ":81,"ziķ":104,"zpa":78,"zpr":45,"ال":32,"zpl":262,"ziņ":261,"zkā":32,"zpi":173,"zo ":48,"zma":918,"zme":35,"zmi":84,"zna":38,"zmu":41,"zno":68,"zne":198,"ršā":79,"zni":156,"zbū":81,"zka":34,"zkl":48,"zkr":193,"zla":642,"zgā":25,"zli":78,"zeļ":55,"zeņ":47,"rša":176,"zie":1720,"zid":135,"zij":625,"rši":100,"zin":1174,"zim":1926,"zil":123,"zik":669,"ršo":29,"zio":48,"zcī":73,"ršr":51,"zir":324,"zis":229,"zit":58,"ršu":90,"ziv":58,"tē ":318,"yst":46,"rņā":31,"sīk":51,"sīj":64,"sīm":27,"sīn":31,"sīs":28,"sīt":105,"sīv":85,"sīb":502,"sīd":98,"sīg":56,"tāž":25,"yon":65,"sī ":32,"tēģ":40,"īņā":28,"za ":232,"āža":23,"āžu":50,"īņu":254,"īņa":76,"sīļ":23,"tēk":46,"tēl":335,"tēm":821,"tēn":44,"tēj":348,"tēt":204,"tēs":73,"tēv":63,"tēr":175,"tēz":37,"īša":499,"īšu":52,"tā ":2912,"tāc":148,"tād":527,"tāk":538,"tāl":1306,"tāj":1760,"rķu":27,"rķe":31,"rķi":179,"yla":29,"tāļ":48,"rņa":66,"rņu":41,"tāš":25,"tāv":1332,"tāz":22,"tāp":79,"tām":538,"tān":411,"tās":1054,"tāt":740,"tār":419,"rīz":162,"rīn":188,"rīj":40,"rīk":162,"rīl":26,"rīv":387,"rīs":243,"rīt":190,"rīg":665,"rīb":593,"rīd":236,"rīc":210,"pūt":23,"sāļ":43,"rī ":2361,"rīš":55,"pūš":30,"sē ":157,"sēj":316,"sēk":27,"sēm":89,"sēn":52,"sēd":44,"sēs":30,"sēr":135,"sēt":1211,"ožu":58,"oža":54,"sāt":137,"sās":67,"sār":68,"sāp":24,"sān":37,"pš ":215,"sāc":43,"sāj":28,"sāk":643,"sāl":103,"sām":92,"ožā":27,"pšu":22,"rģi":226,"pša":33,"rģe":23,"rēķ":47,"sā ":333,"rēš":122,"rē ":110,"ww ":36,"āļu":173,"āļi":45,"www":36,"rēt":449,"rēs":31,"rēm":93,"rēn":56,"rēj":380,"rēk":33,"rēl":48,"rēd":88,"rāļ":69,"rāņ":44,"ošā":198,"rāš":23,"ws ":30,"ošī":35,"wor":28,"rā ":1557,"pļa":22,"rāk":869,"rāl":881,"rām":402,"rān":164,"rāg":38,"rāj":67,"rāc":432,"rād":816,"rāf":309,"rāb":117,"wer":27,"nže":53,"rār":31,"rāp":24,"rāv":135,"rās":553,"rāt":492,"rāz":74,"ošs":181,"ošu":115,"oši":415,"ošo":147,"oša":1342,"vze":33,"vvē":32,"āņi":41,"āņu":468,"vuš":26,"āņa":65,"war":33,"pīt":24,"pīr":89,"pīn":23,"pīl":42,"viš":195,"pīg":88,"pīd":81,"pīb":56,"vs ":473,"vri":37,"vst":90,"vsk":63,"vu ":956,"vus":141,"vmū":35,"vum":201,"vuk":61,"vul":71,"vva":94,"voš":86,"pīļ":68,"āša":527,"via":49,"nša":32,"vio":33,"vir":905,"vik":41,"vil":371,"vin":313,"vig":52,"vij":2101,"vic":31,"vid":1832,"vie":6969,"viz":69,"nšu":53,"vit":157,"nšt":29,"vis":1620,"vji":56,"vju":71,"vo ":339,"oņa":59,"veš":46,"oņi":162,"oņu":145,"viā":31,"vna":31,"vni":238,"vič":61,"vod":34,"voj":423,"vol":130,"vok":149,"von":248,"vor":27,"vot":667,"vos":64,"vpa":23,"viļ":118,"vpi":79,"viņ":404,"vpr":25,"vaņ":38,"vaļ":151,"vi ":493,"vgo":31,"ver":992,"ves":531,"vet":45,"vej":30,"vei":2843,"ven":961,"vem":34,"vel":158,"vek":42,"ved":123,"vec":250,"oļu":172," − ":35,"vda":91,"uzņ":254,"ve ":176,"val":3215,"vak":43,"van":198,"vam":39,"var":1672,"vat":135,"vas":1579,"uzē":26,"vab":47,"vad":1354,"vai":3704,"vaj":267,"pēļ":144,"va ":982,"mūž":31,"uvē":31,"uvā":81,"uzv":164,"uzl":72,"uzk":39,"urš":530,"uzi":73,"uzg":36,"uze":128,"uzt":192,"uzs":479,"uzr":64,"utī":62,"uzm":28,"utē":47,"uzb":144,"uzc":22,"uzd":109,"utā":171,"usī":47,"pēt":433,"usē":169,"usā":96,"pēd":188,"pēc":941,"pēj":691,"urģ":36,"pēm":32,"pēl":736,"pēk":384,"pēr":307,"urī":204,"urē":196,"uz ":1335,"urā":908,"umš":52,"uum":27,"upā":104,"upē":50,"ux 
":33,"uus":124,"uvi":141,"uvo":31,"uva":357,"uve":337,"upī":23,"uvu":135,"usl":182,"usm":71,"usj":49,"usk":104,"mūk":26,"ukš":120,"usi":609,"unā":343,"usd":32,"use":127,"usa":206,"mūz":468,"usz":100,"usv":36,"usu":107,"ust":2158,"mūs":296,"uss":395,"mūr":69,"umī":24,"uso":49,"mūn":33,"utn":283,"uth":28,"uti":652,"ute":201,"uta":279,"utb":797,"utt":67,"uts":90,"utv":22,"utu":102,"uto":580,"utr":30,"unī":25,"us ":3775,"ulā":424,"oīd":99,"ulē":350,"ut ":97,"urb":65,"ura":1608,"urd":30,"urc":141,"umā":913,"ure":246,"urg":216,"uiž":110,"urj":22,"uri":851,"url":46,"urk":327,"urn":303,"uro":291,"urp":107,"ulī":38,"urs":344,"urt":245,"uru":1149,"urv":97,"umē":24,"urz":200,"unz":34,"ugš":195,"ujā":42,"uor":22,"upa":461,"ur ":573,"upj":23,"upi":127,"ukā":121,"upe":293,"upo":29,"upu":109,"ump":111,"ums":2443,"umu":2190,"umt":47,"umv":39,"umi":1048,"umk":31,"pāņ":98,"umm":30,"uml":29,"umo":1060,"umn":30,"uma":1511,"umb":191,"umd":66,"ume":317,"udž":74,"unt":48,"uns":163,"unr":34,"unv":30,"unu":158,"unl":25,"unk":486,"uni":663,"uno":67,"ugļ":37,"unc":25,"und":348,"una":264,"ung":150,"une":47,"up ":25,"uks":271,"uku":836,"ukt":791,"uko":36,"ukl":91,"uki":42,"ukc":69,"um ":175,"uka":140,"ulv":63,"ulu":136,"ult":666,"uls":122,"ulp":39,"ulo":29,"ulm":42,"ull":58,"ulk":243,"uli":203,"ulg":48,"ule":823,"ulf":35,"ugā":140,"ulc":29,"uld":110,"ula":397,"ulb":50,"un ":9898,"uid":83,"oģe":28,"udā":23,"oģi":861,"uil":24,"uis":38,"ucē":126,"ķa ":175,"mša":94,"uji":27,"ujo":73,"udē":94,"ul ":24,"uja":223,"ubā":54,"ugi":71,"ugo":30,"ugl":76,"pār":1984,"pās":27,"uga":887,"ugu":553,"ugs":799,"uha":22,"uj ":134,"uco":23,"pā ":453,"uda":124,"ude":104,"udi":282,"ķir":550,"ķis":433,"ķin":48,"ubs":106,"ķij":62,"ķim":24,"ubu":76,"ķie":106,"ķid":184,"uca":88,"ue ":35,"uce":66,"uci":96,"uer":36,"pāc":26,"pān":369,"pām":68,"pāj":65,"udu":86,"āču":68,"udr":142,"udo":32,"ug ":37,"udz":752,"uel":29,"ķet":39,"ķes":25,"ķer":270,"ķen":35,"ķel":35,"tuš":25,"ua ":31,"tzī":84,"uar":51,"ual":31,"uan":35,"ubi":59,"ubj":31,"ubl":522,"ube":111,"uba":77,"uag":22,"uc ":316,"ķi ":217,"tvā":37,"tzi":22,"tuā":134,"ttī":266,"trū":52,"ttē":198,"tza":38,"ttā":196,"nīš":44,"tyl":39,"tvē":108,"ķu ":470,"ty ":51,"toņ":39,"tve":395,"toļ":81,"tvi":1976,"tva":158,"tur":1909,"tus":646,"tuv":673,"ķo ":45,"tul":280,"tuk":47,"tun":72,"ķeš":66,"tum":1336,"tub":43,"tud":188,"tuc":22,"tug":34,"tpū":25,"trī":313,"trē":132,"two":22,"toš":435,"trā":1294,"nīc":430,"nīd":58,"nīb":728,"ts ":6404,"nīj":53,"nīg":331,"nīt":121,"nīs":25,"nīr":102,"nīn":27,"nīm":63,"tlē":35,"tre":229,"tt ":106,"oča":27,"tra":1972,"tri":1251,"oči":28,"trs":398,"tru":1883,"tro":2406,"tlī":43,"tu ":4075,"tmē":32,"tsa":92,"tse":174,"lūc":74,"lūd":44,"lūg":25,"lūk":137,"tsk":152,"tsl":24,"lūp":24,"tsp":162,"tsv":27,"tsu":24,"lūt":49,"tst":123,"lūs":138,"lūz":46,"tnē":293,"tta":34,"tte":67,"tti":291,"ttl":22,"tto":22,"tnī":27,"ttp":39,"ķus":35,"tma":80,"to ":1892,"tms":32,"tmo":123,"tml":23,"tmi":87,"tni":753,"tne":504,"tp ":39,"tna":35,"tns":112,"tnu":123,"tno":42,"tof":38,"tod":192,"toc":137,"toj":671,"tog":69,"nī 
":207,"tob":116,"tov":52,"tos":547,"tot":998,"toz":23,"tom":540,"ton":588,"tok":294,"tol":346,"tor":2083,"top":326,"tpe":28,"tkā":47,"tpi":87,"tiķ":219,"tpa":63,"tpl":32,"tiņ":164,"tpr":24,"tij":940,"til":601,"tik":1958,"tif":82,"tie":4403,"tig":30,"tir":195,"tit":273,"tis":2277,"tin":593,"tim":104,"tip":378,"tio":312,"tia":75,"lša":104,"tic":277,"tid":70,"teā":78,"tju":24,"tiz":110,"lšu":27,"tiv":158,"tja":44,"tki":59,"tkl":212,"tko":27,"tkr":96,"tku":37,"tka":280,"tli":341,"teņ":82,"tla":152,"tgā":22,"tle":76,"tem":480,"ten":587,"teo":285,"tep":65,"tei":672,"tej":47,"tek":587,"tel":542,"teg":123,"teh":268,"teb":48,"tec":92,"ted":53,"tfo":35,"th ":68,"tet":45,"tes":1146,"ter":2575,"tgr":47,"ti ":2942,"tga":132,"tač":123,"tho":40,"the":191,"thi":29,"taļ":38,"tha":41," α ":31,"āpa":64,"āpe":142,"ākā":884,"āpi":34,"ājē":32,"ār ":34,"ājā":201,"āno":72,"āns":365,"ānu":327,"āre":82,"āmā":78,"ārg":36,"āra":470,"ārb":33,"ākļ":103,"ārd":904,"ārm":72,"ārn":280,"āro":74,"ārp":114,"āri":461,"ārk":89,"ārl":118,"āt ":234,"ālā":1101,"āpu":44,"āps":37,"ās ":6616,"āld":44,"āgā":35,"āle":181,"āli":1326,"āla":1215,"āks":485,"ākt":61,"āku":495,"āko":642,"āki":268,"āka":1716,"ām ":3525,"ājs":680,"ābū":29,"ādī":293,"āju":1139,"āji":502,"ājo":89,"ājp":22,"āni":891,"āna":615,"ānd":34,"āmu":42,"āms":160,"āmi":68,"āma":396,"ālu":633,"āls":579,"ālo":317,"ārē":202,"āsā":40,"āpš":36,"ārī":93,"āvu":149,"žās":33,"ārā":289,"žād":455,"žāk":194,"žām":34,"ātē":54,"āze":265,"ārš":275,"āzi":216,"āzu":50,"zšķ":85,"ātī":67,"ātā":439,"ārņ":54,"āv ":562,"žā ":38,"āta":627,"āst":95,"āsu":62,"ātn":541,"ātr":318,"āto":42,"āte":469,"āti":825,"ārz":92,"āsa":252,"ālī":22,"ārr":75,"ārs":844,"ārt":1313,"āru":144,"ārv":488,"ānā":124,"āsi":27,"ākš":30,"āvd":67,"āva":113,"āpē":74,"āvs":30,"āvo":161,"āvj":71,"āvi":182,"āve":205,"āts":679,"ātu":408,"Āzi":142,"āfi":261,"āfr":63,"āfs":37,"āga":57,"āgs":41,"āgo":45,"āj ":38,"ābē":84,"ācē":37,"āk ":1029,"ādā":677,"ācī":199,"ādē":157,"āja":844,"āje":33,"ābi":54,"ābj":26,"ābe":514,"āba":24,"āca":27,"ābu":87,"āci":2513,"ācb":31,"āda":636,"ācu":337,"ādn":33,"ādo":104,"āde":246,"ādi":416,"āds":129,"ādu":390,"ļin":31,"ļie":252,"ļi ":260,"zņe":149,"ļda":25,"ļav":42,"ļau":478,"ļai":47,"ļas":391,"ļam":83,"ļa ":1215,"āzē":88,"zņē":241,"žīm":24,"žģī":37,"ātņ":41,"āvē":149,"āvā":189,"āvī":87,"AU ":30,"":23,"zēš":55,"BA ":99,"ķī ":22,"ļot":251,"ļos":305,"ļiņ":134,"ļoj":28,"ļri":165,"Ķīn":125,"ļu ":1814,"zīš":29,"ļsk":34,"ļve":81,"ļvi":28,"ļum":33,"ļus":125,"ļuv":123,"ķēr":58,"ķēn":27,"ķēd":39,"ļoš":38,"Žan":29,"zīs":345,"zīt":108,"zīv":1468,"zīl":49,"zīj":29,"zīm":1070,"zīd":65,"zīc":55,"zīb":324,"zīg":242,"AP ":26,"zāc":457,"zā ":57,"ža ":310,"zāk":210,"zāl":65,"zām":26,"zān":65,"zās":75,"žku":26,"zē ":172," С ":34,"žis":66,"ķīd":71,"ķīr":23,"ķīs":84,"žie":84,"ķīn":49,"ķīm":271,"žos":33,"žot":153,"žor":51,"žkā":37,"Āge":33,"žon":63,"žoj":249,"Āfr":113,"zēl":36,"zēj":212,"zēm":74,"zēt":500,"zēs":27,"zēr":47,"žni":23,"žo ":82,"žas":293,"žag":66,"žai":25,"žan":122,"žam":29,"ži ":383,"žer":29,"žes":28,"žet":61,"žei":43,"žel":23,"žen":74,"uža":37,"užu":32,"žus":34,"žur":90,"žoš":80," Н ":25,"žs ":70,"žre":77,"žu ":492,"ušā":109,"ušu":56,"ušo":68,"uši":230,"uša":268},"n_words":[2106271,2428959,2036825],"name":"lv"}
\ No newline at end of file
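Note on the profile format: each file under nlp_resource_data/langdetect/profiles/ (lv above, mk below) is a single-line JSON object with three keys. "freq" maps 1- to 3-character n-grams to their occurrence counts in the training corpus, "n_words" holds the total number of 1-, 2- and 3-grams seen, and "name" is the ISO 639-1 language code. A minimal Python 3 sketch of reading a profile directly; the explicit encoding argument is exactly the kind of py2-to-py3 detail this conversion has to get right, and the sample values are taken from the lv profile that ends above:

    import json

    # Each profile is one JSON object on a single line; on Python 3 the
    # encoding must be given explicitly, or open() falls back to the
    # locale's default encoding and may fail on non-ASCII n-grams.
    with open('nlp_resource_data/langdetect/profiles/lv', encoding='utf-8') as f:
        profile = json.load(f)

    print(profile['name'])         # 'lv'
    print(profile['n_words'])      # [2106271, 2428959, 2036825]: 1-, 2-, 3-gram totals
    print(profile['freq']['as '])  # 38125: corpus count of the trigram "as "

Assuming this tree keeps the upstream langdetect public API (detect, detect_langs and DetectorFactory, which the upstream package exposes from __init__.py as listed in this commit), detection over these profiles looks like the sketch below; the sample sentences and exact scores are illustrative only:

    from langdetect import DetectorFactory, detect, detect_langs

    # Detection is probabilistic; fixing the seed makes results reproducible.
    DetectorFactory.seed = 0

    print(detect('Rīga ir Latvijas galvaspilsēta'))            # expected: 'lv'
    print(detect_langs('Скопје е главен град на Македонија'))  # e.g. [mk:0.9999...]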
diff --git a/nlp_resource_data/langdetect/profiles/mk b/nlp_resource_data/langdetect/profiles/mk
new file mode 100755 (executable)
index 0000000..a0c736a
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"D":603,"E":600,"F":432,"G":577,"A":1208,"B":839,"C":1276,"L":579,"M":1021,"N":552,"O":461,"H":549,"I":1773,"J":289,"K":455,"U":346,"T":883,"W":350,"V":551,"P":991,"S":1413,"R":594,"X":347,"f":1092,"g":1689,"d":2970,"e":9834,"b":1224,"c":2969,"a":10833,"n":6781,"o":7437,"l":4802,"m":3009,"j":528,"k":1469,"h":2724,"i":8516,"w":745,"v":996,"u":3216,"t":5815,"s":5501,"r":6845,"p":1803,"z":571,"y":1482,"x":320,"ν":292,"ο":532,"ι":356,"α":455,"ς":427,"ρ":335,"τ":303," m":470," o":776,"ш":16654," i":285," k":375," d":474,"ф":11418," e":367,"х":6666,"ц":21764,"ч":22883,"р":143160,"с":124190," a":559,"т":178546," c":357,"у":55891,"ќ":2840,"џ":1064,"љ":298,"ј":55736,"њ":6876,"ѕ":416," t":538," p":361,"ѓ":4991," s":442,"Ј":1691,"Џ":661,"И":3549,"Л":2399,"К":6325,"Н":3704,"М":7265,"П":6311,"О":4503,"Б":5118,"А":6164,"Г":3754,"В":4777,"Е":2673,"Д":3627,"З":1960,"Ж":390," K":424,"Ш":1184," H":467," I":966," N":433," O":314," L":498," M":897," B":722,"Т":4778," C":1036,"У":965,"Р":4369," A":1024,"С":9342,"Ц":1100," F":403,"Ч":767," G":506," D":520,"Ф":2111," E":476,"Х":1951,"л":91274,"к":111435,"и":261784,"п":63533,"о":278169,"н":193670,"м":56196," S":1135,"г":42718,"в":105270," R":489,"б":32290," P":814,"а":349769,"з":39653," W":317,"ж":11105," V":362,"е":224574," U":304,"д":90920," T":673,"ا":324,"Ист":523," А":5482," Б":5022," В":4654," Г":3616," Д":3069," Е":2489," Ж":385," З":1936," И":3361," К":5975," Л":2339," М":6938," Н":3453," О":4096," П":6133," Ј":1624," Џ":661,"I ":636," б":12052," а":9283," г":16454," в":30954," е":23200," д":18766," з":12782," ж":2284," и":33901," л":4546," к":23687," н":51827," м":12882," п":36928," о":29291," Р":3961," С":8974," Т":4639," У":893," Ф":1931," Х":1928," Ц":1068,"II ":332," Ч":758," Ш":1131," ј":6372," ќ":305," т":10678," у":3946," р":10396," с":42490," ц":3019," ч":2568," ф":5078," х":2102," ш":4226,"Ma":323,"II":479,"Кар":402,"a ":2176,"i ":786,"he":719,"ha":360,"g ":312,"ea":411,"ed":390,"de":624,"di":400,"h ":457,"el":544,"Кра":315,"en":923,"et":387,"es":758,"er":1600,"ca":400,"e ":2587,"da":295,"f ":324,"Кон":322,"co":397,"ci":314,"ch":448,"ce":390,"d ":868,"at":846,"as":511,"ar":1185,"al":1004,"am":363,"an":1614,"ac":442,"ae":314,"nt":684,"of":311,"om":450,"on":1271,"ol":392,"os":424,"ou":298,"op":290,"or":933,"r ":882,"lo":357,"ll":449,"o ":1254,"ma":512,"me":447,"na":717,"nd":628,"ne":472,"ng":429,"ni":597,"m ":843,"km":293,"li":761,"le":704,"la":806,"n ":1564,"hi":303,"id":295,"ic":719,"ia":681,"ie":315,"is":934,"it":584,"il":394,"in":1115,"io":670,"l ":690,"y ":690,"ve":297,"ur":355,"us":735,"to":540,"tr":283,"te":849,"ti":907,"th":673,"ta":699,"st":673,"se":302,"si":446,"rt":285,"ro":643,"ri":940,"re":789,"ra":861,"t ":818,"s ":2376," ја":4740," ју":1544," ќе":299," ра":3709," ре":3839," ри":619," ро":1485," пр":14360," св":1979," си":2281," се":16046," сл":2331," см":1011," ск":603," сп":1778," ср":1095," со":9088," ру":564," са":1270," ти":832," тв":438," те":3329," то":1808," тр":1830," ст":3760," су":653," та":1317," ус":533," уп":512," ун":383," ум":454," ту":815," фо":1103," фр":608," фу":590," фе":667," фи":1284," фа":744," уч":625," уш":328," хр":549," хо":304," хи":314," хе":609," цр":836," ци":311," це":1471," чо":400," чл":393," чи":499," че":954,"На ":392,"Мак":2308,"Мар":480," ше":287," шп":318," шт":2822,"Нов":305,"ад ":2199,"ав ":328,"аа ":1364,"ам ":781,"ан ":3695,"ак ":666,"ал 
":2115,"Ово":374,"Ова":565,"аву":1675,"авт":686,"ага":381,"агр":309,"аго":607,"ада":949,"ади":1566,"аде":2265,"адр":313,"адо":1542,"адм":285,"адн":1034,"Опш":342,"аед":445,"би ":515,"ај ":1373,"ажу":426,"аат":3002,"або":1052,"ава":2546,"авн":1461,"аво":532,"аве":1404,"ави":1807,"алс":704,"алн":2980,"ало":848,"алб":313,"ала":1561,"алк":385,"али":3092,"але":2411,"амо":926,"ама":619,"ами":1203,"аме":1650,"ано":1527,"ану":570,"анс":4432,"ант":1418,"анц":867,"ана":3750,"анд":1155,"анг":1211,"ани":6541,"ане":747,"анк":497,"азн":452,"азл":623,"ази":3687,"азб":417,"азв":373,"аза":432,"аин":488,"аку":331,"акт":1164,"ако":4144,"акв":313,"аке":3801,"ака":975,"ач ":549,"Пар":311,"ас ":519,"ар ":2828,"ат ":6463,"ба ":1244,"аш ":464,"Пет":286,"САД":331,"Пол":375,"При":340,"Пре":445,"Про":342,"Рим":340,"Се ":508,"Реп":976,"Алб":429,"Аме":307,"Рус":402,"Ско":700,"Срб":287,"Сев":333,"Све":487,"Сел":334,"Таа":318,"Во ":1246,"Бел":406,"Тур":381,"Тој":871,"Вел":340,"Буг":380,"Гра":387,"Грц":367,"Фил":384,"Фра":322,"Евр":489,"лам":296,"лар":497,"лан":1512,"лас":1834,"лат":1878,"лба":979,"ме ":1911,"ма ":2440,"лав":1747,"лаг":370,"лад":919,"кул":1012,"куп":340,"кув":856,"кци":783,"кој":5826,"кре":290,"кра":1871,"кри":723,"кру":671,"кро":310,"лу ":886,"кса":284,"кси":508,"кст":341,"кте":605,"кти":891,"кто":713,"ктр":386,"кту":473,"кла":474,"кло":480,"ло ":2497,"кле":364,"клу":723,"кни":318,"ког":686,"ков":1275,"ком":1394,"кон":2061,"коп":1132,"кор":1338,"кот":2936,"кое":689,"кои":2339,"кол":2256,"кин":368,"кио":3400,"кит":2518,"ле ":2377,"кви":345,"кед":3751,"ли ":5921,"кај":517,"ква":1424,"кат":5730,"кар":1133,"кан":1422,"как":3097,"кал":885,"каж":406,"каз":294,"кац":351,"кад":630,"ла ":4400,"кт ":520,"ку ":1035,"иња":474,"ко ":6033,"ија":20901,"ки ":14686,"од ":14751,"нај":2709,"нац":749,"нау":554,"нач":1308,"нао":2376,"ог ":396,"нан":329,"нам":682,"нал":1991,"нат":7755,"нас":2005,"нар":1995,"нап":588,"нае":357,"над":781,"нак":346,"нда":719,"нго":310,"нгл":1025,"нга":386,"ое ":669,"неш":642,"нек":1134,"нен":441,"ои ":2794,"нер":1150,"нес":1333,"нет":980,"нег":1025,"нез":398,"нди":607,"ндо":434,"нив":1190,"низ":1413,"ник":2233,"ние":1319,"ок ":1161,"неј":441,"оа ":1059,"ов ":1440,"нав":317,"нт ":615,"мпи":518,"мпе":650,"мот":1353,"мск":1350,"мун":373,"муз":537,"мил":526,"мин":1679,"мио":320,"мис":724,"мир":732,"мит":1061,"миј":737,"но ":8706,"мна":468,"мно":1052,"мод":313,"мож":942,"мон":609,"мор":627,"мај":590,"нд ":382,"мбо":288,"маш":335,"мал":1112,"мак":1628,"мат":1625,"мас":309,"мар":614,"нг ":468,"ман":2367,"маа":339,"меѓ":1150,"мес":950,"мет":2904,"мен":3380,"ни ":9820,"мер":1511,"мед":429,"мвр":974,"не ":1045,"на ":51854,"лош":669,"му ":324,"лни":1621,"лно":1301,"лна":1254,"лок":568,"лог":1288,"лож":284,"лос":414,"лот":1377,"лом":349,"лон":478,"лов":1561,"лоб":414,"луч":790,"луц":507,"лув":419,"луѓ":365,"лст":374,"лск":1346,"лту":424,"лта":406,"лиг":412,"лив":613,"лиз":933,"лим":882,"лик":2508,"лез":472,"лев":497,"лег":370,"лед":1625,"лес":486,"лер":345,"ми ":1067,"лен":3033,"лем":2373,"лек":1469,"лет":727,"лку":522,"мо ":748,"лиц":678,"лич":1268,"лис":1550,"лит":1817,"лиф":510,"лин":1340,"лио":355,"лип":313,"лиј":1726,"лка":359,"пат":1339,"пад":1381,"пак":283,"пан":956,"пар":1020,"паѓ":517,"ре ":437,"оѓа":2306,"ра ":3142,"рв ":449,"пин":286,"пис":1343,"пиш":657,"оја":4079,"ојн":740,"ојо":993,"оју":491,"пла":934,"пли":320,"пле":333,"ро ":695,"пло":389,"рз ":285,"ри 
":5094,"пер":1862,"пес":307,"пет":425,"ори":3831,"опј":624,"орд":482,"оре":2005,"орг":1108,"орс":410,"оро":1288,"орм":1326,"орн":663,"опш":1157,"опс":733,"ора":1582,"опе":396,"опи":565,"опо":646,"опр":349,"опа":631,"оте":1213,"отк":367,"оти":883,"ото":5629,"отн":448,"отр":639,"ота":373,"осе":641,"оси":467,"оск":407,"осл":1286,"осн":1425,"осо":623,"ост":6452,"ору":832,"орт":556,"осв":433,"омн":415,"оми":891,"оме":1683,"ома":1343,"олу":1982,"олс":527,"олн":609,"по ":1690,"оло":2164,"олк":576,"олж":334,"оле":2647,"оли":2456,"олг":289,"ола":807,"окр":1306,"окт":286,"око":2063,"онс":2772,"онт":604,"онц":377,"они":3271,"оно":1647,"она":1646,"онг":566,"оне":1032,"омо":661,"омп":789,"ошк":618,"очи":455,"оче":494,"очн":1426,"офи":716,"оце":494,"оци":931,"оцн":311,"ова":2348,"обр":811,"обо":536,"обл":1113,"оби":1201,"обе":598,"па ":1261,"оис":379,"оим":505,"ока":1091,"ој ":6820,"озд":514,"ози":514,"оза":639,"озо":309,"озн":2037,"оиз":367,"одн":2574,"оди":4273,"одр":1508,"одо":1251,"оед":304,"оем":310,"оен":405,"пи ":325,"оет":296,"оже":937,"ове":2546,"ови":5165,"обј":366,"ово":2355,"овн":742,"овр":984,"овс":795,"ога":1201,"оги":878,"огл":313,"ого":815,"огр":1093,"огу":753,"ода":1107,"одг":298,"одв":294,"оде":2517,"от ":21175,"нот":2355,"нос":3399,"ос ":575,"ном":868,"ное":302,"ног":889,"нов":2577,"ор ":1906,"он ":2167,"нкц":314,"нка":519,"нки":331,"мја":559,"ниј":3754,"мји":343,"нир":495,"нис":1095,"нит":3914,"ним":450,"нин":793,"ол ":609,"нио":3455,"нич":1161,"ниц":2089,"нци":1726,"нцу":394,"нув":1506,"нта":1389,"нте":740,"нти":1942,"нто":379,"нтр":628,"нск":8777,"нст":1138,"сам":861,"сан":508,"сат":511,"те ":14904,"сво":1166,"све":1266,"себ":334,"сев":971,"сед":864,"сел":2274,"сек":726,"сеп":284,"ти ":4039,"сен":517,"сем":349,"сет":758,"сер":521,"сис":839,"сит":795,"син":510,"сил":492,"сим":340,"ски":15155,"ска":7767,"сиј":608,"сли":753,"сле":1796,"сла":744,"ско":2753,"сме":854,"слу":574,"сло":1010,"то ":13336,"сна":721,"сни":929,"соб":473,"сов":600,"сод":296,"соз":452,"сок":602,"сно":1836,"спе":625,"спа":419,"сот":388,"сос":874,"сон":314,"соч":283,"соц":292,"сре":870,"спо":1324,"спр":420,"роц":373,"рот":1919,"роф":371,"роп":944,"рос":834,"ст ":3322,"рој":2600,"рст":683,"рти":754,"рск":2698,"руа":354,"рув":1033,"руг":1560,"руж":292,"руп":1220,"рус":732,"рум":335,"рци":855,"рхе":302,"рхи":519,"АД ":331,"рши":562,"рчк":744,"та ":23306,"рад":4624,"раз":2373,"раа":405,"раб":909,"рав":1584,"рам":1207,"ран":5737,"рап":368,"раи":514,"рак":1026,"рал":1971,"раф":696,"рач":346,"рац":462,"рас":1293,"рат":2616,"рби":405,"рањ":570,"рај":718,"рва":669,"рди":936,"реб":752,"рев":1305,"рег":1001,"ред":3985,"реа":398,"рет":3047,"рес":955,"реп":457,"си ":856,"рен":1613,"рем":1904,"рел":504,"рек":1638,"рез":448,"реж":432,"ржа":1664,"реч":605,"реш":461,"се ":11859,"рво":342,"рве":449,"рви":554,"рга":1142,"рда":316,"пје":616,"рио":1095,"рип":420,"рим":878,"рин":740,"рик":1477,"рил":774,"рии":353,"рич":442,"рит":2109,"рир":852,"рис":2774,"рка":561,"ркв":703,"риј":3270,"рза":354,"реј":340,"риб":299,"рив":398,"рие":488,"рид":648,"риз":552,"рзи":395,"рни":1032,"рна":1303,"рок":621,"рол":306,"ром":966,"рон":752,"роз":340,"рои":701,"ров":2202,"рог":438,"род":3344,"роб":326,"рно":847,"рми":912,"рма":1504,"со ":5615,"пра":1665,"прв":1141,"при":3441,"пре":5957,"про":4233,"поп":440,"пор":1847,"пос":2237,"пот":1782,"поч":860,"пој":398,"рт ":589,"пое":340,"под":1948,"пог":377,"пов":1628,"пон":487,"пом":1198,"пол":2038,"пок":698,"поз":1786,"пуб":1193,"пст":289,"пск":1161,"пшт":1492,"са 
":460,"вар":956,"ват":2229,"вач":868,"вај":507,"вањ":3265,"ваа":2386,"ван":1597,"вал":1237,"га ":1478,"бук":590,"вто":1107,"вск":1077,"вст":329,"врд":656,"вре":1804,"ври":1040,"врз":737,"вро":757,"гу ":683,"вру":340,"врш":792,"вој":2262,"вол":822,"вои":368,"воз":430,"вое":499,"вод":1487,"вот":2101,"вор":1284,"вос":388,"вни":1914,"вна":1234,"вно":1024,"вле":285,"вла":692,"го ":2513,"вкл":312,"виј":388,"вич":290,"бје":284,"виз":445,"виж":381,"вил":734,"вин":1260,"вио":811,"вис":1212,"вит":1405,"вид":1204,"вие":441,"веќ":667,"вес":376,"вет":2185,"вер":2282,"вен":3438,"ги ":1837,"вел":447,"век":1300,"вез":364,"вее":283,"вед":808,"ва ":8865,"бан":796,"бал":536,"ачи":1031,"ачк":640,"ачу":385,"ашк":290,"афс":366,"афи":298,"ача":344,"аче":347,"аци":3529,"апс":423,"апр":517,"аоѓ":2290,"апа":1524,"апо":665,"апи":499,"арх":678,"арс":1354,"арт":1039,"ару":298,"аса":450,"аре":1123,"ард":653,"ара":1867,"арн":612,"аро":1814,"ари":3185,"арк":629,"аст":2995,"ата":20566,"аси":680,"асе":1117,"асл":312,"аск":436,"асп":410,"асо":310,"асн":388,"ату":514,"ате":2125,"ати":3730,"атк":340,"атн":495,"атп":334,"ато":1865,"атс":493,"атр":330,"бол":467,"бод":337,"бор":1569,"бот":1139,"аќа":380,"бро":2687,"бри":330,"бре":488,"бра":1074,"ајќ":358,"бич":355,"аја":393,"ајг":681,"ајд":338,"биј":384,"ајм":316,"ајн":658,"ајп":372,"ајс":402,"ајч":379,"ања":532,"бла":981,"бли":2035,"ање":3967,"во ":22199,"беш":291,"ви ":3384,"бен":594,"бер":412,"без":326,"бед":404,"бел":545,"бир":443,"бит":461,"бил":3972,"бид":516,"ве ":532,"бат":375,"бар":494,"аѓа":856,"дба":505,"дан":444,"дар":782,"дат":1402,"дви":611,"два":530,"две":568,"ед ":1436,"дал":491,"дад":551,"дав":666,"ев ":387,"дек":1195,"дем":341,"дел":3411,"ден":5845,"дер":607,"деј":478,"дес":441,"дво":453,"ез ":342,"дст":314,"дск":660,"дро":328,"дру":1086,"држ":2045,"дре":612,"дра":666,"ет ":2199,"дув":889,"ец ":658,"ен ":10275,"диш":466,"диц":456,"ем ":1181,"диј":826,"дин":5382,"ел ":3388,"дис":491,"дит":597,"ек ":1095,"доб":755,"дов":988,"дод":356,"ес ":859,"дос":369,"дол":887,"док":331,"дон":4125,"дом":347,"доц":331,"дот":1786,"дна":2163,"дни":2521,"дне":428,"ер ":2117,"дно":2480,"дми":387,"al ":294,"да ":4123,"гал":357,"гаш":492,"гат":448,"ган":1283,"гар":1072,"де ":1153,"and":335,"an ":327,"вув":2038,"гол":2184,"гот":553,"гор":505,"гов":1806,"год":3338,"гру":1162,"грч":711,"гра":5732,"гри":662,"гре":320,"гус":349,"ген":767,"гер":583,"ди ":1894,"гео":440,"гит":546,"гио":772,"гиј":928,"ati":284,"гле":564,"гла":1406,"до ":1648,"гли":1163,"жан":284,"жат":386,"жав":1162,"за ":5828,"жит":423,"еја":386,"ејз":450,"ејс":859,"жив":1211,"жењ":316,"еѓу":1271,"жел":288,"зи ":329,"жен":928,"жно":527,"жни":489,"еќе":715,"жна":492,"ење":1370,"ς ":427,"жув":901,"еј ":342,"ежи":423,"еду":826,"жи ":501,"еза":382,"езн":323,"езд":396,"езе":477,"ези":298,"ева":629,"еви":727,"еве":1630,"еба":481,"ебе":337,"его":1463,"еда":979,"еде":2597,"еди":2433,"едо":4194,"едн":3441,"ево":1021,"же 
":578,"евр":732,"ега":454,"еги":839,"ент":3074,"ену":344,"енс":1151,"енц":490,"енк":321,"ени":4714,"емј":955,"ено":2370,"ена":3300,"ене":1764,"енд":500,"еол":430,"еор":360,"еог":380,"епт":359,"епу":1119,"епо":307,"ерс":707,"ерм":1219,"ерн":1475,"еро":1509,"ери":4408,"ерз":406,"ерк":320,"ере":664,"ера":2000,"еке":284,"еки":306,"екл":312,"еко":2603,"ект":1806,"екс":1174,"еку":1063,"ека":1814,"елн":396,"ели":2173,"елу":301,"елс":372,"ело":2613,"еле":3177,"ела":1614,"елб":498,"емо":621,"еми":2065,"еме":2170,"ема":1276,"емв":738,"еци":352,"ечк":442,"ечн":297,"ече":723,"ешт":455,"ешн":741,"ешк":443,"еше":584,"есе":1065,"еси":417,"еск":430,"есн":1000,"есо":341,"есу":502,"ест":3547,"ета":1622,"ети":1694,"ете":797,"етк":321,"етр":679,"ето":5033,"етн":1147,"етс":2563,"етх":473,"иве":1425,"иви":638,"ива":1008,"К ":292,"иен":500,"иер":283,"иет":479,"иже":402,"идо":449,"игр":971,"ида":352,"иди":356,"иде":991,"иво":1058,"ивн":1457,"ига":533,"иги":461,"ико":1235,"ики":550,"ика":4788,"иит":454,"изр":316,"изм":698,"изи":1398,"изд":435,"изг":666,"иза":1563,"изв":1007,"изб":296,"иј ":327,"ион":2538,"иот":8785,"инц":810,"иод":616,"Д ":485,"ине":862,"ини":3140,"ино":890,"инс":2166,"инт":558,"ину":385,"ина":9820,"инд":577,"инг":507,"ими":494,"име":2091,"имс":590,"имо":452,"имп":928,"има":2023,"или":5248,"иле":1067,"илм":478,"ило":1256,"ику":390,"ила":1963,"иси":462,"исе":361,"иса":487,"ист":8007,"исо":810,"исн":402,"исл":520,"иск":3494,"ити":1229,"ите":15877,"ита":1364,"иту":775,"ито":1415,"ипа":375,"ипи":348,"ира":3770,"ире":311,"ири":916,"иро":1138,"ица":1798,"ици":3395,"ифи":395,"ифо":430,"ичи":380,"ичк":3274,"ичн":2026,"ича":867,"иче":543,"ишу":288,"ишт":844,"иша":368,"ка ":12158,"ив ":663,"зав":514,"зае":409,"збу":314,"збо":1117,"ид ":907,"зви":372,"зве":330,"зац":679,"зат":324,"зар":363,"зап":1380,"зан":720,"зам":529,"зби":290,"згр":557,"зда":948,"зво":628,"ие ":1909,"зер":453,"ии ":1591,"зен":322,"зем":1002,"из ":417,"зик":2343,"зир":670,"ил ":3300,"зин":797,"ик ":3740,"ин ":1454,"им ":736,"зиј":706,"зиц":855,"зич":514,"зит":574,"змо":513,"зли":625,"зна":3413,"зни":663,"ир ":506,"ис ":533,"зон":283,"ит ":464,"зра":561,"зув":311,"хе":1090,"хи":1138,"хо":1598,"хр":886,"ха":892,"ци":13250,"цн":316,"цр":866,"цу":450,"ца":2399,"це":3069,"чл":414,"чн":4154,"чо":686,"чи":3265,"чк":5243,"чу":1362,"че":4695,"ча":1926,"шп":336,"шн":1414,"шк":1724,"ши":1346,"шт":6867,"шу":599,"ше":1638,"ша":1244,"ск":26715,"см":1616,"сл":4921,"со":11740,"сн":3623,"ср":1256,"сп":3213,"св":2617,"се":20196,"си":5941,"рш":1040,"са":3377,"рс":3892,"рт":2549,"ру":6871,"рх":1018,"рц":1077,"рч":928,"тн":2841,"тл":534,"тк":1435,"тс":3837,"тр":9143,"тп":601,"то":28365,"те":29796,"тв":4680,"ти":19086,"та":36712,"су":2000,"ст":32059,"сц":293,"ур":4376,"уп":2469,"ут":1165,"ус":3800,"ум":2729,"ул":3220,"ун":2432,"уз":1241,"ук":2337,"уд":1768,"уг":3250,"уж":1378,"уе":298,"уа":1187,"уб":2169,"ув":11713,"тт":407,"ту":4071,"тх":499,"фу":697,"фс":456,"фр":997,"фо":2242,"фи":3122,"фе":1624,"уѓ":407,"фа":1468,"уч":2178,"уш":1171,"уц":759,"џа":360,"is ":354,"ion":475,"јќ":588,"ње":5394,"ња":1280,"ќе":1037,"ќи":734,"ќа":686,"јк":379,"ји":475,"јп":396,"јо":1613,"јн":1588,"јм":402,"ју":2500,"јс":1508,"јч":415,"ја":31927,"јв":456,"јг":710,"јд":458,"је":1464,"јз":570,"ѓа":3178,"ѕв":310,"ѓу":1282,"ѓе":418,"Ју":674,"Ја":560," Ma":317,"he ":401,"а ":151197,"Ис":747,"Им":497,"Ин":571,"к ":7206,"Из":327,"Ле":628,"Ли":491,"Ла":558,"Ку":463,"Кл":294,"м ":4019,"Ко":1594,"Кр":735,"Ки":516,"Ка":1846,"л ":10222,"На":1551,"Не":719,"Ни":442,"Мо":930,"о 
":68362,"Ма":3832,"Ми":784,"Ме":823,"Ло":317,"н ":18349,"Лу":312,"Па":998,"Пе":965,"Пи":295,"По":1730,"с ":3798,"Оп":470,"р ":8070,"Ос":606,"Ов":1078,"п ":907,"Но":521,"в ":3431,"Ам":433,"Ан":848,"Ал":1075,"Ав":472,"Ба":928,"Ар":692,"б ":542,"АД":338,"д ":20357,"Во":1671,"Ве":686,"Ви":848,"Га":384,"г ":2028,"Бо":738,"Бр":705,"Бе":879,"Би":685,"Ва":687,"Бу":778,"Ди":461,"Де":856,"Др":341,"До":567,"Ег":326,"Ев":564,"Ге":525,"Гр":1204,"Гл":343,"Го":804,"е ":62888,"Да":437,"и ":77640,"За":893,"Зе":393,"з ":1486,"Ер":317,"ia ":314," km":286," o ":369,"Ст":846,"Та":770,"Ти":438,"Те":690,"То":1430,"ф ":405,"Тр":452,"Ту":524,"х ":407,"Пр":1600,"СА":368,"Ра":521,"Ре":1553,"Ри":680,"Ро":593,"т ":35867,"Ру":528,"Са":911,"Св":650,"Си":589,"Се":1780,"Сл":373,"Ск":807,"Ср":474,"Сп":404,"у ":4465,"Со":1077,"Це":313,"ш ":868,"Цр":465,"Че":319,"ц ":857,"Фр":445,"Фи":522,"Ха":496,"Хр":339,"ч ":920,"Хо":298,"Хе":392,"мб":886,"ма":12609,"мв":986,"ме":13316,"лј":325,"ми":7979,"лм":650,"лн":4295,"ло":11112,"лс":1887,"лт":1426,"лу":4631,"лб":1321,"ла":14718,"лж":375,"ле":15180,"лд":283,"лг":660,"лк":1166,"ли":21964,"кн":889,"кл":2271,"кр":4199,"кс":1892,"ко":29166,"кт":3820,"ку":4201,"кц":796,"ка":27714,"ки":22033,"кв":1916,"ке":4892,"иј":21371,"ињ":534,"иш":2058,"ио":12982,"ип":1917,"им":7924,"ин":21871,"ик":11472,"ил":14255,"ии":2086,"иц":5454,"ич":7399,"иф":1198,"их":496,"ит":22205,"иу":361,"ир":7402,"ис":15994,"ри":23710,"пј":828,"рк":2169,"рл":526,"рм":2952,"рн":3559,"ро":18691,"рп":592,"ра":32025,"рб":1278,"рв":2697,"рг":2004,"рд":2000,"ре":21468,"рж":2185,"рз":1370,"пш":1508,"пр":16649,"пт":674,"пс":1836,"пу":1996,"ој":14117,"пи":3835,"пн":348,"по":20419,"пл":2009,"па":7611,"пе":4276,"оѓ":2317,"ош":1554,"оч":2768,"оц":1836,"ос":13243,"ор":16926,"оп":6149,"оо":627,"ох":400,"оф":1458,"оу":339,"от":31750,"ок":6835,"ол":13527,"ом":7054,"он":15080,"ож":1857,"оз":4638,"ои":4773,"ов":16820,"ог":5746,"од":29777,"ое":2635,"оа":1652,"об":5296,"нц":2585,"нч":293,"нт":6041,"нс":10729,"нф":376,"ну":2258,"но":20747,"нл":307,"мј":1034,"нк":1682,"нз":408,"ни":34239,"не":10732,"нг":2911,"нд":3033,"нб":290,"на":77634,"му":1844,"мс":1507,"мр":516,"мп":2215,"мо":6592,"мн":1816,"ге":2417,"ги":5000,"гн":490,"го":11946,"гл":3601,"гр":9052,"гу":1829,"дг":373,"дв":2181,"дб":788,"да":9183,"вг":415,"ве":13770,"ви":13074,"бј":460,"вк":750,"вл":1274,"вн":4219,"во":32617,"вр":6858,"вс":1589,"ву":2258,"вт":1215,"га":5709,"би":7825,"бе":3314,"аѓ":883,"бр":4813,"бн":514,"аќ":462,"бо":4771,"бл":3356,"ањ":4559,"ај":7017,"бу":1986,"ва":22923,"ад":10986,"ае":1609,"аж":1096,"аз":7096,"аа":4444,"аб":2331,"ав":11512,"аг":2167,"ам":6375,"ан":27243,"ао":2597,"ап":4252,"аи":936,"ак":12466,"ал":15259,"ах":737,"аф":1127,"ач":3591,"ац":3769,"ас":7790,"ар":17103,"ау":1031,"ат":37636,"ба":4355,"аш":1946,"зр":646,"зу":791,"зи":7831,"зо":1382,"зн":4381,"зм":998,"зл":1008,"ив":6555,"иг":2802,"иа":334,"иб":811,"иж":639,"из":7830,"ид":3802,"ие":3605,"ј ":8902,"жу":939,"еј":2932,"жи":2889,"ењ":1526,"жн":1527,"еќ":923,"за":12607,"зб":1994,"зв":1627,"зг":962,"зд":1570,"зе":2250,"еф":889,"ет":17396,"ес":8709,"ер":16613,"еп":2846,"ео":1987,"ен":28920,"ем":9666,"ел":15327,"ек":11126,"еи":513,"ез":2727,"еж":1162,"ее":663,"еѓ":1275,"же":2514,"жа":2420,"еч":1979,"еш":2657,"ех":563,"ец":1329,"дс":1024,"др":5219,"ду":2009,"дн":7627,"дм":697,"до":12951,"ди":12328,"дл":415,"де":15216,"еб":1885,"ев":5864,"ег":3639,"ед":16863,"еа":1309," th":283,"er ":422,"es ":384,"ѓу ":904,"ѓа 
":2452,"уци":681,"учу":537,"учн":321,"учи":444,"уче":651,"ушт":599,"уѓе":382,"фев":335,"фер":358,"фиј":319,"фин":438,"фил":788,"фик":307,"фиц":459,"фун":322,"фра":515,"фот":446,"фор":1281,"фск":446,"ца ":1645,"ци ":2599,"хео":283,"хем":340,"хри":386,"ход":569,"хит":310,"сто":6090,"стр":4031,"ств":3177,"сте":2303,"сти":4708,"ста":7801,"сув":590,"сту":413,"тав":2466,"таа":321,"так":837,"тал":1456,"тан":2640,"тат":1512,"тап":396,"уг ":454,"тар":2183,"тац":321,"тву":302,"твр":556,"тво":2438,"тве":696,"тва":548,"тем":1441,"тел":2805,"тео":301,"тен":1940,"тер":3359,"тет":1279,"тес":476,"тек":1487,"тив":1778,"тка":575,"тиј":1079,"ум ":532,"тич":2394,"тин":2798,"тик":921,"тил":553,"тир":892,"тис":499,"тио":463,"тип":307,"тит":1897,"тко":341,"тки":324,"тно":744,"ток":893,"тол":870,"тои":602,"тов":635,"тоа":832,"тни":1408,"тна":541,"тпр":359,"тре":1295,"тра":3106,"три":1787,"тор":3285,"тот":2026,"том":830,"тон":944,"ус ":503,"топ":356,"тоц":289,"точ":1182,"тој":1245,"тст":1499,"тро":1844,"тру":758,"тсе":374,"тск":1871,"тхо":477,"туд":444,"тув":539,"тур":2009,"ува":11421,"уго":905,"уги":667,"уга":1084,"уда":547,"уар":643,"убл":1212,"узи":649,"ужн":805,"уди":328,"удб":322,"уме":785,"уми":352,"улт":955,"ули":697,"ула":701,"ука":732,"укв":382,"упо":434,"ура":1158,"ури":529,"уре":411,"упа":1105,"унк":323,"уна":414,"уни":908,"уст":803,"уте":298,"урц":319,"урс":618,"урн":333,"уск":1016,"уси":423,"шки":805,"шко":400,"шни":549,"шно":321,"шна":496,"шка":496,"шир":378,"шин":495,"шен":456,"шан":341,"што":3204,"шув":475,"ште":1404,"шти":1481,"шта":515,"on ":498,"че ":342,"цен":1177,"чи ":884,"цел":738,"циј":5710,"ции":969,"цио":1351,"цир":340,"цит":991,"цар":343,"цат":333,"цус":389,"црк":515,"чењ":285,"чев":319,"чен":1330,"чес":1120,"чет":714,"чле":406,"чко":656,"чки":2688,"чка":1890,"чин":910,"чит":363,"ше ":558,"чар":740,"чна":1312,"чов":406,"чни":1436,"чно":1236,"чув":1137,"ќи ":636,"us ":554,"tio":345,"ter":323,"the":294,"ѕве":297,"ѓаа":300," ар":1051," ас":329," ат":297," ба":813," ав":1128," ад":312," аз":335," ал":614," ак":630," ан":1462," ам":537," ап":418," бу":674," ва":516," би":4668," бе":1085," бр":3187," бо":1018," бл":584," вт":300," ви":1958," ве":1999," во":21906," вр":2219," вл":994," вк":456," дв":1419," да":2966," го":6416," гл":1761," гр":5370," ге":1159," ги":1230," ев":363," ед":3169," до":4472," др":2545," де":5656," ди":1319," же":492," ел":592," ек":551," зе":866," за":9018," зб":1071," жи":1512," зн":1014," иг":813," из":2798," ил":3716," ин":1410," им":3303," ит":411," ис":2928," ка":5502," ки":580," кр":1663," ко":13222," кн":382," кл":860," ку":901," ла":991," ли":1394," ле":768," лу":423," ло":957," ме":2789," ми":1057," ма":4441," мо":2407," мн":729," му":1023," ни":1307," не":4874," на":43543," но":1774," ол":358," ок":1879," оз":315," ов":1233," од":15767," об":2355," оф":386,"њет":1203," от":647," ор":1315," ос":2370," оп":2029," по":15733," пл":1137," пи":929," пе":1794," па":2332," Ре":1553," Ра":517," Ро":593," Ри":680," Пр":1592,"ќа ":348," СА":357," Пе":961," Па":996," По":1728," Пи":295," Ос":605," Оп":468," Те":689," Ти":438," То":1430," Тр":451," Ст":843," Та":770," Св":648," Си":588," Се":1778," Сл":373," Ск":806," Сп":403," Ср":473," Со":1077," Ру":528," Са":911," Фр":445," Фи":520,"ќе ":750," Ту":523," Цр":465," Це":313," Хр":338," Хо":298," Хе":392," Ха":496," Че":319," Ба":927," Ар":690," Ан":848," Ам":433," Ал":1073," Ав":472," Ва":686," Бу":778," Бо":736," Бр":703," Бе":877," Би":680," а ":1703,"јќи":588," Ег":326," Ев":563," Ди":459," Де":856," Др":341," 
До":561," Ер":317," Га":384," Ве":685," Ви":845," Во":1669," Да":437," Ге":523," Гл":341," Го":800," е ":17047," Гр":1204," Ис":742," Ин":570," Им":496," Ки":514," Ка":1843," и ":17952,"ње ":4162," За":893," Зе":393," Из":327," Мо":928," На":1547," Не":717," Ни":438," Но":521," Ов":1078," Кл":294," Ко":1590," Кр":732," Ку":462," Ла":557," Ле":628," Ли":490," Ло":316," Лу":312," Ма":3825," Ме":823," Ми":780," Ја":559," Ју":674,"јна":646,"јни":502,"јзи":419,"јек":289,"јго":612,"јал":1511,"јан":1997,"јат":4096,"јав":629,"јаз":2727,"јад":290,"ња ":993,"јче":337,"југ":496,"јуж":519,"јск":745,"јст":589,"јот":1205,"је ":862,"ја ":19199},"n_words":[2810075,3234074,2357835],"name":"mk"}
\ No newline at end of file
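Each profile above is a single-line JSON document with three fields visible in the data: "name" (the language code), "freq" (a map from 1-, 2- and 3-character n-grams to their training counts), and "n_words" (the total 1-gram, 2-gram and 3-gram counts). A minimal Python 3 sketch of reading one of these files directly; the relative path is an assumption, and this is not the loader code from this commit:

import json

# Assumed path relative to the repository root; adjust as needed.
with open('nlp_resource_data/langdetect/profiles/mk', encoding='utf-8') as f:
    profile = json.load(f)

print(profile['name'])         # 'mk'
print(profile['n_words'])      # [2810075, 3234074, 2357835]: 1-, 2-, 3-gram totals
print(profile['freq']['на '])  # 51854: count of the trigram 'на ' in the training data

Under Python 3, open() with encoding='utf-8' reads these UTF-8 profiles directly, where Python 2 code typically had to go through codecs or explicit decode() calls.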
diff --git a/nlp_resource_data/langdetect/profiles/ml b/nlp_resource_data/langdetect/profiles/ml
new file mode 100755 (executable)
index 0000000..4223344
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/ml
@@ -0,0 +1 @@
+{"freq":{"D":213,"E":274,"F":167,"G":214,"A":497,"B":279,"C":430,"L":186,"M":384,"N":219,"O":182,"H":218,"I":415,"K":134,"T":348,"P":401,"S":493,"ിത ":164,"R":240,"f":703,"g":1106,"d":1581,"e":5009,"b":935,"c":1838,"a":5004,"n":3585,"o":3390,"l":2378,"m":1762,"k":455,"h":2313,"i":4303,"w":903,"v":415,"u":1770,"t":4207,"s":2723,"r":3549,"p":1477,"y":840,"x":345,"ില ":173,"ിയ ":1562," m":228," n":149," o":399," h":314," i":345," d":151," e":167," f":185," a":486," b":205," c":370," t":707," w":308," p":446," s":377," r":294," H":179," I":298," N":154," O":131," L":160," M":333," B":247," C":358," A":386," F":135," G":193," D":163," E":193," S":374," R":193," P":342," T":265,"ും ":8335,"ا":186,"ുക ":185,"ീയ ":378,"ാൻ ":875,"ാർ ":676,"ാൾ ":197,"ാൽ ":543,"ാഗമ":334,"ാഗത":387,"ാഗങ":167,"ാക്":1414,"ാകു":356,"ാങ്":227,"ാജാ":164,"ാജ്":625,"ാടി":272,"ാടക":206,"ा":135,"In":142,"ാട്":1178,"ാടു":187,"ിംഗ":155,"ാണു":455,"Th":138,"ാണി":500,"ാണപ":194,"ാതി":141,"ാതന":248,"ാണ്":9070,"ാത്":940,"ുഴ ":240,"ാനം":570,"ാദി":179,"ാനത":448,"ാധാ":281,"ാധി":232,"ാനങ":199,"ാദ്":187,"ാനു":295,"ാനി":583,"ാനാ":214,"ാനപ":155,"ാനമ":615,"ാപന":156,"ാന്":803,"ാപി":311,"ാപ്":231,"ാമം":286,"b ":294,"a ":674,"ാമത":404,"ാമപ":768,"ാമമ":268,"ാമാ":133,"ാമി":294,"ായക":210,"ിൻ ":265,"ാമ്":413,"ാരം":468,"ായത":1564,"ായി":3609,"ായാ":181,"ായ്":163,"ാരമ":247,"ാരന":265,"ാരത":490,"ാരണ":361,"ായു":463,"ാരാ":454,"ാരി":539,"ാലം":169,"ാരു":187,"ിൽ ":7420,"ാര്":247,"ാറു":256,"ാറി":215,"ാളം":134,"ാലക":339,"ാലയ":259,"ാറ്":497,"ാലത":252,"i ":213,"ാവ്":303,"ാവി":316,"ാവു":302,"ാവാ":229,"ാഴ്":134,"ിക്":6093,"he":539,"ha":244,"gh":231,"ികാ":247,"ികമ":181,"ികള":716,"g ":236,"ea":262,"ec":168,"ed":311,"de":246,"ാളി":252,"di":309,"h ":190,"ാലി":392,"ാലാ":156,"el":240,"ാളത":153,"ാലൂ":542,"ാലു":158,"ികം":150,"en":424,"em":159,"et":186,"es":424,"er":794,"ca":205,"e ":1341,"ിച്":3119,"ാഹി":294,"ിചെ":253,"da":131,"ിങ്":403,"ാസ്":1047,"f ":272,"ct":158,"co":290,"ാസി":297,"ci":170,"ch":206,"ce":229,"c ":161,"ാസമ":136,"ാഷ്":369,"ാഷയ":224,"d ":539,"at":600,"as":248,"ar":530,"al":614,"ാശി":190,"am":238,"an":747,"ac":198,"ad":149,"ാസം":149,"ികൾ":430,"nt":385,"ിഞ്":388,"ns":149,"no":135,"of":259,"om":259,"on":664,"ു് ":338,"ol":194,"ou":163,"op":135,"or":514,"r ":518,"pe":169,"ിടക":167,"lo":176,"ll":179,"o ":216,"ma":277,"mb":320,"me":225,"mi":137,"p ":313,"na":310,"nc":163,"nd":424,"ne":310,"ng":305,"ni":239,"്":186908,"ൊ":2828,"ോ":12821,"ൈ":2243,"െ":27110,"േ":13547,"ൂ":8137,"ൃ":1848,"ീ":8295,"ു":58406,"ൗ":469,"m ":405,"ൽ":11771,"li":349,"ർ":12870,"ൾ":4761,"le":352,"ൻ":6054,"ൺ":871,"la":410,"n ":864,"ഈ":1751,"ഉ":3287,"എ":6292,"ഏ":1399,"ht":515,"hu":298,"ം":26623,"hi":201,"ആ":4557,"ഇ":5473,"അ":8089,"id":145,"ച":18270,"ic":457,"ങ":13504,"ia":297,"ഘ":772,"ട":30378,"ഞ":4086,"ig":259,"ജ":7532,"ie":161,"ഓ":760,"ഒ":5000,"ഐ":333,"ഗ":10132,"ഖ":1936,"ക":65976,"ഔ":217,"ന":73400,"പ":39149,"is":392,"ഫ":2123,"it":348,"ബ":5674,"ഭ":5198,"മ":32874,"ിടെ":160,"യ":48687,"ഠ":364,"ഡ":3439,"ണ":22285,"il":179,"ത":67027,"ഥ":4613,"ിട്":804,"in":824,"ദ":11612,"io":368,"ധ":5371,"ഹ":4850,"സ":25595,"ി":86416,"ാ":63951,"റ":22910,"ര":47335,"ള":18695,"ല":30821,"വ":29294,"ഴ":3384,"l ":444,"ഷ":7585,"ശ":8287,"ww":338,"ിധാ":325,"y ":432,"ിദ്":640,"ിതി":938,"ിതാ":203,"ിതമ":146,"ve":190,"x ":258,"ul":150,"ur":239,"us":255,"um":390,"tt":296,"ിത്":1980,"w ":236,"to":263,"tr":153,"tp":241,"te":463,"ti":568,"th":747,"ta":278,"ss":146,"st":355,"se":201,"si":227,"ിപ്":1405,"rt":139,"ry":142,"ിനി":378,"ിനാ":453,"ro":259,"ri":601,"ിനെ":479,"re":445,"ിനു":1055,"ra":513,"t ":716,"ിന്":5173,"s 
":1099,"px":216,"ിനോ":212,"ിമാ":267,"ിയന":152,"ിയപ":914,"ിയമ":337,"ിയയ":135,"ിയത":291,"ിയി":1237,"ിയാ":1791,"ിയോ":318,"ിയു":1173,"ിയെ":180,"ിയേ":145,"ിയം":136,"ിഭാ":320,"ിലവ":146,"ിലാ":907,"ിലി":195,"ിലു":1762,"ിലൂ":238,"ിലെ":4614,"ിലേ":330,"ിലൊ":225,"ിലോ":400,"ില്":2038,"ിഴക":306,"ിളി":317,"ീകര":447,"ിറങ":160,"ിരാ":141,"ിരി":844,"ിരു":2143,"ിയൻ":295,"ിറ്":420," ല":2034," ര":2887," റ":942," ശ":1879," ഷ":238," വ":10435," സ":10862," ഹ":1263," ഡ":822," ധ":248," ദ":2030," ത":5869," ഫ":858," പ":15081," ന":8054," യ":1000," മ":9063," ഭ":2376," ബ":2531," ൽ":544,"ശ്ര":558,"ശ്യ":219,"ശ്ശ":386,"ശ്വ":377,"ശ്ച":178,"ശേഷ":320,"ഷത്":575,"ശൂർ":174," എ":6248," ഏ":1397," ഈ":1746," ഉ":3278," ആ":4494," ഇ":5444," അ":8029," ട":631," ജ":4225," ച":4988," ഘ":145," ഗ":2980," ഖ":305," ക":14030," ഔ":215," ഓ":757," ഒ":4983," ഐ":325,"ശേര":218,"സത്":172,"വർഷ":372,"ഷിണ":161,"സഞ്":150,"ഷിക":304,"വർഗ":216,"വർത":599,"ഷിയ":170,"സങ്":186,"സരി":133,"സമ്":174,"സമാ":200,"സമു":201,"സഭയ":152,"ഷ്ണ":218,"സഭാ":174,"ഷ്ട":557,"ഷ്ഠ":143,"ഷ്യ":741,"സന്":155,"ഷേത":398,"ഹത്":297,"സൂച":143,"സിസ":131,"സിയ":151,"സില":142,"സിന":374,"സിപ":159,"സാധ":303,"സാന":195,"സാമ":424,"സിക":356,"സാഹ":254,"സിദ":345,"സസ്":334,"ാം ":756,"സോഫ":142,"സ്യ":395,"സ്റ":846,"സ്ല":281,"സ്ഥ":2863,"സ്പ":221,"സ്ത":2267,"സ്ക":623,"സ്സ":397,"സ്വ":855,"ഹിത":291,"ഹാര":204,"ഹായ":132,"ഹിന":258,"ാന ":514,"ഹ്മ":132,"ിക ":723,"ാള ":284,"സർക":222,"ാല ":148,"ായ ":2610,"െക്":658,"െട്":1283,"െടു":2359,"െങ്":500,"േക്":437,"െള്":180,"േഖല":167,"െറ്":191,"െല്":138," ഈ ":1593,"െത്":201,"െന്":699,"െബ്":146,"െപ്":220,"െറി":225,"െറു":216,"െയ്":1600,"െയു":744,"െയാ":471,"െയി":161,"േശം":258,"േശത":140,"േഷം":140,"േശങ":199,"ൈക്":133,"േശ്":174,"േശി":194,"േശീ":282,"േഷ്":132,"േഹം":278,"േഹത":162,"േഷൻ":136,"േത്":452,"േതാ":159,"േണ്":180,"േന്":408,"േയു":430,"േരള":1271,"േരി":960,"േരു":255,"േറ്":263,"ീവി":408,"ുക്":824,"ുകൊ":132,"ുകി":165,"ുകാ":227,"ുകള":1051,"ുകയ":410,"ീറ്":411,"ീരത":159,"ീതി":299,"ുംബ":279,"ിസ്":1202,"ിശ്":421,"ിശേ":137,"ിവർ":154,"ിഷ്":341,"ിവയ":235,"ീക്":294,"ിഴ്":206,"ിവര":215,"ിവസ":160,"ിവാ":178,"ിവി":329,"ുവി":444,"ുവാ":341,"ുവന":251,"ുവര":355,"ൂക്":727,"ുഷ്":354,"ുറ്":170,"ുളം":204,"ുറി":322,"ുറത":245,"ുരു":345,"ുരാ":293,"ുള്":2528,"ുമ്":328,"ൂർ ":1389,"ുറം":199,"ുമാ":1413,"ുരം":260,"ുമു":219,"ുന്":10068,"ുനി":282,"ുപ്":479,"ുതു":171,"ുതി":335,"ുണ്":1051,"ുത്":1223,"ുതൽ":371,"ുദ്":545,"ുടെ":2766,"ുടേ":141,"ുട്":263,"ുടു":255,"ംഗീ":301,"ംഗ്":542,"ാർക":154,"ുടങ":281,"ാർത":251,"ാർഡ":136,"ുകൾ":669,"ീഷ്":363,"ീസ്":201,"ംഖ്":225,"ംഗല":153," ബ്":675," ഭര":361," മണ":230," ഭൂ":280," മത":278," ഭാ":1188," മദ":178," മന":430," മല":1061," മര":285," മറ":395," മഹ":339," മൂ":522," മു":1582," മീ":223," മി":342," മാ":1333," മോ":137," മേ":380," മെ":288," പൊ":392," പോ":463," പ്":4372," ബന":199," ഫ്":175," ബി":227," ബാ":363," ബു":137," ബോ":172," പക":257," ന്":326," നോ":233," പദ":316," പന":151," പണ":151," പത":547," പഞ":607," പട":612," പഴ":174," പല":234," പറ":573," പര":747," പെ":480," പേ":780," പൂ":332," പു":1278," പാ":1107," പി":660," നൽ":229," നക":244," നഗ":396," ദേ":508," ദ്":241," നദ":211," നട":599," നവ":174," നൂ":265," നീ":290," നെ":227," നേ":376," നാ":1197," നി":2688," തെ":531," ദക":157," ത്":183," ദി":368," തി":720," താ":982," തു":591," തീ":297," തൃ":268," തന":303," തമ":355," തര":169," തല":366," ഡി":290,"ൂറ്":264,"ംബത":191,"ൂരി":182,"ൂമി":187," ടെ":156,"ംബർ":289,"ൂണി":149," സർ":371," ഹൈ":162,"ൂന്":269," ഹാ":165," ഹി":383," സെ":318," സൂ":331," സു":414," സി":733," സാ":1185," സഹ":208," സസ":221," സ്":2807," സോ":248," സൈ":131," വർ":592," സഭ":212," സമ":812," ശേ":181," ശ്":451," ശാ":428," ശി":199," 
സം":1809," വൈ":304," വേ":548," വെ":601," വ്":771," വസ":228," വി":3041," വീ":201," വാ":826," വൃ":131," വന":287,"ുസ്":374," വല":423," വഴ":131," വള":462," വര":613," വയ":153," വക":168," വട":443,"ംവി":172," ലോ":543,"ൂട്":482," ലെ":167,"ിൽപ":165," ലി":203,"ൂടെ":257," ലാ":299,"ൂടു":168,"ൂടി":333," റോ":225," ലഭ":180,"ംസ്":701," രീ":168," രൂ":379," രാ":1161,"ിർമ":366," യു":294," രണ":452," യൂ":213," രച":157,"ിർത":148," ഉൾ":449,"ൃഷ്":193," എം":133,"ൃശ്":191," എന":4353," എട":138," ആധ":137," ആദ":541," ആന":214," ആയ":406," ആറ":135," ആര":244," ആല":221," ആവ":187," ആസ":220," ഇട":294," ഇത":1342," ഇദ":201," ഇന":1407," ഇല":190," ഇര":190," ഇവ":544," ഇസ":230," അർ":208," ഉദ":192," ഉണ":345," ഉത":324," ഉയ":159," ഉപ":1087," ഉള":175," ഇൻ":148," അം":186," അത":659," അണ":160," ആം":188," അട":472," അക":377," ആണ":724," ഇം":372," അവ":660," അസ":137," അറ":908," അല":394," ആക":232," അഭ":214," അമ":420," അയ":173," അര":179," അപ":189," അഥ":462," അദ":348," അധ":209," അന":672," ജന":950,"ൃതി":272," ചെ":1896," ചേ":382,"ൃത്":357," ചാ":241," ചി":892," ചു":295," ജോ":204," ജി":1596," ജീ":407," ജൂ":195," ജ്":148," ഗാ":220," കർ":226," ഗു":264," ഗ്":1743," ഗോ":177," ഗണ":178," ചര":212," ചല":414," ഓഫ":177," ക്":1197," കെ":231," കൈ":147," കേ":1543," കൊ":822," കോ":957," കാ":1855," കി":865," കീ":159," കു":1227," കൂ":693," കൃ":254," കവ":234," കര":347," കമ":372," കഴ":213," കള":233," കല":418," കന":144," കഥ":180," കട":337," കണ":816," ഏറ":601," എഴ":295," ഏക":259," എല":187," എറ":154," എസ":137," ഒന":341," ഒര":4105," ൽ ":540,"മി ":226,"ബർ ":418,"പ്ര":5359,"പ്യ":278,"പ്റ":162,"പ്പ":6916,"പോൾ":223,"രം ":1921,"ബത്":224,"ബന്":340,"ഫ്ര":256,"ഫ്റ":145,"പരമ":221,"പയോ":768,"പരി":493,"പരാ":131,"പറയ":460,"യം ":1215,"പള്":185,"പാട":326,"പാദ":161,"പാത":301,"പാല":480,"പിച":417,"നൽക":242,"പിക":595,"പിന":279,"പുക":146,"പാർ":143,"പുത":214,"പുര":764,"പുറ":547,"പുഴ":384,"പൂർ":310,"പെര":170,"പെട":3183,"പേര":707,"പൊത":227,"പോല":201,"രെ ":495,"രു ":4025,"ലം ":658,"ഭാഷ":528,"ഭാവ":201,"ര് ":145,"ഭിച":159,"ഭിന":146,"ഭാഗ":860,"ഭാര":363,"മങ്":199,"മന്":204,"മനു":281,"മപഞ":718,"മദ്":277,"മത്":656,"ഭൂമ":198,"മണ്":363,"ളം ":631,"മലപ":135,"മറ്":307,"മലയ":732,"റു ":169,"യർ ":285,"മരണ":133,"റി ":343,"ഭ്യ":155,"മമാ":310,"യൻ ":979,"രണ ":296,"ഭക്":141,"യി ":2287,"റം ":319,"യോ ":322,"യ് ":274,"യെ ":323,"രള ":241,"ബ്ര":460,"മൻ ":173,"ബ്ള":167,"ബ്ല":277,"ഭരണ":352,"രി ":963,"പി ":248,"ദ്ദ":733,"ധമാ":139,"ദ്വ":180,"ദ്യ":1112,"ദ്ര":848,"ദ്ധ":1337,"ദേഹ":453,"ദേശ":1461,"ദേവ":242,"ധീക":138,"ധിയ":166,"ധാര":358,"ധിക":430,"പ് ":643,"ധാന":997,"നടന":157,"നടത":237,"നത്":2604,"നതു":269,"നതി":539,"നതാ":252,"നങ്":656,"നഗര":403,"നക്":345,"ദത്":171,"ദിയ":235,"ദിവ":229,"ദിന":206,"ദായ":132,"ദിക":158,"പങ്":155,"പക്":254,"പകര":152,"മം ":430,"ബി ":170,"ന്ത":3077,"ന്ദ":1121,"ന്ഥ":210,"ന്ന":18547,"ന്ധ":586,"ന്റ":3777,"ന്മ":414,"ന്യ":476,"നോവ":149,"പനി":191,"പന്":169,"പതി":293,"പത്":765,"പട്":440,"പടി":267,"പഞ്":1398,"ബ് ":189,"നയി":167,"നറി":301,"നപ്":274,"നമാ":873,"ധ്യ":502,"നനം":232,"നദി":238,"നന്":309,"നൂർ":159,"നേത":186,"നേട":136,"നെയ":183,"നാൽ":247,"നുമ":332,"നുള":311,"നുസ":192,"നുഷ":303,"നുവ":212,"നിസ":191,"നും":1607,"നുക":143,"നിർ":607,"നിൽ":228,"നൂറ":277,"നിവ":495,"നിയ":759,"നിര":355,"നിറ":179,"നില":691,"നിമ":172,"നിന":1060,"ദർശ":140,"നിച":204,"നിക":437,"ഫ് ":323,"നാല":249,"നാമ":420,"നായ":1102,"നാണ":1039,"നാട":633,"വർ ":251,"ഹം ":356,"വകാ":131,"വക്":151,"വടക":415,"ഴിക":261,"ഷ് ":469,"ളിക":406,"ളായ":311,"ളാണ":384,"വംശ":155,"ളുട":947,"ളിയ":253,"ളില":1194,"ളും":648,"ളിൽ":1372,"ളെയ":186,"ളോക":168,"ള്ള":3418,"വൻ ":161,"ൾപ്":365,"ല്പ":290,"ല്ല":3043,"ല്യ":139,"ളരെ":204,"സം 
":495,"ഴക്":419,"ലായ":359,"ലാമ":187,"ലിക":373,"വ് ":420,"ലിന":217,"ലിപ":166,"ലാണ":736,"ലാത":169,"ലാം":153,"ലൂട":241,"ലുള":500,"ലൂക":546,"ലീഷ":334,"ലുക":166,"ലിയ":569,"ലും":1211,"ലേയ":136,"ലേക":222,"ലേജ":143,"ലെയ":139,"ലെങ":175,"ളത്":1319,"ലോമ":256,"ലോക":643,"ലൊന":171,"ൾക്":604,"ൽപ്":210,"ശസ്":398,"ശിയ":152,"സംഘ":223,"സംഖ":216,"സംഗ":292,"സംബ":143,"സംഭ":157,"ശീയ":288,"സംവ":186,"സംസ":658,"ശിക":160,"ശാല":131,"ശാസ":704,"വേദ":193,"വേണ":171,"വെള":233,"വെയ":137,"വൈദ":160,"വേഷ":140,"വൃത":136,"ർവ്":178,"വിൽ":193,"ൽക്":256,"വുമ":350,"ർഷത":185,"ശത്":307,"ശമാ":164,"ഷൻ ":245,"വ്വ":236,"വ്യ":986,"ർമ്":559,"ർപ്":202,"al ":246,"വസാ":210,"വഴി":162,"ർന്":390,"വളര":299,"വലി":422,"ർദ്":196,"and":176,"ർണ്":319,"വരി":311,"വരു":524,"വരെ":305,"ർത്":1697,"an ":167,"വിസ":175,"വിശ":604,"വിഷ":250,"വിവ":377,"വില":388,"വിള":304,"വിയ":254,"വിഭ":320,"ർഡ്":135,"വും":1320,"വാർ":151,"വാക":237,"വാത":177,"വാണ":177,"ർട്":250,"വാദ":195,"വിക":518,"സ് ":1944,"വാസ":359,"വായ":334,"വിത":325,"വിധ":550,"വിദ":293,"വിന":462,"വിച":163,"വിട":354,"വസ്":414,"ശങ്":338,"ർജ്":166,"ർച്":198,"വനന":179,"ൻസ്":171,"വത്":265,"ർഗ്":232,"ർക്":684,"ഴുത":366,"ഴിയ":159,"ൻറെ":142,"വയാ":176,"വയു":170,"സി ":306,"വയം":133,"ati":208,"വന്":355,"ളി ":445,"രയി":142,"റർ ":428,"രമ്":132,"രമു":245,"രമാ":1123,"വം ":264,"രളത":906,"രശസ":300,"രവു":183,"രവാ":158,"റക്":177,"രധാ":578,"യേക":178,"രദേ":627,"രനാ":222,"രന്":347,"രപ്":252,"യോഗ":971,"യ്ത":319,"യ്യ":1468,"യ്ക":431,"രഞ്":146,"യാപ":286,"യാന":412,"യാണ":2227,"യാക":148,"യിട":223,"യാസ":209,"യാള":680,"യിക":256,"യായ":924,"യും":2125,"യില":2744,"യിര":1399,"യുമ":248,"യുത":144,"യുന":1398,"യുട":1638,"രണം":295,"രണത":177,"രണമ":149,"യുള":358,"യിൽ":1778,"യൂട":261,"രണ്":518,"രത്":1578,"രതി":352,"രതീ":156,"രക്":477,"യവസ":141,"ലെ ":4757,"യസ്":188,"രങ്":806,"രജ്":131,"ലോ ":133,"ലി ":304,"യയു":269,"ലാ ":171,"യയി":607,"മ്ര":188,"മ്യ":161,"രൻ ":203,"യമാ":807,"മ്പ":1717,"മ്മ":1128,"രകാ":407,"മേര":234,"മെന":220,"മേഖ":168,"മുൻ":206,"യതി":155,"യത്":2606,"യപ്":1110,"യനാ":300,"യന്":328,"മാണ":3756,"മാത":372,"മാന":490,"മായ":3224,"മാക":397,"മിന":134,"മിയ":208,"മാര":358,"മാറ":260,"മാല":167,"മിക":467,"റ് ":1265,"മാസ":192,"മിച":154,"മൂല":253,"മൂന":234,"മുള":608,"മുസ":161,"മൂഹ":169,"മീറ":381,"രംഭ":134,"രംഗ":221,"മിഴ":237,"മില":152,"മുന":161,"മുണ":204,"മുത":397,"മുദ":221,"മാർ":420,"മുഖ":353,"മുക":147,"യക്":335,"യകാ":146,"യങ്":581,"മഹാ":304,"റെ ":3202,"ലസ്":209,"ലവി":164,"വി ":249,"ലയി":1550,"ലയാ":752,"ലയു":182,"വാ ":463,"ലമാ":175,"ഷം ":301,"ലപ്":395,"റ്റ":5132,"ലഭി":141,"റെയ":243,"റിൽ":141,"ലണ്":140,"ലത്":725,"റാണ":337,"റിന":263,"റിക":201,"ഴ് ":251,"റിച":232,"റും":178,"റിയ":1715,"റില":158,"റുക":338,"ലങ്":374,"ലച്":445,"ലക്":585,"റവു":574,"ലകള":209,"ശം ":392,"ഴി ":137,"റയു":379,"ര്യ":585,"റബി":132,"റപ്":145,"രോഗ":204,"രെയ":158,"റത്":391,"രുവ":555,"രിൽ":376,"രൂപ":574,"രീക":374,"രില":203,"രിയ":788,"രിസ":279,"രീത":277,"രും":137,"രീയ":292,"രുക":338,"രീര":138,"രുട":311,"രുന":2242,"രുത":325,"രുമ":189,"രാധ":160,"രാണ":212,"രാത":212,"രാഗ":175,"രാജ":994,"രിപ":260,"രിന":226,"രിട":191,"രിത":358,"രിച":602,"രാഷ":322,"രാശ":159,"രായ":317,"രാമ":1573,"റഞ്":136,"രാവ":195,"രിക":1867,"രാള":187,"റങ്":254,"രസ്":464,"രവർ":501,"ളെ ":475,"രസി":277,"ാ ":1411,"ി ":10063,"ീ ":761,"ു ":10222,"ഗീത":286,"െ ":14490,"കൾക":240,"േ ":611,"ഗിച":235,"ച് ":831,"ഗിക":587,"ഗാന":207,"ഘടന":192,"ൈ ":191,"ൂ ":136,"ഗത്":599,"മ ":394,"ര ":1278,"യ ":5708,"ion":318,"ഖ്യ":445,"വ ":552,"ഗമാ":450,"സ ":133,"ഷ ":141,"ചു ":503,"ല ":970,"റ ":307,"ഴ ":264,"ള ":2840,"ഗ്ഗ":220,"ഗ്ല":445,"് ":30096,"ഗ്ര":2123,"ോ ":1249,"ങൾ ":1356," In":137,"കല്":194,"കലാ":228,"കറ്":169,"കവി":299,"he 
":334,"കഴി":190,"കളെ":330,"കളു":895,"കളി":1388,"കളാ":321,"കിയ":350,"കിഴ":329,"കില":839,"കും":421,"കായ":240,"കാര":1466,"കാവ":233,"കാശ":316,"കാസ":134,"കാറ":154,"കാല":744,"കിട":299,"കുമ":280,"കുള":420,"കുറ":545,"കുവ":161,"കിൽ":695,"കൂട":756,"ം ":21178,"കുക":537,"കീഴ":158,"കുട":502,"കാർ":351,"കാൻ":202,"കുന":3755,"കുപ":182,"കാക":163,"കാണ":507,"കാന":330,"കാട":474,"കഥാ":145,"കന്":204,"കനു":217,"കനാ":221,"കണക":160,"കണ്":651,"കത്":528,"കമാ":395,"കമ്":385,"കയാ":132,"കയി":197,"കയു":359,"കരണ":329,"കരു":191,"കരി":544,"കപ്":708," in":230,"ച ":753,"ഗങ്":342," ht":204," of":235,"ട ":916," an":158,"ണ ":661,"ഗണി":159,"ത ":1303,"igh":217,"ധ ":268,"ing":193,"ങ് ":236,"ന ":6972," co":199,"in ":185,"ദ ":154,"കൊല":162,"കൊള":171,"കൊണ":402,"കൊട":175,"കേര":1281,"കേന":314,"ഈ ":1594," ww":169,"htt":234,"കൃത":542,"ht ":197,"കൃഷ":204," ri":189," px":215,"hum":268,"ക്ട":264,"ക്ത":515,"ക്യ":293,"ക്ഷ":2208,"ക്ര":1063,"ക്ല":147,"ഖ ":135,"ക്സ":418,"ക ":1483,"കോട":568,"കോഴ":224," th":549,"ക്ക":14730,"ജനന":254,"ജനു":148,"ജനി":202,"ജന്":153,"ങൾക":243,"er ":252,"es ":231,"ച്ച":4631,"ent":145,"ght":209,"ജ്യ":739,"ജ്ഞ":320,"ജ്ജ":187,"ജീവ":507,"ജില":1519,"ട് ":2684,"ടു ":337,"ടി ":838,"ടെ ":2958,"ചത്":271,"ജ് ":184,"ങിയ":431,"ചക്":148,"ങളെ":405,"ങളാ":313,"ങളി":1296,"ങളു":891,"ed ":198,"ചേർ":314,"ചെയ":1441,"ചെറ":399,"ചേര":187,"ചെട":173,"ചിട":172,"ചിത":936,"ചായ":1294,"ചാര":301,"ചിന":169,"ചിപ":139,"ചിര":413,"ചിറ":138,"ചില":197,"ചലച":426,"ടം ":259,"ചരി":346,"ങ്ങ":6013,"ങ്ക":1271,"ടാക":287,"ടുക":629,"ടും":382,"ടിൽ":160,"ടുള":178,"ടുവ":240,"ടുമ":147,"ടുപ":172,"ടുന":1925,"ടുണ":196,"ടുത":820,"ടിച":133,"ടിഞ":226,"ടായ":281,"ടിക":477,"ടാമ":153,"ടിസ":180,"ടിര":215,"ടില":180,"ടിന":182,"ടിയ":767,"rig":195,"ടേയ":132,"ടെയ":346,"ട്ര":721,"ട്ട":5142,"ണു ":234,"ഥം ":166,"ണി ":174,"ണ് ":9312,"തി ":1445,"തു ":412,"ദം ":215,"തെ ":1217,"ണം ":720,"ഡി ":135,"px ":213,"ടർ ":247,"ടങ്":577,"ടക്":856,"ഡ് ":679,"ഞാറ":228,"ടന്":213,"ടത്":622,"തം ":428,"ഞ്ഞ":800,"ഞ്ച":1999,"ടയി":180,"തപു":179,"തന്":625,"ോർ":445,"ോൾ":398,"തനാ":181,"്മ":2196,"്യ":11234,"്ര":16053,"്റ":10182,"്ല":4477,"്ള":3597,"ng ":163,"്വ":2298,"്ശ":389,"്ഷ":2226,"്സ":1373,"ോൺ":268,"തമാ":549,"തമി":248,"തരം":239,"ോസ":481,"ോഷ":199,"ോഹ":213,"ോഴ":323,"ോള":566,"ോവ":275,"ോര":171,"ോയ":198,"ോല":414,"ോറ":240,"ോബ":223,"ോഫ":177,"ോമ":554,"്ബ":173,"ണ്ഡ":544,"ണ്ട":3469,"്പ":9300,"്ധ":1930,"്ന":18960,"്ഥ":3459,"്ദ":2047,"്ണ":1196,"്ത":22771,"്ഡ":591,"്ട":9563,"്ഠ":162,"ണ്ണ":973,"്ഞ":1123,"്ജ":323,"്ച":6932,"്ങ":6013,"്ഗ":330,"്ക":17340,"ൊണ":430,"ൊത":283,"ൊന":285,"ൊട":319,"േർ":559,"ോദ":305,"ോണ":319,"ോത":321,"ോപ":434,"ോന":149,"ോജ":148,"ോഡ":324,"ോട":1348,"nd ":168,"ോക":1283,"ൊള":210,"ൊല":199,"തപ്":226,"ോഗ":1263,"ൊര":233,"േന":623,"േവ":391,"േശ":1652,"ൈക":178,"േറ":444,"േല":220,"േയ":868,"േര":2800,"േഹ":481,"േസ":147,"േഷ":809,"ൈദ":208,"ൈന":296,"ൈവ":271,"ൈറ":197,"െൻ":139,"െൽ":155,"െർ":168,"ൈസ":157,"ണിത":465,"െക":672,"െങ":501,"ണിയ":184,"െട":3862,"െത":287,"െന":785,"െപ":232,"െബ":161,"െമ":160,"െയ":3291,"െര":246,"െറ":681,"ണാട":139,"െല":319,"െള":276,"േക":651,"േഖ":487,"ദ് 
":330,"ണിക":268,"െസ":158,"േജ":272,"േട":244,"േത":832,"േണ":267,"േദ":234,"ൃത":879,"ുൽ":137,"ുൻ":211,"ുർ":254,"തനം":152,"ൃഷ":287,"ൃശ":256,"തത്":650,"ൂർ":1703,"ണൂർ":281,"ുപ":1108,"ുര":1360,"ുമ":2460,"ുഭ":196,"ുത":2652,"ുണ":1220,"ാൾ":251,"ുന":10596,"ുദ":692,"ൂച":155,"ുഹ":134,"ിൽ":7943,"ിൻ":555,"ൂട":1342,"ിർ":824,"ുള":2978,"ുഴ":501,"ൂക":744,"ുറ":1425,"ുല":285,"ുഷ":436,"ുസ":641,"ുവ":2148,"ുശ":133,"ൂപ":660,"ൂമ":233,"ു്":339,"ൂറ":506,"ൂര":435,"ൂണ":183,"ൂത":256,"ണു്":155,"ൂന":329,"ീർ":319,"ൂല":403,"ൃക":135,"ൂഹ":200,"താര":239,"തായ":301,"തിക":867,"താവ":406,"താല":576,"തിച":316,"താണ":917,"താന":186,"തിയ":1314,"തിര":1091,"തിറ":164,"തില":2687,"തിന":3912,"തിപ":216,"താം":142,"തലസ":164,"തമ്":155,"തോട":190,"ഥമാ":131,"ത്വ":330,"ത്സ":249,"ത്മ":171,"ത്ര":4088,"ത്യ":2047,"ത്ത":16443,"ത്ഥ":385,"of ":224,"നി ":376,"തുട":444,"തുന":334,"തുമ":302,"തുള":199,"തുവ":386,"തും":420,"തീയ":195,"തീര":189,"തുക":619,"തീർ":152,"തൃശ":187,"തിർ":138,"തിൽ":2321,"തെക":351,"തെയ":178,"ർമ":803,"ന് ":1964,"ർശ":261,"ർഷ":608,"ൽക":534,"ർവ":450,"ഥാപ":368,"ർണ":557,"ഥാന":1185,"ർഡ":239,"ർട":256,"ർജ":233,"ർപ":232,"ർന":493,"ർദ":256,"ർത":1751,"ൻറ":218,"ൻസ":353,"ർച":209,"ർക":724,"ർഗ":447,"ൻഡ":203,"ഥിത":865,"ൻപ":166,"ൾപ":377,"ൾക":618,"ൽപ":280,"പം ":271,"നീ ":509,"നു ":3729,"ദക്":161,"ഥവാ":438,"on ":337,"നെ ":631,"ഉള":177,"ഉയ":159,"ഉപ":1088,"ഇൻ":149,"ഇര":190,"ഇല":190,"ഇവ":544,"ഇന":1412,"ഇസ":231,"അർ":208,"ഉണ":345,"ഉത":324,"ഉദ":193,"le ":135,"എട":140,"എന":4361,"ൺ ":614,"എഴ":295,"ഏക":259,"എല":190,"എറ":155,"എസ":156,"ഉൾ":449,"എം":136,"ംഘ":229,"ംഖ":254,"ംഗ":1698,"ംക":276,"ംശ":210,"ംസ":820,"ംവ":207,"ംഭ":344,"ംബ":834,"ഡിയ":165,"ത് ":4999,"അസ":137,"അവ":661,"ആണ":726,"ഇം":373,"അപ":189,"അധ":210,"അന":675,"അഥ":462,"അദ":350,"ആക":235,"അറ":909,"അല":395,"അയ":178,"അര":181,"അഭ":217,"അമ":421,"ആസ":222,"ഇത":1342,"ഇദ":201,"ഇട":298,"ആദ":541,"ആധ":137,"ആന":248,"ആല":222,"ആവ":187,"ആയ":406,"ആര":244,"ആറ":138,"അം":186,"അക":377,"അട":473,"ആം":188,"അത":670,"അണ":160,"ചേ":581,"ചെ":2298,"ചു":1255,"ചി":2735,"ചാ":2146,"ച്":5556,"ചത":464,"ചന":269,"ങാ":172,"ങി":552,"ചല":494,"ങ്":7589,"ചയ":168,"ചര":413,"ങന":154,"ങള":3065,"ചക":269,"കൾ":1615,"ഗീ":392,"ഗി":1014,"കൽ":282,"ഗു":368,"കർ":533,"ഗാ":550,"ഘട":411,"കൻ":533,"ഗസ":163,"ണങ്":272,"ഗോ":498,"ഗ്":3272,"ഞാ":426,"ടണ":268,"ടത":795,"ടപ":165,"ടന":619,"ടമ":180,"ഞ്":2940,"ടല":163,"ടറ":140,"ടയ":443,"ടവ":181,"ടാ":1306,"ടം":274,"ടക":1208,"ടങ":583,"ജൂ":197,"ജീ":561,"ജു":186,"ജാ":371,"ജി":2005,"ജോ":210,"ജ്":1493,"ങൾ":1604,"ജന":1163,"ജല":137,"ണക്":248,"ജയ":163,"ഓഫ":177,"ൾ ":3523,"ഒര":4108,"ഒന":344,"ൽ ":10227,"ർ ":4755,"ൻ ":4719,"ഏറ":601,"ഗമ":519,"ഖ്":518,"ഗര":457,"ഗല":182,"ഗവ":220,"ഗങ":344,"ഗണ":317,"ഗത":782,"കൈ":174,"കേ":2116,"ഖന":135,"കെ":542,"കോ":1765,"കൊ":1243,"ഖര":135,"ക്":21779,"ഖല":214,"കസ":257,"കവ":506,"കി":3548,"ഗം":303,"കീ":288,"കാ":6054,"കൃ":756,"കു":7634,"കൂ":1037,"കപ":759,"കദ":149,"കഥ":303,"കന":747,"കല":863,"കറ":301,"കഴ":243,"കള":3150,"കമ":989,"കര":1697,"കയ":816,"കങ":196,"കക":230,"കണ":938,"കത":663,"കട":577,"കം":672,"നന":662,"നപ":369,"നയ":493,"നമ":1154,"ധ്":613,"നറ":327,"നല":133,"നവ":539,"നസ":256,"നാ":4964,"ദർ":205,"നി":6348,"പം":293,"നീ":1069,"നു":7416,"നൂ":483,"നെ":1274,"നേ":692,"നോ":770,"ന്":30257,"mb 
":256,"പക":675,"പങ":155,"പഞ":1399,"പട":805,"പത":1182,"പണ":230,"പന":675,"പദ":488,"പയ":870,"പമ":183,"പറ":902,"പര":1220,"പള":198,"പല":318,"പഴ":198,"പാ":2340,"നൽ":278,"പി":2507,"പീ":178,"പു":2667,"പെ":3528,"പേ":973,"പൂ":626,"ഫല":133,"പ്":13660,"പോ":1036,"പൊ":466,"ണപ്":245,"ഫി":226,"ബന":358,"ഫെ":137,"ബത":225,"ഭക":205,"ഫോ":209,"ഫ്":882,"ബൈ":150,"ബോ":333,"ബി":709,"ബാ":585,"പർ":146,"ബു":258,"മം":543,"മങ":200,"ഭവ":280,"ഭയ":191,"ഭര":449,"ബ്":1417,"മക":266,"ണത്":429,"മന":817,"മധ":161,"മദ":367,"മപ":831,"യം":1280,"ഭി":475,"ഭാ":2218,"മത":1028,"മണ":590,"ഭൂ":412,"യങ":585,"മസ":322,"മഹ":381,"മല":1243,"യക":899,"മമ":375,"ഭ്":194,"മയ":404,"മര":545,"മറ":466,"മോ":397,"മൊ":134,"മ്":3221,"യമ":1442,"യന":858,"മേ":980,"മെ":621,"യപ":1289,"മൈ":144,"യത":3000,"മൂ":870,"മി":2147,"ബർ":483,"മാ":10411,"മു":3097,"മീ":726,"രം":2337,"രഞ":148,"യാ":6131,"രജ":211,"രച":317,"യസ":303,"രങ":807,"യവ":498,"രക":1280,"യറ":218,"യല":155,"യയ":1033,"യര":304,"രയ":465,"രമ":2024,"യ്":2647,"യോ":2076,"രപ":409,"യേ":524,"രന":771,"രധ":593,"യെ":576,"രദ":892,"രത":2444,"രണ":1938,"യൂ":920,"യു":6606,"റം":434,"യി":9140,"ടു":5165,"ടീ":186,"ടി":3790,"ടെ":3674,"ടേ":223,"ട്":8736,"ടോ":357,"ഡല":261,"ഡാ":142,"ടർ":374,"ഡി":791,"ണം":729,"ണങ":272,"ണക":349,"തം":452,"ഡെ":175,"ഡ്":914,"ഡോ":225,"തങ":173,"തക":671,"ണവ":186,"ണി":1469,"ഥം":170,"ണു":765,"ണൂ":306,"ണാ":642,"തട":148,"ണപ":319,"ണത":458,"ണയ":165,"ണമ":522,"തി":15367,"താ":3666,"തൃ":441,"തൂ":222,"തു":3776,"തീ":783,"ദം":233,"തവ":316,"ണ്":14451,"തമ":1061,"തയ":396,"തല":712,"തര":903,"തത":746,"തപ":458,"തന":1444,"ഥാ":1753,"ഥി":968,"ദത":182,"ഥവ":455,"ത്":29067,"ഥമ":194,"ഥയ":152,"ഥല":260,"ദക":185,"തെ":2173,"തേ":368,"തൊ":210,"തോ":646,"ധത":175,"ദു":403,"നം":1457,"ദീ":159,"ദി":1290,"തൽ":420,"ദാ":495,"ദര":150,"ദമ":198,"നത":3826,"നദ":293,"ധീ":204,"ധി":1094,"ധു":167,"ധാ":1533,"നട":732,"നങ":658,"നക":609,"നഗ":483,"ധമ":209,"ദ്":4614,"ധന":255,"ദേ":2169,"ദൈ":141,"ഹി":1080,"ഹാ":862,"ഷൻ":274,"ഹൈ":165,"ഹ്":425,"ഹോ":192,"സി":2539,"സാ":1870,"സഹ":227,"സസ":365,"സേ":205,"സെ":370,"സു":655,"സീ":147,"ഹത":306,"സൂ":414,"സ്":11156,"ഹമ":212,"ഹര":199,"സൈ":220,"സോ":402,"സർ":564,"ണാക":136,"ാബ":224,"ാഭ":193,"ാപ":1119,"ാന":5014,"ാദ":754,"ാധ":756,"ാത":1928,"ാണ":10704,"ിം":358,"ാട":2078,"ിഞ":391,"ിജ":257,"ാഹ":589,"ിച":3441,"ിങ":411,"ാസ":2241,"ാഷ":985,"ിഗ":248,"ാശ":745,"ാവ":1901,"ിക":9608,"ാഴ":297,"ാള":1241,"ാല":2916,"ാറ":1288,"ാര":3963,"ായ":9472,"ാമ":3387,"ാം":1111,"ാച":293,"ാജ":1079,"ാഗ":1285,"ാഖ":155,"ാങ":229,"ാക":2271,"ീപ":245,"ീന":303,"ീയ":989,"ും":8705,"ീത":716,"ീസ":331,"ാൽ":616,"ാർ":1852,"ുട":4050,"ാൻ":1060,"ീല":186,"ീറ":467,"ീര":476,"ീഷ":499,"ുഗ":147,"ീവ":642,"ുഖ":381,"ുക":4108,"ീഴ":170,"ിധ":718,"ിന":8501,"ിപ":1879,"ിഭ":450,"ിമ":977,"ിട":1697,"ിഡ":255,"ിണ":277,"ിത":4094,"ിദ":770,"ിസ":1623,"ിഷ":679,"ിഹ":196,"ീട":157,"ിര":3940,"തങ്":171,"ിയ":9237,"ില":11613,"ിറ":959,"ീക":980,"ിഴ":585,"ിള":544,"ിശ":915,"ിവ":1776,"റെ":3559,"ലന":318,"റേ":484,"റു":1385,"ലണ":143,"ലത":808,"ലമ":351,"റ്":6646,"ലയ":2792,"ലപ":453,"റോ":534,"ലഭ":229,"റവ":704,"ലങ":375,"ററ":173,"ലക":1101,"റാ":1047,"യർ":416,"യൻ":1031,"റീ":135,"ളം":676,"റി":3562,"നം 
":1340,"ലച":462,"രെ":725,"രേ":448,"രൂ":693,"റണ":147,"റത":394,"രീ":1446,"ലം":722,"രു":8550,"റയ":625,"റമ":259,"ര്":755,"രോ":818,"റബ":175,"റപ":149,"രശ":457,"രവ":1227,"റക":237,"രള":1305,"രര":176,"രി":6901,"രാ":5614,"മർ":179,"മൻ":182,"റഞ":137,"രഹ":238,"റങ":256,"രസ":985,"ളെ":751,"ളേ":165,"ളോ":309,"ള്":3452,"ഴയ":224,"ഴക":439,"ളവ":247,"ളാ":873,"റർ":523,"വം":540,"ളി":4155,"ളു":2116,"ലൂ":979,"ളത":1504,"ലൈ":193,"ലെ":5253,"ലേ":789,"ല്":3649,"ളമ":142,"ലൊ":242,"ലോ":1419,"ളര":321,"ലവ":344,"ളക":192,"ലസ":248,"രൻ":214,"ലു":2355,"ലീ":583,"ലി":2464,"ലാ":2706,"ഷക":240,"ശയ":135,"ശമ":220,"വ്":1715,"ശര":163,"വോ":132,"വെ":830,"വൈ":359,"വേ":1138,"ശന":174,"വു":2019,"വൃ":226,"ശത":359,"വാ":3225,"വി":6428,"ഷം":305,"വീ":487,"വഹ":163,"വശ":269,"വസ":973,"ശങ":339,"ശക":161,"വഴ":173,"വള":556,"വല":693,"വര":1681,"ണമാ":331,"വയ":976,"വമ":167,"ഴ്":515,"വന":931,"വണ":138,"വത":706,"ശം":401,"ഴു":589,"വട":524,"ഴി":934,"വച":214,"വക":573,"സര":276,"സവ":164,"സന":236,"ഷേ":485,"സഭ":457,"ഷ്":2365,"സമ":1136,"ഷി":1059,"ഹം":374,"സത":207,"സങ":187,"വൻ":194,"സഞ":150,"ഷാ":253,"വർ":1764,"ഷര":160,"ഷയ":312,"ശേ":646,"ഷന":151,"ഷമ":135,"ശ്":1951,"സം":2540,"ശീ":345,"ശു":222,"ശാ":1071,"ശി":810,"ദി ":217,"തൽ ":385,"ഷണ":462,"ശൂ":214,"ഷത":605,"ശസ":408,"ഇസ്":226,"ഇന്":1364,"ഇദ്":200,"ഇവി":189,"ആസ്":173,"ഇത്":627,"ഇതി":553,"ആണ്":627,"ആദ്":407,"ആയി":224,"അവസ":151,"അല്":193,"്സ്":542,"്സി":291,"്ഷ്":138,"്ലെ":193,"്ലേ":201,"്ലോ":274,"്റർ":497,"്ളി":382,"്ളു":150,"്റേ":221,"്റെ":3192,"്ലയ":1398,"്റ്":2584,"്ലി":325,"്ലീ":428,"്ലാ":703,"്ളത":321,"്ലൂ":138,"്രേ":194,"്ററ":147,"്രോ":492,"്റവ":566,"്റു":446,"്യൻ":654,"്റാ":444,"്റി":828,"്രപ":199,"്രന":361,"്യേ":240,"്രധ":583,"്രദ":820,"്രത":1256,"്യൂ":574,"്രര":148,"്രയ":207,"്രമ":1061,"്യോ":256,"്രഹ":203,"്രസ":612,"്രവ":914,"്രശ":364,"്രു":156,"്രി":1522,"്രീ":856,"്രാ":2390,"്ഷര":156,"്ഷേ":452,"്ഷി":552,"്ഷത":297,"്ഷണ":214,"്ശേ":205,"്വര":172,"്വയ":160,"്വീ":179,"്വാ":520,"്ളോ":174,"്വത":242,"ഇംഗ":366,"അഥവ":448,"അതി":433,"അത്":146,"അനു":271,"അന്":287,"അധി":169,"അദ്":338,"അഭി":173,"അറബ":149,"അറി":618,"അമേ":219,"്ങന":152,"്കൽ":212,"്കൻ":384,"്ചേ":162,"്ചി":1394,"്ചാ":1705,"്ചു":944,"്ങ്":141,"്ങാ":169,"്ങി":542,"്ചത":372,"്ങള":3056,"്ങൾ":1597,"അക്":233,"്ച്":847,"്ടത":365,"്ടണ":266,"്ടറ":139,"്ടയ":169,"്ഞാ":395,"്ടം":148,"്കട":170,"്കം":167,"്കൊ":183,"്കോ":591,"്ക്":1983,"്കപ":570,"അടി":235,"്കയ":184,"്കള":257,"്കര":378,"്കറ":207,"്കി":2121,"്കാ":2470,"്കൃ":134,"്കൂ":155,"്കു":5270,"്കേ":378,"്കെ":229,"്നവ":135,"അംഗ":150,"്ധ്":347,"്നറ":289,"്നത":3020,"്ധി":284,"്ധാ":165,"്ന്":1214,"്നെ":265,"്നാ":1196,"്നൂ":143,"്നു":4523,"്നീ":591,"്നി":970,"്പറ":240,"്പര":163,"്പന":256,"്പത":250,"്പ്":921,"്പോ":426,"്പെ":2946,"്പൂ":178,"്പു":822,"്പി":1252,"്പാ":674,"്യങ":413,"്യക":572,"്മാ":586,"്രം":725,"്മി":445,"്മദ":157,"്യം":593,"്യസ":193,"്രങ":393,"്യവ":286,"്രക":715,"്യു":1251,"്റം":197,"്യാ":1524,"്രജ":175,"്യന":356,"്യത":846,"്യര":146,"്യമ":802,"്യയ":869,"്യപ":213,"്ടു":1302,"്ടി":1743,"്ടാ":1065,"്ട്":2062,"്ടോ":261,"്ടെ":178,"്ഡല":230,"്ടർ":257,"്തം":182,"്തക":337,"്ഥം":135,"്ണാ":141,"്ണൂ":295,"്തത":221,"്തപ":248,"്തന":539,"്തമ":327,"്തര":440,"്തവ":158,"്താ":1357,"്തി":9289,"്തീ":242,"്തു":1834,"്തൂ":145,"്തെ":1346,"്തേ":231,"്തോ":380,"്ഥമ":153,"്ത്":4115,"്ഥല":260,"്ഥാ":1534,"്ഥി":931,"്ദി":192,"്ദേ":658,"്ദു":211,"്ധത":135,"്ധമ":162,"്ദ്":530,"ോളി":145,"ോമീ":259,"ോഫ്":143,"ോസ്":247,"ോഴി":212,"ഉൾപ":337,"ww ":169,"www":169,"ൊരു":147,"ോൾ ":349,"ോൺ ":165,"ോഗി":791,"ോഗ്":169,"ൊല്":164,"ോകത":185,"ൊള്":135,"ോക്":696,"ോട്":908,"ോഡ്":151,"ോപ്":223,"ോത്":164,"ോദ്":147,"്ച ":729,"്ട ":817,"്ത ":630,"ഉള്":169,"്ന ":5898,"us 
":153,"്ര ":614,"്യ ":889,"ൊട്":150,"േർന":202,"ൊതു":218,"umb":269,"ൊണ്":419,"്ള ":2181,"്ല ":256,"ഉപയ":669,"ൊന്":268,"ൈദ്":158,"ഉണ്":335,"ഉത്":318,"ൈറ്":167,"ttp":234,"tp ":236,"tio":259,"thu":262,"്ക ":209,"ter":154,"the":302,"കം ":627,"എസ്":144,"ഏകദ":134,"എഴു":292,"ഏറ്":547,"എന്":4356,"എല്":166,"കൻ ":521,"�":641,"ഗ് ":228,"കങ്":196,"കൽ ":221,"കൾ ":1360,"ഓഫ്":164,"ക് ":1892,"ഒന്":342,"കര ":204,"കെ ":257,"കേ ":146,"കി ":452,"ഗം ":252,"ഒരു":3867,"കു ":269},"n_words":[1303092,1392078,987774],"name":"ml"}
\ No newline at end of file
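For context on how these profile files are consumed: upstream langdetect loads every file in the profiles/ directory through its DetectorFactory and scores input text against the loaded n-gram frequencies. A usage sketch, assuming this vendored copy is importable as langdetect the way the upstream package is (upstream's __init__ re-exports detect and detect_langs from detector_factory; the vendored import path here may differ):

from langdetect import detect, detect_langs

# Sample sentences are illustrative, not taken from the training data.
print(detect('Скопје е главен град на Македонија'))  # expected: 'mk' (Macedonian)
print(detect_langs('പ്രധാന ഭാഷ മലയാളം ആണ്'))          # e.g. [ml:0.99...] (Malayalam)

Note that upstream detect() is only deterministic if DetectorFactory.seed is set beforehand, since the detector adds random noise while sampling n-grams.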
diff --git a/nlp_resource_data/langdetect/profiles/mr b/nlp_resource_data/langdetect/profiles/mr
new file mode 100755 (executable)
index 0000000..19a1942
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/mr
@@ -0,0 +1 @@
+{"freq":{"णां":79,"णाच":69,"थे ":276,"तका":129,"था ":217,"ोल्":72,"ोलि":66,"ोला":81,"ोर्":136,"ोबर":168,"णता":197,"तंत":167,"ोव्":196,"तो ":552,"णजे":209,"्तर":330,"ं ":115,"्त्":568,"्ते":64,"्थळ":62,"तां":107,"्ता":389,"ः ":139,"्ती":202,"ताक":90,"्ति":145,"्तु":82,"ताच":344,"्दर":73,"तात":957,"तान":226,"्थि":115,"ताम":63,"्था":569,"तार":104,"ताल":160,"तिक":153,"्थे":76,"तीं":64,"्दा":73,"а":72,"्धत":82,"तिह":109,"तीच":131,"तिस":79,"्ट्":957,"्टो":150,"्टे":199,"्टी":161,"्टा":154,"्टि":89,"इ ":181,"तसे":151,"धन ":83,"्तक":79,"दी ":549,"्यं":164,"्मन":141,"्यक":342,"दू ":113,"्मा":304,"्मि":128,"्रं":90,"्यत":102,"्मे":88,"्यप":104,"्यम":126,"्रक":379,"्यव":153,"्रज":340,"्यु":195,"्या":9729,"्धा":165,"तया":65,"्ने":69,"तरे":84,"्ना":140,"तरा":162,"तरर":91,"्पन":111,"दा ":131,"्पर":85,"तले":94,"तला":76,"्पा":80,"ण्य":949,"ا":67,"तमि":228,"्का":190,"्कृ":130,"्कर":75,"दर ":72,"णून":182,"तदा":122,"्ञा":188,"दल ":81,"तत्":79,"णार":809,"णात":73,"्गा":89,"्चि":216,"्चा":100,"णुक":137,"्चन":73,"थ ":288,"द ":645,"ध ":495,"न ":5349,"ड ":671,"थील":100,"ठ ":317,"थाप":202,"ण ":1146,"थान":236,"त ":6358,"थित":69,"धी ":120,"ज ":339,"दक्":266,"ञ ":122,"धा ":98,"ट ":1322,"ं":13502,"ः":232,"ँ":308,"आ":6891,"इ":1323,"अ":4895,"ऊ":224,"ऋ":69,"ई":607,"उ":1473,"घ ":103,"ए":2374,"ओ":357,"ऑ":510,"ऐ":63,"ग":7424,"ख":2772,"क":20151,"औ":108,"छ":206,"च":12137,"घ":1111,"ट":6169,"ञ":320,"झ":981,"ज":8381,"ठ":2153,"ड":3743,"ढ":440,"ण":6906,"त":27784,"थ":2477,"द":9254,"ध":4679,"न":18404,"प":12607,"फ":1499,"ब":5130,"्ग ":233,"भ":4247,"म":15733,"य":22975,"र":35779,"ळ":2991,"ल":18817,"व":16505,"ष":4156,"श":6670,"ह":17826,"स":18689,"ऽ":116,"ि":18218,"ा":66714,"थवा":122,"े":31183,"ॅ":601,"ू":4430,"ृ":896,"ी":21357,"च ":843,"ु":7163,"ौ":422,"्":44808,"ो":8505,"ै":961,"ॉ":916,"०":1926,"१":3312,"्क ":91,"६":758,"७":829,"८":1020,"९":2445,"२":1611,"३":695,"४":690,"५":705,"क ":4288,"ग ":912,"ख ":378,"त्स":89,"त्व":365,"त्प":68,"त्र":2148,"त्य":1716,"त्म":72,"त्त":815,"त्न":66,"ம":77,"த":78,"ா":90,"ி":93,"ர":79,"तून":246,"க":89,"तुर":76,"ॉर्":122,"்":211,"तीत":68,"तीन":98,"तीय":438,"तील":2444,"तेल":85,"ई ":267,"ै ":165,"्न ":140," ख":774," ग":1923," औ":107,"ोजी":152," क":6366," ओ":265," ऑ":506," ट":487," ज":3863," झ":541," च":2075," छ":154," घ":462," इ":1199," आ":6783," अ":4797," ए":2269," ऊ":68," उ":1326," ई":99,"दार":306,"दिग":99,"दान":93,"दाच":70,"दिल":153,"दिव":143,"्फ ":77,"दिर":79,"े ":16291,"दीच":62,"नंत":140,"दुर":84,"दुस":164,"ोठे":238,"्म ":214,"ू ":729,"्य ":1111,"्र ":868,"ि ":987,"नी ":1364,"ोणा":90,"ी ":12189,"ोत्":125,"ोतो":104,"ोते":752,"ोती":123,"ोता":310,"ु ":263,"ा ":17571,"ोधन":72," ८":95," ९":93," ६":115," ७":115," ४":132," ५":115," २":1066," ३":209," ०":70," १":2735,"्व ":312," प":6615," फ":822," न":2997," म":6902," य":3998,"्ष ":149," ब":2263," भ":2543," ठ":228," ड":419,"ह ":265," द":3178," ध":461," त":3644," थ":112,"ोपा":104," ह":7628," स":7738," ल":2032," र":3046," श":2241," व":5406,"ने ":992,"्स ":322,"स ":1697,"ष ":215,"थ्व":73,"श ":822,"व ":2502,"्च ":156,"दरम":106,"ळ ":724,"्ट ":345,"ल ":4586,"्ञ ":122,"दर्":246,"ना ":828,"्ड ":88,"र ":6250,"य ":2749,"्ठ ":232,"म ":1312,"्ण ":136,"ोका":76,"ोकस":391,"भ ":104,"ब ":134,"्थ ":178,"फ ":180,"्त ":411,"्ध ":328,"प ":291,"्द ":119,"टना":76,"डू ":84,"डी ":137,"डा ":115,"ठे ":245,"ठी ":788,"ञान":146,"टोब":135,"ट्य":132,"ट्र":1051,"ट्ट":87,"टेड":67,"टेक":65,"तः ":64,"डे ":134,"टां":121,"टें":125,"टात":71,"टार":107,"टिक":111,"ठ्य":96,"णी ":301,"णि ":782,"णा 
":114,"्ह्":230,"्सि":71,"्हण":631,"्हा":235,"्हि":103,"्हे":192,"्स्":66,"ठिक":87,"डणु":118,"्लि":183,"्ली":63,"्ला":198,"्ले":93,"्रद":304,"्रथ":62,"्रत":188,"्रण":89,"्यू":155,"्रप":582,"्ये":1183,"्रम":557,"्यो":94,"्रल":83,"्रव":174,"्रश":146,"्रह":141,"्रस":352,"्रा":1606,"्रु":153,"्रि":802,"्लं":83,"्री":882,"्रे":549,"्रो":151,"्र्":109,"्षण":152,"्शि":77,"्षे":198,"्षि":311,"्षा":344,"्षी":88,"्वत":172,"्वज":70,"्वर":132,"्वी":309,"्वे":282,"्वि":88,"्वा":1006,"ड्य":63,"ता ":928,"ती ":831,"ते ":2330,"णे ":394,"डाच":85,"डात":62,"डिय":72,"डिस":128,"डील":75,"तर ":662,"डून":170,"जे ":259,"जा ":105,"जी ":439,"चीन":184,"चित":727,"चार":281,"चाल":111,"चिन":184,"चिम":207,"जगा":147,"च्य":2911,"च्च":223,"जन्":279,"टक ":89,"जनत":86,"जधा":220,"टन ":79,"जाग":68,"जां":62,"जिल":278,"जास":142,"जार":66,"जान":157,"जात":654,"जाण":139,"जरा":68,"जर्":134,"जवळ":124,"जोड":73,"ज्य":1261,"ज्ञ":320,"जीव":120,"जुन":111,"जुल":121,"जून":124,"टर ":126,"झाल":422,"टी ":174,"टा ":105,"ठा ":104,"ंघ":431,"ंख":180,"ंग":1699,"ंक":394,"ंड":876,"केत":187,"ंट":262,"ंज":187,"ंच":1704,"केच":132,"केट":130,"ँड":81,"ंश":217,"ंस":555,"ंव":461,"् ":692,"ंध":318,"ंद":1200,"ंन":966,"ंथ":109,"ंत":1644,"ंम":356,"केल":701,"ंप":555,"ंभ":92,"ंब":953,"ो ":900,"ँग":72,"आण":837,"आढ":90,"इं":419,"कृत":201,"अस":1528,"अश":181,"आग":71,"अव":109,"आक":127,"अल":143,"अर":282,"अभ":461,"अम":428,"अप":77,"अध":195,"अन":311,"अथ":144,"इत":287,"आश":86,"आह":3959,"आल":245,"आय":156,"आर":178,"आप":132,"आफ":117,"कृष":91,"आध":103,"अं":259,"आं":194,"कें":67,"अत":143,"अण":68,"अक":80,"उर":65,"उप":257,"उल":75,"ऊन":63,"ऊर":66,"इस":98,"उं":94,"खले":96,"उच":118,"उत":389,"उन":70,"उद":171,"कोण":88,"कोल":77,"एक":1893,"एख":65,"एप":100,"क्क":92,"क्ट":201,"क्त":320,"क्य":169,"क्ष":1291,"क्र":523,"क्स":174,"ऑस":91,"ऑफ":67,"ऑक":141,"ऑग":114,"गर":430,"गल":148,"गळ":139,"गव":115,"खे":276,"गप":74,"गम":74,"ख्":538,"गट":66,"खा":505,"खि":83,"गड":87,"खी":84,"गण":286,"गत":154,"खर":91,"क्":2945,"खल":175,"के":1566,"खन":94,"कॅ":149,"को":604,"कॉ":161,"कि":847,"की":630,"गं":74,"का":3922,"कृ":301,"कु":375,"कू":79,"कस":544,"कव":162,"कश":95,"कल":330,"खक":117,"कम":167,"कर":1579,"कप":84,"कथ":172,"कन":175,"कड":224,"खं":163,"कण":68,"कत":150,"कक":66,"कं":129,"ओळ":135,"ची":1602,"चि":1243,"चा":1991,"चे":2243,"चौ":88,"च्":3182,"० ":717,"जग":235,"जक":120,"चन":187,"चर":81,"चल":81,"चव":64,"घा":367,"घे":138,"चं":156,"घट":138,"गा":1549,"गस":133,"घड":69,"गी":328,"गि":172,"गू":80,"गु":294,"गो":373,"ग्":1223,"गे":325,"घर":90,"टन":228,"ञा":188,"टा":652,"टल":177,"टर":206,"खाद":93,"४ ":416,"झा":550,"झि":70,"टच":62,"३ ":377,"टक":258,"जो":179,"जे":499,"जू":189,"जी":702,"जु":275,"जा":1637,"जि":541,"२ ":421,"ज्":1646,"जन":601,"खान":75,"जध":220,"खाल":87,"जस":63,"जव":226,"१ ":522,"जर":265,"जल":91,"जय":81,"जम":98,"डच":80,"ठा":219,"ठि":103,"ठव":67,"५ ":430,"टे":445,"ट्":1360,"टो":293,"टी":396,"टि":297,"डा":413,"डि":310,"डी":307,"डल":139,"डळ":73,"डव":71,"६ ":435,"ठ्":106,"डम":84,"डर":65,"ठी":879,"डत":76,"डण":186,"डन":65,"ठे":307,"तं":195,"तः":94,"ढा":78,"णज":221,"णक":97,"७ ":492,"ढळ":87,"ड्":156,"डो":138,"डॉ":73,"डे":278,"डू":298,"डु":64,"णि":910,"णी":405,"णु":223,"णू":264,"णा":1365,"तक":277,"८ ":406,"णप":91,"णत":272,"९ 
":481,"तव":72,"तस":223,"ति":798,"ता":3296,"तू":362,"तु":325,"ती":4269,"दं":76,"तद":128,"तत":106,"तप":146,"तन":88,"णे":550,"तम":332,"ण्":982,"तय":70,"तल":255,"तळ":94,"तर":1232,"थव":138,"था":849,"थी":145,"थि":153,"ते":2652,"तो":665,"त्":5456,"खेळ":198,"थळ":77,"दक":326,"दृ":82,"धत":108,"दू":226,"दु":367,"नं":194,"दी":767,"दि":707,"दा":1004,"थे":411,"दन":81,"दव":72,"दल":156,"दर":524,"थ्":121,"धा":812,"नच":194,"नत":190,"नद":208,"धी":622,"नड":78,"धि":310,"धू":73,"धु":110,"दो":255,"द्":1977,"धन":165,"दे":1492,"नक":199,"नग":152,"धर":260,"धल":79,"नर":109,"नल":207,"नव":362,"धे":84,"नय":86,"नम":96,"ध्":1350,"नी":1612,"पं":155,"नु":263,"गळ्":76,"ने":2153,"नस":171,"ना":2993,"नि":1438,"पक":409,"नो":289,"नै":75,"न्":1414,"पत":355,"पण":229,"पन":409,"पद":339,"पड":106,"पट":647,"पश":225,"पह":161,"पस":114,"पल":219,"पर":957,"बई":129,"पे":273,"पै":233,"पॉ":77,"पू":594,"पृ":102,"पॅ":69,"पा":1577,"पि":254,"बं":239,"पी":379,"पु":752,"बच":72,"फळ":72,"प्":3134,"पो":211,"बन":164,"फे":175,"बद":156,"बत":64,"फु":80,"फि":95,"फा":147,"फो":62,"फ्":393,"बर":861,"बह":109,"बि":244,"बा":902,"बु":115,"मं":347,"बी":172,"बे":403,"बॉ":75,"बो":197,"भर":75,"ब्":696,"मक":100,"गरा":120,"गरी":78,"मग":69,"मज":82,"मच":80,"यं":250,"भि":399,"भा":2637,"मत":229,"भू":282,"मण":132,"भे":177,"मन":439,"मध":1532,"मद":153,"भौ":79,"भो":73,"मल":215,"यक":493,"भ्":94,"मर":604,"मह":1126,"यत":203,"मृ":108,"मू":271,"मॅ":66,"ख्य":500,"मि":1010,"यट":74,"मा":2886,"मु":1239,"रं":435,"मी":463,"मो":679,"यम":237,"म्":1246,"यन":341,"मे":875,"यप":140,"मै":72,"यव":231,"रख":99,"रग":70,"रक":670,"यल":98,"यर":147,"या":14036,"रज":391,"रच":375,"यस":75,"रध":68,"रद":356,"रथ":65,"रत":1594,"यू":189,"रण":1061,"यु":675,"यी":118,"यि":121,"रय":68,"रम":767,"रभ":78,"रब":97,"यो":389,"रफ":64,"रप":674,"ये":1927,"रन":85,"गाच":88,"लय":143,"लब":72,"लन":141,"लढ":74,"गात":319,"लच":92,"लग":102,"लक":174,"र्":5914,"रो":716,"रॉ":92,"रे":1413,"गां":194,"रू":387,"लं":299,"री":2531,"रु":575,"रि":1770,"रा":6808,"रह":201,"रस":797,"रश":254,"रव":382,"रल":306,"रर":139,"ळ्":150,"ळे":264,"ळा":565,"वं":149,"ळी":258,"ळू":62,"ळव":128,"ल्":2173,"लो":767,"चन 
":87,"लू":62,"ळण":107,"ळत":82,"लै":137,"ले":3399,"ळन":66,"लु":220,"ली":1530,"लि":1122,"ला":2983,"लव":72,"ळख":135,"ळक":78,"वो":76,"शब":124,"वै":95,"वे":911,"शन":181,"शव":74,"षक":96,"व्":1218,"शर":79,"वश":88,"गस्":118,"वस":571,"वू":99,"वृ":105,"शत":95,"वा":3822,"वि":1941,"वी":773,"वन":263,"शक":187,"वळ":226,"वल":308,"वर":1642,"वय":91,"वज":113,"वच":65,"वक":74,"वण":225,"वत":410,"वड":312,"वट":66,"सन":187,"षे":500,"सप":188,"सभ":348,"ष्":1679,"सम":806,"सर":1179,"सल":632,"सव":108,"सच":99,"षा":600,"षि":375,"षी":127,"सण":108,"सत":526,"सद":113,"सध":64,"शे":245,"श्":798,"शो":167,"षय":121,"सक":135,"शह":705,"सं":1892,"शी":422,"शु":72,"शा":1405,"शि":914,"षण":199,"हे":6136,"हॅ":73,"हु":162,"ही":1380,"हि":1219,"हा":3206,"ह्":871,"हो":1733,"हन":73,"से":980,"सु":643,"सी":265,"हत":249,"सू":619,"हण":691,"सि":714,"सा":2701,"सह":169,"हव":64,"हस":71,"हम":90,"स्":3801,"हय":143,"हर":898,"सै":65,"सो":298,"ात":6409,"ाथ":69,"ाढ":83,"ाण":917,"ाठ":965,"िं":1222,"ाड":364,"ाट":442,"ाब":339,"ाभ":170,"ाप":1160,"ान":3426,"ाद":925,"ाध":343,"गुर":68,"ाव":2259,"िख":96,"िक":2722,"ाळ":594,"ाल":2412,"ार":6697,"ाय":1081,"ाम":1731,"िज":229,"ाह":1051,"िच":184,"ास":2359,"ाष":1328,"ाश":369,"िग":160,"ां":4665,"ाँ":113,"ाऊ":66,"ाइ":80,"ाई":239,"ाउ":96,"ाक":602,"ाच":4155,"ाझ":79,"ाज":2078,"ाग":1046,"ाख":206,"ीण":65,"ीठ":87,"ुं":330,"गिर":64,"ीत":755,"ीप":214,"ीन":664,"ीम":234,"ीय":1104,"ील":3494,"ीर":265,"ुग":140,"ुख":527,"ीव":305,"ुक":528,"गीत":150,"ुज":89,"ीस":155,"ुट":125,"िट":204,"गाव":277,"ीं":192,"िड":79,"गाल":127,"िण":383,"ित":1967,"गाय":92,"िद":477,"िध":193,"िन":1260,"िप":347,"िब":126,"घटन":111,"िभ":130,"िम":634,"िर":694,"िय":1422,"िल":1282,"ीक":319,"िळ":425,"िश":552,"िव":770,"िस":721,"िष":365,"िह":250,"ीच":727,"ीज":85,"ेव":793,"ेश":1384,"ेळ":324,"ैक":225,"ेल":2762,"ेय":96,"ेर":672,"ेम":206,"ेब":175,"ेप":128,"ेन":599,"ैद":75,"ॉं":62,"ेह":98,"ेस":492,"ेष":313,"ॉक":64,"गेल":207,"ैव":63,"ैन":90,"ॉन":105,"ॉट":71,"ॅर":127,"ॅन":107,"ें":768,"ेक":722,"ेख":448,"ेट":491,"ेड":164,"ेत":2226,"ेण":224,"ेद":177,"ेथ":380,"ेग":137,"ेच":757,"ेज":75,"ृथ":73,"ृत":438,"ृष":221,"गोल":138,"ुत":238,"ुण":210,"ुढ":64,"ुड":67,"ूं":81,"ुन":719,"ुध":64,"ुद":440,"ुप":192,"ुर":1052,"ुम":183,"ुळ":257,"ग्ल":179,"ूक":85,"ुल":420,"ुष":125,"ुस":491,"ुव":264,"ग्र":670,"ूच":81,"ग्द":71,"ग्न":84,"ूत":126,"ून":1612,"ूप":102,"ूम":143,"ूर":779,"ूल":112,"ूळ":67,"ूह":94,"चा ":1450,"्व":2745,"्श":274,"्ष":1475,"्स":788,"्ह":1660,"्म":1315,"्य":13761,"्र":9096,"्ल":826,"ची ":1393,"ोर":443,"ोय":88,"ोल":499,"ोब":307,"ोम":148,"ोस":168,"ोष":97,"ोह":121,"ोळ":97,"ोश":68,"ोव":325,"्ण":384,"्त":2563,"्ड":225,"्ट":2277,"्ठ":301,"्झ":104,"्ञ":320,"्फ":142,"्ब":125,"्प":569,"्ध":714,"्न":549,"्थ":1118,"्द":543,"ौर":111,"्ज":211,"्च":600,"्ग":488,"्क":801,"ों":135,"ॉर":159,"ॉल":165,"ोज":285,"ोड":211,"ोट":232,"ोठ":384,"ोद":77,"ोण":200,"ोत":1519,"ोप":280,"ोध":220,"ोन":394,"ोक":697,"ोच":112,"ोग":276,"चे ":2125,"घात":147,"घाच":71,"चंद":96,"घेत":69,"०६":62,"०७":100,"०४":65,"००":610,"०१":76,"०८":75,"०९":89,"१०":143,"१५":108,"१६":128,"१७":139,"१८":321,"११":87,"१२":105,"१३":94,"१४":109,"१९":1554,"२१":65,"२०":603,"८०":89,"८६":64,"९०":106,"९६":205,"९५":154,"९८":288,"९७":177,"९२":135,"९१":161,"९४":175,"९३":129,"८८":69,"८९":125,"९९":420,"३०":86,"२३":65,"२२":68,"२५":80,"२७":76,"२६":63,"२८":64,"५०":65,"६०":76,"जन ":67,"चना":79,"के ":109,"का ":542,"ओळख":133,"की ":396,"खक ":110,"कर ":232,"कन 
":100,"काह":112,"किन":92,"काम":326,"कार":1020,"काय":62,"काल":221,"काळ":222,"काश":148,"कास":81,"किम":109,"किल":76,"कीय":113,"कां":335,"काद":84,"कात":158,"काण":89,"किं":328,"काच":171,"कवी":78,"कसं":90,"कशा":76,"कसभ":296,"कला":81,"कल्":115,"गी ":87,"कर्":219,"गा ":89,"करा":112,"करू":89,"करी":67,"三":73,"करत":181,"करण":522,"कथा":145,"कडे":62,"कडू":62,"गर ":134,"खंड":160,"कंप":101,"एप्":100,"एका":149,"एखा":65,"ऑफ ":63,"ऑस्":91,"ऑगस":114,"ऑक्":139,"०९ ":83,"०८ ":72,"०७ ":92,"ऊर्":63,"११ ":69,"१२ ":81,"१३ ":67,"१० ":113,"१६ ":75,"१७ ":70,"१४ ":80,"१५ ":76,"०० ":116,"०४ ":63,"एक ":1511,"आणि":780,"आपल":113,"आफ्":114,"२००":459,"आले":120,"आर्":98,"आहे":3954,"इति":65,"इतर":127,"अथव":122,"अति":63,"१९१":93,"१९८":221,"१९९":347,"१९६":128,"१९७":149,"१९४":143,"१९५":124,"१९२":109,"१९३":101,"अधि":146,"अने":154,"अभि":344,"अभ्":70,"अमे":278,"अमि":68,"अर्":160,"आका":68,"असल":335,"असत":304,"असण":73,"अशा":75,"अशी":64,"असे":343,"असू":247,"असा":90,"इंड":62,"इंग":258,"आढळ":87,"००९":79,"००७":86,"் ":95,"उच्":113,"उद्":130,"३० ":73,"उत्":376,"१९ ":73,"२७ ":71,"ऊन ":62,"२८ ":62,"२५ ":73,"इस्":65,"२० ":116,"ेशा":514,"ेशि":132," ओळ":134," कं":103,"ेष्":213," खं":102," कथ":124," कम":84," कर":956,"ैकी":208," कल":112," कव":97," कि":683," का":1385," कृ":78,"ेवर":160," कु":249," के":933," कॅ":146," को":333," कॉ":122," क्":525,"ेवा":214," एक":1893," एख":65," एप":100," ऑक":140," ऑग":114," ऑफ":67,"ेस्":75," ऑस":91," चौ":88," च्":195," जग":208," ची":128," चि":913," चा":258," चे":120," जर":148," जम":72," १ ":124," जन":432," ज्":461," २ ":80," जि":352," जा":1062," जू":107," जु":247," जी":146," जे":116," जो":163," ३ ":78," झा":447," खे":215," गण":130," गट":64," खा":206," घर":66," गो":192," ग्":328," गे":212," गु":208," गा":404," चं":92," घे":131," घा":63,"ंट ":79," अं":254,"ेथे":251,"ेथी":97," अप":73,"ेते":395," अन":300," अध":194," अथ":143," आक":124,"ेता":130," अल":139,"ेती":391," अर":236," अम":426," अभ":454," अस":1524," अव":109," अश":180," आग":68,"ेत्":247," आढ":90," आण":837," इं":409,"ेणा":90," अक":80," आं":187,"ेण्":83," अण":68," अत":140,"ेतल":69,"६० ":62,"ंत ":465,"ंड ":204,"ेने":94,"ेन्":114,"ेनि":72,"ंग ":304,"ेब्":104,"ेळा":116," इस":89,"ेल्":528,"ेलि":98,"ेली":343," इत":279,"ेला":618," आह":3959,"ेले":852," आश":86," आर":162," आय":154," आल":243,"ंच ":100," आध":97," आप":129," आफ":117," उल":66," उप":255," उद":168," उत":384,"ेरि":321,"ेरी":87," उच":118,"ंघ ":80," वा":841,"ोत ":71," वि":1279," शत":75," वस":293," व्":476," शर":73," वे":251," वै":84," शब":119," सं":1488," शि":338," शा":307," शह":692," शे":119," श्":176," शो":63," शक":86," वर":325," वन":73,"ोन ":196," ला":353," लि":318," ले":317," लो":622," या":2852," रच":72," यु":309," यो":83," ये":604," रश":94," रा":1893," रि":79," रस":83," रे":153," रु":64," रो":269,"ंद ":67,"ोर ":89," हो":1650," ह्":509," हि":526," ही":801," हा":1542," हे":1892,"ोल ":64," सम":640," सप":137," सर":815," सद":67," सध":64," सत":74," स्":1248," हर":99," हय":132," सो":189," सा":1287," सि":219," सह":142," से":175," सी":112," सु":443," सू":90," दर":173," दृ":63," दु":235," दा":117,"ोक ":70," दि":467," दक":264," त्":1085," तो":143," ते":655," तय":65," तम":233," तर":237," तत":72," ता":327," ति":182,"ोग ":64," ती":172," तु":116," तस":163," ७ ":73," ६ ":68," डि":141," टे":67," ट्":135," टो":93," ५ ":63," ठि":88," ४ ":80," टा":72," मो":576," म्":694," मे":240," मै":64," मू":114," मृ":78," मा":1083," मि":294," रं":86," मु":622," मह":1114," मल":145," मर":508," भौ":66," मन":77," मध":589," मत":139," भू":208," भा":1980," ब्":283," बे":267," बो":108," बा":493," बि":111," मं":217," बह":106," फ्":212," बर":139," 
बद":64," बन":145," फे":128," फा":98," फु":65," फि":73," बच":70," प्":2295," पो":147," पि":143," पा":818," पु":600," बं":158," पे":127," पॉ":75," पृ":96," पू":277," पर":492," पश":206," पह":159," पड":69," पट":81," पद":228," पत":76," न्":105," नो":178," पक":276," नि":731," ना":905," पं":133," ने":304," नव":130," धा":110," नद":157," दे":1033," द्":193," दो":206," धर":205,"ॅरि":66,"ेंट":68,"ेंद":165,"ेंब":371,"८९ ":68," द ":78," इ ":175,"ेल ":174,"८० ":71,"ेर ":66,"ेस ":273,"ेश ":484,"ेव ":85," स ":136,"ेच्":192,"ेचे":150,"ेचा":92,"ेची":66,"ेकड":79,"ेखन":78,"ेक्":200,"ेखक":117,"ेटा":76,"ॉन ":63,"९८ ":66,"९९ ":71,"९६ ":77,"९१ ":71," व ":1440,"ृष्":213,"ृथ्":72,"ृती":81,"ृत्":188,"अंत":115,"ेत ":845,"ेन ":118,"ेट ":229,"ेड ":81,"९९६":63,"ेक ":319,"ेख ":111,"ेच ":240,"आंत":108,"ुष्":79,"ुसर":130,"ुवा":185,"ुळे":148,"ुला":65,"ुलै":113,"ुरा":109,"ुरस":95,"ुरो":111,"ुर्":256,"ुरु":156,"ुरू":78,"ुमा":140,"ुप्":76,"ुनि":101,"ुनी":63,"ुना":87,"ुद्":358,"ुत्":100,"ंच्":531,"ंचा":365,"ंची":306,"ंचे":348,"ंग्":429,"ंगा":332,"ंघट":78,"ंगी":166,"ंघा":242,"ंका":93,"ंख्":149,"ंगल":89,"ंगण":71,"ंडा":180,"ंडळ":71,"ंना":305,"ंनी":632,"ंपै":165,"ंबई":124,"ूर्":429,"ंपा":63,"ंपर":71,"ंपन":86,"ंत्":351,"ंता":140,"ंती":82,"ंतर":401,"ंद्":289,"ंदी":266,"ंदू":116,"ंदा":71,"ंदि":113,"ंदर":105,"ंबर":448,"ंमध":305,"ंस्":364,"ंसा":99,"ुस्":136,"ंवर":89,"ुसा":139,"ंवा":298,"ंशो":74,"ाळी":71,"ाळा":189,"िकन":68,"ाला":601,"ालि":128,"ाली":458,"ालु":126,"ाल्":170,"ाले":279,"ावि":118,"ावी":70,"ावा":374,"ाव्":94,"ावे":148,"ावण":64,"िका":572,"ावर":588,"ावल":82,"िको":101,"िक्":215,"िके":418,"ाषा":187,"ासत":84,"ाषे":285,"ाष्":810,"ाशि":80,"ाशी":94,"िग्":108,"ासक":92,"ाही":306,"ाहि":257,"िचा":69,"ाह्":62,"ुन ":350,"ासि":69,"ासा":570,"ासु":84,"ाहत":75,"ासू":218,"ास्":488,"िजे":63,"ींच":65,"िणे":98,"िता":173,"िती":211,"ित्":984,"िद्":408,"िना":177,"िनि":68,"िनी":230,"िने":334,"िन्":135,"िपी":179,"िभा":112,"ियन":201,"िमे":82,"ियम":83,"िमी":103,"िमा":152,"ियो":66,"िया":833,"िर्":245,"िरी":73,"िरा":70,"िले":194,"िल्":507,"िला":170,"िली":83,"िसर":109,"िष्":172,"िषय":111,"िश्":136,"िशे":76,"िवस":72,"िवा":161,"िवि":81,"ीका":68,"िवड":238,"ीचे":213,"ीची":91,"िहि":83,"िहा":133,"ीचा":113,"ून ":1516,"ीच्":262,"िसे":127,"िस्":228,"ुंब":177,"ीती":162,"ूर ":255,"ीने":102,"ीरा":80,"ीवर":113,"ुक्":227,"ुका":154,"ुख्":276,"ृत ":101,"ीला":83,"ित ":454,"िण ":169,"िन ":167,"िध ":64,"िल ":211,"िळ ":165,"ीक ":118,"ांड":163,"ांच":1386,"ांक":146,"ांग":206,"ांस":157,"ांव":124,"िम ":197,"ांम":292,"ांब":110,"ांप":219,"ांन":910,"ांद":82,"ांध":110,"ांत":540,"िय ":84,"िर ":71,"िश ":190,"िस ":90,"ागा":289,"ीत ":405,"ागर":198,"ाखा":79,"ाक्":63,"ाका":132,"ाकर":80,"िंद":422,"ाडू":108,"िंव":279,"ाठी":839,"िंग":264,"ाटक":140,"ाजा":172,"ाजी":169,"ाज्":867,"ाजध":220,"ाजव":101,"ाचा":713,"ाची":807,"ाचे":1121,"ाच्":1245,"ाजक":91,"ीन ":336,"ाने":765,"ाना":350,"ानि":81,"ानी":337,"ानु":71,"ानल":114,"ानव":85,"ुख ":209,"ाध्":84,"ापन":141,"ान्":247,"ादी":203,"ानं":83,"ानत":66,"ाधि":74,"ाधा":87,"ानच":62,"ानक":91,"ाद्":145,"ाता":198,"ाती":1964,"ातू":202,"ाणे":107,"ाण्":123,"ातल":106,"ाते":315,"ात्":194,"ातो":153,"ील ":3327,"ीय ":1038,"ातं":85,"ीर ":82,"ाणी":153,"ाणा":214,"ारी":709,"ारि":99,"ारा":1018,"ारस":180,"ार्":1216,"ारे":318,"ालय":129,"ामा":334,"ायक":105,"ाम्":126,"ायन":99,"ामी":72,"ारं":64,"ामु":161,"ाया":101,"ारच":84,"ारख":74,"ारक":98,"ारण":233,"ारत":1080,"ाबा":143,"ामध":252,"ामन":110,"ापर":215,"ापू":104,"ापी":79,"ापा":183,"ाप्":78,"ीस ":70,"ाई ":106,"होत":1397,"होण":97,"ह्म":91,"ह्य":743,"ाक ":79,"ाग ":209,"ाद ":178,"ाण ":173,"ात ":2992,"ान ":859,"ाज ":109,"ाच ":165,"ाट 
":100,"ाव ":432,"िक ":1026,"ाळ ":124,"ास ":473,"ाम ":369,"ाल ":196,"ार ":1261,"ाय ":102,"सले":457,"सल्":84,"समु":164,"समा":227,"समू":87,"सम्":80,"सरा":98,"सर्":792,"सप्":140,"सभे":108,"ष्य":98,"ष्ण":139,"ष्ठ":291,"सभा":231,"ष्ट":1050,"सध्":62,"षेत":366,"हत्":160,"सेच":158,"सेन":81,"सें":161,"सीम":75,"सुन":150,"हणू":177,"सुर":133,"सुम":66,"हणत":183,"सून":460,"हणज":210,"सां":148,"साह":175,"सिद":219,"साध":112,"सार":316,"साम":328,"साय":147,"सिक":101,"साव":80,"साल":226,"साग":141,"सिं":119,"साठ":416,"सात":110," १२":89," ११":78," १४":90," १३":81," १६":113," १५":99," १८":311," १७":121," १९":1529," २०":568,"हरा":269,"हर्":68,"स्व":512,"हया":139,"स्य":64,"स्ल":87,"स्थ":759,"स्प":226,"स्ट":559,"स्त":1031,"स्क":382,"सेव":68," १०":100,"हेत":649,"हें":134,"हिल":252,"हिन":68,"हित":204,"हास":245,"हाव":84,"हाय":65,"हार":516,"हाम":139,"हान":148,"हिं":378,"हात":102,"हे ":5167,"षां":107,"सणा":80,"सतो":69,"सते":145,"सता":120,"सत्":153,"षाच":186,"षिण":264,"शेष":68,"शोध":130,"हा ":1574,"ही ":1273,"श्व":169,"श्र":226,"श्य":73,"श्च":243,"शहर":683,"से ":365,"सी ":115,"हर ":423,"संत":74,"शिय":317,"संक":81,"संग":244,"शिव":108,"संख":143,"संघ":417,"संब":106,"संप":91,"संस":355,"संश":79,"शात":171,"शाच":270,"शास":375,"शिक":213,"शां":163,"वेळ":85,"वेल":78,"वेश":67,"वेग":99,"वेद":75,"शतक":63,"वृत":90,"सा ":185,"व्ह":547,"व्य":626,"शब्":122,"वर्":390,"षा ":197,"वरा":67,"वरू":76,"वरी":175,"षी ":90,"वले":122,"वसा":93,"वसल":181,"वस्":189,"सन ":83,"वां":120,"वात":562,"वाद":207,"वान":200,"वाच":234,"वाज":104,"विक":171,"वाल":80,"वास":154,"वाप":201,"वार":452,"वाय":134,"वाम":62,"वित":108,"विन":71,"विद":197,"विध":115,"विज":139,"वाह":173,"विच":82,"विष":207,"विश":185,"वीच":70,"विल":73,"विव":106,"विम":91,"विभ":103,"वीप":110,"वडू":94,"वडण":125,"वणा":82,"वण्":85,"वती":67,"वता":101,"वना":93,"शा ":95,"षण ":107,"शी ":306,"वंश":67,"ळात":171,"ळाड":66,"षक ":73,"ळ्य":142,"शन ":88,"वे ":211,"वा ":653,"वी ":367,"ल्प":162,"ल्य":1022,"ल्ल":251,"ल्व":86,"ल्स":80,"ल्ह":320,"ळना":64,"लेश":64,"लोक":579,"वळ ":98,"लेल":1062,"लेख":355,"लिह":91,"लिश":121,"लिय":136,"लील":65,"लुक":126,"वर ":789,"लां":197,"लाग":85,"लाच":70,"लिं":80,"लाप":66,"लाव":113,"लिक":135,"लास":65,"लिन":79,"लिप":197,"वत ":73,"ळखल":119,"ळे ":200,"ळा ":98,"ळी ":148,"रेस":151,"रोज":164,"रोप":113,"र्श":238,"र्व":1106,"र्स":68,"र्ष":184,"र्म":630,"र्य":552,"र्ल":103,"र्थ":343,"र्द":88,"र्ध":121,"र्न":169,"र्फ":104,"र्ट":116,"र्ड":113,"र्ण":231,"र्त":217,"र्ग":412,"र्क":189,"र्ज":148,"र्च":115,"रीक":109,"रिय":267,"रिल":109,"रिस":109,"रीत":101,"लंड":195,"रीय":372,"रील":210,"रुन":72,"रुप":68,"रुव":148,"रून":145,"रूप":76,"रें":77,"रेक":125,"रेट":62,"रेल":183,"रसा":80,"रसि":236,"रस्":202,"ले ":1654,"लै ":119,"रां":490,"रान":176,"राठ":475,"राट":76,"रात":566,"राण":185,"राज":1474,"राच":427,"रिट":93,"रित":141,"राष":801,"रास":112,"राह":156,"राम":258,"राय":119,"रिक":669,"राव":320,"रलि":71,"ला ":1877,"ररा":122,"रम्":122,"रमा":263,"रमु":208,"रसं":149,"रशि":112,"रशा":97,"रवा":143,"रले":92,"ली ":1229,"रपट":518,"येष":193,"येण":75,"येथ":336,"येत":173,"रदे":228,"योग":175,"युक":84,"युर":149,"युन":102,"युद":90,"याच":1413,"याम":178,"यान":560,"याप":224,"यात":1507,"याद":157,"यास":414,"याव":251,"यिक":66,"याल":203,"यार":164,"रता":731,"रति":74,"रती":443,"रते":65,"रत्":99,"लय ":91,"रणा":308,"रणे":72,"रण्":354,"रचन":71,"यवस":94,"रक्":111,"रजा":126,"रजी":94,"यां":1609,"रज्":107,"रच्":105,"लन ":62,"रे ":511,"महा":885,"महत":141,"यक्":261,"रू ":76,"मले":80,"री ":1362,"मृत":96,"मूह":88,"मुळ":150,"मुल":69,"मुद":185,"र् 
":478,"मुख":511,"रंप":65,"रंथ":84,"मुं":140,"रंग":175,"मिळ":390,"मित":196,"मिन":63,"मार":537,"माल":161,"मिक":114,"माव":85,"माह":78,"माण":213,"मात":178,"मान":677,"माच":73,"माज":179,"मां":206,"मोठ":381,"में":100,"मेर":300,"मेल":94,"यत्":103,"रका":358,"म्र":173,"म्य":205,"म्ह":682,"म्म":64,"रत ":129,"यू ":94,"रण ":190,"या ":6887,"भिन":289,"भाव":89,"भाष":512,"भार":1098,"भाग":467,"रम ":63,"यंत":200,"ये ":856,"मधी":410,"मध्":993,"भेव":86,"मनी":88,"भूत":65,"भूम":113,"मतद":118,"मरा":500,"रा ":574,"भ्य":77,"मा ":144,"बर्":157,"मी ":268,"यन ":212,"मे ":125,"बहु":71,"बिय":93,"बाज":80,"बाब":72,"बार":87,"बाद":98,"बां":115,"मंत":114,"मंद":83,"मंड":86,"यम ":64,"रक ":67,"बेर":65,"बेट":143,"बोध":67,"ब्र":411,"ब्द":139,"प्र":2788,"प्त":110,"प्ट":127,"भा ":239,"मण ":66,"मन ":141,"बच्":64,"यक ":87,"बद्":68,"बनव":83,"फेब":101,"फ्र":309,"पहि":119,"पश्":206,"पल्":82,"बी ":93,"बा ":68,"पर्":325,"परि":145,"परा":78,"परं":68,"�":133,"पैक":208,"पृथ":73,"पुस":66,"पूर":541,"पुण":116,"पुत":63,"पुर":350,"पीठ":86,"बंग":106,"बंध":84,"पास":320,"पाच":82,"पाण":105,"पान":76,"पात":141,"पाद":66,"पार":155,"पाल":72,"पाय":64,"पां":102,"पक्":286,"न्न":159,"न्य":422,"न्म":293,"न्ह":93,"न्स":215,"नोव":141,"पद्":103,"पदा":135,"पनी":76,"पना":113,"पणे":87,"बर ":537,"पत्":186,"पती":143,"पटा":141,"नले":90,"नवी":100,"पी ":206,"नदी":146,"ध्य":1211,"नुस":148,"नीच":95,"निस":85,"नेत":475,"नेश":67,"नेव":132,"नेक":163,"बई ":90,"निव":276,"निर":218,"निय":224,"नास":64,"नाह":82,"नाव":453,"निक":308,"नाय":79,"नाम":119,"नार":159,"नात":134,"नाट":176,"नाड":71,"नाच":211,"नाग":125,"नां":144,"द्द":122,"द्व":204,"द्र":575,"द्य":488,"द्ध":540,"धर्":201,"पट ":338,"देव":241,"देश":972,"देण":76,"दोन":169,"धात":66,"धार":222,"धिक":210,"धान":301,"धील":412,"पर ":71,"नता":99,"नगर":128,"पण ":79,"नच्":79},"n_words":[573395,652939,442285],"name":"mr"}
\ No newline at end of file
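Each profile file added in this change is a single-line JSON document with three fields: "freq", a map from 1- to 3-character n-grams to their raw occurrence counts in the training corpus; "n_words", the corpus-wide totals of 1-, 2-, and 3-grams; and "name", the ISO 639-1 code the detector reports (here "mr", Marathi). A minimal Python 3 sketch for inspecting one of these files with only the standard library; the checkout-relative path is an assumption about where this tree is unpacked:

    import json

    # Assumed path of the Marathi profile inside this source tree.
    with open('nlp_resource_data/langdetect/profiles/mr', encoding='utf-8') as f:
        profile = json.load(f)

    print(profile['name'])             # 'mr'
    print(profile['n_words'])          # [573395, 652939, 442285]: 1-, 2-, 3-gram totals
    print(profile['freq'].get('ला'))   # 2983: raw count of one bigram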
diff --git a/nlp_resource_data/langdetect/profiles/ne b/nlp_resource_data/langdetect/profiles/ne
new file mode 100755 (executable)
index 0000000..2247748
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"ेश्":122,"ेष्":121,"दन ":955," कम":272," कर":171," कल":142," कि":313,"ेशक":101," का":2467," कृ":130," कु":353," के":400," को":746," क्":668,"ेवा":114,"ेवी":117," एक":2232," एम":84," एस":66," एव":82,"ेही":103," चौ":73," चु":87," ची":65," चि":272," चा":174," छन":436,"थी ":98," जर":83,"तको":395," छो":111," जस":287," १ ":94,"तका":126," छा":110," जन":834," ज्":137," २ ":106," जि":1031," जा":297," जु":184," जी":93," जे":102,"था ":777," छ।":1176," जो":201," ३ ":81," झा":69," गर":1726," खो":93," खे":113," गण":154," गत":96," खा":152," घर":283," गौ":72," गो":262," ग्":152,"ैति":183," गु":176," गा":503," । ":1729,"दछ ":91," चल":134,"णमा":89," चर":93," चन":91,"ोल्":69," अं":111,"ोलि":69,"ोला":99,"ोलन":95," अप":72," अन":738," अध":277," अथ":96," आक":83,"ेता":128," अल":77," अर":427," अम":148," अभ":65,"ोर्":161," अस":323," अव":627,"ेत्":416," अक":100," अग":100," अत":71," अञ":333,"ेपा":2443,"ेन्":518,"ोबा":73,"थल ":101,"ेना":77,"ंग ":129,"ेमा":93," एउ":305,"त् ":79,"थम ":65," इल":78," इन":145,"ेली":90,"ेला":163,"तो ":127," आर":92," आय":68," आन":91," आद":140," आध":94," आफ":173," उह":329,"ेर्":71," उप":369," उन":300," उद":118," उत":328,"ेरि":117," उच":89,"ेरै":136,"ंघ ":125," वा":585," वी":67," वि":1830," वु":93,"्तर":540," व्":234," शर":146,"ं ":520,"ैशा":79," वै":119," शब":115," शु":75,"्तो":97," सं":1338,"्त्":507," शि":206," शा":289,"्तै":88," शह":107,"्थल":136,"्ता":424," सक":211,"्ती":153," श्":338,"ताक":144,"्ति":514,"्तु":122,"्थ्":92,"तान":131,"्दछ":254,"्थि":236,"ताम":82,"्था":656,"तार":119,"ताल":259,"तिक":679,"्थी":106,"ताह":141,"्दो":111,"्द्":640," वट":348,"तिन":112,"तिम":220,"ँ ":497,"तिब":72,"्दा":387,"तिल":64," वर":390,"्दी":92,"्दि":109,"तिर":95,"्दू":82,"तीक":85,"तिह":119," वन":80,"्दै":77,"्दे":123,"्ट्":320," लग":144,"्टी":291,"्टा":74," लल":75,"्डल":215," लु":106," ला":569," लि":251," ले":262,"इ ":252," लो":97,"्डक":81,"्णक":65," या":166," यस":2003," यह":103," यि":70,"धन ":65," यु":243," यो":2991,"्तक":221," रा":1721," रह":505," रे":111," रू":232,"दै ":146," रु":253," रो":152,"्बन":84,"दी ":721,"्बि":65,"्मक":109,"्मन":119,"्यक":669,"्मा":313,"्मि":89,"्मी":132,"्यत":93,"्मे":80,"्यम":169,"्रक":2903,"्यव":169,"्यस":223,"्यह":70,"्रज":90,"्यु":227,"्या":1225," हो":3798,"्धा":79,"्धि":77,"्धु":64,"तया":66," हि":324," हा":314," हु":1822," हे":105,"्ने":717,"्नु":381,"तरी":81,"्ना":84,"तरा":100,"्नो":104," सम":2258," सभ":305," सब":222,"्पन":76," सन":367,"्पत":195," सर":306,"दा ":445," सद":429," सत":75,"तर्":239," स्":1058," हर":182,"्फब":94," सो":117,"्पा":1027," सा":1310," सि":468," सह":215," से":264,"्प्":73," सु":497," सू":97,"दि ":71," दल":430," दर":95," दु":249," दा":223,"ोक ":69," दि":387," दक":141," त्":468," ते":64," था":199," थि":1391," तर":320," तथ":624," ता":221,"्क्":76," ति":156,"ोग ":226,"तमा":143,"्को":75," ती":93,"्का":162,"्कृ":122,"्की":66,"ण्ड":575,"ैनि":66," ठु":72," ठू":98," डा":80,"ँको":90," टे":70,"ैभन":80,"तपु":159," ५ ":87," ठा":328,"तन्":137," ४ ":90,"७१ ":81," मो":133," मे":182,"्जा":70," मा":2199," मि":226," मी":71," मु":393,"्जी":68,"्छ।":1182," मह":623," भो":79," मन":295," भे":81," मध":395,"ैमा":131," भु":80," मण":175," भू":134," भि":128," भा":1679," मज":70," भर":94," ब्":200," भद":64," बे":149," भन":724," बै":74," बो":103,"्ञा":169," बा":593," बि":392," मं":65," बी":67," बु":143," भग":64," बह":81," बस":196,"्टर":85," फ्":93," भए":2250," बर":162,"दल ":217," बन":332," फा":77," बज":106,"तथा":612," बढ":88," प्":5303," पो":110," फल":67," पि":113," पा":1007," पु":600," पे":81," पृ":77," पू":312,"ौं ":109," पर":802," पश":191," पह":651," पछ":122," पन":637," 
पद":103,"णाल":87," पत":3652,"ौँ ":65," न्":72," नै":168," नि":657," ना":582," पं":74,"्चल":413," ने":2763," धे":138,"्छन":169," नय":92," नर":78,"्चि":231," नव":75,"्चा":256," धा":133,"्। ":184," नद":186," दै":64," दे":732," धन":78," द्":298," दो":108," धर":161," नग":197,"थ ":223,"दछ।":89,"द ":562,"ध ":304,"दछन":85,"न ":5506,"ड ":307," छ ":575,"ठ ":146,"थाल":88,"थिए":221,"थाप":321,"ण ":876,"थान":166,"थिय":1145,"त ":3231,"थित":211,"धी ":65,"ज ":416,"दक्":143,"ट ":1241,"धि ":82,"ं":3288,"ः":73,"ँ":2200,"आ":1341,"इ":1404,"अ":4126,"ऋ":113,"ई":1594,"उ":3478,"घ ":163,"ए":6874,"ओ":539,"ऐ":69,"ग":8111,"ख":2732,"क":41084,"औ":111,"छ":5495,"च":3790,"ङ":845,"घ":968,"ट":4637,"ञ":822,"झ":319,"ज":7684,"ठ":1551,"ङ ":292,"ड":2764,"ढ":364,"ण":2637,"त":20032,"थ":4532,"द":12121,"ध":3905,"न":29922,"प":22723,"फ":1251,"्ग ":136,"ब":5465,"भ":7912,"म":22298,"य":17139,"र":42105,"ल":18318,"व":12132,"ष":4104,"श":6654,"ह":13763,"स":20352,"ि":29268,"ा":66742,"े":14995,"थवा":92,"ू":2666,"ृ":815,"ी":11554,"च ":186,"ु":12537,"ौ":1228,"्":49626,"ो":28329,"ै":2713,"।":9235,"०":2064,"१":1785,"्क ":72,"छ ":1403,"६":623,"७":619,"८":678,"९":972,"२":2170,"३":565,"४":638,"५":791,"क ":5557,"ेल ":167,"ग ":763,"ख ":276,"ेर ":327,"त्स":67,"त्व":197,"त्प":68,"त्र":5347,"त्य":758,"त्त":449,"ए ":239,"तीय":69,"द् ":189,"ेस ":70,"उ ":287,"ई ":1278,"ेश ":318,"ै ":1366,"्न ":467,"नै ":335," ख":612," ग":3519," औ":90," क":6153," ओ":97," ऐ":65," ट":271," ज":3597," झ":164," च":1230," छ":2605," घ":502," इ":461," आ":1284," अ":3988," ए":2988," ऋ":77," उ":1784," ई":92,"दार":162,"नो ":211,"दान":85,"दीक":207,"दिर":84,"े ":3604,"दिन":243,"न् ":626,"दुई":79,"ौँम":309,"दुर":326,"ेकप":97,"्म ":374,"ू ":585,"ेका":344,"ेको":828,"्य ":1998,"ेखि":464,"्र ":942,"ेखा":65,"ि ":3525,"नी ":612,"ेटि":110,"ी ":7016,"नु ":137,"ोत्":73,"ु ":1173,"ा ":19102,"ँउ ":227," ८":76," ९":73," ६":100," ७":169," ४":169," ५":165," २":1308," ३":186," ०":277," १":1083," ।":2222,"्व ":234,"ेजी":71,"दस्":318," प":14289," फ":558," न":5247," म":5367," य":5733,"्ष ":268," ब":2996," भ":5836," ढ":70," ठ":526," ड":306,"ह ":271," द":2964," ध":656," त":2303," थ":1702," ह":6806," स":10170," ल":1764," र":4971," श":1531," व":3977,"ने ":1547,"्स ":116,"स ":1459,"ष ":373,"थ्य":109,"श ":446,"्छ ":694,"व ":550,"्च ":74,"दरम":107,"्ट ":232,"ल ":2508,"दलह":69,"नि ":671,"दर्":91,"दलक":97,"ना ":698,"ँग ":83,"र ":6666,"्ड ":120,"य ":3205,"्ठ ":89,"म ":1712,"्ण ":171,"ोखर":85," र ":1581,"ब ":130,"्थ ":94,"फ ":86,"्त ":382,"्ध ":250,"प ":228,"ो। ":2741,"्द ":129,"ृष्":136,"डल ":72,"डी ":201,"डा ":124,"ञ्ज":95,"ञ्च":499,"ृति":122,"ृत्":80,"ञान":153,"ेत ":110,"ठमा":517,"ट्र":339,"ट्ट":87,"ढी ":85,"ेक ":82,"अक्":91,"टिन":116,"अञ्":333,"टीक":104,"ुवा":235,"ूको":117,"ुला":154,"ुलो":69,"ुल्":89,"ुले":184,"ुरा":226,"ुरो":69,"ुर्":237,"ुरी":67,"ुरु":159,"ुरम":86,"ुम्":151,"ुरक":79,"ुमा":298,"णी ":110,"ँदै":80,"ुभय":124,"णा ":67,"डलक":101,"ुभए":420,"ुप्":75,"ुपा":81,"ुपम":101,"ुन्":1483,"ुनै":129,"ुने":151,"ुनु":515,"ुनि":176,"ुना":77,"ुद्":231,"्सा":87,"्से":64,"्ष्":65,"ँमा":361,"्स्":64,"ठाउ":291,"ठूल":104,"्ला":1132,"्लो":92,"्ले":68,"ंचा":97,"्रद":218,"्रथ":82,"्रत":719,"्रण":90,"्रप":72,"्रन":77,"्ये":172,"्रध":142,"्रम":518,"्यो":226,"ंग्":118,"्रय":179,"्रव":165,"्रश":109,"्रह":151,"्रस":389,"्रा":928,"ंगा":83,"्रि":4096,"्री":584,"ठुल":72,"्रै":117,"्रे":422,"्रो":266,"्षक":74,"ीहर":197,"्षर":74,"ंगठ":68,"्षे":409,"्षि":335,"्षा":151,"्षी":92,"्वक":82,"ुग्":67,"्वत":106,"्वर":184,"्वव":84,"्वम":76,"्वय":164,"्वी":175,"्वा":1398,"डौँ":365,"डौं":74,"ूला":68,"ूलो":82,"ति ":1256,"ता ":557,"णको":70,"तु 
":113,"ूमि":65,"ूमा":80,"ती ":337,"ूर्":396,"ूपम":142,"तै ":115,"ते ":88,"ूद्":136,"ंमा":238,"ंस्":240,"तर ":294,"ुस्":101,"ुसा":127,"ुहु":425,"ंवि":168,"ाला":279,"ालि":277,"ाली":1573,"ाल्":175,"ाले":372,"छ। ":2152,"ावि":101,"ाशन":833,"िकृ":90,"िका":4480,"ाशक":884,"िको":207,"िक्":271,"ाषा":1077,"ासन":94,"ासद":191,"ाष्":296,"ाशि":795,"ासक":88,"ाही":67,"ाहि":278,"ाहा":83,"ुन ":257,"ासि":356,"ासी":103,"ाहर":380,"ास्":236,"जा ":203,"चलक":256,"िज्":95,"चर्":65,"िजय":135,"जी ":92,"ुर ":568,"ङ्ग":239,"ितप":65,"छन्":559,"िता":146,"िति":411,"ित्":493,"िद्":368,"िधि":130,"िधा":249,"िना":260,"िनि":160,"िनी":149,"िने":242,"िनु":78,"िन्":1022,"चीन":87,"िभि":96,"िभा":91,"ियन":73,"िम्":132,"ियम":114,"िमा":914,"ियो":1315,"िया":360,"िर्":332,"िरा":176,"िले":173,"िल्":1069,"िलो":573,"चित":254,"िला":204,"चाय":66,"चार":351,"चाल":100,"चिव":79,"चिन":100,"चिम":183,"िष्":174,"िश्":356,"िशे":71,"िवा":232,"ीका":189,"ीको":490,"िहा":169,"िस्":142,"ूल ":110,"ीति":294,"च्च":109,"जमा":145,"ीद्":108,"छोर":78,"जन्":230,"जनक":98,"जनत":72,"जधा":122,"जनव":107,"जनै":185,"जनी":264,"जना":168,"जदु":67,"ीमा":357,"ीया":65,"ुक्":205,"ुको":176,"ुका":171,"ुख्":130,"ृत ":152,"ीले":86,"ीला":83,"जस्":157,"जसल":64,"ित ":1545,"िण ":71,"जिल":993,"जिक":146,"जार":186,"जान":79,"जात":246,"जर्":77,"जयी":89,"ाँउ":242,"ाँच":67,"ाँक":137,"िन ":363,"जवा":81,"ाइन":178,"िल ":92,"जोड":89,"ाउँ":218,"ाउं":237,"ाउन":366,"ीक ":78,"ज्य":351,"ाँस":67,"ांग":75,"ज्ञ":223,"ाँल":66,"िम ":169,"िय ":243,"िर ":148,"जील":69,"जीव":121,"ाएक":100,"जुन":124,"टर ":108,"िव ":68,"िस ":86,"ाग्":194,"ागु":64,"ागि":189,"ीत ":128,"ागर":78,"ागम":84,"ाको":3606,"ाक्":157,"ाका":303,"ाओव":397,"ाडी":141,"िंह":72,"ाठम":478,"ाटन":82,"टी ":233,"टा ":741,"ाजा":131,"ाजि":146,"ाज्":244,"ाजन":456,"ाजध":126,"ाजव":83,"ीन ":153,"ाचन":103,"ाङ्":108,"ुई ":73,"ाने":97,"ाना":169,"ानि":283,"ानी":322,"ानु":103,"ानव":65,"ुख ":149,"ानस":161,"ानम":146,"ाध्":103,"ापन":228,"ान्":702,"ानो":110,"ादे":93,"ादी":738,"ादु":180,"ादन":914,"ानक":94,"ाद्":158,"ाति":178,"ाता":140,"ाती":68,"ाण्":134,"ात्":408,"ादक":75,"ीय ":427,"ाडौ":440,"ारी":407,"ारि":163,"ारा":1342,"ार्":2184,"ारे":69,"ालक":765,"ालम":256,"ालय":324,"ालद":78,"ामा":889,"ायक":72,"ाम्":197,"ायण":151,"ायत":156,"ाया":71,"ारक":244,"ारम":182,"ारण":184,"ारत":376,"ाबा":106,"िएक":373,"ामक":80,"ामय":74,"ामम":66,"ापा":186,"ाप्":232,"ुङ ":64,"ंघ":232,"ँस":83,"ौ ":65,"ंख":88,"ंग":634,"ंक":193,"ँल":78,"ंच":134,"केन":240,"ँद":154,"ँड":80,"ँम":363,"ंस":389,"ंह":86,"ंव":201,"् ":1181,"ंत":94,"ंम":241,"केह":83,"ँक":178,"ो ":20357,"ँग":166,"ँच":76,"ँउ":243,"कृत":264,"अस":341,"अव":627,"आक":84,"अल":82,"अर":461,"अभ":65,"आए":70,"अम":151,"घर ":251,"अप":74,"अध":281,"अन":771,"अथ":96,"इत":80,"ाइ ":156,"इए":81,"आय":68,"आर":95,"आफ":174,"कृष":151,"आद":141,"आध":94,"आन":112,"ाई ":1080,"अं":111,"अञ":333,"अत":71,"अक":101,"अग":107,"उम":66,"उप":377,"उह":331,"ऋत":72,"इन":366,"इर":70,"इल":137,"इस":102,"उँ":251,"उं":238,"उक":79,"उच":90,"उट":311,"उत":340,"हो।":2612,"उन":717,"उद":141,"कोट":86,"एउ":305,"एक":5494,"एम":113,"एप":66,"क्क":84,"क्त":425,"क्य":129,"क्न":67,"क्ष":1267,"क्र":426,"क्स":110,"ाँ ":310,"ए।":67,"एव":82,"एर":147,"एस":80,"गर":2258,"गल":135,"गव":76,"खे":215,"गन":66,"खो":112,"गम":263,"ख्":345,"खा":431,"गठ":95,"खि":580,"खी":82,"खु":123,"ाग ":123,"गढ":68,"गण":181,"गत":326,"गको":91,"खर":150,"क्":2722,"खम":75,"गक":127,"कै":214,"के":654,"को":14452,"कि":691,"की":474,"का":12832,"कृ":425,"कु":618,"कू":91,"कस":66,"कव":78,"कह":90,"कल":273,"कम":415,"कर":463,"कप":247,"कन":76,"कत":154,"ाङ ":105,"कक":112,"ओव":398,"चु":124,"ची":172,"चि":731,"चा":795,"छन":717,"चौ":90,"च्":216,"चो":89,"० 
":345,"जक":89,"चन":324,"ङ्":397,"चर":129,"चल":599,"घा":108,"गण्":66,"ङम":69,"गते":76,"। ":7045,"गा":955,"गी":218,"गि":310,"गु":302,"गो":351,"गौ":84,"ग्":809,"गे":161,"गै":75,"घर":335,"टन":160,"ञ्":596,"ञा":169,"टा":992,"टर":235,"४ ":302,"झा":105,"३ ":263,"टक":135,"जो":225,"जे":208,"छ।":2479,"जी":387,"जु":351,"जा":939,"जि":1330,"२ ":297,"ज्":670,"जन":1499,"खान":74,"जद":90,"जध":126,"छि":255,"छा":205,"जस":334,"जव":93,"१ ":351,"जर":120,"जल":76,"गठन":79,"छो":118,"जय":160,"जम":205,"ठा":393,"डक":133,"ाथ ":102,"५ ":351,"ठन":83,"टे":203,"ठम":522,"ट्":498,"टो":148,"टी":408,"टि":258,"डा":364,"डि":270,"डी":265,"डल":256,"६ ":313,"ाद ":282,"ठु":73,"ठू":104,"ढी":114,"ढा":72,"ाण ":70,"णक":119,"७ ":328,"ड्":123,"डौ":474,"डो":86,"डे":171,"णि":132,"णी":170,"णा":196,"ात ":197,"तक":594,"८ ":352,"णम":93,"तव":80,"९ ":264,"तह":110,"ति":2878,"ता":1722,"तु":292,"ती":680,"तथ":624,"तप":208,"तन":233,"तम":226,"ण्":631,"तय":73,"तल":91,"तर":935,"थव":101,"दछ":271,"था":1584,"थी":140,"थि":1694,"ते":197,"तै":125,"तो":181,"थम":91,"त्":7154,"थल":165,"खेल":103,"दक":289,"दस":347,"दू":145,"दु":584,"दी":1175,"दि":689,"दा":996,"दन":1011,"दव":74,"दल":480,"दर":324,"दम":71,"थ्":174,"नज":74,"धा":907,"नत":94,"नद":266,"धी":98,"धि":828,"ान ":852,"धु":156,"दो":255,"दौ":70,"द्":2831,"दे":1366,"धन":183,"दै":256,"नक":650,"नग":337,"धर":218,"नर":137,"नल":223,"नव":289,"ाज ":178,"धे":169,"नन":100,"नप":96,"नब":64,"नय":150,"नम":477,"ध्":787,"पं":75,"नी":1179,"नु":1548,"ने":4740,"नस":329,"नह":97,"ना":2286,"नि":2559,"पक":159,"नो":316,"नै":540,"न्":7416,"पत":4116,"पन":1101,"पद":120,"न।":118,"पछ":277,"पट":80,"पश":213,"पह":665,"पस":130,"पल":132,"पम":272,"पर":1026,"पे":166,"पू":427,"पृ":79,"पा":5213,"पि":272,"पी":141,"पु":1182,"फल":131,"फर":70,"प्":6004,"फब":94,"पो":196,"बन":449,"फे":70,"बत":86,"फु":76,"बढ":88,"फू":67,"फा":131,"बज":120,"फ्":316,"भए":2692,"बर":285,"बल":88,"भक":78,"भग":82,"बस":232,"बह":195,"बि":521,"बा":1458,"बु":218,"मं":75,"बी":142,"भद":79,"भन":878,"बे":211,"गरम":92,"बै":272,"बो":141,"गरप":94,"भय":191,"भर":147,"गरे":321,"ब्":486,"मक":339,"गरी":134,"गरि":502,"मग":77,"मज":101,"भि":375,"भा":2323,"मत":163,"मण":318,"भू":158,"भु":153,"गर्":821,"मन":572,"भे":108,"मध":481,"मद":72,"भो":89,"मप":65,"मल":187,"यक":819,"मम":151,"भ्":133,"मय":212,"मर":128,"मस":126,"मह":673,"मृ":68,"यत":299,"मू":114,"यण":158,"ख्य":245,"यद":91,"मि":1113,"मा":10729,"ाट ":785,"मु":840,"मी":348,"रं":73,"मो":273,"यम":527,"म्":2421,"मे":530,"यन":231,"यप":89,"मै":88,"यव":183,"रख":100,"रग":116,"रक":3696,"यल":80,"यर":101,"या":2168,"रज":106,"यह":189,"रच":84,"गमा":170,"यस":2268,"रध":173,"रद":310,"रथ":102,"रत":1236,"रण":521,"यु":657,"यी":175,"यि":191,"रय":191,"रम":1296,"रभ":77,"रब":131,"यो":5061,"रप":226,"रन":174,"ये":226,"लम":462,"लय":351,"लब":81,"लद":123,"लन":243,"गाउ":130,"लच":65,"लग":235,"लक":1487,"र्":6868,"रो":703,"रै":297,"रे":1245,"गाँ":224,"रू":1254,"री":1735,"रु":1451,"रि":5597,"रा":5311,"रह":868,"रस":574,"रश":114,"रव":321,"रल":193,"वं":88,"िक ":1742,"ल्":1854,"लो":1078,"चन ":69,"ले":2083,"लु":247,"ली":1940,"लि":1013,"ला":3708,"लल":168,"लह":138,"लस":69,"शब":122,"वै":233,"शन":968,"वे":271,"षक":102,"शम":116,"व्":325,"शर":176,"वह":113,"वव":90,"वस":368,"वु":106,"वा":3817,"वि":2520,"वी":434,"वप":72,"वन":350,"वध":419,"शक":1037,"वल":144,"वर":826,"वय":169,"वम":117,"वक":170,"वत":227,"वट":383,"ाह ":84,"सन":601,"षे":417,"सप":72,"सभ":377,"सब":248,"ष्":905,"सम":2673,"सर":429,"सल":312,"सव":65,"हक":127,"सच":90,"षा":1291,"षि":415,"षी":120,"िङ ":81,"ास 
":619,"सत":83,"सद":673,"शे":152,"श्":1168,"षर":77,"सग":64,"सक":2028,"शह":143,"सं":1482,"शी":150,"शु":136,"शा":630,"शि":1111,"सँ":111,"षण":89,"हे":701,"हु":2366,"ही":279,"हि":1570,"हा":1720,"िए ":165,"ाम ":560,"ह्":87,"हो":3850,"से":478,"हन":162,"सु":586,"सी":345,"हत":108,"सू":105,"सि":1173,"चल ":150,"सा":2163,"सह":275,"हज":68,"हल":85,"स्":3576,"हर":1967,"सै":149,"सो":307,"ात":1181,"ाथ":320,"ाण":404,"ाठ":517,"िं":132,"ाड":760,"ाल ":1016,"ाट":1019,"ाब":231,"ाभ":93,"ाप":949,"ान":3357,"ाद":2589,"ाध":265,"गुर":71,"िख":71,"ाव":435,"िक":7298,"ाल":5422,"ार":6852,"ाय":812,"ाम":2329,"िए":681,"िज":372,"ाह":1078,"िच":143,"िङ":184,"ास":1934,"ाष":1391,"ाश":2631,"िग":115,"ां":267,"ाँ":1104,"ाइ":698,"ाई":1298,"ाउ":964,"ाओ":403,"ाक":4316,"ाए":185,"गुन":74,"ाच":265,"ाज":1659,"ाग":1013,"ाख":288,"ाङ":244,"ाघ":70,"ार ":1305,"ुँ":124,"ीद":163,"गिर":76,"ुई":88,"ीत":508,"ीप":148,"ीन":313,"ीम":461,"ीय":520,"ीब":79,"ील":304,"ीर":227,"ुग":188,"ुख":347,"ीव":147,"गीत":100,"ुक":691,"ीह":209,"ीस":78,"ुङ":145,"ुट":214,"िट":172,"ाय ":70,"िण":163,"ित":2827,"िद":484,"गाय":88,"िध":410,"िन":2481,"िप":245,"िब":181,"िभ":260,"िम":1517,"िर":1038,"िय":2231,"िल":2254,"ीक":872,"िश":533,"िव":468,"िस":542,"िष":301,"िह":253,"ेव":427,"ेश":820,"ेल":681,"सला":171,"ेर":844,"ेम":230,"ेब":84,"ेप":2592,"सले":93,"ेन":806,"ैत":257,"ैज":79,"ेह":196,"ेस":274,"ेष":206,"ैश":81,"ैर":64,"ैल":110,"ैभ":84,"ैम":134,"ैन":181,"ेक":1516,"ेख":723,"ेट":234,"ेड":98,"ेत":801,"ेद":151,"ेग":72,"ेज":215,"समु":85,"समि":387,"समा":459,"ृथ":65,"ृत":389,"ृष":194,"गोर":127,"सरक":167,"गोल":71,"समे":69,"सम्":1386,"ुत":130,"ुण":70,"ुन":2984,"ुद":383,"ुब":76,"ुप":397,"ुर":1722,"ुम":556,"ुभ":589,"ूक":155,"ग्ल":85,"ुल":730,"ुष":88,"ुस":363,"ुव":409,"ग्र":367,"ुश":84,"ुह":538,"सर्":105,"ग्न":160,"ूद":147,"ून":67,"ूप":214,"ूम":171,"ूर":453,"ूल":388,"ूह":74,"सबै":187,"्व":2939,"्श":82,"्ष":1678,"्स":579,"्ह":81,"्भ":100,"्म":1441,"्य":5693,"्र":13669,"्ल":1547,"्।":480,"समय":115,"ष्म":69,"ष्ण":122,"ष्ठ":171,"ष्ट":483,"सभा":365,"सद्":160,"ौत":91,"ोर":513,"ोल":515,"सदस":314,"ोब":119,"ोम":148,"ोस":122,"ोह":153,"सदर":107,"ोश":104,"ोव":69,"्ण":427,"्त":3287,"्ड":711,"्ट":1245,"्ठ":192,"्ञ":223,"्फ":210,"्ब":412,"्प":1700,"्ध":702,"्न":1912,"षेत":396,"्थ":1433,"ो।":3741,"्द":2288,"ौल":71,"ौर":128,"्ज":453,"्छ":2152,"्च":1134,"्ग":579,"्ख":113,"्क":866,"सन्":376,"ोज":238,"ौं":159,"ोड":185,"ोट":228,"ौँ":401,"ोद":66,"ोत":169,"ोप":186,"ोध":78,"ोन":98,"ोख":109,"ोक":287,"ोग":453,"हत्":91,"सेन":118,"छन ":99,"सुन":123,"सिर":66,"सिम":95,"सिन":119,"सुर":172,"साह":147,"सिद":138,"सान":163,"साद":218,"साप":131,"सार":311,"साम":357,"सिक":403,"साल":247,"सिं":91,"साथ":94,"सहर":108," १८":152," १९":516," २०":1012,"०६":103,"०४":114,"०५":217,"०२":500,"०३":93,"००":262,"०१":255,"१०":116,"१५":81,"१७":82,"१८":226,"१२":66,"१४":88,"१९":555,"२०":1126," ७१":72,"हरू":762,"हरु":794,"हरि":77,"ङमा":65,"७१":90,"स्व":373,"९०":64,"छि ":189,"९६":77,"९५":70,"९८":66,"९१":112,"स्र":74,"स्य":402,"स्न":77,"स्थ":1019,"स्प":226,"स्ट":105,"स्त":727,"९९":85,"स्क":351,"२३":69,"२२":66,"२५":83,"२७":122,"२६":80,"२९":70,"२८":115,"सोज":81,"सेव":93," १०":76," ०५":134,"हेक":471,"हेन":82,"जन ":98,"हुन":2172,"हुँ":68,"हिम":124,"हिल":691,"हिन":285,"हित":161,"हास":142,"हिक":125,"हाल":183,"हार":140,"हान":72,"हाद":225,"हाड":113,"हाँ":430,"चना":64,"चन्":135,"सचि":83,"सकि":79,"सका":115,"सको":1596,"सक्":148,"कै ":184,"का ":4257,"कि ":84,"षाक":932,"की ":394,"षिण":127,"हो ":1122,"षिक":187,"ओवा":396,"शेष":71,"हा ":81,"ही ":218,"श्व":419,"श्र":376,"श्य":86,"श्च":215,"शहर":129,"सी ":177,"हर 
":124,"संग":215,"संख":67,"संघ":224,"संव":197,"संस":296,"शाख":89,"शित":796,"शाह":92,"शास":187,"सँग":107,"शिक":120,"सो ":69,"शार":66,"वैश":69,"वेश":81,"वुल":80,"शर्":111,"सा ":76,"व्य":273,"शब्":114,"वर्":460,"षा ":211,"किन":120,"काम":260,"कार":2072,"काल":377,"किक":104,"काश":2575,"कास":405,"किस":96,"कुन":166,"कुर":99,"कुम":159,"ववि":79,"शको":75,"कान":97,"काठ":489,"काक":1844,"वस्":272,"वहा":72,"सन ":103,"गि ":141,"वाद":803,"वान":150,"वाच":132,"विक":476,"वाल":112,"वास":237,"कला":91,"वार":1366,"वाम":64,"वित":78,"विन":80,"विद":224,"विध":297,"विज":192,"वाह":64,"गी ":84,"विष":89,"विश":380,"विस":90,"विम":83,"विर":102,"विभ":130,"वीर":70,"कम्":158,"कमा":129,"कर्":161,"गा ":93,"करण":105,"कपु":78,"कपा":103,"वधि":404,"कता":70,"वमा":91,"गर ":180,"वयम":124,"सं ":192,"शी ":67,"गत ":162,"खि ":378,"वटा":364,"खी ":68,"खा ":132,"को ":13998,"शन ":893,"वि ":102,"एमा":79,"वा ":501,"वी ":188,"ल्न":81,"ल्प":107,"ल्य":159,"ल्ल":1131,"एवं":67,"एकी":88,"एका":367,"एकि":98,"एको":2830,"लोक":92,"लेख":158,"शक ":887,"लेक":101,"लेट":101,"लिम":70,"लुम":71,"वर ":134,"लाई":880,"लाइ":94,"लाग":378,"लाक":490,"लाम":331,"लाल":114,"लिक":155,"लाह":236,"लिङ":98,"लित":141,"लिन":92,"वन ":146,"लहर":130,"एउट":304,"ललि":72,"लमा":361,"लद्":81,"लका":156,"वं ":67,"लगा":129,"लको":1159,"लक्":65,"रैम":78,"रेस":80,"रेष":101,"रोप":82,"रोग":68,"र्श":68,"र्व":558,"र्स":92,"र्ष":409,"र्म":544,"र्य":1351,"र्थ":286,"र्द":429,"र्न":750,"र्फ":154,"र्ट":316,"र्ण":301,"र्त":237,"र्ग":285,"र्ख":68,"र्क":192,"र्ज":198,"र्छ":112,"र्च":149,"रीक":121,"रिव":148,"रिय":408,"रीम":87,"रीय":210,"रुक":171,"रुम":97,"रुप":191,"रुल":94,"रूक":141,"रूद":136,"रूप":207,"रूम":82,"रूल":87,"रेक":307,"रेज":78,"रेल":67,"रेर":97,"रेन":80,"रसा":265,"रसि":109,"रहे":467,"रहर":171,"रस्":71,"ले ":1440,"राई":127,"रान":448,"राप":77,"रात":83,"राण":115,"राख":96,"राज":1244,"राक":104,"रिन":282,"रित":70,"राष":293,"राह":72,"राम":287,"रिए":176,"राय":214,"राल":136,"रिक":3842,"लो ":852,"रला":76,"लि ":69,"ला ":807,"रयो":160,"रम्":75,"रमा":715,"रमु":241,"रवा":142,"रले":64,"ली ":1594,"रधा":161,"येक":79,"रदे":155,"रमण":71,"यो।":1126,"रबा":70,"योग":340,"रपा":108,"युक":77,"युर":75,"युन":112,"युद":113,"याङ":69,"याक":103,"याम":150,"यान":128,"याप":111,"यात":120,"याद":68,"यास":84,"यिक":111,"याल":253,"यार":124,"याय":70,"यिन":65,"रति":595,"रत्":114,"रथम":70,"लय ":258,"युव":76,"रतक":192,"रणा":86,"यसक":1478,"यसै":79,"यसल":145,"रगत":68,"यवस":100,"रक्":100,"रको":493,"रजा":74,"याँ":127,"यहा":98,"यस्":93,"लन ":125,"रे ":127,"महे":70,"महा":221,"महि":223,"महत":77,"यको":226,"यक्":275,"रू ":452,"रु ":592,"यका":195,"मले":67,"री ":1056,"मुद":90,"मुख":298,"मुक":206,"मिल":98,"मित":526,"मिन":83,"मार":349,"माल":263,"मिक":100,"रो ":251,"मास":338,"मिट":74,"माण":135,"माड":449,"माध":70,"माथ":111,"मात":198,"मान":717,"माओ":399,"माक":73,"माज":262,"रै ":187,"मोर":76,"यन्":64,"यद्":67,"मेत":64,"मेर":107,"मेल":71,"रका":2994,"यमि":88,"ऋतु":72,"म्र":98,"म्य":162,"यमा":180,"म्प":1201,"म्ब":311,"म्म":335,"रत ":106,"मको":161,"रण ":320,"या ":441,"उहा":325,"यी ":158,"भिन":116,"भित":82,"भाव":75,"यो ":3433,"भास":199,"भाष":1089,"भार":394,"भाग":184,"उपत":120,"रम ":92,"मजद":66,"ये ":122,"मध्":444,"मना":80,"मन्":256,"भूम":78,"मण्":216,"मती":69,"मयि":71,"रा ":1447,"०० ":67,"रि ":116,"ममा":144,"मा ":6826,"बर्":135,"यत ":118,"भक्":69,"मी ":185,"यन ":118,"बस्":105,"बहा":133,"बाल":77,"बाह":65,"बास":89,"बिन":70,"बाट":745,"बाग":79,"बार":117,"बिह":66,"यम ":171,"बेल":114,"यस ":334,"भनि":212,"भने":219,"भन्":420,"बैभ":80,"भयो":166,"ब्र":111,"ब्य":100,"ब्द":140,"एर ":132,"प्य":67,"प्र":5545,"प्त":252,"भा ":133,"मन ":82,"बजा":98,"मय ":66,"यक 
":69,"बने":91,"बना":154,"बन्":175,"एक ":2017,"भएक":2580,"फ्न":128,"फ्र":99,"आदि":87,"पहि":540,"पहा":109,"पश्":183,"आन्":94,"पर्":460,"आफ्":129,"इएक":72,"२०२":474,"परि":275,"२०१":229,"परा":119,"२००":176,"पमा":255,"�":311,"फबा":94,"पोख":90,"उन ":91,"पृथ":65,"पुस":117,"पूर":347,"पुग":79,"पुर":744,"पित":80,"पाक":119,"पान":108,"पात":131,"पाद":966,"पार":420,"पाल":2675,"पाइ":114,"बै ":101,"अथव":91,"१९१":90,"१९९":68,"१९६":68,"अधि":111,"अनि":120,"अनु":226,"अध्":166,"अन्":365,"न्च":76,"न्छ":1928,"न्त":1150,"न्ट":89,"न्ध":228,"न्न":383,"न्थ":109,"न्द":1479,"न्य":282,"न्म":233,"न्स":147,"न्।":473,"नैत":179,"अमे":66,"अरू":140,"अर्":230,"अवस":167,"अवध":415,"पनी":71,"पना":230,"पनि":614,"पन्":133,"असो":81,"अस्":166,"पत्":3819,"पति":105,"पता":130,"पछि":199,"पा ":229,"नले":102,"नया":92,"नवा":108,"नन्":74,"०२७":93,"०२८":90,"नदी":180,"धेर":139,"नमा":333,"ध्य":728,"नुह":426,"उटा":296,"नुप":72,"नुभ":561,"नुस":135,"निस":101,"निष":82,"नीत":296,"नेप":2459,"नेत":159,"नेक":221,"फल ":80,"उनी":101,"उनु":99,"उने":183,"उनल":77,"उद्":70,"उनक":91,"नस्":108,"निर":312,"निय":235,"निम":64,"निन":391,"उत्":329,"निध":76,"नाल":106,"निक":384,"नाम":395,"नार":232,"नाथ":91,"नाक":103,"नाउ":100,"नाइ":96,"द्द":65,"द्व":1093,"द्र":692,"द्य":285,"द्ध":407,"धर्":124,"नकप":71,"इने":87,"इन्":241,"नका":130,"नको":327,"इला":81,"देख":530,"देव":249,"देश":451,"दैन":81,"१८ ":75,"दोल":117,"२७ ":96,"धित":72,"धिम":418,"धार":244,"धिक":138,"धान":515,"२८ ":93,"नता":76,"नगर":305,"न। ":91,"उंम":222},"n_words":[614665,704688,490631],"name":"ne"}
\ No newline at end of file
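The "n_words" triple is what turns the raw counts above into probabilities: when the detector factory loads a profile, each n-gram's weight becomes its count divided by the total for n-grams of that length (n_words[0] for unigrams, n_words[1] for bigrams, n_words[2] for trigrams). A rough sketch of that normalization, assuming a profile dict parsed as in the previous example; ngram_prob is a hypothetical helper for illustration, not a function shipped in this module:

    # Hypothetical helper mirroring the count / n_words[length - 1]
    # normalization applied when profiles are registered.
    def ngram_prob(profile, ngram):
        count = profile['freq'].get(ngram)
        if count is None:
            return 0.0
        return count / profile['n_words'][len(ngram) - 1]

    # For the Nepali profile above: ngram_prob(ne_profile, 'को')
    # yields 14452 / 704688, roughly 0.0205.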
diff --git a/nlp_resource_data/langdetect/profiles/nl b/nlp_resource_data/langdetect/profiles/nl
new file mode 100755 (executable)
index 0000000..c74f0d2
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"D":295391,"E":101171,"F":127232,"G":105530,"A":221658,"B":178586,"C":193400,"L":124657,"M":162943,"N":137196,"O":83395,"H":199830,"I":105198,"J":61744,"K":84088,"U":22509,"T":111990,"W":76478,"V":96433,"P":145276,"S":218863,"R":114676,"Y":11382,"Z":56681,"f":358313,"g":1325585,"d":2549536,"e":9521882,"b":675678,"c":885504,"a":4428747,"n":5174765,"o":3044475,"l":2174413,"m":1301830,"j":487381,"k":957853,"h":1157193,"i":4146613,"w":618312,"v":1136367,"u":1226136,"t":3691802,"s":3058116,"r":3420928,"q":14380,"p":897097,"z":331215,"y":211813,"x":48411,"²":22665,"ï":7308,"í":7570,"ë":58538,"é":54151,"è":10784,"ä":6605,"á":12476,"ü":12035,"ö":12215,"ô":9257,"ó":9442," l":108039," m":271385," n":113728," o":359305," h":429647," i":863484," j":55299," k":151715," d":1022256," e":865604," f":60480," g":362770,"р":6363," a":271750," b":268848," c":85027," z":129440," u":151127," t":287237," w":287907," v":736662," p":253369," s":272741," r":136806," J":58839," K":80686," H":197094," I":88140," N":131673," O":75874," L":120300," M":156963," B":171895," C":177462," A":209281," F":122253," G":98641," D":289943," E":96010," Z":55679," Y":11046,"и":8389,"о":8805,"н":6323," S":205129," R":108525," P":136936,"а":10448," W":74232," V":87489," U":20421,"е":6745," T":105528," é":7785,"A ":9463,"Da":14497,"Cu":5824,"Cl":7287,"Co":57452,"Cr":6279,"Ce":10546,"Ch":35435,"Ci":5986,"Du":33850,"Do":13572,"Dr":5649,"De":198026,"Di":16644,"Fe":6923,"Fa":6211,"Eu":11420,"Er":8327,"En":17423,"El":7917,"Ee":19326,"Ge":21620,"Ga":13267,"I ":14576,"Fr":73223,"Fo":9924,"Fi":11740,"C ":12680,"Au":12206,"Ar":23539,"Ba":28977,"Am":55987,"An":22709,"Al":28901,"Bu":10850,"Br":34564,"Ca":38690,"Bi":11873,"Be":44722,"Bo":26862,"Bl":5807,"Kr":9527,"Ko":15418,"Le":21847,"Li":19176,"La":31976,"Lu":12150,"Lo":28450,"Me":24563,"Mi":26609,"Ma":59234,"Mu":7133,"Mo":28002,"Ni":14152,"Ne":59633,"Na":17889,"No":27858,"Ol":9544,"Gi":5965,"Gr":28322,"Go":11925,"Gu":7667,"Ha":30031,"He":95269,"II":10036,"Hi":30742,"Ho":27037,"Hu":6810,"In":31628,"Is":5913,"It":15844,"Ja":19607,"Je":8912,"Jo":19365,"Ju":6270,"Ka":21546,"Ki":7967,"Ke":10121,"Un":6926,"Tu":6976,"Tr":10760,"Ts":9146,"To":14361,"Th":19966,"Ti":10118,"Te":13194,"Ta":10688,"V ":6513,"St":35202,"Su":9985,"Wo":6733,"Wi":21345,"Wa":15248,"We":22896,"Vo":12732,"Vi":16383,"Vl":8595,"Va":15790,"Ve":22438,"Pr":15997,"S ":6945,"Pe":17479,"Pa":35566,"Po":30054,"Pi":14013,"Oo":13070,"Op":6344,"Or":9483,"Se":15433,"Sc":16133,"Si":16384,"Sh":6081,"Sl":6946,"Sp":19561,"So":16434,"Ru":16114,"Sa":42083,"Re":17106,"Ri":17272,"Rh":6524,"Ro":32707,"Ra":12538,"b ":17772,"a ":223995,"Yo":6270,"Ze":12835,"Zi":6830,"Zu":10609,"Zw":13009,"i ":117137,"gd":23906,"ge":622849,"ga":53344,"fl":6356,"fg":6573,"ff":17069,"fi":41042,"fs":14314,"fr":11929,"fu":6506,"ft":32454,"fo":24009,"j ":85100,"he":517783,"ha":140501,"gn":22691,"gl":11647,"gi":125274,"gh":17920,"gg":9680,"gu":24589,"gt":35908,"gs":36659,"gr":72480,"go":35093,"dt":42741,"du":30903,"dw":10868,"g ":209248,"ea":48028,"eb":81350,"ec":86712,"ed":198243,"de":1265409,"dd":20989,"dg":6934,"di":281697,"dh":8510,"dk":6604,"do":126396,"ds":99321,"dr":53877,"ew":31588,"ex":17157,"eu":69138,"ev":102331,"ey":15758,"ez":66338,"fa":30179,"h ":79224,"fd":18426,"fe":38002,"eh":28491,"eg":168269,"ef":50847,"ee":952339,"el":605553,"ek":125240,"ei":156611,"ep":103855,"eo":27881,"en":2016971,"em":320296,"et":590121,"es":318427,"er":1223846,"ca":46321,"e 
":2100607,"bs":5841,"br":58403,"bu":44669,"bo":58794,"bl":25851,"bi":87840,"bb":9494,"be":276525,"da":122637,"f ":90198,"cu":23421,"ct":84409,"cr":11311,"co":70369,"ck":28358,"cl":14976,"ci":82756,"ch":396490,"ce":87968,"c ":16305,"az":12546,"ay":18941,"ba":76157,"d ":427738,"at":401880,"as":187038,"ar":458993,"aw":6737,"av":29958,"au":65384,"ak":138064,"al":362702,"ai":53889,"aj":7035,"ap":71362,"am":172784,"an":1131186,"ac":99694,"ad":102907,"aa":662152,"ab":30531,"ag":85506,"ah":10878,"ae":23635,"af":47867,"nu":31030,"nt":435391,"ns":284103,"nr":14733,"no":95599,"nn":67675,"nz":11234,"ny":9083,"nw":88548,"nv":13925,"oe":163750,"of":83411,"oc":46361,"od":56125,"oa":14423,"ob":29747,"om":159575,"on":522696,"ok":52068,"ol":157894,"oi":33928,"og":70046,"oh":14048,"ot":108603,"m²":22643,"os":81239,"ov":88551,"ou":131520,"op":166634,"oo":370075,"or":483257,"r ":577062,"ow":27555,"oz":6517,"oy":5912,"pe":152921,"pg":12730,"pa":116725,"pl":132966,"po":60048,"ph":12783,"pi":48220,"lo":99408,"lm":24809,"ll":128927,"ls":119057,"lp":13038,"lv":15042,"lu":37402,"lt":126691,"ly":17100,"o ":155926,"md":19425,"ma":206926,"mb":60844,"me":492457,"mi":91034,"mm":32963,"mp":51654,"ië":52728,"mo":56705,"mt":19679,"ms":35581,"mu":25483,"my":6479,"p ":141470,"na":201890,"nb":21559,"nc":69092,"nd":473575,"ne":270200,"nf":9979,"ng":259192,"nh":15116,"ni":168878,"nk":43137,"nl":16899,"nm":6930,"jv":11516,"ju":22953,"js":16655,"jn":70736,"jo":13034,"jk":115620,"ki":53710,"kh":5862,"ke":182363,"ka":118671,"m ":155936,"kw":7713,"ks":46927,"kt":131352,"ku":22530,"ko":60158,"kr":23259,"kk":20620,"kl":29172,"km":27830,"kn":6593,"li":338485,"lh":5934,"lk":34257,"le":302236,"ld":95337,"lg":36498,"lf":20122,"la":398196,"lc":8500,"lb":22338,"n ":2543334,"hr":30422,"ht":107458,"hu":34327,"hi":99792,"hn":9649,"ho":105347,"id":125660,"ic":175476,"ib":14735,"ia":95834,"ig":142126,"if":16130,"ie":477304,"k ":193827,"ir":54494,"is":722822,"it":333187,"iu":13498,"iv":35131,"iw":8456,"ix":5720,"ij":364327,"ik":93516,"il":169468,"im":40230,"in":879621,"io":144997,"ip":26974,"je":41083,"jd":31240,"jf":10910,"iz":17029,"l ":299819,"ja":41826,"xi":7194,"z ":13752,"wi":58960,"wn":7387,"wo":161887,"ws":8197,"y ":100628,"wa":155603,"wd":6545,"we":181101,"vl":34784,"ré":7748,"vi":112804,"vr":17131,"vo":180923,"uz":15014,"ux":8237,"uw":44437,"uv":7894,"uu":52115,"ve":243007,"va":525240,"x ":20927,"ui":271759,"uk":10265,"ul":55545,"ue":25713,"ug":30034,"ur":165500,"us":143221,"ut":46184,"um":51805,"un":100952,"up":10064,"ty":46646,"tz":8930,"tu":100531,"tt":45242,"tw":33429,"tv":9249,"ub":30475,"ua":33926,"ud":45500,"uc":29841,"w ":28312,"to":174226,"tm":7826,"tl":14300,"ts":211260,"tr":144811,"tg":20068,"te":770637,"tk":6386,"tj":6074,"ti":257739,"th":76245,"v ":8380,"tb":23716,"ta":261366,"su":28639,"sv":9064,"ss":138251,"st":562933,"sy":14252,"sz":5928,"sw":6865,"sl":43368,"sk":33909,"sn":9526,"sm":15035,"sp":74738,"so":59040,"sr":6378,"sd":20593,"sc":205927,"se":425364,"sh":26990,"sg":6293,"sj":16338,"si":98258,"rz":20638,"u ":30645,"sa":44187,"sb":15979,"rr":71892,"rs":228779,"rt":187734,"ru":73560,"rv":44462,"rw":18490,"ry":14720,"rp":27084,"ro":286166,"rn":60686,"né":6017,"rm":70366,"rl":117376,"rk":74650,"ri":419079,"rh":16037,"rg":84498,"rf":11818,"re":392586,"rd":257870,"rc":25722,"rb":33548,"ra":285112,"t ":1445388,"qu":12987,"s 
":1176543,"pt":24055,"pu":20157,"pp":39364,"pr":99751,"ps":14478,"zi":116688,"ze":78298,"za":31109,"zu":15798,"zo":49094,"ye":6127,"yc":6275,"ya":8170,"ys":18578,"yr":9415,"yp":6946,"yn":9021,"ym":12848,"yl":8352,"² ":22661,"éé":7636,"én":13925,"ë ":43502,"é ":8856,"一":6916," Ga":13216," Ge":21536," Fo":9883," Fr":73181," Fi":11695," Ha":29997," He":95105," Go":11879," Gr":28204," Gu":7630," Gi":5928," Hu":6795," Ho":26988," II":6497," Hi":30725," Je":8879," Ja":19556," Is":5895," It":15839," In":31514," Ka":21414," Ke":10042," Ki":7899," Jo":19317," Ju":6264," La":31862," Le":21646," Li":18941," Ko":15401," Kr":9517," Ma":59034," Mi":26529," Me":24479," Lo":28411," Lu":12123," Ne":59527," Na":17815," Ni":14127," Mo":27929," Mu":7091," Am":55975," An":22679," Al":28811," Ba":28861," Au":12111," Ar":23451," Be":44588," Bi":11811," Bl":5783," Bo":26752," Br":34502," Bu":10803," Ca":38284," Ce":10523," Ci":5893," Ch":35359," Cl":7183," Cr":6209," Co":57213," Cu":5704," Da":14480," Di":16596," De":197691," Do":13329," Du":33818," El":7895," Ee":19260," Er":8309," En":17358," Eu":11409," Fe":6899," Fa":6133," Wo":6660," Wi":21285," We":22829," Wa":15180," Zu":10598," Zw":13007," Ze":12821," Zi":6778," Yo":6266," Or":9466," Oo":13036," Op":6330," Po":29977," Pi":14002," Pe":16009," Pa":35403," No":27810," Ol":9537," Ra":12376," Ro":32636," Re":17067," Ri":17238," Rh":6521," Pr":15952," Su":9973," St":34839," Ta":10649," Th":19903," Ti":10088," Te":13125," Tr":10698," Ts":9134," To":14233," Ru":16099," Sa":42032," Sh":6028," Si":16291," Sc":16056," Se":15382," So":16378," Sp":19500," Sl":6935," Va":15750," Ve":22303," Vi":16318," Vl":8585," Vo":12506," Tu":6926," Un":6865," ja":26885," in":468125," is":385652," ka":34673," ki":8773," ke":15418," ju":21208," ha":22481," he":342958," gr":38760," go":7687," hi":19625," ho":31679," hu":11200," ni":13445," ne":8341," na":55514," mu":11657," mo":19517," ok":9997," om":21439," on":84624," of":50488," nu":5724," no":28282," le":24338," li":33990," la":36262," ku":10447," km":25898," kl":14578," kr":8283," ko":24113," me":105531," mi":19475," ma":109683," lo":6645," af":19870," aa":54537," ac":15329," an":19082," ap":11762," al":65830," au":17140," ar":47665," ba":14963," bi":41311," be":166033," bo":19486," bl":6588," bu":9669," br":10013," ca":7354," e ":8358," er":11018," et":7087," en":320059," ei":19716," el":9897," ee":469095," fe":12855," fa":15977," fo":5946," fi":16445," ge":303157," ga":6353," co":32521," ce":11923," ch":6021," ci":15694," da":51374," do":72769," dr":11539," de":740739," di":128156," du":9386," zo":26839," zu":11699," za":9390," ze":21870," zi":55419," ru":5958," sa":9420," se":22896," sc":26028," si":12277," sl":6445," sp":31376," so":18785," ra":8521," re":91305," ri":13940," ro":16553," pr":57908," s ":10712," ou":7125," ov":18421," oo":33765," op":114061," or":11383," pe":20368," pa":24621," pl":113070," po":21982," wa":118659," we":90321," wo":59080," wi":19128," va":481040," ve":76969," vo":131543," vr":12217," vi":22015," vl":9586," tw":15968," tu":14723," ui":144553," ta":9836," sy":6615," st":111138," su":9964," tr":11805," to":58623," th":14839," ti":18789," te":138599," éé":7588,"Eur":9786,"Eng":13833,"Fra":63542,"II ":7320,"Hij":21364,"Het":68660,"Her":7011,"Gri":6704,"Gra":6848,"Gro":9812,"Ind":7590,"In ":14751,"Hon":6999,"Hol":6098,"Bar":6393,"Alp":5725,"Ame":44932,"Ams":6535,"Ant":6487,"Cal":8797,"Car":7946,"Ber":10327,"Bel":16135,"Bra":7962,"Bri":11166,"Bou":5646,"De 
":168413,"Dez":5967,"Dee":6161,"Chi":8065,"Cen":7181,"Cha":14656,"Cor":5822,"Com":7136,"Con":6541,"Cou":26493,"Een":16908,"Dui":29012,"Ned":43974,"Nat":6561,"New":6417,"Nor":10502,"Noo":11547,"Oly":6017,"Oos":10737,"Per":5867,"Par":11966,"Poo":9123,"Ita":15181,"Joh":7769,"Lan":8583,"Man":6806,"Mar":24247,"Mon":9837,"Mid":7429,"Wil":9135,"Wer":5760,"Wes":9343,"Vla":7545,"Ze ":6085,"Sta":16762,"Sin":5858,"Spa":11230,"Rus":11905,"Sai":9817,"Sch":12674,"San":8826,"Rij":7380,"Rom":5676,"Ver":12912,"Uni":5818,"The":11210,"Tsj":8380,"bis":7734,"bin":13224,"bij":36202,"bli":10301,"bla":6463,"boe":8360,"bon":5829,"bor":10260,"bou":11481,"bbe":6915,"ban":15026,"bal":21186,"baa":7632,"bas":9398,"bar":8019,"beh":12808,"beg":7886,"bee":12385,"bed":18175,"ber":72483,"ben":9807,"bel":14825,"bek":19147,"bev":17462,"bes":52050,"bet":13589,"bie":17101,"ca ":12312,"car":7029,"cat":5698,"ce ":27218,"bri":6013,"bro":7591,"bra":10008,"bre":7181,"bru":26538,"bur":21107,"bum":8070,"am ":47277,"ake":12657,"al ":74743,"ail":5969,"ain":18706,"air":10562,"ais":6111,"ak ":11574,"agt":10140,"agn":7735,"anu":16559,"ano":8039,"ann":16248,"ant":55265,"ans":143853,"ane":9267,"ang":46635,"ani":27973,"ank":15316,"ap ":25134,"ana":20565,"anc":16651,"and":178626,"amm":8429,"amp":20053,"ams":9681,"ami":28047,"ame":31772,"amb":6788,"ama":9230,"alv":6355,"alt":35976,"als":49851,"all":30537,"ali":53705,"alc":5747,"ald":14462,"ale":38623,"ala":12213,"alb":11148,"an ":555423,"aks":6554,"akt":91851,"abe":6263,"abi":6431,"ae ":14725,"aaf":9902,"aag":19367,"aad":5831,"aak":81870,"aan":153725,"aal":41110,"aam":28835,"aas":6793,"aar":125046,"aat":183677,"ad ":41911,"afs":8757,"age":22392,"adi":11143,"ade":19021,"ag ":18180,"ach":45489,"ace":13447,"ada":5737,"af ":10774,"act":17739,"at ":119635,"are":27820,"ard":36005,"arc":10579,"arb":9145,"ara":15993,"aro":16161,"arn":8226,"arm":7787,"arl":11777,"ark":11271,"ari":63150,"arr":46599,"ars":14646,"art":89778,"au ":9748,"asi":6239,"ase":7343,"ar ":70080,"apa":7428,"app":11428,"apr":10161,"as ":104628,"aut":12900,"avi":8222,"ave":10618,"ay ":8013,"ata":7505,"ast":27312,"ass":19856,"ato":8717,"ate":41676,"ati":71342,"ath":9399,"att":6300,"ats":113866,"atu":9691,"aug":10264,"Zwe":6862,"Zui":9741,"jec":10400,"jk ":66346,"jaa":10547,"jar":7927,"jan":13106,"jd ":7717,"je ":16212,"jde":16692,"jns":7518,"js ":7214,"jn ":50205,"jks":11628,"jke":29064,"itt":7963,"its":47572,"ity":14389,"iss":57247,"ist":78828,"ita":16110,"ite":28930,"itg":14085,"iti":26080,"ium":6105,"ivi":15930,"ive":12826,"is ":431822,"ion":45782,"ir ":7395,"isi":11995,"ish":6473,"ise":13845,"isc":89426,"isa":6882,"ire":19679,"it ":148319,"iwo":7073,"ize":7008,"kin":23405,"ki ":12463,"kee":5719,"kel":22383,"ken":67439,"ker":29057,"ke ":43164,"kt ":89985,"kse":13533,"kri":8432,"km²":22590,"kor":8329,"kon":7670,"kom":21374,"ks ":13043,"kke":14372,"kle":13907,"kla":8538,"jul":9669,"jun":10013,"jve":8311,"jst":5685,"kan":26234,"kam":11248,"kaa":47537,"ka ":11569,"ham":9052,"han":20214,"hap":31501,"hal":11321,"har":17692,"haa":16281,"had":6694,"he ":78504,"hel":11001,"hei":31175,"hee":28005,"het":305470,"her":18043,"hen":9035,"hem":8872,"hie":14941,"hin":11368,"hil":13952,"hij":15137,"his":12860,"gne":9405,"gna":8227,"gon":7132,"gsd":8056,"gro":30942,"gra":24600,"gt ":25727,"gri":8059,"gre":7606,"gst":8398,"gus":11645,"ial":9076,"ian":13694,"iat":6807,"iaa":21077,"id ":39722,"ia ":31791,"iet":21274,"ieu":11845,"iev":7556,"iel":12150,"iem":5824,"ien":50882,"ier":48258,"ies":24371,"ied":23758,"ief":9097,"iek":35699,"ig 
":26250,"ict":38543,"icu":11018,"ico":6343,"ici":9264,"ich":59319,"ice":15194,"ie ":201388,"ica":19828,"idi":13237,"ide":28170,"idd":13665,"ida":12788,"il ":19641,"ijd":30969,"ije":8022,"ijf":10866,"ijk":115115,"ijn":70104,"ijs":16040,"ijv":11367,"im ":7764,"ika":50912,"igd":11510,"ige":49060,"igh":9499,"igi":9202,"igt":11964,"ign":9284,"ij ":80452,"ik ":10619,"ime":7803,"inc":33586,"ind":41153,"ina":22287,"inn":14337,"ino":8487,"int":30127,"ins":19772,"ine":33694,"ing":150853,"ini":14846,"ink":10381,"ioe":9616,"inw":77340,"ikk":7357,"ike":6423,"ila":17614,"in ":405294,"ikt":10786,"ilo":7646,"ill":44186,"ilm":12944,"ili":39663,"ild":8065,"ile":5842,"ima":5906,"io ":71745,"hol":9679,"hou":16715,"hoo":29440,"hor":9181,"hoe":5979,"hui":10622,"hts":6548,"hth":9745,"hti":8381,"hte":21726,"hre":7796,"hri":17051,"ht ":49854,"hum":5764,"ffe":6190,"ffi":6818,"feb":9777,"fen":6207,"fam":16628,"fde":6413,"eze":22604,"ezi":32218,"etb":16830,"eta":11379,"ete":33498,"eti":8945,"eth":6048,"esp":11543,"est":111009,"ess":14653,"etr":9667,"ets":6968,"ett":13669,"ew ":6999,"eve":54896,"eva":7494,"evo":21109,"evi":14484,"euw":17939,"eur":21284,"ewe":9375,"ewo":5810,"ey ":9860,"epe":6552,"er ":333095,"epa":50447,"eor":9695,"es ":93024,"ept":12657,"erk":43542,"erl":61155,"eri":98353,"erg":28574,"erh":11977,"ere":86052,"erd":117197,"era":27762,"erb":15858,"et ":468269,"esl":15451,"esi":12100,"esc":18281,"ese":16953,"erz":13202,"erv":34773,"erw":12350,"err":17024,"ert":36463,"ers":189637,"ern":29282,"erm":20928,"erp":10525,"ero":16315,"eks":13762,"ekt":9522,"en ":1373884,"ela":20546,"eld":60315,"elf":9763,"ele":68456,"eli":59882,"elg":17311,"elk":7255,"ell":29565,"elo":7411,"els":48151,"elt":76311,"emb":35339,"ema":13115,"eme":213429,"emd":11991,"emi":9436,"ep ":11271,"ene":28524,"enh":7989,"eng":11580,"enb":15906,"ena":20359,"end":77370,"enc":8130,"eno":20895,"enn":18048,"enk":9463,"enl":7298,"eni":29460,"enw":7353,"ens":81592,"ent":262064,"enr":11987,"ego":6580,"ege":50512,"egi":74199,"eho":15902,"ek ":28609,"eis":7935,"eil":21113,"ein":30892,"eid":39967,"eig":6260,"el ":169543,"eiz":7508,"eit":10871,"eke":48960,"em ":17335,"gis":18136,"gin":20458,"gio":63608,"gie":9618,"gge":6516,"gep":6253,"gen":119618,"geo":6626,"get":5657,"ger":43727,"ges":41787,"gev":26798,"gew":8731,"gez":31827,"gee":9960,"ged":10354,"geb":49406,"geh":10722,"gem":117467,"gel":56245,"gek":5735,"gde":10792,"ge ":59115,"gd ":7871,"gaa":11487,"gan":14683,"ft ":25649,"for":15464,"fic":10500,"fie":5840,"fil":14418,"da ":11415,"de ":684651,"daa":13978,"dag":8773,"dae":7793,"dat":38507,"dan":11068,"dam":12288,"dde":16386,"cti":18928,"cte":10141,"cus":11312,"clu":9316,"co ":6846,"con":15186,"com":22591,"ct ":40340,"cea":6367,"ch ":51967,"ces":6110,"cen":17525,"cem":10124,"cha":49925,"cia":10705,"ck ":10471,"cie":34119,"che":90373,"chi":44310,"cho":18169,"cht":101737,"chr":21384,"cit":10702,"ed ":30096,"eba":5684,"ebe":6342,"ebi":14653,"ebo":12775,"ebr":30709,"eau":7092,"ei ":13562,"eft":21131,"eek":11355,"een":586471,"eel":149087,"eem":7130,"eef":23903,"eed":19633,"ees":22270,"eer":87604,"eeu":10006,"eet":5732,"edi":16531,"ede":93412,"eda":9229,"eg ":11274,"eds":11135,"edo":6698,"edr":18455,"ech":31918,"eci":6720,"ece":11419,"ee ":19989,"ef ":10385,"ect":16877,"eco":6380,"dwe":6219,"dor":15099,"doo":63254,"don":8072,"dom":7397,"ds ":31717,"dië":7942,"doc":6254,"doe":8162,"dst":15322,"dui":6443,"duc":8131,"dri":17764,"dra":19458,"dt 
":39781,"dro":6043,"dsc":11185,"dse":26450,"dic":13497,"dia":8442,"der":192741,"des":13390,"dez":10434,"dec":12054,"dee":111320,"del":33701,"den":135340,"dep":43953,"di ":6337,"do ":6228,"din":21044,"dio":7262,"dis":79203,"dit":10793,"die":85548,"dig":22881,"rha":6318,"rga":14305,"ri ":27658,"rgi":6243,"rge":20743,"ret":8087,"res":26172,"rev":8098,"rdt":29462,"rg ":25954,"rea":7679,"ree":20672,"rec":16838,"red":11064,"rei":15261,"reg":76191,"ren":91873,"rek":7399,"rel":18716,"rda":12870,"rdo":6848,"rdi":16416,"rde":80930,"re ":58121,"rch":10326,"rd ":94954,"ras":7988,"rat":17581,"rbi":10377,"rbe":8163,"rag":7326,"ran":87841,"ram":11943,"ral":17891,"raa":43729,"rad":10061,"rac":17496,"rs ":114967,"rpe":6605,"ros":6652,"rot":12676,"rom":9424,"ron":74395,"roo":21289,"rop":13868,"rou":11473,"rov":34172,"rod":10405,"roc":11802,"rol":11209,"roe":27621,"rog":7078,"rno":7457,"rp ":10130,"rna":17955,"rne":14489,"rni":6410,"rmo":6164,"ro ":8067,"rma":25601,"rme":14051,"rlo":10187,"rli":43352,"rle":10232,"rla":47955,"rn ":7785,"rko":8091,"rke":15492,"rm ":8899,"rip":6607,"rio":7162,"rit":17759,"ris":26171,"riv":7770,"rig":10543,"rij":60217,"ril":12296,"rik":53920,"rin":39022,"ria":15453,"ric":60714,"rid":8288,"rie":42534,"rk ":23012,"rwe":7689,"rwi":6024,"rui":23623,"rug":6639,"rum":7291,"rus":7171,"rva":8120,"rve":6455,"rvl":18128,"rvo":6709,"ry ":9896,"rsi":7125,"rso":10142,"rsp":13174,"rsc":16580,"rse":16920,"rta":7455,"rst":29072,"rto":9036,"rte":64198,"rth":8072,"rti":18904,"rua":9937,"rts":6565,"rt ":52830,"rro":41654,"rri":6371,"rre":14498,"sam":7637,"sat":5699,"shi":8681,"sje":10417,"sie":18702,"sit":8909,"sis":17171,"sin":18433,"sig":6448,"sdi":9330,"se ":245903,"sch":188901,"sco":5845,"ser":23257,"sh ":6367,"sei":5692,"see":12427,"sep":12185,"sen":46205,"sem":40984,"sel":17905,"spo":12356,"spr":14426,"spe":34342,"spi":6360,"son":16537,"soo":14382,"st ":68174,"sla":28873,"ski":9665,"ske":6724,"sme":5826,"sse":83731,"ssa":7327,"sso":6523,"ssi":25931,"ste":153650,"sta":134037,"sto":24680,"sti":36773,"stu":49771,"str":76216,"sus":9037,"tai":6439,"tal":42563,"taa":79421,"tad":33209,"tba":18995,"tat":25594,"tar":9576,"tan":24920,"te ":245946,"ta ":14225,"pe ":6304,"par":65094,"paa":13416,"pan":11481,"pge":11800,"pec":7014,"pen":27399,"per":54393,"pes":8975,"pee":13839,"pel":26450,"pla":118636,"ple":6523,"pij":6479,"pio":11106,"pis":8712,"por":10136,"poo":7190,"pon":6968,"pol":16543,"ppe":30477,"pub":6405,"pte":13585,"pri":21984,"pre":14246,"pro":59321,"que":5834,"ra ":17662,"ngr":7754,"ngt":6149,"ngs":24028,"ni ":14308,"nge":85205,"nga":9441,"nha":5738,"nel":9354,"nen":38308,"nem":7134,"ner":92669,"net":10080,"nes":17246,"ng ":100478,"nee":12320,"nci":34445,"nce":13701,"nch":8196,"ne ":58920,"nbu":5898,"ndr":9897,"nds":53318,"ndo":10563,"ndi":65506,"nde":170234,"nda":12836,"nal":19401,"nam":19197,"nan":5995,"nad":6211,"naa":50268,"nbe":7893,"nd ":125333,"nat":22785,"na ":37268,"nwo":81256,"ny ":6026,"num":6116,"nua":11671,"nty":25239,"ntw":10449,"nto":19506,"nts":8743,"ntr":16443,"nti":22690,"nta":19603,"nte":152583,"nsu":9180,"nst":31383,"nse":133099,"nsc":14189,"nri":8599,"nt ":140657,"ns ":66627,"noe":11549,"noo":19104,"nom":9568,"nov":12875,"nne":42144,"nna":5969,"nni":9362,"nië":12014,"nla":8754,"no ":10798,"nke":14949,"nkr":6212,"nig":15471,"nie":28378,"nic":7027,"nia":7786,"nk ":5831,"niv":5902,"nis":35078,"nin":21782,"ogr":8544,"ogi":8951,"oge":15686,"oiw":7018,"ois":5765,"oir":6590,"ok ":26496,"ol ":12318,"och":15236,"ock":8061,"ode":16327,"ods":9122,"of 
":51317,"oek":14333,"oel":8698,"oem":15572,"oeg":10025,"oer":17651,"oet":21912,"oen":24849,"oep":14118,"odu":7865,"oed":12901,"og ":18782,"off":7132,"ofd":11478,"oal":5868,"od ":7190,"obe":14031,"own":6969,"oud":23948,"oth":6889,"ote":16152,"ott":8284,"ots":8865,"oto":6753,"ost":29293,"ota":7234,"ose":6693,"ovi":34676,"ouw":19904,"ove":41666,"oun":29232,"ous":5891,"our":16621,"out":8877,"opp":20304,"ope":17398,"opg":12331,"os ":12938,"oon":17563,"ool":15370,"oom":10898,"ook":25159,"ooi":6071,"oof":13207,"oog":15636,"ood":7845,"or ":123656,"oot":15244,"oos":12524,"oor":222103,"oop":6797,"ork":12244,"orl":12130,"orm":37127,"orn":13425,"orp":13450,"ord":94351,"ore":21033,"org":20784,"ori":18821,"ou ":6777,"ort":47654,"ors":16087,"ot ":44568,"m² ":22639,"ora":14333,"ola":5947,"old":6867,"on ":90823,"oli":23091,"oll":13695,"olk":19645,"ole":13281,"olg":11475,"ols":13292,"olo":16700,"om ":28845,"okt":10174,"ona":29547,"ond":144852,"one":97393,"ong":27845,"oni":32623,"onn":9985,"ono":10092,"ons":14560,"ont":39227,"oma":10713,"ome":31512,"omb":6304,"omi":12146,"omm":12297,"omp":15805,"omt":12605,"oms":11302,"op ":79508,"la ":16552,"le ":71663,"lf ":5640,"lde":29685,"laa":121868,"lac":19544,"lad":5854,"lag":14890,"lai":7568,"lak":20347,"lan":133760,"lar":5948,"lat":13164,"las":13782,"ld ":39055,"lbu":9341,"kun":16374,"kwa":5842,"kte":24985,"kst":6852,"kto":10642,"lpe":6305,"ls ":61450,"lon":9738,"lom":7994,"loo":10759,"lor":5860,"loe":7778,"log":16741,"los":5784,"lië":7455,"lub":9599,"lst":25217,"lte":6994,"lse":20318,"lt ":100197,"lge":14115,"lgi":13560,"li ":13675,"lev":15162,"les":15192,"let":7945,"ler":20832,"lem":10140,"len":59840,"lei":20020,"leg":19493,"lee":17156,"led":8570,"lec":6605,"lo ":6322,"lla":18593,"lle":62567,"lli":22272,"llo":6485,"lks":5894,"lki":11362,"lm ":10134,"ll ":9099,"lit":19124,"lis":19817,"lip":6344,"lin":41169,"lic":12334,"lia":22468,"lij":95486,"lig":34482,"lie":40870,"ma ":13279,"maa":102890,"mar":12201,"mal":14307,"man":28303,"mat":12974,"md ":13491,"mbe":34190,"me ":20464,"med":7669,"mee":138208,"met":69732,"mes":6603,"mer":72670,"mel":11918,"men":146532,"mei":13406,"lve":5791,"lym":7416,"mpi":18269,"mpe":6753,"mpo":7375,"ms ":12121,"moe":5824,"mon":14109,"mt ":13658,"mst":14316,"muz":8520,"min":20688,"mil":24349,"mis":8333,"mit":6248,"mig":6106,"mie":5852,"mid":8678,"ië ":43280,"mma":6643,"mme":17999,"zui":10334,"zee":6519,"zet":5935,"zen":14435,"zel":8377,"ze ":24809,"zan":8216,"zoo":6798,"zon":12147,"zoe":7835,"zie":38234,"zic":12949,"zij":41913,"yst":6749,"ys ":5784,"ymp":7552,"wn ":6464,"wod":7070,"wor":46676,"woo":12376,"won":83488,"woi":6978,"wes":10367,"wer":76690,"wet":5976,"wen":7541,"wel":18559,"weg":14286,"wee":29661,"wit":7604,"win":7741,"wij":18720,"wat":9640,"war":13183,"was":80249,"waa":29854,"vro":6819,"vil":13878,"vin":41497,"vie":14100,"vis":16850,"vla":21113,"vli":6207,"voe":25269,"vol":30369,"von":6589,"voo":92340,"vor":15339,"ver":131519,"ven":47801,"vem":9979,"vel":12213,"vee":14817,"ve ":13676,"val":33641,"van":455616,"vat":5892,"vaa":8390,"uzi":9073,"uwe":13657,"uwd":6413,"uur":48711,"usi":5945,"use":7901,"ust":24814,"uss":30621,"ute":13524,"uw ":14870,"uto":6696,"us ":63461,"ut ":6738,"ure":14305,"urg":25394,"uri":10844,"url":29792,"uro":10683,"urt":8342,"ur ":31687,"umb":6654,"unt":32613,"uns":8285,"uni":16955,"und":14286,"um ":25713,"ult":7652,"uli":14819,"ule":6125,"ula":6204,"un ":7353,"uid":29632,"uik":15052,"uis":15533,"uit":185570,"ugu":11498,"ude":20442,"udi":6485,"ue ":5767,"uch":9124,"ub ":8370,"uar":23535,"ubl":7698,"ty 
":41728,"tur":6970,"tus":27504,"tuu":41013,"tud":6157,"twi":6118,"twe":21595,"ts ":120548,"tre":20553,"tra":31398,"tri":58554,"tru":9236,"tro":22667,"tse":38669,"tsc":8839,"tst":17796,"tte":25599,"to ":11397,"toe":15554,"tob":10290,"tot":36145,"tow":6284,"ton":34234,"tor":20776,"tij":29417,"tie":72965,"tig":17350,"tit":9886,"tis":23784,"tin":21180,"tio":32273,"thu":8037,"tic":17261,"tle":5983,"tem":61681,"ten":116036,"tei":13819,"tek":11255,"tel":112278,"tee":16372,"teg":10988,"ted":9251,"th ":11037,"teu":8413,"tes":8744,"ter":133625,"tge":17358,"tho":11376,"the":28515,"tha":9247,"én ":8357,"één":7621},"n_words":[56157687,65372177,47614417],"name":"nl"}
\ No newline at end of file
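With profiles like the Dutch one above in place, detection goes through the package's public entry points. A short usage sketch, assuming this tree exposes the same interface as upstream langdetect (the sample sentence and fixed seed are illustrative):

    from langdetect import DetectorFactory, detect, detect_langs

    # Pin the internal RNG so repeated runs return stable results.
    DetectorFactory.seed = 0

    print(detect('Dit is een korte Nederlandse zin.'))        # 'nl'
    print(detect_langs('Dit is een korte Nederlandse zin.'))  # e.g. [nl:0.9999...]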
diff --git a/nlp_resource_data/langdetect/profiles/no b/nlp_resource_data/langdetect/profiles/no
new file mode 100755 (executable)
index 0000000..7dc51ea
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/no
@@ -0,0 +1 @@
+{"freq":{"D":67298,"E":31132,"F":46085,"G":30699,"A":51244,"B":53160,"C":43132,"L":37752,"M":50816,"N":44173,"O":29040,"H":46630,"I":28485,"J":19280,"K":42898,"U":16316,"T":46770,"W":15084,"V":29381,"Q":2727,"P":33476,"S":98250,"R":33889,"Y":5384,"X":3613,"Z":4254,"f":354544,"g":680290,"d":751947,"e":2892960,"b":291896,"c":77603,"a":1296256,"n":1640496,"o":1106000,"l":1037936,"m":608425,"j":148253,"k":716879,"h":226424,"i":1317183,"w":19861,"v":434834,"u":407647,"t":1430364,"s":1268259,"r":1753294,"q":3203,"p":350711,"z":18142,"y":203712,"x":10440,"²":2235,"Å":2838,"Ø":5129,"í":2375,"é":5770,"æ":27321,"å":157055,"ä":7866,"á":3631,"à":2467,"ü":2355,"ø":174400,"ö":5464,"ó":2058," l":79574," m":127350," n":66294," o":215607," h":91447," i":286253," j":14786," k":126841," d":163994," e":389283," f":217023," g":55928,"р":2932," a":164625,"с":2244," b":141242," c":8452," u":55940," t":125598," v":100006," p":120765," s":286186," r":49569," J":19242," K":42735," H":46471," I":28413," N":43960," O":28901," L":37540," M":50591," B":52872," C":42604," A":51010," F":45901," G":30503," D":67077," E":31023,"л":2294," Z":4215,"к":2313," Y":5370," X":3570,"и":3772,"о":4246,"н":2997," S":97781,"в":2674," R":33770," Q":2713,"а":4944," P":33276," W":14969," V":29296,"е":3204," U":16274," T":46489," å":24460," ø":14695," Å":2834," Ø":5122,"A ":7964,"Da":7415,"Co":15888,"Ch":7594,"Do":3375,"Dr":2174,"De":44977,"Di":3865,"Fe":2733,"Fa":3197,"Eu":2611,"Et":2524,"En":8728,"El":2986,"Ge":3683,"Ga":4272,"I ":9052,"Fy":2932,"Fr":8731,"Fo":9701,"Fl":2786,"Fj":2610,"Fi":5958,"C ":3097,"Au":3599,"Ar":6717,"As":3221,"Ba":9331,"Am":2967,"An":7679,"Al":7567,"By":3184,"Bu":3866,"Br":7415,"Ca":9043,"Bi":3113,"Be":11885,"Bo":6452,"Bl":2411,"Ku":2113,"Kr":5137,"Ko":7893,"Le":5322,"Li":8435,"La":9531,"Lu":3278,"Lo":6840,"Me":7383,"Mi":7116,"Ma":15908,"Mu":2812,"Mo":8218,"Ni":3496,"Ne":6662,"Na":6162,"No":20842,"Ol":2369,"Gr":7570,"Go":2542,"Gu":4480,"Ha":15522,"He":10062,"Hi":2665,"Ho":7455,"Hu":4099,"In":7421,"Is":2952,"Ir":2049,"Ja":4954,"L ":8381,"Je":2911,"Jo":5435,"Ju":2288,"Ka":7574,"M ":2289,"Ki":8846,"Ke":2144,"Un":4807,"W ":3224,"Ty":3546,"Tr":8937,"To":6250,"Th":6517,"Ti":3470,"Te":5634,"Ta":4262,"V ":2848,"Sy":2174,"St":18668,"Sv":4781,"Su":4159,"Wi":3769,"Wa":4239,"We":2978,"Vi":7414,"Va":4893,"Ve":9972,"Pr":5274,"S ":4165,"Pe":5166,"Pa":8226,"Po":5247,"Pi":2410,"Os":6058,"Op":2483,"Or":3270,"Se":7511,"Sc":2844,"Si":5113,"Sh":4054,"Sl":2066,"Sk":6894,"Sp":4048,"So":11528,"Ru":4355,"Sa":11798,"Re":6477,"Ri":4763,"Ro":9292,"Ra":4607,"b ":4101,"a ":140249,"Yo":2095,"Sø":4628,"Xi":2231,"bø":2634,"i ":283647,"fy":8788,"gd":9301,"ge":149531,"ga":32513,"fj":12532,"fl":14388,"ff":8428,"bå":3627,"fi":25866,"fr":47803,"fu":8120,"ft":23386,"fo":101580,"j ":2796,"gy":3318,"he":44416,"ha":66151,"gn":19968,"gl":10219,"gj":12645,"gi":33050,"gh":8862,"gg":35921,"gv":2904,"gu":9872,"gt":8325,"gs":33529,"gr":39232,"go":8838,"dt":33464,"du":13333,"dv":5679,"dy":4144,"g ":242136,"ea":17194,"eb":16045,"ec":4860,"ed":108322,"de":299418,"dd":13784,"df":2274,"di":40822,"dh":3225,"dk":3004,"dm":5071,"dl":19375,"do":17360,"dn":4555,"ds":31560,"dr":31419,"ew":4011,"ex":3115,"eu":5509,"ev":23237,"ey":5833,"fa":23302,"h ":12462,"fe":26099,"eh":7528,"eg":49132,"ef":18320,"ee":8701,"el":199057,"ek":50272,"ei":44782,"ep":20746,"eo":9834,"en":582197,"em":46141,"et":320684,"es":146652,"er":695279,"ca":10711,"e ":500246,"by":34947,"br":29081,"bu":14609,"bo":17004,"bl":54931,"bi":19197,"bb":5548,"be":63706,"db":3584,"da":47371,"f 
":9303,"cu":2103,"ct":3488,"co":8095,"ck":10053,"ci":6056,"ch":18251,"ce":8996,"c ":3649,"az":2995,"ay":5287,"ba":35656,"d ":150657,"at":91048,"as":69325,"ar":172213,"av":121888,"au":14940,"ak":30193,"al":135078,"ai":12693,"aj":2677,"ao":2477,"ap":29140,"am":63029,"an":267240,"ac":9777,"ad":37337,"aa":2792,"ab":12532,"ag":40280,"ah":4661,"ae":7154,"af":16520,"nu":7442,"nt":92694,"ns":124075,"nr":3026,"no":52873,"nn":98330,"jø":13856,"ny":13664,"nv":3459,"oe":7618,"of":17045,"oc":8436,"od":26153,"oa":3647,"ob":11128,"om":188777,"on":123342,"ok":27219,"ol":75500,"oi":3014,"og":163889,"oh":4581,"ot":32564,"m²":2226,"os":28900,"ov":41178,"ou":20519,"op":34634,"oo":5024,"or":234371,"r ":604948,"ow":4407,"oy":2125,"pe":55903,"pa":27179,"pl":13813,"pn":2118,"po":25761,"ph":3721,"lä":2860,"pi":29184,"lå":4732,"læ":3598,"lo":47607,"lm":11687,"ll":125327,"ls":54920,"lp":4961,"lv":17299,"lu":16394,"lt":42371,"ly":13905,"hø":10888,"o ":38269,"ma":52812,"mb":14541,"me":153343,"mf":5780,"mk":2245,"ml":5863,"mi":35740,"mn":2923,"mm":59754,"mp":15073,"mo":24316,"mr":8156,"mt":8238,"ms":13233,"mu":35413,"my":4118,"p ":18651,"na":63523,"nb":10179,"nc":6842,"nd":155382,"ne":210727,"nf":8666,"ng":145804,"nh":6700,"ni":70736,"nj":3359,"nk":18937,"nl":14559,"nm":3689,"ju":8303,"jo":49289,"kj":21128,"gå":9251,"ki":30043,"kh":3599,"ke":149522,"ka":75120,"m ":148254,"fø":39544,"ky":6546,"ks":31928,"kt":45246,"ku":18940,"kv":7398,"ko":71373,"kr":37123,"kk":42089,"kl":19159,"km":10210,"kn":9496,"li":148110,"lh":5181,"lk":27090,"lj":4011,"le":206454,"ld":27006,"lg":10977,"lf":6606,"la":117768,"lb":9872,"n ":508629,"hr":3491,"hv":8983,"ht":2318,"hu":13547,"hj":3760,"hi":16514,"hn":2241,"ho":28328,"id":53829,"ic":16512,"ib":6764,"ia":42013,"ig":91825,"if":12391,"ie":49167,"hy":2076,"k ":129587,"dø":14496,"ir":32357,"is":145060,"it":66902,"iu":3847,"iv":28029,"ii":2629,"ij":2484,"ik":79231,"il":114997,"im":14324,"in":224355,"io":25793,"ip":8856,"je":54644,"ji":3380,"l ":114552,"ja":10997,"xi":3017,"tå":6348,"z ":3331,"sø":16304,"så":13214,"wi":2306,"y ":43971,"rø":14616,"wa":4985,"we":2854,"vh":2078,"rå":14698,"vi":56505,"vt":2733,"vs":5579,"vn":14394,"vo":12525,"ve":142305,"vd":3441,"va":71132,"x ":3406,"ui":4966,"uk":21664,"ul":26959,"ue":11327,"uf":3039,"ug":10093,"ur":41090,"us":50014,"ut":47498,"um":19336,"un":102304,"up":9818,"ty":27626,"tu":28284,"tt":93499,"tv":14967,"ub":13117,"ua":11212,"ud":11995,"uc":2853,"w ":4541,"to":63371,"tn":11784,"tm":2957,"tl":16216,"ts":42441,"tr":65044,"tg":7470,"tf":6385,"te":283104,"td":2594,"tk":2587,"tj":4066,"ti":143857,"på":54079,"th":14355,"v ":99425,"tb":9067,"ta":95087,"su":13370,"sv":19448,"ss":49933,"st":245546,"sy":11979,"sl":28878,"sk":191864,"sn":6441,"sm":11084,"sp":37365,"so":108796,"sr":4235,"sd":5831,"sc":5435,"sf":8108,"se":148118,"sh":13228,"sg":2307,"sj":47719,"si":74309,"nø":2380,"u ":10252,"sa":44310,"sb":10119,"rr":19009,"rs":78883,"rt":82606,"ru":50990,"rv":11610,"ry":11156,"rp":5076,"ro":69758,"rn":31836,"rm":21775,"rl":20062,"rk":47716,"næ":4051,"ri":132605,"nå":4464,"rh":6919,"rg":35522,"rf":12509,"re":256059,"rd":66008,"rc":4262,"rb":18116,"ra":128776,"t ":465642,"mø":2258,"qu":2140,"må":7691,"lø":8427,"s ":146195,"pt":7260,"pu":11071,"pp":34390,"pr":51296,"ps":7773,"vå":3732,"za":2150,"væ":7493,"yg":16794,"ye":16616,"yd":10423,"ya":7947,"tø":14974,"yt":13580,"ys":21183,"yr":13910,"yp":7213,"yn":9715,"ym":5956,"yl":14093,"yk":8767,"yi":4209,"² ":2226,"å 
":86331,"Øs":2809,"ær":24440,"åp":3244,"ån":3530,"åt":7047,"ås":2737,"år":24111,"åe":2573,"ål":6435,"åk":4798,"åd":11397,"än":3083,"øy":21642,"ør":54354,"øs":14419,"øp":5028,"øv":7903,"øt":3062,"øk":5634,"øn":8821,"øl":4965,"øm":4740,"øe":2478,"ød":34288,"ø ":2765,"之":2379,"专":2738,"三":3026," Ga":4247," Ge":3655," Fy":2931," I ":6572," Fo":9668," Fr":8721," Fi":5928," Fl":2776," Fj":2609," Ha":15503," He":10039," Go":2524," Gr":7534," Gu":4458," Hu":4096," Ho":7447," Hi":2663," Je":2895," Ja":4943," Is":2884," In":7378," Ka":7550," Ke":2113," Ki":8813," Jo":5426," Ju":2284," La":9489," Le":5267," Li":8383," Ko":7880," Kr":5128," Ku":2108," Ma":15819," Mi":7093," Me":7355," Lo":6813," Lu":3268," Ne":6630," Na":6059," Ni":3491," Mo":8183," Mu":2787," Am":2962," An":7665," Al":7546," Ba":9272," Au":3596," As":2828," Ar":6669," Be":11852," Bi":3097," Bl":2409," Bo":6415," Br":7392," Bu":3840," By":3181," Ca":8810," Ch":7563," Co":15800," Da":7402," Di":3846," De":44915," Dr":2166," Do":3289," El":2979," Et":2521," En":8708," Eu":2608," Fe":2724," Fa":3178," Xi":2228," Sø":4627," Wi":3744," We":2962," Wa":4211," Yo":2085," Os":5937," Or":3262," Op":2481," Po":5205," Pi":2406," Pe":5104," Pa":8187," No":20722," Ol":2367," Ra":4586," Ro":9267," Re":6462," Ri":4754," Pr":5244," Sy":2169," Sv":4778," Su":4150," St":18500," Ta":4246," Th":6493," Ti":3455," Te":5588," Tr":8905," To":6181," Ru":4349," Sa":11773," Sh":4026," Si":5095," Sc":2823," Se":7484," So":11504," Sp":4034," Sk":6885," Sl":2061," Va":4882," Ve":9942," Vi":7397," Ty":3541," Un":4794," ja":4722," in":34065," ik":4460," is":2212," ka":20700," fø":33238," kj":12239," gå":5771," ki":10103," jo":2540," ju":4702," ha":40329," he":11962," gi":4943," gj":7272," gr":19932," gu":2109," dø":7855," hi":3888," hj":2877," ho":10427," hu":4249," hv":8245," ne":7120," na":12532," my":2654," mu":6899," mo":13920," ok":2382," ol":3868," om":17822," og":141787," of":9223," ob":2162," ny":3021," no":33974," le":11218," li":27765," la":25410," kv":4363," ku":8567," km":9490," kl":4277," kr":10076," ko":42531," me":70036," mi":10095," hø":6629," ma":17890," lu":2098," lo":3520," ad":3981," am":9313," an":19228," ap":2222," ak":3076," al":9858," av":89135," au":3693," ar":13091," at":7530," ba":13278," bi":6342," be":29509," bo":6968," bl":40680," by":21015," bu":2338," br":16487," ca":3932," er":153435," et":63825," en":123670," ei":4836," el":30282," ek":4165," eg":3293," fe":8817," fa":9591," fu":3699," fr":43218," fo":78205," fl":9728," fj":6013," bå":2550," fi":13808," ge":3067," ga":9278," i ":237285," fy":7547," da":14064," do":2621," dr":4255," de":122624," di":7830," dy":2072," væ":4635," sø":12596," ru":9067," sa":19419," se":29156," sj":3216," si":17713," sl":7113," sk":21745," sp":19430," so":87507," ra":3039," re":25505," ri":2962," nå":3726," næ":2896," ro":6266," pu":2169," pr":27989," lø":2099," må":3587," ov":11878," op":17267," or":7497," pe":8091," pa":7750," pl":5274," po":9080," pi":4926," lä":2565," så":2458," va":41741," ve":33600," vo":3210," vi":14710," ty":6900," tu":3382," ut":27318," un":21502," ta":8589," sy":5687," st":43167," sv":7377," su":2425," tr":14296," to":9228," th":2455," på":53815," ti":62638," te":14306," Øs":2807," å ":15026," år":6691," øs":6280," øy":4235,"Fin":2794,"Eur":2192,"En ":4024,"Eng":3095,"Fyl":2572,"Fra":3815,"Fre":2185,"Fol":3808,"For":3758,"Hel":2604,"Han":5694,"Har":2577,"Ind":2121,"Øst":2801,"Bar":2295,"And":2152,"Car":2172,"Ber":4934,"De 
":3390,"Det":18734,"Den":19203,"Dan":2408,"Cha":2366,"Cou":8345,"OL ":7076,"New":2594,"Nor":18501,"Osl":4630,"Par":3554,"Pro":2168,"SA ":4753,"Joh":2414,"Kar":2117,"Kin":6314,"Kon":2546,"Kom":2130,"Kri":2217,"Lan":3221,"MW ":2903,"Lon":2680,"Man":2484,"Mar":5164,"Sør":3934,"Sve":3734,"Str":2300,"Sto":5079,"Sta":5059,"Ste":2680,"Sko":2364,"Som":4932,"Rus":2959,"San":3826,"än ":2530,"Rom":2726,"åde":9646,"ål ":2493,"Vei":2618,"åle":2191,"Vin":2983,"åre":3629,"ård":2572,"år ":14295,"Ves":3148,"ått":3275,"Uni":3264,"Tys":2479,"ær ":4620,"ært":3809,"ære":8422,"The":4012,"Tro":3352,"bis":3511,"bil":4902,"bin":5018,"blo":2551,"ble":31774,"bli":12888,"bla":6396,"bok":3234,"bor":4144,"bbe":3093,"ban":9995,"bak":3210,"bal":7414,"bas":4309,"bar":6636,"bei":5432,"beg":3804,"ber":17512,"ben":4646,"bel":3914,"bev":2185,"bes":8730,"bet":9507,"ca ":4720,"ce ":3774,"bri":5623,"bra":2313,"bre":4128,"bru":13492,"bur":2443,"bun":2536,"bum":3444,"by ":8318,"byd":2738,"bye":5451,"byg":13374,"byp":2127,"am ":5629,"ake":6088,"al ":22413,"ain":3128,"ak ":2688,"agt":4288,"anu":3347,"ano":2720,"ann":21260,"ant":16537,"ans":34997,"ane":10156,"ang":34028,"ani":10694,"ank":5395,"anl":5003,"ap ":5392,"ana":7215,"anc":2796,"and":71633,"amt":3120,"amm":13306,"aml":4533,"amp":3404,"ami":6970,"ame":12312,"amb":2328,"ama":4584,"alv":2901,"alt":13338,"als":4480,"all":28689,"alg":3345,"ali":12960,"ald":4724,"ale":19089,"ala":6862,"alb":4116,"an ":35027,"aks":3242,"akt":7856,"akk":3187,"abl":2978,"ae ":2689,"ad ":7446,"aft":9172,"afi":2584,"ai ":3427,"aga":2271,"age":11424,"adm":2981,"adi":6411,"add":5083,"ade":7303,"ag ":10292,"ach":2606,"ada":2399,"at ":12379,"are":11421,"ard":7601,"arb":5329,"ara":7138,"arn":4304,"arm":4027,"arl":3706,"ark":12605,"ari":9703,"arr":4082,"ars":6026,"art":19222,"asi":5334,"ase":4616,"ask":2682,"asj":20288,"ar ":65150,"apa":4754,"ape":8916,"app":2255,"as ":8910,"ava":2465,"aut":2096,"avn":11841,"avi":3695,"ave":7468,"ay ":2288,"av ":85297,"ata":4831,"ast":9209,"ass":12223,"ato":4866,"ate":21584,"ati":14442,"att":15952,"ats":2979,"atu":5294,"aug":3319,"jer":9769,"jek":2468,"jel":9659,"jem":3205,"jen":18638,"jan":3394,"je ":3919,"jon":34428,"jor":11663,"itu":2594,"itt":16474,"its":2537,"ity":2756,"isk":51560,"isj":4819,"ism":2335,"iss":5696,"ist":35845,"iv ":3591,"ita":7242,"ite":15976,"iti":9684,"iva":2856,"ivi":3570,"ive":13599,"ipp":2048,"is ":12915,"ion":14718,"ir ":6919,"irk":9296,"isi":4522,"ish":2413,"ise":12425,"isa":4298,"ire":7035,"it ":2143,"kil":3815,"kk ":10113,"kin":8354,"kip":2377,"kir":5305,"går":7228,"kis":2484,"kje":16281,"km ":7242,"kel":6677,"ken":15267,"kes":10195,"ker":29257,"ket":9398,"ke ":65844,"kra":11143,"kre":8431,"kt ":12793,"kse":4519,"kro":2450,"kri":13226,"kot":2092,"km²":2186,"kor":5874,"kon":13827,"kom":31756,"kol":9129,"ks ":3032,"kny":2137,"kjø":3238,"kni":5015,"kke":25288,"klu":3574,"kle":4386,"kla":6195,"kli":3140,"jul":2456,"kat":3942,"kar":4133,"kas":2638,"kap":17702,"kan":20091,"kal":14105,"kam":2092,"ka ":6242,"før":14702,"fød":22348,"føl":2302,"ha ":2473,"ham":3082,"han":12460,"hal":3654,"hav":6160,"har":25645,"had":4958,"he ":6327,"hel":5939,"hei":3009,"het":10209,"her":5545,"hen":7004,"hin":2784,"his":4361,"hje":2935,"gle":2190,"gn ":3125,"gla":3596,"gjø":2529,"gni":2494,"gne":11554,"gs ":3264,"gru":12215,"gra":9273,"gt ":5657,"gre":14854,"gst":4516,"gså":9706,"gus":2418,"ial":4669,"ian":8957,"øke":2125,"ølg":2438,"id ":6821,"ibe":2226,"ia ":20599,"iet":4979,"iel":5273,"ien":17663,"ier":7794,"ies":2351,"ig ":24484,"ift":6890,"ør 
":13261,"øpe":2597,"ømm":2930,"ich":3831,"ie ":7138,"ica":2979,"ids":3533,"idr":3478,"ønn":3150,"idl":8890,"ide":18687,"idd":2770,"ida":3822,"ønd":2352,"øst":12196,"il ":45404,"im ":2951,"ika":14540,"ige":27059,"iga":2145,"igh":5558,"igi":2075,"igg":17522,"igs":3095,"ign":3006,"øre":10636,"ørs":12878,"ørt":2899,"ik ":6168,"ime":2903,"ind":10332,"ina":14021,"inn":35697,"ino":2582,"int":10160,"ins":19112,"ine":16471,"ing":71844,"ini":7560,"ink":2146,"iod":2244,"iny":3773,"ikl":3957,"ikk":22590,"ike":13916,"in ":20575,"ikt":9421,"iks":3390,"ilo":2911,"ill":23109,"ilk":2217,"øve":6392,"ilm":4862,"ilh":3403,"ili":8732,"ild":3649,"ile":4123,"io ":2961,"ils":3090,"ilt":4137,"ørø":2134,"øy ":5016,"hol":9462,"hov":6717,"øye":5083,"øya":3991,"hvo":3807,"hun":2960,"hus":4616,"hve":2835,"døs":2243,"død":8337,"ffe":3959,"ffi":2131,"fes":3648,"fer":4921,"fem":2188,"fen":2692,"fek":3989,"fel":4217,"fat":5769,"far":3333,"fam":3942,"fal":2369,"eta":7185,"ete":19743,"eti":2646,"etn":3543,"esp":3357,"eso":2351,"est":46619,"ødt":21901,"ess":8652,"esv":2859,"etr":2759,"ets":9941,"ett":37893,"ety":3237,"ew ":2555,"eve":11454,"eva":2230,"evi":2446,"ey ":3642,"er ":431766,"epa":2788,"eor":2578,"eol":2061,"ød ":7105,"es ":40484,"ept":2696,"epu":5041,"epr":2644,"erk":15694,"erl":6414,"eri":32585,"erg":11340,"erh":2771,"ere":60500,"erf":5751,"erd":9568,"era":10673,"erb":3057,"et ":226950,"esk":8307,"esi":10382,"øde":3134,"ese":10631,"erv":4618,"eru":2639,"err":6355,"ert":31938,"ers":27995,"ern":17742,"erm":4003,"ero":2745,"ekk":7221,"ekn":2204,"ekr":3313,"eks":10659,"ekt":15683,"en ":368973,"ela":6680,"eld":5215,"ele":18548,"eli":14582,"elg":2392,"ell":56973,"elo":2842,"elv":10426,"els":37467,"elt":17365,"emb":7048,"ema":3872,"eme":6179,"emm":4042,"emo":2614,"emi":3720,"emp":3266,"ems":2210,"enf":4002,"ene":43923,"enh":3807,"eng":13869,"ena":2893,"end":24201,"enn":18704,"enk":3130,"eni":5336,"ens":38448,"ent":42518,"egn":9280,"ege":7883,"egg":4893,"egi":7310,"eha":2568,"egr":3736,"eis":4556,"eim":2591,"ein":6378,"eie":6921,"eid":6829,"el ":17607,"eke":3082,"em ":6695,"gje":8413,"git":6162,"gis":5345,"gin":2581,"gio":4769,"ghe":4290,"gge":29980,"gi ":3980,"gen":31946,"get":10089,"ger":52848,"ges":4527,"gg ":2739,"gel":10922,"gde":3615,"ge ":33172,"gas":3030,"gar":3339,"gat":3035,"gam":2397,"gal":2375,"gan":11800,"ga ":3023,"fyl":7035,"fte":8757,"fun":3666,"ftv":7006,"ft ":3864,"fra":35021,"fre":5013,"fri":6048,"for":85448,"fot":5733,"fol":6844,"fle":5300,"fly":4535,"fil":5351,"fik":2963,"fin":6087,"fir":2657,"fis":4937,"fje":5241,"fjo":6945,"da ":8120,"de ":60427,"dal":11372,"dag":6176,"dat":3189,"dar":2553,"dan":8876,"dde":9644,"co ":2205,"ch ":3122,"cha":2322,"ck ":3691,"che":4052,"chi":2081,"cke":2693,"ed ":55809,"eba":2196,"ebe":2240,"ebr":3393,"eal":3627,"eat":2589,"efi":2051,"efo":4434,"efe":4659,"ei ":7806,"een":2210,"edl":3176,"edi":4356,"ede":17856,"eda":3620,"eg ":6746,"eds":6107,"edr":4047,"dve":3394,"dus":5910,"don":4573,"dom":4754,"ds ":5042,"dmi":3335,"dni":2776,"dst":4471,"dte":2377,"duk":4468,"dri":5473,"dra":3627,"dt ":28714,"dre":18932,"dsk":5623,"dia":3579,"der":53613,"des":6374,"det":47571,"del":41297,"dek":2450,"den":73325,"dem":4267,"di ":2586,"dle":5037,"dla":2367,"dli":11521,"din":4634,"dio":3152,"dis":11321,"die":2789,"dig":4896,"rga":6400,"ri ":3980,"rgi":2280,"rge":14583,"ret":27902,"res":20361,"rev":6468,"rfa":4418,"rds":3327,"rdv":2179,"rg 
":6581,"rea":5698,"ree":2115,"ref":5778,"red":9285,"rei":3228,"reg":12569,"rem":5839,"ren":35981,"rek":8172,"rel":4778,"rer":20318,"rep":8966,"rda":5684,"rdl":3552,"rdi":4958,"rde":19629,"re ":73979,"rbu":2376,"rd ":17240,"ras":7043,"rat":9184,"rav":3015,"rbi":3863,"rbe":5664,"rag":2711,"ran":19588,"ram":7456,"ral":8750,"rak":4191,"raf":12560,"rad":5981,"rs ":8675,"ros":5462,"rom":6997,"ron":6959,"rop":5525,"rov":9073,"rod":9193,"rol":4308,"rof":2729,"rog":4064,"rna":5941,"rne":13070,"ro ":3052,"rma":5722,"rme":6882,"rli":7449,"rle":2859,"rla":5982,"rn ":5061,"rks":2379,"rko":2612,"rki":3108,"rke":16934,"rka":2079,"rm ":3363,"nær":3936,"rio":3486,"rit":8311,"ris":17487,"riv":5194,"rig":8523,"ril":2995,"rik":24390,"rin":21593,"rim":2649,"ria":5352,"ric":2697,"rid":3352,"rie":12327,"rif":3440,"rdø":2401,"rk ":15343,"rup":6154,"run":12301,"rum":4066,"ruk":11189,"rus":6418,"rva":2898,"rvi":2442,"rve":4784,"ry ":3420,"rsk":24627,"rsi":6764,"rso":5206,"rse":3955,"rta":2228,"rst":17535,"rte":20285,"rti":8810,"rua":2277,"rts":5729,"rt ":35053,"rri":3179,"rre":7643,"rra":4011,"sak":3680,"sal":2138,"sam":15315,"sbe":2623,"san":6993,"sat":4359,"sas":3809,"sa ":2445,"ryk":2399,"sha":2088,"shi":3201,"sje":5058,"sjo":34525,"sie":4330,"sid":8350,"sia":4578,"sk ":75137,"sit":9775,"sis":16636,"sin":9850,"sik":9289,"sda":2876,"sby":2166,"se ":19672,"sch":2395,"ser":35054,"ses":4849,"set":9508,"sfo":2877,"seg":6362,"sep":3173,"sen":41736,"sem":4309,"sel":10690,"sek":3507,"spo":8496,"spr":5617,"spe":5862,"spi":12660,"spa":2392,"sol":2063,"som":85971,"son":9679,"sor":2986,"sjø":3410,"st ":39863,"ss ":6097,"sli":3561,"slo":6037,"slu":2332,"sky":2874,"sla":9825,"sle":4538,"ski":8246,"skj":4074,"skl":2812,"sko":10936,"skr":10481,"sku":3651,"ska":22269,"ske":46720,"sma":2688,"sme":3758,"stå":5531,"stø":6651,"syn":2627,"sys":2439,"syk":3044,"sse":15330,"ssa":2824,"sso":3173,"ssl":2943,"ssk":3059,"ssi":8369,"sst":2689,"ste":66334,"stf":2230,"sta":38189,"stn":2344,"sto":16688,"sti":20167,"stl":6400,"stu":3784,"str":25344,"sty":4151,"sun":2773,"sut":4111,"sva":4458,"sve":9604,"svi":2603,"tak":2812,"tal":21730,"tab":3588,"tad":7590,"tba":6294,"tav":2813,"tat":16792,"tas":5433,"tar":7233,"tan":11218,"tam":2411,"te ":58351,"ta ":8154,"pa ":2052,"pe ":5528,"par":10024,"pas":4090,"pan":5764,"län":2721,"pen":10042,"per":17920,"pet":8954,"pes":3294,"pel":3770,"pla":9218,"lær":3470,"pil":11406,"pin":7171,"pis":4864,"por":10569,"pol":7076,"ppr":5964,"ppl":2678,"ppe":9671,"pp ":3799,"pub":5466,"pte":2999,"pri":10360,"pre":12096,"pro":23686,"prå":2961,"løp":3837,"mål":4336,"ra ":37023,"ngi":2137,"ngl":4160,"ngr":2861,"ngt":2215,"ngs":15959,"ni ":4401,"nge":45140,"nga":2868,"ngd":4050,"nhe":2871,"nel":12450,"nen":27435,"ner":33385,"net":21165,"nes":25627,"ng ":54251,"ned":4860,"nfo":4282,"nce":2258,"ne ":72733,"nby":5798,"ndt":4870,"ndr":12413,"nds":12659,"ndo":5458,"ndl":2779,"ndi":6968,"nde":56337,"nda":5748,"nal":12130,"nan":2828,"nar":2470,"nad":2448,"nd ":38901,"nav":9346,"nat":5154,"nas":7061,"na ":15576,"nyi":3711,"nua":2135,"nty":8509,"nto":3515,"ntr":8273,"nti":5299,"ntl":2516,"nta":6663,"nte":28676,"nsp":2257,"nst":14726,"nss":3014,"nse":21534,"nsj":2053,"nsi":3034,"nsl":2182,"nsk":39260,"nsa":2994,"nt ":24653,"ns ":20153,"noe":3540,"nom":6845,"nor":29131,"nov":2911,"nne":42931,"nna":2105,"nnb":5914,"nnl":4919,"nno":3889,"nni":5129,"nnk":2501,"nns":7691,"nma":2510,"nli":4969,"nn ":14981,"nla":6112,"nle":2679,"no 
":2847,"nke":3676,"nkr":4191,"nia":6220,"niv":5311,"nis":15475,"nit":2397,"nin":25664,"nik":2124,"ogs":10014,"ogr":5237,"ogi":5246,"ogn":3334,"oge":2102,"ok ":7509,"ol ":2411,"ock":4853,"ode":7882,"of ":4012,"oen":3787,"odu":8565,"og ":134463,"oft":3759,"off":4398,"ofe":2299,"od ":2196,"obl":2263,"obe":3133,"nyt":4280,"jøe":2169,"jør":5841,"ote":3613,"ott":4146,"ots":2202,"oto":3139,"ost":4414,"otb":5531,"osi":2968,"ose":3901,"oss":3129,"ovi":8837,"ove":25154,"oun":9480,"our":3311,"opp":20548,"ope":3654,"opa":2135,"os ":5236,"or ":56677,"ork":4146,"orl":2445,"orm":10647,"orn":3102,"orr":3175,"ord":42679,"ore":13522,"orf":4509,"org":16810,"ori":9530,"ort":20720,"ors":26414,"m² ":2219,"ot ":7999,"orb":6069,"ora":3709,"ola":2381,"old":10833,"on ":35130,"oli":8827,"oll":6799,"olk":9387,"ole":9725,"ols":3475,"olm":2556,"olo":8072,"oly":3133,"oka":3954,"om ":107366,"okk":3118,"oks":3517,"okt":2231,"ona":9739,"ond":6224,"one":22668,"ong":10465,"oni":5288,"ono":3591,"ons":14426,"ont":6196,"oma":5119,"ome":6397,"omb":2444,"omi":3410,"omf":2921,"omm":37033,"omp":3178,"omr":6915,"oms":5919,"la ":5793,"le ":57070,"lde":10166,"lds":2282,"lad":2570,"lag":17793,"lan":58009,"lar":3088,"lat":7640,"las":9906,"lav":2641,"lba":2444,"ld ":6657,"lbu":3945,"kvi":3664,"kva":2672,"kur":2362,"kun":5248,"kul":5655,"kte":10988,"kst":5101,"ksj":6623,"ksi":2580,"ktu":4360,"kti":6575,"kto":3659,"kys":2223,"ls ":2559,"lok":3826,"lon":2509,"lom":14627,"log":6983,"los":2313,"lov":2717,"lme":3805,"lto":5869,"ltu":2394,"lub":3807,"lsk":13894,"lst":8591,"lv ":4083,"lta":2646,"lte":5587,"lse":19815,"lt ":21046,"lge":4495,"li ":4167,"lev":4182,"les":12321,"let":13065,"ler":55284,"lem":5548,"len":25387,"lek":9666,"lei":2319,"leg":6026,"led":5805,"lls":3758,"lo ":5941,"lhø":2797,"lla":5878,"lle":65120,"lli":10408,"llk":3343,"llo":12212,"lke":16528,"lkn":2938,"lm ":3452,"lje":2230,"ll ":17353,"lit":14639,"lis":9353,"lir":3649,"lin":18549,"liv":2248,"lia":5498,"lik":12256,"lig":60844,"lie":7273,"ma ":3762,"mai":2118,"mar":11919,"mas":4336,"mal":3128,"man":16587,"mat":5209,"mbe":7011,"me ":8290,"med":37095,"met":13813,"mes":7734,"mer":33081,"mel":15249,"men":33002,"mfa":2204,"lva":3696,"lve":4358,"lut":2852,"ly ":2856,"lym":3245,"lys":2102,"høy":5492,"hør":3562,"mpi":3576,"mpe":4413,"ms ":2196,"mod":2416,"mon":3257,"mor":2125,"mot":10268,"mt ":3487,"mst":3459,"mrå":7270,"mus":6700,"mun":25496,"min":11788,"mil":8071,"mis":4562,"mid":4108,"mle":3222,"mmu":24025,"mme":31877,"vær":6865,"ytt":5943,"yte":2764,"yst":6626,"ysk":7598,"yrk":2373,"yre":3443,"ypr":2466,"yr ":3697,"ype":2571,"ye ":2367,"yde":4954,"yer":2533,"yen":9116,"ya ":4898,"ykk":3634,"ylk":9083,"ymp":3267,"ygg":10777,"ygd":2788,"yin":3851,"tør":7504,"tøv":4228,"tår":3969,"sør":11014,"så ":11194,"røs":2166,"røn":4125,"røy":2397,"vir":2566,"råk":3246,"vik":8483,"vil":4773,"vin":16505,"vid":3526,"råd":8908,"vit":4787,"vis":10758,"vn ":5466,"vne":6818,"vol":2228,"vok":2535,"vor":4361,"ver":56905,"ves":12440,"vet":7953,"vei":7599,"veg":2114,"ven":15144,"vem":2207,"vel":5384,"ved":23018,"ve ":6401,"val":6677,"van":11534,"var":40025,"vat":3521,"va ":5156,"utø":4075,"usi":6124,"use":8793,"ust":8108,"uss":8811,"utg":6539,"ute":5272,"utt":5236,"uts":3096,"utv":4169,"us ":10711,"ut ":5919,"ure":6307,"urg":2892,"uri":2750,"urn":3039,"uro":3768,"urr":2556,"ur ":5709,"upp":6292,"ume":4177,"unt":9250,"uns":3775,"unk":3135,"uni":6920,"unn":14136,"und":31193,"ung":4670,"une":22585,"uks":4735,"ukt":5470,"uke":5765,"um ":8855,"ult":4324,"ull":5554,"uli":4543,"ule":2555,"un 
":3708,"ugl":2152,"ugu":2387,"ude":2985,"udi":2221,"ue ":2256,"ues":2288,"uar":4525,"uan":2334,"ubl":5816,"ubb":3409,"ud ":2293,"typ":2478,"tyr":5657,"tys":4599,"ty ":11596,"tve":8575,"tvi":4656,"tur":14995,"tus":2293,"tun":2106,"tud":2813,"ts ":7521,"tre":18842,"tt ":26597,"tra":17913,"tri":10126,"tru":4763,"tro":7380,"try":2883,"tse":4787,"tsk":2860,"tsu":3903,"tst":7808,"tta":2344,"tte":44536,"tti":2906,"tts":8940,"to ":8546,"tni":6514,"tne":2955,"tob":2085,"ton":7030,"tok":6614,"tol":4307,"tor":21379,"til":54443,"tik":6372,"tif":3581,"tie":3348,"tig":5426,"tit":4010,"tis":16494,"tin":10509,"tio":5365,"tia":3220,"tid":14688,"tiv":7883,"tje":3021,"tli":7658,"tla":4715,"tle":2120,"tem":9786,"ten":44932,"tei":3878,"tek":5962,"tel":9339,"teg":7791,"ted":11925,"tfo":3043,"th ":3609,"tet":33199,"tes":6700,"ter":82933,"tgi":4175,"på ":52716,"ti ":4056,"the":4578},"n_words":[20399254,23799460,17069273],"name":"no"}
\ No newline at end of file
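The "freq" keys in the Norwegian profile above show how the grams are formed: plain substrings of length 1-3, including space-padded entries such as " l" or "ty ". As a rough illustration only (langdetect's real detector runs a Bayesian update over these counts rather than a simple sum), a profile can be used to score text like this; all names below are hypothetical:

import json

def ngrams(text, n):
    # Character n-grams as plain substrings, matching the profile keys,
    # including space-padded grams at word boundaries.
    return [text[i:i + n] for i in range(len(text) - n + 1)]

def score(text, profile):
    freq, totals = profile['freq'], profile['n_words']
    s = 0.0
    for n in (1, 2, 3):
        total = totals[n - 1] or 1  # guard against a zero total
        for g in ngrams(text, n):
            s += freq.get(g, 0) / total  # relative frequency of this gram
    return s

with open('nlp_resource_data/langdetect/profiles/no', encoding='utf-8') as f:
    no_profile = json.load(f)
print(score('dette er en norsk setning', no_profile))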
diff --git a/nlp_resource_data/langdetect/profiles/pa b/nlp_resource_data/langdetect/profiles/pa
new file mode 100755 (executable)
index 0000000..1140be0
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/pa
@@ -0,0 +1 @@
+{"freq":{"ਾਜ਼":17,"ਾਜਧ":12,"ਾਜਾ":13,"ੀਲ ":18,"D":18,"E":20,"F":29,"G":23,"A":52,"B":35,"C":59,"L":38,"M":47,"N":37,"O":37,"H":31,"I":60,"U":14,"T":45,"W":18,"P":47,"S":55,"R":24,"f":103,"g":125,"d":221,"e":655,"b":81,"c":194,"ੀਰ ":33,"a":561,"n":492,"o":398,"l":256,"m":260,"j":16,"k":37,"h":252,"i":537,"w":71,"v":42,"u":236,"t":467,"s":328,"r":400,"p":124,"z":13,"y":106,"x":19,"ਿਆ।":22,"ਾਤਰ":17,"ਾਤਾ":27,"ਾਤੀ":12,"ਿਆਣ":31,"ਿਆਦ":20,"ਿਆਨ":47,"ਿਆਲ":13,"ਿਆਰ":28,"ਿਆਵ":14,"ਿਆਸ":12,"ਾਣਕ":12,"ਾਣੂ":104,"ਿਆਂ":76,"ਾਣੀ":40,"ਾਣਿ":27,"ਾਣਾ":22,"ਾਨੇ":13,"ਾਨਾ":31,"ਿਊਟ":14,"ਾਨੀ":48,"੩੧ ":18," । ":128,"੩੦ ":23,"ਾਦੀ":22,"ਾਦਾ":19,"ਾਨਕ":26,"ੁਝ ":21,"।ਇਸ":14,"ium":27,"is ":22,"ion":36,"ੀਤ ":15,"਼੍ਰ":22,"੦੦੮":24,"ੀਪ ":375,"ੀਨ ":57,"ੀਮ ":51,"ਟੀ ":57,"ਜੋਂ":19,"ਾਗਰ":20,"ਾਕੀ":370,"ਾਕਿ":34,"ਟਾ ":22,"੨੩ ":13," m":24," o":57," h":17," i":57," d":21," e":15," f":30," a":133," b":21," c":34," t":120," w":40," p":41," s":54,"੨੨ ":14," r":16,"੨੧ ":14,"੨੫ ":14,"਼ਸੀ":29," H":25," I":54," N":32," O":31," L":21," M":43,"੨੪ ":13," B":32," C":54," A":46," F":17," G":21," D":15," E":17,"ਿਰ ":73," S":47," R":22,"਼ਹਿ":50," P":44," W":15," T":36,"ਜੀਵ":20,"੨੭ ":13,"੨੬ ":13,"ਿਲ ":25,"ਜੁਲ":54,"ਜਿੰ":30,"੨੯ ":16,"ਜੂਨ":35,"਼ਾਂ":36,"਼ਾਹ":32,"੨੮ ":15,"਼ਿਆ":14,"ੀਕ ":34,"ਜ਼ਮ":12,"਼ਿਲ":75,"ਜ਼ਾ":33,"ਜ਼ਿ":88,"ਾਂਦ":190,"ਜਾਂ":231,"ਜ਼ੀ":72,"ਾਂਤ":20,"ਾਂਸ":16,"ਜਾਬ":125,"਼ੁਰ":27,"ਜਾਤ":13,"ਜਾਣ":55,"ਜਾਦ":21,"ਜਿਮ":22,"ਜਿਲ":12,"ਜੀਅ":19,"ਿਸ ":72,"ਾਅਦ":30,"ਜਿਸ":70,"ਜਿਹ":19,"ਾਇਆ":59,"ਾਇਣ":62,"੨੦ ":14,"ਾਇਲ":17,"ਟਰ ":67,"ਾਈਟ":14,"ਾਈਡ":27,"ਾਉਂ":14,"ਾਈਨ":29,"ਾਉਣ":23,"਼ਬਦ":26,"ੀਂ ":52,"ਿਤ ":82,"ਟਨ ":17,"ਚੰਦ":21,"੧੮ ":12,"ਛੋਟ":13,"੧੯ ":14,"ੀਆ ":53,"ਿਨ ":1101,"਼ਰਵ":39,"ਜਨਮ":32,"ਜਨਵ":39,"ਜਨਸ":13,"ਜਦੋ":15,"ਜਧਾ":12,"ਿਬ ":69,"ੀਤਾ":41,"Co":14,"ੀਤੀ":42,"ੁਆਰ":46," In":28," Ma":13,"he ":69,"ਾੜੀ":12,"ੀਟਰ":38,"Ma":13,"Ol":12," Co":14,"In":29,"L ":13,"।":1840,"ੀਕਲ":22,"ੂਪ ":25,"ੀਕਨ":13,"ਿਲੀ":21,"ਿਲਾ":83,"Th":15,"ਿਲ੍":13,"ਿਲੋ":46,"ਿਲੇ":38,"ੂਨ ":35,"ਿਲਦ":12,"ਵ":3164,"ਲ":4004,"ਰ":5772,"ਿ":5178,"ਾ":10004,"਼":1111,"ਹ":4455,"ਸ":5034,"ਦ":5926,"ਧ":297,"ਤ":3377,"ਥ":271,"ਢ":42,"ਣ":777,"ਠ":69,"ਡ":853,"ਮ":2207,"ਯ":188,"ਬ":2031,"ਭ":430,"ਪ":1926,"ਫ":303,"ਨ":4410,"ਕ":3798,"ਗ":2047,"ਖ":660,"ਐ":62,"ਓ":66,"ਝ":61,"ਜ":2041,"ਟ":625,"ਘ":134,"ਛ":70,"ਚ":1522,"ਅ":1321,"ਆ":1270,"ਇ":2053,"ਂ":2880,"ਏ":233,"ਈ":490,"ਉ":422,"ਊ":48,"ੱ":1924,"ੰ":2555," a ":18,"੦":200,"੧":273,"੪":62,"ਿਮਨ":20,"੫":77,"੨":239,"੩":93,"੮":92,"੯":96,"੬":69,"੭":58,"੨੦੦":38,"ੜ":191,"ੀ":4636,"ੁ":1825,"ੂ":1115,"ੇ":3761,"ੈ":2081,"ੋ":1676,"ੌ":206,"੍":1046,"ੂਲ ":22,"ੀਜਿ":21," Ol":12,"ੂਰ ":25,"b ":18,"ਿਹਾ":91,"a ":87,"ਿਸੇ":29,"ਿਸ਼":57,"ਿਸਾ":23," Th":15,"ਿਸਤ":34,"ਚੌਂ":12,"ਚੋਂ":38,"ਿਵੇ":63,"ਿਵਾ":13,"ੀਕਾ":45,"i ":27,"ਿਟੀ":38,"ge":13," in":40,"ic ":12,"fi":14," is":12,"fo":16,"ਚਾਈ":23,"he":95,"ha":27,"gh":19,"go":13,"g ":34,"ea":30,"ਚਾਰ":34,"ec":17,"ਚਾਲ":16,"ed":45,"de":37,"di":36,"ia ":20,"ev":13,"h ":33,"Ind":16,"ee":14,"el":41,"ei":12,"en":68,"em":14,"et":19,"ੀਆਂ":178,"es":53,"er":114,"ੀਅਤ":34,"ca":15,"e ":169,"ਚੀਨ":33,"be":16,"da":20,"f ":44," of":43,"ct":18,"cs":14,"co":29,"ck":12,"ch":22,"ce":32,"c ":16,"ics":14,"d ":98,"at":58,"as":29,"ar":52,"al":54,"ai":16,"am":68,"an":97,"ac":18,"ad":16,"ab":12,"ਿਨਾ":26,"nt":47,"ns":26," am":51," an":24,"ਿਨੇ":13,"ੈ। ":755,"of":44,"om":42,"on":91,"ol":28," ਅ":1094,"os":19," ਇ":1749," ਆ":274,"ou":18,"or":54," ਏ":119,"r ":72," ਉ":313,"ow":13," ਈ":30," ਕ":1460,"pe":18," ਖ":182," ਗ":918,"ਿਨ੍":12," ਐ":59," ਓ":47," ਜ":1208," ਝ":16," ਟ":112,"po":12," ਘ":42,"pi":19," ਚ":259," ਛ":39,"lo":16,"ਜੋ ":82,"ll":20,"igh":12,"ly":15,"o 
":26,"ma":17,"mb":17,"me":42,"mi":19,"mp":27,"mu":48,"na":37,"nc":21,"nd":68,"ne":28,"ng":50,"ni":32,"ਿਤਾ":77,"ਿਤੀ":12," ।":190,"ੀਅਮ":40,"m ":58,"ੀਅਨ":27,"ਿਥਿ":27,"ine":16,"ing":33,"li":35,"le":33,"ld":21,"la":31,"n ":137," co":22,"ht":18,"hu":17,"hi":29,"ho":16,"id":13,"ic":60,"ia":36,"ig":20,"in ":38,"ie":22,"k ":17,"ir":14,"is":39,"it":46,"iu":27,"il":21,"in":129,"io":41,"l ":48,"ਾਰਕ":16,"ਾਰਚ":41,"ਾਰਤ":101,"ਾਰਨ":27,"wo":13,"ਾਮਿ":15,"y ":61,"wa":12,"ve":29,"ਾਰੀ":54,"ur":21,"us":14,"ਾਰਾ":51,"ut":16,"um":56,"un":15,"ty":14,"ਾਰੇ":39,"ua":16,"to":25,"ts":12,"tr":30,"te":65,"ti":67,"ਾਬਕ":377,"th":116,"ta":28,"st":52,"se":26,"sh":12,"si":30," ੨":195," ੩":56," ੪":26," ੫":29,"u ":55,"ਚਰਲ":20," ੬":17," ੭":14," ੮":15," ੯":19,"rs":22,"rt":24,"ry":21," ੧":210,"ro":42,"ri":64,"ਚਲਾ":12,"re":47,"rd":17,"ੁਤ ":26,"ra":24,"t ":72," ਹ":2872," ਸ":2699," ਵ":2408,"ਾਬਲ":23,"ht ":12," ਰ":498," ਲ":715," ਬ":868," ਭ":339,"s ":155,"ੁਣ ":18," ਮ":1134," ਯ":133," ਨ":1023," ਪ":1056," ਫ":184," ਤ":951," ਥ":21,"ਜੇ ":27," ਦ":4245," ਧ":86," ਡ":55,"ਾਬੀ":48," ਢ":16,"pr":12,"ਾਬਾ":23,"ਿਗਿ":35,"ੁਰ ":13," s ":16,"ਾਹੀ":40,"ਾਹਿ":88,"ਿਚਾ":17,"hum":14,"ਾਸਿ":34,"ਾਸ਼":82,"ਾਸੇ":16,"ਜਾ ":63,"ਚਨਾ":19,"ਾਲੇ":56,"ਾਲਾ":46,"ਾਲੀ":54,"ਜ਼ ":52,"ਜੀ ":155,"ਿਖੇ":23,"ਾਵਾ":47,"ਿਖਾ":19,"ਾਵਲ":13,"ਿਕਾ":29," th":90,"ym":13,"ਿਕਸ":14,"ਹਾਂ":94,"ਹੀਰ":14,"ਹੀਨ":13,"ਹੁਣ":12,"ਹੁਤ":24,"ਹਿਰ":60,"ਹਿਲ":50,"ਹਿਨ":24,"ਹਿਬ":75,"ਹਿਸ":19,"ਹਾਲ":13,"ਹਾਰ":25,"ਹਾਨ":18,"ਹਿਤ":27,"ਹੀਂ":44,"ਹਾਸ":57,"ਹਾਈ":26,"ਜਨ ":22,"਼ਨ ":23,"ਹਰਿ":36,"ਹਨਾ":25,"er ":48,"es ":28,"ਸ੍ਰ":19,"ers":16,"en ":20,"ਸਿੱ":77,"ਸਿੰ":78,"ਸੂਰ":32,"ਸੂਬ":12,"ਸਿਧ":13,"ਸਿਰ":19,"ਹਨ।":438,"ent":21,"ght":14,"ਸੀ।":71,"ਸ਼ਨ":37,"ਸ਼ਤ":17,"ਸ਼ਰ":12,"ਸ਼ਬ":26,"ਸ਼ਹ":65,"ਸ਼ੁ":34,"ਸ਼ੀ":30,"ਸਾਂ":13,"ਸ਼ਿ":19,"ਸ਼ਾ":149,"ਸ਼ਖ":29,"ਸ਼ਟ":21,"ਸਾਨ":18,"ਸਾਰ":46,"ਸਿਕ":33,"ਸਿਖ":14,"ਸਾਲ":1144,"ਸਾਹ":90,"ਸਿਟ":36,"ਸੀਅ":29,"ਸਾਇ":73,"ਸ਼ੇ":16,"ਸ਼੍":23,"ਸਿਆ":13,"ਸਰਕ":13,"ਸਮੇ":23,"ਸਮਾ":24,"ਗੜ੍":19,"ਸਰੀ":12,"ਸਲਾ":21,"ਸਤੰ":39,"ਸਦੇ":13,"ਸਦਾ":17,"ਸਦੀ":15,"ਸਨੂ":15,"ਗੁਰ":167,"ਗੋਬ":26,"for":13,"ਸਟਾ":14,"ਗ੍ਰ":502,"ਸਨ।":37,"ਚਾ ":12,"ਸਤਾ":54,"ਸਥਾ":37,"ਸਥਿ":13,"ਗਰੇ":16,"ਗਰਾ":30,"ਗਰੀ":415,"ਿਚ ":162,"ਾਹ ":17,"ਾਸ ":54,"਼ਟਰ":18,"ਿਕ ":134,"ਕੰਪ":19,"ਾਲ ":1369,"੧੬ ":14,"cti":12,"ਗਸਤ":63,"੧੭ ":15,"ਾਰ ":238,"੧੪ ":14,"੧੫ ":13,"੧੧ ":12,"ਾਮ ":62,"ਗਿਆ":104,"੧੨ ":16,"ਗਾਂ":14,"੧੩ ":12,"਼ਖ਼":29,"ਾਬ ":99,"੧੦ ":14,"ਹੱਦ":12,"ਖਾਂ":21,"ਖ਼ਸ":29,"ਖਿਆ":40,"ਾਨ ":155,"com":13,"ਗਣਿ":17,"ਿਆ ":232,"ਾਦ ":29,"੦੮ ":24,"cs ":14,"ਾਣ ":28,"ਾਤ ":23,"ਖੇਡ":37,"ਖੇਤ":37,"ਾਜ ":52,"ਖੋਂ":14,"ed ":32,"ਕੌਮ":15,"ਾਗ ":23,"ਕ੍ਰ":37,"ਸੰਸ":37,"੦੦ ":21,"ਘਰ ":13,"ਾਈ ":117,"ਸੰਖ":18,"ਕਾਂ":22,"ਸੰਬ":52,"ਸੰਤ":12,"ਕਿਤ":57,"ਾਂ ":1353,"਼ੀ ":81,"ਹੋਏ":48,"ਹੋਇ":70,"ਹੋਈ":20,"ਕਾਲ":30,"ਕਾਸ":23,"ਕਾਬ":25,"ਕਾਰ":103,"ਕੀਤ":87,"ਹੋਰ":25,"ਹੋਣ":23,"ਕਿਲ":58,"ਕਿਸ":74,"ਕਿਹ":19,"ਕੁਝ":20,"ਕੁੱ":13,"dia":15,"਼ਾ ":51,"ਹਿੰ":31,"ਹੈ।":922,"ਕੇਸ":12,"ਹੁੰ":468,"ਕੋਈ":24," ਅਪ":47," ਅਨ":20," ਆਉ":20," ਅਧ":17,"ੱਖੀ":14,"ੱਖਾ":16," ਅਮ":64," ਅਰ":39," ਅਸ":17,"ੱਖਰ":18,"ਕਦੀ":15," ਅਜ":33," ਅਗ":67," ਅਕ":54," ਅਤ":411,"ੰਸਥ":15,"ਗਾ ":22,"ਕਨੀ":13,"ੰਸਾ":14,"re ":16,"ੱਚੋ":29,"ੱਛਮ":12,"ਕਤੀ":13,"ਕਤੂ":34,"ੱਜੋ":16,"rs ":13," ਉੱ":56," ਉਹ":51," ਉਸ":69," ਇੱ":255," ਇੰ":25," ਉਪ":25," ਉਨ":48," ਉਤ":19," ਏ।":18," ਇਹ":533," ਇਸ":723," ਇਲ":20," ਅੰ":217," ਅੱ":34,"ਾ। ":18," ਇਨ":39," ਇਤ":23," ਇਥ":21,"rig":16,"ਗਰ ":22," ਆਮ":22,"ੱਖਣ":15," ਆਰ":19," ਇਕ":94," ਆਦ":14," ਆਪ":76," ਆਬ":12,"ਸਟ ":13," ਕਰ":183," ੨ ":19," ਕਲ":399," ਕਹ":19," ਕਾ":86," ਕੀ":109," ਕਿ":260," ਕੁ":68," ਗਈ":32," ਕੇ":73," ਕੈ":22," ਕੋ":64," ਕੌ":24," ੩ ":14," ਕ੍":23," ਗਏ":21,"ਕਸ਼":26," ਓਲ":31," ਕਈ":21," ੧ ":21,"ry ":17,"ਸਤ ":69,"ਕਲਾ":14,"ਵਰਗ":12,"ਕਲੰ":379," ਏਫ":13," ਏਨ":15," ਏਸ":15,"ਵਰਸ":36," ਐਗ":22," ਏਲ":14,"ਵਰਤ":32,"ਵਰੀ":76," 
ਜ਼":109," ਜਿ":159," ਜਾ":364," ਜੁ":64," ਜੀ":148," ਜੂ":37," ਜੇ":15," ਜੋ":96," ਜੰ":39,"ਸਨ ":38,"ਕਰਕ":17," ੮ ":14,"ਕਰਦ":41,"ੱਡਾ":23,"ਕਰਨ":55," ਚਿ":16," ਚਾ":51," ਚੁ":16," ਚੀ":38," ਚੰ":37," ਚੱ":12," ਜਰ":13," ਛੋ":14,"ਕਰੀ":18," ੯ ":15,"ਕਰੋ":16," ਜਨ":89," ਜਦ":19,"ਕਲਚ":23," ੬ ":12,"ੱਤਰ":42," ਚਲ":15," ੭ ":13,"ਗੀ ":17,"ਸਭ ":25," ਗਰ":23," ਕੰ":44," ੪ ":15," ਖੇ":84," ਗਣ":16," ਖਾ":28," ਖਿ":13,"ੱਥੇ":12," ਖ਼":15,"ੱਤਾ":19," ਘਰ":13," ਗ੍":436," ੫ ":15," ਗੋ":35," ਗੁ":188," ਗਿ":86,"ੱਤੇ":24," ਤਰ":31,"ੰਖਿ":16," ਤਾ":41," ਤਿ":24,"ੰਗਾ":15,"ੰਗਰ":20," ਤਕ":27," ਤੱ":66," ਦਰ":56," ਦੂ":37," ਦਾ":950," ਦਿ":1171," ਦੀ":419," ਦੁ":51," ਦਸ":54," ਤੌ":38," ਤੋ":497," ਤੇ":160," ਟੀ":56," ਟਰ":13,"ੰਗ੍":60," ਮਈ":37," ਬੇ":15," ਬੋ":36," ਬਾ":492," ਬਿ":25," ਬੀ":32," ਬੁ":18," ਭਗ":25," ਬਹ":34," ਫ੍":14," ਪੱ":30," ਪੰ":163," ਬਲ":14," ਬਰ":25," ਬਨ":14," ਫ਼":66," ਪੜ":16," ਫਾ":12," ਫਿ":18," ਫੁ":20," ਬਣ":71," ਪੋ":18," ਪ੍":133," ਮੌ":17," ਮੋ":24," ਮੈ":37," ਮੀ":19," ਮੁ":481," ਮਾ":131," ਮਿ":97," ਮਹ":95," ਮਸ":25," ਬੰ":15,"st ":12," ਭੌ":12," ਮਨ":31," ਮਤ":12," ਭਾ":230," ਭੀ":16," ਬ੍":13," ਭਰ":19," ਨਹ":38," ਨਿ":106," ਨਾ":302," ਨੂ":313," ਨੇ":127," ਦੱ":14," ਨਵ":51,"ਗਏ ":15," ਦੇ":1425," ਦੋ":31," ਧਰ":56," ਪੁ":69," ਪੀ":18," ਪਿ":76," ਪਾ":125," ਪੈ":40," ਪੇ":13," ਪੂ":34," ਪਰ":200," ਪਹ":57," ਪਟ":13,"ਕੋ ":15," ਲੱ":19,"ਈ ":351,"ੰਦੇ":65," ਵਖ":19,"ੰਦੀ":31,"ੰਦਾ":409," ਵਰ":63," ਵਧ":14," ਵੀ":103," ਵਿ":1204," ਵਾ":793,"ੰਦਰ":40," ਵਸ":13," ਵੇ":12," ਵੈ":16," ਵੰ":18," ਵੱ":108," ਸਕ":45,"ੰਬਰ":125," ਯਾ":23," ਰਚ":14," ਯੂ":72,"ਖੇ ":23," ਮੰ":56," ਰਾ":142," ਰਿ":33," ਰਸ":75," ਰਹ":56," ਰੇ":12," ਲਈ":76," ਰੁ":23,"str":14,"ੰਪਿ":40," ਰੂ":37," ਰੋ":38," ਲਗ":21," ਰੱ":19," ਲਾ":29," ਲਿ":55," ਲੀ":380,"ਏ ":114," ਲੇ":15," ਲੈ":20," ਲੋ":37,"Oly":12,"ਗਤ ":30,"ਲੰਪ":29,"ਲੰਡ":379,"ਲੱਗ":14,"ਂ ":2409,"ੰਜੀ":16,"ੰਜਾ":127," ਸਭ":32," ਸਬ":19," ਸਮ":69," ਸਨ":75," ਸਪ":16," ਸਰ":55," ਸਟ":13," ਸਤ":61," ਸਥ":20," ਸਦ":12,"ੰਤਰ":46," ਹਰ":55," ਸ੍":21," ਸੋ":27,"ਅ ":12," ਸਾ":1296," ਸਿ":210," ਸ਼":257,"ਖੀ ":31," ਹਨ":519," ਸੇ":33," ਸੂ":56," ਸੀ":135," ਸੁ":40," ਹੋ":233,"ੰਡਲ":14,"ੰਡੀ":26," ਸੱ":24,"ਆ ":416," ਸੰ":107,"ੰਡਾ":15," ਹਾ":41," ਹਿ":46," ਹੀ":66," ਹੁ":491," ਹੇ":15," ਹੈ":1352,"ੰਡਰ":381,"ਟ ":103,"ਝ ":29," ਏ ":29,"ਜ ":145,"ਕਸ ":17,"ਚ ":1074,"ਘ ":72,"ਹਿ ":31,"ਗ ":162," ਚ ":20,"ਹਾ ":59,"ਖ ":140,"ਕ ":1139,"ਹੀ ":112,"ਓ ":12,"ੀ। ":55,"pic":12,"ਕਰ ":44,"ਵੰਡ":19,"ਰ ":1488,"ਵੱਖ":25,"ਹੇ ":27,"ਵੰਬ":36,"ਵੱਜ":17,"ਵੱਡ":34,"ਭ ":31,"ਹੈ ":423,"ਵੱਲ":20,"ਸਕਦ":19,"ਗਈ ":25,"ਮ ":350,"ਫ ":31,"ਕੇ ":100,"ਬ ":211,"ਓਲੰ":29,"ਪ ":461,"ਧ ":64,"ਨ ":1907,"ਥ ":48,"ਕਾ ":68,"ਦ ":171,"ਕਿ ":66,"ਣ ":217,"ਕੀ ":396,"ਹੋ ":30,"ਤ ":537,"ਠ ":22,"ਡ ":132,"ਖਣ ":24,"ਾ ":2802,"ਵਿਸ":21,"ਵਾਰ":49,"ਵਿਕ":13,"ੁ ":14,"ਵਾਲ":96,"ਵਾਸ":13,"ਵਿਗ":34,"ਵਿਖ":22,"ਵਿਚ":200,"ੀ ":2902,"ਵਿਦ":16,"ਿ ":127,"ਵਾਨ":14,"ਵਾਂ":742,"ਹ ":633,"ਸਰ ":20,"਼ ":152,"ਵ ":71,"ਸਾ ":23,"ਵੇਦ":48,"ਸ ":997,"ਵੇਂ":22,"ਸ਼ ":84,"ਲ ":1735,"ਸਸ ":13,"ਵਿੱ":870,"ਸੇ ":56,"ਹਨ ":79,"ਂ।":15,"ਕਨ ":15,"ੋ ":213,"ਸੀ ":89,"੍ ":22,"ਹਰ ":16,"ੇ ":2940,"ੈ ":444,"ੂ ":303,"ਕਟ ":14,"ਰਸ਼":34,"ਰਸਾ":69,"ਰਸਿ":50,"ਰਹਿ":56,"ng ":24,"ਰਹੇ":17,"nce":16,"ne ":14,"ndi":15,"ਰਾਂ":87,"ਰਾਜ":89,"ਰਾਬ":13,"ਰਾਨ":31,"ਰਿਆ":72,"ਰਾਣ":19,"ਰਾਸ":25,"ਰਾਹ":24,"ਰਿਕ":21,"ਰਾਵ":14,"ਰਾਮ":35,"nd ":32,"ਰਿਤ":27,"ਰੀਆ":20,"ਰਮਾ":139,"ਮੰਡ":22,"ਮੰਨ":14,"ਲੇ ":121,"ਰਵਾ":29,"ਰਵਰ":37,"ਰੈਲ":39,"ਰੋਜ":15,"ਰੋਮ":30,"ਰੋੜ":12,"nte":17,"ੰਕ ":58,"ns ":12,"ਰੀਸ":12,"ਰੀਬ":18,"ਰੀਕ":94,"ਰਿਸ":18,"ਰਿਹ":15,"ੰਗ ":69,"। ":1125,"ਕਈ ":21,"ਰੂਪ":26,"ੰਘ ":71,"ਰੈਗ":367,"ਰੈਕ":12,"ਰੇਗ":15,"ਰੇਜ":79,"ੰਜ ":22,"ਲਮ ":13,"ਰਕੇ":20,"of ":41,"ਲਣ ":22,"ਰਕਾ":23,"ਐਗਰ":22,"ਰਨਾ":16,"ਲਾ ":163,"ਰਦੁ":28,"ਰਦੀ":16,"ਰਦਾ":42,"ਰਦੇ":29,"ਰਮਨ":15," ਈ ":16,"ਰਬੀ":20,"ਲੀ ":116,"ਯੋਗ":18," ਆ ":21,"on 
":41,"ਰਤਾ":15,"ਰਤਿ":15,"ਰਤੀ":63,"ona":13,"ons":12,"ਯੂਨ":34,"ਯੂਰ":14,"ਯੂਲ":19,"ਲੀਪ":371,"ੱਤ ":62,"ਲੀਵ":25,"ੱਦ ":13,"ਲਾਂ":56,"ਲਾਈ":52,"ਵਲ ":18,"ਲਿਆ":33,"ਲੀਅ":22,"ld ":18,"ਲੀਆ":17,"ਲਾਵ":16,"ਲਿਖ":25,"ੱਧ ":29,"ਵੇ ":16,"ਲ੍ਹ":16,"ੱਲ ":23,"ਵਾ ":26,"ਲੋਮ":22,"ਲੋਂ":20,"ਲੋਕ":25,"ਲੋਗ":23,"ਵੀ ":125,"ਲੇਖ":15,"ਲੈਂ":19,"ੰਤ ":18,"mb ":14,"ਵਖ ":12,"ੰਡ ":46,"ਲਚਰ":23,"mer":15,"ਲਗਾ":13,"ੰਨ ":21,"lym":12,"ੰਧ ":16,"।ਇ":25,"ੰਦ ":43,"ੰਥ ":14,"mpi":14,"ਰੱਖ":17,"ੱਕ ":255,"ਰੰਥ":15,"ਵਨ ":15,"ੱਖ ":91,"mu ":47,"ੱਚ ":827,"ਆਉਂ":16,"ਅਨੁ":13,"੍ਹਾ":96,"ਇਸ ":693,"ਰਾ ":103,"ਮਨੇ":20,"ਇਹ ":527,"ਮਨੁ":16,"ਮਨੀ":13,"ਅਤੇ":407,"ਰਲ ":24,"ਭਾਈ":15,"ਭਾਗ":17,"ਭਾਰ":147,"ਭਾਵ":14,"ਭਾਸ":44,"ਅਜਿ":15,"ਇਲ ":16,"ਰਨ ":81,"ਰਮ ":38,"ਅਰਥ":18,"ਅਮਰ":51,"ਯਾ ":16,"ਬੋਲ":29,"ਰਡ ":23,"ਅਪ੍":39,"ਰਤ ":82,"ਬ੍ਰ":15,"੍ਰੋ":27,"੍ਰੇ":90,"੍ਰੈ":416,"ਰਣ ":20,"੍ਰਮ":14,"੍ਰਦ":16,"੍ਰੀ":65,"੍ਰਿ":60,"੍ਰਾ":30,"੍ਰਸ":16,"੍ਰਹ":30,"ਰਥ ":20,"੍ਰੰ":13,"ਮੇਂ":24,"ਆਨੀ":21,"ਏ। ":23,"ਆਪਣ":51,"ਆਦਿ":12,"ਆਦਾ":18,"ਆਣਾ":27,"ਮੈਨ":17,"ਮਾਨ":46,"ਮਾਤ":26,"ਮਾਣ":116,"ਮਿਕ":21,"ਮਾਰ":62,"ਮਾਲ":18,"ਮਿਥ":28,"ਮਾਂ":39,"ਇਆ।":31,"ਮੁੱ":38,"ਮਿਲ":41,"ਮਿਸ":19,"ਮੀਟ":34,"ਮੁਖ":14,"ਮੁਕ":29,"ਮੁਤ":380,"ਰੋ ":14,"ਮਹਾ":43,"ਮਹਿ":32,"ਮਹੀ":13,"ਆਵਾ":13,"ਰੂ ":145,"ਭੌਤ":12,"ਆਰਾ":46,"ਰੀ ":651,"ਬੰਧ":25,"ਰੇ ":75,"ਲਈ ":78,"ਉਣ ":23,"ਮਰੀ":43,"ਇਥੇ":16,"ਬਣਾ":43,"ਇਨ੍":18,"ਪੜ੍":12,"ਫੁਟ":13,"ਫ਼ਰ":40,"ਇਣਕ":56,"ਉਸ ":51,"ਉਹ ":40,"ਈਆਂ":13,"ਇਤਿ":19,"ਮਨ ":35,"ਇਸਦ":16,"ਪ੍ਰ":176,"ਪੈਦ":15,"ਅੰਤ":31,"ਅੰਦ":15,"ਅੰਗ":91,"ਅੰਕ":65,"ਭੀ ":15,"ਇਲਾ":16,"ਈਨਾ":22,"ਰਜ ":32,"ਰਚ ":44,"ਬੀਜ":26,"ਰਗ ":14,"ਬਿੰ":25,"ਬਾਦ":22,"ਬਾਰ":32,"ਬਾਬ":12,"ਰਕ ":18,"ਬਾਕ":372,"ਬਾਲ":27,"ymp":12,"ਈਡਰ":15,"ਬਾਅ":27,"ਬਾਈ":14,"ਉਂਦ":22,"ਬਹੁ":25,"ਭਗਤ":25,"ਮੇ ":12,"ਬਲਾ":20,"ਪੱਛ":12,"ਪੰਜ":152,"ਬਰਾ":15,"ਫ੍ਰ":14,"ਮੀ ":18,"ਮਾ ":32,"ਸਮ":81,"ਸਭ":32,"ਸਬ":22,"ਸਫ":15,"ਸਪ":32,"ਸਨ":101,"ਸਵ":19,"ਸਲ":51,"ਸਰ":94,"ਾ।":27,"ਸਟ":58,"ਸਦ":49,"ਸਤ":207,"ਸਥ":50,"ਸਕ":73,"ਵੱ":108,"ਵੰ":62,"ਵੈ":22,"ਵੇ":119,"ਉਨ੍":43,"ਵਸ":16,"ਵਿ":1234,"ਵੀ":154,"ਵਾ":1015,"ਵਨ":21,"ਵਧ":14,"ਵਰ":201,"ਵਲ":39,"ਵਖ":19,"ਲੰ":416,"ਲੱ":23,"ਲੜ":14,"ੌਰ ":34,"ਲੋ":133,"ਲ੍":24,"ਲੇ":163,"ਲੈ":43,"ਲਿ":106,"ਲਾ":368,"ਲੁ":15,"ਲੀ":580,"ਲਹ":12,"ਰੱ":21,"ਰੰ":40,"ਲਵ":25,"ਲਬ":13,"ਲਮ":25,"ਲਤ":16,"ਲਣ":25,"ਲਦ":24,"ਲਚ":23,"ਲਕ":25,"ਲਗ":28,"ਰੈ":428,"ਰੋ":128,"ਰ੍":22,"ਰੀ":839,"ਰੁ":36,"ਰੂ":205,"ਲਈ":79,"ਰੇ":213,"ਰਹ":94,"ਰਸ":179,"ਰਿ":221,"ਰਾ":502,"ਮੱ":15,"ਮੰ":60,"ਰਲ":37,"ਰਵ":92,"ੀਤ":114,"ੁਆ":59,"ੀਦ":20,"ੀਬ":34,"ੀਮ":70,"ੀਨ":92,"ੀਪ":396,"ੁਖ":20,"ੀਵ":67,"ੁਕ":56,"ੀਰ":67,"ੀਲ":39,"ੇ।":14,"ੁਝ":24,"ੁਟ":25,"ੀਸ":29,"ੁਜ":22,"ੀਆ":241,"ਿਥ":45,"ਿਦ":23,"ਿਣ":20,"ੀਅ":117,"ਿਤ":204,"ੀਂ":64,"ਿਡ":14,"ਿਟ":58,"ਿਮ":51,"ਿਬ":77,"ਬਰ ":175,"ਿਪ":16,"ਿਧ":20,"ਿਨ":1162,"ੀਗ":18,"ਿਵ":98,"ੀਕ":136,"ਿਲ":265,"ਿਰ":130,"ੀਟ":47,"ਾੜ":27,"ੀਜ":44,"ਿਹ":111,"ਿਸ":263,"ਾਡ":16,"ਇੰਟ":13,"ਾਣ":249,"ਿਅ":43,"ਾਤ":101,"ਿਆ":517,"ਾਦ":96,"ਾਧ":15,"ਿਉ":16,"ਾਨ":318,"ਿਊ":33,"ਾਪ":44,"ਾਬ":594,"ਾਮ":104,"ਾਰ":660,"ਾਲ":1569,"ਾਵ":109,"ਿਖ":72,"ਿਕ":215,"ਿਗ":45,"ਾਹ":169,"ਿਚ":202,"ਾਸ":245,"ਿਜ":25,"਼ਾ":203,"਼ਿ":116,"ਾਅ":32,"਼ੀ":109,"ਾਂ":1630,"਼ੁ":43,"ਾਈ":221,"ਾਉ":53,"਼ੇ":19,"ਾਇ":185,"਼ੋ":16,"਼ੈ":21,"ਬਲ 
":16,"਼੍":25,"ਾਕ":443,"ਾਗ":63,"ਾਖ":18,"ਾਜ":126,"ਾਚ":13,"਼ਨ":40,"਼ਤ":27,"਼ਟ":24,"਼ਸ":37,"਼ਹ":70,"਼ਵ":12,"਼ਰ":65,"਼ਬ":33,"਼ਮ":21,"਼ਖ":29,"ਹੱ":25,"ਹੰ":15,"ਹੂ":17,"ਹੁ":520,"ਹੈ":1355,"ਹੇ":48,"ਹਾ":329,"ੀ।":87,"ਹੀ":203,"ਹਿ":360,"ਸੰ":167,"ਸੱ":26,"ਹੋ":237,"ਹੌ":13,"ਸੂ":66,"ਸੁ":43,"ਸੀ":222,"ਸੇ":97,"ਹਨ":550,"ਸਹ":12,"ਸਸ":15,"ਸਿ":313,"ਸਾ":1477,"ਸ਼":623,"ਹਲ":14,"ਸੋ":30,"ਹਰ":67,"ਹਮ":13,"ਸ੍":26,"ਦਸ":57,"ਦਿ":1225,"ਦਾ":1748,"ਦੁ":90,"ਦੀ":615,"ਦੂ":48,"ਥੇ":53,"ਦਨ":53,"ਥੋ":13,"ਦਰ":129,"ਤੰ":44,"ਦਲ":21,"ਤੱ":67,"ਧਾ":77,"ਨਜ":12,"ਨਡ":15,"ਧੀ":16,"ਧਿ":28,"ਨਦ":13,"ਨਤ":17,"ਦੇ":1613,"ਦੋ":54,"ਨੁਸ":12,"ਧਰ":65,"ਨਕ":44,"ਤਸ":17,"ਤਵ":19,"ਤੀ":231,"ਤੂ":46,"ਤਿ":96,"ਤਾ":728,"ਣੇ":53,"ਤਨ":20,"ਤਪ":13,"ਤਤ":12,"ਤਰ":186,"ਤਲ":37,"ਤਮ":27,"ਉਹਨ":12,"ਥੀ":15,"ਥਿ":46,"ਥਾ":70,"ਤੋ":518,"ਤੇ":631,"ਨੁੱ":16,"ਤੌ":43,"ਇੱਕ":245,"ਣਕ":72,"ਨੂੰ":329,"ਡੇ":30,"ਣਾ":137,"ਣੂ":108,"ਣਿ":62,"ਣੀ":85,"ਤਕ":48,"ਨੇਂ":14,"ਟ੍":17,"ਟੇ":26,"ਨੇਜ":20,"ਟੀ":130,"ਟਿ":29,"ਡਿ":29,"ਡੀ":68,"ਨ।":476,"ਡਾ":86,"ਡਲ":20,"ਡਰ":410,"ਡਦ":12,"ਮਰ":69,"ਮਲ":18,"ਬੰ":44,"ਮਸ":29,"ਮਹ":96,"ਮੁ":499,"ਮੀ":82,"ਮਿ":154,"ਮਾ":391,"ਮੂ":19,"ਮੈ":49,"ਮੇ":59,"ਮ੍":15,"ਮੌ":19,"ਮੋ":28,"ਰਕ":76,"ਰਖ":17,"ਰਗ":45,"ਰਚ":72,"ਰਜ":81,"ਰਟ":18,"ਯਾ":37,"ਰਡ":28,"ਯੂ":73,"ਰਣ":36,"ਯੁ":14,"ਰਥ":37,"ਰਤ":198,"ਰਦ":131,"ਰਨ":116,"ਰਫ":22,"ਰਪ":20,"ਯੋ":27,"ਰਬ":46,"ਰਮ":236,"ਪੰ":164,"ਬਲ":53,"ਪੱ":37,"ਬਰ":215,"ਫ੍":15,"ਬਹ":34,"ਭਗ":29,"ਬੁ":20,"ਬੀ":120,"ਬਿ":55,"ਬਾ":578,"ਬੋ":46,"ਬੈ":14,"ਬੇ":32,"ਮਈ":41,"ਮਕ":17,"ਬ੍":15,"ਭਰ":22,"ਮਜ":12,"ਮਤ":30,"ਭਾ":256,"ਭੀ":18,"ਭਿ":14,"ਭੌ":12,"ਮਦ":18,"ਮਨ":98,"ਪਲ":21,"ਨੰ":12,"ਪਹ":57,"ਪਸ":12,"ਪਰ":222,"ਪੂ":52,"ਪੈ":44,"ਪੇ":23,"ਪੀ":37,"ਪੁ":90,"ਪਾ":168,"ਪਿ":130,"ਬਕ":380,"ਪੋ":22,"ਪ੍":176,"ਨ੍ਹ":75,"ਫਰ":17,"ਫਲ":16,"ਬਦ":46,"ਬਨ":25,"ਫ਼":106,"ਪੜ":21,"ਫਾ":21,"ਫਿ":21,"ਫੁ":23,"ਬਣ":71,"ਨਵ":102,"ਦੱ":14,"ਨਲ":14,"ਨਰ":14,"ਨਮ":39,"ਨੇ":196,"ਨੂ":341,"ਨੀ":200,"ਨੁ":33,"ਨਾ":539,"ਨਿ":163,"ਨਸ":36,"ਨਹ":40,"ਨ੍":89,"ਨੈ":12,"ਨੋ":19,"ਪਨ":15,"ਪਣ":56,"ਪਤ":28,"ਪਟ":24,"ਬਦ ":20,"ਕਰ":232,"ਕਮ":20,"੨ ":59,"ਕਲ":439,"ਕਨ":33,"ਕਦ":33,"ਕਟ":31,"ਕਤ":65,"੧ ":77,"ਕਈ":21,"ਕੱ":19,"ਕੰ":48,"ਗਲ":25,"ਗਰ":511,"੪ ":44,"ਖੋ":29,"ਖੇ":111,"ਗਦ":22,"ਗਣ":22,"ਗਤ":41,"ਖੀ":35,"ਖਾ":81,"ਖਿ":54,"ਖ਼":60,"ਕੜ":15,"ਕ੍":37,"ਗਏ":21,"ਖਰ":32,"ਕੌ":24,"੩ ":43,"ਕੋ":104,"ਗਈ":32,"ਖਦ":15,"ਕੈ":22,"ਕੇ":135,"ਕੁ":80,"ਕੂ":15,"ਖਣ":34,"ਖਤ":12,"ਕਾ":323,"ਕਿ":322,"ਕੀ":517,"ਕਹ":19,"ਕਵ":12,"ਕਸ":64,"ਏਲ":14,"ਐਗ":22,"ਪਹਿ":45,"ਏਸ":17,"ਓਲ":31,"੦ ":87,"ਜੇ":38,"ਜੈ":24,"ਜੋ":120,"ਜ਼":304,"ਜਾ":547,"ਜਿ":213,"ਜੀ":234,"ਜੁ":64,"ਜੂ":46,"ਪਿਤ":20,"ੰ ":328,"ਪਾਸ":19,"ਪਿਕ":29,"ਜਦ":23,"ਪਾਰ":18,"ਜਧ":12,"ਜਨ":116,"ਪਿਊ":14,"ਪਾਣ":18,"੯ ":52,"ਛੋ":17,"ਚੱ":12,"ਪਾਕ":32,"ਚੰ":38,"ਜਲ":16,"ਜਰ":27,"ਟਬ":13,"ਟਨ":25,"ਪਾਈ":14,"ਟਾ":78,"ਟਰ":121,"ਜੰ":41,"ੋਗਰ":27,"ਗੜ":29,"੬ ":50,"ਚਕ":12,"ਗਸ":65,"ਗਾ":75,"ਗੂ":16,"ਗੁ":197,"ਗੀ":24,"ਗਿ":132,"ਪੂਰ":40,"ਗੇ":24,"ਗ੍":505,"ਗੋ":52,"੫ ":50,"ਘਰ":19,"ਚਿ":28,"ਚਾ":99,"ਚੁ":16,"ਚੀ":48,"ਚੇ":17,"ਪਿੰ":41,"ਪੁਰ":63,"ਚੋ":42,"ਚੌ":23,"੮ ":73,"ਛਮ":12,"ਜਗ":13,"ਚਨ":19,"ਚਰ":32,"੭ ":49,"ਚਲ":24,"ਅਤ":454,"ਆਂ":297,"ਅਜ":34,"ਅਗ":69,"ਅਕ":61,"ਆਣ":33,"ਇਆ":135,"ਆਖ":12,"ਅਸ":23,"ਅਮ":113,"ਅਰ":63,"ਅਲ":18,"ਅਦ":42,"ਆਇ":12,"ਅਧ":17,"ਆਉ":22,"ਅਨ":55,"ਅਪ":47,"ਪਣੇ":34,"ਈਆ":13,"ਇਥ":21,"ਇਤ":25,"ਇਣ":62,"ਆਸ":22,"ਪਣੀ":15,"ਆਰ":95,"ਆਮ":24,"ਇਕ":101,"ਆਵ":17,"ਆਲ":16,"ਆਨ":54,"ਆਦ":35,"ਆਬ":13,"ਆਪ":78,"ਆ।":54,"ਬਾ ":22,"ਂਕ":18,"ਂਗ":31,"ਈ।":12,"ਂਟ":25,"ਂਡ":36,"ਂਸ":30,"ਂਤ":24,"ਂਦ":244,"ਉੱ":56,"ੋੜ ":16,"ਬੀ ":78,"ਏਨ":15,"ਏਫ":13,"ਇਨ":48,"ਅੱ":34,"ਇਲ":37,"ਅੰ":220,"ਈਟ":14,"ਇਹ":542,"ਮਈ ":41,"ਇਸ":741,"ਉਂ":46,"ਈਡ":27,"ਈਨ":32,"ਏ।":33,"ਬੇ ":13,"ਉਦ":21,"ਉਨ":51,"ੋਇਆ":71,"ਉਣ":28,"ਉਤ":19,"ਉਪ":26,"ੜ ":44,"ਇੱ":255,"ਇੰ":32,"ਉਸ":76,"ਉਹ":56,"ਊਟ":14,"ਪਰਮ":113,"ਨਕਸ":15,"ਪਤ ":12,"ਧਰਮ":31,"ਧਰਤ":22,"ਪਰ 
":49,"ਧਾਂ":14,"ਉੱਤ":40,"ੋਬਿ":25,"ਦੁਆ":48,"ਦਿੱ":27,"ਦੁਨ":24,"ੋਮੀ":24,"ਦੂਸ":12,"ਦੂਜ":16,"ੋਮਨ":25,"੦੦":66,"੦੮":25,"੧੦":19,"੧੩":12,"੧੪":15,"੧੧":14,"੧੨":18,"੧੭":17,"੧੮":24,"੧੫":17,"੧੬":17,"੧੯":42,"੨੧":14,"੨੦":53,"ਦਾਂ":14," ੧੮":24," ੧੭":16,"ਦਾਨ":17," ੧੬":17," ੧੫":16," ੧੪":15," ੧੩":12," ੧੨":16," ੧੧":13,"ੋਲੀ":19,"ਦਾਰ":26," ੧੯":41," ੨੦":53," ੨੧":14,"ਦਿਆ":31," ੨੭":12," ੨੬":12," ੨੯":17," ੨੮":14," ੨੩":13," ੨੨":14," ੨੫":13," ੨੪":14,"ਦਾਸ":19," ੩੦":25," ੩੧":16,"ਦੀਆ":75,"ਦਿਨ":1097,"umb":16,"੍ਹ ":21," ੧੦":19,"ਦੋਂ":17,"੩੧":18,"੩੦":26,"um ":31,"੨੯":17,"੨੮":15,"੨੭":13,"੨੬":13,"੨੫":14,"੨੪":14,"੨੩":13,"੨੨":14,"ੱਤ":172,"ੱਢ":13,"ੱਡ":45,"ੱਧ":49,"ੱਦ":25,"ੱਥ":25,"ੱਗ":23,"ੱਕ":286,"ੱਖ":185,"ੱਛ":18,"ੱਜ":30,"ੰਸ":52,"ੱਚ":879,"ੱਟ":28,"ੱਠ":21,"ੱਲ":52,"ੱਸ":22,"ੰਥ":18,"ੰਤ":85,"ੰਧ":36,"ੰਦ":618,"ੰਡ":502,"ੰਬ":155,"ੰਮ":32,"ੰਨ":45,"ੰਪ":49,"ੰਕ":75,"ੰਖ":19,"ੰਗ":218,"ਦੇਸ":44,"ਦੇਵ":25,"ੰਜ":177,"ੰਟ":22,"ੰਘ":85,"ਨਵਰ":42,"ਨੀਆ":32,"ਨੀਅ":13,"ਨਿਵ":62,"ਨੀਕ":15,"ਨਿਆ":24,"ਬਕ ":378,"ਨਾਨ":32,"ਨਾਲ":179,"ਨਾਵ":16,"ਨਿਕ":18,"ਨਾਮ":38,"ੜ੍":38,"ਨਾਂ":88,"ੜੀ":29,"ੜਾ":29,"ੜੇ":20,"ty ":13,"ਨਸੰ":13,"ਨਹੀ":36,"ਏਫ ":13,"ਨਵੰ":36,"ੁਦ":13,"ੂਆ":13,"ੁਨ":42,"ੁਤ":412,"ੁਣ":26,"ੁਮ":18,"ੁਰ":279,"ੁਸ":47,"ਿੰ":258,"ੁਲ":97,"ਿੱ":1026,"ੈ।":922,"ੂਜ":17,"ੂਨ":78,"ੂਦ":14,"ੂਰ":124,"ਏਲ ":13,"ੂਬ":50,"ੂਪ":31,"ੂਸ":23,"ੂਲ":53,"ੁੰ":490,"ੁੱ":117,"ੂੰ":335,"ੇਂ":94,"ੇਕ":12,"ੇਖ":36,"ੇਦ":53,"ੇਤ":52,"ੇਡ":47,"ੈਂ":49,"ੇਟ":14,"ੇਜ":108,"ੇਗ":24,"ੇਰ":47,"ੇਲ":44,"ੈਕ":39,"ੇਵ":52,"ੈਗ":377,"ਧਾਰ":30,"ੇਨ":27,"ਧਾਨ":15,"ਧਿਆ":15,"ੈਣ":13,"ੈਦ":24,"ੇਸ":109,"ੈਟ":17,"ੈਲ":73,"ੈਰ":17,"ੈਨ":38,"ੈਸ":23,"ੋਂ":617,"ੋਇ":77,"ੋਈ":46,"tio":34,"thu":15,"ੋਟ":27,"ੌਂ":47,"ੋਡ":13,"ੋਜ":32,"ੋਧ":14,"ੋਨ":32,"ੋਪ":22,"ੋਣ":30,"ੋਤ":20,"ੋਏ":51,"ੋਗ":55,"ੋਚ":14,"ੋਕ":33,"ੌਜ":14,"ੌਤ":23,"ੋਮ":61,"ੋਬ":36,"ੋਲ":71,"ੋਰ":92,"ੋਵ":23,"ੋਹ":24,"ੋਸ":21,"ੋੜ":31,"ੌਮ":17,"ੌਰ":50,"ted":14,"੍ਹ":146,"੍ਰ":834,"ter":25,"the":71,"ਆ। ":38,"ਤੂਬ":34,"ੇਲ ":13,"ੇਰ ":12,"ਤੀਆ":19,"ੇਸ ":12,"ਤਿਹ":21,"ਤਿਆ":30,"ਤਾਨ":49,"ਤਿਕ":18,"ਤਾਬ":383,"ਨਕ ":14,"ਤਾਰ":18,"ਤਾਂ":61,"ੇਵ ":19,"ਨਮ ":32,"ਤੌਂ":25,"ਤੋਂ":485,"ੈਨ ":18,"ਤੌਰ":15,"ੈਲ ":41,"ਨਾ ":113,"ਥਾਂ":13,"ਥਿਤ":12,"ਥਿਹ":26,"ਥਾਨ":18,"ਦਸੰ":39,"ਨੀ ":107,"ੇਗਰ":13,"ਦਰਿ":26,"ਨੇ ":143,"ਤੰਬ":41,"ਤੱਕ":12,"ਤੱਤ":53,"ਂਟ ":12,"ੇਜੀ":27,"ੇਜ਼":73,"ੇਡਾ":24,"ਂਡ ":22,"ੈਂਡ":21,"ੇਡਦ":12,"ਂਗ ":18,"ੇਦਨ":48,"ੇਤੀ":16,"ੇਤਰ":23,"ੋਂ ":614,"ਤੋ ":20,"ਣਕਾ":14,"ਤਕਨ":13,"ਥੇ ":48,"ਦਨ ":52,"ੈਕਟ":16,"ੋਈ ":39,"ce ":16,"ੈਗਰ":366,"ੋਏ ":45,"ਥਾ ":23,"ੇਵਾ":19,"ੇਸ਼":80,"am ":12,"ੋਕ ":20,"al ":27,"ਣਿਤ":17,"ਣਿਆ":40,"and":32,"amu":47,"an ":23,"ੈਦਾ":18,"ਦਰ ":34,"ਣਾਇ":20,"ੌਂ ":41,"ੋਣ ":17,"ਦੋ ":25,"ਤਰਾ":24,"ਤਰੀ":36,"ੋਪ ":15,"ਦੇ ":1528,"at ":19,"ਦੀ ":510,"as ":15,"ੋਰ ":44,"ੋਲ ":12,"ਦਾ ":1619,"ati":17,"ਦਿ ":15,"ੜਾ ":14,"ੁਟਬ":13,"ੀਸਟ":12,"ੀਵਨ":13,"ੀਵਰ":32,"ੁਕਾ":27,"ਤਕ ":19,"ੜੀ ":26,"ੀਮਾ":12,"�":48,"ੁਰਦ":34,"ਡਦੀ":12,"ੜੇ ":18,"ੁਨਿ":12,"ੁਨੀ":20,"ਣੀ ":69,"ੁਤਾ":380,"ਣਾ ":86,"ਡਰਾ":13,"ਣੂ ":105,"ਣੇ ":51,"ਆਂ ":277,"ੁਸ਼":14,"ਂਦਰ":14,"ਂਦੀ":34,"ਂਦਾ":168,"ਂਦੇ":20,"ੈ।ਇ":14,"ੂੰ ":327,"ਤਰ ":83,"ਅਨ ":34,"ਡਾਂ":31,"ੁਰੂ":142,"ੁਰਾ":23,"ੁਰਸ":21,"ਿੰਦ":69,"ਿੰਡ":45,"ਿੰਘ":68,"ਿੰਗ":49,"ਡੀਅ":12,"ਿੱਤ":44,"ਿੱਧ":18,"ਿੱਖ":61,"ਅਤ ":35,"ਿੱਚ":863,"ੁਲਾ":52,"ੁਲੀ":15,"ਅਦ ":27,"ੂਰਜ":24,"ੂਰਬ":14,"ੂਬਰ":34,"ਤਾ ":184,"ਤੀ ":175,"ੂਨਿ":14,"ੂਨੀ":19,"ਤੇ ":613,"ਜੰਤ":20,"ਜੰਗ":15,"ਡਰ ":388,"ਆਨ ":22,"ੂਲੀ":19,"ਇਆ ":100,"ਆਪ ":24,"ਅਮ ":45,"ੁੱਖ":54,"ਅਰ ":17,"ੁੰਦ":474,"ਡਲ ":18,"ਟਬਾ":13,"ਡੀ ":25,"ਡਾ ":35,"ਨ। ":77,"ੇਂ ":52,"ਡੇ ":21,"ੜ੍ਹ":37,"ਟਰੀ":23,"ਟਾਂ":13,"ਟਾਇ":14,"ਅਕਤ":41,"ਅਕਾ":15,"ਅਗਸ":63,"ਆਮ ":19,"ਆਰ ":21,"ਣਕ ":58,"ਟਿਆ":13,"ਇਕ ":86,"ਟੀਮ":54,"ਟ੍ਰ":17},"n_words":[112478,136533,89577],"name":"pa"}
\ No newline at end of file
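The Punjabi profile above is why explicit text decoding matters for this python2-to-python3 conversion: its keys are non-ASCII Gurmukhi n-grams. A sketch of loading the whole profiles directory, as the detector factory must do after the conversion; the function name is illustrative and only the stdlib is assumed:

import json
import os

def load_all_profiles(profile_dir):
    profiles = {}
    for fname in sorted(os.listdir(profile_dir)):
        path = os.path.join(profile_dir, fname)
        # Text-mode open with an explicit encoding avoids depending on the
        # platform's default locale encoding, which may not be UTF-8.
        with open(path, encoding='utf-8') as f:
            data = json.load(f)
        profiles[data['name']] = data
    return profiles

profiles = load_all_profiles('nlp_resource_data/langdetect/profiles')
print(sorted(profiles))  # e.g. ['af', 'ar', ..., 'nl', 'no', 'pa', 'pl', ...]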
diff --git a/nlp_resource_data/langdetect/profiles/pl b/nlp_resource_data/langdetect/profiles/pl
new file mode 100755 (executable)
index 0000000..139647c
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/pl
@@ -0,0 +1 @@
+{"freq":{"D":76546,"E":48349,"F":59499,"G":97533,"A":113161,"B":112599,"C":110974,"L":74803,"M":131493,"N":86269,"O":63924,"H":50582,"I":84977,"J":54373,"K":109538,"U":33818,"T":71466,"W":122172,"V":29111,"P":225853,"S":204600,"R":87715,"Y":5329,"X":17841,"Z":57656,"f":154643,"g":760584,"d":1304112,"e":3541226,"b":467514,"c":1792078,"a":3833729,"n":2529442,"o":3537464,"l":1140017,"m":1150421,"j":965827,"k":1483844,"h":558914,"i":3732766,"w":2322567,"v":42704,"u":1069834,"t":1619824,"s":1819258,"r":2204575,"p":1082020,"z":1828688,"y":1372315,"x":13069,"é":11522,"á":5335,"ü":5350,"ö":4590,"ó":396123,"ę":271422,"ć":60605,"ą":370153,"ś":279181,"Ś":15543,"ń":123462,"ł":591480,"Ł":9157,"ż":255164,"Ż":6256,"ź":20372," l":139321," m":204189," n":260999," o":286725," h":33638," i":177645," j":136928," k":213385," d":243257," e":37159," f":50076," g":203060,"р":5358," a":148427," b":95754," c":133679," z":321847," u":97002," t":172994," w":969288," p":661390," s":339763," r":209751," J":52915," K":104698," H":47495," I":52640," N":80062," O":56243," L":69035," M":123296," B":105665," C":84648," A":95983," F":55238," G":73890," D":70098," E":42554," Z":55304,"к":5011," X":11746,"и":6265,"о":7740,"н":5938," S":182443," R":76715," P":214389,"а":9102," W":115231," V":20154," U":30651,"е":5223," T":64394," ż":19337," Ż":6158,"ęśc":13317," ś":51887," Ś":15346," Ł":9064," ł":17467,"A ":12056,"Da":8795,"Cz":10287,"Co":11705,"Ce":7894,"Ch":21362,"Ci":4668,"G ":10044,"Du":4576,"Do":17200,"Dr":5145,"De":9899,"Di":5692,"GC":12708,"Fe":4746,"Eu":8774,"Ge":6043,"Ga":8896,"I ":22055,"Fr":25795,"Fo":4852,"Fi":6166,"B ":5264,"C ":19133,"Au":8116,"Ar":11839,"Ba":23485,"Am":7571,"An":13752,"Al":15057,"Bu":9080,"Br":16403,"Ca":12445,"Bi":14211,"Be":12406,"Bo":15278,"Ku":5947,"Gó":8233,"Kr":17850,"Ko":31811,"Le":13922,"Li":15420,"La":13916,"Lu":9458,"Lo":10236,"Me":12645,"NG":5663,"Mi":29200,"O ":4778,"Ma":41474,"Mu":5320,"Mo":18522,"Ni":19737,"Ne":7072,"Na":24418,"P ":7412,"No":17077,"Ol":5489,"Od":7399,"PG":5183,"Ob":6870,"Gm":6336,"Gr":14758,"Go":7919,"Ha":11794,"He":11337,"II":18254,"Hi":5668,"Ho":9461,"In":12447,"Ja":16186,"L ":4866,"Je":18291,"Jo":8139,"Ka":25307,"M ":4990,"Ki":5813,"Un":7211,"VI":4920,"W ":15791,"Tu":5606,"Tr":10180,"To":9177,"Th":7956,"Te":9915,"Ta":10132,"V ":6364,"Sz":17267,"Sy":6237,"St":34755,"Su":6926,"Wo":12341,"Ws":4692,"Wi":28290,"Wa":16039,"We":8452,"Vi":5052,"X ":6678,"Pu":5975,"Pr":25703,"S ":11205,"Pe":7756,"Pa":34023,"Po":99382,"Pi":15484,"Os":6465,"Or":5855,"Se":13365,"Sc":7388,"Si":11311,"Sk":5337,"Sp":7038,"So":10398,"Ru":5061,"Rz":5267,"Sa":22530,"Re":17869,"Ro":19242,"Ra":13392,"b ":46793,"a ":1141880,"Wy":14854,"Za":17995,"Sł":20200,"i ":569208,"bó":4844,"gd":5266,"ge":34328,"ga":71454,"ać":8134,"fi":50271,"fr":12808,"fu":7503,"fo":23920,"j ":229252,"bę":5925,"có":12082,"gw":10035,"he":43730,"ha":50953,"gn":13137,"gm":84581,"gl":15240,"bą":7905,"gi":77017,"gh":5124,"gu":37274,"gr":81461,"go":201815,"du":56828,"dw":18508,"dy":56165,"dz":249442,"g ":46202,"ea":22981,"eb":20189,"ec":208980,"ed":135395,"de":106238,"dd":7144,"di":38677,"dk":20908,"dm":14590,"dl":41406,"do":162506,"dn":125390,"dp":5891,"ds":17240,"dr":43512,"ew":111327,"eu":15671,"ev":5608,"ey":6513,"ez":85170,"fa":15386,"h ":260015,"fe":16025,"eg":232853,"ef":12705,"ee":8458,"el":150281,"ek":109837,"ej":291522,"ei":21799,"ep":45340,"eo":16834,"en":264211,"em":150465,"et":92062,"es":172911,"er":319668,"ca":105725,"e 
":876959,"by":41730,"bs":19604,"br":47012,"bu":41705,"bn":8407,"bo":39861,"bl":23444,"bi":69531,"be":48450,"dc":10689,"db":7638,"da":129820,"f ":12810,"cz":335815,"cy":107837,"cu":14533,"ct":15111,"co":51690,"cn":22110,"ck":80920,"ci":275500,"cj":115414,"ch":414464,"ce":179393,"c ":34764,"az":93181,"ay":6838,"ba":43868,"d ":139962,"at":193861,"as":161357,"ar":314898,"aw":119971,"av":7852,"au":36829,"ak":99776,"al":185695,"ai":24623,"aj":148717,"ap":33608,"am":123297,"an":453452,"ac":219676,"ad":149992,"ab":34080,"ag":38473,"ah":5887,"ae":10185,"af":22452,"nu":23194,"nt":96712,"ns":36134,"nr":4954,"no":152706,"nn":38516,"ny":249928,"oe":4808,"of":20662,"oc":101234,"od":293823,"oa":8273,"ob":82692,"ię":123546,"om":108472,"on":318937,"ok":137945,"ol":232069,"oi":39360,"ją":114675,"oj":102542,"og":59938,"oh":7680,"ot":70221,"os":163642,"ov":6489,"ou":18871,"op":86592,"oo":8474,"or":254276,"ję":19551,"r ":119247,"ow":537395,"kó":27850,"oz":62053,"pe":42396,"pa":107114,"pc":8415,"pl":33619,"pn":11150,"po":424408,"ph":5594,"ił":21358,"pi":86460,"ką":16568,"iń":19637,"lo":81679,"ln":79071,"lm":9683,"ll":32626,"ls":113022,"hó":9384,"lu":76951,"lt":13727,"o ":461084,"dź":5330,"eś":75573,"mc":10115,"ma":121066,"mb":17385,"me":89660,"mi":333776,"mn":11655,"mm":6313,"mp":24445,"mo":86555,"ms":11588,"mu":41658,"ió":8421,"my":18150,"p ":18753,"na":529472,"nb":5437,"nc":75955,"nd":63112,"ne":249253,"eż":40953,"nf":7374,"ng":54651,"gł":47489,"ni":692138,"ią":60315,"nk":45573,"jw":6916,"ju":20103,"js":79998,"jn":43747,"jo":26898,"jm":12943,"dł":15854,"ki":412466,"ke":10550,"kc":19574,"ka":229229,"m ":343638,"kw":12402,"gó":15509,"ks":37081,"kt":82034,"ku":103176,"ko":241820,"kr":97482,"kl":22672,"km":9933,"eł":15052,"li":209435,"eń":19329,"lk":31238,"le":182455,"ld":11857,"lg":4525,"la":173548,"lc":6468,"lb":16636,"n ":138864,"hr":17682,"dó":7283,"ht":5694,"hu":10241,"cą":8049,"ań":46393,"ał":115023,"hi":35646,"hn":15999,"ho":81700,"id":59864,"ic":201937,"aś":6694,"ib":17573,"ia":315115,"ig":21890,"if":5037,"ie":1136068,"hy":6565,"k ":111343,"ir":31982,"is":139600,"it":68750,"iu":39428,"iv":6169,"iw":23202,"ii":58678,"ij":14562,"dą":5425,"ik":74901,"il":54561,"im":169852,"in":280897,"io":133204,"ip":17035,"aź":5844,"jc":7619,"aż":13250,"je":192771,"jd":20070,"ji":76024,"iz":35250,"l ":52041,"ja":94283,"są":9617,"tó":52700,"wz":9495,"wy":175884,"só":9957,"rę":14692,"z ":208589,"oż":86148,"wi":428233,"pł":15785,"rą":23256,"wk":11864,"wn":80692,"wo":213117,"wr":13531,"ws":100565,"wu":6452,"ró":58140,"y ":457874,"wa":271461,"oś":108251,"wc":28885,"we":125766,"oń":26624,"oł":121062,"vi":10565,"uz":15301,"uw":4835,"ve":14040,"va":7846,"x ":7519,"ui":5984,"uj":65669,"uk":36207,"ul":35319,"ue":10441,"ug":25222,"ur":117881,"us":71220,"ut":47463,"um":45758,"un":53367,"up":30095,"ty":157784,"tz":6072,"tu":85903,"tt":12443,"pó":31075,"tw":127984,"ub":63557,"ua":10045,"ud":56663,"uc":35683,"w ":734288,"to":228890,"tn":41685,"tl":10672,"ts":8380,"tr":134088,"te":190777,"kż":7374,"tk":33051,"ti":40445,"th":19993,"nę":6037,"ta":271329,"su":26228,"ss":16223,"st":440956,"sy":40789,"sz":185716,"sw":7100,"sl":4597,"sk":362869,"sn":10482,"sm":8241,"sp":72745,"so":57024,"sc":108490,"se":50637,"sh":8501,"ną":21831,"sj":10343,"si":154595,"rz":330244,"u 
":285894,"mę":6084,"sa":75170,"rr":8977,"rs":83617,"rt":71318,"ru":79284,"nó":11750,"rw":35761,"ry":128901,"rp":13436,"ro":332963,"rn":56890,"rm":40806,"rl":14366,"rk":29749,"ri":79907,"kł":24507,"rg":38504,"rf":5604,"re":197175,"iż":7681,"rd":33902,"rc":41834,"jś":4511,"rb":12273,"ra":338168,"t ":152714,"lę":5780,"mó":8836,"lą":12242,"s ":109715,"kę":4736,"py":18332,"pt":9892,"pu":38310,"pr":207712,"ps":10578,"ył":33312,"yń":8163,"zą":41404,"zę":35066,"uż":24054,"wł":15632,"zó":6325,"wę":7097,"uł":11203,"wą":11216,"wó":72388,"zg":11821,"zi":142936,"sł":33435,"zb":26540,"zc":34117,"zd":18869,"ze":326759,"tę":19569,"za":220009,"yz":12669,"zw":54934,"zy":216261,"zr":6357,"zu":31373,"zt":75735,"zo":93735,"zn":149848,"zp":13878,"zk":54785,"zj":15781,"zm":18594,"zl":6622,"yg":10175,"yf":6219,"yc":219665,"yd":32633,"yb":20679,"yw":52492,"yt":47612,"ys":92766,"yr":14041,"yp":20913,"yn":66941,"ym":103256,"yl":20573,"yk":74193,"yj":45803,"tą":4964,"yż":9849,"yś":4897,"zł":13341,"ów":161116,"ób":6381,"ój":5872,"ód":78413,"óc":5492,"ór":65730,"ól":16369,"ć ":59245,"ąd":23612,"ąc":135745,"ą ":117382,"óż":8608,"ół":34199,"ęb":8256,"ęc":19527,"ęd":32895,"ęg":11685,"ęk":11873,"ęp":16514,"ęt":22906,"ęs":9306,"ę ":96973,"ęz":11858,"ęś":18258,"ęż":7758,"ąg":22092,"ąs":12009,"ąt":12097,"ąz":17763,"ąż":21246,"ł ":59292,"łu":50120,"łt":4875,"ły":40224,"łk":15386,"łn":20167,"ło":189649,"łe":22223,"łc":7515,"ń ":16610,"ła":119873,"ńs":75332,"ńc":31072,"łó":33659,"śl":22153,"śn":19502,"śr":30296,"św":24072,"śc":92180,"Św":8984,"ś ":45778,"ść":40444,"łę":5560,"łą":10057,"źn":6441,"źd":5042,"ż ":20942,"żu":6315,"ży":40208,"żo":72896,"żn":19632,"żs":5910,"że":31935,"ża":27767,"żą":12050,"ąża":15191,"ęci":10430,"ędz":16930,"ędu":4976,"ęzy":9477,"ępu":8551,"ęks":6674,"ęst":6004,"łec":8332,"ła ":38802,"łan":5477,"ład":27517,"łac":9469,"ław":16966,"łas":4900,"łoż":68106,"łoś":19952,"ły ":21321,"ływ":5534,"łu ":9627,"łoń":16090,"łud":14450,"ług":11020,"łow":21447,"łos":9246,"łon":8256,"łno":15191,"łod":6015,"łka":8122,"ło ":21533," Ga":8827," Ge":5971," I ":4633," Fo":4786," Fr":25744," Fi":5918," Ha":11737," He":11280," Go":7898," Gr":14678,"ńcó":5655," Gm":6328," Ho":9424," II":9642," Hi":5632," Je":18244," Ja":16067," In":12303," Ka":25219," Ki":5713," Jo":8117," La":13845," Le":13833," Li":15290," Ko":31759," Kr":17816," Ku":5884," Gó":8232," Ma":41192," Mi":29080," Me":12575," NG":5574," Lo":10195," Lu":9428," Ne":6981," Na":24184," Ni":19681," Mo":18451," Mu":5268," Am":7370," An":13699," Al":15025," Ba":23126," Au":8090," Ar":11761," Be":12343," Bi":14133," Bo":15194," Br":16331," Bu":9050," Ca":12220," Ce":7873,"ńcz":4853," Ci":4561," Ch":21239," Co":11557," Cz":10255,"ńce":15534," Da":8706," Di":5639," De":9843," Dr":5105," Do":16986," Du":4568," Eu":8764," Fe":4580," Wy":14786," Ws":4674," Wo":12135," Wi":28155," We":8391," Wa":15970," Sł":20157," Za":17851," a ":33564," Os":6455," Or":5834," Po":99137," Pi":15394," Pe":7708," Pa":33893," No":17025," Ol":5468," Od":7369," PG":4658," Ob":6845," Ra":13288," Ro":19091," Re":17774," Pr":25548," Pu":5949," Sz":17219," Sy":6223," Su":6907," St":33766," Ta":10078," Th":7821," Te":9806," Tr":10135,"ńst":9625," To":9099,"ńsk":65260," Ru":5052," Sa":22468," Rz":5260," Si":11239," Sc":7241," Se":13251," So":10324," Sp":6969," Sk":5318," Vi":4980," Tu":5482," W ":13679," Un":6885," ja":24161," je":81709," im":8378," in":25684," is":7255," ka":27845," ki":9981," gw":7366," j ":15130," ha":5671," he":5463," gm":81906," gr":42165," go":8008," hi":10476," hr":4991," 
gł":25015," ni":54044," na":181345," mu":9539," mo":21735," ok":44273," on":4611," og":5234," od":78508," of":7838," ob":36134," no":11764," le":15807," li":34396," la":36395," ku":10205," kt":33550," ks":6999," kw":7071," km":9336," kl":10638," kr":29471," ko":64410," me":15598," mi":86248," o ":18463," ma":52176," lu":42219," lo":7327," ad":8368," am":10571," an":21235," ak":7205," al":15945," au":9619," ar":12965," as":16623," ba":14299," bi":14788," be":7689," bo":7340," by":20334," bu":7038," br":13094,"łuż":7054," ca":5242," el":9782," fa":6215," fu":5153," fr":6875," fo":9292," fi":17828," ge":6292," ga":16786," i ":123913," co":7528," ce":14033," ch":18489," ci":24028," da":17665," cy":4833," cz":55270," do":83523," dn":7531," dl":8990," dr":18598," de":38500," di":6649," dw":10226," du":6950," dz":27524," dy":8440," zm":8262," zo":12577," zn":26389," zw":23230," za":89284," zd":5616," ze":19904," zb":9872," zi":4717," sł":12543," są":8630," z ":107756," wy":79384," wz":8136," wł":13150," uż":7103," ru":4802," ry":6135," rz":19343," sa":14213," se":14117," si":84234," sk":26716," sp":31645," so":11139," ra":15542," re":46718," ro":89060," pu":7297," pr":169964," os":21891," ot":4775," op":16859," or":30563," r ":7257," ję":8026," oz":4712," pe":8239," pa":50520," pl":22703," po":329900," pi":36353," wa":17275," we":29944," wc":10065," ró":17486," wr":10056," wo":81271," ws":27715," wi":85470," pł":9284," w ":591873," ty":20926," tw":5981," pó":15190," tu":7399," us":6046," ut":8073," ur":32556," uk":7357," ul":5488," ta":20050," sw":5466," sz":26144," sy":14406," st":72132," su":7093," tr":22707," to":40581," th":4944," te":42334,"łów":28362," ła":6906,"łąc":7474," Św":8979," śr":24528," św":19258," że":5562," ży":7572,"GC ":12479,"Eur":7453,"Fra":21145,"II ":13664,"Her":4920,"Gra":4795,"Gmi":6178,"Bar":4693,"Bra":4520,"Cha":6684,"Dol":5831,"Nie":14925,"Now":7712,"Nor":6043,"PGC":4549,"Pie":5284,"Par":14525,"Poł":4889,"Pro":6911,"Prz":9834,"Pod":4669,"Pol":63410,"Pow":4846,"Rad":5296,"Jan":4677,"Jes":5251,"Kar":5182,"Kon":5190,"Kra":7526,"Koś":5328,"Gór":8184,"NGC":5540,"Mar":15306,"Mon":4650,"Mie":5310,"Mis":5057,"Wys":5223,"Wie":13213,"War":8050,"Sło":18142,"Str":5084,"Sta":19220,"Sie":4994,"Sai":4713,"Sch":5152,"Ros":4617,"Uni":6296,"The":5470,"bio":14193,"bli":16339,"bor":8274,"bow":6707,"bar":9707,"bec":8656,"ber":14109,"bel":6901,"bez":6239,"bia":7547,"bie":21077,"ca ":84053,"cac":4624,"był":19691,"ce ":116313,"bro":10887,"bra":11568,"bry":5366,"brz":9909,"bsz":5466,"bst":6710,"bur":11294,"bum":8181,"bud":9663,"by ":10708,"aka":5857,"am ":8178,"akc":4640,"aki":8717,"ajo":5413,"ajm":4898,"ajw":6681,"aju":12289,"al ":15001,"aja":6999,"ajd":19542,"aje":5093,"ain":11213,"ak ":12839,"aj ":9008,"agr":6880,"ago":6833,"ają":56423,"anu":7044,"any":57351,"ano":19191,"ann":5995,"ant":15544,"ans":10106,"ane":52267,"ang":21057,"ani":105534,"ank":9718,"ana":52659,"anc":30280,"and":28542,"amo":13297,"amp":4556,"ami":37349,"ame":32849,"ama":9031,"alo":8151,"aln":38047,"all":6281,"ali":35299,"ale":28164,"ala":17506,"alb":10365,"an ":25897,"akt":18470,"ako":21085,"abi":5944,"abs":7005,"ae ":4965,"ad ":24951,"ac ":5802,"afi":15590,"aga":5833,"ado":9262,"adm":7603,"adi":7361,"ade":12127,"ady":8896,"adz":20389,"ack":10335,"acj":42190,"aci":10932,"ach":91172,"ada":31955,"acz":24570,"acy":17521,"azo":11989,"azu":9191,"azw":14194,"aza":4626,"azd":9097,"azy":7482,"az ":24099,"ba ":9607,"at 
":32096,"are":9332,"ard":17168,"arc":24556,"ara":29532,"aro":21478,"arn":16904,"arm":8931,"arl":5792,"ark":13034,"ari":18485,"aru":4729,"anó":4796,"ars":25559,"art":41017,"asa":22529,"ary":12822,"arz":32133,"asi":8550,"aso":5103,"ask":9308,"ar ":9731,"api":8704,"apo":6161,"as ":15649,"aut":8273,"awa":20666,"aws":12931,"awn":15933,"awo":12547,"awi":30734,"asz":13814,"ata":29924,"ść ":40186,"ast":57235,"asy":6947,"atk":6138,"atr":8063,"ato":27910,"ate":18972,"akż":7374,"ati":9455,"aw ":7482,"atu":20800,"aty":21623,"auk":5346,"Świ":8845,"ści":91962,"śni":16799,"śli":5074,"śro":7633,"śre":20628,"ślą":5357,"świ":21339,"jeg":6331,"jej":4751,"jed":34575,"jen":6642,"jew":58578,"jes":38109,"ji ":74801,"jal":7148,"jak":18567,"jaw":9057,"aźd":4510,"je ":30364,"jdu":17294,"jmu":7356,"jna":5939,"jny":19293,"jne":11196,"jow":13446,"jon":8504,"ito":6446,"ity":14838,"isk":17265,"ist":59047,"isz":10261,"ita":8740,"ite":12873,"iwe":4858,"iwi":4661,"ius":4598,"ium":6118,"is ":13674,"ion":48352,"iop":4645,"ior":18228,"ios":6430,"iot":6025,"ipc":5025,"ikó":5855,"iow":26535,"isi":4518,"isa":8456,"iu ":24955,"ire":4546,"ira":6326,"ja ":38334,"izy":4891,"izo":6649,"izm":4986,"iza":9287,"kim":128411,"kic":24931,"kie":126958,"dłu":9193,"km ":8293,"ki ":116429,"kań":16086,"kcj":11696,"kra":24036,"kre":18070,"kry":8710,"krz":7284,"ku ":72509,"kro":6037,"kow":64383,"kos":4831,"kor":8298,"kop":10012,"kon":28370,"kom":18077,"kol":18048,"kok":4910,"klu":5529,"ko ":39683,"kle":4640,"kla":7237,"jsk":40065,"jsz":8170,"ju ":15477,"jsc":28565,"kaz":5339,"kat":10533,"kar":21380,"kan":15572,"kal":9168,"kam":6459,"kad":5909,"kac":12564,"ka ":110422,"ha ":5601,"han":8170,"har":13309,"he ":11732,"her":6722,"ał ":24279,"cą ":7445,"ań ":4989,"ałe":8027,"ała":29091,"ało":22494,"his":8897,"ały":11839,"ańc":7994,"ańs":33252,"go ":160963,"god":4635,"gni":4919,"gmi":81864,"gos":5577,"gor":5388,"gow":8194,"gu ":24037,"gro":9171,"grz":5388,"gry":6845,"gru":20718,"gra":31694,"gwi":7275,"ców":11188,"iaj":4940,"iam":4548,"ial":8076,"ian":22005,"ias":25407,"iar":9378,"iat":20282,"ic ":6481,"iac":9488,"iad":9916,"iaz":7546,"id ":16421,"ia ":151774,"iet":14221,"iew":12649,"iel":59211,"iem":53337,"ien":34053,"ier":72550,"ies":27602,"ied":28632,"ieg":53169,"iek":15875,"iej":135012,"iec":108952,"icy":13868,"ict":6284,"icj":4509,"ick":18455,"ici":5695,"ich":42480,"ice":16432,"ie ":425948,"ica":18194,"ide":5884,"ida":21189,"icz":59703,"ijs":6419,"im ":137877,"ika":22144,"ii ":57658,"ibą":4979,"iał":33757,"ik ":16857,"imp":4698,"imi":11049,"inc":8772,"ind":7280,"ina":57100,"inn":9746,"ino":8623,"int":11381,"ins":5688,"ine":11157,"ież":14661,"ing":19340,"ini":80940,"iny":19314,"iko":7564,"iki":9122,"in ":24060,"ilo":5237,"ill":10795,"ień":7511,"ilm":5161,"ili":7909,"ieś":51901,"io ":7198,"how":11191,"hol":4719,"hor":6372,"hod":42292,"hni":9648,"hra":5522,"dów":6742,"fia":9588,"ewó":56727,"ez ":38609,"ews":7573,"eze":8964,"ezi":7185,"ezj":6360,"eta":10069,"etn":9079,"esp":11621,"eso":4594,"est":61386,"esz":24722,"eto":22583,"etr":10559,"ety":8345,"ew ":5885,"ewi":12221,"ewo":9270,"ewn":8260,"ewa":5038,"er ":34400,"epa":15408,"eją":6188,"es ":24102,"epu":5035,"epr":8058,"eri":17872,"erg":8537,"ere":16329,"erc":5702,"era":33404,"erb":5967,"et ":10917,"esk":6857,"esi":11275,"esa":4705,"erz":21989,"ery":23381,"eru":5751,"erw":27139,"ert":7815,"ers":24326,"ern":18633,"erm":6952,"erp":6669,"ero":30451,"eki":4817,"eko":5980,"eks":10430,"ekt":18859,"eku":6941,"en 
":29550,"ela":11200,"ele":25422,"eli":15142,"eln":10771,"elk":19485,"ell":8172,"elo":10644,"elu":7577,"els":11724,"emc":9224,"ema":7081,"eme":7011,"emo":5689,"emi":25674,"ene":12261,"ena":8388,"end":7106,"enc":29197,"eno":4952,"enn":11782,"enk":5319,"eni":82488,"egł":18236,"ens":6693,"ent":47936,"ego":161252,"egi":24719,"ej ":196844,"egu":4666,"ek ":37209,"ein":6239,"el ":16434,"ejs":51321,"ejo":11481,"ejm":7156,"eje":7376,"eka":10766,"em ":73512,"gio":21748,"gie":12191,"gic":6767,"gii":7756,"gia":4982,"bą ":6293,"gi ":9182,"gen":12957,"gel":5153,"gar":6319,"gat":9377,"gaj":4881,"gal":8113,"gan":14815,"ga ":14947,"fun":4925,"fra":5853,"for":16996,"fic":8573,"fil":7378,"fik":6148,"ać ":7866,"czą":12269,"da ":43439,"czł":5966,"de ":15481,"dby":5063,"czę":22437,"dal":5635,"daj":6034,"dar":8045,"dan":24834,"daw":15145,"dcz":6407,"ctw":8655,"cy ":54622,"cus":4654,"cym":5360,"cyj":21911,"cyc":12366,"cz ":10143,"czy":47263,"czk":8277,"czn":94844,"czo":18714,"cza":38835,"cze":61835,"cki":56501,"chó":9144,"co ":6133,"cni":7419,"cią":23459,"cne":4800,"cny":4546,"cow":30428,"cję":4623,"cez":4969,"ch ":250098,"cer":7119,"ces":6923,"cen":13171,"cej":5254,"cel":7686,"ceg":6128,"ci ":63750,"cha":21398,"cia":18501,"cie":119729,"che":16574,"chi":12341,"cho":63783,"chn":12122,"chr":6017,"ciw":5743,"cja":35582,"ciu":5042,"cin":7038,"cio":9246,"cka":9993,"cji":61836,"ed ":7038,"ebi":5142,"ec ":12319,"dzą":8566,"ega":9608,"edl":5574,"edn":51333,"ede":12015,"eda":5582,"edz":17029,"edy":9486,"eds":6095,"ecj":5624,"eck":33379,"ech":31282,"eci":73659,"ece":7263,"ecz":26497,"ect":5183,"ecn":7076,"dyn":9974,"dys":5848,"dyc":5708,"dy ":20176,"dzy":15488,"dzt":56173,"dzo":7609,"dzk":13570,"dzi":114188,"dze":17343,"dza":11866,"dor":6162,"dom":7183,"dol":8096,"dok":4665,"doz":5829,"dow":41632,"dos":5363,"dmi":12552,"dna":7598,"dne":6153,"dni":85396,"dno":17791,"dny":5925,"dob":7583,"dst":8461,"duj":19180,"duk":7030,"dra":4842,"drz":4777,"du ":16409,"dro":12893,"dru":8195,"dia":9387,"der":13407,"del":4853,"dek":8141,"den":21584,"dem":8020,"dep":15043,"dle":22301,"dla":14281,"dko":7298,"do ":56353,"dio":7032,"die":6315,"rga":13673,"rgi":6819,"ret":5375,"res":16392,"rez":11929,"rg ":8389,"rdz":8372,"rea":6346,"rec":6681,"red":24223,"rej":11845,"reg":27968,"rem":8867,"ren":18517,"rek":5432,"rep":5124,"rcz":7138,"re ":17858,"rci":7329,"rch":9881,"rce":4547,"rca":6614,"raw":21807,"raz":29604,"rd ":6953,"ras":7862,"rat":14245,"raj":22570,"rai":5724,"ran":52894,"ram":15553,"ral":18585,"rak":14728,"rab":9860,"raf":17050,"rad":12311,"rac":33149,"rpn":4743,"ros":15610,"rot":7718,"rom":11621,"ron":23271,"rop":15650,"roz":22873,"row":46481,"rob":7374,"rod":59324,"roc":14309,"roj":6342,"roi":15518,"rol":7056,"rok":41872,"rog":11103,"rno":5279,"rny":6200,"rna":10748,"rne":9659,"rni":18640,"ro ":7330,"rma":13967,"reś":7847,"rmi":12636,"rla":5043,"rki":6506,"rka":6181,"riu":5278,"rii":14143,"rin":4886,"ria":19003,"kła":22407,"ric":4906,"rie":5586,"rwo":5268,"rws":10740,"nów":11177,"rz ":14438,"ryb":4752,"ryc":13818,"rug":6274,"rud":6357,"ruc":5354,"rup":16289,"run":6993,"rum":5890,"ruk":5290,"rus":7391,"rwa":7707,"rwc":4554,"ry ":30832,"rsk":40769,"rsz":11669,"rta":19705,"rst":9077,"rto":10205,"rte":6255,"rti":6031,"rtu":4870,"rty":7882,"rt ":8136,"ru ":8305,"rzę":9880,"sad":8969,"sam":12218,"san":8261,"sar":4951,"sa ":26422,"rzą":15714,"rze":166268,"rza":14480,"rzc":6541,"ryw":7519,"rys":7800,"ryt":14172,"ryk":17536,"rym":6312,"ryn":5422,"rzy":76075,"rzo":14189,"ną ":19254,"si ":6951,"sie":37561,"sia":7216,"sk ":6051,"sin":6607,"se 
":4707,"sce":55140,"sch":21250,"sco":24974,"ser":11407,"sen":9224,"spo":28928,"spr":6284,"spe":6485,"spi":8532,"sow":15205,"son":7183,"sok":5885,"się":66198,"sob":7483,"su ":8612,"skł":11877,"st ":49786,"ski":241685,"sko":33991,"skr":5762,"sku":9283,"ska":44791,"sią":6562,"sz ":5749,"syn":5333,"sys":6083,"sza":29086,"stę":16122,"sze":30322,"szc":22167,"szp":5112,"szo":8648,"szt":14601,"szk":19152,"szy":32568,"ste":40839,"sta":110434,"stn":10874,"sto":55229,"sti":7248,"stk":11925,"stu":7827,"spó":13799,"stw":35135,"str":55283,"sty":30683,"sy ":5848,"tak":16147,"tal":13415,"tac":30341,"tad":4771,"taw":14561,"tat":11340,"tar":27950,"tan":40782,"tam":18213,"te ":12262,"ta ":47407,"jęz":8303,"pa ":7030,"ową":6832,"ję ":5820,"pca":4745,"par":31402,"pas":19500,"pac":4930,"pad":8610,"pal":4646,"pan":7407,"ką ":14644,"pań":7582,"pec":4830,"per":13425,"pej":4544,"pla":21906,"ple":4628,"ińs":18739,"pie":29091,"iłk":7468,"pio":5728,"pir":4743,"pis":14610,"poz":13437,"pow":94565,"por":15922,"pop":9036,"pot":7235,"pos":20600,"poj":4594,"pom":22714,"pon":6930,"pok":5372,"pol":56274,"poc":15903,"pod":44935,"pił":8661,"po ":9878,"pni":8916,"pub":6917,"pra":25179,"prz":116916,"pu ":5270,"pre":13822,"pro":44748,"put":4642,"puj":8407,"poł":82723,"py ":16720,"ląs":7826,"mów":5303,"ra ":38610,"ngi":4912,"ni ":23063,"nge":9484,"ią ":10996,"neg":51936,"nej":47963,"nek":13069,"nem":5441,"ner":9906,"net":21971,"nes":4536,"ndy":5147,"ng ":20546,"eży":7778,"nci":16716,"ncj":34860,"nce":9629,"ne ":77120,"eż ":13347,"ndr":6665,"ndo":5824,"ndi":8916,"nde":9089,"nda":6965,"nak":5404,"nal":20391,"nam":4778,"nan":15192,"nap":5117,"nar":14632,"nac":26022,"nad":18221,"nag":7300,"naj":37748,"nd ":10560,"nau":6824,"nat":12135,"nas":8986,"naz":14802,"naw":6217,"na ":315412,"moż":6857,"nyc":66044,"ny ":143123,"noś":21837,"nty":9827,"nto":7930,"ntu":6194,"ntr":10458,"nta":17677,"nte":16419,"nst":10393,"nu ":12191,"nt ":17005,"ns ":4556,"noc":23507,"nom":6299,"nos":13706,"nor":4839,"now":33236,"nne":10038,"nna":4663,"nni":6637,"nię":6603,"nny":11159,"głó":21438,"no ":23520,"nki":7495,"nkc":4543,"nka":8221,"nku":6543,"nko":6939,"eżą":8181,"iąg":20115,"iąz":16848,"nii":15486,"nie":303754,"nic":58316,"nia":115515,"niz":10946,"niu":18651,"nis":21291,"nio":33347,"gło":21325,"nim":8023,"nin":5902,"nik":44866,"ogr":14773,"ogi":15434,"ogo":4557,"oga":7462,"ją ":20069,"oid":31094,"ok ":8415,"ojs":6215,"ojn":8775,"oje":68149,"jąc":92228,"oce":7209,"och":19137,"oci":8093,"ock":7255,"ocn":14524,"obs":6969,"oby":5467,"ode":8229,"odk":11049,"odl":22987,"odo":21990,"odp":5300,"odn":35244,"ods":5690,"odr":4605,"ocz":25360,"of ":6324,"odc":6738,"odb":6509,"oda":13640,"odz":61637,"ody":9005,"odu":13447,"ofi":6001,"ięk":9725,"ięc":10576,"ięd":13216,"ięt":12440,"od ":47389,"obo":8696,"obr":12863,"obl":4516,"obn":4740,"obi":10703,"obe":11907,"nym":35914,"ię ":65182,"owy":75588,"osó":4510,"ków":24138,"ows":40460,"own":11322,"owo":53873,"owi":138750,"ozy":6621,"ozw":5341,"ozn":9119,"osł":6618,"ozb":6448,"oty":9500,"ote":7310,"otr":5518,"oto":11888,"otn":8681,"osz":15296,"ost":51897,"ota":6378,"osi":14132,"osk":7450,"ose":9237,"osp":5848,"oso":13740,"oró":5598,"owc":6201,"owa":124010,"owe":69144,"opo":27225,"opi":9321,"ope":10070,"opa":11132,"os ":8863,"opu":5185,"opr":8595,"or ":15758,"ork":4605,"orm":16889,"orn":6157,"oro":17951,"orc":4621,"ord":8495,"ore":9264,"org":11803,"ori":12364,"osa":8380,"ort":15205,"ors":22187,"oru":7711,"orz":34348,"ory":13310,"ora":31522,"ola":10017,"on 
":32944,"oli":42609,"ole":26115,"ols":92465,"oln":14866,"olo":22365,"olu":5605,"oka":12522,"om ":7335,"oki":5566,"okr":39604,"oko":19282,"ogó":4810,"oku":40113,"ona":86386,"ond":4877,"one":29526,"oni":52759,"onk":6206,"onn":4626,"ono":16680,"ons":8848,"ont":10876,"onu":6969,"ony":38958,"oma":14351,"ome":9149,"omi":22956,"omp":9870,"omo":23578,"omu":5636,"la ":29248,"le ":27634,"lac":9698,"lak":7036,"lan":38327,"lam":5773,"lar":10967,"lat":37023,"las":14320,"ld ":4729,"lbu":8442,"koł":12944,"kul":5213,"kuj":7417,"kwi":7243,"krą":16281,"koś":9426,"kró":6396,"kte":5141,"ksz":11351,"ksi":5778,"kty":11231,"ktr":5362,"ktu":7021,"kto":8566,"krę":7083,"gól":4564,"gór":7545,"któ":37445,"lok":4711,"lon":16670,"log":15244,"lot":5895,"low":11925,"lno":13692,"lni":16273,"leż":15711,"lne":15681,"lny":18798,"lna":11882,"lud":5047,"lub":38824,"lsk":52175,"lu ":15294,"lsc":50391,"li ":18475,"lew":7878,"les":10216,"let":6569,"ler":6678,"lem":11032,"len":14544,"lek":14961,"lej":10577,"leg":25572,"lec":7985,"lla":5033,"lle":8878,"lli":5642,"lko":12509,"eńs":10006,"lka":5102,"lki":9010,"ll ":4740,"lit":23937,"lis":17192,"lip":7716,"lin":28801,"lim":6337,"liz":9545,"liw":7216,"lic":38891,"lia":7515,"eń ":8305,"lik":8729,"lii":5868,"ma ":13583,"mac":9833,"eś ":42823,"maj":8752,"mar":13792,"mas":6086,"mal":6141,"man":12874,"maz":13085,"mat":15538,"me ":5269,"eśc":9065,"eśn":12795,"eśl":8074,"mcz":8811,"met":12557,"mer":18450,"men":33777,"lut":5364,"hód":6854,"mpi":5561,"miń":6040,"moc":8677,"mod":4841,"mon":7419,"mow":13205,"mor":22691,"mu ":12663,"msk":7954,"my ":8132,"muj":8914,"mun":5245,"muz":6245,"mał":6431,"mi ":36519,"min":109517,"mis":7482,"mit":5081,"mic":8508,"mia":35513,"mie":85656,"mię":17160,"mni":5567,"wą ":9465,"wód":59175,"wór":6020,"źdz":4969,"zta":5990,"ztw":56537,"zu ":8060,"zuj":5309,"zur":6895,"zy ":52536,"zwa":16748,"zwi":19757,"zwy":6194,"zyw":7464,"zys":27664,"zym":15419,"zyn":18487,"zyk":17972,"zyl":5653,"zyj":5225,"zyc":26077,"zyd":4698,"zył":4808,"zi ":13438,"zał":9317,"zgr":4705,"zec":34126,"zed":16305,"zeg":13280,"zej":12005,"zeb":5093,"zdo":9053,"zes":26033,"zez":38687,"zew":14699,"zen":40191,"zem":9711,"zel":6475,"zek":13631,"zer":16763,"ze ":43444,"zch":5824,"zbi":11943,"zcz":25210,"zac":28997,"zaw":17852,"zaj":18917,"zam":9131,"zan":18287,"zak":6986,"zal":7485,"zar":15092,"zap":6541,"zas":21035,"zny":34365,"zos":17618,"zon":31868,"zow":26765,"zpo":4827,"ześ":12448,"zmi":5909,"zna":56249,"zno":5266,"zne":31393,"zni":17047,"zm ":5171,"zka":12896,"zko":15029,"zki":16987,"zeń":5484,"zib":8100,"zia":21275,"sła":11498,"zie":58529,"zin":14757,"sło":8768,"zio":8015,"słu":9392,"zja":5084,"zji":8382,"yty":7484,"ytu":9487,"yto":6663,"źni":4961,"yte":4862,"yta":5297,"ysz":7085,"yst":44231,"yso":5982,"ysp":7012,"ysk":11600,"tęp":15379,"za ":37664,"yzn":5243,"ywa":26508,"ywi":7128,"ywn":5067,"yce":6751,"ych":133947,"yci":10363,"ycz":50193,"yda":15003,"żaj":15742,"yck":5423,"ycj":6652,"że ":16017,"yjs":11565,"yka":23821,"ym ":69059,"yki":7812,"ykl":4610,"yko":9261,"yn ":8183,"yli":7761,"ymi":14584,"yms":4916,"yna":17425,"yni":13954,"yno":4822,"żen":5078,"yk ":13655,"yjn":27661,"tów":16736,"tór":35547,"są ":7387,"ożo":65228,"oży":4895,"oże":6845,"wy ":42246,"wsp":8605,"wsz":18473,"wst":10921,"wsc":10075,"wsk":48535,"rąż":16283,"wys":19278,"wym":21711,"wyk":10398,"wyn":5788,"wyd":14033,"wyb":6431,"wyc":28770,"sów":5741,"woś":26886,"wo 
":32774,"wna":7537,"wne":21421,"wni":31749,"wią":20802,"wno":4837,"wka":4779,"wrz":6674,"wod":23078,"wię":20120,"wny":9160,"wow":8334,"wor":17209,"wol":5953,"woj":77026,"wcz":7095,"ośl":6671,"wch":7307,"we ":41958,"ośc":57589,"wca":6919,"wer":9502,"wej":33041,"weg":17678,"wed":4775,"wał":10230,"ość":33539,"wi ":5607,"pły":9060,"wis":10022,"wiz":5342,"wie":266248,"wid":6688,"wic":20177,"win":12633,"wia":38637,"wa ":75032,"wan":92067,"wal":11772,"waj":5843,"wat":12396,"war":32065,"wac":6719,"róż":7313,"wad":9110,"rów":28327,"ród":6783,"ról":5670,"oła":8333,"oło":72193,"ołe":8524,"ołu":20038,"ońc":21153,"ver":4892,"uzy":7699,"usk":8960,"usz":14475,"ust":14884,"ute":10402,"utw":6376,"uto":11126,"us ":13666,"ura":9619,"ure":5261,"urg":10533,"uro":16796,"urs":7537,"ury":7673,"urz":9377,"ują":40031,"upa":5160,"ur ":29893,"upy":9651,"umi":4996,"ume":5036,"unk":16732,"uni":8824,"und":6279,"une":6969,"uko":7594,"um ":20351,"uka":4715,"ult":6004,"uli":7259,"ula":8673,"uje":18646,"uja":4953,"ugi":6634,"ucz":7047,"udn":21514,"uch":15770,"udo":10709,"udz":6914,"ub ":30727,"pół":22236,"ubl":8802,"ube":7781,"tyw":7322,"tyj":4614,"tyk":20395,"tyl":6759,"tym":7939,"tyn":8462,"typ":6891,"tys":5969,"tyt":6677,"twó":5271,"ty ":25638,"twa":27679,"tur":19565,"tun":9184,"tyc":48848,"two":26167,"pól":4883,"twi":61091,"tre":5469,"tra":36373,"tri":6420,"tru":11588,"tro":23044,"trz":32044,"tu ":25361,"try":9123,"to ":54716,"tni":28308,"toc":7538,"toi":15874,"tos":10191,"tow":38410,"tom":6513,"ton":10929,"tok":8683,"tol":14627,"tor":34275,"top":9511,"tin":4571,"tio":8819,"tki":10465,"tko":7816,"tka":9207,"tle":5081,"tem":19837,"ten":10208,"tej":7771,"tek":9091,"tel":10074,"kże":7372,"teg":14724,"tec":7568,"ter":68917,"the":6053,"tał":27215,"żąc":10258,"zło":10911,"ył ":11114,"ży ":9032,"zęś":17553,"życ":6860,"yła":8647,"żyn":4577,"żyw":6989,"yły":6907,"yńs":6994,"żni":7269,"żon":68291,"zęd":7177,"zęs":4784,"ższ":5421,"ząd":13275,"ząc":15081,"zą ":5842,"ząt":4518,"uży":12473,"zów":5144,"wła":9785,"óżn":6877,"ół ":8476,"ółn":14744,"ów ":112668,"óra":6553,"óre":15215,"órn":8319,"óry":15904,"ór ":5776,"ówk":5666,"ówn":37744,"ódz":62547,"ód ":10612,"ób ":4595,"óln":9187,"ągu":15768,"ądz":6409,"ącz":8602,"ący":47223,"ące":25280,"ąca":47593,"ązk":9275,"ąza":4528,"ąsk":8772,"ątk":5004},"n_words":[44927968,50956492,36530760],"name":"pl"}
\ No newline at end of file
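Each profile file added by this change is a single-line JSON object with three fields: "freq" (occurrence counts for 1- to 3-character n-grams, where a space inside a key marks a word boundary), "n_words" (the total number of 1-, 2- and 3-grams counted, in that order), and "name" (the language code, e.g. "pl" above). A minimal Python 3 sketch of reading such a file and turning a raw count into a relative frequency follows; it mirrors the idea behind the upstream port's DetectorFactory.load_profile rather than its exact code, and the path and example trigram are illustrative:

    import json

    def load_profile(path):
        # One profile is a single JSON object:
        # {"freq": {...}, "n_words": [n1, n2, n3], "name": "pl"}
        with open(path, encoding='utf-8') as f:
            return json.load(f)

    def relative_freq(profile, ngram):
        count = profile['freq'].get(ngram, 0)
        # n_words[len(ngram) - 1] is the total count of n-grams of that length
        total = profile['n_words'][len(ngram) - 1]
        # True division: under Python 2 this integer division would yield 0,
        # one of the behaviours the py2-to-py3 conversion has to care about.
        return count / total

    profile = load_profile('nlp_resource_data/langdetect/profiles/pl')
    print(profile['name'])                # 'pl'
    print(relative_freq(profile, 'ie '))  # trigram frequency from the data above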
diff --git a/nlp_resource_data/langdetect/profiles/pt b/nlp_resource_data/langdetect/profiles/pt
new file mode 100755 (executable)
index 0000000..bbdf3ed
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/pt
@@ -0,0 +1 @@
+{"freq":{"D":84998,"E":149956,"F":100967,"G":92106,"A":261572,"B":132089,"C":229572,"L":100908,"M":162595,"N":83569,"O":115743,"H":53006,"I":86169,"J":66643,"K":27233,"U":54108,"T":95611,"W":30243,"V":63770,"Q":7457,"P":198289,"S":200312,"R":102796,"Y":7968,"X":13707,"Z":9504,"f":420930,"g":646820,"d":3123972,"e":5421795,"b":522239,"c":1736812,"a":5855900,"n":2824771,"o":4680584,"l":1547279,"m":2105049,"j":94292,"k":164391,"h":490405,"i":3472398,"w":44026,"v":454134,"u":1899338,"t":2255026,"s":2945228,"r":3035316,"q":191465,"p":1075380,"z":175309,"y":94919,"x":113550,"²":81071,"É":13693,"Á":9519,"í":209507,"ê":67610,"é":451892,"è":5136,"ç":214936,"ã":317848,"â":37738,"á":187832,"à":26465,"ü":5538,"ú":40758,"ô":23605,"õ":33734,"ó":129413," l":122890," m":228220," n":368310," o":307851," h":135081," i":114948," j":44060," k":90787," d":1636882," e":668130," f":265709," g":86172," a":553690," b":90195," c":638137," u":471782," t":166023," v":73530," q":119308," p":552163," s":358148," r":175170," J":65167," K":25787," H":50638," I":64806," N":77349," O":109788," L":96197," M":155484," B":122895," C":211790," A":232527," F":95595," G":81500," D":78496," E":140343," Z":9058," Y":7429," X":9721," S":182999," R":96218," Q":6965," P":187689," W":28455," V":54881," U":50226," T":85637," á":66191," à":25954," é":287636," ú":5989," Á":9484," É":13626,"A ":78627,"Da":9542,"Cu":6462,"Cl":8427,"Co":62457,"Cr":10302,"Ce":12150,"Ch":23975,"Ci":8714,"Ed":5016,"Do":10375,"De":23490,"Di":17511,"GC":5835,"Fe":14358,"Fa":9224,"Eu":7410,"Es":76180,"En":6707,"Em":8917,"El":11312,"Ge":12507,"Ga":11449,"I ":12898,"Fu":6893,"Fr":18796,"Fo":24543,"Fl":6042,"Fi":9055,"C ":14112,"Au":8700,"Ar":20685,"At":7248,"As":11784,"D ":5749,"Ba":33758,"Ag":5894,"Ab":6380,"Ac":5079,"Am":13589,"An":22486,"Al":38766,"Bu":7329,"Br":31392,"Ca":66526,"E ":5464,"Bi":7032,"Be":17271,"Bo":16310,"Le":17360,"Li":18877,"La":20252,"Lu":9478,"Lo":21075,"Me":18166,"NG":5897,"Mi":22675,"O ":59245,"Ma":62167,"Mu":14513,"Mo":23160,"Ni":6791,"Ne":10091,"Na":16341,"P ":5903,"No":30608,"Ol":8785,"Gi":5566,"Gr":19098,"Go":9769,"Gu":13341,"Ha":12782,"He":10653,"II":8816,"Hi":6333,"Ho":11804,"In":21460,"Ja":20297,"Je":5208,"Jo":23876,"Ju":10807,"Ka":6332,"Um":5798,"Un":17690,"Tu":6480,"Tr":12997,"To":12084,"Th":10714,"Ti":5247,"Te":19234,"Ta":10759,"UA":15545,"V ":6036,"St":11159,"Su":24311,"Wi":7399,"Sã":16301,"Wa":7041,"Vi":19753,"Va":11369,"Ve":14368,"Pr":23453,"S ":9133,"Pe":21497,"Pa":48708,"Pl":5522,"Po":57527,"Pi":15167,"Os":8721,"Or":9792,"Se":27899,"Sc":6387,"Si":12578,"Sh":5677,"So":14955,"Sa":39239,"Re":33199,"Ri":19155,"Ro":19693,"Qu":6140,"T ":5229,"Ra":8949,"b ":50847,"a ":2181662,"i ":168232,"ge":70306,"ga":87148,"bé":16913,"fl":8986,"fi":74478,"fr":59032,"fu":27785,"fo":117850,"dá":5256,"he":73854,"ha":210111,"gn":21302,"cê":5480,"gl":18191,"cç":8441,"gi":116568,"gh":7414,"gu":120239,"gr":61974,"cí":25911,"go":74169,"du":48733,"g ":26553,"ea":109055,"eb":27192,"ec":114155,"ed":73900,"de":1384296,"di":203085,"dm":36748,"do":671969,"ds":6146,"dr":29810,"ew":5596,"ex":56960,"eu":52678,"ev":46416,"ey":9665,"ez":25288,"fa":47754,"h ":22234,"fe":50703,"eg":161615,"ef":22289,"ee":12045,"el":215678,"aç":106998,"ej":12312,"ei":173784,"ep":64753,"eo":27810,"en":615409,"em":301673,"et":118849,"es":604827,"er":508227,"eq":14954,"aí":13564,"ca":322979,"e ":2017914,"bs":7191,"br":100433,"bu":28485,"bo":46844,"bl":22439,"bi":100249,"be":49470,"da":652111,"f 
":9921,"cu":52185,"ct":31468,"cr":50241,"co":536282,"ck":15123,"cl":33604,"ci":328384,"ch":65018,"ce":215952,"c ":14270,"az":18902,"ay":13866,"ba":74690,"d ":43293,"at":195898,"as":441529,"ar":395911,"aq":7417,"av":49201,"au":56223,"ak":8531,"al":413966,"ai":134662,"aj":6551,"ao":31258,"ap":60289,"am":235425,"an":557322,"ac":100712,"ad":561485,"ab":137795,"ag":58484,"ah":7769,"ae":25110,"af":14102,"nu":24993,"nt":522402,"ns":188107,"no":344725,"nn":15103,"nz":5976,"ny":7562,"nv":17155,"oe":16285,"of":27077,"oc":103353,"od":80574,"oa":21115,"ob":40172,"om":360608,"on":341990,"ol":141927,"oi":109737,"oj":7341,"og":52000,"oh":5263,"ot":49005,"m²":81019,"os":507875,"ov":77341,"ou":126561,"op":64022,"oo":11470,"or":489375,"r ":303972,"ox":5797,"ow":11301,"oz":5717,"pe":192004,"lá":12510,"pa":207724,"pl":31072,"lé":9270,"lê":6741,"po":262183,"ph":9247,"pi":69522,"lo":175844,"lm":34639,"ll":48668,"ls":10434,"lp":6052,"lv":20781,"lu":42856,"lt":38629,"ly":6623,"o ":2000634,"ma":495019,"mb":74173,"iã":65284,"me":284803,"iá":6420,"iç":26049,"mi":135455,"mm":5698,"mp":92721,"mo":132550,"mu":111484,"p ":8620,"na":437221,"nc":193122,"nd":299787,"ne":119195,"nf":17343,"ng":76996,"nh":95890,"ni":224974,"nj":6755,"nk":6105,"ju":23595,"fí":5753,"jo":21637,"ki":12050,"ke":10814,"ka":8676,"m ":610401,"ko":5815,"gé":5229,"km":88160,"gê":12280,"li":257137,"lh":65430,"le":173629,"ld":17597,"lg":14230,"lf":5772,"la":234272,"lc":7172,"lb":14242,"n ":112737,"ht":6380,"hu":13446,"hi":44146,"dê":5692,"ho":87126,"dé":6382,"id":307392,"ic":326922,"ib":24713,"ia":390572,"ig":76460,"if":27051,"ie":76108,"k ":20205,"ir":178791,"is":322059,"it":260738,"iu":12350,"iv":114919,"ix":17582,"ik":5904,"eç":8830,"il":161901,"im":114838,"in":395415,"io":223313,"ip":55696,"je":12713,"iz":80563,"l ":281164,"ja":26393,"xi":25832,"tê":5442,"xo":8355,"té":18873,"xp":7291,"tí":13967,"tó":26791,"xt":10508,"z ":28371,"xc":16696,"xa":14045,"tã":10541,"tâ":11963,"tá":22795,"xe":8953,"sã":39079,"wi":7047,"sé":18935,"sí":5599,"ró":40309,"y ":44219,"wa":10938,"sá":5639,"ré":9925,"rç":11146,"vi":94338,"rã":8523,"vr":9835,"rí":29707,"rê":8332,"vo":58913,"uz":14799,"ux":5541,"uv":5858,"ve":130840,"rá":18536,"va":112071,"x ":15240,"ui":98859,"uj":5584,"ul":126725,"ue":171782,"ug":35554,"ur":125836,"us":87304,"ut":96190,"um":493013,"un":209874,"up":26206,"ty":5162,"tu":132302,"tt":15069,"pó":5919,"ub":41087,"ua":116112,"ud":26863,"uc":23327,"w ":7530,"pú":7028,"to":326748,"pé":22989,"tl":9288,"ts":8324,"tr":260066,"te":557844,"ti":259190,"th":25055,"ta":456660,"su":99840,"ss":133040,"st":452789,"sl":7670,"sk":8628,"sm":23244,"sp":74972,"so":134893,"sq":6495,"sd":6659,"sc":72024,"se":292294,"sh":13868,"si":227032,"u ":130375,"sa":133204,"sb":7374,"rr":74864,"rs":43158,"rt":193480,"ru":48286,"rv":18129,"ry":9625,"ní":7223,"rq":17132,"rp":10793,"ro":344915,"rn":53960,"rm":73617,"né":7825,"rl":15891,"nç":36309,"rk":7248,"ri":409856,"nã":10618,"rg":47427,"rf":6634,"ná":10363,"re":480208,"rd":53701,"rc":51264,"rb":27665,"ra":596837,"t ":61702,"mú":6929,"qu":189373,"mí":22028,"mé":15139,"má":8216,"mã":9155,"s ":1150995,"pt":9571,"pu":45299,"ló":14756,"lí":26351,"pr":175784,"ps":6678,"zi":14725,"ze":25298,"vá":5473,"za":80538,"zo":10261,"ví":24973,"ya":6552,"ys":5731,"yr":7114,"uí":16711,"uê":10113,"uç":8683,"² ":81055,"É ":12059,"ã ":5975,"ál":15193,"ác":6980,"ád":5021,"áv":6927,"áx":6998,"ár":84913,"át":10444,"ás":9114,"âm":6223,"ân":30468,"ão":308442,"à ":22252,"á 
":25281,"ós":7667,"ôm":5559,"ôn":14217,"ói":30866,"óg":6061,"ód":6423,"ór":23808,"óp":8429,"ón":16555,"ól":8310,"ív":5979,"ín":37401,"ím":8935,"íp":23888,"ío":18396,"ít":18007,"ís":21982,"íf":5629,"íl":22187,"íc":11953,"íd":15269,"çõ":21574,"çã":131394,"ên":31853,"êm":5074,"ês":26627,"él":7493,"ém":24476,"én":6312,"és":8228,"ét":9564,"ér":30738,"éd":7542,"éc":31275,"ço":21084,"ça":38992,"é ":308148,"ún":5650,"ús":10148,"úb":7903,"õe":33653," Ga":11344," Ge":12440," Fo":24420," Fu":6872," Fr":18748," Fi":9007," Fl":5988," Ha":12738," He":10614," Go":9722," Gr":18935," Gu":13279," Gi":5513," Ho":11760," Hi":6304," Je":5193," Ja":20267," In":21384," Ka":6274," Jo":23796," Ju":10788," La":20143," Le":17264," Li":18789," Ma":61882," O ":56830," Mi":22595," Me":18100," NG":5699," Lo":21011," Lu":9458," Ne":9995," Na":16245," Ni":6760," Mo":23061," Mu":14430," A ":56821," Am":13551," An":22425," Al":38669," Ag":5878," Ac":5031," Ab":6362," Ba":33333," Au":8681," At":7218," As":11678," Ar":20591," Be":17207," Bi":6949," Bo":16224," Br":31284," Bu":7302," Ca":66026," Ce":12123," Ci":8645," Ch":23864," Cl":8347," Cr":10239," Co":62169," Cu":6320," Da":9487," Di":17388," De":23395," Do":10149," El":11281," Es":76102," En":6655," Em":8884," Eu":7403," Fe":14330," Fa":9149," Wi":7332," Sã":16295," Wa":6979," a ":159392," Os":8676," Or":9772," Po":57372," Pl":5480," Pi":15147," Pe":21311," Pa":48469," No":30533," Ol":8779," Ra":8887," Qu":6094," Ro":19582," Re":33096," Ri":19127," Pr":23377," Su":24284," St":10640," Ta":10684," UA":14219," Th":10656," Ti":5222," Te":19124," Tr":12898," To":12004," Sa":39159," Sh":5584," Si":12511," Sc":6287," Se":27797," So":14864," Va":11341," Ve":14300," Vi":19683," Tu":6372," Um":5787," Un":17669," ja":9410," im":11343," in":69292," il":7613," it":11439," jo":14428," ju":16927," ha":94775," gr":27016," go":7301," gu":5619," hi":11250," ho":15720," ne":9802," na":144146," mu":41919," mo":24736," on":8262," oc":7515," of":9658," ob":9893," nu":6152," no":190063," le":13833," li":23905," la":21762," gê":9642," km":87834," me":40602," mi":24825," o ":110021," ma":77227," lu":6557," lo":47174," ag":9251," ab":11690," ac":15052," ad":42206," am":26285," an":52016," ao":28506," ap":22430," al":29345," av":5103," au":17149," ar":22234," at":33443," as":63845," ba":35433," bi":6595," be":6338," bo":7718," br":28760," ca":66098," e ":241281," er":11510," et":6896," es":129714," en":44317," em":145368," el":17511," fe":16956," fa":35767," ex":43118," fu":24478," fr":49866," fo":99597," fi":29419," ge":13813," ga":14076," cl":11956," co":383123," cr":20621," ce":57678," ch":14448," ci":62716," da":258975," cu":12445," do":238864," de":1021034," di":92269," ed":7807," du":15330," sa":10534," se":193144," si":34152," so":24812," qu":119214," mú":6854," ra":8844," re":136757," nã":9894," ri":7562," ro":14358," pu":7134," pr":135007," lí":6039," os":51421," ou":60851," op":5907," or":37748," pe":115763," pa":93579," pl":9433," po":171127," pi":7518," sã":13196," sé":13507," va":8521," ve":22883," vo":11121," vi":22819," us":9297," ut":7175," um":444668," un":7084," ta":23369," su":56894," tr":33259," to":19262," th":6383," ti":9760," te":60106," É ":12023," à ":21932," ár":53832," ál":7915," é ":284550,"GC ":5727,"Est":57555,"Esp":12609,"Eur":5804,"Ele":5431,"Em ":5626,"Ger":5645,"Fra":12238,"Foi":10640,"For":9044,"II 
":6274,"Gra":10045,"Int":5948,"Amé":5088,"Bai":6138,"Bar":6969,"Ale":12779,"Alt":5502,"Ant":8344,"Cal":7739,"Cam":11986,"Cas":10156,"Car":12659,"Cat":5648,"Can":7596,"Bra":20750,"Den":4991,"Chi":5974,"Cen":6997,"Cha":10337,"Cor":7280,"Com":13375,"Col":5164,"Con":24999,"Dis":5047,"Nov":9350,"Nor":13657,"Os ":6443,"Per":7552,"Par":15095,"Pau":10573,"Pal":5074,"Pro":8377,"Pol":9021,"Pos":21873,"Por":13937,"Jan":9313,"Jos":5715,"Jog":5307,"Lan":5276,"NGC":5668,"Man":7960,"Mar":25108,"Mon":7943,"Min":6224,"Mun":8484,"São":16298,"Sul":8673,"UA ":15445,"Sai":5389,"San":15530,"Rio":11763,"Val":5795,"Vil":5627,"Ver":7558,"Uni":16626,"Ter":5697,"The":7418,"Tra":5593,"bit":68890,"bil":5849,"bo ":5900,"bli":15859,"bol":16054,"bor":6814,"be ":8829,"ban":15903,"bal":9064,"bai":9744,"bas":9817,"bar":9261,"ber":18628,"bel":6221,"ca ":99422,"car":23995,"cas":22552,"cat":6662,"can":45210,"cap":11240,"cad":24057,"cam":12174,"cal":52109,"ce ":17573,"bri":17515,"bro":31772,"bra":35217,"bre":13206,"bur":6170,"bum":7625,"am ":33009,"aix":9798,"al ":180975,"ain":14382,"aio":19228,"air":8502,"ais":60324,"aia":5333,"ago":14405,"anu":5896,"ano":74765,"ant":129336,"ans":13455,"ane":21736,"ang":13849,"anh":31372,"ani":21090,"ana":43001,"anc":58051,"and":76029,"amo":8042,"amp":20451,"ami":6934,"ame":89963,"amb":25048,"ama":26496,"ao ":25066,"alt":10265,"alo":6981,"alm":23414,"all":8441,"alg":7598,"alh":11152,"ali":86609,"ald":5965,"ale":20569,"ala":17691,"an ":23333,"aba":10377,"abe":9561,"abi":55871,"abo":5404,"abr":10253,"ae ":16585,"aca":7889,"ab ":40485,"ai ":5437,"aga":6957,"age":17901,"ado":217915,"adr":8357,"adm":35408,"adi":9559,"ade":147619,"adu":9382,"aco":11338,"aci":33260,"ach":9385,"ace":11134,"ada":120249,"act":10015,"até":8607,"ató":5142,"ba ":7163,"aqu":7206,"amí":18994,"arg":7520,"are":23525,"ard":21302,"arc":12276,"ara":72529,"aro":8382,"arn":4993,"arm":6518,"arl":7205,"anç":24866,"ari":33288,"arq":9519,"arr":16718,"art":76275,"asa":6049,"asi":39800,"asc":20114,"ase":9902,"ar ":59582,"apa":10937,"alá":8272,"ape":8629,"api":9537,"apo":6995,"apr":7853,"as ":272706,"ava":17923,"aut":16096,"arç":7034,"avi":9973,"ave":10994,"ata":23226,"ast":51032,"ass":26073,"atr":16524,"ato":21654,"ate":18241,"ati":65753,"atu":18602,"aul":12887,"aus":5992,"jet":6359,"jan":5629,"jog":9592,"ito":67486,"itu":23834,"ism":9783,"isp":8481,"iss":14904,"ist":137228,"ita":115808,"ite":14296,"iti":9264,"ivr":6273,"ivo":19750,"isã":7956,"iva":52169,"ivi":13782,"ive":20602,"ipo":7067,"is ":98587,"ion":54065,"ior":20141,"ios":21003,"ipa":29796,"ipe":6360,"ir ":14429,"irr":7079,"iro":63701,"iri":6848,"ise":5201,"isc":10067,"isa":6981,"iu ":6072,"ire":22818,"ira":48023,"ja ":10608,"ixa":7209,"itâ":5590,"iz ":5440,"iza":68411,"km ":6889,"ki ":6235,"km²":80918,"gên":12105,"jul":5237,"jun":11429,"ha ":54934,"ham":13772,"han":7220,"har":10472,"has":8814,"hab":92261,"he ":16106,"hei":5849,"hec":22728,"her":9140,"hin":8200,"his":10200,"ho ":40610,"go ":31208,"cçã":8036,"gna":9579,"giã":60677,"gos":20347,"gov":5137,"gru":8390,"gra":31116,"gre":13275,"cíp":21053,"gui":6993,"gua":14762,"gue":29368,"gun":46422,"guê":7234,"iai":5206,"iam":5743,"ial":29154,"ian":36430,"ias":36377,"iad":18351,"ibu":6231,"ibe":5599,"ia ":236755,"ien":14065,"ier":7590,"ies":5878,"ied":5977,"iaç":6604,"ife":6300,"ifi":12350,"icu":5216,"ico":76077,"ici":43382,"ich":7968,"ice":7761,"ie ":27537,"ica":143651,"ido":58535,"idi":8244,"ide":55475,"ida":173600,"il ":28740,"im 
":15779,"ige":5893,"iga":14060,"igi":13843,"igu":5678,"icí":20778,"igo":8521,"ign":14724,"imo":12687,"imp":12501,"ime":38876,"imi":7471,"inc":33960,"ind":23589,"ina":69514,"ino":22963,"int":49765,"ins":15328,"inf":7949,"ine":18603,"inh":19122,"ing":32761,"ini":53005,"inu":8866,"ila":11890,"in ":16212,"ilo":7283,"ill":18435,"ilm":5403,"ilh":18906,"ili":24989,"ile":27199,"ima":19294,"io ":111339,"hom":5231,"hos":8051,"hor":13996,"hum":7243,"fes":5911,"fer":17361,"fei":6807,"fam":20697,"ext":9301,"ez ":8287,"exp":6312,"exi":7134,"exc":16402,"eze":10386,"eta":21527,"ete":19661,"eti":12522,"esp":45070,"est":117047,"ess":36172,"eto":18111,"etr":18837,"eve":21643,"eva":5679,"evi":13265,"eus":10550,"eró":29181,"erí":19798,"ey ":6913,"er ":64588,"epa":35649,"açõ":12626,"eon":5303,"es ":232451,"epr":6051,"enç":6954,"eri":53238,"erg":9605,"ere":26129,"erc":25527,"erd":8470,"era":58345,"et ":10811,"equ":14020,"aís":9202,"esm":7907,"esi":25700,"esc":27105,"esd":5456,"ese":28703,"eu ":29870,"esa":56958,"erv":11462,"err":29563,"ert":32707,"ers":31036,"ern":27425,"erm":19525,"ero":27972,"en ":19630,"ela":57757,"ele":31916,"eli":10608,"elh":19021,"ell":12189,"elo":35477,"eo ":6077,"emb":29421,"ema":25633,"eme":11201,"emo":8721,"emi":9916,"emp":20050,"ene":11086,"enh":10752,"ena":27141,"end":70599,"enc":33249,"eno":17065,"eni":7335,"env":8603,"ens":111144,"ent":263509,"açã":84894,"ecç":6791,"ego":8230,"egi":67782,"egr":7999,"egu":58530,"eia":6927,"eis":10113,"eir":97869,"eio":5873,"ein":12110,"eja":8929,"el ":27319,"eit":17574,"em ":185457,"gin":9613,"gio":6344,"gic":7282,"gia":12807,"gen":19808,"ger":11186,"gem":16786,"ge ":8655,"gad":9856,"gas":7737,"gar":10290,"gal":15478,"gan":11614,"ga ":19137,"fut":9060,"fun":15140,"fra":39155,"fre":11644,"for":36958,"foi":67238,"bém":15726,"fic":31195,"fil":13806,"fin":10276,"fis":5455,"da ":386105,"de ":1113463,"dad":172007,"dal":5052,"dae":8716,"das":50851,"dan":7654,"dam":5121,"cul":25961,"cto":6036,"cti":5064,"cta":7890,"cur":7265,"cla":7926,"clu":9168,"cli":8238,"co ":77432,"con":114590,"col":16761,"com":260529,"cor":23539,"cos":23065,"cre":8780,"cri":27018,"cro":6255,"cea":7114,"ch ":7362,"cer":21587,"ces":47284,"ceu":5073,"cen":80958,"caç":9087,"cel":14050,"cei":7700,"cha":18824,"cia":84536,"ck ":7938,"cie":24606,"cid":84373,"che":14945,"chi":8861,"cim":6171,"cis":5750,"cin":23953,"cio":45369,"cip":32198,"ebo":13090,"ead":6607,"ean":5576,"eal":11859,"eat":4993,"ea ":57325,"efe":11937,"ei ":8171,"ega":11879,"edi":19796,"ede":22533,"eda":8333,"edo":8760,"ecl":7415,"eci":35040,"ece":14294,"ecu":5255,"ect":14984,"eco":11351,"dur":9714,"duz":6967,"dor":36541,"don":5597,"dos":101787,"diç":6127,"dmi":36030,"dua":7331,"dri":7071,"dra":5496,"dre":6857,"dro":6768,"dic":13696,"did":7827,"dia":53753,"der":26564,"des":61760,"dez":6564,"dec":9473,"def":5074,"dei":8639,"del":8957,"den":70173,"dem":10237,"dep":38994,"do ":499886,"div":11366,"din":7506,"dio":15896,"dir":15370,"dis":34873,"dit":6840,"dif":6362,"rga":9198,"ri ":5474,"rgi":6642,"rge":9033,"não":10106,"rgo":7927,"ret":18367,"res":88172,"rev":9016,"rg ":6617,"rea":66834,"ref":8795,"rec":27943,"red":8922,"rei":27335,"reg":80521,"rem":9993,"ren":24469,"raç":16220,"rel":14080,"nár":5684,"rep":8548,"rda":6649,"rdo":6927,"rdi":10382,"rde":15782,"re ":57889,"rci":7180,"rce":9738,"rca":20138,"rd ":8761,"rar":6504,"ras":65057,"rat":52626,"rav":11402,"rbi":16469,"rba":5129,"rai":12729,"rag":7162,"ran":101236,"ram":28519,"ral":30318,"rab":8082,"raf":5059,"rad":42430,"rac":11489,"rs 
":6575,"ros":32986,"rot":8757,"rom":13204,"ron":14637,"rop":15939,"rou":5772,"rov":31049,"rod":14281,"roc":11812,"rol":6958,"rof":9288,"nçã":8381,"rog":7781,"rno":11572,"rna":23454,"rne":8579,"rmo":8053,"ro ":148425,"rma":34000,"rme":10601,"rmi":14095,"nça":24469,"riz":8006,"rio":44890,"rit":46897,"ris":18600,"rig":17891,"ril":10251,"rin":40225,"rim":24339,"ria":73821,"rib":7744,"ric":60975,"rid":13442,"rie":17826,"rup":9891,"rus":5606,"rva":5441,"rvi":6787,"ry ":6483,"rsi":7243,"rso":14017,"rta":49041,"rto":14307,"rte":57600,"rti":26907,"rtu":25752,"rt ":7638,"rqu":17086,"rro":14834,"rri":9266,"rre":21970,"rra":23654,"sad":9694,"san":5546,"sas":9730,"sar":5392,"sa ":76681,"sid":59438,"sic":18066,"sia":15495,"sit":17633,"sis":13892,"sin":13902,"sio":8799,"sil":41142,"sim":8594,"sig":13420,"scr":14392,"scu":5033,"sde":5601,"se ":99012,"sca":8201,"sce":12058,"sci":8690,"sco":17645,"ser":25139,"ses":9941,"set":9939,"seu":20458,"seg":45509,"sed":9373,"sen":34160,"sem":9716,"sel":5712,"spo":14510,"spe":12903,"spi":7965,"spa":15990,"sol":6214,"son":13806,"sor":8549,"sos":34321,"soa":5579,"soc":9148,"sob":10181,"st ":5646,"squ":6464,"smo":15213,"so ":33266,"ssã":5427,"stá":10797,"stã":5171,"stó":7858,"sse":16316,"ssa":18526,"sso":30852,"ssi":23013,"ssu":30134,"ste":119936,"sta":117494,"spé":16790,"sto":24915,"sti":39339,"stu":7367,"str":101454,"sua":22858,"sub":11326,"sui":27030,"sul":9138,"sup":6250,"sur":5128,"tai":5662,"tal":56545,"tad":72109,"tat":5635,"tas":21431,"tar":22207,"tan":74838,"tam":58144,"te ":182044,"ta ":108579,"pa ":8506,"pe ":6181,"par":108733,"pas":6728,"pac":5622,"pal":27847,"pan":19668,"láx":5857,"pec":9282,"pen":12180,"per":70169,"paí":6811,"pet":6411,"pes":15522,"pel":52058,"pla":10972,"ple":7590,"plo":5321,"pic":9020,"pin":7594,"pio":22913,"pir":7271,"pit":9245,"por":117200,"pop":23863,"pos":31387,"pon":14947,"pol":21044,"pod":12218,"po ":20718,"lês":5033,"pub":5596,"lít":10135,"pri":47853,"pre":42479,"pro":71854,"put":8461,"pul":26066,"mão":6273,"mér":6337,"míl":18580,"qua":26640,"que":121041,"qui":34023,"ra ":178784,"mús":6480,"ngo":5638,"ngl":12535,"ngu":12950,"ni ":5382,"nge":10906,"nga":5764,"nho":23066,"nha":43874,"nhe":24382,"nei":17925,"naç":10350,"nen":5777,"ner":21401,"net":5963,"nes":11864,"ng ":14902,"nco":14783,"nci":80668,"ncl":5010,"nce":69648,"nch":7394,"nca":5561,"ne ":24199,"ndr":9286,"ndo":94739,"ndi":27820,"nde":77208,"nda":61455,"nal":50464,"nam":7322,"nan":6772,"nar":8444,"nac":9251,"nad":24850,"nag":7446,"nai":6712,"nd ":10811,"nat":14635,"nas":35071,"na ":240257,"ny ":5694,"nsã":7776,"nvo":7994,"nve":5937,"num":5422,"nut":6804,"nto":104039,"ntu":19077,"ntr":64145,"nti":32354,"nta":52915,"nte":218759,"nso":37106,"nst":25404,"nse":18037,"nsi":55540,"nsa":5243,"nt ":11783,"ns ":20234,"nom":29572,"not":5470,"nos":46255,"nor":21961,"nov":11058,"nne":6439,"no ":209889,"nid":27726,"nic":53412,"nia":29100,"niz":6361,"niv":8244,"nis":48078,"nio":9439,"nim":8021,"nin":5656,"ogr":11168,"ogi":9366,"ogo":16032,"oga":7825,"oi ":78403,"ois":10929,"oje":5415,"ol ":15035,"oce":8227,"oci":14692,"oco":7976,"oca":52787,"ode":20265,"odi":6628,"odo":29483,"of ":5154,"oda":5699,"oes":5612,"odu":11939,"ofi":9565,"oa ":8425,"obr":14956,"oví":22195,"ote":8672,"oto":8649,"ost":25890,"ota":9466,"osi":9200,"oss":38213,"oso":8522,"ovi":11009,"ovo":6748,"ova":12269,"ove":21270,"ous":5680,"our":9672,"out":17508,"opo":7367,"ope":8960,"opa":6108,"os ":396479,"opu":24150,"oló":8455,"olí":12107,"or 
":155288,"orm":37181,"orn":14580,"oro":7726,"orr":17850,"ord":17760,"ore":31129,"org":12371,"ori":29601,"ou ":76190,"osa":9113,"ort":72366,"m² ":81009,"orb":15491,"ora":44778,"ola":17798,"on ":35303,"oli":16249,"ole":10370,"olo":20061,"olu":6559,"olv":8428,"om ":129315,"ona":61961,"ond":42834,"onc":16183,"one":12259,"onh":24723,"ong":8421,"oni":11825,"ono":11037,"ons":37243,"ont":48875,"oma":22403,"ome":35533,"omb":7493,"omi":12319,"omp":29869,"omo":47819,"omu":62085,"la ":68801,"le ":30025,"lac":17518,"lad":10810,"lag":5216,"lan":32402,"lam":5387,"lar":20429,"lat":13280,"las":19631,"ld ":5145,"lbu":8512,"lon":10273,"lor":10947,"loc":44637,"log":14014,"los":17433,"lme":28363,"lti":6810,"lto":6807,"ltu":6502,"lub":6858,"lta":10527,"lho":31407,"lhe":5407,"lha":24850,"lgu":5605,"lev":8296,"les":20832,"let":10999,"ler":5749,"lem":18432,"len":11746,"laç":21723,"lei":31609,"leg":6064,"lec":6739,"lo ":60493,"lla":8917,"lle":13121,"lli":8289,"ll ":7390,"lit":15034,"lis":21665,"lio":5219,"lin":27495,"lim":6007,"liz":58462,"liv":7687,"lic":27450,"lid":13334,"lia":47125,"lig":7684,"ma ":312319,"mai":41100,"mad":18812,"mar":24207,"mas":19571,"mal":6120,"man":38877,"mat":11364,"mba":7627,"mbi":6786,"mbr":26443,"mbo":5222,"me ":30188,"med":7143,"met":15815,"mes":16176,"mer":33993,"mem":6344,"mel":7946,"men":135706,"mei":21546,"maç":6372,"mbé":15876,"lva":5641,"lve":5109,"lvi":6861,"mpi":7951,"mpe":15384,"mpr":13673,"mpo":26049,"mpl":10714,"içã":16989,"mod":7097,"mon":12314,"mor":10927,"mos":10632,"mpa":8900,"mui":6353,"mul":5806,"mun":87543,"ião":65139,"min":67980,"mil":11247,"mis":7225,"mit":9135,"mic":17167,"mia":5205,"mo ":71790,"vín":22165,"zem":7471,"zaç":7006,"zad":51320,"zon":5106,"uíd":7919,"uçã":7240,"za ":11487,"uês":7863,"tón":8038,"tór":13405,"tão":9310,"tân":11804,"tár":6234,"té ":9213,"xim":6106,"xia":6380,"xa ":5938,"xce":15412,"tá ":7267,"séc":5490,"sér":7647,"são":39006,"río":17761,"rói":28842,"róp":5175,"via":10682,"vil":9868,"vim":5528,"vid":18086,"vis":17869,"rço":7357,"vo ":18229,"vol":13908,"vos":6610,"rão":7259,"vez":5961,"ver":46986,"ves":7125,"vei":6022,"ven":17920,"vem":8996,"vel":15436,"ve ":14933,"val":11487,"van":5589,"var":6549,"vas":5897,"vad":9906,"va ":59685,"uzi":7134,"utó":6849,"usi":7047,"use":5760,"usa":11181,"ust":14662,"uss":6347,"uti":10782,"ute":14945,"uta":12601,"utu":10263,"uto":22443,"utr":11202,"us ":31309,"ura":55930,"ure":6495,"urg":9235,"uri":7874,"uro":13305,"ur ":5714,"upe":7143,"upo":9984,"uma":271540,"ume":7805,"unt":10008,"uni":47036,"und":70729,"una":50258,"unh":9065,"um ":201899,"ult":13394,"ulo":24134,"uli":6278,"ulh":9149,"ula":44299,"uil":6385,"uin":8363,"uip":5474,"uis":7425,"uia":5524,"uit":14697,"ul ":16765,"ui ":29672,"uga":10215,"ugu":19601,"uda":6128,"ude":5928,"ubr":7378,"uca":5380,"ue ":100430,"uer":13174,"ues":23125,"udo":7316,"uen":9436,"uel":7358,"púb":6723,"ua ":33662,"uas":11459,"uar":8119,"ual":23558,"uan":11328,"ubl":7183,"ube":7379,"uai":5297,"uad":12055,"tur":41731,"tus":5101,"tui":5138,"tul":5524,"tub":7669,"tua":23176,"tud":7868,"tug":23030,"tre":31460,"tra":95106,"tri":60532,"tru":9663,"tro":48136,"péc":16451,"to ":188185,"tod":9530,"tou":5354,"tos":40605,"tom":5946,"ton":10767,"tor":47860,"til":14634,"tig":9433,"tir":6653,"tit":12090,"tis":7919,"tin":26516,"tim":13089,"tip":5927,"tio":9200,"tia":5519,"tic":53927,"tid":12974,"tiv":62127,"tem":39133,"ten":72655,"tei":7527,"taç":9386,"tel":22078,"teg":5010,"teb":12305,"tec":8043,"th ":5190,"tes":82988,"ter":103003,"the":7042,"ço ":13183,"ém 
":22415,"édi":7117,"éci":19647,"écu":6135,"éti":5315,"éri":21433,"ênc":16544,"êne":10481,"ês ":26475,"ção":131304,"ão ":306157,"ça ":19591,"çad":10194,"áti":8887,"áve":5112,"áxi":6942,"álb":7607,"áli":5630,"ári":27812,"áre":51471,"âni":17684,"úsi":6572,"úbl":7260,"ões":32837,"ôni":11441,"óri":16535,"óno":6648,"óni":7908,"óid":28287,"íti":12961,"íst":6738,"ínc":23886,"íng":5418,"íli":21725,"íod":17716,"ípi":21652,"ís ":6687,"íci":9019,"íde":5475,"çõe":21567},"n_words":[49778514,58587553,42469388],"name":"pt"}
\ No newline at end of file
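Once the profiles are in place, detection itself goes through the package's public entry points. A hedged usage sketch, assuming this tree keeps the same API as the upstream langdetect port (detect, detect_langs, and the documented DetectorFactory.seed idiom for reproducible results):

    from langdetect import detect, detect_langs, DetectorFactory

    # The detector samples n-grams randomly, so results can vary between
    # runs; fixing the factory seed is the upstream-documented way to pin
    # them down.
    DetectorFactory.seed = 0

    # Example sentences are illustrative; the expected labels correspond
    # to profiles added in this change.
    print(detect("To jest przykładowe zdanie w języku polskim."))    # -> 'pl'
    print(detect_langs("Este é um exemplo de frase em português."))  # -> [pt:0.99...]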
diff --git a/nlp_resource_data/langdetect/profiles/ro b/nlp_resource_data/langdetect/profiles/ro
new file mode 100755 (executable)
index 0000000..a5c7b8d
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/ro
@@ -0,0 +1 @@
+{"freq":{"D":14206,"E":15258,"F":12709,"G":12710,"A":32029,"B":21807,"C":31096,"L":13478,"M":26711,"N":11195,"O":8313,"H":8411,"I":15485,"J":5893,"K":4820,"U":8262,"T":15341,"W":4878,"V":8350,"Q":589,"P":22438,"S":30214,"R":23700,"Y":1325,"X":1901,"Z":1899,"f":89747,"g":84289,"d":270437,"e":933940,"b":76172,"c":339926,"a":811577,"n":568157,"o":427370,"l":434670,"m":224465,"j":16485,"k":15291,"h":47000,"i":839847,"w":7236,"v":72644,"u":459551,"t":525861,"s":321476,"r":567673,"q":978,"p":199529,"z":55715,"y":12885,"x":14642,"²":280,"Î":4242,"É":135,"ß":93,"î":69716,"í":464,"é":1581,"è":290,"ç":171,"ä":380,"ã":94,"â":33080,"á":1243,"à":152,"ü":921,"ú":132,"ö":583,"ó":581,"ñ":107,"Ă":92,"ă":161614,"ā":266,"ć":89,"ı":136,"ī":130,"ş":69795,"ł":112,"ō":143,"š":140,"ţ":70418,"ū":99,"ǎ":351,"Ș":1233,"Ț":844,"ə":124,"ˈ":89,"́":280,"μ":153,"ν":326,"ο":443,"ι":274,"κ":132,"λ":209,"δ":99,"ε":222,"η":124,"α":406,"γ":123,"ά":91,"ί":118,"ό":118,"σ":174,"ς":360,"ρ":245,"π":122,"υ":100,"τ":203," l":54431,"ь":237," m":47832," n":26804," o":49358,"я":249," h":3267," i":28305," j":7583," k":2023,"ы":136," d":156105," e":67780,"х":112," f":51072,"ц":117," g":13468,"ч":249,"р":882," a":134125,"с":828," b":11159,"т":574," c":116614,"у":311," y":145," x":142," z":3353," u":43415," t":26873," w":1170," v":14303,"і":84," q":90," p":90611," s":84706," r":35629,"К":133,"Н":83,"М":137,"П":110,"Б":103,"А":114,"В":101," J":5857," K":4744," H":8304," I":15348," N":11099," O":8211," L":13327," M":26422," B":21644," C":30661,"Р":101," A":31745,"С":176," F":12566," G":12513," D":13925," E":15142,"л":649," Z":1885,"к":766," Y":1319,"й":327," X":1885,"и":1244,"п":176,"о":1327,"н":924,"м":284," S":29902,"г":187," R":23579,"в":707," Q":580,"б":205," P":22131,"а":1480," W":4821,"з":146," V":8268," U":8214,"е":1081," T":15157,"д":332," î":68405," Ă":90," É":134," Î":4219," ţ":2046," ş":40504,"ה":105,"ו":138,"א":92,"ל":89,"י":166," Ș":1227," Ț":843,"ר":104,"و":163,"ي":302,"ل":318,"م":220,"ن":216,"د":146,"ح":87,"ب":195,"ا":469,"ع":89,"س":117,"ر":219," А":114," Б":103," В":101," К":131," М":137,"A ":4154," П":110,"F ":568,"Da":2015,"Cu":1954,"Cy":113,"Cl":1608,"Co":8574,"Cr":1776,"Ce":2628,"Ch":2680,"Ci":1438,"G ":509,"Ec":471,"Ed":733,"Ea":729,"Du":1500,"Do":1908,"Dr":850,"De":3817,"Di":2658,"Fe":1913,"H ":435,"Fa":1302,"Eu":3402,"Ev":451,"Ex":957,"Er":513,"Et":184,"Es":2728,"En":827,"Em":497,"Ep":280,"Ei":240,"El":1974,"Ef":84,"Eg":290,"Ge":3177,"Cá":254,"Câ":416,"Ga":1765,"I ":3107,"Fu":513,"Fr":2787,"Fo":1889,"Fl":1292,"Fi":1643,"Bâ":129,"B ":735," Р":100," С":176,"C ":1390,"Av":423,"Au":2210,"Aw":103,"Ar":4794,"At":636,"As":1391,"D ":881,"Ba":4270,"Az":208,"Ae":279,"Af":611,"Ag":465,"Ah":119,"Ab":553,"Ac":3223,"Ad":1097,"Am":1924,"An":3647,"Ap":1490,"Ai":410,"Ak":87,"Al":4305,"Bu":3694,"Br":3092,"Ca":7562,"E ":767,"Bi":3068,"Be":2955,"Bo":2204,"Bl":574,"Ku":328,"Ky":124,"Kn":99,"Kl":149,"Kr":348,"Ko":662,"Le":1965,"Gă":138,"Li":3218,"N ":854,"La":3044,"Lu":1753,"Ly":104,"Ll":318,"Lo":1983,"Me":2961,"Mi":4169,"O ":1394,"Ma":9710,"Mc":201,"My":160,"Mu":2543,"Mo":4971,"Ni":1761,"Ne":2385,"Na":2454,"P ":736,"Q ":153,"Nu":750,"No":2851,"Ol":1045,"Om":198,"On":376,"Oh":91,"Oc":1033,"Od":206,"Of":171,"Oa":113,"Ob":305,"Gi":853,"Bă":725,"Gh":757,"Gl":444,"Gr":2243,"Go":1170,"Gu":1179,"Gy":91,"J ":354,"Ha":2364,"Dâ":128,"He":1745,"Că":581,"Hi":659,"Ho":1571,"Hr":249,"Hu":882,"Hy":112,"K ":358,"Ib":100,"Ia":1254,"Id":117,"Ie":183,"Ig":92,"Io":1307,"Im":1022,"In":3678,"Il":431,"Iu":1265,"Iv":162,"Is":1017,"It":790,"Ir":549,"Ja":1638,"L 
":645,"Iz":203,"Ji":337,"Je":630,"Jo":1867,"Ju":935,"Ka":1128,"M ":833,"Kh":125,"Fă":159,"Ki":625,"Ke":541,"Ut":114,"Ur":420,"Up":83,"Um":91,"Un":5673,"Ul":182,"Pă":635,"Uc":368,"W ":229,"Ty":98,"Tu":1266,"Tr":2949,"To":1782,"Th":2212,"Ti":1500,"Te":2836,"Pâ":232,"Ta":1163,"V ":1269,"Sw":112,"Sz":144,"Sy":285,"St":5254,"Su":2579,"Wr":229,"Wo":691,"Wi":1273,"Ră":813,"Wh":234,"Wa":1039,"Sâ":220,"We":859,"Y ":147,"Vo":713,"Vr":133,"Vu":111,"Râ":933,"Vi":1974,"Vl":297,"X ":1079,"Va":2283,"Ve":1567,"Lă":421,"Pu":736,"Pr":4511,"S ":1482,"Pe":2677,"Pa":6072,"Pl":931,"Po":3427,"Pi":1539,"Ph":445,"Os":331,"Ot":294,"Ou":154," ا":202,"Ov":120,"Op":423,"Or":1772,"R ":901,"Kö":111,"Sf":926,"Se":3730,"Sc":1718,"Si":2479,"Sh":841,"Nă":243,"Sm":167,"Sl":471,"Sk":113,"Sp":3075,"So":2382,"Ru":1542,"U ":435,"Sa":3082,"Re":6657,"Ri":1089,"Mă":769,"Rh":203,"Ro":9754,"Qu":329,"T ":645,"Mü":187,"Ra":1639,"Wü":172,"b ":5072,"a ":212743,"Ye":128,"Tă":194,"Ya":168,"Yo":679,"Yu":120,"Z ":124,"Tâ":356,"Să":464,"Vă":154,"Za":383,"Ze":392,"Zi":317,"Vâ":376,"Zo":278,"i ":179645,"gd":205,"ge":13414,"câ":4391,"ga":11234,"gb":146,"fl":4423,"ff":711,"bâ":136,"fi":21259,"fr":4539,"fu":3903,"ft":996,"fo":28249,"j ":2040,"gy":251,"dâ":214,"he":10237,"ha":6256,"gn":1318,"gm":382,"gl":4043,"gi":18379,"gh":4326,"bă":2234,"gg":253,"gv":296,"gu":7182,"gt":188,"gs":381,"gr":9341,"cî":121,"go":3415,"dt":248,"du":15963,"dv":200,"dw":264,"dy":391,"dz":86,"g ":5818,"ea":59379,"eb":4646,"ec":30872,"ed":13772,"de":117725,"dd":311,"dg":263,"di":66800,"dh":111,"dj":215,"dm":1691,"eM":86,"dl":291,"do":13587,"dn":210,"ds":959,"dr":6694,"ew":1340,"ex":7454,"eu":5496,"ev":8185,"ey":1419,"ez":15187,"fa":8662,"h ":3714,"fe":10587,"eh":1815,"eg":14417,"ef":4744,"ee":3471,"el":50297,"ek":775,"ej":687,"ei":30112,"ep":15349,"eo":6294,"en":63988,"em":24435,"et":20416,"es":79718,"er":87843,"ca":58257,"e ":363120,"by":328,"bs":1372,"br":14880,"bu":7988,"bt":195,"bn":117,"bo":5650,"bl":6057,"bm":100,"bi":12351,"bb":308,"bc":108,"bd":241,"be":6318,"db":261,"da":18159,"f ":3092,"cz":119,"cy":195,"cv":646,"cu":45953,"ct":20001,"cs":291,"cq":100,"cr":11320,"co":42083,"cm":243,"cn":187,"ck":2747,"cl":5298,"ci":32813,"ch":13925,"ce":51893,"cc":2745,"c ":18619,"aP":84,"aC":95,"az":9879,"ay":1659,"ba":11641,"d ":22818,"at":93723,"as":21874,"ar":105841,"aq":90,"ax":1099,"aw":499,"av":8565,"au":21651,"ak":1416,"al":87116,"ai":18198,"aj":3933,"ao":374,"ap":15113,"am":19806,"an":87690,"ac":26532,"ad":16915,"aa":641,"ab":6836,"ag":7060,"ah":1692,"ae":3048,"af":7326,"nu":32036,"nt":67674,"ns":20163,"nr":1134,"np":113,"no":17936,"nn":2298,"q ":85,"nz":1824,"ny":836,"nw":156,"nv":2089,"oe":1765,"of":5818,"oc":21548,"od":10761,"oa":21531,"ob":5951,"om":43493,"on":56179,"ok":801,"ol":30865,"oi":8296,"oj":309,"og":8118,"oh":1090,"m²":273,"ot":11364,"os":27341,"ov":12192,"ou":7338,"op":17725,"oo":1964,"or":85929,"r ":49086,"ox":1908,"ow":1237,"oz":3227,"oy":555,"pe":42586,"lâ":532,"pa":25762,"pc":98,"pl":7066,"lé":112,"po":22942,"ph":1089,"pi":11751,"lo":40730,"ln":822,"lm":3343,"ll":6217,"ls":1221,"lp":822,"lv":2229,"lu":48097,"lt":11451,"lz":261,"ly":862,"o ":37058,"mc":90,"ma":44395,"mb":17891,"eş":9887,"hă":248,"me":40474,"mf":257,"eţ":7364,"ml":160,"iè":112,"mi":28493,"mn":4278,"mm":955,"mp":17238,"mo":11698,"mt":225,"ms":601,"mu":21624,"my":299,"p 
":5018,"na":37617,"nb":568,"nc":20662,"nd":32264,"ne":45632,"nf":4807,"ng":14388,"nh":479,"ni":65371,"nj":736,"nk":1221,"nl":1126,"nm":330,"ju":7523,"jo":2219,"jl":509,"bţ":531,"ki":1735,"kh":269,"fă":1862,"cş":118,"gâ":388,"kf":113,"ke":1642,"ka":1464,"m ":15258,"ky":316,"ks":661,"kt":260,"ku":385,"ko":995,"kr":318,"kk":161,"cţ":4949,"kl":292,"km":1457,"li":54376,"lh":237,"gă":2958,"lk":383,"lj":135,"le":70613,"ld":4148,"lg":2038,"lf":1236,"hâ":94,"la":55152,"lc":2339,"lb":4110,"n ":164001,"hr":1429,"hs":252,"hw":257,"ht":1909,"hu":2313,"că":22340,"hi":13680,"hn":1591,"ho":3277,"hl":404,"hm":291,"id":10613,"ic":65212,"ib":4937,"ia":63950,"dă":2289,"ih":1392,"ig":9132,"aş":9011,"if":6185,"ie":62856,"hy":354,"k ":4658,"iq":165,"eî":135,"ir":13340,"is":36911,"it":62439,"iu":24448,"iv":15138,"iw":97,"ix":744,"ii":33158,"ij":1160,"aţ":19894,"ik":1190,"il":42983,"im":26881,"in":122257,"io":22518,"ip":8638,"je":1168,"fâ":980,"ji":658,"iz":12018,"iy":146,"l ":109782,"ja":1735,"pţ":503,"să":10006,"xi":3113,"xo":371,"té":92,"tî":216,"xp":1513,"xt":2105,"xu":441,"ww":338,"z ":3980,"xc":491,"xa":1180,"tâ":2269,"xe":1609,"oş":1503,"ră":15639,"wi":755,"oţ":1099,"sé":93,"wn":249,"wo":518,"sî":86,"wr":185,"ws":439,"vy":161,"y ":7000,"wa":1979,"sâ":398,"we":941,"vl":85,"ré":180,"nţ":14138,"vi":18551,"râ":2794,"nş":236,"vu":1822,"vr":1272,"rî":85,"vs":260,"rí":114,"vn":113,"vo":6165,"uz":4516,"uy":132,"ux":811,"uw":93,"uv":2854,"uu":147,"ve":20356,"va":13083,"x ":3188,"ui":36731,"uj":940,"mţ":245,"uk":412,"ul":105505,"ue":3558,"uf":779,"ug":3958,"pă":8327,"uh":313,"ur":41535,"us":16414,"ut":20794,"um":22214,"un":77098,"uo":207,"up":12202,"ty":1026,"tz":478,"tu":41090,"tt":2205,"tw":494,"tv":129,"ub":9499,"ua":12620,"ud":10993,"uc":12122,"w ":1362,"to":36489,"tn":678,"tm":622,"tl":1983,"ts":1249,"tr":54687,"tp":270,"pâ":2492,"tf":804,"te":135412,"td":104,"lţ":1071,"tk":89,"ti":57663,"th":5025,"v ":5358,"tb":1584,"tc":809,"ta":57242,"su":20142,"sv":111,"ss":3065,"st":121541,"sy":354,"sz":231,"sw":164,"sl":2146,"sk":1369,"sn":714,"sm":3046,"sp":10131,"so":10359,"sr":365,"sd":200,"sc":21675,"sf":2108,"se":33580,"sh":1776,"nă":16784,"sg":163,"si":25637,"rz":902,"u ":55858,"sa":21555,"sb":549,"rr":1909,"rs":9764,"rt":23612,"ru":36664,"rv":2469,"rw":210,"ry":1487,"rp":2476,"ro":43127,"rn":8536,"rm":17803,"né":122,"rl":4592,"nç":103,"rk":1642,"rj":246,"ri":114051,"mă":7183,"rh":1830,"rg":8792,"rf":1291,"nâ":643,"re":125851,"rd":9049,"rc":6829,"rb":4129,"ra":67426,"t ":87746,"qu":761,"mé":97,"iţ":6799,"mâ":11991,"lă":11508,"iş":3714,"s ":29143,"px":1165,"py":91,"pt":7489,"pu":17959,"pp":664,"pr":42710,"ps":1182,"ză":8661,"zâ":331,"uţ":3113,"xă":383,"uş":1691,"vă":2660,"zz":266,"sţ":297,"zf":124,"vâ":2188,"rş":635,"zg":128,"uă":2010,"zi":14356,"zb":1622,"zd":236,"ze":7824,"vá":88,"za":10358,"zv":1283,"zy":102,"zu":1714,"zt":122,"zo":3689,"zn":131,"rţ":3519,"zm":105,"zl":93,"tă":34729,"ye":514,"yc":205,"yd":240,"ya":763,"yb":116,"yw":120,"yu":112,"yt":228,"ys":611,"yr":295,"yp":159,"yo":376,"yn":435,"ym":285,"yl":584,"yi":202,"² ":275,"Π":96,"În":3944,"Îm":147,"án":283,"ác":265,"ár":117,"ás":83,"âl":535,"âm":971,"ân":23884,"âi":324,"âu":2278,"ât":1719,"âr":2512,"à ":112,"á ":120,"アアア":92,"ón":162,"ó ":105,"în":66365,"ín":93,"îi":280,"îl":220,"îm":1778,"ía":115,"î ":282,"âş":625,"él":95,"én":106,"és":86,"ér":215,"ée":141,"èr":89,"é ":392,"ăc":1712,"ăd":1353,"ăi":1593,"ăj":113,"ăg":390,"ăm":1728,"ăn":1902,"ăl":2480,"ăr":15029,"ăp":813,"ăv":254,"ău":2670,"ăt":8950,"ăs":5402,"ăz":2233,"ăb":253,"Ă ":96,"ă 
":108634,"ün":238,"ür":317,"ör":105,"ön":99,"öl":118,"îş":473,"ăţ":4694,"ăş":1107,"şo":1467,"şt":10708,"şu":3626,"şi":42994,"şn":265,"şm":86,"şa":1934,"şc":1152,"şe":2787,"ş ":4403,"şă":140,"ţe":5578,"ţi":45859,"ţu":4697,"ţa":7179,"ţ ":1101,"ţă":5758,"ア":157,"ǎ ":253,"Ți":110,"Ța":292,"Ș ":94,"Și":132,"Șo":113,"Șt":390,"Ț ":94,"Șc":133,"Șa":135,"Șe":144,"Ță":266,"之":98,"三":194,"丁":102,"ος":175,"ος ":175,"ς ":359,"ν ":94,"α ":133,"アア":124,"ск":290,"та":92,"ст":158," Ga":1748," Câ":415," Ge":3154," Cá":254," I ":808," Fo":1868," Fu":504," Fr":2770," Fi":1622," Bâ":129," Fl":1278," Ha":2353," He":1731," Dâ":128," Gy":84," J ":235," Go":1158," Gr":2202," Gu":1163," Bă":725," Gh":755," Gi":841," Gl":431," Ig":92," Ie":183," Id":117," Ib":97," Ia":1248," K ":217," Hy":109," Hu":880," Hr":248," Ho":1557," Că":576," Hi":652," Ji":335," Je":625," L ":250," Ja":1632," Iz":203," Iu":1263," Iv":159," Ir":548," Is":1004," It":785," Im":1017," In":3630," Io":1287," Il":430," M ":262," Ka":1125," Ke":531," Ki":614," Kh":124," Fă":159," Jo":1852," Ju":930," N ":206," La":3007," Le":1942," Gă":138," Li":3175," Kl":144," Kn":98," Ko":661," Kr":346," Ku":321," Ky":124," Mc":200," Ma":9571," O ":853," Mi":4107," Me":2921," Lo":1967," Ll":318," Ly":104," Lu":1743," Ne":2351," P ":197,"а ":320," Na":2442," Ni":1749," Mo":4944," My":159," Mu":2527," A ":2659," B ":442," C ":439," Ap":1483," Am":1916," An":3623," Ak":86," Al":4275," Ai":406," Ag":455," Ah":117," Ae":277," Af":571," Ac":3199," Ad":1079," Ab":530," Ba":4239," D ":276," Az":205," Aw":102," Av":410," Au":2198," At":631," As":1386," Ar":4774," Be":2935," Bi":3051," Bl":566," Bo":2181," Br":3074," Bu":3675," E ":267," Ca":7457," Ce":2586," Ci":1423," Ch":2660," Cl":1578," Cr":1755," Co":8451," Cu":1909," Cy":110," F ":199," Da":1990," Di":2603," De":3729," Dr":841," Do":1853," Du":1487," Ea":726," Ec":468," Ed":725," G ":189," El":1961," Ei":240," Eg":286," Ef":83," Et":180," Es":2724," Er":511," Ep":280," En":810," Em":492," Ex":936," Eu":3393," Ev":441," Fe":1900," Fa":1274," H ":241," Să":460," Tâ":354,"к ":95," Wr":229," Wo":669," Wi":1265," Ră":813," Wh":231," We":849," Sâ":220," Wa":1026," Y ":114,"й ":231," Zo":278," Ze":389," Vâ":375," Zi":312," Za":381," Yu":120," Yo":676," Ya":164," Tă":194," Ye":128," Wü":172,"о ":126,"н ":131," Vă":153," a ":36875," R ":300,"в ":116," Kö":111," Ou":147," Ov":103," Os":331," Ot":292," Or":1764," Op":422," Po":3392," Pl":908," Pi":1531," Ph":433," Pe":2652," Pa":6041," Q ":118," Nu":741," No":2833," Ol":1044," On":368," Om":193," Oh":91," Od":204," Oc":1030," Of":168," Ob":298," Oa":113," Mü":186," Ra":1619," T ":190," Qu":324," Ro":9725," Re":6614," Ri":1081," Mă":767," Rh":203," S ":442," Pr":4358," Pu":723," Lă":359," Sz":142," Sy":283," Sw":111," Su":2564," St":5193," Ta":1158," V ":299," Th":2181," Ti":1481," Pâ":232," Te":2810," Tr":2917," To":1756," Ru":1539," Sa":3068," U ":212,"е ":128," Sh":834," Nă":243," Si":2443," Sc":1686," Se":3700," Sf":922," So":2361," Sp":3054," Sk":111," Sl":463," Sm":160," Va":2273," X ":295,"и ":98," Ve":1551," Vi":1939," Râ":933," Vl":295," Vo":709," Vu":110," Vr":132," Tu":1248," Ty":98," W ":133," Uc":367," Pă":634," Ul":182," Um":91," Un":5644," Ur":414," Ut":114," ja":470," l ":435," iz":399," io":137," ip":103," im":2637," in":15177," il":210," iu":2496," is":1936," it":606," ir":289," ka":124," m ":620," fă":1009," ki":195," gâ":148," jo":1049," ju":5955," ha":692," he":341," gi":362," gh":227," bă":568," gl":415," gr":4643," cî":96," go":322," gu":1027," 
ia":2677," id":624," ie":296," dă":97," aş":533," că":5014," hi":694," ho":634," hr":508," ht":170," ni":1017," ne":2985," na":3334," p ":138," mu":6108," mo":5925," ol":415," om":2225," on":472," oc":2425," od":198," of":2241," oa":721," ob":2230," nr":213," nu":8582," no":5141," le":5248," gă":633," li":8299," n ":4015," la":23505," km":1385," me":7797," eş":107," mi":5999," o ":24882,"я ":186," ma":19097," lu":7950," lo":8243," ae":559," af":3722," ag":621," ab":871," ac":11612," ad":3606," am":3470," an":10041," ap":7602," ai":935," aj":609," al":27010," av":3789," au":7831," ax":133," ar":7993," at":2213," as":3512," d ":818," ba":2886," az":461," bi":2225," be":668," bo":1156," bl":302," bu":1418," br":1785," ca":30202," e ":424," c ":92," er":1837," et":1185," es":44667," en":2470," em":716," ep":805," ei":565," el":3115," ef":535," eg":319," fe":3014," fa":5201," eu":1087," ev":1448," ex":4580," fu":2227," fr":3215," fo":24062," fl":777," fi":11367," ge":4653," câ":3570," ga":964," i ":494," cl":2039," cm":100," co":29681," cr":3733," cc":163," ce":15081," ch":1829," ci":2979," f ":104," da":4946," cu":21872," do":6118," dr":1888," de":93131," di":45684,"ч ":147," ec":2186," ed":1091," ea":295," eb":276," du":3328,"ль":104," vă":281," zo":1012,"ла":120,"ли":124,"ко":168," ze":739," zb":153," zi":1040," zf":120," vâ":719,"ка":188,"ки":145," tă":177," să":3897,"ин":138,"ик":85," ww":167,"ий":142," tâ":352,"ич":160,"ри":89,"ро":151,"ра":156,"ре":83,"ос":104,"ор":119,"ол":94,"ов":307," uş":254,"но":129,"ни":117,"на":162," ru":1689," sa":12280," sf":678," se":17401," sc":5833," si":9687," sh":138," nă":1457," sl":456," sp":5202," so":3426,"ви":183,"во":101," ra":2561," re":17790," ri":1713," mă":1822," ro":8367," pu":4423," pr":28214," ps":399," s ":2092," px":1165," lă":254," mâ":207," os":197," ot":110," op":1754," or":11013,"ан":170," ox":160,"ал":98," pe":25650," lâ":421," pa":9591,"ар":142," pl":2462," po":12520,"ая":98," pi":2615," wa":449," sâ":329," we":213," wr":124," wi":111," ră":1828," x ":110," va":2539," ve":4691," uz":162," vo":2209," vr":384," vu":171," râ":1545," vi":3164," uc":310,"ес":86,"ер":137,"ен":133," tu":1391," us":154," ut":956," ur":2048," um":561," un":37329," ul":1504," pă":1596," ta":986," st":9754," su":13217,"ев":109," tr":8292," to":2637," th":2187," ti":4195," te":6492," pâ":1627,"Țăr":258," Î ":96," Îm":146," În":3919," în":65579," îl":200," îm":1722," îi":236," î ":223," îş":424," Ă ":90," ţa":471," şt":950," şo":136," şi":37976," şc":219," şe":338," şa":764," ţi":391," ţe":161," ţă":1008," Ță":265," Ța":292," Ți":110," Șe":142," Șc":133," Șa":134," Ț ":92," Șt":387," Șo":113," Și":132," Ș ":91,"ال":185,"ي ":110,"ن ":132,"AS ":90,"BC ":103,"Feb":468,"Fed":536,"Fel":141,"Fer":333,"Fes":98,"Bâr":109,"Fil":558,"Fin":381,"Fir":91,"Fie":114,"Ext":451,"Fam":192,"Fan":103,"Fal":98,"Far":165,"Fac":313,"Fab":84,"Era":97,"Eri":131,"Est":2527,"Eur":3119,"Eva":149,"Eve":110,"Eug":130,"Exp":186,"Exi":117,"Exc":103,"Evu":86,"El ":710,"Ele":546,"Enc":124,"Eng":174,"Ene":132,"Emi":260,"Elv":237,"Eli":210,"Epi":189,"Ent":184,"Câm":221,"Cân":103,"Các":221,"Ger":1707,"Geo":684,"Gen":482,"Gla":99,"Ghe":506,"Băl":198,"Ghi":176,"Băt":252,"Gil":105,"Gir":238,"Giu":159,"Gaz":102,"Gal":559,"Gam":141,"Gav":159,"Gar":269,"Gab":147,"Fun":252,"Fru":86,"Fro":141,"Flo":463,"Fla":640,"Fra":1957,"Fri":249,"Fre":285,"Fon":202,"Fot":362,"For":809,"Fox":86,"II 
":1544,"Dâm":116,"Căl":223,"His":116,"Hil":108,"Hel":236,"Hei":175,"Hea":139,"Hen":243,"Hes":101,"Her":541,"Hal":221,"Hai":98,"Han":325,"Ham":243,"Har":682,"Hau":104,"Gur":143,"Guv":353,"Gua":120,"Gui":207,"Gre":662,"Gri":261,"Gra":547,"Gru":280,"Gro":360,"ţă ":4334,"Glo":227,"Goo":112,"Gol":285,"Got":84,"ţăt":101,"Gor":250,"ţăr":1089,"ţăm":207,"Inv":138,"Ioa":445,"Inf":292,"Ini":100,"Int":1173,"Ins":803,"Ion":531,"Ios":98,"Ior":163,"Ili":165,"Ill":92,"Inc":150,"Ind":606,"Imp":850,"In ":158,"Iaş":404,"Ier":125,"Ian":535,"Ial":83,"Hun":383,"Hum":90,"IX ":412,"Hug":91,"IV ":391,"IT ":137,"Hor":215,"Hou":138,"Hot":160,"Hom":127,"Hon":117,"Hol":365,"Hr ":133,"Arg":385,"Arh":350,"Are":1696,"Arc":194,"Ard":100,"Ara":584,"Arm":402,"Ari":241,"Apo":195,"Apr":477,"Ate":96,"Atl":247,"Ast":296,"Ass":154,"Asi":398,"Aso":236,"Art":517,"Au ":88,"Avi":111,"Ave":127,"Aut":413,"Aus":786,"Aur":237,"Apă":85,"Aug":551,"Bai":170,"Bal":441,"Ban":574,"Bab":153,"Bac":358,"Bad":439,"Baz":111,"Bay":92,"Bar":873,"Bat":155,"Bas":347,"Bav":161,"CD ":118,"Abr":91,"Aca":558,"Act":175,"Ada":136,"Ace":1958,"Acc":188,"Adu":103,"Adm":250,"Ado":166,"Adr":161,"Ade":100,"Afa":103,"Aer":222,"Age":153,"Afr":381,"Agr":117,"Air":208,"Al ":217,"Ala":198,"Alb":1104,"Alg":107,"Ali":309,"Alc":89,"Ale":1016,"Alf":175,"Alt":180,"Alm":145,"All":248,"Alp":203,"Ame":1318,"Amb":85,"Ama":153,"Ang":951,"Ani":130,"Ana":326,"And":938,"Ant":732,"Ann":166,"Apa":378,"But":87,"Bus":115,"Buz":155,"Bul":406,"Bun":176,"Bur":449,"Buc":1806,"Bud":195,"Bru":284,"Bră":148,"Ca ":119,"Cab":93,"Cal":738,"Cam":777,"Cas":1102,"Car":1723,"Cau":101,"Cat":1040,"Can":883,"Cap":564,"Bea":150,"Bet":104,"Ber":735,"Ben":285,"Bel":1128,"Bib":207,"Bil":178,"Bih":202,"Bis":1833,"Bir":155,"Bio":98,"Blo":128,"CN ":94,"CO ":85,"Bla":274,"Bre":357,"Bra":1081,"Bro":311,"Bri":819,"Bog":147,"Boe":111,"Bol":190,"Bon":168,"Boo":93,"Bor":292,"Bos":170,"Bot":287,"Bou":149,"Cuv":164,"Cur":363,"Cup":386,"Cul":405,"De ":453,"Dez":104,"Der":87,"Det":101,"Des":342,"Dev":138,"Deu":92,"Del":231,"Dem":343,"Den":254,"Dep":258,"Dea":199,"Dec":652,"Dam":105,"Dan":549,"Dar":174,"Dat":151,"Dav":338,"Dac":187,"Dal":97,"Chr":305,"Che":298,"Chi":1153,"Cip":91,"Cin":138,"Cio":191,"Cit":251,"Ciu":252,"Civ":93,"DN ":145,"Cle":88,"Cla":363,"Cea":181,"Ceh":224,"Cel":451,"Cen":839,"Cet":187,"Cer":452,"Cha":723,"Cri":558,"Cra":341,"Cre":321,"Cu ":179,"Cru":182,"Cro":263,"Cli":120,"Clo":93,"şă ":93,"Clu":841,"Coc":83,"Coa":119,"Cod":224,"Cop":211,"Cos":228,"Cor":1033,"Com":2355,"Col":727,"Coo":87,"Con":2677,"Cou":282,"Cot":116,"Cov":111,"Ea ":436,"FA ":238,"Egi":189,"FI ":85,"Edu":88,"Edi":420,"Eco":139,"Ech":197,"ţur":116,"Eas":194,"ţui":121,"ţul":4343,"FC ":245,"ţar":732,"ţat":1470,"Deş":155,"ţe ":1287,"ţa ":4852,"Dia":147,"Dic":236,"Dis":609,"Dir":129,"Dio":102,"Din":623,"Dim":132,"Die":129,"Div":260,"ţit":365,"ţiu":2393,"ţir":210,"Duc":160,"ţin":4093,"ţio":5262,"Dup":205,"ţil":2341,"ţim":464,"Dun":357,"Dum":429,"ţii":7297,"ţif":426,"EX ":88,"Dur":97,"ая ":95,"ţia":6859,"ţie":10551,"ţei":1511,"ţen":180,"ţel":1678,"ţes":190,"Dre":250,"Dra":255,"ţi ":5299,"Doi":184,"Dob":158,"Dou":102,"Dol":170,"Don":271,"Dom":358,"Dor":224,"ţea":461,"Nea":360,"Neg":223,"Nev":114,"Neu":159,"Net":137,"Nep":85,"Nas":96,"Nat":327,"Nav":140,"Nig":112,"Nic":784,"Nis":298,"Nin":99,"Nik":102,"Naţ":910,"New":711,"Nap":289,"Nam":92,"Num":477,"OS ":93,"Nou":391,"Nov":172,"Nor":1220,"Not":118,"Noi":520,"Nob":138,"Înc":117,"Oct":512,"Înt":212,"Ode":88,"PC ":107,"Oce":295,"Împ":136,"Obe":86,"În 
":3310,"Oto":155,"Olt":287,"Oli":317,"Ola":252,"Ono":115,"One":96,"Ope":278,"Ora":462,"Ort":215,"Osc":136,"Ord":181,"Ori":268,"Org":211,"Peş":117,"Plo":152,"Ple":87,"Pla":572,"Pin":170,"Pit":107,"Pir":96,"Pie":423,"Pic":155,"Pia":306,"Pho":97,"Phi":218,"Ped":83,"Per":522,"Pet":738,"Pen":513,"Pel":85,"Pe ":267,"Pat":391,"Pas":174,"Par":3240,"Pav":117,"Pau":327,"Pac":294,"Pan":274,"Pap":271,"Pal":567,"Pub":157,"Pur":83,"Put":143,"Pro":1436,"Pri":1498,"Pre":1075,"Pru":161,"Pra":285,"Pod":203,"Poa":126,"Pol":793,"Pom":101,"Pon":142,"Poi":169,"Pot":185,"Pos":133,"Pop":623,"Por":507," ال":165,"Lăc":323,"SA ":143,"Rac":113,"Rad":382,"Rai":238,"Ram":106,"Mün":150,"Ran":107,"SD ":117,"Que":155,"Isa":101,"Irl":266,"Ita":687,"Isl":175,"Isr":173,"Ist":357,"Ira":132,"Iug":109,"Iva":127,"Iul":580,"Iun":435,"Izv":134,"Jae":83,"Jac":383,"Jap":315,"Jan":217,"Jam":257,"Jer":140,"Jea":190,"Jim":122,"Jos":531,"Jon":151,"Joh":545,"Joc":288,"Jud":261,"Jus":113,"Jur":102,"Jul":125,"Jun":115,"Kal":121,"Kan":96,"Kat":124,"Kar":336,"Ken":149,"Kir":86,"Kin":185,"Kie":90,"Kon":94,"Kos":163,"Kre":87,"Kra":118,"Lew":83,"Lev":90,"Let":83,"Les":120,"Leo":277,"Len":176,"Lei":93,"Leg":332,"Lee":87,"Lea":120,"Lau":147,"Laz":87,"Le ":90,"Las":86,"Lat":109,"Lar":154,"Lam":95,"Lan":338,"Lac":259,"Lab":85,"La ":1181,"Lle":258,"Lib":362,"Lic":139,"Lie":89,"Lig":414,"Lim":410,"Lin":461,"Lis":431,"Lit":301,"Liv":198,"MI ":96,"Lux":121,"Lup":107,"Lum":137,"Lun":558,"Lud":146,"Luc":356,"Lou":179,"Lov":137,"Los":244,"Lot":89,"Loc":483,"Lor":132,"Lon":361,"Meh":106,"Men":147,"Mem":162,"Mel":209,"Mes":271,"Mer":299,"Met":461,"Mec":135,"Med":595,"Mex":216,"Man":1131,"Mal":342,"Mar":4668,"Mas":368,"Mag":383,"Mad":271,"Maj":121,"Mai":903,"Mac":356,"May":100,"Max":155,"Mau":132,"Mat":338,"Mod":176,"Mol":1838,"Mon":1277,"Mos":318,"Mor":392,"Mou":115,"Mot":179,"Mih":609,"Mik":106,"Mij":92,"Mid":157,"Mig":86,"Mic":936,"Mit":232,"Mir":321,"Mis":279,"Mil":373,"Min":651,"Muz":367,"Mun":1060,"Mul":94,"Mur":508,"Mus":300,"NU ":88,"Săl":172,"Săr":113,"Târ":305,"XX ":206,"XV ":95,"Wre":180,"Wor":386,"Wol":146,"Whi":111,"Răd":101,"Răz":528,"Răs":89,"Wil":518,"Win":333,"Wie":86,"Wit":97,"ère":85,"Web":84,"Wes":374,"Sân":132,"Was":121,"War":230,"Wat":92,"Wal":290,"Vra":93,"ée ":112,"Vol":202,"Voi":185,"Vis":124,"Vit":135,"Vla":243,"Ziu":91,"Zon":137,"Zee":92,"Vâl":191,"Vâr":128,"âşt":551,"Yor":388,"You":200,"на ":94,"Stă":112,"Sys":110,"Stî":142,"Sur":132,"Sus":204,"Sul":83,"Sup":346,"Sun":219,"Sue":270,"Sud":589,"Suc":243,"Sub":164,"Str":664,"Stu":292,"Sti":105,"Sto":394,"Sta":2644,"Ste":728,"Teh":119,"Tea":170,"Tec":113,"Ten":121,"Tem":164,"Teo":264,"Tel":395,"Tan":144,"Tat":111,"Tar":189,"Tai":83,"Tal":112,"UA ":470,"Sfâ":498,"Shi":108,"She":197,"Năs":219,"Sho":130,"Sha":300,"Sim":365,"Sil":252,"Sig":138,"Sit":102,"Sis":180,"Sir":233,"Sin":438,"Sie":127,"Sib":370,"Sfi":241,"Sez":141,"Ser":760,"Sev":325,"Sf ":135,"Scr":179,"Sep":495,"Sen":217,"Sel":98,"Sem":99,"Sec":315,"Sea":111,"TV ":299,"Spa":2255,"Spi":211,"Spe":223,"Spr":121,"Spo":178,"Sof":165,"Soa":134,"Soc":483,"Sou":289,"Sov":352,"Sol":260,"Som":154,"Son":175,"Sor":138,"Sla":128,"Slo":264,"Roş":199,"Rus":998,"Sai":239,"Sam":225,"Sal":380,"Sab":86,"Se ":674,"Sco":358,"Sci":132,"Sch":680,"Sca":185,"Sax":130,"Sav":134,"Sat":330,"Sau":85,"Sar":201,"San":758,"ови":108,"TA 
":103,"Rez":129,"Res":168,"Ret":90,"Rev":440,"Măn":337,"Măr":265,"Rhe":108,"Riv":111,"Rin":182,"Ric":330,"Rap":96,"Ref":116,"Rec":405,"Red":97,"Rei":117,"Reg":1929,"Rem":121,"Ren":298,"Rel":102,"Rep":1978,"Rea":273,"Rol":118,"Rob":343,"Roc":197,"Rod":127,"Roy":87,"Rot":120,"Ros":294,"Rom":7794,"SS ":239,"SO ":102,"Reş":139,"Vel":106,"Ven":242,"Vec":214,"ски":101,"Vas":382,"Van":141,"Val":1157,"Var":247,"Vic":330,"Vie":288,"Vir":168,"Vil":251,"Vin":215,"Râu":810,"Ver":423,"Ves":364,"Păm":274,"Păd":123,"Ung":582,"Uni":3993,"Un ":792,"VD ":93,"Ucr":344,"VI ":204,"Tex":109,"Ter":953,"Tes":175,"Pân":88,"Pâr":129,"Tha":108,"The":1600,"Thi":130,"Tho":227,"Tib":83,"Tim":723,"Tin":109,"Tit":147,"Tir":112,"Top":132,"Tor":259,"Tok":94,"Tol":279,"Tom":280,"Ton":126,"Tot":116,"Tou":121,"Tru":137,"Tro":183,"Tri":361,"Tre":328,"Tra":1888,"Tur":781,"Tul":107,"Tun":93,"Tud":91,"вич":136,"biz":191,"bis":684,"bit":942,"biu":329,"bio":569,"bir":494,"baţ":118,"bil":3719,"bin":1111,"bii":385,"beş":168,"bo ":147,"blu":442,"şa ":427,"blo":266,"ble":593,"bli":4345,"bla":309,"boa":326,"bol":798,"boi":1231,"bog":143,"biş":155,"biţ":97,"şe ":206,"şca":505,"şal":128,"şan":384,"şap":297,"bon":331,"bom":116,"bor":1299,"bot":233,"bos":94,"bov":171,"şar":126,"şas":296,"bou":197,"box":118,"şat":99,"be ":463,"ban":1063,"bal":2277,"bai":156,"baj":238,"bac":607,"bab":295,"án ":151,"baz":1371,"bat":704,"bas":635,"bar":766,"bdi":107,"bea":119,"bi ":576,"bei":94,"bee":100,"bec":120,"ber":2623,"ben":568,"bel":997,"bes":190,"bet":600,"bia":703,"bib":169,"bic":703,"bie":1278,"áce":223,"şnu":136,"buţ":387,"buş":122,"şoa":686,"şor":235,"şov":403,"ca ":12197,"car":20537,"cas":995,"cat":4867,"cau":568,"can":4982,"cap":1522,"caz":602,"cav":140,"cac":106,"cab":250,"cad":1721,"cam":769,"cal":7164,"caf":88,"cai":123,"şu ":299,"ce ":13744,"bri":8393,"bro":479,"şco":241,"bra":1280,"bre":732,"bu ":154,"şea":100,"bru":3685,"şed":992,"bso":227,"bse":314,"şez":265,"şev":87,"bst":592,"şel":294,"şef":250,"şer":161,"şen":171,"şi ":40695,"bur":1502,"bul":939,"bun":893,"bum":1812,"bui":746,"buc":290,"but":536,"bus":305,"buz":102,"şcă":238,"şie":311,"şii":223,"by ":257,"şia":83,"bră":230,"şit":599,"şir":239,"şin":563,"şil":101,"aka":186,"am ":1398,"ake":307,"aki":175,"ajo":811,"aju":1101,"al ":30968,"aja":297,"aje":549,"adă":644,"ail":713,"aim":241,"ain":2042,"aio":622,"air":263,"ais":301,"ait":135,"aiu":141,"ak ":275,"aie":333,"aid":284,"aic":780,"aib":127,"aia":581,"ahn":102,"ahi":154,"acă":1581,"ahu":103,"aho":221,"aj ":993,"adâ":150,"ârâ":145,"aha":317,"agl":117,"agm":128,"agh":501,"abă":348,"agi":1223,"agr":589,"agu":482,"agn":543,"ago":775,"anu":8793,"anz":399,"any":226,"ano":1227,"ann":872,"ant":7062,"ans":5187,"ane":4573,"ang":1746,"anh":83,"ani":17950,"anj":319,"ank":595,"ap ":209,"ana":3541,"anc":4078,"and":7634,"amu":1451,"amm":250,"amo":500,"amn":539,"amp":1577,"ams":162,"ami":2968,"ame":7488,"amb":1018,"ama":1598,"alz":98,"alv":263,"alu":3357,"alt":3527,"als":199,"alp":187,"alo":2252,"aln":118,"alm":409,"all":1429,"alk":125,"alg":190,"agă":206,"ali":13360,"alc":1323,"ald":503,"ale":15313,"alf":367,"ala":3376,"alb":2405,"an ":14955,"aku":88,"ako":128,"acţ":1206,"ârş":461,"aba":554,"abe":900,"abi":2311,"abl":317,"abo":655,"abr":755,"abs":332,"abu":239,"ae ":1225,"aca":773,"aaa":86,"aal":84,"aar":122,"ad ":1372,"ânt":3632,"şur":726,"ânu":94,"ânz":207,"şul":2492,"âns":100,"ac ":1040,"ştr":244,"âmt":99,"şti":6783,"şte":3540,"âmp":537,"âng":896,"âne":1748,"şta":102,"ând":5547,"âni":6072,"ab 
":204,"ânc":472,"âna":293,"afr":149,"aft":132,"aff":130,"afe":369,"afi":1982,"afl":2509,"ai ":11311,"aga":899,"age":1103,"afu":111,"âur":92,"âul":1913,"aen":112,"ael":563,"aes":133,"aer":621,"ah ":371,"âte":342,"afa":1595,"ado":765,"ârs":182,"adr":1167,"ârt":98,"adm":1348,"adj":104,"adi":2476,"ârz":238,"âu ":266,"ade":2813,"ag ":301,"ână":2880,"adt":92,"adu":1730,"aco":1240,"acl":100,"ack":689,"aci":1819,"ach":1150,"ace":7711,"ât ":1298,"acc":1560,"ârb":268,"ada":3740,"ârf":276,"ârg":288,"af ":209,"acv":115,"ârn":129,"act":5256,"acu":1523,"ârl":126,"acr":543,"azo":188,"azi":1570,"arţ":1475,"azu":425,"aze":593,"avâ":822,"aza":1518,"azd":83,"azz":139,"avă":214,"asă":1423,"axi":362,"axo":262,"az ":338,"axa":101,"atâ":459,"ată":12359,"âi ":95,"ays":88,"aya":143,"aye":215,"ân ":1771,"âlc":198,"ba ":2953,"âmb":183,"ază":4808,"âln":280,"âin":172,"at ":24564,"amă":335,"arh":1164,"arg":1153,"arf":116,"are":42152,"ard":2497,"arc":2307,"arb":848,"ara":6502,"arp":413,"aro":1047,"arn":629,"arm":1225,"arl":2466,"anç":90,"ark":583,"ari":10760,"aru":1463,"arv":129,"arr":585,"ars":566,"art":13731,"au ":13208,"asa":2050,"ary":262,"arz":166,"asi":2001,"ană":4433,"ash":375,"asc":1871,"ase":2773,"aso":520,"asn":213,"asp":360,"ask":118,"asm":175,"asl":121,"ar ":8918,"apa":2757,"ape":1246,"api":1462,"aph":142,"apl":456,"apo":1977,"app":137,"apr":2890,"aps":139,"apt":1565,"apu":270,"as ":1819,"ală":6884,"ava":1432,"ax ":193,"auz":385,"aux":116,"aut":2879,"avr":217,"avo":325,"anţ":3393,"avi":1654,"anş":96,"ave":2326,"ay ":775,"awa":162,"avy":131,"avu":1095,"ară":4267,"av ":216,"ata":4096,"asu":901,"ast":6214,"ass":610,"asy":113,"atm":162,"alţ":185,"atl":201,"atr":2501,"ato":5035,"ate":24962,"atf":138,"atc":92,"ati":8684,"ath":466,"aw ":115,"aua":249,"auc":144,"att":328,"ats":134,"atu":9240,"aul":521,"aum":109,"aun":296,"aur":1060,"aus":649,"aud":358,"aug":1299,"apă":1796,"amţ":177,"Wür":170,"ка ":90,"ий ":128,"ич ":137,"jec":116,"jel":228,"jen":194,"fâr":376,"fân":580,"ji ":99,"jat":278,"jap":309,"jar":183,"jan":170,"jaz":89,"je ":321,"joa":246,"joc":694,"joz":167,"jos":164,"jor":690,"jit":146,"jin":221,"jaţ":96,"bţi":531,"jo ":103,"jlo":496,"itm":199,"itl":818,"itr":663,"ito":6294,"itu":8009,"itt":280,"its":137,"itz":191,"ity":420,"iub":117,"iuc":160,"iua":221,"iud":230,"ipă":404,"isk":89,"ism":2124,"isl":772,"iso":596,"isn":143,"isp":881,"iss":507,"isr":101,"isu":227,"ist":19214,"iv ":3796,"ita":17021,"itc":116,"ite":7821,"ith":307,"iti":4614,"ivo":234,"ivu":281,"ius":647,"iur":519,"ium":322,"iul":7954,"iun":9474,"iva":1811,"ix ":362,"ivi":3232,"inţ":4376,"ive":4336,"ipr":212,"ipo":338,"ipp":144,"ipu":386,"ips":239,"ipt":541,"ipi":1233,"aţă":1525,"ipl":436,"is ":3857,"ion":10900,"iop":116,"ior":1475,"ios":402,"iot":422,"iou":159,"iog":188,"iol":1112,"ipa":2480,"ipe":948,"iov":274,"ir ":799,"iru":345,"irs":100,"irt":172,"iro":730,"irm":483,"eîn":134,"irk":94,"irl":333,"iri":2227,"isi":1374,"ish":315,"ină":2514,"isf":103,"ise":3028,"isc":2092,"isa":444,"iu ":4493,"iqu":132,"ilă":788,"inâ":388,"ire":5707,"imă":577,"irg":179,"ira":895,"irc":947,"it ":8220,"ünc":112,"iză":370,"ja ":337,"ită":7145,"ixt":86,"isă":796,"ixe":89,"iz ":119,"ivă":1340,"izu":147,"izv":208,"izo":952,"izi":2792,"ize":859,"iza":6312,"kil":158,"kin":314,"kir":111,"kis":140,"km ":1110,"ki ":642,"făc":542,"făr":519,"kel":127,"ken":233,"kes":118,"ker":360,"ket":144,"fă ":122,"gân":291,"kfu":88,"ke ":452,"kra":120,"kre":109,"kt ":111,"kov":198,"km²":262,"kol":93,"ks ":224,"făş":535,"cţi":4945,"ko 
":243,"jut":351,"jus":113,"jul":508,"jun":382,"jum":207,"jur":1124,"jud":4033,"juc":683,"kar":128,"kan":145,"kai":124,"kad":96,"ka ":524,"ha ":718,"ham":492,"han":1154,"hai":779,"hak":92,"hal":402,"hau":169,"har":1141,"has":93,"hat":192,"hae":287,"hag":86,"hab":90,"he ":3331,"hel":915,"hei":851,"hee":120,"hed":164,"hea":469,"hez":295,"hev":85,"het":520,"hes":378,"her":1057,"heo":670,"hen":622,"hem":280,"că ":14688,"hi ":1098,"dân":197,"căi":96,"căl":499,"căd":116,"căz":100,"căp":117,"căs":168,"căr":2073,"cău":418,"căt":3820,"hie":477,"hid":863,"hic":510,"hib":86,"hia":1153,"hip":1598,"hio":209,"hin":1334,"him":1433,"hil":698,"hii":152,"hiu":429,"hiv":233,"his":738,"hit":1266,"hir":636,"hiz":150,"hn ":414,"hle":150,"ho ":172,"gma":139,"go ":432,"gme":179,"glo":451,"gle":2336,"gli":771,"gn ":157,"gla":327,"gog":138,"goa":126,"gnu":85,"gno":150,"gni":185,"câş":533,"gne":487,"gna":170,"geş":236,"gs ":160,"gol":308,"gon":427,"gos":413,"gor":814,"got":99,"gov":160,"gu ":263,"gro":318,"gru":1821,"gra":4239,"gri":471,"gre":2106,"gto":159,"gui":168,"gum":179,"gul":1451,"gua":163,"gue":323,"gy ":135,"gră":290,"guv":925,"gur":1708,"gus":1778,"gvi":266,"iam":458,"ial":8383,"ian":7496,"ias":341,"iar":3383,"iau":106,"iat":2458,"ic ":11072,"iab":156,"iac":516,"iad":260,"iag":132,"ibl":375,"ibi":1287,"ibo":95,"ibr":438,"ibu":1167,"iaz":367,"id ":1405,"iba":208,"ibe":1115,"ia ":37192,"iet":2227,"ieu":92,"iev":591,"iew":117,"iez":102,"iel":766,"iem":1503,"ien":2907,"iep":154,"ier":4481,"ies":1526,"ied":283,"ief":128,"iei":9680,"aş ":2678,"ig ":320,"iec":2094,"ifu":325,"ifo":500,"ifr":208,"iff":103,"ife":1805,"ifi":2918,"dă ":1783,"ifa":105,"icr":452,"ics":163,"ict":2757,"icu":1908,"ico":2006,"ick":421,"icl":554,"ici":10582,"ich":1701,"ice":8554,"ie ":34669,"ica":13932,"idu":1244,"idr":458,"ido":239,"idi":1327,"idg":110,"ide":3884,"ida":1312,"if ":119,"iic":157,"iaş":207,"iaţ":1579,"idă":222,"iin":4917,"iil":3254,"iit":1059,"il ":2791,"ija":114,"iji":265,"ijl":490,"ijo":87,"im ":1225,"ika":140,"aţa":1077,"aşe":616,"ige":485,"aşc":90,"aşa":330,"iga":1279,"ii ":23496,"igm":125,"igh":1464,"igi":3078,"aşi":1717,"icâ":83,"igu":863,"aşu":2407,"aşt":704,"igr":211,"aşo":348,"igo":379,"ign":421,"dăc":87,"iha":624,"ică":10472,"ihi":105,"dău":132,"iho":423,"dăr":156,"ik ":233,"imo":864,"imn":359,"imm":132,"imp":4726,"ieş":578,"imf":123,"ime":4318,"ieţ":356,"imi":3490,"ip ":1099,"inc":6719,"ind":7549,"ina":7598,"imu":2173,"inn":213,"inm":129,"ino":1626,"int":13234,"ins":3523,"inf":1447,"ine":10869,"inh":83,"ing":4928,"ini":7437,"inl":374,"ink":229,"ioa":4790,"ioc":312,"iod":317,"inu":2953,"inv":735,"iny":91,"inz":228,"iko":237,"icţ":367,"aţi":16926,"iki":186,"ike":152,"aţe":270,"ila":1467,"ilb":98,"in ":44693,"ilo":9977,"ill":1766,"ilm":2143,"igă":285,"ilh":99,"ili":9285,"ild":180,"ile":12078,"ima":4046,"imb":4580,"io ":1569,"ily":129,"ilt":143,"ilu":581,"ilv":1166,"hiş":314,"how":106,"hol":376,"hom":292,"hon":245,"hos":162,"hot":319,"hou":159,"hov":191,"hoo":89,"hop":132,"hor":542,"hoe":88,"hoc":86,"hni":581,"hno":368,"hne":86,"heţ":86,"hul":158,"hua":109,"htt":211,"hte":132,"hro":140,"hre":117,"hri":377,"ht ":1261,"hra":577,"hiţ":84,"hy ":109,"hwa":155,"hum":1416,"hus":146,"hur":185,"fi ":1715,"ffe":164,"ffi":135,"feu":99,"fet":117,"fes":1040,"fer":4398,"fec":1006,"fed":383,"feb":993,"fem":538,"fen":538,"fel":943,"fib":88,"fia":458,"faz":136,"fas":102,"fat":182,"far":402,"fap":490,"fam":1470,"fan":745,"fal":367,"fai":144,"fac":2220,"fab":663,"ff ":157,"fe ":258,"euş":129,"eză":2753,"fa 
":130,"exu":301,"ext":1548,"etă":1447,"exa":920,"ez ":2054,"ews":115,"exp":1323,"epţ":305,"esă":354,"exi":1412,"exc":376,"exe":917,"ezv":899,"ezu":584,"evă":275,"eza":830,"ezo":739,"eze":4121,"ezi":2997,"erţ":230,"eta":3771,"epâ":360,"ete":2601,"etc":489,"eti":2973,"eth":248,"etn":363,"esp":1890,"eso":846,"est":55654,"esu":952,"ess":1010,"ev ":251,"euc":86,"eud":222,"epă":213,"eum":115,"eul":878,"eun":627,"eto":805,"etr":2728,"ets":89,"ett":511,"etu":1024,"etw":102,"ew ":746,"eve":1810,"eva":1191,"evo":1396,"enţ":4104,"evi":2580,"eut":437,"eur":1309,"eus":264,"ex ":508,"ewi":114,"eră":1931,"evr":439,"ey ":1079,"ewa":103,"epe":1003,"epi":751,"eph":288,"er ":7653,"epa":1109,"eot":142,"eos":392,"eor":2351,"eom":197,"eol":739,"eop":176,"eon":471,"elă":165,"eiţ":151,"es ":5970,"ept":3637,"epu":3456,"epl":448,"epp":122,"epo":532,"epr":2782,"erk":178,"erl":756,"eri":23643,"erg":1582,"emă":409,"erh":102,"ere":9729,"erf":436,"erc":1961,"erd":628,"era":10049,"erb":1083,"et ":2968,"esk":95,"esl":106,"esf":637,"enă":298,"esh":139,"esi":1831,"esb":88,"esc":5445,"ese":2325,"eu ":1098,"esa":2035,"erz":182,"ery":156,"erv":1894,"eru":1701,"erw":96,"err":761,"ert":2020,"ers":6219,"ern":5307,"erm":6392,"erp":946,"ero":1890,"eki":83,"ecţ":1050,"en ":4825,"elb":109,"ela":2761,"eld":314,"elf":84,"ele":22448,"eli":2942,"elg":1037,"egă":470,"elm":166,"eln":87,"ell":1629,"elo":7272,"elu":2902,"elv":133,"els":286,"elt":287,"eo ":496,"emb":7566,"ema":2057,"eme":3406,"emn":2329,"emo":1281,"ehă":102,"emi":3276,"emu":1154,"emp":1201,"ems":94,"ep ":186,"ene":5641,"enh":144,"eng":1760,"enb":358,"ena":2023,"end":1865,"enc":997,"eno":950,"enn":450,"enk":132,"eni":7113,"enu":3506,"ens":2167,"ent":26508,"enr":218,"eoa":176,"enz":462,"eog":396,"eod":283,"eoc":172,"egl":156,"ego":650,"egn":92,"ege":2159,"ecâ":351,"egi":6402,"egh":185,"egr":796,"egu":603,"ehn":787,"eho":111,"ehe":163,"ehi":357,"ecă":398,"ek ":382,"eic":203,"eia":920,"eis":644,"eir":131,"eim":251,"eil":863,"ein":887,"eii":140,"edă":119,"eaţ":502,"eie":409,"eid":483,"eig":145,"eaş":214,"eja":336,"el ":7309,"eit":209,"eiu":167,"eke":85,"em ":1443,"eju":102,"giz":650,"giu":3192,"git":384,"gis":1283,"gir":91,"gil":280,"gim":999,"gaţ":227,"gip":292,"gin":3668,"gio":683,"gid":91,"gie":1712,"gic":1515,"gii":481,"gia":2426,"bău":105,"băr":238,"ght":1266,"băt":475,"băn":99,"ghi":1143,"ghe":1448,"gha":167,"cât":1186,"gi ":458,"câi":94,"cân":2264,"câm":225,"gen":4213,"geo":702,"get":450,"ger":3349,"ges":353,"gh ":216,"bă ":1169,"gea":351,"geb":118,"gem":253,"gel":950,"gda":132,"ge ":2096,"gaz":511,"gby":87,"gas":177,"gar":1662,"gau":115,"gat":2233,"gaj":154,"gam":133,"gal":1275,"gan":2759,"ga ":1522,"îşi":423,"fuz":427,"fur":551,"fus":129,"ful":658,"fun":1914,"ftw":294,"ft ":458,"fra":2347,"fre":553,"fri":632,"fiţ":85,"fro":290,"fru":551,"for":8285,"fos":13009,"fot":1551,"fon":2006,"fol":2314,"flă":1101,"fiş":196,"feţ":166,"foc":121,"foa":663,"fle":282,"fla":1128,"fli":157,"flu":1381,"flo":329,"fic":5576,"fie":1608,"fig":239,"fii":4052,"fil":2800,"faţ":1332,"fin":1931,"fir":582,"fis":97,"fit":276,"fiu":394,"fix":216,"fiz":803,"cuţ":360,"da ":5431,"dba":199,"de ":81669,"dac":568,"dad":169,"dal":1004,"dai":112,"daj":204,"dag":131,"dae":325,"dat":5418,"dar":2392,"dap":257,"dan":788,"dam":518,"day":88,"dav":93,"dau":120,"cup":2017,"cun":3394,"cul":7448,"cum":2248,"cui":2182,"cuf":114,"cub":97,"cuc":191,"cua":310,"ctu":3820,"ctr":1989,"cto":4340,"cti":3912,"cte":2267,"cta":977,"coţ":184,"cră":288,"cy 
":117,"cve":360,"cvi":112,"cva":166,"cus":289,"cur":6209,"cut":5650,"cuv":696,"ctă":260,"Șco":105,"cks":236,"ckh":87,"cla":1563,"cle":639,"clu":1691,"cli":610,"clo":419,"ceş":351,"cmi":98,"co ":1054,"cni":95,"cod":494,"coe":86,"cof":296,"cog":235,"coa":845,"cob":156,"coc":134,"con":13122,"coo":319,"col":4664,"com":13771,"cor":2665,"cos":422,"cop":2252,"cov":597,"cot":422,"cou":153,"cs ":186,"clă":363,"cqu":87,"ct ":2355,"cre":3245,"cra":1687,"cri":4361,"cru":523,"cro":1167,"cu ":14527,"cci":346,"cca":249,"cce":1997,"cea":5425,"cez":1350,"ch ":1051,"cev":105,"cer":3600,"ces":6831,"cet":999,"ceu":226,"cen":3570,"cep":2469,"cem":1612,"cel":8680,"cei":1228,"ceh":124,"cee":645,"ced":641,"ci ":3845,"Ști":135,"cha":1202,"Ște":246,"chw":220,"chu":202,"cia":8398,"ck ":1749,"cie":2502,"cid":710,"cic":613,"che":3039,"chl":127,"chi":6835,"cho":240,"chn":170,"chs":144,"cht":297,"ciz":320,"civ":411,"caţ":977,"cil":1200,"cim":273,"cif":735,"caş":448,"cii":3633,"cir":703,"cis":679,"cit":1218,"ciu":1361,"cin":2022,"cio":753,"cip":3172,"cm ":96,"cke":198,"ed ":964,"eba":172,"ebe":338,"ebi":493,"ebo":152,"ebr":2253,"ebu":793,"ec ":965,"eac":1155,"eag":683,"eae":329,"ead":219,"eak":166,"ean":4116,"eal":2295,"eam":780,"ear":779,"eas":2985,"eap":381,"eav":360,"eat":1944,"eau":812,"eaz":4296,"eb ":255,"ea ":37128,"efi":943,"efl":126,"efo":468,"efa":467,"efe":1778,"eff":109,"ei ":24037,"ega":2463,"eft":125,"efu":229,"eek":131,"een":318,"eel":266,"eea":861,"eed":149,"eer":336,"eep":102,"eet":278,"edi":6693,"ede":2885,"eda":988,"eg ":239,"edu":500,"edo":913,"edr":363,"ecl":455,"eck":180,"ech":2954,"eci":4375,"ece":4160,"eca":1262,"ee ":711,"ef ":336,"ecv":378,"ecu":2928,"ect":7556,"ecr":556,"eco":3176,"dwi":90,"dwa":128,"dy ":306,"dve":124,"dur":1540,"dut":183,"dus":2699,"duş":94,"dor":982,"dop":268,"don":1156,"dom":1511,"dol":361,"dox":527,"dow":256,"dov":2135,"dou":2054,"dos":139,"ds ":634,"diţ":1218,"diş":157,"deţ":4616,"dmi":1594,"dne":111,"doa":843,"dob":218,"doc":617,"dof":143,"doi":1173,"Țar":280,"dun":278,"dum":103,"dup":2223,"dui":145,"dul":3772,"dub":195,"dua":160,"duc":3456,"dri":740,"dra":1452,"dt ":159,"dre":2042,"du ":925,"dro":713,"dru":1503,"dge":174,"dic":3233,"did":167,"dia":4181,"der":5158,"des":4508,"det":654,"deu":131,"dev":1104,"dez":1907,"deb":451,"dea":1065,"ded":225,"dec":2801,"def":691,"dee":144,"deg":147,"dej":85,"dei":744,"del":2089,"den":4447,"dem":1462,"dep":2099,"deo":975,"di ":446,"dle":126,"dla":95,"deş":387,"do ":923,"dja":87,"div":1213,"diu":1854,"diz":147,"dim":595,"din":41384,"dio":1557,"dip":176,"dir":1178,"dis":3885,"dit":1572,"die":1235,"dif":1731,"dig":295,"dii":366,"daţ":232,"dil":110,"rgu":556,"rhe":302,"rj ":109,"rha":273,"rcă":369,"rhi":1066,"măr":2035,"măs":677,"măt":499,"măn":620,"rfu":326,"rga":2411,"ri ":15694,"rgi":1524,"rgh":746,"rbă":345,"rge":1425,"rgo":215,"ret":2582,"res":5915,"rev":1477,"reu":1156,"rew":92,"rez":5275,"rey":127,"mă ":3118,"rfa":103,"rfe":202,"rfi":117,"nân":632,"rfo":213,"rdu":767,"rds":382,"rg ":1476,"reb":516,"rea":18674,"ree":777,"ref":1708,"rec":5448,"red":1424,"rei":3494,"rej":104,"reg":5973,"rem":2573,"ren":2941,"rek":143,"rel":3576,"rer":308,"reo":342,"rep":4378,"rf ":235,"rda":852,"rcu":1210,"rct":172,"rdo":623,"rdi":1685,"rde":964,"re ":51071,"rbu":436,"rco":232,"rci":1263,"rch":609,"rce":1470,"rca":1029,"ray":96,"raz":629,"rd ":3327,"rap":865,"rar":2929,"ras":1358,"rat":9747,"rau":625,"rav":765,"rbi":1165,"rbo":575,"rba":839,"rbe":545,"rc 
":287,"raj":371,"rai":1767,"rah":296,"rag":920,"ran":9556,"ram":2869,"ral":5890,"rak":183,"rab":1135,"raf":2847,"rae":350,"rad":3282,"rac":2468,"rpu":387,"rpr":755,"rpo":278,"rs ":1565,"rpe":182,"rpa":307,"rpi":136,"ror":724,"ros":1102,"rot":1489,"rom":6745,"ron":3428,"roo":164,"rop":6403,"roz":338,"rou":752,"rov":4132,"row":176,"rox":732,"rob":828,"roa":1307,"rod":3362,"roc":2541,"roi":1094,"rol":1987,"rof":1423,"roe":237,"roh":90,"rog":1247,"rno":327,"rnu":701,"rp ":237,"rna":2969,"rne":1758,"rni":1303,"reţ":1190,"rmo":436,"rmu":522,"ro ":2108,"rma":9885,"rme":3062,"reş":4337,"rmi":1348,"rls":94,"rlo":266,"rgă":207,"rli":413,"rld":353,"rle":420,"rla":2658,"rn ":698,"rks":134,"rke":254,"rm ":799,"riz":1053,"rl ":211,"rip":540,"rio":5018,"rir":448,"rit":7787,"ris":4701,"riv":1528,"riu":2574,"rdă":172,"rig":3530,"raş":5092,"rij":321,"rii":7673,"ril":8587,"rik":126,"raţ":2453,"rin":9619,"rim":5400,"ria":7770,"rib":1255,"ric":11723,"rid":1224,"rie":15408,"rif":356,"rk ":829,"roş":363,"rsă":312,"rui":1637,"rug":366,"rud":206,"ruc":1749,"rur":434,"rup":3037,"run":754,"rum":1184,"rul":9603,"ruz":121,"rux":123,"rus":1703,"rut":1017,"rva":737,"rvi":837,"rve":659,"rvo":119,"ry ":1178,"rsk":120,"rsi":1656,"rso":2085,"rsc":90,"rsa":860,"rnă":515,"rsh":97,"rse":1189,"rta":3149,"rst":426,"rsu":1104,"rto":1146,"rte":6836,"rth":550,"rti":6445,"rua":1449,"rts":182,"rtr":166,"rtu":1485,"rtt":156,"riţ":1112,"riş":356,"rmâ":178,"rt ":2203,"rro":293,"rmă":1457,"rri":340,"rre":470,"rra":339,"ru ":12849,"rry":345,"sc ":2232,"sab":243,"sac":340,"sad":91,"sag":137,"sai":83,"saj":124,"sal":1207,"sam":459,"sbe":121,"san":693,"sau":8354,"sat":4809,"sas":155,"sar":1304,"sa ":3104,"ón ":120,"ruş":110,"rze":109,"rtă":914,"rzi":434,"sha":218,"sho":144,"năr":653,"năs":2089,"năt":775,"she":136,"scă":1232,"shi":636,"năl":262,"si ":1523,"sfâ":443,"siv":865,"sie":984,"sid":975,"sic":666,"sib":419,"sia":1164,"sk ":250,"nău":325,"sit":6826,"siu":1567,"sir":294,"sis":2018,"sip":93,"sin":2491,"sio":987,"sil":2030,"sim":1237,"sih":314,"sii":129,"sif":302,"sig":587,"scr":3431,"scu":7764,"sbu":216,"se ":12289,"sca":1105,"sce":729,"sci":701,"sch":1873,"sco":2491,"sex":343,"sey":122,"ser":5132,"ses":902,"set":368,"seu":253,"sfa":100,"sez":381,"sh ":403,"nă ":12507,"sfi":168,"sfe":495,"sfo":294,"sea":1239,"sei":529,"see":104,"sed":775,"sec":2976,"seb":398,"sep":1669,"seo":135,"sen":1576,"sem":2294,"sel":1265,"spu":788,"spo":1513,"spr":1609,"spe":3508,"spi":782,"spa":1327,"sou":172,"sov":534,"sol":1405,"som":99,"son":2390,"sop":133,"sor":985,"sos":101,"sod":147,"sof":666,"soa":1403,"soc":1613,"su ":139,"sra":283,"st ":20718,"ss ":769,"sli":132,"slo":207,"slu":183,"sla":1454,"sle":117,"ski":424,"sfă":558,"sko":122,"sm ":736,"ska":303,"sna":139,"sni":276,"sne":177,"smo":155,"seţ":118,"smu":996,"so ":251,"sma":281,"seş":442,"smi":416,"sme":397,"soţ":294,"stâ":421,"stă":4284,"sse":604,"ssa":443,"sso":336,"ssi":591,"ssu":98,"ste":53928,"spâ":306,"stf":531,"sta":11097,"stm":85,"sto":3769,"sti":8907,"stl":380,"stu":4079,"str":12855,"sty":123,"sud":1668,"sue":207,"sub":3715,"suc":517,"spă":179,"suf":275,"sul":3772,"sum":370,"sup":2843,"sun":3645,"sut":208,"sus":705,"sur":1681,"suv":86,"sy ":155,"tai":479,"taj":164,"tal":7011,"taf":116,"tag":288,"tab":988,"tac":696,"tad":477,"tc ":460,"tba":1494,"tax":122,"tav":190,"tau":320,"tat":21973,"tas":470,"tar":6168,"tap":132,"tan":6974,"tam":968,"tch":206,"te ":79600,"suş":110,"ta ":8238,"oză":144,"pa ":2141,"oxă":260,"pe 
":11904,"par":10822,"pat":2004,"pas":403,"pay":83,"pac":752,"pad":108,"pab":102,"pag":502,"pal":1642,"pai":83,"pap":190,"pam":183,"pan":5506,"phe":183,"pha":169,"pho":127,"phi":253,"pi ":238,"ph ":186,"lân":506,"pea":2043,"pec":3297,"ped":445,"pen":9052,"per":11036,"pet":1065,"pes":1059,"pei":1300,"pel":911,"pla":2428,"pli":1624,"ple":1254,"plo":453,"plu":926,"phy":102,"pia":1002,"pid":288,"pic":1543,"pie":1703,"pii":550,"paţ":844,"pil":610,"pin":728,"pio":918,"pir":800,"pis":848,"pit":1452,"piu":839,"poz":1416,"por":4692,"pop":3746,"pov":414,"pot":1327,"pos":1095,"poi":429,"pog":111,"pom":116,"pon":1731,"pol":4578,"poa":1462,"poe":647,"poc":495,"pod":314,"ps ":147,"ppe":285,"peş":178,"po ":105,"pta":1006,"pse":287,"psi":410,"pso":85,"ptu":1231,"pub":3609,"pte":2615,"pti":445,"pto":319,"pra":2308,"pt ":1178,"plă":316,"pru":251,"psa":94,"pri":13510,"pre":11982,"pro":14449,"pră":140,"ptă":561,"pur":1199,"pus":1351,"put":2590,"pun":2005,"pul":6398,"px ":1151,"puş":180,"puţ":344,"mân":11918,"lă ":9449,"iş ":380,"lăc":209,"lăd":384,"lăr":280,"lău":157,"lăt":576,"işa":158,"işc":590,"işe":100,"işi":457,"işo":462,"işn":143,"işu":248,"işt":1090,"iţe":420,"iţi":4507,"iţa":1141,"lăţ":94,"iţă":635,"qua":125,"que":368,"qui":218,"ra ":10628,"rb ":100,"ngo":269,"ngi":1205,"ngl":3065,"ngv":280,"ngu":1282,"ngr":328,"ngt":167,"ngs":258,"ni ":7085,"nge":1701,"ncâ":95,"ngh":727,"nga":1437,"nha":227,"ncă":762,"neg":502,"nei":3106,"nel":2471,"nen":1433,"nem":658,"nep":189,"neo":674,"ner":4256,"net":2067,"nes":1905,"nev":324,"neu":474,"ng ":2660,"nea":6161,"nec":713,"ned":727,"nee":362,"nef":188,"nfi":1313,"nfo":1463,"nfl":766,"nfr":253,"nfu":123,"ney":389,"nez":1743,"nex":158,"nfa":94,"nfe":701,"ncr":242,"nct":978,"nco":1033,"nci":6536,"ncl":974,"nce":5421,"nch":1298,"nca":697,"ne ":15507,"nbu":225,"ndu":3412,"ndr":2268,"nds":282,"ndo":1104,"ndi":3514,"nde":6309,"ndb":189,"nda":4140,"ncy":92,"ncu":853,"nal":8677,"nam":764,"nan":953,"nap":143,"nar":3126,"nac":272,"nad":606,"nag":295,"nah":123,"nai":648,"naj":750,"nc ":203,"nbe":172,"nd ":10185,"nav":597,"nau":373,"nat":5278,"nas":876,"naz":192,"na ":10566,"muş":108,"moţ":117,"nyi":101,"ntă":3762,"nz ":336,"ntâ":712,"nsă":705,"noţ":101,"ny ":515,"noş":120,"nvi":271,"nux":134,"nve":1249,"nva":170,"nul":11142,"num":9716,"nun":659,"nui":2359,"nus":463,"nut":1282,"nuu":86,"nuv":87,"nur":524,"nua":2364,"nue":133,"nuc":341,"nty":205,"nto":1327,"ntu":5345,"nts":104,"ntr":20429,"nti":5120,"nth":232,"nta":5926,"nte":13468,"nsu":2110,"nsn":130,"nsm":304,"nsp":737,"nso":628,"nst":5554,"nsf":385,"nse":1387,"nsh":116,"nsi":3286,"nsl":126,"nsk":220,"nsc":408,"nsa":2214,"nu ":2393,"nru":101,"nri":188,"nre":465,"nt ":10809,"niţ":1334,"niş":600,"ns ":1634,"noc":153,"nod":134,"noa":802,"nob":170,"nog":217,"nol":681,"noi":1449,"nop":228,"nom":2958,"non":580,"not":496,"nos":3264,"nor":3599,"nov":557,"nou":744,"noz":170,"nr ":214,"nne":796,"nna":357,"nno":163,"nni":320,"nny":106,"nme":156,"nma":86,"neţ":165,"neş":1107,"nli":309,"ngă":685,"nn ":394,"nla":407,"no ":1211,"nlo":244,"nkf":88,"nke":129,"ncţ":1362,"nki":120,"nka":125,"nkt":91,"înţ":344,"nje":175,"nja":194,"nju":195,"nii":3788,"ndă":489,"naş":348,"nig":141,"nif":691,"nie":6578,"nid":183,"nic":6607,"nib":123,"nia":13403,"nk ":409,"niz":1868,"nix":112,"niu":1843,"niv":2033,"nis":4310,"nit":7595,"nir":583,"nio":766,"nim":2048,"nin":1227,"naţ":2629,"nik":97,"nil":2118,"ogr":2357,"ogu":334,"ogi":3138,"ogl":113,"ogo":258,"ogn":118,"oga":345,"ogd":87,"oge":523,"oi 
":1939,"ohi":99,"oho":87,"ohn":440,"oha":219,"ocă":128,"ohe":86,"ois":381,"oir":138,"oiu":600,"oit":118,"oin":312,"oaţ":87,"oil":1036,"oaş":230,"odă":270,"înă":265,"oii":90,"oic":184,"oid":249,"oie":2586,"ok ":171,"oia":306,"obţ":455,"ol ":1950,"oiz":87,"oce":1371,"och":606,"oci":2584,"ock":1201,"ocl":282,"ocm":112,"oco":593,"împ":1524,"ocr":595,"obs":247,"obu":209,"oe ":171,"oca":5500,"occ":161,"îmb":234,"ode":1683,"odi":1388,"înv":645,"odo":1005,"înt":5727,"înr":506,"îns":1196,"odr":168,"ocu":4799,"înl":252,"oct":1467,"îng":438,"înf":1221,"of ":1300,"înd":673,"înc":2936,"îna":811,"oda":803,"oel":100,"oem":170,"oes":95,"oet":442,"oen":174,"ody":97,"odu":3682,"og ":550,"ofi":1691,"ofu":93,"oft":569,"ofo":587,"oez":194,"off":117,"ofe":1241,"ofa":91,"nzâ":121,"ob ":212,"îl ":203,"nză":237,"oc ":1888,"oap":522,"oan":2626,"oam":565,"oal":517,"oai":251,"oad":3000,"oac":410,"în ":51095,"oba":769,"od ":1364,"oar":8811,"oas":2028,"oat":2201,"obo":275,"obr":179,"obl":493,"obi":2216,"obe":608,"nza":270,"nze":236,"nuă":93,"nzi":410,"nzo":124,"nvă":302,"îi ":276,"otă":208,"oya":123,"oxi":1058,"oxe":145,"oz ":239,"ows":299,"own":215,"oră":461,"owi":83,"ovă":83,"orţ":934,"ozo":459,"oze":129,"ouă":1877,"ozi":1701,"oza":446,"otu":494,"oua":772,"ow ":278,"olţ":86,"oti":981,"oth":247,"ote":1814,"ott":429,"ots":89,"otr":617,"oto":1723,"ost":14678,"osu":176,"ota":1624,"otb":1480,"ov ":924,"osi":2418,"osh":105,"onă":692,"ose":1383,"osf":245,"osp":184,"oss":270,"osm":217,"osl":251,"oso":685,"osn":103,"oy ":170,"owa":92,"owe":145,"ovi":5242,"onţ":708,"ovo":380,"ovs":162,"ouv":83,"ox ":338,"ova":2953,"ove":2093,"orâ":88,"oug":124,"oui":176,"oul":710,"oun":598,"oup":188,"ous":347,"our":791,"out":476,"opo":1954,"opp":88,"opi":1812,"opl":160,"ope":4845,"oph":214,"opa":1115,"os ":2802,"opu":4445,"opr":1065,"opt":660,"ops":95,"oon":154,"ool":264,"ook":218,"ood":367,"or ":26373,"oot":145,"oor":339,"oop":114,"ork":590,"orl":449,"orm":7578,"orn":1037,"oro":968,"orp":872,"orr":316,"orc":286,"ord":5185,"ore":2827,"orf":352,"org":3181,"omă":1147,"ori":16103,"orj":132,"ou ":829,"osa":293,"osc":3302,"ort":5002,"ors":359,"orv":285,"oru":3894,"ory":244,"olă":311,"omâ":10245,"m² ":271,"ot ":1395,"orb":953,"ora":7147,"ola":2095,"old":2473,"olc":184,"on ":8021,"olj":96,"oli":6258,"oll":628,"olk":148,"olf":429,"ole":2350,"olg":83,"ols":165,"olt":1104,"olm":146,"oln":127,"olo":7169,"oly":117,"olu":4417,"olv":270,"om ":2396,"oki":92,"oke":139,"ona":10266,"ond":4478,"onc":1725,"onf":1659,"one":3635,"ong":702,"onj":211,"oni":6032,"onl":252,"onn":355,"ono":3183,"ons":6444,"ont":4103,"onu":2421,"onv":453,"ony":208,"onz":212,"oma":4275,"oo ":114,"ome":3698,"omb":2289,"omi":3631,"omm":249,"omp":6241,"omn":706,"omo":1504,"omu":6796,"op ":1017,"la ":23354,"lb ":181,"le ":40374,"lce":389,"lca":429,"lcl":134,"lch":83,"lci":164,"lcu":522,"lco":212,"lf ":227,"lde":317,"lda":182,"ldo":2003,"ldi":182,"ldu":132,"lab":636,"lac":1028,"lad":507,"lae":470,"lah":161,"lag":391,"laj":309,"lai":336,"lal":366,"lan":7785,"lam":2475,"lap":209,"lar":3822,"lat":4828,"las":2223,"lax":136,"lau":569,"lav":737,"lay":251,"lba":949,"ld ":1067,"lbe":538,"lbi":174,"lbo":115,"lbu":1909,"ky ":197,"kso":178,"lpi":329,"lph":128,"ls 
":385,"lpt":166,"lon":2782,"lom":608,"lop":364,"loo":88,"lor":17380,"lod":381,"loc":8139,"log":3938,"loi":220,"los":2548,"lot":440,"lou":173,"lov":661,"low":107,"loz":395,"lni":416,"lne":136,"loa":1027,"lob":416,"lmo":117,"lmi":101,"leţ":90,"lme":740,"leş":291,"lma":385,"lna":86,"lmu":506,"lth":83,"lti":1438,"lto":515,"ltr":114,"ltu":1401,"lud":310,"luc":1053,"lub":644,"lua":671,"lug":175,"lue":1286,"lsi":131,"lso":99,"lst":136,"lta":1833,"lte":3356,"lu ":1358,"lse":84,"lsa":93,"liţ":378,"ía ":107,"liş":264,"lt ":1778,"găt":711,"gău":109,"găr":308,"găs":508,"lhe":118,"lcă":318,"lj ":83,"lgo":101,"lge":272,"lgi":975,"lbă":158,"li ":1453,"lga":583,"lfu":196,"lfo":121,"gă ":1042,"lfa":415,"lez":1735,"ley":333,"lex":1098,"leu":326,"lev":1072,"les":2557,"let":1046,"ler":1357,"leo":330,"lep":98,"lem":2259,"len":1650,"lek":93,"lel":1961,"lei":1502,"leg":2868,"lef":302,"lee":148,"led":336,"lec":2987,"leb":331,"lea":5194,"lls":187,"llu":109,"lly":238,"lo ":746,"lla":922,"lle":1509,"lli":1027,"llo":392,"ln ":118,"lm ":1367,"ll ":1537,"lit":10843,"lis":3477,"lir":181,"lip":665,"lio":847,"lin":3950,"lim":4821,"liz":3596,"liv":266,"liu":800,"lic":6840,"lid":584,"lia":5811,"lib":815,"lk ":118,"lik":113,"laţ":3598,"lil":379,"lii":609,"lig":1016,"laş":630,"lie":5757,"lif":704,"ma ":3957,"mb ":1486,"mac":332,"mai":9505,"maj":528,"mad":521,"mae":84,"mag":1419,"mar":6545,"mas":1035,"mal":1356,"mam":301,"man":9186,"max":174,"mat":6955,"mba":2864,"mbl":374,"mbi":1641,"mbe":588,"mbr":9100,"mbo":699,"me ":5049,"mbu":511,"med":2490,"meg":129,"mea":1453,"mec":632,"eş ":976,"met":2646,"mes":1043,"mer":5757,"mem":2715,"mel":4206,"men":12391,"mei":953,"hă ":127,"mez":105,"mex":145,"mfo":85,"luz":516,"lva":1356,"lve":558,"lvi":203,"lul":6901,"luj":787,"lui":26427,"lup":481,"lun":1806,"lum":1944,"lut":376,"lus":1014,"lur":909,"lux":157,"luv":232,"ly ":423,"ltă":720,"luţ":840,"mpi":1771,"mpe":1922,"mpr":847,"mpo":2882,"mpl":2315,"mpu":2400,"mps":120,"mpt":100,"ms ":246,"mog":102,"moc":469,"mob":459,"mod":2074,"mon":3526,"mom":281,"mol":693,"mov":314,"mor":1244,"mos":450,"mot":668,"mou":110,"mpa":3076,"mnâ":163,"miş":960,"miţ":248,"mto":98,"mst":145,"mnă":376,"moş":151,"my ":245,"mur":1183,"mus":438,"mut":273,"mpă":755,"mul":9511,"mun":7969,"muz":1842,"eşt":6641,"eşu":215,"eşa":195,"mi ":621,"eşi":561,"mbă":561,"eşe":1204,"min":5735,"mio":138,"mil":3851,"mir":1594,"mis":1523,"mit":6241,"miu":551,"miz":90,"mix":115,"eţ ":436,"mic":3196,"mia":589,"mig":156,"maş":321,"mif":173,"mie":1057,"mid":167,"maţ":1711,"mij":400,"mii":918,"mo ":311,"mn ":1067,"eţu":4047,"eţi":1636,"eţe":1089,"eţa":97,"mm ":112,"moa":417,"mnu":348,"mni":697,"mna":983,"mne":568,"mmy":134,"mp ":917,"mmo":100,"mma":189,"meş":407,"mme":257,"ăţă":263,"vă ":1622,"văr":375,"văz":128,"sţi":293,"văţ":340,"ziţ":770,"zvo":1221,"zua":152,"zur":207,"zul":782,"zut":213,"zz ":105,"rşi":489,"zi ":1628,"vâr":386,"vân":1791,"zec":677,"zei":499,"zea":690,"zdu":96,"uă ":1773,"zeu":674,"zes":98,"zen":2310,"zel":386,"zer":601,"ze ":1470,"zbo":1342,"zbu":165,"zf ":120,"zac":138,"zah":127,"zam":95,"zan":432,"zar":1712,"zau":120,"zat":4549,"zoa":213,"zot":117,"zor":484,"zom":97,"zon":1721,"zol":318,"zof":297,"rţă":100,"zo ":122,"rţe":443,"rţa":169,"rţi":2629,"rţu":83,"rţ ":87,"uăz":94,"zib":96,"zia":1295,"zie":586,"zid":234,"zic":2773,"zii":433,"zin":2192,"zim":92,"zil":806,"zaţ":846,"zio":247,"zir":91,"zis":507,"zit":1014,"ziu":1413,"yst":189,"ysi":92,"ys ":152,"tăţ":3951,"yon":90,"za ":1974,"ywo":86,"ye ":94,"tă ":27261,"yer":209,"ya ":253,"yan":149,"yn ":128,"yle":187,"yo 
":86,"yne":94,"tăl":523,"tăn":89,"tăm":184,"tăz":367,"tăt":383,"tăr":1670,"tăp":108,"yin":96,"tîn":184,"xt ":219,"xtr":935,"xtu":126,"xte":487,"xti":276,"xpr":511,"xpo":137,"xpl":405,"xpe":325,"xon":265,"ăţi":4022,"ăţe":252,"ăşe":185,"ăşi":118,"ăşo":168,"ăşu":379,"ăşt":194,"xul":217,"xua":180,"săm":195,"săn":94,"săi":149,"tât":368,"târ":286,"tân":1123,"să ":6878,"tâi":116,"tâl":280,"tâm":92,"xem":547,"xer":167,"xec":276,"xel":184,"pţi":497,"xis":976,"xil":150,"xim":851,"xid":122,"xic":439,"xig":85,"săr":830,"săs":204,"săp":211,"săt":291,"său":989,"xcl":154,"xce":187,"xe ":275,"xat":91,"xan":620,"xac":149,"ww ":168,"www":168,"oţi":926,"oţe":90,"wn ":181,"ws ":286,"wre":154,"wor":208,"woo":193,"răş":270,"răţ":90,"sân":187,"ră ":10046,"wer":171,"wel":112,"wei":114,"web":177,"oş ":108,"răb":152,"răd":292,"răc":168,"răg":121,"răj":88,"răi":753,"răn":163,"răm":369,"oşt":362,"oşu":230,"oşa":206,"oşi":435,"sâr":193,"wis":119,"wig":94,"wic":83,"win":151,"rău":112,"răs":796,"răt":330,"răr":765,"răz":877,"wa ":123,"wan":150,"wal":599,"way":113,"war":714,"viţ":222,"viş":91,"vro":102,"vri":167,"vre":765,"vra":124,"vsk":216,"vut":1014,"vul":660,"vy ":150,"via":1245,"nţ ":202,"vio":597,"vir":425,"vaţ":256,"vil":1004,"vin":4712,"vig":218,"vii":288,"vic":1489,"vid":1022,"vie":1344,"viz":1818,"viu":343,"vit":1797,"vis":1422,"nţa":4036,"nţe":2750,"nţi":4753,"nţu":270,"vo ":269,"veţ":303,"veş":233,"voa":145,"voc":768,"vod":247,"voi":344,"vol":2372,"von":307,"vor":1265,"vot":169,"vos":108,"nţă":2104,"vi ":279,"rât":86,"râu":1467,"râr":88,"vez":158,"ver":5755,"ves":2284,"vet":135,"râm":188,"rân":915,"vei":383,"veh":194,"veg":396,"ven":3368,"vel":1246,"vea":1522,"ved":733,"vec":1443,"ve ":2034,"val":1848,"van":2141,"var":1789,"vat":1229,"vas":355,"vaz":94,"vac":381,"vad":175,"vai":89,"uză":207,"uzâ":97,"va ":4378,"usţ":288,"uzu":128,"uzi":2633,"uvâ":532,"uze":512,"uza":616,"ută":2040,"uxe":267,"usă":1693,"uz ":167,"ură":2750,"upă":3161,"ux ":398,"uvi":580,"unţ":1150,"uve":1576,"urâ":83,"ună":2252,"ush":108,"usi":1228,"use":1795,"usc":466,"usa":280,"uu ":89,"usu":380,"ust":4305,"uss":282,"uso":94,"ulţ":391,"uth":338,"uti":1787,"ute":2406,"uta":1302,"utt":99,"uts":136,"utu":1616,"uto":3287,"utr":237,"us ":5190,"ulă":561,"ut ":7246,"urb":404,"ura":6173,"urd":294,"urc":802,"ure":4226,"umă":1226,"urg":1586,"uri":11434,"urk":100,"urm":1472,"urn":1161,"uro":4600,"urp":138,"urr":101,"urs":2050,"urt":1427,"uru":770,"ury":127,"urz":103,"unz":236,"upa":1044,"ur ":1460,"păş":90,"upi":117,"upe":1889,"upo":171,"upr":3078,"upl":134,"upt":549,"upu":1021,"ump":261,"umu":1472,"umi":4694,"umo":274,"umn":200,"uma":1213,"umb":1802,"ume":7127,"unt":4936,"uns":365,"unu":4147,"unk":173,"uni":10730,"uno":4024,"unn":89,"unc":3006,"und":2612,"una":3959,"ung":1996,"une":10686,"up ":833,"ucţ":871,"um ":3644,"ulu":21791,"ult":5763,"uls":125,"ulp":273,"ulo":572,"ulm":181,"ull":195,"uli":2417,"ugă":238,"ulg":558,"ule":2260,"ulc":332,"ula":6764,"ulb":106,"un ":26513,"uid":145,"uie":791,"uaţ":284,"uil":227,"uin":534,"uir":346,"uis":305,"păt":304,"păs":368,"uk ":108,"mţ ":178,"uia":763,"uit":3995,"ul ":62901,"ucâ":92,"ugh":328,"ugi":103,"ugb":125,"uge":384,"ugo":240,"ufu":127,"ui ":29161,"uga":410,"păi":109,"ucă":1628,"păd":164,"păr":2478,"păm":252,"ugu":1856,"uha":84,"uj ":675,"uco":175,"ucr":1114,"uct":1068,"ucu":1790,"uda":563,"ude":4922,"udi":2446,"ubo":209,"ubm":96,"ubs":626,"ubt":149,"ubu":425,"uca":1046,"ue ":469,"uce":1968,"ucc":519,"uci":711,"uch":237,"ucl":385,"uck":119,"uer":254,"ues":246,"pă 
":4354,"uff":102,"ufe":113,"ufi":109,"ufl":99,"udu":394,"udo":402,"ug ":96,"ued":469,"uea":236,"uen":1166,"uel":339,"ub ":2463,"tuţ":713,"ua ":1456,"uat":4356,"uar":3767,"ual":1937,"uan":471,"ubi":491,"ubl":4031,"ube":197,"uba":209,"ubc":93,"ubd":102,"ud ":2013,"uad":93,"uc ":293,"tze":90,"tyl":114,"tuş":101,"ty ":780,"twa":283,"trâ":424,"tur":9303,"tus":270,"tut":920,"tui":1608,"tul":18833,"tun":827,"tum":175,"tua":5218,"tud":1979,"tue":106,"tug":400,"tz ":249,"two":138,"toţ":88,"tră":1759,"toş":149,"ts ":552,"tiţ":743,"tiş":169,"tre":13329,"tt ":332,"tra":10048,"tri":7435,"tru":15106,"tro":4167,"tu ":332,"tsc":140,"tsu":91,"tst":102,"tta":206,"tte":678,"tti":236,"ttl":111,"tto":150,"ttp":214,"tts":95,"teş":321,"tme":92,"tma":135,"to ":1187,"tmo":189,"tni":304,"tne":121,"tp ":211,"tna":149,"tno":89,"tof":98,"tod":1039,"toc":696,"toi":238,"tog":562,"tob":142,"toa":4019,"tou":153,"tov":167,"tos":511,"tot":1088,"tox":87,"tom":2691,"ton":3163,"tol":1579,"tor":17808,"top":634,"tr ":2261,"tii":1559,"til":2704,"taţ":1027,"tif":641,"tie":3100,"taş":87,"tig":849,"tir":1365,"tit":3982,"tis":1483,"tin":5976,"tim":3231,"tip":1320,"tio":2187,"thu":1487,"tia":853,"tib":219,"tic":12965,"tid":1357,"tiz":272,"tiu":313,"tiv":7387,"lţi":875,"tli":252,"tlu":762,"tla":333,"tle":501,"tem":5382,"ten":2986,"teo":833,"tep":232,"tei":2254,"tej":251,"tel":8378,"tee":83,"tef":321,"teg":901,"teh":698,"tea":10690,"teb":131,"tec":1715,"ted":479,"pân":2359,"tfo":164,"tfe":489,"tfa":104,"th ":1080,"tez":721,"tex":467,"tev":339,"teu":83,"tet":387,"tes":961,"ter":17000,"ti ":4750,"pâr":90,"tho":249,"thr":172,"the":1255,"thi":201,"tha":314,"ăpi":117,"ăr ":639,"ărg":110,"ări":6448,"ărc":171,"ăre":1669,"ărb":346,"ăra":1104,"ăpu":119,"ăpt":200,"ălă":333,"ămâ":1238,"ăsp":468,"ăsu":674,"ăst":933,"ăta":498,"ăte":138,"ăpâ":120,"ăti":182,"ălţ":396,"ăto":3911,"ăro":211,"ăru":1791,"ărt":197,"ărs":101,"ău ":1645,"ăsc":1578,"ăse":603,"ănă":998,"ăsi":241,"ăut":251,"ărâ":207,"ătu":1360,"ătr":2034,"ăud":174,"ăul":112,"ăsă":746,"ără":1000,"ăzu":203,"ărţ":804,"ăzi":480,"ăze":118,"ăzd":88,"ăzb":1237,"ătă":689,"ăuţ":165,"ăce":168,"ăca":435,"ăci":357,"ăcu":469,"ăde":218,"ădi":504,"ădu":392,"ăi ":268,"ăia":213,"ăie":323,"ăcă":141,"ăl ":131,"ăin":267,"ăit":151,"ădă":164,"ăil":231,"ăgă":140,"ăle":134,"ălc":90,"ăld":111,"ăla":258,"ălb":84,"ăma":126,"ămi":144,"ăli":530,"ălu":247,"ăne":410,"ănc":84,"ăni":157,"ză ":7802,"zăr":410,"zăt":197,"zău":117,"zân":327,"ürt":162,"uş ":157,"xă ":377,"uţi":2814,"uşu":97,"uşt":92,"uşo":225,"uşe":140,"uşc":117,"uşi":662},"n_words":[8213281,9706468,7977386],"name":"ro"}
\ No newline at end of file
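The hunk above adds the Romanian profile as a single JSON line of the form {"freq": {...}, "n_words": [...], "name": "ro"}: "freq" maps 1- to 3-character n-grams to their corpus counts, "n_words" appears to hold the total number of 1-, 2- and 3-gram occurrences, and "name" is the language code. Below is a minimal sketch of reading such a file under Python 3; it is an illustration only, not langdetect's own loader, and the load_profile helper and hard-coded path are hypothetical. Opening the file as UTF-8 text, so json.load() yields str rather than Python 2 byte strings, is exactly the kind of detail this py2-to-py3 conversion touches.

    import json

    def load_profile(path):
        """Parse one profile file: {"freq": {...}, "n_words": [...], "name": ...}."""
        # Explicit UTF-8 text mode: under Python 2 such data was often read as raw
        # bytes; Python 3 needs the encoding stated for the non-ASCII n-gram keys.
        with open(path, encoding='utf-8') as f:
            data = json.load(f)
        return data['name'], data['n_words'], data['freq']

    name, n_words, freq = load_profile('nlp_resource_data/langdetect/profiles/ro')
    print(name)        # 'ro'
    print(n_words)     # [8213281, 9706468, 7977386], per the data above
    print(freq['şi'])  # 42994, the count recorded for one n-gram above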
diff --git a/nlp_resource_data/langdetect/profiles/ru b/nlp_resource_data/langdetect/profiles/ru
new file mode 100755 (executable)
index 0000000..228e71e
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/ru
@@ -0,0 +1 @@
+{"freq":{"D":12949,"E":10306,"F":8444,"G":9635,"A":18320,"B":11812,"C":18729,"L":10133,"M":18025,"N":9350,"O":8480,"H":7852,"I":36250,"K":4555,"U":4649,"T":15138,"W":7160,"V":11078,"P":15071,"S":24620,"R":10933,"X":14985,"f":12270,"g":18709,"d":30289,"e":107154,"b":13058,"c":32324,"a":88793,"n":72221,"o":77565,"l":46717,"m":27155,"k":11440,"h":29689,"i":78874,"w":9055,"v":9950,"u":33414,"t":65000,"s":53078,"r":72396,"p":19491,"z":3846,"y":15658,"x":6168,"́":88446,"ь":418052,"э":80439,"ю":190059," o":3966,"я":756306,"ш":164289,"щ":122233,"ъ":10879,"ы":569718,"ф":163421,"х":364215,"ц":246632,"ч":362951,"р":2010938," a":3919,"с":1933070,"т":1865754,"у":741481," t":5450,"ё":73990,"И":44043,"Л":43881,"К":96587,"Н":57501,"М":84561,"П":106733,"О":53561,"Б":68591,"А":88041,"Г":58489,"В":87514,"Е":15516,"Д":50036,"З":22723,"Ж":9219,"Ш":21342," H":6358," I":14743,"Ю":9600," N":6330,"Я":13672," O":4736," L":7278,"Э":22580," M":13492,"Т":45273," B":8977,"У":23856," C":13678,"Р":75819," A":12906,"С":148731,"Ц":10320," F":6714,"Ч":15862," G":7609,"Ф":36383," D":9169,"Х":20421," E":6845,"л":1373509,"к":1271742,"й":669125,"и":2990916," X":9849,"п":840034,"о":3520831,"н":2386826,"м":997958,"г":667312," S":16699,"в":1520663," R":8106,"б":419069,"а":2917550," P":10356," W":6218,"з":510343,"ж":242497," V":5002,"е":2737376,"д":981077," T":11223," А":74197," Б":66425," В":84271," Г":55440," Д":46951," Е":13561," Ж":8620," З":21445," И":40121," К":90300," Л":41939," М":81442," Н":51890," О":47190," П":101697,"Co":3768,"I ":12785," б":96560," а":114180," г":188817," в":459004," е":32581," д":177939," з":81625," ж":31277," й":4814," и":369580," л":56771," к":233167," н":240854," м":177553," п":455990," о":265653," Р":63171," С":125419," Т":41894," У":21149," Ф":32286," Х":19430," Ц":9599," Ч":15364," Ш":14654," Э":20859," Ю":9415," Я":13422,"C ":3880," т":131031," у":81588," р":185096," с":401251," ц":26307," ч":72261," ф":71734," х":30938," ш":31141," ю":9238," я":35568," э":51707,"Ma":3809,"II":10412,"VI":3879,"Th":4668,"X ":5588,"S ":3829,"a ":12175,"i ":4290,"he":9109,"g ":4678,"ea":4162,"ec":3745,"de":5482,"h ":4087,"el":5609,"en":10589,"et":4586,"es":8452,"er":17575,"e ":29556,"f ":4058,"ch":4543,"ce":3891,"c ":3917,"d ":10245,"at":8929,"as":4202,"ar":11277,"al":8675,"am":4333,"an":13622,"ac":3987,"nt":7777,"ns":3721,"of":4484,"om":4497,"on":14150,"ol":4531,"os":3740,"ou":4174,"or":10679,"r ":11184,"ll":5038,"o ":6694,"ma":4022,"me":5234,"na":4836,"nd":7645,"ne":5797,"ng":6066,"ni":4386,"m ":5088,"li":5421,"le":7129,"la":5868,"n ":18576,"ic":8588,"ia":4454,"ie":3873,"k ":4036,"is":6529,"it":5343,"il":4505,"in":14086,"io":6997,"l ":9526,"y ":8801,"ve":4522,"x ":4062,"ur":4265,"us":4229,"to":4590,"te":9298,"ti":9123,"th":7132,"ta":6068,"st":7180,"ro":7763,"ri":8232,"re":8155,"ra":7850,"t ":13376,"s ":22614,"́в":5893,"́д":3913,"́й":4535,"́м":3791,"́л":9850,"́н":15831,"́р":9052,"́с":4186,"́т":6032,"К ":4112,"О ":4223,"А ":10572,"В 
":18440,"ья":15305,"ью":22763,"эл":12007,"эк":10864,"эр":3980,"эн":8177,"эт":23791,"юб":4435,"юг":4684,"юд":5402,"юз":4895,"юл":6476,"юн":7127,"юр":4992,"юс":3863,"ют":24842,"юч":8444,"ющ":38941,"яж":4621,"яе":26767,"яд":10334,"яв":18798,"яб":17686,"ян":29130,"ям":15306,"ял":9628,"яз":22046,"ях":9639,"яс":6449,"ят":41978,"яр":7070,"яю":12156,"ящ":14702,"щи":42463,"ще":53089,"ща":18897,"ъе":8051,"ый":115257,"ык":21356,"ые":44083,"ыд":3688,"ыв":20957,"ыш":9845,"ыч":6916,"ых":88893,"ыт":10971,"ыс":15933,"ыр":7427,"ып":8198,"ын":9614,"ым":40979,"ыл":22285,"ьк":10517,"ьд":4034,"ье":17860,"ьз":12362,"ьб":9420,"ьш":16573,"ьс":28658,"ьт":14115,"ьн":93902,"ьм":17794,"хе":5192,"хи":15842,"хн":9628,"хо":65229,"хр":8875,"хс":5190,"ху":8026,"фф":4256,"ха":22339,"ци":120621,"цк":12640,"цо":4729,"цу":6546,"ца":29752,"це":46729,"чл":3954,"чн":40293,"чи":44063,"чк":5394,"чт":9754,"цы":8412,"че":142550,"ча":74585,"шо":6953,"шн":6580,"шк":7840,"шл":7340,"ши":40508,"шт":17550,"ше":40253,"чё":6388,"ша":19003,"ск":366421,"см":15604,"сл":64229,"со":139920,"сн":41282,"ср":13223,"сп":78471,"св":31144,"рё":5020,"сб":4953,"се":102280,"си":96102,"рш":7321,"ры":45781,"рь":12146,"са":63371,"ря":46850,"рр":14529,"рс":52200,"рт":57700,"ру":105041,"рх":14747,"рц":4365,"тн":65472,"тм":3983,"тл":9354,"тк":17989,"тс":84335,"тр":155968,"то":273813,"те":235202,"тд":4232,"тв":124455,"сё":6893,"ти":199676,"сы":9941,"сь":18264,"та":266195,"ся":89398,"тб":4549,"су":38119,"сф":3977,"сс":88379,"ст":540751,"сч":5733,"сш":4240,"сх":8473,"сц":3788,"ур":58652,"уп":39068,"ут":30026,"ус":56106,"ум":19318,"ул":47107,"ун":41314,"уи":4250,"уз":23986,"ук":30175,"уд":42687,"уг":33231,"уж":19840,"уе":15435,"уа":10851,"тя":20201,"уб":26698,"ув":5212,"ты":37440,"ть":74835,"тч":3850,"тт":6006,"ту":57964,"фу":8016,"фр":15690,"фо":29756,"фи":45878,"фе":25068,"фа":16306,"ую":32657,"ущ":17182,"уч":38493,"уш":8215,"ух":10059,"ёт":8989,"ён":22517,"ёр":12412,"ёл":4881,"ём":5538," II":3728," Ma":3710,"а ":616120,"Р ":10559,"С ":6434,"Ис":5682,"Ин":6739,"к ":120218,"Из":4203,"Ив":3823,"й ":507309,"Ле":11105,"Ли":8362,"Ла":6540,"Ку":8515,"Ко":21705,"м ":237652,"Кр":10087,"Ки":8491,"Ка":27548,"л ":64722,"На":20616,"Не":6593,"Ни":9075,"Му":3926,"Мо":21246,"о ":404148,"Ма":25177,"Ми":12922,"Ме":9781,"Ло":5700,"н ":135483,"Лу":3901,"Па":15703,"Пе":17521,"Пи":4313,"Пл":3876,"с ":91974,"По":29660,"р ":85184,"Ос":6849,"Ор":3751,"От":4582,"Об":5237,"Он":3901,"Ол":8342,"Но":9337,"п ":8470,"в ":366455,"Ам":4416,"Ан":11755,"Ал":14756,"Ав":5862,"Ба":12391,"Ар":8812,"б ":7229,"д ":64292,"Во":11807,"Ве":14286,"Ви":9847,"Га":7333,"Бо":12192,"г ":35799,"Бр":14227,"Бе":12321,"Би":3781,"Ва":10241,"Бу":4848,"Ди":3742,"Дж":8980,"Де":6705,"ж ":6087,"До":7659," Th":4595,"Ев":5669,"Ге":11433,"Гр":11587,"Го":10198,"е ":450788,"Да":6449,"и ":574901,"За":11852,"з ":53551,"ь ":133456,"е́":15696,"ы ":130752,"и́":16589,"ё ":6265,"Яв":5487,"ю ":63842,"я ":490850,"Эт":4232,"Ст":11217,"Су":5933,"Та":9444,"Ти":3758,"Те":7001,"ф ":6051,"То":6501,"Тр":6028,"Ту":4280,"Ук":4529,"х ":211491,"Пр":23361,"Ра":11292,"Ре":10364,"Ри":7652,"СР":8553,"т ":206463,"Ро":21953,"Ру":5315,"СС":13685,"СШ":6418,"Са":18244,"Св":4560,"Си":8236,"Се":17477,"у ":114144,"Со":23127,"Це":5862,"ш ":6132,"а́":24040,"Че":6673,"ША":6418,"ц ":11865,"Фр":9086,"Фе":6645,"Фи":4961,"Ха":6486,"ч 
":21859,"Хо":3899,"ль":255358,"лы":10808,"лю":22731,"мб":8994,"ля":104684,"ма":130982,"лё":9664,"ме":172608,"ми":140084,"мл":6471,"лл":23806,"лн":13959,"ло":163517,"лс":10825,"лт":4507,"лу":35325,"ла":158459,"лж":4211,"ле":205849,"лд":3683,"лг":5390,"лк":10879,"ли":255185,"км":7931,"кн":9321,"кк":4284,"кл":31050,"кр":60959,"кс":31731,"ко":383857,"кт":66101,"ку":41628,"кц":9152,"ка":219076,"кж":14683,"ки":210538,"кв":18587,"ке":35320,"йн":21970,"йо":19765,"йр":3918,"йк":4778,"йл":4612,"йш":7189,"йс":69884,"йт":6056,"ия":163126,"ию":29435,"ищ":4465,"иш":6262,"у́":5838,"йд":4105,"ио":61311,"ип":31810,"им":121890,"ин":226639,"ик":130372,"ил":122490,"ии":133366,"ий":180537,"иц":62824,"ич":108247,"иф":12791,"их":84356,"ит":178471,"иу":4714,"ир":73443,"ис":176435,"ри":235653,"рк":23089,"рл":11046,"рм":37955,"рн":63599,"ро":363111,"рп":4622,"ра":429505,"рб":11729,"рв":24770,"рг":34028,"рд":18157,"ре":279331,"рж":10477,"пь":4901,"пы":11436,"пр":223152,"пп":19192,"пт":6272,"пс":5990,"пу":32229,"пи":56651,"пн":7871,"по":247631,"пл":31561,"ою":7698,"оя":26385,"па":81900,"пе":95232,"ощ":8932,"ош":11972,"оч":34323,"оц":13939,"оэ":5861,"ос":271102,"ор":307758,"оп":60188,"оо":12934,"ох":15431,"оф":19467,"оу":5082,"от":162519,"ок":93705,"ол":228863,"ом":207667,"он":199509,"ож":50393,"оз":54318,"ои":31770,"ой":205541,"ов":359690,"нё":5120,"ог":196172,"од":289964,"ое":90767,"ня":35079,"оа":5488,"об":119246,"нь":18690,"ны":243593,"нц":33951,"нч":4962,"нт":90634,"нс":97833,"нф":7203,"ну":26904,"но":402371,"нн":163382,"нр":4657,"нк":27682,"нз":4096,"ни":399245,"не":163042,"нг":32092,"нд":56240,"мё":3976,"нв":7556,"на":409072,"мя":17291,"мь":4683,"мы":32208,"му":54274,"мс":10476,"мп":42062,"мо":86128,"мн":19897,"мм":15488,"ге":39235,"гд":7818,"ги":64206,"гн":5587,"го":307537,"гл":27873,"гм":4198,"гр":76619,"гу":22493,"дв":21897,"да":163117,"вг":7437,"ве":186015,"ви":112054,"вк":13130,"вл":60895,"вм":3919,"вн":71300,"во":212143,"вп":4179,"вр":34804,"вс":46803,"ву":21309,"вт":16905,"вх":16990,"вш":21902,"вы":73241,"вь":4164,"га":61976,"вя":19117,"вё":4031,"би":27123,"бе":43659,"бр":63607,"бн":9059,"бо":89921,"бл":44500,"бу":21528,"бс":7153,"ва":210954,"бы":35755,"бъ":8402,"бщ":13191,"ад":83035,"ае":38603,"аж":22477,"аз":106308,"аб":41193,"ав":144996,"аг":30571,"ам":106564,"ан":372615,"ап":45088,"аи":20853,"ай":49922,"ак":101386,"ал":220430,"ах":49626,"аф":15259,"ач":35451,"ац":44113,"ас":161556,"ар":181597,"ау":19997,"ат":182329,"ба":30240,"ая":142316,"аю":29293,"ащ":9761,"аш":12824,"зс":6594,"зр":12483,"зу":17726,"зк":4107,"зи":46141,"зо":48202,"зн":42107,"зм":17172,"зл":7624,"ив":68972,"иг":39987,"зя":3693,"иа":42640,"иб":20393,"иж":14048,"из":124756,"ид":43626,"ие":128645,"зы":33503,"жо":5749,"жс":4009,"жу":6473,"жи":33668,"жн":29430,"за":99672,"зв":49033,"жё":3790,"зг":3804,"зд":32492,"зе":22277,"еф":7165,"еу":4109,"ет":190550,"ес":211743,"ер":322951,"еп":23032,"ео":22288,"ен":392846,"ем":131477,"ел":205217,"ек":97566,"ей":99541,"еи":5939,"ез":47927,"еж":36786,"ее":34825,"жд":33436,"же":89890,"жа":21003,"ея":12292,"ещ":8165,"еч":24475,"еш":11291,"ех":17181,"ец":24531,"дс":40426,"др":39150,"ду":60618,"дн":71944,"дм":11329,"дп":4848,"до":102671,"ди":135662,"дл":31466,"дк":7688,"де":155382,"дд":3909,"о́":17497,"дж":10265,"еб":21454,"ев":101298,"дё":3920,"ег":62011,"ед":133360,"дь":6956,"еа":12623,"дя":7893,"дш":3897,"ды":16920," Яв":5485," ар":13568," ба":10710," ав":18221," ад":9137," ал":10374," ак":9085," ан":12535," ам":9443," ап":7635," бу":7250," ва":6199," бы":23597," би":6156," бе":13045," бр":8596," 
бо":19871," бл":6242," вт":4810," вх":16859," га":5609," вы":32582," ви":13232," ве":27020," во":59783," вс":13316," вр":12829," вл":4862," вк":4169," вн":5530," дв":15611," да":12181," гу":4609," го":108387," гл":8578," гр":29774," ге":11208," ги":5523," ед":6587," ег":8307," до":35767," др":17969," ду":6602," де":43890," ди":14485," дл":25835," же":14087," за":57830," зв":5213," жи":10062," зн":7274," иг":13915," из":63621," ил":26819," ин":23235," им":21022," ис":39365," их":4602," ию":10395," ка":46349," ки":10920," кр":19920," ко":101579," кн":6962," км":7101," кл":11050," ку":8839," ла":5400," ли":22008," ле":16122," ло":3818," ме":49176," ми":21788," лю":4902," ма":35222," мо":29145," мн":7010," му":26862," ни":8598," не":49337," на":155501," но":20718," ок":24600," он":5812," од":28184," об":63971," от":54560," ор":15811," ос":31704," оп":15294," по":164771," пл":14871," пи":11060," пе":48196," па":21414," Ре":10285," Ра":11205," Ро":21862," Ри":7649," Пр":23192," Пе":17464," Па":15586," с ":51024," По":29407," Пл":3863," Пи":4292," От":4556," Ос":6825," Ор":3729," Те":6933," Ти":3719," То":6441," Тр":5998," Ст":11107," Су":5919," Та":9379," Св":4548," Си":8202," Се":17430," у ":4211," Со":23029," Ру":5277," СС":6857," СШ":6404," Са":18190," Фр":9072," Фи":4937," Фе":6613," Ук":4507," Ту":4270," Це":5856," Хо":3885," Ха":6470," Че":6653," Эт":4213," Ба":12348," Ар":8769," в ":244550," Ан":11697," Ам":4402," Ал":14708," Ав":5843," Ва":10204," Бу":4830," Бо":12018," г ":7009," Бр":14207," Бе":12299," Би":3719," а ":10629," Ев":5656," Ди":3719," Дж":8956," Де":6645," До":7584," Га":7281," Ве":14245," Ви":9808," Во":11759," Да":6384," Ге":11389," Го":10155," Гр":11539," Ис":5607," Ин":6668," к ":15541," Ки":8443," Ка":27418," и ":157390," За":11752," й ":4397," Ив":3814," Из":4180," Му":3903," Мо":21203," о ":7496," На":20476," Не":6542," Ни":9034," Но":9287," Об":5188," Он":3896," Ол":8330," Ко":21565," Кр":10028," Ку":8476," Ла":6504," Ле":11080," Ли":8319," Ло":5687," Лу":3887," Ма":25091," Ме":9720," Ми":12850," В ":17470,"II ":7232," ра":87254," ре":41392," ро":33130," пр":177960," пс":3748," пу":8711," св":25177," си":22645," се":47025," сл":16395," см":6218," ск":7720," сп":19254," ср":11080," сн":4707," со":89660," ру":13053," са":14863," ти":6431," те":38937," то":20252," тр":22707," ст":53938," су":12938," та":27006," ук":4204," ус":11044," уп":6557," ур":3864," ун":4890," ул":9872," ту":5700," фо":12379," фр":10534," фу":6744," фе":9085," фи":20989," фа":9016," уч":17593," хр":4873," хо":7233," ху":6092," ха":4633," ци":3784," це":18264," чт":6942," чи":6042," че":25547," ча":26885," ша":3808," шт":13545," эк":9725," эл":8825," эт":18390," юг":4276," ян":5667," яз":10931," яв":12558,"аз ":5770,"ад ":7406,"Явл":5341,"ав ":5373,"ам ":14841,"ан ":31178,"ак ":26116,"ал ":21311,"ай 
":4239,"авш":9597,"авт":9645,"ага":8115,"аго":7572,"ада":10767,"ади":12630,"аде":10895,"адм":7561,"адн":6641,"адь":3777,"аем":7453,"ает":25830,"аже":6271,"ажд":5556,"аба":3999,"або":17739,"абр":6755,"ава":11200,"авн":21035,"авл":26690,"авс":4043,"аво":12671,"аве":7803,"авг":5378,"ави":21691,"ало":13331,"алл":5996,"ала":22307,"али":48595,"але":13975,"амм":7674,"амо":8686,"амы":4469,"аля":5952,"ама":7226,"аль":73407,"ами":28699,"аме":19983,"анн":37419,"ано":18426,"анс":39188,"ант":20680,"анц":15483,"аны":7448,"ань":4229,"ана":29768,"анд":25091,"анг":9722,"ани":99562,"ане":11955,"анк":9278,"азр":7025,"азо":13307,"азн":9789,"азл":4533,"ази":10605,"азе":3818,"азд":5347,"азв":17619,"аза":10657,"аин":6236,"аиб":4114,"азы":8101,"айс":4889,"айо":18909,"айн":4793,"акт":16378,"ако":12308,"акж":14674,"аки":5796,"ака":8072,"ах ":31165,"ас ":6451,"ар ":4822,"ат ":18392,"ая ":136821,"ба ":4571,"Пол":8430,"Пор":6259,"При":5525,"Пре":6670,"Про":7157,"Рос":14485,"Рас":4325,"Рес":3751,"Сан":8587,"ССС":5587,"ССР":7399,"США":6399,"Сос":6600,"Сов":3947,"Сев":6002,"Сер":4458,"Ста":4121,"Укр":4080,"Фед":3907,"Фра":5981,"ША ":6384,"а́н":5689,"лам":6144,"лан":15129,"лас":29897,"лат":7877,"ме ":7744,"ля ":50911,"ма ":24448,"лав":17972,"лаг":7649,"лад":11258,"лы ":5725,"ль ":38018,"кую":3788,"кус":4925,"кул":8752,"ктя":6372,"кци":8948,"кре":6974,"кра":17934,"кри":5557,"кру":16231,"кро":7021,"лу ":5744,"кры":6225,"кса":6759,"кси":5075,"кта":5223,"кте":4216,"кти":10807,"кто":14126,"ктр":8094,"кту":4971,"кла":9863,"кло":3891,"ло ":19382,"клю":8248,"кни":4141,"ког":53446,"ков":40843,"ком":58598,"кон":29711,"коп":5499,"кор":15168,"кос":5742,"кот":33255,"кое":17818,"кой":65146,"кол":25577,"кие":12731,"кин":10160,"ким":14411,"кий":81796,"ких":35222,"ле ":19103,"кже":14673,"ли ":56435,"ква":7601,"ках":4702,"кат":7467,"кар":8017,"кам":8824,"кан":23037,"как":20319,"кал":11552,"каз":10972,"кая":36891,"каб":6283,"кад":5211,"ла ":40418,"йши":4432,"йся":5983,"йск":44468,"йст":14738,"кт ":8556,"ку ":9345,"йны":8079,"кс ":4079,"йон":18007,"ко ":19496,"км ":4431,"ки ":43486,"ке ":21167,"иям":6120,"иях":4963,"нва":6008,"од ":33146,"ная":57610,"нах":9504,"нац":5517,"нау":8200,"нач":22853,"ог ":6440,"нан":6213,"нам":8570,"нал":25338,"нат":9581,"нас":17157,"нар":16443,"нап":11298,"над":9721,"нак":5181,"наи":4066,"наз":20814,"нде":5878,"нда":8693,"нгл":9288,"нге":3994,"нга":3873,"ое ":58821,"ней":12822,"нек":4897,"нем":9855,"нен":19222,"нер":13236,"нес":9300,"нет":5791,"нег":6018,"нев":4773,"нее":5864,"нди":11172,"ндо":5836,"ндр":8570,"нив":5586,"нии":26244,"низ":11864,"ник":36850,"ний":18207,"ниг":4932,"ние":74066,"ок ":21671,"ой ":187929,"ны ":35382,"нь ":8902,"ня ":11152,"мый":5262,"мых":5149,"мыш":3735,"ов ":92702,"нт ":11498,"мпо":5172,"мпи":13008,"мпе":6698,"мпа":7653,"мот":4724,"ну ":6285,"мпь":3765,"мск":6474,"мун":14544,"мул":3800,"муз":9742,"мик":7960,"мил":4443,"мии":4851,"мич":7046,"мин":28165,"мир":17908,"мит":4140,"но ":62002,"мму":3810,"мно":11450,"мод":4772,"мог":5035,"мов":6153,"мой":4768,"мож":6250,"мон":9329,"мом":4520,"мол":6893,"мос":8228,"мор":8527,"нд ":4096,"мац":4061,"ляю":8990,"мая":9013,"лял":3785,"мал":9300,"ляе":22475,"мат":21959,"мас":5866,"ляр":4924,"мар":12011,"ман":23543,"люч":8195,"мец":4888,"мес":13130,"мет":17123,"мен":50820,"ни ":19557,"мер":29417,"мез":5962,"меж":12542,"мед":8168,"не ":38359,"лён":4990,"мы ":9798,"льн":93765,"на ":153332,"мя ":11127,"льм":14104,"льк":7925,"льз":12358,"льд":3831,"льб":7821,"лья":5362,"льш":14923,"льт":14050,"льс":23881,"лощ":4920,"му 
":16323,"лок":5500,"лог":21990,"лод":5038,"лож":18221,"лор":4516,"лос":11300,"лот":8157,"лом":9527,"лон":6111,"лов":35245,"луч":9778,"луж":6113,"лся":9183,"лиг":4087,"лив":6013,"лиз":10430,"лии":15024,"лим":9347,"лий":6287,"лик":17837,"лез":6815,"лей":13214,"лев":9323,"лег":4328,"лед":13548,"лее":8350,"лес":4757,"лер":5420,"ми ":51806,"лен":60261,"лем":10309,"лек":22756,"лет":13846,"лли":5515,"лла":3879,"лле":7201,"лиц":18482,"лич":17052,"лис":14036,"лит":29052,"лин":18545,"лия":7551,"лко":3705,"пас":4125,"оящ":5066,"ояб":5483,"пад":12246,"пал":13951,"рг ":4862,"оян":3986,"пан":12047,"пар":16176,"ре ":17661,"ра ":44965,"оюз":3745,"пий":7004,"пио":5573,"пис":24111,"пла":9596,"пле":8010,"ро ":9844,"пло":9025,"пед":4071,"ри ":19004,"пер":55359,"пес":4491,"печ":4195,"пец":6528,"ори":37323,"орд":5846,"оре":20523,"орг":13852,"орс":6547,"оро":76961,"орм":17625,"орн":11312,"опу":5280,"ора":20394,"опе":10688,"опи":6454,"опо":11431,"опр":11046,"оор":3720,"опа":4525,"отд":3834,"оте":8329,"отк":7498,"отл":4401,"оти":8154,"ото":45196,"отн":11373,"отр":8447,"осу":15201,"отв":5315,"ота":11881,"осе":5320,"оси":7543,"оск":15979,"осл":18494,"осн":17062,"осо":11923,"осп":4485,"осс":25551,"ост":122012,"ору":9047,"орт":19622,"оры":19028,"оря":6038,"осв":3854,"омм":4672,"оми":12071,"оме":14471,"ома":19204,"оля":5909,"оль":52719,"олу":10916,"олн":13122,"по ":36706,"оло":54184,"олл":5951,"олж":3930,"оле":21412,"оли":27404,"ола":9666,"окр":16259,"окт":7805,"оку":5419,"око":16003,"ооб":4126,"онс":12501,"онт":8231,"онц":5168,"они":16817,"онк":3985,"оно":19353,"онн":16949,"она":44812,"онд":5190,"оне":18296,"омо":11288,"омп":18935,"ому":11330,"оше":5000,"пы ":9126,"очи":4013,"оче":6235,"очн":15161,"оща":4802,"офе":5906,"офи":7745,"оты":3797,"оце":7042,"оци":5856,"охо":7896,"нят":6570,"няе":3761,"ова":79059,"обы":7576,"общ":12061,"объ":7061,"обр":22953,"обо":16065,"обн":4315,"обл":17532,"оби":5216,"обе":11035,"ных":63297,"ные":28042,"ным":27407,"ный":86741,"па ":9132,"оит":3855,"оис":6658,"оим":4265,"ойс":6061,"ойн":7471,"оке":4198,"ока":10728,"ожи":4007,"ожн":11252,"озд":12188,"озв":5021,"ози":6006,"озн":9808,"оиз":10355,"одн":39144,"оди":52480,"оду":22578,"одр":3912,"одс":13394,"одо":20011,"оды":5756,"оед":4040,"оев":9413,"одя":6688,"оен":8911,"оек":3819,"оже":24113,"ожд":8010,"ове":40857,"овк":4823,"овл":5356,"ови":24912,"ово":42247,"овн":14676,"овр":5254,"овс":17603,"овы":17385,"ога":5667,"овя":4334,"огд":4881,"оги":20707,"огл":3937,"ого":132906,"огр":15611,"ода":58550,"оде":17121,"от ":31995,"ноя":6122,"нос":44637,"нор":3684,"нол":4077,"ном":37955,"ной":71982,"ное":29866,"ног":57728,"нод":4260,"нов":54046,"нны":69394,"нно":59505,"ор ":24212,"нни":6472,"нна":20166,"нко":5353,"он ":29089,"нкт":5008,"нка":4499,"ом ":104456,"ния":84752,"нию":8268,"нир":5623,"нис":17636,"нит":8771,"ним":17208,"нин":5963,"нич":11022,"них":9088,"ниц":23421,"нце":5996,"нци":17698,"нцу":5080,"ную":9327,"нфо":3773,"нтя":5255,"нут":4116,"нта":17028,"нте":10866,"нти":10987,"нто":10091,"нтр":15601,"нск":63751,"нст":16529,"сам":12116,"сан":12409,"ряд":7767,"сат":6375,"сво":14816,"те ":15248,"све":4519,"свя":9460,"сев":8467,"сел":21166,"ти ":46330,"сен":10794,"сем":7371,"сет":3970,"сер":16452,"сис":11359,"сит":9045,"сий":14008,"сии":9496,"син":6230,"сил":7978,"сим":6904,"скв":8290,"ски":131938,"ска":46869,"сли":3844,"сле":23336,"сла":10537,"ску":8391,"ско":157451,"сме":6005,"слу":9361,"сло":15249,"то ":29238,"сня":3968,"соб":15821,"сов":27452,"соз":9281,"сок":8059,"сом":3851,"сно":26195,"тр 
":8572,"сны":3885,"спе":13580,"спа":4761,"спи":4683,"сос":21656,"сор":6482,"соо":5100,"сон":7291,"соц":4226,"сре":10218,"ту ":8116,"спу":6484,"спо":39170,"спр":6952,"рри":9506,"роц":5562,"рош":3692,"рот":12263,"роф":6754,"рох":5095,"роп":12392,"рос":28947,"ст ":16255,"рта":13460,"рст":15956,"рто":4691,"рти":13560,"рск":18805,"рсо":4243,"рси":7173,"рую":4208,"рту":7614,"рук":7417,"руг":20792,"руд":5213,"руж":6808,"руп":20038,"рус":13228,"рхи":6150,"сь ":14955,"ся ":83299,"та ":65491,"рыт":4303,"рых":5096,"рый":5524,"рые":4885,"тв ":5853,"рад":12120,"раж":10828,"раз":53144,"раб":18768,"рав":32412,"рам":17118,"ран":56502,"раи":8567,"рай":21830,"рак":10369,"рал":23925,"рах":8851,"раф":10748,"рац":10644,"рас":32059,"рат":31627,"рая":5022,"ращ":3710,"рбу":3964,"рва":4083,"пью":3789,"реб":6936,"рев":19639,"рег":20011,"ред":53953,"реа":5135,"рет":10005,"рес":14958,"реп":5228,"си ":5105,"рен":15519,"рем":25502,"рел":11664,"рек":12317,"рей":7082,"рез":11794,"реж":11533,"ржа":3754,"реч":6532,"рво":6073,"се ":6838,"рвы":9033,"рга":14493,"рге":4800,"рги":4021,"рия":13567,"рию":4048,"рио":6627,"рим":11681,"рин":20707,"рик":20481,"рил":3781,"рии":19852,"рий":6871,"рич":8657,"рит":22893,"рир":3686,"рис":16618,"рка":4003,"риа":11930,"риг":4581,"рив":4645,"рид":5404,"риз":6655,"ск ":4696,"рни":9740,"рна":12681,"рок":11103,"рол":12179,"ром":23332,"рон":16716,"рож":7867,"роз":4092,"рои":18841,"рой":14224,"ров":68815,"рог":16486,"род":57888,"рое":10921,"рны":14512,"рно":18416,"рла":4760,"рко":4371,"рми":8750,"рма":15602,"со ":3903,"ппы":6617,"пра":26585,"при":52484,"пре":53035,"ру ":6954,"про":87029,"поп":4906,"пор":17269,"пос":28755,"пот":5403,"поэ":3864,"ппа":6927,"рт ":5103,"The":3957,"под":23464,"пов":12924,"пон":8324,"пом":6183,"пол":68859,"пок":4701,"поз":9391,"пуб":8028,"пус":5784,"пут":5691,"пул":3714,"ры ":15775,"са ":16206,"ря ":29812,"рь ":4987,"вар":15705,"ват":16606,"вая":7821,"ваю":8055,"ге ":6711,"вае":10913,"вав":4587,"ван":65485,"вал":18282,"быт":4123,"быч":3846,"был":18105,"га ":14808,"бъе":7043,"бще":10673,"вы ":7062,"бур":7473,"бря":21845,"вто":16228,"все":9508,"вск":27242,"вст":7859,"вра":8893,"вре":17466,"вро":6581,"вол":13485,"вок":4441,"вой":25086,"вои":3759,"воз":12251,"вое":18754,"вод":28316,"вог":6893,"вов":10300,"вны":14251,"вор":11524,"вос":15224,"воп":3740,"вом":8183,"вни":4616,"вне":8106,"вна":13669,"вно":25418,"вля":30877,"вле":21719,"вла":3874,"го ":153532,"вкл":4066,"вка":4191,"вич":20022,"вия":4450,"виж":4984,"вил":7784,"вин":10044,"вис":5933,"вит":12981,"вид":14202,"вес":15630,"вет":22334,"вер":37523,"вен":36297,"ги ":6381,"вел":6076,"век":17069,"вед":16797,"вгу":5781,"ва ":46587,"ающ":19084,"ают":9616,"ачи":5855,"бы ":3828,"ащи":4099,"аще":3705,"ауч":3787,"аук":4642,"афи":6466,"ахо":7975,"ача":13504,"аче":11447,"аци":41912,"апр":13134,"апа":11764,"апо":3746,"апи":8487,"арх":7888,"арс":18625,"арт":22698,"арь":4529,"аря":9072,"аре":7851,"ард":6979,"ара":21925,"арн":5879,"арм":4715,"аро":18219,"ари":19835,"арл":4156,"арк":7904,"асс":17733,"аст":73387,"ась":5439,"ата":17783,"аси":4710,"асе":9811,"асл":4318,"асп":18234,"асн":7236,"ату":10612,"аты":6297,"ать":13519,"ате":41110,"ати":28209,"атн":5152,"ато":21641,"атр":7249,"бол":22933,"бом":6832,"бой":5049,"бор":14402,"бот":13748,"бно":3859,"бро":3695,"бри":6979,"бре":4187,"бра":24036,"бла":17775,"бли":15168,"бле":4490,"во ":31067,"ви ":4378,"бес":5023,"бер":15432,"бел":4899,"бит":4733,"бил":3862,"ве ":17457,"даю":4004,"дах":4951,"дан":21423,"дам":3898,"дар":18766,"дат":8984,"дви":7106,"дал":8909,"дав":5333,"ев 
":8761,"дек":7455,"дей":9763,"дем":5618,"дел":26493,"ден":32485,"дер":21083,"ей ":52679,"дес":4085,"дет":5015,"дея":8136,"дву":4656,"дво":4445,"ее ":25157,"ез ":4626,"ды ":12967,"дь ":4234,"ех ":3705,"дст":24032,"дск":13686,"дро":6400,"дру":10418,"дре":7577,"дра":7639,"ет ":49624,"дун":5024,"ец ":6315,"ен ":23878,"дия":4777,"диц":4594,"ем ":32646,"диа":3739,"див":3812,"дим":5230,"дин":31699,"ел ":6867,"дио":3858,"дис":4374,"дит":31866,"дии":5066,"дил":7726,"ек ":8976,"дны":11957,"дож":6200,"дов":21587,"дос":6949,"дор":10512,"дол":8011,"док":5562,"дон":5574,"дом":8881,"дна":11456,"дни":9495,"дне":7494,"ер ":23656,"дно":27634,"для":23991,"дми":9130,"вып":7258,"вый":9785,"вым":6060,"вых":10632,"выс":8860,"al ":4210,"да ":70397,"вяз":5698,"гал":7699,"вят":8003,"ган":16467,"гар":4062,"де ":21659,"гда":5300,"вую":5927,"вхо":16916,"вше":5430,"вша":3781,"вши":12432,"вые":6905,"гон":3740,"гол":6702,"гос":13811,"гот":3744,"гор":32983,"гов":9981,"год":69990,"гру":17285,"ду ":34904,"гро":3890,"гра":40520,"гре":6189,"гус":6354,"ген":9872,"гер":7302,"ди ":8816,"гии":7384,"гио":11815,"гич":7565,"гих":4581,"гла":12247,"до ":13248,"гли":6452,"жан":5670,"еят":7722,"за ":17832,"еще":5057,"жит":5272,"жив":7938,"жис":4312,"жес":7568,"жет":3940,"жду":12980,"жел":6716,"жен":38841,"жде":11274,"жда":4735,"жск":3945,"жно":12596,"жни":4413,"жны":6194,"жур":3704,"ежи":6716,"ежд":16924,"еду":5404,"едс":18751,"еза":4036,"езн":5938,"езо":10562,"езд":5637,"ези":9501,"ева":10267,"еви":11883,"еве":16643,"еат":3835,"дящ":4359,"его":29841,"еда":12960,"еде":22810,"еди":27267,"едо":10112,"едн":12312,"евн":10381,"же ":22970,"ево":16350,"евр":9678,"евс":6183,"евы":4574,"еге":5203,"еги":14109,"ент":47594,"енс":9645,"енц":4019,"ени":132856,"ено":11907,"енн":84711,"ена":18893,"емя":7341,"ене":16352,"енд":7186,"емь":4324,"емы":10998,"еор":5073,"ены":6940,"ень":8052,"епо":3992,"ерх":5155,"ерр":10216,"ерс":17748,"ерт":9897,"ерл":3718,"ерм":10872,"ерн":25326,"еро":19985,"ери":40656,"ерк":7071,"ерг":7482,"ерж":8654,"ере":44413,"ера":37126,"ерв":20819,"ерб":7899,"ейн":5541,"ейс":18646,"еки":4636,"еко":11088,"ект":27081,"екс":15558,"ейш":5926,"ека":19196,"ели":21048,"ело":19740,"еле":41611,"ела":9361,"емл":3751,"емо":8767,"еми":9433,"ему":3888,"емп":6664,"ель":74971,"еме":23861,"еля":16614,"ема":12283,"елё":3840,"ехн":5852,"ецк":9009,"еци":7333,"еча":3853,"ечн":3687,"ече":11632,"еше":3757,"есе":5474,"еск":73489,"есн":5953,"есп":9644,"есс":15061,"ест":79162,"еся":5372,"ета":21699,"ети":10490,"ете":10421,"етр":11442,"ето":9221,"етн":10174,"етс":48640,"еты":4087,"ибо":7839,"иве":7414,"иви":3809,"ива":16306,"иал":20951,"иан":8939,"иже":6481,"идо":4070,"иев":3803,"ием":14066,"ией":9608,"игр":16265,"иго":4097,"ида":4859,"иде":14015,"иво":7871,"ивн":17940,"ивш":6456,"ига":5856,"иги":5053,"икл":4956,"икр":3804,"ико":26837,"ике":6203,"ики":16651,"ика":34115,"ийс":37107,"изм":10707,"изо":8828,"изн":6897,"изи":10371,"изд":4964,"иза":12843,"изв":18761,"ион":41500,"инц":6456,"ины":10703,"иня":5979,"иод":4236,"ине":14798,"ини":31286,"инн":3826,"ино":20897,"инс":24611,"инт":7204,"инф":4063,"ина":38691,"инд":6333,"инг":12127,"ими":15581,"име":21870,"имс":3708,"имо":9905,"имп":12615,"има":14708,"иль":23825,"или":46988,"иле":5170,"илс":4907,"илл":5632,"ило":11583,"ила":9561,"иси":5473,"иса":15121,"исх":5676,"ист":74674,"исс":10637,"исп":16317,"исо":5087,"исл":10745,"иск":11427,"ити":13816,"ите":56212,"ита":24816,"ись":6230,"иту":6461,"ито":18876,"итс":8689,"ипа":14414,"ипе":3791,"ира":11140,"ире":4647,"иру":8486,"ири":4818,"иро":27497,"иха":3800,"ихо":4447,
"ице":7291,"ица":16979,"ици":25302,"ицы":6049,"ить":5325,"ифи":3877,"ичи":6000,"ичн":11017,"ича":4726,"иче":66707,"июн":5092,"июл":5316,"ка ":55202,"ив ":4402,"зав":10564,"ид ":4294,"зви":4153,"зве":16359,"зва":15033,"зац":9007,"зат":4555,"зап":11760,"зан":11015,"зам":3927,"зак":6826,"зде":5994,"зда":15915,"ие ":94285,"зво":7967,"ий ":139271,"зер":5619,"ии ":131775,"зем":4097,"из ":38626,"зид":4005,"зил":7018,"ил ":8940,"ик ":27882,"ин ":25833,"им ":24359,"зия":4934,"зит":4573,"зме":6834,"зли":4448,"зна":20529,"зни":4774,"зно":8729,"ир ":5392,"зны":4336,"зов":19642,"ис ":4567,"зон":5974,"зор":7083,"ит ":29545,"зра":8237,"зск":6535,"их ":71178,"зуе":5324,"ич ":17567,"зыв":8397,"ию ":18909,"зык":19420,"ия ":146910,"ьшо":3872,"ьши":3819,"ьше":5810,"ьян":5268,"ьют":3946,"ьма":3914,"ьна":7658,"ьни":4302,"ьно":44690,"ьны":33869,"ько":6149,"ion":5234,"ьзу":6190,"ьзо":5415,"ьту":4384,"ьст":6382,"ьск":16866,"ям ":5239,"ют ":10384,"эко":4504,"эле":8958,"это":14311,"ых ":86372,"he ":5907,"ыва":15848,"ье ":4950,"ые ":43888,"ыл ":8892,"ым ":27422,"ый ":115051,"ычн":4405,"ья ":5879,"ью ":17235,"ьбо":5724,"ьев":4427,"ьер":4030,"ьм ":7924,"ыка":9496,"ыла":5382,"ыми":11026,"ыпу":5527,"ысо":4693,"ённ":14954,"er ":6599,"es ":4533,"яза":3917,"яет":23887,"язы":12445,"явл":14387,"ябр":17050,"ях ":9267,"ютс":8148,"юте":4109,"юща":7138,"ющи":22555,"юще":8725,"юча":3971,"юля":5121,"ят ":7009,"юня":4749,"яют":4520,"яющ":7517,"яще":6980,"ящи":4896,"янс":6220,"янв":5611,"яни":5071,"ями":7921,"ярн":3999,"ято":4282,"яти":7069,"яте":9306,"уще":14055,"уча":15033,"учн":4074,"учи":4977,"уче":7765,"фес":5738,"фев":5319,"фер":4649,"ующ":10346,"фин":4428,"физ":4024,"фил":15705,"фик":3811,"фиц":4922,"фре":4266,"фра":6501,"фор":17745,"фон":4486,"ца ":19779,"це ":5162,"хан":4526,"хар":5112,"хра":4915,"хно":4200,"хни":3959,"хож":3747,"хов":4677,"ход":41602,"цы ":7957,"худ":5932,"сск":14412,"сси":32096,"ссо":7973,"сса":7005,"ссе":5970,"стн":20518,"сто":67851,"стр":70127,"ств":108223,"сте":31686,"сти":71515,"ста":85694,"сст":5885,"суд":15247,"сть":44809,"сты":4236,"сту":9860,"сущ":7215,"схо":7227,"ты ":16675,"ть ":55254,"тав":37473,"так":23239,"тал":22250,"там":8524,"тан":36504,"тай":3709,"тат":27244,"уг ":6960,"тар":13509,"тбо":4051,"тву":7862,"сёр":3801,"тво":31741,"тви":10153,"тве":41916,"тва":21790,"тех":6332,"тем":17566,"тел":76816,"тео":3852,"тен":11539,"тер":49417,"теп":3783,"тет":16644,"тек":8246,"тей":6713,"тив":26421,"тие":8972,"ук ":4128,"тка":5169,"тич":24503,"тия":7560,"тии":4630,"тий":5054,"тин":12258,"тик":14625,"тил":7099,"тир":5823,"тис":5149,"тип":5281,"тит":7029,"тки":3943,"тно":26382,"ток":11951,"тол":12417,"той":7374,"тны":17802,"тов":29921,"тог":9531,"тни":13284,"тна":5384,"ут ":4381,"тре":17053,"тра":54933,"три":18286,"тор":95430,"том":20153,"тон":9780,"ус ":4688,"точ":11868,"тоя":12927,"тст":5913,"тся":53866,"тро":41481,"тру":10653,"тск":19176,"туг":4888,"туп":6027,"тур":21906,"тью":9623,"тый":5760,"ую 
":18777,"уго":4888,"уги":7037,"уга":8925,"уда":16459,"тяб":11568,"убл":8850,"убе":4682,"узы":8785,"узс":4917,"уже":5996,"ует":9543,"уем":3815,"уди":4265,"удо":10125,"уме":6894,"уль":15447,"уля":5536,"ули":11772,"укт":4529,"уко":4826,"ука":4047,"упп":14974,"упн":6779,"упр":5417,"ура":6947,"ург":8767,"уре":4620,"унк":5527,"уна":6858,"уни":18074,"уст":16092,"усс":16109,"ута":4019,"уры":4919,"урн":11401,"уро":5529,"уск":4624,"ших":7365,"шир":4576,"шин":5562,"ший":10857,"шен":11201,"шая":6983,"шта":14038,"щих":9037,"щие":5309,"щий":14604,"щин":3869,"щее":7976,"щей":4091,"щег":3871,"щен":12451,"щес":17297,"щая":10075,"щад":4691,"on ":6654,"цен":15360,"чи ":4183,"цел":4906,"цев":3998,"цес":4907,"цер":5569,"циа":13419,"ций":4974,"ции":29185,"цие":3800,"цип":12774,"цио":18812,"ция":20642,"ча ":3694,"цар":3955,"цуз":4865,"цко":4797,"цки":5412,"чем":5556,"чен":25493,"чел":10826,"чес":78218,"чер":6254,"чет":6065,"чле":3904,"чин":7894,"чив":3802,"чис":7524,"чит":8014,"ше ":5808,"чаю":4561,"час":36891,"чат":3981,"чал":9929,"чае":6846,"чны":14169,"чно":19736,"что":8092,"tio":4098,"Кар":6020,"Кра":3888,"Кор":4309,"Кон":5008,"Мар":7555,"Мин":4281,"Мос":11770,"Нас":4395,"Ник":4718,"Нов":5069,"Оли":6672,"Пар":6298,"СР ":8410,"Пет":6304,"Пер":7246,"Але":6898,"Бра":9561,"Бол":4163,"Бел":4463,"Вел":5787,"Гра":3978,"Гер":5952,"Джо":3835,"Евр":4076},"n_words":[36763344,40893832,29165701],"name":"ru"}
\ No newline at end of file
diff --git a/nlp_resource_data/langdetect/profiles/sk b/nlp_resource_data/langdetect/profiles/sk
new file mode 100755 (executable)
index 0000000..97f9880
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/sk
@@ -0,0 +1 @@
+{"freq":{"D":6323,"E":4459,"F":4516,"G":4726,"A":10071,"B":10822,"C":7078,"L":6128,"M":12887,"N":7988,"O":4702,"H":6933,"I":5526,"J":8422,"K":8631,"U":2767,"T":8739,"W":2331,"V":8838,"P":14691,"S":17764,"R":6826,"Z":3853,"f":24135,"g":31408,"d":146397,"e":441234,"b":79948,"c":129399,"a":438168,"n":323513,"o":480147,"l":181843,"m":157226,"j":124158,"k":216062,"h":115571,"i":290888,"w":4336,"v":227972,"u":141151,"t":244072,"s":230794,"r":287939,"p":133920,"z":92877,"y":75446,"x":5807,"²":567,"í":50255,"é":45424,"ä":4507,"á":98447,"ý":65120,"ú":38043,"ô":6119,"ó":12207,"ď":2808,"Č":2146,"č":43571,"ĺ":1392,"ľ":19160,"ň":4992,"Ž":835,"ž":30532,"ť":13944,"Š":1929,"š":30141,"Eur":843," l":8693," m":31843," n":43761," o":37444," h":15481," i":8376," j":50814," k":41715," d":25382," e":6213," f":10527," g":4634," a":59730," b":16567," c":8497," z":33097," u":8583," t":26154," v":69047," p":76912," s":70586," r":25827," J":8409," K":8577," H":6876," I":5508," N":7962," O":4629," L":6098," M":12833," B":10746," C":6977," A":10024," F":4466," G":4666," D":6268," E":4439," Z":3841,"и":615,"о":715," S":17653," R":6787,"а":792," P":14620," W":2297," V":8804,"е":558," U":2760," T":8683," ú":5607," ď":577," č":11970," Č":2146," ž":4854," Ž":833," Š":1929," š":8376," ľ":2475,"Fra":832,"A ":1884,"Da":900,"Co":1488,"Ce":608,"Ch":1551,"Do":1311,"De":1168,"Di":711,"Fa":561,"Eu":967,"Ge":866,"Ga":734,"I ":1477,"Fr":1190,"Fi":588,"II ":663,"C ":1204,"Au":625,"Ar":1181,"D ":632,"Ba":1863,"Am":652,"An":1498,"Al":1539,"Bu":812,"Br":2094,"Ca":1126,"Bi":824,"Be":1438,"Bo":1866,"Kr":1442,"Ko":2065,"Le":1477,"Li":1427,"La":1055,"Lu":551,"Lo":964,"Me":2050,"Mi":1818,"O ":889,"Ma":4427,"Mo":1742,"Ni":925,"Ne":1824,"Na":2123,"P ":647,"No":1174,"Ob":931,"Gr":1156,"Ha":1121,"He":1131,"Ho":1814,"Hr":608,"In":1551,"Ja":1230,"Je":4572,"Jo":995,"Ju":970,"Ka":2174,"Tu":851,"Tr":1334,"To":1259,"Th":927,"Ti":635,"Te":1474,"Ta":1216,"V ":2449,"St":2440,"Sv":647,"Su":614,"Wi":788,"Vy":658,"Vo":610,"Vi":1001,"Va":862,"Ve":1362,"Má":761,"Pr":3288,"S ":1036,"Pe":1293,"Pa":2196,"Pl":567,"Po":4755,"Pi":686,"Or":779,"R ":612,"Se":1430,"Sc":541,"Si":786,"Sl":3359,"Sp":1454,"So":1120,"Ru":1051,"Sa":1654,"Re":1050,"Ná":658,"Ri":640,"Ro":1422,"Ra":1481,"b ":2037,"Zá":884,"a ":144799,"Za":604,"Ze":702,"i ":37987,"fy":881,"ge":3887,"ga":3383,"bý":960,"fi":7159,"fr":2471,"ač":5846,"fu":1355,"aď":870,"ft":581,"fo":3584,"bí":670,"j ":28903,"cú":1023,"gy":705,"aľ":716,"dá":2384,"he":5122,"ha":6720,"gn":1011,"gm":704,"gl":1862,"gi":5874,"gh":727,"gu":2013,"gr":4226,"cí":1219,"go":2083,"du":5115,"dv":2815,"dy":3861,"dz":7413,"g ":2452,"ea":2540,"eb":13761,"ec":10112,"ed":25726,"de":19219,"di":12919,"dk":1536,"dm":1805,"dl":3720,"do":19760,"dn":17745,"dp":1210,"ds":3840,"dr":7033,"ew":678,"ex":2935,"eu":1904,"ev":7688,"ey":664,"ez":6532,"fa":1707,"h ":31639,"fe":2493,"bá":980,"eh":6797,"eg":2721,"ef":1680,"ee":903,"el":18740,"ek":12213,"ej":29432,"ei":1554,"ep":6024,"eo":3559,"en":52359,"em":14359,"et":17096,"es":24526,"er":36633,"ca":6242,"e ":118531,"by":4846,"bs":2133,"br":6137,"bu":4959,"bn":3071,"bo":22917,"bj":2626,"bl":4818,"bi":5066,"bc":1208,"bd":823,"be":9106,"dc":900,"db":1133,"da":10163,"f ":1519,"cu":1353,"ct":1197,"co":4858,"cn":1009,"ck":22292,"ci":21044,"ch":49042,"ce":12657,"c ":5075,"az":7180,"ay":741,"ba":5147,"d 
":13711,"at":24996,"as":19402,"ar":21528,"ax":910,"av":19342,"au":4063,"ak":14537,"al":32605,"ai":1980,"aj":18129,"ao":743,"ap":5241,"am":14227,"an":46975,"ac":10837,"ad":20535,"ab":3624,"ag":2783,"ah":4710,"ae":1120,"af":1683,"nu":6191,"nt":13652,"ns":14480,"ič":3422,"jí":607,"no":31919,"nn":5955,"nz":946,"ny":10740,"hľ":938,"oe":798,"ká":9527,"of":3569,"oc":9865,"od":30890,"oa":684,"ob":22771,"jú":8749,"om":35081,"ké":8895,"on":18042,"ok":25444,"ol":27522,"oi":991,"oj":9152,"og":4545,"oh":6762,"ot":13213,"m²":561,"os":25341,"ov":73503,"ou":15818,"op":7814,"oo":910,"jč":568,"or":43666,"kú":2339,"r ":14190,"kô":554,"kó":650,"ow":1410,"oz":12370,"pe":7850,"lá":5471,"pa":10171,"ký":14124,"pl":7248,"lé":1049,"pn":1091,"po":39723,"ph":797,"pi":7377,"eň":1574,"lo":31336,"ln":9477,"hé":650,"lm":1360,"ll":2885,"ls":1845,"lu":4726,"lt":2514,"ly":3287,"o ":72538,"ma":12807,"mb":3559,"hý":981,"me":31227,"iá":2685,"ml":855,"eš":1638,"mi":19191,"mn":3217,"mm":559,"mp":2180,"mo":13618,"ií":1342,"ms":1427,"mu":6601,"ió":1393,"my":3831,"p ":1992,"na":48626,"nc":6297,"nd":6014,"ne":40950,"nf":1138,"ež":4352,"ng":5209,"ni":32826,"nk":4794,"jv":2323,"eď":654,"ju":2887,"eč":2428,"js":2102,"jn":2178,"jo":2645,"jm":1649,"jk":595,"ki":2025,"dľ":1507,"ke":14025,"gá":620,"kc":1532,"dĺ":1132,"ka":21392,"m ":45126,"ky":12637,"kt":22417,"ku":19211,"kv":1473,"ko":41039,"kr":12909,"kl":6886,"km":4376,"kn":913,"li":23938,"lh":1065,"lk":2208,"há":3987,"le":30292,"eľ":8588,"ld":1271,"la":30632,"lb":1616,"n ":17290,"hr":6312,"dí":2091,"hv":2290,"ht":1214,"hu":6079,"hi":3695,"hn":2117,"ho":33938,"hl":4308,"hm":1007,"id":6006,"ic":28125,"ib":2309,"dý":547,"ia":31792,"ih":1011,"ig":2598,"if":1907,"eá":715,"ie":44145,"hy":2860,"dô":739,"k ":16704,"ir":3821,"is":15561,"it":16315,"iu":3712,"iv":6007,"eó":677,"aš":705,"ii":2337,"ij":1891,"ik":11705,"il":11847,"im":4228,"ať":2167,"in":30878,"io":5948,"ip":2091,"je":54049,"až":3807,"ji":2876,"iz":4693,"l ":16112,"ja":8703,"xi":1668,"té":3643,"tí":5214,"tó":2119,"xt":831,"sú":8148,"z ":10294,"ož":4892,"tá":7149,"oš":1310,"wi":924,"sé":733,"sí":1587,"rč":1607,"ró":1912,"vy":11079,"rô":641,"vz":3443,"y ":38794,"rú":2323,"rý":5792,"wa":909,"we":537,"sá":670,"vl":2565,"ré":5591,"vk":1421,"nš":1355,"vi":16426,"vu":2054,"vr":2972,"vs":3111,"rí":8465,"vn":10339,"vo":27546,"uz":979,"uv":941,"oľ":2603,"ve":29704,"rá":15217,"vc":1194,"va":30715,"x ":1187,"ui":653,"uj":8555,"uk":2909,"ul":5494,"ue":827,"ug":1127,"uh":4698,"ur":8100,"us":8894,"ut":6390,"um":6737,"un":5022,"up":5983,"ty":4889,"pô":2385,"tz":568,"tu":8639,"tt":1333,"tv":9012,"ub":3487,"ua":850,"ud":5636,"uc":2067,"w ":706,"to":52129,"tn":7740,"tm":1030,"tl":2323,"ts":3310,"tr":24800,"oč":7254,"pí":1372,"pá":1327,"te":28885,"tk":5618,"ti":28975,"th":2145,"v ":48812,"ta":24342,"su":4004,"sv":4018,"ss":1524,"st":58751,"sy":3565,"sl":11132,"sk":44161,"sn":3377,"sm":2199,"sp":10573,"so":10148,"nč":1255,"sc":2214,"sf":608,"se":10443,"sh":654,"si":7301,"rz":1417,"u ":50841,"nú":2094,"sa":16525,"ný":22118,"rr":843,"rs":6494,"rt":4523,"ru":10434,"rv":5009,"nó":730,"ry":3916,"ní":8389,"rp":1158,"ro":47621,"rn":10561,"rm":4664,"né":16353,"rl":1127,"rk":2824,"ri":34707,"jš":2481,"rh":695,"iž":1338,"rg":3525,"rf":643,"ná":21179,"re":40029,"rd":2875,"rc":3598,"rb":1553,"ra":41482,"t ":11585,"mô":731,"mí":2439,"mé":722,"iť":1158,"iš":1309,"má":4691,"mä":1078,"lý":1452,"s 
":19518,"lú":1099,"py":785,"pt":2178,"pu":2861,"ló":2004,"lí":2945,"pr":41771,"ps":1762,"zý":682,"zá":7603,"zé":647,"už":5423,"vš":2161,"yč":534,"zí":915,"uš":1188,"vý":14470,"sť":7157,"Hor":544,"zh":1017,"vä":2642,"zi":7079,"rš":1164,"zb":854,"zd":5109,"ze":5890,"vá":7870,"za":12274,"yz":1332,"zv":3082,"zy":2080,"zs":1637,"uč":1098,"zr":1156,"zu":1736,"zo":9017,"zn":10586,"ví":1466,"zp":940,"zk":1321,"vé":4595,"zm":4191,"zl":2181,"yh":649,"uá":1391,"yc":3925,"yd":1380,"tý":3031,"yb":1209,"tú":3411,"yv":2400,"yt":2758,"ys":5625,"yr":1431,"yp":2058,"yn":1959,"ym":2723,"yl":992,"yk":2457,"yj":557,"zť":610,"yš":1686,"ším":570,"šír":654,"šíc":1062,"ťou":1264,"² ":565,"ťah":745,"ťaž":646,"ší ":1009,"Bra":1002,"Bol":718,"ám":3217,"án":4821,"áp":2703,"áj":1169,"ák":2947,"ál":8570,"áh":1148,"áb":1278,"ác":7329,"ád":5449,"ä ":797,"áz":2921,"áv":6149,"ár":8087,"át":6184,"ás":2422,"á ":30384,"ôs":1461,"ôr":598,"óg":1873,"ód":1271,"ór":1755,"óp":1200,"ón":3191,"óm":1121,"ív":4551,"íz":1142,"ín":4699,"ím":4401,"íp":1003,"ír":2051,"ít":2918,"ís":3116,"ík":3632,"íl":827,"íc":3944,"íd":1198,"í ":13762,"áž":617,"áš":773,"ém":3465,"én":937,"ét":1878,"ér":2157,"äč":1548,"éd":556,"éc":848,"éh":11280,"áľ":830,"é ":23043,"áč":817,"ät":901,"äz":636,"úč":2819,"ýk":611,"ôž":785,"ýc":17590,"ýz":1464,"ýv":2338,"ýs":969,"ýr":1274,"ým":8307,"úz":3206,"ý ":29957,"úp":696,"ún":866,"úl":805,"úv":565,"út":1527,"ús":2474,"úr":2901,"úh":1142,"úd":1600,"úb":634,"úc":5635,"ú ":11007,"íš":617,"ôz":595,"ôv":1382,"íž":736,"Nem":547,"Nac":597,"ýš":544,"Če":1109,"či":6991,"čk":1668,"čl":1517,"če":6035,"ča":9218,"ď ":922,"č ":1105,"čn":7371,"čo":2826,"čt":541,"ču":1013,"ďa":1413,"čš":1550,"čí":3126,"Nov":633,"ľ ":1691,"ľa":5647,"ľk":2350,"ľm":577,"ľn":1481,"ľo":3506,"ľs":1492,"ľu":1677,"ĺž":1136,"ň ":1274,"š ":574,"ší":3431,"ťo":1542,"Par":649,"ťa":2465,"ť ":9577,"še":3604,"ša":1343,"šo":1393,"šp":1178,"šn":1118,"šk":1726,"ši":5749,"št":7288,"Št":611,"ňa":1527,"ňo":1237,"ňu":845,"Poľ":669,"Pro":592,"Pre":892,"Pod":721,"žs":1011,"žu":918,"žn":3570,"žo":977,"že":5249,"žd":912,"ža":964,"žk":1609,"ži":6988,"ž ":2893,"ží":3206,"Má ":570,"SA ":809,"Rak":660,"šš":674,"Je ":3173,"Kar":601,"Mes":567,"Mal":724,"Mar":1239,"Mic":549,"áľo":612,"ého":11278,"čít":911,"čís":890,"ém ":887,"éck":749,"Veľ":631,"čuj":916,"čná":722,"čné":1259,"čný":1760,"étk":856,"äčš":1545,"ému":984,"éri":720,"Zem":545,"ďal":723,"Str":817,"Sta":811,"Spo":812,"Slo":2603,"Rus":613,"čši":967,"The":619,"Tur":593,"šet":1217,"šen":585,"štr":907,"šti":599,"šta":1049,"šov":613,"ško":920,"šia":856,"šie":2695,"štá":2185,"átu":553,"bje":988,"áte":733,"bja":1624,"átk":775,"átn":563,"áto":1594,"ást":705,"bil":932,"bo ":9920,"ávi":600,"bli":1892,"ávn":1077,"bla":1662,"áva":2985,"bod":650,"bol":6856,"boj":571,"ázv":643,"ázo":1232,"bor":1466,"bov":774,"áln":5252,"álo":634,"be ":819,"áme":834,"ákl":1321,"ban":633,"bal":748,"áko":904,"án ":994,"áns":810,"bdo":679,"áno":783,"ámy":668,"ár ":1209,"bec":1805,"ber":2830,"ben":812,"bez":962,"ápa":2237,"árs":742,"bia":583,"bie":1078,"át ":1099,"áro":2670,"árn":1647,"ára":896,"ách":1879,"áci":4745,"ádz":3180,"ca ":4873,"ál ":697,"ce ":4495,"bri":844,"bro":596,"bra":2640,"bre":863,"bu ":703,"bný":856,"bsa":802,"bur":546,"bum":1025,"bud":728,"by ":1911,"ábo":552,"byv":1443,"aka":544,"am ":1578,"ake":675,"aji":1734,"ajm":1012,"ajn":753,"ajs":1164,"ajv":1703,"al ":3190,"ak ":1606,"ahu":1045,"ahr":640,"aho":724,"aj 
":3272,"adá":567,"ajú":4186,"anu":914,"any":631,"ano":1953,"ann":2369,"ant":2498,"ans":3130,"ane":1786,"ang":1885,"ani":9481,"ank":901,"ana":1968,"anc":2108,"and":2251,"amo":1677,"amn":620,"ami":3340,"ame":3786,"ama":959,"alo":3047,"ali":4506,"ale":12669,"ala":2255,"alb":970,"an ":2683,"akt":2087,"ako":5403,"abs":539,"ae ":616,"aca":545,"ad ":2628,"ac ":550,"afi":807,"ado":1644,"adr":867,"adl":645,"adn":3195,"adi":1835,"ade":3266,"adu":813,"aco":855,"aci":1802,"ach":4201,"ace":1242,"ada":1861,"azo":539,"arš":738,"aze":872,"asť":2479,"azy":1600,"axi":588,"atí":1130,"az ":763,"azé":596,"ba ":1862,"azý":599,"at ":1606,"are":1302,"aná":2114,"ard":1508,"arc":1318,"ara":2212,"aro":2009,"ané":3292,"ark":750,"ari":2403,"aní":1813,"ars":1457,"art":1675,"asi":1254,"aný":3599,"aso":888,"asn":1265,"ar ":1338,"akú":915,"alá":536,"apo":817,"apr":1545,"as ":1458,"alý":759,"ava":2084,"aut":1518,"avs":622,"avo":3114,"avn":2304,"avi":3514,"ave":2415,"avy":1063,"avu":898,"av ":719,"ata":1116,"ast":8626,"atn":905,"atk":1336,"atr":1680,"ato":2184,"ate":5705,"ati":5001,"aur":606,"až ":1038,"jeh":1159,"jej":1018,"jed":5235,"jek":1206,"jem":636,"jen":1683,"ji ":665,"ažd":650,"jad":617,"jav":2393,"jan":692,"jaz":1897,"je ":42288,"jne":544,"jov":1599,"jom":624,"jin":1578,"itn":565,"ito":1341,"itu":927,"its":591,"ity":888,"isk":1618,"isl":1645,"iso":851,"ist":5989,"ita":1760,"ite":3279,"iti":1519,"ivo":1530,"ius":597,"ium":775,"iva":615,"ivi":709,"ive":1126,"is ":1825,"ion":1878,"iou":1025,"iov":556,"iro":841,"iné":635,"isc":586,"iný":615,"iu ":2243,"iná":2028,"it ":625,"izá":1011,"ja ":1196,"itý":663,"ité":878,"itá":857,"eór":603,"izo":825,"izm":1796,"ivá":562,"úry":632,"úsk":784,"úst":1144,"dĺž":1131,"úto":587,"km ":3082,"ki ":721,"úra":899,"dľa":1494,"ked":587,"keh":993,"kej":7958,"ke ":2633,"kci":1471,"kra":3230,"kre":4381,"kt ":843,"ku ":12244,"kro":679,"kri":597,"koz":582,"kov":7957,"km²":548,"kou":1936,"kos":1132,"kor":769,"kop":788,"kon":5151,"kom":7319,"kol":2625,"úzs":928,"úze":2017,"koc":987,"kni":537,"ko ":9276,"kla":4718,"jvo":598,"juh":1174,"jsk":1191,"jst":719,"ečn":971,"ju ":598,"jmä":776,"kat":1398,"kar":683,"kan":1054,"kal":813,"kam":835,"juž":746,"ka ":13070,"jvä":876," Ga":726," Ge":865," Fr":1186," Fi":580," Ha":1115," He":1127," Gr":1147," Hr":606," Ho":1810,"ha ":1403," Je":4569," Ja":1224," In":1544,"cúz":960,"han":1109," Ka":2166,"hal":560,"har":1373," Jo":992," Ju":970," La":1046," Le":1473," Li":1420," Ko":2060," Kr":1440," Ma":4398," Mi":1809," Me":2047,"he ":1247,"dá ":670," Lo":960," Lu":549," Ne":1808," Na":2120," Ni":923," Mo":1735,"dáv":586,"her":779,"hem":744," Am":651," An":1497," Al":1528," Ba":1854," Au":624," Ar":1172," Be":1433," Bi":821," Bo":1848," Br":2086," Bu":806,"his":874,"hit":630," Ca":1107," Ce":603," Ch":1547," Co":1476," Da":898," Di":709," De":1164," Do":1293,"hla":2518," Eu":965,"ho ":17722," Fa":559,"gli":887," Wi":780," Vy":655," Ze":700," Za":604,"gió":559,"gov":534,"údi":539," a ":25661," Zá":880,"úci":2715,"úca":1146,"gra":2234,"úce":1226," Or":774," Po":4736," Pl":565," Pi":684," Pe":1292," Pa":2173," No":1171," Ob":929," Ra":1479," Ro":1415," Re":1042," Ná":657," Ri":640," Pr":3280,"gus":545,"gré":723," Má":761," Sv":647," Su":611," St":2410," Ta":1213," V ":1733," Th":921," Ti":628," Te":1468," Tr":1332," To":1247," Ru":1047,"úhv":785," Sa":1649," Si":774," Sc":533," Se":1428," So":1114," Sp":1448," Sl":3357," Va":861," Ve":1359," Vi":989," Vo":610," Tu":845," ja":3535,"iam":1045,"ial":965,"ian":2212," až":955,"iar":1167," je":43951,"iat":1943," 
in":4275,"iac":3458,"iad":1663,"ibl":710," dĺ":994," ka":3354," m ":975,"ĺžk":983," ke":682," ju":2279," ha":599," he":772," gr":1776,"ia ":16846," dô":541," k ":1516," id":930," ic":916," hi":1068," hl":2110," hm":698," ho":3959," hr":2464," hu":1376," hv":1179,"iet":1590,"iev":722," ni":2558,"iez":1705,"iel":2995," ne":6825,"iem":1362,"ien":1741," na":26193,"ier":2554,"ies":2613,"ied":1764," my":693,"ieh":899,"iek":2897," mu":967," mo":3923," mn":871," ok":5647," oc":805," od":6282," of":678," jú":829," ob":11824,"ifi":1055," no":2104," le":2787," li":2051,"ick":16621," la":1441," kv":711," ku":1362,"ici":1650," kt":14067,"ich":3629,"ice":1897," kn":595,"ie ":19237," km":3854," kl":1191,"ica":2319," kr":5176," ko":7181," me":12179," mi":3009," o ":2049," ma":5130,"idl":708,"ide":1943,"ida":634," lo":1116," ab":764," am":1765," an":2592," ap":672," aj":2712," ak":5587," al":12145," au":1810," ar":1452," at":917," as":1460," ba":944,"il ":2232," bi":1273," be":1234," bo":7806," bl":568,"ieč":555," by":978," bu":1277," br":1360,"ať ":1963,"im ":540,"ika":2006,"ii ":2231,"ik ":1390,"imo":583," en":822,"ime":618," el":1140," ek":582,"imi":618," fe":960,"inc":1687,"ind":914,"ina":4033," fa":975," ex":1042," fu":1061,"inn":1036," fr":1747,"ino":2279," fo":1624,"int":1331,"ins":1181,"inf":730,"ine":3297,"iež":1149,"ing":1758," fi":2971,"ini":1596,"ink":652," ge":918," ga":535," bý":593,"iká":1538,"inu":1322,"iny":3026," fy":712,"iko":1475,"ikl":600," ce":3110,"ike":783," ch":2700," ci":1436,"ila":1287," da":904,"in ":1617,"iky":1322,"iku":1353," do":7552," dn":677,"ilo":2707," dl":624,"ill":858," dr":3425,"iln":537,"ilm":766," de":4244,"ili":1886," di":2885,"ieľ":575," dv":1786," du":557," vý":6398," zm":1324," zl":1104," zo":2693," zn":2605," zr":719," zv":1435,"hok":654,"hol":1107,"hom":770," za":7013," zd":916,"hos":535," ze":560,"hov":3246," vä":714,"hor":2878,"dí ":1492,"hod":4341," tý":674,"hnu":694," z ":7975," sú":8023,"huj":767,"hud":1122," vš":1583," už":616," zá":4976,"hu ":2863,"hro":776,"hra":2720," ru":986," sa":11939," se":4266," sc":711,"hyb":690," si":1947," sm":780," sl":4314," sk":5136," sp":8017," so":2515," mô":637," ra":2110," re":5038," ná":5061," ri":1768," ro":13226," pr":34757," ps":641," s ":5627,"hy ":1193," má":2179," os":2878," ot":721," op":1633," or":1845," oz":1525," pe":1662," lá":612," pa":3109,"hrá":1069,"hve":833,"hvi":1289," pl":3456," po":28142," pi":764," vy":7755," rô":570," vz":3164," sí":744," va":859," ve":5421," rá":581," vn":537," vo":6181," rí":775," vr":1254," vi":1930," vl":2079," ty":876," pô":1710," tv":1722," tu":1188," us":808," ur":1335," um":1379," un":575," ta":2281," v ":29073," sy":2275," st":8244," sv":3511," su":1035," pí":654," tr":3662," to":6094," th":647," ti":1601," te":5857," Če":1108,"far":539," čí":1229," čo":669,"ext":753,"ez ":733," če":1631," čl":1364," či":2100,"exi":778," ča":4766,"ezd":2253,"evá":647,"eze":538," ďa":573,"eta":1992,"ete":2049,"eti":2038,"etn":619,"etk":1257,"esp":670,"esn":932,"eso":1170,"est":8782,"esu":1559,"eto":2278,"etr":1705,"ets":539,"erá":1903,"eve":2813,"eva":786,"evo":552,"evn":668,"evi":600,"eur":694,"er ":5579,"epa":610,"eol":809,"es ":3178,"ept":842,"epu":784,"epl":609,"epo":608,"epr":993,"eri":5783,"ejš":2097,"erg":805,"ená":3253,"ere":1934,"era":3225,"et ":1437,"emí":1248,"esk":2173,"esi":987,"ený":3926,"ese":2018,"esa":609,"erz":959,"erv":1342,"eru":692,"ení":1974,"ert":777,"ers":1662,"ern":3749,"ené":2633,"erm":1031,"ero":3934,"eko":1708,"ekt":4312,"eku":666,"en 
":3989,"ela":1378,"ele":4527,"eli":2168,"eln":1067,"elk":666,"ell":742,"elo":1876,"els":755,"emb":1776,"ema":956,"eme":4087,"emo":708,"emi":2282,"ene":3296,"ena":1245,"end":639,"enc":1378,"eno":3633,"enn":1593,"enk":694,"eni":7601,"ens":6528,"ent":6824,"eká":616,"eob":637,"egi":891,"ej ":24245,"eho":5071,"ek ":1552,"eja":658,"el ":2590,"ejo":597,"eke":577,"eka":784,"em ":1276,"git":547,"gie":653,"gic":1619,"gia":694,"gen":1449,"geo":632,"býv":889,"gal":592,"gan":1363,"fyz":775,"fun":734,"fra":1611,"ače":897,"ači":550,"ačo":893,"ačn":1506,"aču":636,"for":2552,"fic":1396," ľa":1263,"fil":2159,"fik":1059,"fin":689," ľu":1198,"da ":3499,"de ":4522,"dal":718,"daj":777,"dat":690,"dan":1747,"ňov":688,"ňuj":774,"cko":2082,"chá":3113,"cky":1826,"ciá":1302,"cií":762,"cká":2980,"cké":4043,"com":638,"cov":2204,"cou":843,"cký":6183,"ch ":28988,"cer":678,"ces":1333,"cen":1906,"cel":1592,"ci ":4101,"cha":2180,"chu":807,"chy":636,"cia":4155,"cie":3755,"cic":1003,"che":1676,"chl":672,"chi":1393,"cho":6193,"chn":823,"chr":824,"cii":806,"ciu":1018,"cio":1122,"cke":2891,"ed ":1409,"ebe":670,"ebo":10260,"ebr":822,"ec ":2815,"edk":836,"edm":627,"edn":6191,"edi":1893,"ede":2996,"eda":1823,"edz":3180,"edy":1130,"eds":867,"edo":1996,"eck":2662,"ech":1905,"eci":648,"ece":694,"ecn":583,"dy ":3344,"drá":560,"dvo":1617,"dzi":3041,"dze":584,"dza":3109,"dor":670,"dop":717,"dom":1779,"dol":1125,"dok":1204,"dov":3715,"dos":1422,"dpo":810,"dmi":940,"dna":890,"dne":4630,"dni":563,"dno":4328,"ôzn":583,"dob":3335,"dst":1631," ús":674," úz":1992,"duk":644,"duc":748,"dné":1797,"dra":864,"dná":1325,"dre":621,"du ":2083,"dro":1109,"dní":530,"dru":2904,"dsk":1498,"dný":2826,"ôso":1218," úč":553,"dic":713,"dia":1795,"der":1566,"dec":966,"dej":622,"del":1924,"den":5230,"dep":553,"ňa ":821,"dla":715,"do ":3088,"dlo":1153,"dlh":673,"dli":698,"div":859,"din":1669,"dio":707,"dis":1227,"die":2214,"ôvo":1350,"rga":1225,"ri ":3554,"rgi":554,"rge":623,"ižn":609,"ret":2145,"res":5921,"nás":953,"rev":1170,"náv":764,"náz":1745,"rez":1012,"reb":974,"rea":838,"nác":533,"rec":1162,"red":6538,"rej":1959,"reg":962,"reh":593,"nám":1836,"rem":1118,"ren":3464,"rek":894,"rel":975,"nál":1168,"nár":2386,"rep":1499,"óri":1260,"ná ":9951,"re ":5558,"rch":2256,"raz":2538,"rd ":682,"ras":1183,"rat":3414,"rav":4611,"óps":667,"môž":675,"raj":2984,"ran":7562,"ram":1814,"ral":930,"rak":1723,"raf":1089,"rad":3121,"rac":1364,"rs ":542,"ros":1960,"rot":1344,"rom":3824,"ron":1914,"rop":884,"roz":4727,"rou":580,"rov":10282,"rob":1537,"rod":4794,"roc":1978,"ní ":2500,"roj":1677,"rol":693,"rok":6707,"rof":585,"rog":1008,"rno":1434,"rny":1014,"rna":1377,"rež":722,"rne":2865,"rni":769,"rmo":565,"jší":951,"nét":1209,"ro ":773,"rma":1073,"riá":846,"néh":4120,"rka":749,"né ":10505,"rio":767,"rit":1644,"ris":1567,"rig":582,"jši":1295,"ril":574,"rik":952,"rin":1063,"rim":561,"ria":5593,"rib":817,"ric":4203,"rid":872,"rie":6455,"rk ":572,"nóm":573,"ruh":2733,"rum":558,"ruk":768,"rus":1271,"rva":761,"rvk":581,"rve":789,"rvo":671,"ry ":2619,"rsk":3778,"rný":683,"rst":1039,"rto":675,"rti":742,"roč":900,"ním":1698,"ník":2268,"rmá":1093,"níc":1168,"rt ":585,"rné":631,"ru ":1693,"sad":597,"sah":1086,"sam":778,"nýc":6239,"óno":796,"ným":3498,"san":671,"sa ":11522,"ruž":608,"ný ":12353,"ón ":764,"nú ":1355,"rvá":821,"ógi":1295,"rvý":709,"rzi":638,"si ":1248,"sie":988,"sia":1333,"sil":824,"se 
":2313,"sch":1140,"sev":1936,"ser":859,"sed":553,"sep":711,"sen":856,"sel":864,"spo":4892,"spr":1629,"spe":1357,"spi":721,"skú":704,"ský":5535,"sov":2280,"ské":3823,"son":682,"ská":3010,"sok":687,"soc":946,"sob":2176,"su ":2117,"st ":1149,"slo":4792,"slu":716,"sky":4413,"sla":2054,"sle":1956,"ski":627,"skl":733,"sko":6875,"skr":1088,"sku":6994,"ska":3470,"ske":5984,"sno":764,"sne":932,"so ":1147,"sme":1025,"stí":1337,"sté":1586,"stá":1256,"syn":543,"sys":1453,"ste":3353,"sta":10107,"stn":2428,"sto":7431,"sti":10473,"stv":3596,"stu":2162,"spô":638,"str":11165,"sts":614,"sve":2116,"svo":1166,"tak":1277,"tal":2000,"tad":583,"tav":3617,"tat":1770,"tas":556,"tar":2723,"tan":2795,"te ":3202,"ta ":5658,"ký ":7646,"kús":915,"ouž":1757,"ozá":729," št":4899,"pa ":715," šk":769," šp":1040," Št":611,"kú ":741,"osť":3922,"pe ":579,"lá ":591,"kýc":4322,"kým":2061,"par":1586,"pat":1317,"pad":2850,"pal":635,"pan":1020,"pev":859,"láv":675,"lác":551,"pec":635,"lád":794,"pen":751,"lán":627,"lár":543,"per":2191,"lát":631,"pel":545,"pla":2526,"ple":978,"pln":576,"plo":927,"ply":661,"pie":636,"pin":2362,"pis":1414,"pit":610,"poz":1867,"pr ":604,"por":2069,"pop":662,"pov":2089,"pou":1725,"pot":3320,"pos":2178,"poj":1973,"poh":1271,"pom":1667,"pon":991,"pok":841,"pol":5900,"poc":543,"pod":5807,"po ":2974,"psk":735,"pub":965,"pte":802,"poč":2131,"pra":4417,"lín":914,"prv":2306,"pri":8032,"pre":12441,"pro":5879,"lóg":1712,"poľ":636,"prá":2352,"prí":5030,"lý ":669,"má ":1849,"mä ":776,"mál":594,"mác":652,"iť ":1130," ži":2499," že":1412,"mí ":997,"ra ":5741,"eži":1187,"ngl":1479,"ni ":1003,"nge":617,"ncú":959,"neh":1387,"nej":10418,"nen":2040,"nem":1523,"nep":832,"ner":1759,"net":1021,"nes":1971,"nev":549,"ež ":1107,"ng ":1176,"nec":565,"nfo":594,"nač":2104,"nez":594,"nco":652,"nci":2449,"nce":985,"ne ":14236,"ndr":573,"ndo":698,"ndi":836,"nde":718,"nda":660,"nak":996,"nal":1254,"nam":2854,"nan":1218,"nap":1468,"nar":576,"nac":1381,"nad":1643,"naj":5055,"nd ":984,"nav":578,"nat":1148,"nas":648,"naz":679,"na ":25237,"mys":1196,"ión":1128,"mož":865,"nyc":1783,"ntá":825,"nož":749,"ny ":7794,"nut":1486,"nto":1924,"ntu":602,"ntr":1488,"nti":2074,"nta":1252,"nte":2106,"nst":911,"nsk":12019,"nný":1960,"nu ":2533,"ičn":889,"nt ":1523,"noh":590,"nom":6261,"not":1866,"nos":7269,"nor":620,"nov":6617,"nou":2761,"nne":1300,"nno":871,"nič":860,"no ":2111,"nka":1319,"nky":647,"nko":836,"eží":971,"nie":10761,"nic":3941,"nia":4773,"niz":1417,"niu":577,"niv":740,"nis":1117,"nit":919,"nin":802,"nik":3242,"ogr":1539,"ogi":1853,"ohr":721,"ohu":747,"oho":1731,"oha":677,"oj ":736,"ok ":6372,"ohy":806,"ojv":611,"ojo":1042,"ojn":974,"oji":587,"oje":2846,"oja":533,"ol ":4900,"oce":1370,"och":5397,"oci":1231,"ock":745,"obs":891,"oby":2302,"ká ":7010,"ode":2380,"odi":1507,"odo":2687,"odp":783,"odn":6582,"ods":1082,"odr":641,"of ":750,"oda":688,"kál":578,"ody":570,"odv":550,"odu":1909,"kác":693,"hľa":877,"ofi":1501,"júc":4607,"oba":663,"od ":5680,"obo":1131,"obr":1755,"obl":2050,"obn":2229,"obj":2245,"obi":1583,"obd":699,"obc":1130,"obe":3213,"nym":950,"jú ":3234,"owi":541,"ový":6197,"ozm":884,"ové":3486,"ozn":2417,"ozl":939,"ozo":2454,"ozd":725,"ová":4125,"otv":654,"otk":570,"oti":1114,"ote":984,"otr":702,"oto":4368,"otn":953,"ost":12819,"ota":944,"ov ":17465,"osi":598,"osk":702,"osp":667,"osl":1521,"oso":997,"orú":1676,"orý":4753,"ovi":5011,"ovn":3441,"oré":3735,"orí":947,"ovo":4172,"ovs":2190,"ova":12599,"ovc":817,"orá":2973,"ove":10952,"opo":1188,"opi":803,"ope":1090,"opa":913,"os ":1011,"oló":1594,"opr":1001,"olí":768,"or 
":2600,"ork":577,"orm":2535,"orn":2464,"oro":4190,"ord":568,"ore":3404,"oná":934,"org":1589,"ori":3545,"ou ":11758,"osa":791,"ort":1007,"ors":1188,"oru":1061,"onó":542,"ory":689,"omá":669,"m² ":561,"ora":1870,"ízk":541,"íto":1670,"ola":3491,"on ":2680,"oli":3155,"olk":708,"ole":1539,"oln":999,"olo":6182,"oly":771,"olu":1129,"oka":1003,"om ":22923,"ké ":4451,"ísl":936,"oke":803,"odľ":1432,"okr":4784,"íta":936,"oko":5187,"okt":536,"oku":4912,"ona":1883,"ond":761,"onc":866,"one":891,"ívn":1999,"oni":1442,"onk":548,"ono":1770,"ons":933,"ont":1301,"oma":1179,"ome":2634,"omi":1538,"kéh":4205,"omp":910,"omn":567,"omo":2048,"omu":756,"íva":2087,"la ":7993,"íms":589,"ína":690,"íns":810,"ím ":3028,"íka":561,"íko":789,"ín ":1967,"le ":3820,"eľ ":1241,"íro":535,"ísa":578,"lad":4725,"ípa":543,"laj":540,"lan":3456,"lam":627,"lat":2521,"las":3716,"lav":3631,"lbu":1038,"krá":1832,"kva":670,"kut":584,"kup":2670,"kum":749,"kul":1450,"kuj":582,"ky ":10576,"íck":596,"ích":1516,"íci":652,"ídl":750,"ktr":1255,"kti":779,"kto":15825,"ík ":1507,"ktú":719,"kyc":690,"ktí":857,"lok":542,"lon":552,"lom":1766,"loh":1318,"log":1964,"los":1486,"lov":10431,"loz":1479,"lno":1179,"lež":1031,"lne":3312,"lny":1606,"lič":614,"lna":768,"loč":1880,"lsk":1225,"íc ":720,"lné":534,"liž":568,"lu ":1579,"lný":723,"li ":2968,"lez":532,"lex":642,"eľs":559,"les":1366,"let":1210,"ler":562,"eľo":2345,"eľm":560,"len":3929,"eľn":732,"eľk":1782,"lek":1883,"lej":738,"led":1215,"hád":2792,"lec":545,"leb":10035,"eľa":1087,"lo ":4534,"eň ":963,"lle":613,"lli":641,"lko":992,"lit":2952,"lis":1429,"lin":2535,"liz":1155,"liv":1148,"lic":2198,"lia":1970,"lik":1405,"lie":1755,"ma ":1882,"hý ":549,"maj":736,"mag":530,"mar":970,"mal":1423,"man":1923,"mat":2750,"mbe":991,"mbr":959,"me ":2180,"med":3229,"mec":1667,"met":2730,"mes":5087,"mer":6360,"iál":1808,"mel":722,"men":7980,"ly ":1425,"lož":1837,"ltú":839,"mpl":621,"moc":890,"mod":1086,"mon":943,"mom":711,"mov":2255,"mor":1939,"mos":887,"mot":1139,"mu ":3156,"msk":1141,"my ":1946,"mus":1219,"mun":908,"mi ":7759,"min":2367,"mil":710,"mit":579,"mic":1764,"mia":681,"mie":3132,"ií ":1338,"mno":1169,"Čes":624,"výr":1121,"výs":904,"výv":564,"vým":1096,"výz":1213,"sť ":5294,"sťo":1256,"čas":7534,"zná":1803,"čen":2971,"čes":715,"zsk":1303,"či ":954,"zvy":552,"zuj":707,"čia":1892,"čit":761,"čin":1952,"čka":799,"zyk":1455,"člo":565,"čo ":704,"čle":748,"čeľ":579,"čov":1446,"vý ":3640,"čno":1723,"čne":1110,"výc":4086,"zi ":1861,"zač":716,"vác":643,"zdr":695,"zen":1178,"zem":2639,"vár":636,"zer":667,"vá ":4679,"zdi":837,"zde":597,"zaj":992,"zan":554,"zal":1158,"zar":737,"zos":869,"zor":857,"zof":1473,"zov":2409,"zo ":1433,"väč":1491,"zme":835,"véh":1297,"zna":5129,"zmu":1473,"zmy":578,"zne":607,"zni":1787,"vé ":2772,"zko":574,"zlo":1388,"zdí":688,"väz":626,"zin":1015,"zik":680,"zit":695,"yva":1437,"ytv":779,"yst":2333,"yso":679,"ysl":1049,"za ":3910,"yzi":807,"ych":3717,"tým":683,"yda":835,"uár":767,"túd":552,"tý ":1101,"túr":2064,"týc":709,"ym ":918,"yko":908,"tín":557,"tív":2085,"tém":1500,"tí ":1453,"ože":1844,"ožn":749,"oži":948,"tár":638,"tát":2299,"táv":759,"tác":584,"tál":1075,"súč":1671,"té ":1097,"xis":565,"súh":1058,"sús":702,"sú ":2895,"tá ":794,"sér":579,"síd":652,"rče":541,"rči":735,"vzť":595,"vzn":1177,"vzd":855,"vyr":543,"vyd":936,"vys":1291,"vyt":825,"vyv":562,"rú ":1621,"rý ":3747,"vyš":1099,"rýc":1417,"rís":711,"rít":1609,"ríp":667,"rím":628,"rík":630,"vní":633,"vné":1199,"vrc":1038,"vst":887,"vsk":2197,"vu ":969,"vný":1142,"vuj":550,"vy 
":2098,"voľ":584,"róp":1084,"rôz":582,"via":1774,"vil":1662,"vať":944,"vin":2530,"vic":742,"vid":1528,"vie":3018,"nšt":824,"vit":1615,"vis":1230,"važ":892,"ré ":3238,"vla":1346,"ráľ":818,"veľ":1567,"vo ":5058,"réc":806,"réh":740,"vne":2233,"vna":1235,"vno":1530,"vny":872,"rí ":1490,"voc":735,"vod":4147,"voj":3988,"vol":995,"vom":1769,"vor":3219,"vot":802,"vos":2079,"vov":1480,"vou":852,"vlá":818,"vi ":826,"vez":833,"ver":4560,"ves":693,"oľs":926,"rát":1564,"vet":2834,"ráv":2279,"vej":3167,"ven":6559,"rán":812,"oľn":609,"vel":598,"rál":1811,"vek":1439,"ved":2502,"vec":641,"rác":1499,"rá ":3488,"ve ":3005,"val":2956,"van":10444,"var":1669,"vat":3033,"vac":763,"vaj":1282,"va ":7123,"urč":1357,"uró":1104,"usk":1487,"ust":1684,"uti":837,"ute":592,"uto":2154,"us ":3503,"uri":613,"uro":616,"ujú":3483,"upi":2474,"ulá":681,"upe":584,"upn":539,"umb":576,"ume":1559,"unk":1091,"uni":1311,"ukt":726,"um ":2635,"ult":1375,"ulo":557,"uli":689,"uhu":583,"uje":4632,"uho":1552,"ugu":540,"ude":572,"uch":1187,"uh ":722,"udo":1777,"ubl":1069,"tvá":863,"typ":873,"ty ":3084,"očí":1245,"trí":632,"tvo":4048,"trá":1603,"tve":851,"tva":2149,"tur":912,"tuj":829,"tup":1550,"pôv":1057,"pôs":1202,"pís":996,"tná":587,"oče":1143,"tre":4383,"tra":5412,"oča":775,"tné":1110,"oči":806,"tri":3765,"tru":1300,"tro":6109,"očn":2672,"tu ":3234,"tný":1463,"tsk":2708,"toč":963,"to ":9348,"tne":1495,"tno":1220,"toc":600,"tou":572,"tov":6017,"tos":575,"tom":2788,"ton":1192,"tok":6341,"tol":1656,"tor":19018,"top":560,"tký":805,"tik":2033,"tif":575,"tie":2901,"tit":878,"tis":1709,"tin":2110,"tio":893,"tia":1542,"tic":5672,"tiv":708,"tko":851,"tka":2298,"tli":832,"tky":1006,"tla":755,"teľ":4587,"tem":1660,"ten":2822,"tep":676,"tej":1310,"tek":1233,"tel":2263,"tec":925,"ted":826,"tex":545,"ter":5642,"ti ":7610,"tač":896,"ží ":1158,"žív":1856,"zťa":609,"yšš":666,"úča":1833,"zýv":664,"ľko":705,"ľom":604,"ľký":597,"ľov":2803,"žin":599,"žil":543,"živ":1147,"žit":1013,"žia":1434,"žij":593,"žov":702,"žno":718,"žne":1488,"že ":2141,"ľav":960,"ľad":1602,"žen":2262,"ľa ":2178,"zác":1029,"záv":683,"záp":1954,"zák":1752,"uži":784,"ýra":532,"užn":1127,"ýro":660,"ým ":5866,"ými":2162,"ôže":563,"ých":17568,"žsk":553,"ľsk":1094,"ľud":1147,"zém":590,"vše":1630,"uží":1835,"ýzn":1190,"ýva":1701},"n_words":[5274251,6043345,4170145],"name":"sk"}
\ No newline at end of file
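Each profile file added above is a single-line JSON document with three fields: "freq", a map from 1- to 3-character n-grams to their occurrence counts in the training corpus; "n_words", a three-element array of total n-gram counts per n-gram length (the indexing assumption below, that n_words[k] totals the (k+1)-gram occurrences, is inferred from the data layout, not taken from the langdetect sources); and "name", the language code ("ru", "sk", "sl"). The following minimal Python 3 sketch inspects one of these files; the path matches the layout in this change, but the helper itself is illustrative and not part of the langdetect API.

import json

# Minimal sketch (Python 3): load one of the profile files added in this
# change and look at its contents. Standard library only; this is not
# langdetect's own profile-loading code.
PROFILE_PATH = "nlp_resource_data/langdetect/profiles/sk"

with open(PROFILE_PATH, encoding="utf-8") as fh:  # profiles are UTF-8 JSON
    profile = json.load(fh)

print(profile["name"])     # "sk"
print(profile["n_words"])  # e.g. [5274251, 6043345, 4170145]

# Relative frequency of an n-gram, under the assumption (inferred from the
# data, not confirmed by the langdetect sources) that n_words[len(gram) - 1]
# is the total number of n-grams of that length.
def rel_freq(gram):
    return profile["freq"].get(gram, 0) / profile["n_words"][len(gram) - 1]

print(rel_freq("ch"))  # the bigram "ch" is frequent in Slovak text

Run against the "sk" file above, rel_freq("ch") works out to 49042 / 6043345, roughly 0.008, which is consistent with "ch" being one of the highest-count bigrams in that profile.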
diff --git a/nlp_resource_data/langdetect/profiles/sl b/nlp_resource_data/langdetect/profiles/sl
new file mode 100755 (executable)
index 0000000..cb96171
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/sl
@@ -0,0 +1 @@
+{"freq":{"D":7697,"E":4326,"F":7483,"G":7482,"A":12478,"B":11289,"C":8095,"L":10635,"M":14480,"N":8171,"O":6564,"H":9046,"I":7126,"J":7268,"K":11757,"U":2722,"T":7824,"W":1905,"V":10234,"Q":288,"P":16672,"S":26591,"R":9751,"Y":433,"X":356,"Z":6077,"f":21271,"g":83131,"d":174445,"e":575804,"b":82172,"Fed":58,"c":56405,"a":599039,"n":410604,"o":476628,"l":247955,"m":151651,"j":244223,"k":242702,"Fel":92,"h":59332,"i":522865,"w":2524,"v":222867,"Fer":370,"u":129396,"t":241091,"s":268170,"r":314054,"q":489,"p":164089,"z":94676,"y":6418,"x":1284,"²":123,"Î":185,"É":129,"Á":62,"Fil":230,"í":2013,"Fin":151,"ë":81,"ê":309,"é":3055,"Fir":82,"è":584,"ç":174,"ä":365,"â":114,"á":3560,"à":133,"ü":557,"ú":870,"ø":79,"ö":488,"ô":419,"ò":234,"ó":1670,"ē":62,"đ":109,"Đ":87,"ā":111,"ć":1063,"Č":1470,"č":56247,"ŕ":73,"ő":245,"ł":69,"ō":86,"Ž":2008,"ž":30614,"Š":2528,"š":51762,"Fak":387,"Fal":70,"Far":111,"Eri":80,"Est":69,"Eti":62,"Ern":82,"Eur":142,"Eva":78,"Evr":794,"Ein":58,"́":345,"Ele":144,"Eko":86,"μ":124,"ν":240,"Ena":60,"ο":342,"ι":214,"κ":105,"λ":169,"δ":61,"ε":131,"η":95,"α":330,"β":59,"γ":116,"ά":79,"ί":88,"Emi":63,"Eli":70,"ό":99,"σ":119,"ς":264,"ρ":222,"π":98,"φ":61,"υ":78,"τ":148," l":19333,"ь":113," m":28795," n":57198,"я":125," o":41952," h":4815,"ш":89," i":55210," j":67383," k":54205,"ы":63," d":42036,"ф":78,"х":75," e":9710,"ц":79," f":9447," g":14912,"ч":253,"р":642,"с":417," a":19069," b":18884,"т":428," c":4312,"у":369," y":127," x":151," z":31775," u":19039," t":27202," w":136," v":56888," p":90919," s":76633," r":20918,"HK ":93,"И":69,"К":110,"Н":65,"М":132,"П":82,"Б":103,"А":111,"Г":80,"В":98,"Д":60," J":7259," K":11717," H":8994," I":7097," N":8139," O":6489," L":10542," M":14428," B":11242," C":8027,"С":106," A":12432," F":7459," G":7430," D":7664," E":4306,"л":503,"к":526," Z":6067,"й":182," Y":432," X":347,"и":1084,"п":111,"о":912,"н":714,"м":224,"г":177," S":26492," R":9719,"Ger":142,"в":657," Q":287,"б":103," P":16617,"а":1195,"з":72," W":1888,"Geo":282," V":10198,"Gen":320," U":2704,"е":796,"д":264," T":7802," č":5716," Č":1467," Đ":87,"Gla":255,"Gia":71,"HL ":80," Á":62,"Gio":131," É":127," Î":185,"Gir":61,"Giu":113," ž":7506," Ž":2008," Š":2526," š":7903,"ա":58,"Gan":63,"Gal":184,"Gam":76,"Gar":174,"Gab":108,"و":76,"ي":148,"ل":189,"م":127,"ن":117,"Fun":63,"د":71,"ب":101,"ا":258,"ر":124,"Flo":89,"Fla":62," А":111," Б":103," В":98," Г":71,"Fra":1714," Д":60," И":69," К":110,"Fri":169," М":132," Н":65,"A ":1521," П":82,"Fre":167,"For":1812," α":63,"F ":287,"Da":1043,"Cu":190,"Cv":64,"Cy":61,"Cl":275,"Co":1643,"Cr":347,"Ce":1243,"Ch":1233,"Ci":465,"G ":285,"Ed":331,"Ea":63,"Dv":143,"Du":856,"Do":1622,"Dr":1054,"De":1300,"Di":918,"Fe":729,"H ":307,"Fa":834,"Eu":245,"Ev":1000,"Er":320,"Et":115,"Es":220,"En":413,"Em":164,"Ep":63,"Ei":124,"El":403,"Ek":172,"Eg":130,"Ge":887,"Ga":902,"I ":771,"Fu":219,"Fr":2141,"Fo":2047,"Fl":225,"Fi":681,"B ":327," С":104,"II ":321,"C ":846,"Av":1043,"Au":489,"Ar":1294,"At":316,"As":365,"D ":417,"Ba":1844,"Az":412,"Af":338,"Ag":178,"Ah":87,"Ab":563,"Ac":172,"Ad":372,"Am":987,"An":1820,"Ap":398,"Ai":256,"Aj":81,"Ak":264,"Al":2250,"Hit":92,"Bu":797,"Br":2247,"Ca":1535,"E ":345,"Bi":1004,"Hid":63,"Be":2195,"Bo":1910,"Hil":81,"Bl":438,"Bj":272,"Hip":72,"Kv":109,"Ku":509,"Kn":259,"IE ":65,"Kl":445,"Kr":2275,"Ko":3816,"Le":3189,"Lj":1723,"Li":1298,"N ":264,"La":1624,"Lu":757,"Ly":76,"Lo":1512,"Me":2219,"Dž":96,"Mi":1869,"Ml":211,"O ":680,"Ma":6025,"Mc":133,"Mu":580,"Mr":101,"Mo":2460,"Nj":399,"Ni":828,"Já":138,"Ne":1598,"Na":2980,"P 
":622,"Hel":136,"Ny":85,"Hei":136,"Nu":95,"No":1554,"Ok":258,"Ol":321,"Om":113,"On":187,"Og":200,"Oh":58,"Oc":80,"Od":1568,"Hen":142,"Her":380,"Ob":922,"Gi":553,"Gl":510,"Gr":1909,"Go":1659,"Gu":367,"Gv":83,"Gy":129,"Cô":91,"J ":297,"Ha":1563,"He":1085,"Hi":543,"Ho":701,"Hr":4064,"Hu":331,"K ":438,"Ib":78,"Id":108,"Ig":180,"Im":513,"In":1624,"Il":230,"Iv":415,"Is":1036,"It":703,"Ir":277,"Ja":1804,"L ":443,"Iz":883,"Ji":68,"Je":1541,"Jo":1290,"Hab":58,"Ju":2001,"Hal":126,"Haj":64,"Ka":2508,"Han":157,"M ":389,"Ham":119,"Har":293,"Ki":622,"Győ":92,"Hau":402,"Ke":544,"Us":272,"Ur":262,"Up":188,"Um":80,"Un":907,"Uk":147,"Ul":103,"W ":109,"Ty":77,"Tu":672,"Tr":1582,"To":1468,"Th":691,"Ti":704,"Te":1011,"Ta":1070,"V ":1619,"Côt":91,"Sw":66,"Sz":701,"Sy":147,"St":3308,"Sv":2528,"Su":652,"Wo":195,"Wi":724,"Wh":64,"Wa":405,"We":272,"Vz":150,"Vo":999,"Vr":488,"Vs":193,"Vu":139,"Vi":1620,"Vl":200,"X ":215,"Va":913,"Ve":3902,"Má":81,"Pt":177,"Pu":321,"Pr":3992,"S ":1529,"Pe":1571,"Pa":2197,"Gui":73,"Lé":59,"Pl":789,"Po":5526,"Pi":1027,"Ph":198,"Os":738,"Ot":263," ا":112,"Op":322,"Or":646,"R ":294,"Oz":78,"Se":2093,"Sc":592,"Si":1170,"Sh":254,"Sn":82,"Sm":234,"Sl":6573,"Sk":484,"Sr":839,"Sp":1374,"So":1617,"Ru":971,"Grč":81,"U ":245,"Sa":2421,"Re":3735,"Rd":107,"Ri":922,"Rh":128,"Ro":1660,"Qu":211,"T ":330,"Ra":1534,"Mü":85,"Gre":332,"Gri":94,"Gra":810,"Grb":113,"b ":3960,"Gru":126,"Gro":222,"a ":211856,"Yo":229,"Gle":73,"Z ":240,"Glo":95,"Gol":160,"Gor":907,"Gos":120,"Za":1749,"Zd":593,"Ze":881,"Zi":330,"Zg":337,"Vé":82,"Zm":60,"Zl":152,"Zo":102,"Zn":141,"Zu":99,"God":61,"Zr":80,"Zv":306,"i ":134220,"gd":128,"ge":5872,"ga":24650,"fj":134,"Inf":332,"fl":264,"ff":331,"fi":4842,"fs":419,"fr":3897,"ač":5802,"fu":678,"ft":369,"fo":2045,"Int":382,"fn":91,"j ":12717,"gy":387,"dá":73,"he":3525,"ha":5343,"gn":1368,"gm":166,"gl":6996,"gi":8855,"gh":503,"gg":192,"gv":88,"gu":2562,"gt":124,"gs":203,"bč":5121,"gr":10356,"cí":78,"go":15666,"dt":312,"du":5066,"dv":5016,"dw":173,"dy":144,"dz":369,"g ":4531,"Ima":116,"ea":2500,"eb":7309,"ec":6168,"ed":29381,"de":25427,"Ili":79,"dd":738,"dg":429,"di":24057,"dh":181,"dk":1177,"dj":1456,"dm":1512,"dl":1112,"do":16087,"dn":14568,"dp":1050,"ds":3632,"dr":13867,"ew":457,"ex":237,"eu":1040,"ev":19283,"ey":856,"ez":12153,"fa":2439,"h ":28964,"aú":301,"Ind":547,"fe":2538,"bá":91,"eh":4032,"eg":21346,"ef":1414,"ee":651,"Ime":316,"el":42176,"ek":16159,"ej":9407,"ei":1868,"ep":9382,"eo":3145,"en":61076,"em":38275,"et":32921,"es":22879,"er":40015,"ca":8624,"bz":65,"e ":176361,"bv":197,"by":89,"bs":2013,"br":6519,"bu":2540,"bt":104,"bn":3084,"bo":9119,"bj":937,"bk":143,"cL":77,"bl":11842,"bm":547,"bh":211,"bi":17028,"bb":158,"bd":603,"be":10505,"dc":181,"db":1176,"da":28148,"f ":2764,"cz":61,"cy":123,"cv":189,"cu":951,"ct":549,"cs":478,"cq":69,"cr":248,"co":5124,"cm":121,"cn":95,"ck":1329,"cl":245,"ci":15112,"ch":3183,"ce":12343,"cc":353,"c ":6745,"az":10124,"ay":618,"ba":6779,"d 
":25792,"at":30196,"as":24786,"ar":39381,"ax":151,"aw":283,"av":30027,"au":2021,"ak":12407,"al":45711,"ai":1701,"aj":23033,"ao":367,"ap":6069,"am":13386,"an":71011,"ac":6592,"ad":26926,"aa":226,"ab":7824,"ag":7152,"ah":7624,"ae":1031,"af":1621,"nu":5151,"nt":10999,"ns":25512,"ič":13659,"nr":295,"np":329,"no":51231,"nn":1495,"nz":984,"ny":526,"nw":82,"jó":58,"nv":255,"oe":569,"ká":135,"of":3946,"oc":3247,"od":39028,"oa":584,"ob":20785,"om":19239,"on":23036,"ok":14837,"ol":27351,"oi":2548,"oj":11831,"og":11726,"oh":1523,"m²":117,"ot":18168,"os":30813,"ov":55859,"ou":2158,"op":10262,"oo":1164,"or":35498,"jč":131,"r ":20683,"ox":97,"kó":141,"ow":586,"kö":75,"oz":8175,"oy":187,"pd":65,"lá":269,"pe":10198,"pa":26706,"Igr":74,"pc":146,"pl":6471,"lé":474,"pn":2144,"po":53400,"ph":657,"pi":10229,"pj":64,"pk":185,"lo":38721,"ln":9297,"lm":1635,"hé":133,"ll":3272,"ls":3383,"lp":1148,"lv":458,"lu":4829,"lt":2975,"lz":195,"ly":422,"hô":58,"Idr":67,"o ":117173,"mc":91,"md":104,"ma":23441,"mb":4609,"dž":1661,"iá":58,"me":39489,"mf":152,"mk":917,"iè":113,"ml":1947,"mi":12739,"eš":4276,"mj":85,"mn":2345,"mm":463,"mp":2668,"mo":14761,"hč":66,"mr":915,"mt":95,"ms":4458,"mv":69,"mu":5539,"my":115,"p ":1343,"na":91417,"nb":277,"nc":8502,"nd":6315,"ne":43250,"já":118,"nf":942,"ež":5003,"ng":4878,"nh":345,"ni":72174,"nj":24738,"nk":3254,"ić":821,"nl":278,"nm":267,"jv":1409,"jt":127,"ju":14494,"eč":5728,"js":10383,"jp":458,"jn":4767,"jo":15551,"jl":168,"jm":397,"jk":398,"kj":752,"ki":56467,"kh":121,"ke":31686,"kd":477,"kc":1346,"ka":42561,"fü":98,"m ":32866,"jz":162,"ky":130,"ks":2035,"kt":5285,"ku":8273,"kv":2313,"ko":52366,"gí":90,"kr":10395,"kk":78,"kl":5176,"km":1749,"gé":62,"kn":1425,"dš":398,"li":49096,"lh":245,"lk":2648,"lj":32952,"le":34235,"há":163,"ld":976,"lg":1256,"lf":539,"hâ":70,"la":37335,"lc":2469,"lb":823,"n ":53330,"hr":1878,"hs":96,"dí":74,"hw":70,"ht":692,"hu":981,"hj":61,"hk":1523,"hi":3365,"hn":1375,"ho":9636,"hl":379,"dè":58,"dé":139,"hm":293,"id":5268,"ic":16189,"ib":3404,"ia":4880,"ih":24332,"ig":5375,"if":1125,"ie":2994,"hy":222,"dú":72,"k ":19129,"iq":110,"ir":15616,"dč":73,"is":20587,"it":17935,"iu":664,"iv":10504,"ix":194,"aš":9037,"ii":2454,"ij":44668,"ik":30544,"il":26890,"im":25029,"in":69797,"io":5253,"ip":4264,"jc":271,"jb":886,"je":114329,"jd":485,"až":1346,"bš":153,"ji":18381,"jh":438,"iz":20523,"l ":16581,"ja":44343,"tä":161,"pš":106,"xi":105,"té":252,"tí":147,"tó":164,"ww":69,"z ":12173,"xa":126,"ož":4663,"tá":178,"nž":333,"wi":424,"oš":3587,"sé":150,"wn":164,"wo":138,"sí":94,"rč":573,"ws":163,"ró":226,"rô":65,"vz":2935,"y ":2958,"wa":490,"sá":124,"we":348,"rè":76,"vl":5625,"vm":123,"ré":314,"vj":1191,"vk":1293,"vh":61,"nš":866,"vi":26446,"vg":688,"vt":1201,"vu":1200,"vr":6944,"vs":7139,"vp":572,"rí":254,"vn":18643,"vo":20029,"uz":834,"ux":225,"uv":1705,"uu":61,"ve":45929,"rá":407,"vd":234,"vc":816,"vb":122,"va":33280,"x ":755,"mš":2061,"ui":868,"uj":5428,"uk":2181,"ul":7777,"ue":1036,"uf":172,"ug":6200,"lž":478,"uh":1992,"ur":10254,"pč":78,"us":9210,"ut":2587,"um":3595,"un":5703,"uo":144,"up":17758,"ty":284,"tz":304,"tu":11745,"tt":1249,"tw":125,"pó":70,"tv":10431,"ub":7050,"ua":3183,"ud":8270,"uc":1029,"w ":442,"to":33340,"tn":10733,"pé":68,"tm":2491,"tl":2152,"ts":2917,"oč":7724,"tr":18852,"tp":111,"tg":72,"tf":115,"te":38423,"pá":79,"td":278,"tk":3275,"tj":2887,"lš":176,"ti":34585,"th":1742,"v 
":46495,"tc":114,"ta":42943,"su":2438,"sv":4904,"ss":1521,"st":64846,"sy":100,"sz":642,"sw":95,"sl":11373,"sk":60513,"sn":5408,"sm":2123,"sp":12768,"so":15794,"nč":1620,"sr":3075,"sd":114,"sc":1363,"sf":304,"se":40074,"sh":762,"sj":389,"kš":233,"si":9098,"rz":2218,"u ":23884,"sa":10107,"sb":1404,"mč":427,"rr":991,"rs":12425,"rt":8497,"ru":11754,"rv":10562,"rw":89,"nó":68,"ry":877,"ní":142,"rp":731,"ro":31529,"rn":11113,"né":172,"rm":4944,"rl":1460,"rk":7297,"nç":122,"rj":7769,"jš":3373,"ri":44436,"rh":1257,"rg":4613,"iž":2554,"rf":351,"ná":202,"re":43763,"rd":5068,"rc":1442,"rb":2537,"ra":68896,"t ":20636,"qu":409,"mí":90,"lč":258,"mé":211,"iš":12414,"má":431,"s ":18299,"px":59,"pt":1806,"pu":4524,"ló":186,"pp":444,"lí":193,"pr":43317,"ps":1569,"Hum":81,"yő":97,"vž":60,"zá":92,"už":4277,"vš":416,"uš":1692,"Hrv":3876,"yí":60,"Hra":77,"rž":2418,"zz":215,"vč":549,"zg":2406,"Hor":93,"zh":2141,"zi":10015,"rš":3989,"zb":1418,"zc":70,"zd":3868,"ze":5649,"vá":315,"za":21344,"Hon":64,"Hok":73,"Hol":102,"zv":5632,"zs":530,"zr":2612,"uč":1996,"zu":2176,"zt":382,"zo":6065,"zn":9166,"ví":186,"zp":931,"zk":568,"zj":291,"zm":3200,"vé":151,"zl":2570,"ye":263,"yc":118,"yd":123,"ya":395,"yt":90,"ys":299,"Hoc":85,"yr":276,"yp":87,"yo":155,"yn":206,"ym":124,"yl":250,"yk":172,"yj":189,"yi":151,"Arg":159,"Arh":66,"Are":64,"Ard":164,"šču":315,"Ara":107,"Arm":203,"ščo":81,"ščn":64,"Ari":101,"šči":1771,"Apo":252,"šče":2731,"šča":1225,"Atl":156,"Ast":131,"Ass":80,"Art":149,"Avt":105,"Avs":709,"Ave":81,"Auv":73,"Aut":64,"Aug":99,"zš":796,"Azi":191,"Azu":144,"Bak":78,"Bal":229,"Ban":193,"Bab":79,"Bad":69,"Bar":430,"Bat":109,"Bas":111,"Bav":61,"Aba":278,"Ada":80,"Ado":61,"Afr":271,"Air":92,"Al ":78,"šće":104,"Aka":59,"Akv":95,"Ala":127,"Alb":257,"Ali":77,"Ale":389,"Alf":117,"Alt":76,"All":122,"Alo":87,"Alp":662,"Ame":656,"Ama":87,"Ang":426,"Ana":133,"And":409,"šč ":144,"Ant":584,"Ann":63,"Buz":68,"Buk":74,"Bur":210,"Bud":88,"Bru":110,"Bož":77,"² ":120,"DA ":787,"DD ":66,"Cal":180,"Cam":157,"Cas":187,"Car":397,"Cat":66,"Can":211,"Cap":85,"Bea":66,"Bes":153,"Ber":630,"Beo":125,"Ben":301,"Bel":559,"Biz":86,"Bje":262,"Bil":214,"Bis":245,"Bit":75,"Bio":71,"Blo":64,"Ble":72,"Bla":204,"Bre":545,"Bra":506,"Bro":313,"Bri":606,"Boh":133,"Bog":157,"Bol":225,"Boj":60,"Bon":91,"Bor":510,"Bos":176,"Bou":105,"Îl":184,"Der":74,"Des":135,"Dev":75,"Dek":70,"Del":334,"Dem":64,"Den":114,"Deb":67,"Dam":75,"Dan":339,"Dar":97,"Dav":176,"Dal":121,"Chr":147,"Che":154,"Chi":125,"ám":76,"án":655,"Cit":86,"áj":126,"Cir":117,"ák":172,"ál":309,"ác":115,"ád":136,"áz":116,"áv":123,"ár":751,"át":278,"ás":315,"ât":67,"Châ":62,"Cla":118,"Cel":355,"Cen":243,"Cer":450,"Ces":88,"à ":58,"á ":69,"Cha":633,"Cre":137,"Cor":603,"Com":262,"Col":181,"Con":211,"Cou":99,"ós":120,"ót":110,"óv":101,"ôm":59,"ôn":89,"Duš":60,"ód":99,"ór":148,"ón":273,"óm":66,"ól":124,"ók":78,"ó ":118,"Drž":130,"ív":88,"íz":94,"ín":324,"ír":134,"ít":126,"ís":96,"ík":80,"íl":246,"íj":224,"íd":88,"ía":72,"Egi":80,"ên":58,"êr":99,"éz":94,"ék":134,"él":241,"éj":87,"ém":138,"én":587,"és":167,"ét":261,"ér":344,"év":100,"éd":144,"ée":75,"Edw":93,"èn":67,"èr":126,"ço":94,"é ":306,"ät":162,"Do ":87,"ć 
":449,"Dia":61,"Dic":151,"Dis":100,"Dir":87,"Dio":72,"Din":68,"Die":63,"Div":106,"Dub":388,"Dun":175,"ün":108,"ür":209,"Dvo":99,"Dru":220,"ún":88,"új":321,"úr":86,"Dre":102,"Dra":452,"íš":61,"Dob":200,"ôt":129,"ör":97,"Dou":72,"Dol":339,"Don":353,"Dom":230,"Dor":129,"Ned":79,"Nea":69,"Nem":513,"Nek":87,"Nev":71,"Neu":84,"Jás":105,"Nep":74,"Nas":351,"Nat":199,"Nav":148,"Nic":123,"Niz":179,"Nik":208,"OJ ":106,"New":248,"Nap":119,"Nar":283,"Nam":213,"Nan":68,"Nag":201,"Nah":254,"Naj":294,"Nad":104,"Na ":449,"ći":324,"OV ":124,"ća":95,"će":161,"Či":127,"Če":527,"Ča":315,"Ču":69,"Čr":316,"či":13203,"čj":3069,"čk":2033,"čl":1507,"če":9878,"ča":7103,"čb":207,"č ":3958,"đe":65,"Đu":65,"čn":11436,"čo":315,"čr":1069,"ču":2190,"čv":95,"Nji":74,"Nje":320,"Nov":818,"Nor":440,"Not":61,"Odv":1174,"Ogr":131,"Obč":247,"PL ":89,"Okr":139,"Nyí":60,"Obs":170,"Obi":89,"Obr":75,"Obo":60,"Od ":141,"Île":184,"Oto":151,"Oli":186,"Ont":118,"Ope":134,"Ore":118,"Org":93,"Ost":65,"Osj":214,"Osm":58,"Osn":93,"Po ":423,"š ":428,"Pli":67,"Ple":143,"Pla":466,"Pin":72,"Pik":79,"Pit":66,"Pis":66,"Pir":250,"Pie":165,"Phi":96,"Ped":76,"Per":309,"Pes":215,"Pet":518,"Pen":134,"Pel":66,"šč":6357,"šć":151,"Pat":125,"Pas":151,"Par":669,"Pav":183,"Pau":156,"Pad":61,"Pac":78,"Pan":157,"Pap":82,"Pal":206,"Pak":100,"še":3798,"ša":2845,"šo":760,"šp":1066,"šn":1922,"šk":22840,"šl":713,"ši":4197,"šj":684,"šv":183,"šu":178,"št":5493,"Še":283,"Ša":280,"Šm":149,"Šo":105,"Ši":182,"Šk":346,"Šu":75,"Št":406,"Šv":259,"Šp":321,"Pož":327,"Ptu":112,"Pro":707,"Pri":1370,"Pre":1114,"Prv":307,"Pru":73,"őr":96,"Pra":329,"Pod":622,"Pok":159,"Pol":1336,"Pom":102,"Pon":195,"Pog":199,"Poi":167,"Poj":93,"Pot":652,"Pos":334,"Pov":181,"Pop":91,"Por":385,"Poz":75,"žr":68,"žu":4511,"žn":3757,"žo":181,"žc":62,"žb":1014,"že":6604,"žd":91,"ža":4974,"žk":205,"žj":1185,"žl":145,"žg":130,"ži":6432,"Žu":1147,"Ža":131,"Ži":199,"Že":372,"RS ":268," ال":88,"ž ":1046,"Rac":68,"Rad":357,"Ram":58,"Mün":75,"Ran":79,"Rak":65,"Que":120,"Irs":68,"Ita":679,"Isl":158,"Ist":728,"Ira":123,"Inš":104,"Iva":381,"Izv":187,"Izr":254,"Izd":68,"Jac":161,"Jad":111,"Jav":92,"Jar":108,"Jap":208,"Jan":529,"Jam":191,"Jak":171,"Jel":130,"Jer":198,"Jes":136,"Jez":169,"Jea":145,"Izš":73,"Je ":506,"Jos":354,"Jor":75,"Jon":77,"Joh":406,"Jug":908,"Jud":70,"Jup":160,"Jur":234,"Jul":171,"Jož":199,"LA ":59,"Juž":280,"Kam":278,"Kal":271,"Kap":166,"Kan":372,"Kat":241,"Kas":83,"Kar":544,"Kaz":101,"Kav":68,"Ker":149,"Ken":111,"Kis":102,"Kir":77,"Kit":180,"Kin":85,"Klo":59,"Kli":73,"Kle":103,"Kla":125,"Kon":517,"Kom":582,"Kol":204,"Kos":201,"Kor":867,"Kop":552,"Kov":80,"Kot":149,"Koz":113,"Knj":131,"Kob":74,"Koc":70,"Kre":107,"Kra":1273,"Kri":388,"Krk":87,"Kro":118,"Krš":94,"Koš":77,"Koč":102,"Kul":64,"Kun":76,"Kur":69,"Kva":82,"Lev":100,"Let":1929,"Les":99,"Leo":137,"Len":143,"Lau":76,"Law":92,"Le ":107,"Lag":58,"Lah":83,"Las":91,"Lat":85,"Lar":113,"Lam":67,"Lan":280,"Lab":97,"La ":179,"Lju":1716,"Lib":144,"Lig":77,"Lim":128,"Lin":177,"Lip":125,"Lit":222,"Luk":154,"Lui":81,"Lun":67,"Lud":105,"Luc":114,"Lou":148,"Lov":122,"Los":65,"Lot":115,"MS 
":61,"Loi":123,"Log":104,"Lor":123,"Lon":188,"Lok":174,"Lič":69,"Lež":206,"Meh":106,"Men":114,"Mel":123,"Mes":284,"Mer":173,"Met":220,"Med":836,"Mač":73,"Mez":77,"Man":399,"Mal":462,"Mar":2349,"Mas":191,"Mag":152,"Mad":1011,"Maj":111,"Mak":235,"Mai":69,"Mac":176,"McL":66,"Max":58,"Mau":68,"Mat":387,"Mla":152,"Mod":111,"Moh":80,"Moj":64,"Mol":94,"Mon":601,"Mos":493,"Mor":297,"Mou":64,"Mot":314,"Mih":227,"Mik":175,"Mic":218,"Mit":62,"Mir":154,"Mis":119,"Mil":386,"Min":343,"NK ":115,"Mur":270,"Mus":87,"Moš":81,"çoi":67,"Wor":66,"Wol":82,"Wik":71,"Wil":329,"Win":160,"ère":90,"Wes":77,"War":73,"Wal":129,"Vzh":106,"Vse":94,"Vrb":98,"Vra":69,"Vrh":141,"Vol":145,"Voj":511,"Vod":126,"Viš":166,"Več":124,"Vis":126,"Vit":109,"Vla":188,"Zla":131,"čuj":826,"čun":696,"Vél":58,"čut":108,"črk":423,"čre":113,"črt":288,"éte":58,"črn":193,"ču ":419,"Zna":123,"Zdr":568,"ény":95,"Zap":71,"Zar":126,"Zas":101,"Zav":104,"Zag":314,"Zah":111,"Zak":109,"Zal":123,"ékt":72,"én ":269,"éli":106,"Zgr":67,"Zgo":249,"éra":62,"Zim":226,"Zel":179,"Zem":564,"ов ":64,"Zač":142,"之":72,"三":107,"Zad":119,"Za ":213,"Yor":161,"на ":82,"Szo":98,"Sza":480,"Sys":60,"Sve":2328,"Sup":72,"Sud":142,"Str":416,"Stu":150,"Sti":77,"Sto":225,"Sta":2102,"Ste":244,"Teh":61,"Ten":108,"Tem":109,"Teo":88,"Tel":99,"Tek":83,"Tam":78,"Tan":81,"Tar":117,"Tak":138,"Tal":65,"Ta ":195,"Sko":62,"Skr":66,"Sku":204,"Ska":62,"Sha":82,"Sim":142,"Sil":93,"Sis":205,"Sir":188,"Sin":157,"Sib":59,"Sez":419,"Ses":154,"Ser":176,"Sev":340,"Sen":145,"Sel":241,"Sem":91,"Sei":131,"Sed":75,"Srb":301,"Sre":466,"TV ":116,"Sv ":98,"Spa":211,"Spl":481,"Spi":65,"Spe":164,"Spr":96,"Spo":313,"Sod":63,"Sok":63,"Soc":117,"Sob":124,"Sou":80,"Sov":270,"Sol":119,"Som":85,"Son":199,"Sop":149,"Sor":59,"Sla":201,"TO ":84,"Slo":6226,"Sli":73,"So ":69,"Rož":72,"Rus":577,"Rud":148,"Sai":290,"Sam":204,"Sal":223,"Sad":71,"Sco":63,"Sch":394,"Sav":417,"Sat":60,"Sau":74,"Sar":260,"San":495,"ови":138,"Rač":75,"SI ":76,"Res":82,"Rev":59,"нов":79,"Rim":348,"Rib":92,"Ric":128,"ät ":150,"Ras":66,"Rav":91,"Raz":271,"Rde":102,"SG ":79,"Rec":63,"Red":122,"Rei":91,"Reg":444,"Ren":132,"Rek":120,"Rep":2306,"Rog":117,"Rob":196,"Roc":78,"Rod":85,"Rou":102,"Ros":172,"Ron":234,"Rom":237,"SS ":448,"SO ":89,"Vel":2994,"Ven":157,"ски":72,"Vas":75,"Van":123,"Val":340,"Var":221,"Vid":128,"Vic":103,"Vie":59,"Vir":205,"Vil":211,"Vik":75,"Vin":206,"Ver":276,"Ves":184,"Ukr":102,"Uni":842,"Ura":80,"Ust":217,"Upo":141,"Trž":77,"Ter":179,"The":362,"Tho":193,"Tih":67,"Tim":73,"Tis":131,"Tir":83,"To ":334,"Top":167,"Tor":193,"Tok":64,"Tol":157,"Tom":179,"Ton":82,"Tou":87,"Tru":58,"Trs":127,"Tro":210,"Trn":82,"Tri":282,"Trg":65,"Tre":271,"Tra":244,"Tur":330,"Tuk":62,"ši ":887,"šev":379,"šem":193,"šel":270,"šen":350,"šes":555,"šer":80,"šeg":118,"šek":215,"ša ":1145,"še ":1531,"šar":175,"šav":187,"šah":216,"šaj":99,"šal":222,"šan":614,"Šve":109,"Švi":128,"što":95,"štr":65,"šte":2122,"šti":888,"šta":268,"šuj":72,"štv":1659,"štu":262,"švi":77,"šve":96,"špa":358,"šov":61,"špo":639,"št ":64,"šu ":58,"ško":7029,"šlj":265,"šla":312,"вич":166,"šo ":156,"šić":67,"šni":492,"šnj":513,"šne":308,"šna":327,"šič":190,"šno":281,"šol":499,"šic":246,"šib":61,"šin":843,"šil":171,"šim":108,"šik":69,"ših":534,"šit":122,"šir":718,"šje":233,"šja":121,"šji":274,"ška":2789,"ški":6172,"ške":6766,"́н":79,"cLa":71,"bju":136,"bje":482,"bja":288,"áto":72,"biz":60,"bis":206,"bit":738,"biv":1271,"bio":487,"bir":767,"ász":140,"bil":9355,"bim":90,"bin":672,"bij":500,"bo ":1111,"blj":4814,"blo":131,"ble":384,"bli":5531,"bn 
":78,"bla":906,"bod":537,"bok":127,"bol":2141,"boj":794,"bog":423,"boh":73,"bič":489,"bno":809,"bna":528,"bni":923,"bne":712,"bmo":523,"biš":76,"bon":136,"bom":159,"bor":1964,"áza":65,"bot":328,"bos":204,"bov":492,"bou":88,"áln":82,"be ":2097,"bam":109,"ban":872,"bak":214,"bal":951,"baj":107,"áko":72,"bah":107,"bac":170,"án ":124,"baz":156,"bav":207,"bat":235,"bas":207,"bar":1233,"ánt":85,"bdo":308,"áno":61,"ány":67,"bde":133,"azš":326,"ánd":61,"bda":111,"bi ":1760,"bej":129,"beh":88,"ár ":304,"bec":216,"ber":3398,"ben":1839,"bel":842,"bez":109,"bes":1297,"bet":155,"baú":258,"bho":160,"bia":67,"bib":116,"bic":125,"áro":114,"ári":73,"áci":80,"buš":62,"ca ":7059,"car":308,"cas":90,"cat":115,"can":153,"cam":193,"cal":177,"cah":276,"ce ":3980,"bri":1142,"bro":951,"brn":171,"bra":2116,"bre":1080,"bu ":321,"brs":91,"bru":767,"bsk":752,"bso":165,"bse":534,"bst":433,"boč":145,"bur":549,"bul":151,"bum":224,"buj":619,"bud":138,"buc":60,"bus":181,"bve":153,"by ":66,"bož":173,"aka":2233,"am ":1501,"ake":764,"akc":307,"aki":595,"ajk":170,"ajl":126,"aji":853,"ajo":3508,"ajp":438,"ajm":203,"ajn":1032,"ajs":1806,"ajt":95,"aju":1775,"ajv":1227,"al ":5680,"ajb":861,"aja":4631,"ajd":410,"ajc":106,"aje":1321,"ajh":414,"ail":145,"ain":656,"air":120,"ais":193,"ak ":1483,"ahk":1261,"ahl":62,"ahi":99,"ahu":67,"aht":170,"aho":1733,"aj ":3209,"agy":182,"aha":1141,"agl":148,"agm":59,"agi":325,"agr":2630,"agu":186,"agn":486,"ago":1560,"anu":2488,"anz":168,"ano":4496,"ann":617,"ant":2576,"ans":8949,"ane":3760,"ang":2192,"anh":58,"ani":12168,"anj":11003,"ank":1463,"ap ":82,"ana":5327,"anc":5066,"and":2328,"amu":229,"amm":99,"amo":2039,"amn":424,"amp":456,"ams":594,"amk":68,"ami":1934,"adž":1259,"ame":3673,"amb":620,"ama":1428,"ao ":196,"alv":164,"alu":459,"alt":357,"als":1671,"alp":219,"alo":3988,"aln":5663,"alm":581,"all":563,"alk":590,"alg":235,"ali":12519,"adš":352,"alj":2204,"alc":1638,"ald":278,"ale":2748,"alf":59,"Šam":81,"ala":5384,"alb":313,"an ":7080,"aks":331,"akr":499,"aku":1530,"akt":954,"ako":3028,"akn":93,"akl":239,"aba":528,"abe":1164,"abi":922,"abl":2808,"abn":285,"abo":950,"abr":232,"abs":510,"abu":83,"ae ":446,"aca":87,"ad ":1592,"ac ":892,"ab ":93,"afo":67,"afr":130,"aft":161,"afs":248,"aff":75,"afe":70,"afi":418,"ai ":215,"aga":832,"age":434,"ael":296,"ah ":2832,"afa":85,"ado":1208,"adr":500,"adl":177,"adk":221,"adn":2260,"adm":341,"adg":67,"adj":251,"adi":2931,"add":208,"adc":70,"ade":1675,"ag ":119,"adz":178,"ads":309,"adu":492,"aco":154,"ack":237,"aci":3927,"ach":405,"ace":463,"acc":103,"ada":12281,"adb":350,"af ":280,"act":73,"azn":554,"azm":458,"azp":368,"azo":464,"arš":186,"azi":1682,"azl":1367,"azk":65,"azv":987,"azu":362,"azr":436,"azt":209,"azs":228,"aze":313,"azg":172,"aza":617,"Špa":241,"azb":60,"azd":489,"avč":119,"azz":92,"az ":748,"ayl":61,"aye":106,"Šta":81,"Šte":144,"ba ":1670,"Štu":63,"at ":1858,"arh":471,"arg":219,"are":1974,"ard":2757,"arc":477,"arb":245,"ara":3352,"arp":61,"aro":2967,"arn":2439,"arm":683,"arl":640,"anç":98,"ark":1199,"arj":3449,"ajš":619,"ari":4347,"aru":188,"arv":516,"arr":314,"ars":3622,"art":3803,"au ":230,"asa":521,"ary":261,"akš":197,"asi":1326,"ash":118,"asc":90,"asb":1150,"ase":7736,"aso":654,"asn":756,"asp":358,"ask":223,"asm":118,"asl":1005,"ar ":4861,"apa":403,"Šen":181,"ape":643,"api":1573,"aph":75,"apn":71,"apl":264,"apo":1357,"app":73,"apr":1152,"aps":75,"apt":76,"apu":142,"as ":2234,"avc":158,"avb":92,"ava":3257,"ax 
":66,"aux":63,"aut":447,"avs":1220,"avt":891,"avr":171,"Ško":256,"avo":1996,"avn":8963,"avk":259,"avl":3158,"avi":5083,"anš":152,"avj":154,"avg":550,"ave":2196,"Šma":126,"ay ":236,"awa":58,"avz":193,"avu":60,"arč":77,"awn":81,"anž":60,"av ":1266,"ata":1793,"asu":583,"ast":7399,"ass":309,"anč":577,"atm":280,"atn":621,"atk":2238,"atl":115,"atr":682,"ato":4303,"ate":7456,"alš":59,"ati":8392,"atj":90,"ath":285,"att":232,"ats":603,"atu":979,"aul":252,"aum":70,"aun":59,"aur":182,"aus":178,"aud":100,"auk":85,"ος":138,"ος ":138,"ς ":264,"ν ":77,"Zve":255,"α ":118,"еви":63,"ий ":84,"ич ":167,"až ":103,"jeg":1394,"jej":1047,"jed":501,"jec":86,"jep":94,"jer":828,"jek":653,"jel":447,"jem":3606,"jen":7756,"jez":1544,"jes":258,"jet":1512,"jev":3484,"jač":189,"ji ":9921,"aža":200,"ažd":84,"aže":590,"ažj":94,"aži":70,"ažn":142,"jhe":109,"jhn":247,"jad":100,"jat":493,"jas":144,"jav":1972,"jap":253,"jar":116,"jal":4024,"jak":772,"jan":7670,"jam":569,"jah":261,"jaj":1388,"jaz":69,"jbo":832,"jce":82,"je ":90645,"izš":397,"jci":81,"jde":159,"jda":105,"jna":637,"ješ":262,"jmo":79,"jni":1155,"jne":1049,"jič":83,"jno":1777,"eč ":1193,"jol":65,"jon":462,"jos":83,"jor":101,"jpo":317,"jpr":102,"ск":123,"jiv":675,"jit":167,"jis":201,"jim":790,"jin":1168,"bšk":61,"jik":186,"jil":336,"jaš":1441,"jij":72,"jig":518,"jih":2539,"jic":888,"те":99,"ječ":100,"ст":65,"ул":59,"jn ":65,"jko":128,"jka":118,"jo ":14224,"jma":181,"jlo":62,"itn":496,"itm":98,"itl":62,"itk":153,"itr":810,"ito":1788,"itv":1047,"itu":620,"itt":146,"its":553,"itz":130,"ity":105,"isk":1191,"ism":148,"isl":637,"iso":1260,"isn":596,"isp":122,"iss":283,"inč":105,"isu":398,"ist":8943,"isz":119,"iv ":665,"ita":4085,"itd":78,"ite":3294,"ith":151,"iti":3232,"itj":163,"ivo":647,"ivn":1608,"ivu":289,"inž":250,"ius":336,"ium":162,"iva":2833,"ix ":146,"inš":354,"ivi":1461,"ivj":69,"ivk":187,"ivl":505,"ive":1813,"ipr":269,"ipo":673,"ipp":99,"ipu":59,"ips":60,"ipt":181,"ipi":261,"ipl":628,"is ":1968,"ion":2321,"iop":95,"ior":98,"ios":95,"iot":214,"iog":97,"iok":133,"iol":587,"iom":79,"ipa":1086,"ipe":338,"iov":126,"ir ":1030,"iru":579,"irs":381,"irt":62,"iro":1658,"irn":1433,"irk":3666,"iri":1414,"irj":549,"isi":572,"ish":146,"ise":765,"isc":429,"isa":2843,"iu ":69,"iqu":102,"ilč":111,"ire":909,"irg":67,"ira":3405,"irc":132,"it ":737,"ünc":71,"ivč":108,"ür ":68,"ivš":143,"ja ":24669,"iz ":4501,"izu":488,"izv":2556,"izr":1033,"izs":209,"izp":299,"izo":951,"izn":244,"izm":1841,"izl":440,"izk":366,"izj":155,"irš":113,"izi":2488,"izh":387,"izg":268,"ize":500,"izd":905,"izb":341,"iza":2030,"kaš":110,"kih":6752,"kij":103,"kim":1029,"kil":157,"kie":91,"kiv":97,"kin":454,"kip":232,"kir":103,"kis":399,"kit":420,"kaž":123,"kje":723,"km ":698,"ki ":46414,"ked":200,"keg":3700,"kej":985,"kem":8923,"kel":218,"ken":160,"kes":145,"ker":562,"ket":264,"kev":340,"key":87,"kač":293,"ke ":15947,"kci":1307,"kda":435,"kra":4811,"krb":350,"kre":318,"kt ":433,"ksa":263,"kse":141,"ku ":1833,"kro":1818,"krv":182,"kri":2242,"koz":528,"kov":6012,"km²":98,"kot":4172,"kos":954,"kor":1260,"kop":980,"koo":149,"kon":3042,"kom":2571,"kol":3036,"kok":1914,"koj":101,"koh":100,"kog":105,"kof":1624,"kod":500,"ks ":201,"kmu":212,"kme":268,"kmo":345,"koc":91,"kob":135,"kne":116,"kni":115,"knj":1121,"klu":447,"ko ":24162,"kma":99,"kle":646,"kla":1462,"klo":710,"kli":1093,"klj":712,"jvo":146,"jut":62,"jus":126,"jul":555,"jun":698,"jur":257,"jve":859,"jvi":346,"joč":433,"jub":2164,"juj":406,"jug":1198,"jud":1091,"jsk":8627,"jst":1435,"ečj":1199,"ečk":404,"eči":887,"ečn":355,"ečo":63,"eču":68,"ju 
":6445,"jse":265,"jiš":131,"eča":390,"eče":1036,"již":650,"kaz":528,"kav":355,"kat":5637,"für":83,"kar":3117,"kas":410,"kap":270,"kan":1808,"kal":3239,"kam":632,"kaj":559,"kak":434,"kah":366,"kai":81,"kad":433,"kac":404,"juž":772,"ka ":23634,"juč":601,"jze":62," Ga":900," Ge":881," I ":207," Fo":2042," Fu":219," Fr":2139," Fi":677," Fl":224," Ha":1561," He":1083," Cô":91," Gy":129," J ":84," Go":1655," Gr":1898," Gu":360," Gv":83," Gi":550," Gl":510," Ig":179," Id":108," Ib":77," K ":116," Hu":329," Hr":4060," Ho":699,"ha ":480," Hi":543," Ji":67," Je":1541," L ":110," Ja":1800," Iz":882," Iv":415," Ir":276," Is":1032," It":703," Im":512," In":1618," Il":228,"ham":322,"han":759," M ":155,"hai":79," Ka":2499,"haj":1477,"hal":392," Ke":540,"hau":83," Ki":619,"har":950,"has":60,"hat":80," Jo":1288," Ju":1997,"haf":95,"hae":137,"hab":68,"had":64," N ":83," La":1551," Le":3180," Li":1291," Lj":1723," Kl":443," Kn":255," Ko":3810," Kr":2271," Kv":109," Ku":508," Mc":133," Ma":6006," O ":269," Ml":210," Mi":1860," Dž":96," Me":2209,"he ":889," Lo":1511," Ly":76," Lu":757," Já":138," Ne":1590,"а ":247," P ":279," Na":2970," Nj":398," Ni":827," Mr":101," Mo":2454," Mu":575,"hek":94,"hel":368,"hei":125,"heb":110," A ":314,"het":73,"hes":152,"her":719,"heo":145,"hen":330,"hem":187,"hi ":171," B ":212," C ":442," Ap":398," Am":984," An":1817," Ak":264," Al":2235," Ai":256," Aj":81," Ag":178," Ah":85," Af":338," Ac":171," Ad":366," Ab":561," Ba":1832," D ":134," Az":412," Av":1041," Au":488," At":316," As":363," Ar":1291," Be":2192,"hie":110,"hid":239,"hic":86," Bi":1002,"hia":98,"hip":210,"hio":70," Bj":272,"hin":407,"him":147," Bl":437," Bo":1900,"hil":201,"hik":78,"hij":109," Br":2242," Bu":794,"his":178,"hit":605,"hir":188," E ":92," Ca":1521," Ce":1243," Ci":464," Ch":1220," Cl":266," Cr":346," Co":1629," Cu":189," Cv":64," Cy":61," F ":123," Da":1039," Di":915," De":1297," Dr":1054,"hkr":181," Do":1606,"hko":1232," Du":855," Dv":142," Ea":63,"hn ":214," Ed":330," G ":65,"hla":193," El":398," Ek":172," Ei":122," Eg":129," Et":115," Es":219," Er":320," Ep":63," En":408," Em":164," Eu":245," Ev":1000," Fe":726,"ho ":80,"hma":181," Fa":830," H ":159,"gma":95,"go ":1697,"glo":304," Z ":182,"gle":2479,"gli":810,"glj":228,"gla":2978," Wo":190," Wi":722," Wh":62," We":269," Wa":403,"й ":140," Vz":150,"gog":353," Zr":80," Zu":99,"god":1736," Zv":306,"gob":81," Vé":82," Zm":60," Zl":152," Zo":102," Zn":141," Zd":593," Ze":880," Zg":337,"gič":79,"gno":136," Zi":327,"gni":103,"gnj":147," Za":1745,"gne":563,"gna":300," Yo":229,"gs ":62,"о ":66,"goz":506,"goj":282,"н ":123,"gom":421,"gol":311,"gon":470,"gos":3146,"gor":2864,"got":348,"gov":2780,"gu ":806," a ":458,"р ":62,"gro":564,"grm":63,"gru":125,"bču":59,"grs":221,"gra":6878,"grb":61,"bči":4969,"gri":272,"gre":942," R ":122,"в ":115," Oz":78," Os":737,"gto":98," Ot":263," Or":643,"goč":401," Op":320," Po":5507," Lé":59," Pl":785," Pi":1026,"gul":122," Ph":190,"gua":77," Pe":1565,"gub":149," Pa":2188,"gue":188," Ny":85," Nu":95," No":1550," Ol":321," Ok":257," On":182," Om":107," Oh":58," Og":200," Od":1566," Oc":80," Ob":907," Ra":1525," Mü":85," T ":114," Qu":210,"új ":254,"goš":89," Ro":1658," Re":3734," Rd":107," Ri":916," Rh":128," S ":293," Pr":3986,"gur":145,"gus":676," Pt":177," Pu":321,"gun":145," Má":81," Sz":701," Sy":146," Sw":66," Sv":2526," Su":652," St":3290," Ta":1068," V ":1176,"gya":99," Th":689," Ti":702," Te":1006," Tr":1582,"gyk":115," To":1461," Ru":970," Sa":2415," U ":88,"е ":69," Sh":246," Si":1155," Sc":584," Se":2090," So":1610," 
Sp":1368," Sr":838," Sk":482," Sl":6566," Sm":233," Sn":82," Va":911,"и ":110," X ":144," Ve":3898," Vi":1616," Vl":200," Vo":997," Vu":138," Vr":488," Vs":192," Tu":669," Ty":77,"grš":1077," Uk":147," Ul":103," Um":80," Un":907," Up":188," Ur":251," Us":272," ja":3029," l ":65,"iam":240,"ial":1334," iz":12947,"ian":961," ji":1172,"ias":76,"iar":121," je":57692,"iat":484," io":104," ip":64," im":7503," in":30104," il":218,"ic ":1048,"iac":130," is":755," it":1465,"iag":159," ir":117,"ibl":600," fü":65,"ibi":274," ka":7604,"ibo":889," m ":545,"ibn":134,"ibr":170," kj":700," ki":23502,"ibu":174," ke":816," jo":1895,"id ":690,"iba":621,"ibe":314," ju":3473," ha":275," he":621," gi":540," gl":3153," gr":2915," go":2718,"ia ":1100," gu":114," k ":178," ib":60," id":224," ig":1379,"ib ":92," hi":1051," hk":181," hl":97," ho":1169," hr":955," hu":193,"iet":196,"ieu":64," nj":2131," ni":1612,"iel":266," ne":7311,"ien":545," na":42601,"ier":585,"ies":172,"ied":168,"ieg":71,"и́":65," mu":575," mr":220,"ig ":209," mo":4823," mn":787," mm":80," ok":4185," ol":657,"ifu":62," om":674," on":135," og":566," oh":232,"ifo":187," oc":331," od":6669," of":580," ob":12756,"ifr":67,"ife":161,"ifi":406,"ih ":22740," nu":342," no":2512," np":247,"ifa":63," le":11324," lj":1128,"icr":71,"ics":75,"ict":217," li":1970,"icu":137,"icn":80," n ":309,"ico":974,"ick":188," la":3283," kv":554," ku":716,"ici":3252,"ich":665,"ice":3027," kn":1159,"ie ":718," km":1016,"ica":6322," kl":1067," kr":5141," ko":11511," me":12878," dž":64,"idu":106," mi":1837,"ids":75," ml":614,"я ":69,"idr":346," o ":887,"ido":732," ma":6202,"idn":485," lu":270,"idi":434,"idg":71,"ide":1356,"ida":711," lo":1132," af":94," ag":154,"aša":593," ab":327," ac":59," ad":284,"aše":251," am":1331,"ašk":6766," an":2354,"aši":200," ap":760,"ašn":368,"iin":107," ak":715,"iim":2227," al":6858," av":1858," au":60," ar":1254," at":457,"ašt":146," as":1847," d ":320," ba":1756," az":58,"il ":5175,"ija":12085," bi":10040,"ije":12707," be":1503,"iji":5965," bo":2432," bl":915,"ijo":6465,"ijs":4363," bu":186,"iju":165," br":1850," ca":173," e ":85,"im ":3856,"ika":6461,"ige":666,"iga":1104,"aš ":78,"igl":183,"igh":214,"igi":601,"igu":193,"igr":1537,"igo":249,"ign":280,"ij ":2784,"т ":70," b ":94,"ihe":100,"iha":530,"ihi":87,"iho":729,"ik ":9136,"у ":103," c ":70," er":65,"imo":1733,"imn":223," et":360," es":185," en":4762,"ims":2333," em":109," ep":251,"imp":926,"imf":91," el":1413,"ime":9448," ek":723,"imk":790,"imi":2451,"ip ":337," fe":1007,"inc":748,"ind":925,"ina":10483," fa":1370,"imu":325,"а́":109," ev":586," fu":480,"inn":113," fr":3146,"ino":5575,"ašč":462," fo":718,"int":1271,"ins":4445,"inf":316," fl":126,"ine":5190,"inh":91,"ing":1310,"inj":1193," fi":2355,"ini":4151,"ink":465," ge":1321," ga":4003,"iod":295,"inu":433," i ":115,"inv":62,"inz":59," cl":64,"iko":4804," cm":100,"ikl":979," co":254,"iki":3287," ce":2735," ch":72,"ike":4355," ci":552,"ila":7057,"ilb":66," f ":61,"in ":31978," da":5215,"ikv":58," cv":150,"ikt":162,"iku":799,"ikr":245,"iks":94," do":7621," dn":241,"ilo":4202,"ill":1121,"ilk":457," dr":7206,"iln":1875,"ilm":587,"ilh":107," de":11720,"ilj":766,"ili":2960,"ild":76,"ilc":146," di":5516,"ile":1135,"ima":2334,"imb":293,"ч ":172," ed":1001,"io ":829," dv":2054," du":1905,"ils":366,"ilt":85,"ilu":372,"ilv":125,"ль":58," vč":201," zm":655," zl":487,"ла":90," zo":151," zn":3542," zu":280,"ле":83," uč":541," zr":511,"ли":66," zv":1609,"hok":781,"hol":497,"hom":250,"hon":167," za":14763,"ко":128,"hos":66," zd":1135,"hot":761," ze":976," 
zb":804,"hov":2393,"ку":58,"hop":61," zi":186,"hor":342," zg":1780,"ка":91,"ки":115,"hod":3942,"hni":523,"hno":289,"hnu":58,"hna":106,"hne":92," z ":4835,"ин":98,"ик":81,"ий":90," ož":121,"ич":191,"ри":87,"ро":86,"ра":120,"ре":58,"htt":71,"hto":63,"htn":72,"hte":153," už":81,"ор":104,"ол":82,"ов":288,"hu ":190,"hrv":430,"но":113,"hro":110,"hre":73,"ни":87,"hri":399,"ht ":188,"на":128,"hra":653,"hiš":212," ru":1131," u ":188," sa":1902," se":18649," sc":148," si":3326," sh":137," sn":739," sm":996," sl":6094," sk":4932," sr":1993," sp":10659," so":10507,"ви":220," t ":95," ra":7736," re":7014," rd":178," ri":2298," ro":2318," pt":209," pu":582," pr":31964," ps":336," s ":4586," px":59,"hy ":96,"ва":103,"ад":75," os":4746," ot":1470,"hum":246," ov":185,"hun":84,"hus":107," op":2076," or":2176,"hur":142,"ан":143,"ак":68," oz":3235," pe":3830," pa":8196,"ар":96," pl":2669," po":40217," pi":2589," y ":101," vz":1551," x ":129," va":2039," ve":6450," uv":1301," vn":129," vo":6456," vp":464," vr":2767," vs":3256," vi":2299," vk":283," vl":673," vm":85," ud":201,"ет":79,"ер":77,"ен":63," tv":236," tu":5865," us":2479," ut":152," ur":2875," up":8796," um":682," un":617," uk":539," ul":238," ug":179," ta":4305," v ":29906," st":7117," sv":3870,"о́":61," su":765,"ев":143," oč":210," tr":3796," tl":193," to":2461," th":371," ti":1200," tk":120," te":8405,"fi ":65,"ffe":78,"ffi":69,"fes":342,"fer":516,"fed":154,"feb":634,"fen":145,"fek":403,"fel":89," Ča":315," Či":126," Če":525,"faz":112,"fat":64,"far":143,"fan":669,"fak":920,"fal":122,"ff ":62,"fe ":105," Đu":65,"fa ":189,"aúj":298," ču":115," čr":759,"eyr":71," Ču":69,"exa":83,"ez ":873," Čr":316," če":1273," čl":1304," či":437," ča":1729,"ezu":507,"eza":1493,"ezd":639,"ezn":2303,"ezo":3022,"eví":88,"euč":161,"eze":1111,"erš":61,"ezi":1770,"eta":9837,"ete":2260,"etd":101,"etj":1541,"eti":2887,"elš":67,"eth":104,"etn":3058,"etl":581,"etk":435,"esp":164,"esn":1865,"eso":719,"est":9497,"esu":218,"enč":254,"esr":69,"ess":341,"ev ":4515,"emš":2022,"eto":5019,"etr":1803,"ets":629,"ett":335,"etu":861,"etv":352,"ew ":247,"eve":3581,"evd":110,"evc":189,"eva":3659,"evo":978,"evn":851,"evl":113,"evk":344,"evj":120,"enš":165,"evi":3431,"euv":65,"eut":92,"eur":130,"eus":174,"ex ":78,"evu":106,"evr":568,"evs":223,"evt":159,"ey ":565,"evz":91,"epe":218,"epi":485,"eph":184,"er ":9909,"epa":2146,"eos":79,"eor":822,"eom":284,"eol":485,"eop":91,"eon":196,"es ":3592,"ept":897,"epu":2717,"epl":115,"epn":163,"elé":65,"epp":110,"epo":984,"epr":1096,"erk":1130,"erl":288,"ejš":1389,"eri":6828,"erj":1569,"erg":1075,"erh":74,"ere":2880,"erf":64,"erc":387,"erd":237,"era":3623,"erb":290,"et ":2922,"esj":60,"esk":362,"esl":322,"esm":246,"esi":510,"esc":198,"ese":3032,"eu ":71,"esa":1428,"erz":1265,"ery":65,"erv":377,"eru":570,"emč":403,"err":420,"ert":841,"ers":1249,"ern":2752,"erm":750,"erp":166,"ero":2923,"eki":619,"ekl":439,"ekm":687,"eko":1561,"ekr":138,"eks":912,"ekt":2647,"eku":579,"ekv":164,"en ":7706,"elb":111,"ela":2552,"eld":165,"elc":327,"elf":63,"ele":5343,"eli":7355,"elj":12080,"elg":274,"ehé":68,"elm":113,"eln":478,"elk":390,"ell":946,"elo":6099,"elu":1481,"els":386,"elt":135,"eo ":194,"emb":2938,"ema":2409,"edž":154,"eme":2514,"emd":94,"eml":1047,"emn":341,"emo":1461,"emi":2775,"emu":847,"emp":655,"ems":675,"ep 
":100,"ene":4626,"enh":70,"eng":267,"enb":171,"ena":6583,"end":853,"enc":1426,"eno":7193,"enn":350,"enk":486,"enl":165,"eni":11655,"enj":3527,"enu":1052,"ens":9125,"ent":4611,"enr":131,"enz":343,"eog":377,"eod":241,"eob":101,"egl":515,"ego":1324,"egn":116,"ege":392,"egi":3464,"ej ":1349,"eha":369,"egr":207,"egu":231,"egy":59,"ehn":551,"ehr":149,"eho":984,"ehi":163,"ek ":3893,"eic":68,"eis":179,"eir":92,"eim":143,"eil":148,"ein":591,"eid":153,"eja":1145,"el ":3464,"eiz":90,"eit":89,"ejs":725,"ejo":1467,"ejn":247,"ebš":60,"eji":700,"eje":1829,"ekd":397,"eke":552,"ekc":78,"eka":3433,"em ":19835,"eju":371,"gl ":122,"git":143,"gis":130,"gir":72,"gim":692,"gij":4610,"gik":62,"gip":136,"gin":441,"gio":182,"gie":59,"gib":460,"gih":484,"gia":72,"ght":197,"gha":74,"ggi":66,"gač":142,"gi ":1057,"gen":1655,"geo":621,"get":148,"ger":535,"ges":132,"gh ":98,"geb":124,"geg":139,"gem":133,"gel":572,"gej":96,"gda":62,"ge ":1547,"gac":68,"gad":460,"gah":82,"gas":134,"gar":537,"gat":408,"gaj":272,"gam":198,"gal":701,"gan":2184,"ga ":19235,"fur":60,"fte":66,"fun":414,"ft ":152,"ačb":149,"ača":451,"fra":3096,"fre":206,"ače":1435,"ačj":77,"ačk":163,"fri":439,"ači":1402,"fsk":383,"fro":120,"ačn":463,"ačr":174,"aču":1108,"fov":120,"for":1149,"fos":82,"fot":167,"fon":272,"fol":119,"ač ":265,"fič":180,"fla":71,"fic":219,"fie":63,"fig":93,"fij":889,"fil":1101,"fik":288,"fin":542,"fit":105,"fiz":1064,"fja":62,"db ":97,"da ":14922,"dbe":281,"dba":322,"dbi":164,"dbo":232,"de ":3505,"dac":99,"dal":2088,"daj":1290,"dag":370,"dah":122,"dae":198,"dat":2652,"dar":1727,"dan":3799,"dam":251,"dav":343,"dda":258,"dde":307,"dce":69,"cul":97,"cto":99,"cti":246," Îl":184,"cy ":79,"cve":165,"cus":130,"cur":63,"cks":59,"cko":94,"cla":60,"cle":108,"co ":1155,"con":195,"col":193,"com":102,"cor":143,"cos":2856,"cot":60,"cou":95,"cs ":326,"cqu":61,"cro":134,"cu ":453,"cci":147,"cca":61,"cea":385,"ch ":526,"cev":1776,"cer":935,"ces":1318,"cet":77,"cen":988,"cep":196,"cej":81,"cem":898,"cel":1340,"ceg":119,"ced":151,"ci ":2670,"cha":428,"chw":61,"chu":146,"cia":974,"ck ":555,"cie":148,"cid":94,"che":837,"chl":72,"chi":339,"cho":135,"chm":179,"chn":116,"cht":110,"civ":109,"cij":6609,"cik":850,"cil":290,"cim":128,"cif":150,"cih":145,"cir":298,"cis":476,"cit":587,"cin":475,"cio":727,"cip":225,"cm ":87,"cke":249,"cka":102,"ed ":7632,"eba":410,"ebe":1030,"ebi":1286,"ebl":102,"ebn":705,"ebo":328,"ebr":1109,"ebu":659,"ec ":3986,"eac":70,"eag":60,"eae":122,"ead":77,"eak":245,"ean":473,"eal":308,"ear":308,"eas":76,"eap":68,"eat":181,"eau":166,"eb ":1452,"ea ":232,"efi":249,"efo":206,"efa":145,"efe":483,"ei ":95,"ega":14282,"een":162,"eh ":1490,"eer":68,"eev":58,"edk":372,"edl":222,"edm":691,"edn":4723,"edh":90,"edi":2962,"edj":165,"ede":4374,"ône":77,"eda":2590,"edb":168,"eg ":627,"edt":128,"eds":1765,"edv":775,"edu":552,"edp":144,"edo":1120,"edr":537,"eck":137,"ech":121,"eci":430,"ece":981,"eca":75,"ee ":129,"ef ":166,"ecu":64,"ect":136,"eco":75,"dož":69,"dvs":488,"dwa":102,"dy ":102,"dvi":1792,"dve":1124,"dvo":628,"dur":65,"dus":252,"dva":885,"duš":167,"drž":2000,"dzo":143,"dzi":74,"dze":62,"dor":425,"dop":136,"don":666,"dom":1025,"dol":2617,"dok":396,"doz":167,"dow":104,"dov":2766,"dot":187,"dos":834,"dr ":66,"dpi":182,"dpr":535,"dpo":228,"ds ":150,"diš":1126,"dmi":400,"dmo":356,"dna":1989,"dne":2475,"dni":3893,"dež":1743,"dnj":2661,"dno":3481,"dič":320,"dob":2688,"doc":83,"dod":289,"dog":505,"dst":1636,"dso":71,"dte":147,"dun":61,"duj":151,"dul":104,"duk":196,"duh":1475,"duc":137,"dri":615,"dra":2046,"dt ":61,"dre":1648,"du 
":2258,"dro":1558,"drs":225,"dru":5537,"dsk":1257,"dse":412,"dge":82,"dgo":243,"dic":712,"did":62,"dia":709,"dho":97,"ôte":98,"der":1069,"des":1368,"det":180,"dev":559,"dez":147,"deb":135,"dea":152,"ded":131,"dec":783,"def":191,"dej":655,"del":8200,"dek":748,"den":2287,"dem":894,"dep":1917,"deo":172,"di ":7874,"dle":116,"dla":418,"dko":292,"dkr":290,"dki":137,"dme":496,"dma":184,"do ":2879,"dlo":208,"dlj":134,"dli":196,"dja":348,"dje":967,"div":552,"diu":58,"diz":86,"dim":344,"din":3211,"dio":320,"dip":467,"dir":3392,"dis":608,"dit":557,"die":123,"dif":117,"dig":180,"dih":219,"dij":1635,"dik":285,"dil":1003,"dka":143,"dke":226,"dju":78,"deč":365,"rgy":70,"rgu":403,"rhe":114,"rha":142,"rhi":372,"rhu":89,"rhn":68,"rho":225,"iža":226,"rga":1747,"ri ":6205,"rgl":58,"iži":369,"rgi":606,"ižj":188,"iže":597,"rge":583,"rgo":351,"ižn":1012,"rgn":87,"ret":1927,"res":1802,"rev":1974,"reu":328,"rez":1267,"rh ":216,"rfi":77,"rfo":61,"rač":1077,"rdu":102,"rds":107,"rg ":550,"iž ":81,"reb":2051,"rea":620,"ree":132,"ref":492,"rec":915,"red":9699,"rei":333,"rej":1975,"reg":4478,"reh":767,"rem":2494,"ren":2516,"rek":2213,"rel":1062,"rer":139,"reo":180,"rep":1325,"rda":399,"rcu":159,"rdo":364,"rdn":359,"rdi":2193,"rde":564,"re ":2956,"rbu":279,"rbs":360,"rci":266,"rch":173,"rce":392,"rca":244,"raz":7036,"rd ":765,"rap":609,"rar":714,"ras":1404,"rat":4465,"rau":89,"rav":10605,"rbi":646,"rbo":438,"rba":318,"rbe":206,"rc ":65,"raj":3231,"rai":122,"rah":639,"rag":448,"ran":10813,"ram":1883,"ral":3846,"rak":1045,"rab":4754,"raf":1035,"rae":188,"rad":7153,"rac":1106,"rpu":173,"rpo":96,"rs ":406,"rpe":109,"rpa":103,"ror":143,"ros":2112,"rot":1454,"rom":2391,"ron":2361,"roo":117,"rop":1736,"roz":633,"rou":216,"rov":3453,"row":90,"rob":744,"roa":69,"rod":3371,"roc":719,"roj":1187,"roi":1061,"rol":591,"rok":1027,"rof":772,"roe":143,"rog":1831,"rno":2826,"jšč":126,"rič":1125,"rns":80,"rnu":76,"rna":2170,"rež":741,"rne":1804,"rnj":672,"rni":3001,"rmo":552,"rmu":1839,"ro ":1744,"rma":1564,"rme":357,"rmi":362,"reš":389,"rlo":204,"rlj":61,"rli":272,"rle":264,"rla":328,"rn ":268,"rkv":475,"rku":166,"rkt":89,"rks":59,"rkn":66,"nço":87,"rko":671,"rki":446,"rke":698,"rka":3906,"rm ":91,"reč":850,"rju":560,"rji":433,"rja":3706,"raž":658,"rje":3010,"riz":1024,"rl ":197,"rip":1323,"jšo":114,"rio":714,"rir":527,"rit":2886,"ris":2096,"riv":1041,"riu":103,"rih":1145,"rig":966,"rij":4080,"raš":557,"jši":1298,"rii":2351,"ril":2003,"rik":1886,"jšn":99,"rin":1694,"rim":3965,"jša":896,"ria":1305,"rib":1637,"ric":1600,"rid":1029,"rie":837,"jše":763,"rif":96,"rk ":603,"roš":477,"rož":1407,"ruj":117,"ruh":77,"rug":3066,"rud":146,"ruc":91,"rup":276,"run":235,"rum":444,"rul":77,"ruk":405,"ruz":173,"rus":1264,"rut":93,"rva":5491,"rvi":1217,"rve":3036,"rvo":519,"rvn":190,"ry ":643,"rsk":7463,"rsi":153,"rso":356,"rsa":151,"rse":525,"rta":677,"rst":3047,"rtm":1893,"rtn":868,"rto":742,"rte":567,"rth":222,"rti":1649,"rub":82,"rua":643,"rts":92,"roč":1581,"rtu":402,"rtv":101,"riš":1946,"rt ":931,"rro":107,"mči":401,"rri":145,"rre":246,"riž":633,"rra":237,"ru ":1442,"rry":141,"sab":104,"sac":80,"sad":165,"saj":415,"sak":747,"sal":910,"sam":1606,"sba":85,"sbe":912,"sbi":124,"san":1213,"sat":1223,"sas":75,"sar":861,"sav":254,"sa ":1996,"ruž":2445,"ón ":126,"ruš":505,"rze":647,"rza":241,"ryj":60,"rzo":76,"rzi":1047,"sha":61,"sho":59,"shr":72,"she":68,"shi":263,"si ":1301,"sje":270,"siv":155,"seč":117,"sid":220,"sic":368,"sia":89,"sk 
":194,"sit":181,"sir":202,"sis":1373,"sip":188,"sin":800,"kšn":168,"sio":244,"sil":1296,"sim":507,"sij":849,"sik":175,"sih":502,"saš":156,"sif":95,"sig":135,"sbo":112,"sbu":112,"se ":9988,"sca":143,"sce":180,"sci":213,"sch":525,"sco":197,"sev":1928,"ser":946,"ses":2057,"set":1089,"sez":2688,"sh ":159,"sfe":159,"sfo":66,"sei":64,"seh":402,"seg":907,"sed":3642,"sec":285,"seb":3164,"sep":883,"sen":803,"sem":2072,"sel":7746,"sek":379,"sej":258,"spu":64,"spo":2044,"spr":1492,"spe":1038,"spl":748,"spi":220,"spa":7101,"sot":293,"sou":74,"sov":1364,"sol":487,"som":341,"son":987,"sop":193,"sor":556,"sos":106,"sod":1527,"sof":102,"sok":589,"soj":102,"soc":318,"sob":183,"su ":1066,"nčn":750,"nči":273,"nče":232,"sre":2496,"srb":412,"nča":248,"st ":5603,"ss ":196,"sli":984,"slo":6036,"slu":603,"sla":2779,"sle":851,"ski":17344,"skl":1236,"sko":15385,"skr":668,"sku":2764,"skv":191,"ska":8402,"ske":14199,"sič":233,"sno":1962,"sna":435,"sni":1800,"snj":102,"sež":377,"sne":1079,"smo":133,"smr":229,"smu":290,"so ":8342,"sma":396,"smi":413,"sme":609,"sz ":97,"sza":136,"sze":92,"szt":71,"sse":338,"ssa":253,"sso":262,"ssi":329,"ste":4516,"sta":14712,"std":76,"stm":122,"stn":2769,"sto":9875,"sti":10401,"stj":760,"stk":173,"stl":589,"stv":6827,"stu":585,"soč":125,"str":7508,"sub":140,"suh":69,"sul":130,"sum":64,"suj":230,"sup":155,"sun":61,"sur":243,"sve":2955,"svi":91,"svo":1687,"tai":89,"taj":1653,"tak":1296,"tal":5739,"taf":62,"tag":120,"tah":112,"tab":357,"tac":360,"tad":115,"td ":92,"taz":72,"tav":4146,"tat":2093,"tas":324,"tar":2532,"tap":65,"tan":6147,"tam":415,"tch":88,"te ":3456,"tde":172,"ta ":16983,"ovš":201," št":2696," šv":175," ši":477,"pa ":6188," šk":1366," šo":471," šp":977," ša":246," še":1284," Šv":259," Šu":75," Št":405," Šp":321," Šo":105," Šm":149," Šk":346," Ši":182," Še":283," Ša":280,"ovč":87," šč":159,"pci":101,"pe ":794,"par":3565,"pat":325,"pas":565,"pav":120,"paz":284,"pac":108,"pad":8267,"pah":117,"pak":187,"pal":492,"paj":410,"pap":279,"pan":5531,"phe":97,"pha":91,"pho":59,"phi":118,"pi ":445,"ph ":153,"pev":467,"pač":83,"pea":111,"pec":334,"ped":485,"pen":956,"pep":58,"per":2309,"pet":1059,"lás":63,"pes":1184,"peh":707,"pel":589,"pek":319,"pla":1692,"plj":349,"pli":1265,"ple":1082,"plo":1730,"piz":105,"peč":83,"phy":92,"pia":104,"pid":74,"pic":81,"pih":119,"pij":931,"pik":108,"pil":764,"pin":2417,"pio":83,"pir":404,"pis":3588,"pit":455,"poz":877,"pr ":382,"por":6507,"pop":763,"pov":2801,"pou":108,"pot":2623,"pos":4743,"poi":280,"poj":1082,"pog":1928,"pom":2891,"pon":1163,"pok":980,"pol":5783,"pob":187,"poe":84,"pod":14532,"ps ":70,"plé":235,"ppo":61,"ppe":190,"lén":278,"peš":381,"po ":5444,"pič":147,"pno":567,"pnj":141,"pež":186,"pni":1125,"pne":206,"pna":93,"pse":98,"psi":273,"psk":942,"pso":79,"ptu":77,"pub":2962,"pte":795,"pti":568,"pto":218,"poč":106,"pra":8108,"pt ":65,"piš":190,"prv":4499,"prs":213,"prt":260,"pru":88,"pu ":357,"pri":11604,"pre":12368,"pro":5665,"poš":266,"pož":71,"pur":71,"pus":357,"put":68,"pun":78,"pul":238,"px ":59,"puš":308,"már":283,"iš ":113,"iše":281,"iša":220,"išl":273,"išn":372,"iši":543,"išk":6388,"išj":568,"išt":1077," Ži":199," Že":372," Ža":131," Žu":1147,"išć":99,"išč":2416," ži":1623," žl":73," ža":202," že":1263," žu":4184,"mén":60,"lčn":96,"qua":66,"que":223,"qui":88,"ra ":5234,"rb ":124,"ežn":508,"ngo":291,"ežj":186,"ngi":146,"eži":1395,"ngl":1754,"ežk":163,"ngu":231,"ngr":157,"ngt":104,"ngs":96,"ni ":21439,"eže":1628,"nge":731,"ngh":66,"nga":200,"eža":393,"nha":154,"nj 
":974,"nhe":59,"neh":90,"neg":6715,"nej":1610,"nei":60,"nel":292,"nek":1786,"nen":595,"nem":5832,"nep":765,"neo":239,"ner":2095,"net":1642,"nes":1470,"nev":638,"neu":164,"ndv":192,"ež ":616,"ng ":946,"nea":167,"neb":386,"nec":499,"ned":247,"nfo":286,"nfr":73,"nač":1875,"ney":179,"nez":634,"nfa":314,"nfe":121,"nco":3080,"nci":1963,"nck":74,"nce":1597,"nch":251,"nca":769,"ne ":16673,"nbu":104,"ndu":365,"ndr":735,"nds":176,"ndo":673,"ndi":1187,"nde":738,"nda":1053,"ncu":165,"nak":2105,"nal":3824,"nam":2274,"nan":3844,"nap":1555,"nar":4271,"nac":538,"nad":2224,"nag":2320,"nah":1010,"nai":103,"naj":4748,"nc ":424,"nab":409,"nbe":101,"nd ":921,"nav":1486,"nau":169,"nat":1633,"nas":9591,"naz":549,"na ":45810,"muč":308,"mož":442,"nyi":95,"nz ":124,"nož":611,"ny ":285,"nve":127,"nuk":69,"num":110,"nun":197,"nuj":664,"nus":215,"nut":274,"nua":1914,"nud":60,"ntv":66,"nto":1165,"ntn":394,"ntu":151,"nts":282,"noč":122,"ntr":823,"nti":1732,"nth":129,"ntj":84,"nta":1740,"nte":2274,"nsp":149,"nso":136,"nst":4879,"nsf":59,"nse":185,"nsi":206,"nsk":18646,"nsc":84,"nsa":368,"nu ":1386,"iču":99,"ičn":8484,"ičk":636,"njš":744,"iči":660,"nri":146,"niž":206,"iče":649,"iča":1089,"nt ":1816,"niš":3134,"npr":251,"ns ":498,"noc":81,"nod":149,"noa":98,"nob":153,"nog":856,"nof":95,"nok":237,"nol":557,"noi":86,"noj":94,"noo":307,"nop":217,"nom":2492,"non":367,"not":1702,"nos":5345,"nor":656,"nov":7837,"noz":157,"ič ":1910,"nne":487,"než":185,"nna":175,"nić":78,"nno":127,"nni":311,"nič":1334,"nma":70,"neš":91,"ići":278,"nlj":152,"nn ":230,"no ":28820,"nke":357,"nki":201,"nm ":145,"nkc":434,"nka":820,"nku":97,"nko":712,"nkt":84,"nkr":119,"nji":3942,"njk":62,"nje":10740,"nja":5163,"ić ":428,"nju":1491,"neč":59,"njs":1007,"njo":572,"nij":12636,"naš":856,"nih":7675,"nig":117,"nif":78,"nie":143,"nid":118,"nic":4360,"nia":191,"nk ":305,"niz":1781,"niv":1064,"nis":1438,"nit":923,"nir":658,"nio":254,"nim":2568,"nin":1324,"nik":9988,"nil":489,"obč":4860,"ogr":2005,"ogu":149,"ogi":1571,"ogl":820,"ogo":3723,"ogn":253,"oga":915,"oge":466,"ohr":268,"ohl":58,"ohi":154,"oho":271,"ohn":289,"oha":244,"ohe":84,"oj ":853,"ois":217,"oir":158,"oit":129,"oin":114,"oim":560,"oid":870,"ok ":1437,"ojz":107,"ojv":145,"oju":183,"ojs":1259,"ojo":151,"ojn":3019,"ojm":105,"oji":1325,"oje":1842,"oja":2676,"ol ":641,"oiz":309,"oce":920,"och":145,"oci":1214,"ock":526,"oco":76,"obs":723,"obv":173,"obu":288,"oca":63,"odg":241,"ode":2090,"odk":490,"odl":499,"odi":2311,"odj":811,"odo":2908,"odp":861,"odm":304,"odn":5885,"ods":542,"odt":73,"odr":2425,"of ":1863,"odd":348,"odc":60,"odb":552,"oda":3554,"oel":114,"oen":93,"odz":124,"odv":669,"odu":1390,"og ":1603,"ofi":975,"ofj":119,"ofs":134,"oft":92,"ofo":143,"oh ":61,"oev":59,"off":75,"ofe":287,"ofa":151,"oa ":64,"ob ":1309,"oc ":81,"oam":62,"oak":62,"oba":1124,"od ":12681,"oar":69,"obo":1557,"obr":1689,"obl":2408,"obn":1165,"obm":522,"obh":168,"obj":732,"obi":1733,"obd":521,"obe":1617,"nza":121,"nze":101,"nzi":167,"nzo":136,"nzu":292,"oz ":588,"ows":139,"own":81,"ozv":174,"ozm":90,"ozn":2233,"ozl":77,"ouč":124,"ozo":660,"ozd":363,"oze":956,"ozj":60,"orš":73,"ozi":1844,"oza":922,"otu":189,"oud":99,"ouc":89,"ow ":91,"otl":190,"otj":79,"oti":1413,"oth":99,"ote":2905,"ott":253,"ots":334,"otr":1161,"oto":3788,"otn":1790,"ost":12934,"osu":137,"osv":805,"ota":1246,"ov 
":9875,"osi":707,"osk":3606,"ose":3687,"osf":145,"osp":608,"oss":155,"onč":622,"osr":908,"osm":266,"osl":2901,"oso":665,"osn":920,"ovz":1051,"owe":104,"ovj":805,"ovi":6438,"ovn":6797,"ovl":1128,"ovk":184,"ovr":972,"ovp":60,"ovo":2629,"ovs":1169,"ova":8912,"ovc":416,"ove":14862,"olž":454,"oug":85,"oui":145,"oul":129,"oun":202,"ous":295,"our":376,"out":124,"opn":423,"opo":1059,"opi":1707,"opk":100,"opl":563,"ope":1736,"oph":167,"opa":1127,"os ":1104,"opu":601,"opr":1200,"opt":284,"ops":761,"ook":176,"ood":98,"or ":2915,"oot":78,"oos":261,"oor":234,"ork":374,"orl":98,"orm":2841,"orn":2246,"oro":2344,"orp":318,"orr":124,"orc":198,"ord":1013,"ore":2187,"orf":214,"org":1762,"ori":3690,"orj":1749,"ou ":161,"osa":1004,"osc":65,"ort":1525,"ors":2919,"orv":191,"oru":646,"orz":678,"ory":80,"m² ":116,"ot ":4531,"orb":248,"ora":6836,"olč":72,"ola":1198,"old":256,"olc":287,"on ":4034,"olj":3299,"oli":8521,"oll":233,"olk":1067,"olf":200,"ole":2539,"olh":61,"olg":662,"ols":682,"olt":165,"olm":216,"oln":1077,"olo":4662,"olp":199,"olz":68,"olu":478,"okc":353,"oka":3564,"om ":4672,"oki":527,"oke":1295,"okr":2091,"oks":347,"oko":2939,"okl":420,"okv":386,"okt":717,"oku":681,"ona":3944,"ond":566,"onc":719,"onf":152,"one":1109,"ong":354,"onj":581,"oni":3533,"onk":153,"onn":242,"ono":2372,"ons":2332,"ont":1027,"onu":380,"onv":124,"ony":108,"onz":388,"oma":3776,"ome":4659,"omb":385,"omi":1226,"omm":146,"oml":116,"omp":471,"omn":379,"omo":1779,"omt":60,"omu":612,"omr":152,"oms":494,"op ":353,"la ":14406,"kuž":64,"ína":99,"ín ":66,"ílo":74,"kuš":168,"le ":7709,"lce":1290,"lca":262,"lci":518,"lcs":234,"lf ":143,"őr ":86,"lde":127,"lda":104,"ldo":98,"ldi":83,"lab":207,"lac":514,"lad":2741,"lah":1475,"lag":668,"laj":371,"lai":176,"lal":158,"lak":556,"lan":4164,"lam":493,"lap":158,"lao":145,"lar":855,"lat":1921,"las":3314,"lau":130,"lav":3720,"lay":77,"laz":215,"lba":139,"ld ":368,"lbe":231,"lbi":66,"lbo":79,"lbu":240,"kvi":678,"kve":490,"kva":1108,"kus":268,"kur":144,"kup":2763,"kun":201,"kum":211,"kul":2114,"kuj":269,"koš":266,"ky ":76,"kta":268,"kte":244,"ksp":155,"kst":183,"ksi":550,"kso":225,"ksn":135,"kuh":70,"ktr":1174,"koč":387,"ktu":785,"kti":841,"ktn":114,"kto":1394,"krš":290,"íja":164,"kož":132,"lpo":59,"lps":155,"lpe":443,"lpi":97,"lph":69,"ls ":116,"lol":70,"lok":331,"lon":766,"lom":1330,"lop":499,"lor":376,"lod":293,"loc":74,"loh":68,"log":2821,"loj":149,"lpa":140,"los":423,"lot":1148,"lou":85,"lov":16172,"loz":578,"lno":1801,"lić":59,"lnj":113,"lni":3993,"lež":843,"lne":1558,"lob":711,"lič":1905,"lmo":111,"lmi":113,"leš":1366,"lme":146,"lma":647,"lp ":83,"lna":1726,"lmu":66,"hér":83,"lms":222,"lti":179,"lto":145,"ltr":80,"loč":1559,"lts":67,"ltu":477,"luc":178,"lub":512,"lug":80,"lue":65,"lsk":2568,"lso":81,"lst":393,"lta":305,"lte":1274,"ljš":529,"liž":879,"lu ":1284,"liš":2924,"ía ":66,"lt ":201,"lhe":70,"lj ":3087,"lha":78,"lgo":251,"lge":200,"lgi":319,"li ":11443,"lga":376,"lfr":59,"lač":361,"hât":62,"lfo":59,"lfi":89,"lfa":90,"ház":65,"lez":1147,"ley":245,"lex":102,"lev":543,"les":1759,"let":8204,"ler":672,"leo":160,"lep":918,"lem":1328,"len":1784,"lek":2164,"lel":63,"lei":115,"lej":216,"leh":60,"leg":676,"lef":95,"led":2297,"lec":1220,"leb":105,"lea":97,"lg ":68,"lls":71,"llu":110,"lly":120,"lo ":8977,"lla":550,"lle":786,"lli":672,"llo":334,"lko":692,"lku":60,"ln ":62,"lka":768,"lke":292,"lki":170,"ljs":1060,"leč":258,"lju":2873,"ljo":339,"ljn":358,"lm ":297,"lje":13663,"ll 
":501,"lja":8525,"ljk":59,"laž":224,"lji":2324,"lit":3130,"lis":1861,"lir":252,"lip":265,"lio":237,"lin":3453,"lim":1141,"liz":841,"liv":1015,"liu":90,"lic":2658,"lid":143,"lia":493,"lib":162,"lk ":580,"lik":9667,"dšk":353,"lil":332,"laš":191,"lij":4137,"lig":667,"lih":765,"lie":274,"lif":220,"ma ":6156,"luž":388,"mb ":135,"mac":507,"mah":62,"maj":3291,"mak":294,"mad":706,"mag":926,"mar":1529,"mas":537,"mal":1048,"mam":116,"man":2972,"maz":61,"mat":4492,"mba":322,"mbi":346,"mbe":1994,"mbr":581,"mbo":391,"mbn":601,"me ":3191,"mbu":131,"mde":94,"med":6987,"meg":105,"mec":75,"met":4065,"mev":70,"mes":5031,"mer":4601,"mem":950,"mel":2712,"meo":80,"men":7879,"meh":365,"mek":1496,"mej":987,"mez":436,"mač":311,"mfo":74,"luz":65,"lva":240,"lve":110,"lvi":73,"luk":109,"luj":862,"lun":128,"lum":243,"lut":187,"lus":353,"ly ":206,"loš":1311,"ltä":132,"lož":762,"lza":60,"luč":58,"luš":63,"mpi":770,"mpe":599,"mpo":176,"mpl":480,"mpu":59,"mpt":60,"ms ":130,"mog":546,"mob":319,"mod":647,"mon":864,"mok":221,"moj":62,"mom":240,"mol":590,"mov":1124,"mor":2946,"mos":831,"mot":853,"mou":135,"mpa":312,"moz":59,"mre":315,"mrl":143,"mrt":287,"mu ":1595,"miš":516,"moč":1268,"mso":95,"msk":4107,"moš":595,"my ":71,"mur":404,"mus":157,"mut":85,"mul":1968,"mun":583,"muz":183,"dža":1295,"mi ":4700,"dži":218,"meč":117,"maž":60,"min":1835,"ešn":268,"mio":71,"mil":655,"mim":87,"mir":615,"mis":564,"mit":679,"ešt":114,"miz":89,"mic":185,"eša":172,"eše":255,"mie":124,"mid":113,"ešk":2738,"mik":628,"mij":927,"maš":150,"eši":154,"mih":245,"mo ":2844,"mlj":1176,"mle":124,"mla":591,"mki":710,"mka":116,"mm ":88,"ešč":404,"mič":519,"mni":612,"mnm":116,"mno":883,"mna":379,"mne":300,"meš":231,"mma":103,"mme":132,"Če ":73,"Čep":77,"Češ":144,"Črn":256,"rža":2068,"rže":110,"rži":130,"ča ":1709,"čal":131,"čam":203,"čan":1056,"včn":97,"čar":720,"čas":2119,"čaj":758,"čak":128,"vče":97,"vča":256,"zre":618,"uče":431,"uča":350,"zra":1527,"če ":2812,"uču":301,"zro":341,"učn":253,"uči":543,"čat":86,"čav":96,"čba":88,"čbe":68,"víl":96,"ziš":265,"zte":155,"čeg":102,"čen":2815,"čem":362,"čel":601,"ček":421,"čev":1119,"čet":926,"čes":137,"čer":105,"zto":106,"čep":104,"zse":225,"zu ":354,"zst":173,"zva":406,"zvi":2202,"zve":1702,"či ":940,"zvr":346,"zvo":918,"zuj":323,"čez":81,"zur":194,"zul":378,"zum":402,"zun":254,"zus":115,"čij":767,"čih":292,"čic":410,"čk ":142,"čit":716,"čis":78,"čin":8235,"čil":961,"čim":249,"čko":555,"čkr":195,"čka":504,"zzo":59,"čke":298,"čki":315,"zza":68,"čjo":263,"čju":736,"čja":509,"čje":920,"čji":637,"češ":144,"člo":587,"čo ":156,"čle":236,"čla":635,"čob":73,"čič":263,"čić":71,"čni":3672,"čno":2056,"čna":1890,"čne":3797,"čiš":160,"zgl":160,"zi ":788,"zač":773,"zha":357,"zgu":93,"zgr":400,"zgo":1653,"zej":178,"zdr":1230,"zdj":83,"zdo":198,"zdn":338,"zet":178,"zen":780,"ván":80,"zem":1646,"zel":858,"vár":138,"zer":530,"ze ":1270,"zbo":448,"zbi":532,"zbu":82,"zbr":206,"zda":703,"zdi":107,"zde":1030,"zab":392,"zad":779,"zac":850,"zaz":93,"zd ":136,"zbe":87,"zai":81,"zaj":329,"zag":420,"zah":1609,"zam":289,"zan":1549,"zak":553,"zal":882,"zar":791,"zap":901,"zav":812,"zas":719,"zat":657,"zod":127,"zob":347,"zor":519,"zom":118,"zon":2846,"zol":213,"zof":467,"zpe":152,"zpa":113,"zoz":300,"zov":527,"zpr":114,"zpo":448,"ال":104,"zo 
":291,"zma":791,"zmn":81,"zmo":266,"zme":1501,"zmi":301,"zna":6251,"zmu":231,"zno":574,"zič":152,"ršč":1404,"zne":577,"zni":1693,"zka":85,"zko":152,"zkl":76,"zki":60,"zku":97,"zla":344,"zli":1716,"zle":63,"zlo":375,"zho":1743,"rša":147,"zia":58,"rše":205,"zid":236,"zic":70,"zij":1942,"ršj":95,"zaš":153,"rši":594,"ršn":88,"zin":229,"zim":197,"zil":483,"zik":2477,"ršk":1367,"zio":176,"zir":1419,"zis":483,"zit":356,"ziv":530,"zja":105,"zje":158,"yst":102,"ysi":60,"yro":69,"yon":68,"za ":8558,"ye ":58,"yer":80,"ya ":130,"yar":104,"yku":71,"yle":59,"yi ":114,"yje":66,"yja":58,"ن ":69,"ožu":71,"ože":1171,"oža":441,"ožb":309,"ožn":777,"oži":853,"ožj":690,"ožg":76,"ož ":115,"té ":62,"tät":153,"ći ":280,"xan":70,"ošč":370,"wn ":129,"ws ":103,"rče":97,"rči":287,"wor":59,"wer":110,"wel":68,"nže":273,"oš ":92,"wis":86,"ošt":692,"oši":152,"oše":119,"wic":65,"ošo":62,"ošn":419,"win":68,"ošk":1384,"oša":203,"vzo":115,"vzn":60,"vzr":280,"vzp":183,"vzg":93,"vze":217,"vzd":233,"vrš":1631,"vzh":1577,"vza":88,"wal":60,"war":181,"viš":902,"vrt":231,"vrs":1856,"vrn":62,"vro":1339,"vri":65,"vrh":387,"vre":731,"vra":482,"vso":118,"vst":1541,"vse":2779,"vsk":1809,"vsi":97,"vu ":881,"vsa":763,"vto":922,"vtr":73,"voč":112,"vts":62,"vul":228,"via":94,"vk ":203,"vio":100,"vir":2583,"vik":86,"vil":3700,"vim":485,"vin":3336,"vig":158,"vih":947,"vaš":4759,"vij":2768,"vic":1147,"vid":829,"vie":118,"nše":75,"vja":192,"viz":807,"nšt":314,"vit":1128,"vis":1138,"več":3002,"vje":763,"vka":371,"vju":170,"vko":123,"vke":182,"vkl":325,"vla":601,"vle":130,"vlo":204,"vlj":4644,"vo ":5275,"vme":74,"rén":67,"veš":647,"vež":91,"vne":3166,"vna":1676,"vno":7416,"vić":274,"vnj":103,"vni":6241,"nšč":326,"vič":1157,"vob":458,"vod":2329,"vog":94,"voj":6252,"vol":768,"vok":302,"von":637,"vom":445,"vor":1714,"vot":255,"vos":442,"vov":212,"voz":481,"vpi":58,"vpl":277,"vpr":143,"vgu":581,"vi ":4563,"vač":184,"vey":59,"vez":2827,"ver":4490,"ves":828,"vet":5958,"vdo":109,"vej":290,"veh":446,"veg":583,"rán":91,"ven":15616,"vem":1214,"vel":2593,"vek":749,"ved":1600,"vec":1132,"vcu":59,"vca":96,"ve ":3712,"vci":431,"vce":194,"val":4949,"vak":400,"van":6929,"vam":248,"var":2538,"vat":726,"vas":1162,"vaz":61,"vac":503,"vad":923,"vai":77,"vaj":1562,"vah":328,"va ":7766,"uzi":197,"urš":301,"uze":286,"uza":111,"urč":177,"ux ":175,"uva":93,"uve":379,"uvr":1131,"usl":98,"usm":167,"usk":1090,"usi":676,"use":332,"usa":242,"usu":60,"ust":3507,"uss":205,"usp":414,"uso":223,"usn":197,"utl":65,"utn":316,"uth":139,"uti":197,"ute":579,"uta":264,"utt":69,"uts":102,"uto":194,"utr":115,"us ":1853,"ut ":342,"urb":124,"ura":3309,"urd":58,"ure":1237,"urg":652,"urj":220,"uri":925,"url":67,"urk":122,"urm":63,"urn":892,"uro":433,"urs":478,"urt":136,"uru":97,"ury":61,"upa":4858,"ur ":618,"upi":1896,"upe":339,"upo":4232,"upr":4636,"upl":66,"upn":1389,"umr":152,"umu":107,"umi":296,"umo":158,"uma":367,"umb":237,"ume":1232,"unt":159,"uns":232,"unk":535,"unj":73,"uni":1649,"uno":147,"unc":267,"und":490,"una":1305,"ung":160,"une":190,"up ":120,"uks":65,"ukr":140,"uku":79,"ukt":476,"uko":214,"ukn":60,"ukl":92,"uki":118,"ukc":123,"uke":60,"um ":819,"uka":277,"ulu":100,"ult":2100,"uls":66,"ulo":296,"ull":127,"ulk":70,"ulj":355,"uli":1131,"ule":1791,"ulf":70,"uld":69,"ula":1090,"un ":304,"ukv":294,"uig":66,"mšk":1972,"uil":118,"uin":78,"uir":59,"uis":202,"uk ":155,"uje":4627,"uji":154,"ujo":137,"ujs":75,"uit":156,"ul ":297,"uja":181,"ugh":94,"ugi":1041,"lži":298,"uge":696,"lžn":64,"ugo":2841,"ugl":86,"uga":776,"uhi":80,"uho":1525,"ugu":295,"uha":183,"uj 
":60,"uda":313,"ude":487,"udj":82,"udi":5813,"udn":106,"ubo":133,"ubn":112,"ubs":149,"ubr":361,"uca":72,"ue ":195,"uce":114,"uci":418,"uch":129,"uck":88,"uer":142,"ues":152,"uh ":100,"uds":601,"udo":411,"ug ":215,"ued":59,"uen":108,"uel":148,"ub ":374,"uar":2656,"ual":170,"uan":113,"ubi":368,"ubl":4817,"ube":341,"uba":207,"ud ":197,"trž":74,"ty ":183,"tvu":608,"tvo":3047,"tve":2705,"tvi":733,"tva":3299,"tur":2565,"tus":332,"tut":264,"tuj":422,"tul":174,"tun":135,"tum":104,"tub":74,"tua":118,"tud":5466,"tuc":67,"tug":184,"tz ":159,"toš":66,"ts ":250,"tiš":235,"tmá":241,"trd":395,"tre":2501,"oče":1653,"tt ":133,"oča":1047,"tra":4316,"trj":88,"očj":1644,"očk":616,"očl":103,"će ":102,"trm":72,"trg":328,"tri":4216,"oči":1373,"trs":733,"oču":60,"tru":1087,"trt":467,"tro":4048,"trn":84,"očn":740,"tu ":1700,"try":92,"tsc":117,"tsk":2341,"tta":143,"tte":299,"tti":218,"tto":129,"ttp":71,"tts":81,"toč":930,"tma":1947,"to ":6420,"tmo":86,"tmi":151,"teš":130,"tež":380,"tni":5182,"tne":1620,"ća ":63,"tp ":71,"tna":1556,"tič":5076,"tno":2333,"tod":350,"toc":762,"toj":799,"toi":353,"toh":82,"tog":270,"tob":807,"tou":124,"tov":5531,"tos":1124,"tot":329,"toz":100,"tom":1530,"ton":1758,"tok":2514,"tol":3650,"tor":3549,"top":2051,"oč ":359,"tij":686,"til":753,"tik":2804,"tif":115,"tie":119,"tih":1408,"tig":120,"tir":1113,"tit":785,"tis":2436,"tin":2891,"tim":668,"tip":613,"tio":1015,"thu":192,"tia":170,"tib":79,"tic":1448,"tid":147,"tji":207,"tju":277,"teč":443,"tjo":443,"tiz":729,"tiv":1588,"tje":1033,"tja":867,"tki":1874,"tko":496,"tku":163,"tka":483,"tke":192,"tlj":122,"tli":831,"tlo":345,"tla":447,"tle":327,"tem":4827,"ten":1364,"teo":775,"tep":103,"tei":263,"tej":504,"tek":2989,"tel":4105,"tef":109,"teg":1181,"teh":652,"tea":89,"teb":91,"tec":139,"ted":342,"th ":436,"tez":323,"tev":3243,"tet":1626,"tes":590,"ter":10610,"ti ":9411,"tho":148,"the":481,"thi":96,"tha":151,"zšl":321,"zši":325,"zše":149,"Živ":99,"yőr":92,"Žel":286,"ža ":604,"Žup":1073,"žko":65,"žle":58,"žlj":64,"žju":249,"žke":66,"žin":1764,"žim":160,"žil":222,"žir":92,"živ":1723,"žit":230,"žis":155,"žja":222,"žje":479,"žji":178,"žic":387,"žig":82,"žij":171,"žič":188,"žnj":265,"žni":1372,"žno":1191,"žna":269,"žne":657,"žo ":93,"žeš":277,"že ":817,"žbi":268,"žbe":388,"žbo":86,"žav":2001,"žba":234,"žaj":196,"žal":97,"žan":339,"žar":1500,"žga":114,"ži ":1033,"žev":1038,"žej":69,"žek":75,"žel":1057,"žem":871,"žen":2078,"žef":104,"ždi":82,"žuž":106,"užb":674,"uže":844,"užu":116,"uži":1380,"užn":1159,"žuj":195,"žup":4080,"žiš":141,"žu ":73,"všč":108,"všk":96,"vše":78,"vši":66,"yír":60,"ušč":304,"ušt":314,"ušn":118,"ušk":172,"uši":260,"uše":213,"uša":205},"n_words":[5788075,6773679,5606921],"name":"sl"}
\ No newline at end of file
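Each profile file touched by this commit (the "sl" payload ending above, the "so" payload added below) is a single line of JSON with three fields: "freq" maps every 1- to 3-character n-gram to its occurrence count, "n_words" holds the total number of 1-, 2- and 3-grams in that order, and "name" carries the language code. A minimal Python 3 sketch of reading one of these files with the standard json module follows; the path is illustrative and this is not langdetect's own loader:

    import json

    # Profile layout observed in this commit:
    #   "freq"    -- n-gram (1-3 chars, spaces included) -> count
    #   "n_words" -- [unigram total, bigram total, trigram total]
    #   "name"    -- language code of the profile (e.g. "sl", "so")
    with open("nlp_resource_data/langdetect/profiles/sl", encoding="utf-8") as fh:
        profile = json.load(fh)

    print(profile["name"])               # language code, here "sl"
    print(profile["n_words"])            # totals for 1-, 2- and 3-grams
    print(profile["freq"].get("je", 0))  # count for the bigram "je"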
diff --git a/nlp_resource_data/langdetect/profiles/so b/nlp_resource_data/langdetect/profiles/so
new file mode 100755 (executable)
index 0000000..9d0c19c
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/so
@@ -0,0 +1 @@
+{"freq":{"YO ":13,"jec":34,"jee":32,"D":313,"E":183,"F":66,"G":214,"A":673,"B":249,"C":240,"L":152,"M":367,"N":163,"O":122,"H":180,"I":236,"J":129,"K":173,"U":82,"T":107,"W":226,"V":11,"Q":76,"P":22,"S":486,"R":114,"Y":96,"X":120,"Z":10,"f":458,"g":2154,"d":5233,"e":4497,"b":2102,"c":900,"a":24510,"n":3878,"o":5982,"l":3786,"m":2460,"j":397,"k":2897,"h":3132,"i":6615,"w":2306,"v":27,"u":3829,"t":1545,"s":2871,"r":2895,"q":718,"p":77,"z":23,"y":3607,"x":1698,"jaa":13,"jab":16,"jar":10,"jam":12,"Xam":11,"joo":14,"Xas":10,"jis":14,"jir":95,"jii":13,"jid":17,"jo ":15,"Far":12,"isk":69,"ism":12,"isl":25,"iso":22,"isu":42,"ist":67,"ita":17,"is ":71,"ion":20,"ir ":84,"irs":56,"irt":28,"iro":22,"irk":32,"iri":56,"isi":32,"ish":96,"ise":18,"isb":17,"Wux":23,"isa":134,"ire":16,"ira":131,"iyi":10,"iyo":394,"iya":423,"iye":65,"ixi":16," l":598," m":880,"kii":161," n":189," o":537," h":365," i":795," j":267," k":1328," d":1214," e":328," f":95," g":401," a":1317," b":593," c":361," y":296," x":283," u":599," t":376," w":1834," q":291," p":20," s":807," r":112,"km ":14," J":125," K":142," H":119," I":161," N":93," O":34," L":81," M":322," B":217,"khd":24," C":229,"kha":11," A":275," F":59," G":169," D":236," E":41," Z":10," Y":40," X":90," S":438," R":66," Q":69," P":18," W":211," U":33," T":83,"kee":20,"key":11,"kh ":38,"Web":10,"Waa":56,"ku ":434,"kor":15,"Wax":40,"koo":94,"War":17,"XEE":11,"مد":16,"Gal":22,"و":25,"ي":76,"ف":13,"ق":12,"ل":77,"م":62,"ن":31,"ه":13,"د":46,"ح":26,"ب":37,"ة":21,"ا":98,"أ":11,"ع":29,"ش":21,"س":23,"ر":49,"kar":49,"kas":30,"kan":49,"kal":143,"kam":32,"kad":48,"kac":14,"kab":10,"kaa":81,"ka ":1268,"A ":83," Ga":53," Ge":18,"Da":59,"DU":11,"Cu":18,"Co":13,"DE":11," Fi":17,"Ce":13,"DH":15,"Ci":23," Ha":35,"Du":13,"EY":13," Go":61," Gu":12,"EG":11,"De":45,"EE":45,"EL":14,"Di":29,"Dh":36,"H ":16,"GA":19,"Fa":23," IY":15,"Er":12," Ho":29,"ha ":334," Hi":37,"Ge":18," Ji":25,"Ga":53,"حم":18,"HA":35,"I ":13," Ja":63," KA":16," Is":32," It":29,"GM":12," In":35,"Fi":17,"ham":43,"han":102," Ka":28,"hal":48,"haw":17,"hax":44,"haq":58," Ki":19,"har":45,"has":76," Kh":10," Ju":19,"hah":12,"hab":77,"haa":189,"had":144,"hac":36,"AS":15,"AR":23," MA":17,"AX":27," La":22,"AY":15,"BA":11," Li":11,"C ":10,"AD":43,"AA":51,"AB":14,"AG":11," Ko":23,"AH":23,"hay":333,"AL":37," Ku":26,"AM":13,"AN":35," Ma":180,"Ax":18,"Ar":12,"D ":22,"بن":10," Mi":27,"Ba":101,"CA":15,"Af":65,"بد":10,"he ":25,"Aa":22,"Ab":33,"Ad":10,"Am":17," Lu":25,"Al":38," Ne":14,"Bu":30," Na":32,"Ca":127,"DA":43,"E ":30,"Bi":19,"Be":25,"hda":27,"Bo":30,"Hin":18," Mu":78,"hel":22,"Ku":26,"hee":112,"Ko":23,"hey":26,"hex":72,"Li":11,"N ":26,"her":11,"MA":41,"La":22,"Lu":25,"hi ":27,"Mi":27,"NK":10,"ال":51,"O ":34,"NA":12,"Ma":180,"Mu":79,"Ne":14,"Na":32," Am":16," Al":38,"Nu":16," Af":65,"No":12,"OO":18," Ad":10," Aa":22," Ab":33," Ba":101," CA":12," Ax":18," Ar":12,"hig":23," Be":25,"hid":12," Bi":19,"hin":40,"Go":61,"him":17,"Gu":12," Bo":30,"hii":170," Bu":30,"his":24,"hir":31,"Ha":35," Ca":127,"Hi":37," Ce":13," DE":10," Ci":23,"IN":12,"Ho":29," DH":13,"IS":10," Co":12," Cu":18,"IY":20," Da":59," Di":29," Dh":36,"In":36," De":45,"Is":32,"It":30,"Ja":63,"KA":33," Du":13,"Ji":25," Er":12,"Ju":19,"LA":35,"Ka":28,"Kh":10,"ho ":53,"Har":14,"Ki":19,"LE":16," Fa":23,"gma":64,"go ":32," Xi":13," Xa":51,"UU":11,"yuu":26," Wu":23,"To":11,"Th":10," Wi":15," We":12,"Ta":37," Wa":133,"St":13,"Su":23,"Wu":23,"gob":97,"Wi":16,"Wa":133,"XA":19,"We":12,"XE":12,"Y ":18,"yst":29," Yu":14,"yso":15," Ya":10,"WA":26,"gmo":41,"ysa":93,"Qa":26,"Qo":17," 
م":12,"RA":10,"S ":18," ع":21," ا":48,"goo":52,"R ":20," ب":13,"gsa":14,"gu ":229,"Si":17,"Sh":86,"gsi":12,"So":180,"Ru":12,"U ":11,"Sa":70,"TA":13,"Re":13,"SH":11,"Ro":11,"yoo":24,"Qu":16,"SA":16,"Ra":20,"gud":22," Nu":16," No":12,"gta":43," Ra":20," Qu":16,"b ":130," Ro":11," Re":13,"guu":20,"gun":12,"a ":5909," Qo":17," Qa":26,"شي":10," Su":23," St":13," Ta":37,"Ya":10," Th":10,"Yu":14," To":11," Ru":12," Sa":70,"Xa":51,"YO":15," Sh":86," Si":17,"Xi":13," So":180," WA":20,"ري":12,"Gob":48," ja":60,"i ":853,"ye ":36,"ian":11," iy":365," ji":127,"ge":93," je":47,"ga":1135,"fk":16,"Ing":16," im":15," in":148," il":54," ii":23,"ic ":14,"fi":49,"fr":45,"fu":47,"ft":29,"fo":18," is":155," ka":688," kh":13,"hd":44,"he":286," ki":46," ke":11,"ha":1580,"gn":11,"gm":108," jo":14,"gl":15,"gi":72,"id ":171,"gu":305,"iba":32,"gt":52,"gs":27,"gr":15," ju":17,"go":196,"du":188,"dw":36,"dy":13,"g ":83," ha":190,"ea":16,"eb":72,"yee":61,"ec":51," he":28,"ed":360,"de":252,"dd":113,"di":494,"dh":632,"dk":189,"dl":33," go":117,"do":234,"dn":22," gu":55,"ia ":36,"ex":102,"ey":554,"fa":110,"h ":441," id":15,"fe":17,"eh":54,"ib ":32,"eg":202," hi":20,"ee":1263,"el":242,"ek":35," ho":120,"ei":12,"yey":26,"en":172,"em":31,"et":26,"es":93,"er":287,"ya ":266,"ca":427," ni":37,"e ":881," ne":15,"bs":21," na":54,"br":36,"bu":104,"bt":55,"bn":18,"bo":234,"bk":30,"bl":13," mu":48,"ig ":10,"bi":355,"bb":15,"bd":41,"be":201,"db":11,"da":2087," og":18,"f ":98,"cy":18," of":16,"cu":41,"ct":11,"cs":27,"co":62,"cm":24,"cn":13,"cl":19,"ci":73," nu":10,"ch":33," no":73,"ce":64,"cd":20,"yad":111,"yag":10," le":91,"c ":51,"yaa":287," la":334,"icm":22," ku":465,"ici":14," km":14,"ica":25," ko":88," me":49,"az":10,"ay":1458,"idu":13," mi":187,"ba":817,"d ":893,"at":134,"as":580,"yd ":29,"ido":43,"ar":1307,"aq":237," ma":590,"ax":1066,"aw":157,"idk":12,"yay":52," lu":25,"ak":76,"al":1647,"idi":35,"yaw":11,"idh":19,"ai":29,"aj":59,"yar":45,"am":590,"an":1951,"yaq":50,"yan":13,"ac":260,"ida":140,"ad":2243,"aa":4171," lo":138,"ab":630,"ag":664,"ah":1152,"yah":134,"af":128,"iib":15,"nu":38,"iic":11,"nt":263," af":45,"ns":59," ah":473," aa":208,"iig":13," ab":31,"iid":50,"no":160,"nn":18," ad":49,"q ":34," am":103," an":18,"iik":48,"iin":164,"ny":57,"yka":17,"iil":93," al":21,"iim":26,"iis":199,"iir":65,"of":78,"iiq":14,"oc":29," ax":10,"od":156," ar":26,"ob":291," aq":21," as":29,"om":340,"on":186," ba":344,"ok":16,"ol":273," ay":246,"og":129,"il ":80,"ot":41,"os":90," bi":107,"op":10,"oo":1738," be":63,"or":236,"oq":49,"yn ":105," bo":34,"r ":475,"ox":10,"ow":125,"oy":128," bu":35,"pa":14," ca":238,"im ":21,"ika":50,"lo":386,"ige":10,"lm":39,"ll":110,"ls":27,"iga":247,"ii ":339,"lw":14,"lu":48,"igi":31,"yo ":488,"ly":56,"igu":13,"igt":12,"o ":2012,"ma":1465,"mb":52,"mh":21,"me":199,"mk":39,"mi":333,"mp":19,"mo":102,"yna":98,"mu":85,"ihi":82,"yni":14,"na":851,"nb":30,"yne":30,"nc":10,"nd":137,"ne":107,"nf":30,"ng":58,"ynt":29,"ni":213,"nk":312,"nl":21,"imo":20,"ju":17,"jo":31," ee":295,"imi":21,"ki":203,"kh":95,"ke":48,"ind":29,"ina":80," fa":48,"yga":15,"ka":1778,"yi ":19,"m ":103," fu":10,"ino":13,"kt":20," fo":12,"ku":558,"int":102,"ins":10,"ko":130,"ine":14,"ing":16," fi":17,"ini":10,"km":16,"ink":82," ge":36,"li":577,"lk":332,"le":352," ga":186,"ld":23,"lg":22,"inu":15,"la":1306,"lb":52,"iny":13,"n ":1478," co":22,"ht":11,"hu":92,"ikh":54," ce":15,"hi":387,"hn":16,"ho":217," ci":36,"ila":160,"id":471,"ic":103,"yin":59,"ib":108,"ia":61,"ih":88,"in ":262,"ig":350," da":424,"if":21,"yih":49,"yig":21," cu":34,"hy":12,"k 
":24,"iq":21," do":45,"ilo":13,"ir":438,"is":630,"it":49,"ill":18,"ilk":32,"ix":28,"ilm":12,"ii":1062,"ij":21,"ik":134," de":120,"ili":51,"il":385,"im":170,"in":663,"io":30," di":70,"yir":13," dh":511,"ima":76,"je":69,"ji":178,"iy":896," du":39,"l ":398,"ja":82,"xi":123,"xo":56,"xm":34,"xw":27,"xu":185,"xb":18,"xa":850,"xe":161,"xd":67,"wg":11,"wi":81,"how":15,"wl":60,"wo":26,"wu":102,"hog":13,"y ":1137,"wa":1722,"wd":13,"hoo":55,"we":185,"hor":60," yi":55," yu":13,"uy":12,"ux":164,"uw":34,"uu":720," ye":13,"ve":10," ya":211,"x ":140," xo":33,"uj":15,"uk":28,"ul":200,"uf":20," xi":90,"ug":210,"uh":16,"uq":90,"ur":259,"hna":12," xu":39,"us":114,"ut":54,"um":90,"un":214,"tu":47,"ub":104,"ua":11,"ud":145,"uc":17," xe":16,"w ":59," xa":103,"to":175,"hul":37,"tr":25,"te":120,"ti":246,"th":37,"ta":784,"su":111,"ss":19,"st":173,"sw":12,"sl":47,"sk":106,"sm":25,"so":371,"sr":10,"sc":17,"se":101,"sh":456,"ي ":20,"xme":19,"si":404,"xma":13,"u ":1296,"sa":722,"sb":21,"rr":20,"rs":115,"rt":160,"ru":77,"rw":11,"rx":11,"ry":27,"ro":144,"rn":40,"rm":32,"rl":22,"rk":200,"ri":397,"hu ":11,"rg":35,"re":258,"rd":49,"rc":12,"rb":25,"ra":754,"t ":51,"qu":35,"qs":10,"xoo":44,"qo":163,"IYO":15,"qi":33,"qe":23,"qa":334,"qd":61,"s ":240,"pu":15,"pr":14," ru":12," u ":194," sa":221," se":17," si":157," sh":112," so":259," qu":21,"xya":13," ra":48," re":33,"ن ":17," ro":11," qe":14," qa":168," qo":69," qi":18," oo":464," or":10,"huu":29," wa":1582," we":88," wo":12," wu":102," wi":39," uu":195,"xud":12,"xuu":133,"Hoo":12," tu":36," us":16," ur":10,"م ":11," um":12," un":11," ug":131,"yg":19," ta":231,"ye":133,"yd":48,"ya":998,"yb":27,"xwe":21,"xy":17," su":25,"yu":34,"ys":166," to":18," th":15," ti":62,"yo":522,"yn":280," te":11,"yk":19,"yi":189,"fee":11,"xey":58,"xee":54,"far":32,"fad":21,"faa":24,"Suu":12,"Axm":14,"xir":17,"xis":13,"xil":26,"xii":17,"xid":14,"xig":24,"Sta":10,"xa ":169,"eyb":17,"eya":63,"eys":74,"Tal":11,"eyn":163,"eyo":14,"eyk":10,"xda":51,"eyd":16,"eye":14,"exa":10,"exd":12,"exe":51,"xe ":46,"xar":38,"Ban":18,"Baa":14,"Bad":22,"xam":54,"xan":16,"Bar":23,"xay":166,"xba":16,"xaa":341,"xad":27,"xag":13,"wux":100,"Aas":11,"Shi":22,"She":12,"Sha":50,"ex ":21,"Af ":19,"ey ":159,"er ":103,"es ":21,"eri":33,"ere":30,"era":49,"Afr":32,"esh":28,"esa":10,"ers":11,"ern":14,"ekh":16,"en ":89,"ela":47,"ele":26,"eli":17,"ell":42,"elo":15,"emb":19,"ena":28,"wla":53,"eny":12,"egm":90,"ego":14,"egt":11,"Som":32,"Soo":136,"woq":10,"el ":65,"wda":13,"Buu":11,"Bur":11,"we ":12,"gir":17,"gii":26,"wey":124,"wee":27,"gey":15,"gee":44,"wi ":14,"wis":10,"wii":22,"Sal":11,"gab":12,"gac":45,"gad":26,"DA ":20,"gaa":436,"gar":35,"gay":21,"gal":70,"gan":69,"ga ":388,"San":27,"wa ":22,"Cab":27,"waq":26,"wan":30,"wal":39,"wax":715,"way":45,"Cal":18,"war":52,"was":18,"Car":40,"waa":581,"wad":168,"Bel":10,"fur":37,"Bis":12,"fri":39,"fii":15,"Boo":10,"fka":13,"da ":918,"de ":22,"dad":131,"daa":159,"dab":19,"dal":113,"WAX":16,"dag":65,"dah":101,"dar":51,"dan":291,"dam":39,"day":61,"dax":79,"daw":32,"Cum":10,"dda":74,"dde":11,"ddi":17,"cun":14,"EEY":13,"EEL":14,"EGM":11,"Deg":30,"cyo":15,"uxu":126,"Daa":22,"Dag":10,"Dal":10,"uxa":15,"uun":88,"uul":63,"uum":13,"uug":15,"uud":50,"uux":10,"ux ":12,"uus":29,"uur":74,"uuq":18,"uut":24,"uwa":28,"co ":26,"cma":23,"ush":13,"usi":11,"use":13,"uu ":316,"usu":26,"uso":11,"uti":16,"uta":19,"cod":10,"com":11,"uqa":33,"uqd":36,"ura":37,"ure":10,"uri":31,"urk":17,"urt":32,"uru":37,"ur 
":39,"csi":14,"uma":56,"unt":32,"unk":27,"uni":11,"una":85,"cel":30,"uka":13,"cee":17,"uls":10,"ulo":20,"ull":14,"ulk":27,"uli":14,"ule":16,"ula":26,"un ":29,"che":12,"ul ":36,"ciy":12,"cii":28,"uga":40,"ugu":128,"ugs":11,"ed ":184,"ebi":20,"uf ":13,"uda":33,"udi":12,"eb ":12,"udu":37,"ug ":18,"ega":53,"ub ":32,"eek":25,"een":99,"eel":138,"eem":18,"eeb":23,"eeg":65,"eed":229,"eey":113,"eh ":42,"ees":56,"eer":157,"edk":18,"edi":12,"ede":22,"eda":72,"uba":39,"ubb":11,"edu":15,"ud ":36,"edo":11,"ecl":12,"ece":25,"ee ":319,"dwe":25,"dwa":11,"duu":57,"tuu":22,"doo":96,"dow":37,"tri":10,"The":10,"dna":12,"to ":75,"Dhe":14,"Dhu":12,"dun":12,"dul":20,"dug":23,"too":69,"du ":45,"tii":59,"tig":10,"tir":66,"dha":335,"tio":16,"tic":26,"dhu":33,"dib":25,"dhi":112,"dhe":122,"dho":21,"der":19,"dex":18,"dey":16,"dee":48,"deg":96,"den":15,"di ":38,"dle":11,"dla":17,"tee":36,"dku":14,"dki":33,"do ":77,"ter":36,"diy":39,"din":26,"ti ":29,"dir":60,"dis":51,"dig":42,"dii":165,"dil":12,"dka":134,"the":16,"rga":14,"ri ":48,"rge":14,"rey":42,"ree":110,"rda":15,"rdh":16,"re ":77,"rco":10,"rax":25,"ray":99,"rar":15,"ras":44,"rat":10,"rba":11,"rah":41,"ran":54,"ram":17,"rak":12,"rab":82,"raa":165,"rad":87,"rs ":11,"roo":48,"rna":16,"rne":11,"rni":10,"ro ":63,"rma":23,"Nab":15,"rla":13,"rku":10,"rko":10,"rki":41,"rke":18,"rka":117,"riy":58,"ris":28,"rig":31,"rii":110,"rik":46,"rin":21,"ric":16,"rya":13,"rur":10,"run":18,"ruu":10,"ry ":11,"rsi":16,"rsa":63,"rsh":15,"rta":110,"rto":18,"rte":11,"rti":11,"rub":12,"saa":120,"sab":11,"sad":52,"sag":23,"sah":11,"sal":49,"sam":47,"sbi":14,"san":191,"sas":14,"sar":33,"say":43,"sa ":99,"sha":242,"sho":46,"she":41,"shi":83,"si ":68,"siy":42,"sid":91,"shu":10,"sil":13,"sim":38,"sii":82,"sig":32,"se ":61,"sh ":17,"see":14,"sow":16,"som":59,"soo":214,"soc":14,"su ":25,"sla":30,"sku":37,"ska":59,"so ":55,"sma":15,"حمد":15,"ste":15,"sta":66,"sto":28,"sti":41,"sub":11,"suf":12,"sug":13,"sul":11,"suu":22,"tal":42,"tag":10,"tah":87,"taa":194,"tad":13,"tay":60,"tar":33,"tan":31,"tam":13,"te ":13,"ta ":272,"bka":23,"biy":71,"bis":28,"bir":12,"bil":48,"bin":31,"big":38,"bii":37,"bo ":47,"bol":129,"bna":15,"boo":24,"bba":12,"be ":19,"ban":61,"bal":43,"bah":27,"bad":232,"baa":96,"bab":12,"bay":35,"bax":34,"bas":10,"bar":156,"bdi":25,"bdu":11,"bi ":69,"bee":145,"ber":11,"bey":12,"ca ":55,"car":35,"cas":13,"can":24,"cay":13,"cab":20,"cad":53,"caa":145,"cal":33,"cag":16,"bri":13,"bra":15,"bsa":11,"bta":33,"bti":13,"bur":20,"bul":12,"buu":52,"aka":19,"am ":40,"aki":23,"aji":27,"ajo":16,"qa ":12,"al ":136,"ahi":41,"qar":20,"qay":16,"aho":10,"qad":44,"qab":47,"qaa":149,"ahd":20,"qan":14,"qal":17,"ahe":26,"aha":697,"agm":13,"agt":24,"agu":76,"ago":29,"aq ":22,"qdi":38,"qda":17,"any":23,"ano":51,"ann":10,"ant":70,"ans":32,"ane":21,"ang":10," ال":46,"ani":87,"ank":185,"ana":385,"anb":26,"and":92,"amu":23,"amo":10,"amk":32,"amh":19,"ami":82,"ame":93,"amb":16,"ama":257,"aly":20,"qey":14,"alo":160,"alm":17,"all":22,"alk":165,"alg":17,"ali":424,"ald":14,"ale":110,"ala":480,"alb":42,"an ":924,"aba":194,"abd":37,"abe":56,"abi":146,"abk":18,"abo":40,"abt":38,"abu":36,"aca":130,"aab":114,"aac":13,"aaa":15,"aaf":38,"aag":64,"aad":398,"aaj":28,"aak":21,"aah":75,"aan":742,"aal":743,"aam":113,"aas":211,"aar":259,"aaq":41,"aaw":32,"aat":37,"aay":89,"aax":19,"ad ":334,"qiy":15,"ac ":19,"aa ":1110,"qii":10,"ab ":33,"afr":11,"aft":15,"afi":18,"aga":458,"age":12,"ah ":325,"afa":38,"ado":85,"adl":23,"adk":153,"adn":12,"adh":26,"adi":223,"add":96,"ade":66,"ag 
":29,"adw":22,"adu":44,"aci":16,"ace":10,"Qar":12,"acd":15,"ada":1138,"af ":19,"acy":15,"acs":19,"qor":48,"qoo":60,"qof":24,"axi":13,"axm":15,"axo":15,"axu":15,"axa":702,"axb":16,"axd":50,"axe":90,"ayi":11,"ayo":52,"ayn":115,"ays":84,"ayu":13,"axy":16,"axw":26,"ayb":10,"aya":151,"ayg":11,"ayd":32,"aye":26,"ba ":84,"qur":24,"at ":11,"arg":25,"are":96,"ard":30,"arb":14,"ara":357,"aro":72,"arn":19,"arm":17,"arl":10,"ark":135,"ari":153,"aru":20,"ars":39,"art":72,"asa":99,"ary":14,"asi":106,"ash":156,"ase":12,"aso":31,"ask":17,"ar ":198,"as ":80,"aqa":111,"aqi":13,"aqo":51,"ax ":98,"awe":20,"ay ":932,"awa":46,"awl":31,"awi":33,"ata":37,"asu":12,"ast":33,"ato":18,"ate":17,"ra ":58,"ati":34,"ngi":20,"ni ":47,"Isl":11,"neh":11,"ng ":11,"nee":16,"nfu":25,"ney":14,"ne ":43,"ndh":18,"ndi":22,"nan":17,"nac":45,"nad":83,"nah":41,"nab":18,"naa":131,"Ito":28,"nbe":15,"nd ":69,"AXE":10,"AY ":10,"nba":11,"AXA":12,"nay":47,"nax":11,"na ":412,"Jab":13,"Jan":13,"Jam":22,"KA ":11,"KAL":10,"nya":38,"AAL":13,"ADA":25,"nuu":21,"nto":13,"nti":37,"nta":176,"nte":24,"nsi":15,"nsa":22,"AHA":14,"noo":67,"noq":18,"nna":11,"ALA":17,"nle":12,"no ":59,"nki":22,"nka":271,"AN ":16,"nii":13,"nih":11,"nig":39,"niy":10,"nis":15,"nim":17,"nin":39,"ogu":24,"oga":60,"Jub":11,"ol ":60,"oco":11,"odi":15,"of ":38,"oda":43,"ofe":10,"LA ":12,"د ":29,"oba":86,"od ":60,"obo":134,"obi":38,"ة ":21,"oyi":94,"oya":10,"owl":29,"ow ":45,"ost":14,"ota":10,"ose":28,"os ":15,"oon":114,"ool":98,"oom":198,"oof":13,"oog":60,"ood":123,"oob":124,"or ":39,"ooy":111,"oow":16,"oot":14,"oos":65,"oor":31,"Koo":13,"ore":44,"ori":14,"osa":11,"ort":21,"oqo":37,"oqd":11,"ora":61,"ola":52,"on ":52,"olk":99,"ole":20,"olo":14,"oly":10,"ona":28,"onf":25,"oni":16,"onk":11,"ons":12,"ont":14,"oma":298,"oo ":749,"omp":12,"la ":241,"le ":159,"laa":281,"lab":61,"lac":11,"lad":232,"laf":10,"lah":96,"lag":116,"lal":23,"lan":88,"lam":27,"las":21,"lay":70,"lba":15,"lbe":31,"kuw":22,"kuu":18,"kun":22,"kul":14,"kto":17,"MAD":13,"lom":11,"loo":176,"lmo":12,"lmi":13,"lma":10,"lsh":13,"Luu":11,"li ":92,"lga":16,"ley":29,"leh":35,"lee":98,"lo ":165,"lla":49,"lle":32,"lka":311,"lki":14,"lis":19,"lin":48,"lim":15,"liy":204,"lid":28,"lia":24,"lib":24,"lil":40,"lii":17,"lig":30,"ma ":133,"maa":361,"mac":36,"mah":24,"mad":229,"mag":226,"mar":193,"mas":14,"mal":133,"man":32,"may":23,"max":25,"mba":26,"mbe":10,"me ":19,"med":68,"mee":72,"mey":24,"luq":12,"luu":17,"مد ":15,"lya":33,"lyo":10,"Mar":22,"Mas":10,"Mag":51,"Mad":20,"Maa":17,"Max":25,"moo":35,"muq":17,"muu":16,"mul":10,"Mux":13,"mhu":20,"Muq":24,"Mud":14,"mi ":19,"min":17,"mil":14,"mis":11,"miy":27,"mig":18,"mid":170,"mij":10,"mii":25,"mo ":60,"mka":33},"n_words":[94077,109135,83288],"name":"so"}
\ No newline at end of file
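Each of these profile files is a single JSON object with three fields: "freq" maps 1-, 2-, and 3-character n-grams to their raw occurrence counts in the training corpus, "n_words" appears to hold the corpus-wide totals of 1-, 2-, and 3-grams respectively, and "name" is the language code the profile represents (here "so"). A minimal Python 3 sketch of inspecting one of these files (the load_profile helper below is illustrative, not the library's API):

import json

def load_profile(path):
    # The profile files are extension-less but contain plain UTF-8 JSON.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

profile = load_profile('nlp_resource_data/langdetect/profiles/so')
print(profile['name'])            # 'so'
print(profile['n_words'])         # [94077, 109135, 83288]
print(profile['freq'].get('aa'))  # 4171: raw corpus count of the bigram 'aa'

As far as the langdetect sources go, detector_factory.py reads every file in the profiles directory in essentially this way the first time detection is requested.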
diff --git a/nlp_resource_data/langdetect/profiles/sq b/nlp_resource_data/langdetect/profiles/sq
new file mode 100755 (executable)
index 0000000..fe3fc7a
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/sq
@@ -0,0 +1 @@
+{"freq":{"D":2951,"E":2593,"F":3211,"G":3160,"A":5867,"B":4180,"C":1638,"L":2905,"M":4987,"N":3399,"O":1229,"H":1921,"I":3066,"J":1447,"K":6663,"U":1057,"T":3497,"W":518,"V":1916,"Q":766,"P":4792,"S":7666,"R":2957,"Y":251,"X":569,"Z":898,"f":14736,"g":26110,"d":50009,"e":167179,"b":16462,"c":10479,"a":127255,"n":113931,"o":70925,"l":52723,"m":56721,"j":47732,"k":46937,"h":74009,"i":153617,"w":760,"v":25346,"u":51367,"t":143796,"s":94347,"r":123043,"q":14863,"p":40169,"z":11328,"y":8479,"x":1327,"Ë":504,"Ç":673,"Fil":198,"ë":138684,"ç":2147,"Fja":191,"Evr":427,"ο":279,"α":259," l":7602," m":19599," n":36778," o":2746," h":2298," i":13440," j":4180," k":16392," d":18386," e":22497," f":7928," g":6344," a":8088," b":5231," c":3040," z":2178," u":2660," t":29267," v":9770," q":6524," p":21985," s":22399," r":6234," J":1246," K":6396," H":1660," I":2018," N":2979," O":928," L":2729," M":4746," B":3697," C":1374," A":5029," F":3007," G":2987," D":2696," E":2074," Z":866," Y":222," X":455," S":7145," R":2508," Q":712," P":4598," W":483," V":1621," U":882," T":3065,"Gje":385," ç":814,"Gji":334," ë":7724,"Gju":218," Ç":648," Ë":309,"Gja":201,"Fra":312,"A ":555,"For":238,"Da":377,"Co":247,"Ch":193,"Du":229,"Do":262,"Dr":303,"De":624,"Di":445,"Dh":185,"Fe":366,"Fa":366,"Ev":475,"El":231,"Ga":322,"I ":544,"Fr":441,"Fo":342,"Fl":194,"Fj":196,"Fi":584,"BA":277,"C ":177,"Au":418,"Ar":625,"At":240,"Ba":1072,"Af":186,"Am":331,"An":632,"Ai":327,"Aj":203,"Al":611,"Bu":398,"Br":503,"Ca":335,"E ":302,"Bi":247,"Be":668,"Bo":499,"Ku":625,"Ky":228,"Kr":523,"Ko":2407,"Le":440,"Li":756,"La":432,"Lu":465,"Lo":267,"Me":757,"Mi":743,"Ma":1783,"Mu":398,"Mo":476,"Nj":248,"Ni":232,"Nd":207,"Ne":392,"Na":320,"No":435,"Gj":1228,"Gr":581,"Go":208,"Gu":206,"Ha":528,"He":303,"II":206,"Hi":261,"Ho":278,"In":443,"Is":481,"It":205,"Ja":385,"Je":182,"Jo":291,"Ju":322,"Ka":1334,"Kj":278,"Ki":240,"Un":312,"Tu":198,"Tr":315,"To":375,"Pë":415,"Th":341,"Ti":609,"Te":638,"Ta":357,"St":538,"Su":265,"Wi":200,"Vi":295,"Va":291,"Ve":412,"Qe":273,"Pu":185,"Pr":997,"Pe":613,"Pa":1058,"Po":615,"Pi":181,"Kë":300,"Se":537,"Si":523,"Sh":3256,"Sk":218,"Sp":262,"So":375,"Ru":267,"Rr":230,"Sa":579,"Re":810,"Ri":264,"Në":821,"Ro":386,"Ra":365,"Gre":313,"b ":407,"a ":28448,"Xh":218,"i ":33796,"ge":656,"ga":7640,"fj":347,"fl":382,"fi":3904,"fs":1052,"fr":816,"fu":1400,"ft":1033,"fo":1696,"bë":2042,"j ":4723,"hf":238,"he":19006,"ha":4203,"gl":638,"gj":7884,"gi":492,"gu":1606,"gr":2543,"cë":728,"go":1368,"du":2130,"dy":756,"g ":1263,"ea":970,"eb":315,"ec":617,"ed":3466,"de":5126,"di":8348,"dh":17111,"dj":1045,"dm":281,"do":4139,"ds":204,"dr":2528,"ew":200,"ex":233,"eu":592,"ev":3916,"ez":1764,"fa":1842,"h ":2770,"fe":1185,"eh":617,"eg":2448,"ef":474,"el":4768,"ek":4482,"ej":2627,"ei":482,"ep":1848,"eo":855,"en":13277,"em":3767,"et":19558,"es":10616,"er":15019,"eq":1060,"ca":1358,"e ":72317,"br":1628,"bu":1107,"bo":1748,"bj":242,"bl":1253,"bi":2183,"be":1978,"da":2581,"f ":429,"co":479,"ck":313,"ci":4788,"ch":654,"ce":1081,"c ":318,"az":1629,"ay":236,"ba":3411,"d ":2270,"at":11806,"as":8047,"ar":20865,"aq":1562,"av":2909,"au":845,"ak":4327,"al":8585,"ai":935,"aj":4288,"ap":2172,"am":3780,"an":16760,"ac":1088,"ad":3850,"ab":980,"ag":1063,"ah":884,"ae":194,"af":1439,"nu":1486,"nt":6015,"ns":1390,"jë":8682,"no":2581,"nn":299,"q 
":809,"ny":268,"nx":351,"oe":253,"of":894,"oc":773,"od":2400,"oa":235,"ob":841,"om":4147,"on":11462,"ok":1665,"ol":4317,"oi":918,"oj":2100,"og":2207,"oh":3303,"ot":2941,"os":4887,"ov":2272,"ou":371,"op":2168,"oo":285,"kë":4598,"or":15900,"oq":456,"r ":22499,"oz":751,"pe":3763,"pa":6781,"pl":799,"lë":3066,"po":4293,"pi":2577,"pj":1679,"lo":4348,"hë":4530,"lm":750,"ll":8126,"ls":381,"lu":2680,"lt":1030,"ly":235,"o ":4990,"ma":6825,"mb":3911,"me":13242,"iç":252,"mi":11206,"mj":687,"mp":1128,"mo":2299,"mr":960,"mt":825,"ms":189,"mu":2606,"p ":820,"na":4803,"nc":1647,"nd":14065,"ne":6011,"nf":360,"ng":8085,"ni":9080,"nj":10024,"nk":623,"jy":374,"jv":200,"jt":2182,"ju":2055,"jr":203,"js":193,"jn":989,"fë":409,"jo":2391,"jm":201,"kj":232,"ki":2183,"ke":4651,"ka":7856,"m ":4929,"ky":261,"ks":1435,"kt":3378,"ku":6445,"ko":7127,"gë":1191,"kr":3597,"kl":551,"km":406,"kn":219,"li":13297,"lk":295,"lj":608,"le":5962,"ld":260,"lg":209,"la":6555,"lb":400,"n ":21363,"hr":608,"hs":268,"hp":932,"hq":3267,"hv":498,"ht":18411,"hu":3735,"hj":1397,"hk":4243,"hi":5167,"hn":261,"ho":1827,"dë":2976,"hm":1740,"id":1902,"ic":1580,"ib":803,"ia":5265,"ih":920,"ig":1500,"if":611,"ie":1546,"hy":320,"k ":3634,"iq":788,"ir":3308,"is":14282,"it":23622,"iu":865,"iv":2165,"ij":2183,"eç":454,"ik":8974,"il":7041,"im":10832,"in":18975,"io":3678,"ip":5685,"je":13383,"ji":4180,"iz":2757,"l ":3714,"ja":7433,"xh":647,"të":37539,"z ":666,"së":6850,"y ":1757,"wa":196,"vl":237,"vj":627,"vi":5977,"vr":700,"rë":7665,"vo":819,"uz":609,"uv":188,"ve":12217,"vd":565,"va":1908,"x ":256,"ui":282,"uj":950,"uk":1866,"ul":3717,"ue":2301,"uf":993,"ug":1106,"uh":1399,"uq":312,"ur":9836,"us":3067,"ut":2692,"um":2889,"un":3916,"që":4381,"up":1314,"ty":1124,"tu":7405,"tt":246,"ub":1079,"ua":6884,"ud":1227,"uc":369,"w ":197,"pë":9674,"to":7414,"tm":513,"ts":310,"tr":5518,"te":16922,"tj":1903,"ti":17731,"th":4122,"v ":372,"tb":272,"ta":11255,"su":1269,"sv":254,"ss":508,"st":7650,"sy":189,"sl":488,"sk":843,"sn":215,"sm":700,"sp":784,"so":3394,"sq":194,"sc":195,"se":5448,"sh":35309,"sj":384,"si":11685,"u ":4144,"sa":3279,"rr":4311,"rs":2938,"rt":4337,"ru":3266,"rv":415,"ry":2015,"rq":210,"rp":488,"ro":7859,"në":24491,"rn":818,"rm":3136,"rl":428,"rk":1734,"rj":983,"ri":20960,"rh":205,"rg":2062,"rf":941,"re":16088,"rd":2048,"rc":625,"rb":1536,"ra":14927,"t ":30601,"qy":921,"qu":769,"më":7228,"qj":229,"qi":4293,"qe":2941,"qa":297,"s ":14264,"pt":2183,"pu":2168,"pr":4557,"ps":322,"zë":930,"zg":391,"zh":1019,"zi":2576,"zb":204,"ze":1031,"za":1391,"zy":222,"zu":773,"zo":1184,"vë":1622,"zj":202,"zm":366,"ye":1323,"yt":1297,"ys":927,"yr":1681,"yp":187,"yn":208,"ym":190,"yl":202,"Art":179,"Aut":217,"Bas":348,"Ai ":287,"Ame":229,"Ber":210,"Bot":188,"Ës":295,"Çm":287,"アアア":185,"ër":16534,"çë":180,"ëp":625,"ëm":1771,"ën":7250,"ël":747,"ëz":648,"ëv":1509,"ës":19082,"ët":3696,"ëh":461,"ëj":261,"ëd":282,"ë ":85317,"çm":262,"çi":247,"çe":269,"ça":442,"ç ":230,"Nob":257,"Per":268,"Pas":194,"Par":336,"Pro":243,"Pri":459,"Pre":232,"Ish":253,"Ita":196,"ア":259,"Jug":202,"Ka ":178,"Kal":215,"Kar":206,"Kjo":277,"Kon":248,"Kom":428,"Kos":1014,"Kor":248,"Ky ":207,"Lig":189,"Mal":292,"Mar":396,"Maq":235,"Mad":210,"Min":180,"Ësh":295,"çmi":226,"ëhe":440,"ëm ":968,"ël ":264,"ëll":356,"ën ":3829,"ënt":362,"ëng":554,"ënd":1225,"ëna":195,"ëmi":309,"ëmb":216,"ëdh":247,"Sta":228,"Shk":524,"Shq":1268,"Sht":446,"Ser":197,"Rep":364,"Në ":708,"Uni":285,"The":241,"Tir":480,"Për":406,"çan":218,"bje":222,"bis":202,"bim":300,"bin":241,"ble":244,"bli":768,"bol":404,"bot":888,"be 
":208,"ban":768,"baj":338,"baz":296,"bas":738,"bar":627,"bi ":668,"ber":314,"bel":434,"bet":408,"ca ":491,"cav":177,"cak":266,"ce ":197,"bri":501,"bra":276,"bre":593,"bur":304,"bul":254,"aka":205,"am ":292,"ake":288,"aki":184,"afë":206,"ajo":282,"ajt":819,"al ":1170,"aja":239,"ak ":563,"ahi":215,"aj ":1938,"agj":197,"ago":189,"ano":603,"ant":917,"ans":360,"ane":1253,"ang":643,"ani":2814,"anj":226,"ana":956,"anc":511,"and":1227,"amu":220,"amp":346,"ami":877,"ame":814,"amb":181,"ama":345,"alt":205,"alo":479,"all":1341,"ali":1967,"ale":1756,"ala":461,"an ":2721,"aks":181,"aku":485,"akt":1546,"ako":591,"aba":207,"abe":271,"aft":183,"afi":534,"ai ":361,"adm":223,"adh":1978,"adi":505,"ade":333,"aci":658,"ada":241,"azi":334,"aze":285,"azh":332,"atë":1708,"azë":204,"at ":2869,"arg":323,"are":2127,"ard":768,"ara":2483,"aro":307,"anë":3784,"arm":193,"arl":201,"ark":631,"arj":231,"ari":2009,"aru":212,"arr":782,"ars":590,"art":1488,"asa":351,"asi":536,"ash":2587,"ar ":6112,"apa":246,"alë":446,"apo":1058,"as ":2094,"aqe":844,"aqi":214,"aqj":186,"amë":284,"ava":276,"aut":366,"arë":1850,"avi":245,"ave":2116,"ata":707,"asu":211,"ast":1173,"atr":379,"ato":1090,"apë":205,"ate":982,"ati":2851,"atu":362,"aty":276,"アア":222,"jed":481,"jeo":227,"jer":1714,"jek":498,"jel":290,"jen":1698,"jes":2300,"jet":2586,"jev":418,"ji ":371,"jat":998,"jas":292,"jar":261,"jal":704,"jak":272,"jan":2402,"je ":2467,"joh":964,"jon":213,"fër":208,"jit":1409,"jis":586,"jim":228,"jin":428,"jik":319,"jih":193,"jo ":859,"ito":557,"ipë":1386,"itu":2464,"iud":180,"iso":211,"ist":3511,"iv ":257,"ita":1245,"ite":1604,"ith":1252,"iti":3696,"itj":291,"irë":662,"iut":194,"iva":248,"ivi":385,"ive":1169,"ilë":642,"ipt":1558,"ipi":226,"is ":537,"ion":2522,"ikë":1269,"ior":219,"ipa":1258,"ipe":572,"ir ":232,"inë":1756,"iro":194,"iri":583,"isi":327,"ish":5381,"ise":279,"isa":571,"iu ":213,"imë":180,"ire":227,"ira":796,"it ":11180,"ja ":2116,"itë":896,"isë":2854,"izu":355,"izo":367,"izm":294,"izi":684,"iza":492,"kim":794,"kin":237,"kip":221,"kis":422,"km ":346,"ken":688,"ket":291,"ke ":3158,"kra":414,"kre":377,"kt ":208,"kry":877,"ku ":990,"kro":261,"kru":297,"kri":1336,"kov":192,"gët":303,"kos":230,"kor":588,"kon":2239,"kom":1483,"gël":243,"kol":575,"koh":623,"kod":386,"gë ":240,"ko ":239,"kla":195,"jtë":442,"jut":199,"jtj":180,"jtu":499,"jua":282,"juh":823,"jug":449,"jta":181,"jti":303,"jnë":775,"jt ":307,"kat":718,"kar":404,"kas":189,"kan":1846,"kal":946,"kam":249,"kak":195,"ka ":2506,"jyr":194," Ga":321," Fo":338," Fr":441," Fi":583," Fl":194," Fj":187," Ha":528," He":302," Go":206," Gr":579," Gu":204," Gj":1228," Ho":278,"ha ":1068," Hi":261," Je":182," Ja":384," Is":478," It":205," In":442,"ham":267,"han":292,"hap":354," Ka":1328," Ki":237,"har":380," Kj":278,"has":365,"hat":909," Jo":291," Ju":322," La":426," Le":440," Li":748," Ko":2406," Kr":523," Ku":622," Ky":226," Ma":1780," Mi":739," Me":757,"he ":11876," Lo":267," Lu":464," Nd":206," Ne":390," Na":320," Nj":246," Ni":229," Mo":474," Mu":395,"hek":391,"hel":179,"hej":224,"het":2896,"hes":233,"her":715,"heq":210,"hen":1147,"hem":522,"hfa":178,"hi ":531," Am":329," An":631," Al":605," Ai":327," Aj":202," Af":185," Ba":1069," Au":417," At":239," Ar":624," Be":668," Bi":243,"hin":747,"him":565,"hil":181," Bo":497," Br":498," Bu":397,"his":436,"hit":1517,"hir":335," Ca":327,"hje":1227," Ch":191," Co":245,"hka":488," Da":377," Di":443,"hke":445," Dh":185," De":621,"hki":188," Dr":303,"hkr":988," Do":261,"hko":663,"hku":970," Du":229," El":231," Ev":475," Fe":366,"dë ":206," Fa":359,"cë 
":308," Xh":218,"gli":277," Wi":198,"gjë":270,"gon":186,"cës":254,"gos":180,"gor":539," a ":180,"gru":420,"gra":1045,"gri":269,"gre":682," Kë":300," Po":615," Pi":181,"gul":267," Pe":612," Pa":1054," No":435," Ra":364," Në":821," Ro":384," Re":809," Ri":264," Pr":996,"gur":313,"gus":310," Pu":185," Qe":273," Su":263," St":520," Ta":356," Th":340," Ti":608," Te":634," Tr":311," To":373," Pë":415," Rr":230," Ru":266," Sa":578," Sh":3248," Si":519," Se":536," So":373," Sp":261," Sk":217," Va":291," Ve":411," Vi":293," Tu":194," Un":311," ja":2600,"ial":691,"ian":1176," je":756," in":1156," is":1788," it":245," ka":3973,"ibr":249," ki":698," ke":222," jo":218," ju":582," ha":507," he":538," gj":3833," gr":1301,"ia ":2790," gu":290," hi":627," dë":296," ho":209," hu":257," nj":8261," ni":252," ng":5742," nd":3078,"iel":198," ne":1135,"ien":214," na":398,"ier":413," mu":1092," mo":751," of":214,"ifi":230," nu":833," no":407," le":585," li":3581," la":1142," ku":2373,"ici":344,"ich":192," ky":232," km":370,"ie ":357," kl":275,"ica":291," kr":2025," ko":4104," me":6491," mi":1023," mj":371," ma":2979," mb":1927," lu":944,"idi":290,"idh":520,"ide":545," ll":324," lo":338," af":314," ad":323," am":508," an":1204," ap":925," ai":237," aj":220," ak":811," al":253," au":397," ar":1194," at":742," as":478," ba":1617,"il ":230,"ija":200," bi":487,"ije":259," be":279," bo":1013," bu":403,"iju":269," br":668," ca":214," e ":17735,"im ":2164,"eça":298,"ika":1647,"iga":202,"igj":563,"igu":244,"icë":252,"ij ":910,"ihe":650,"ik ":1689,"imo":407," et":343," es":291," en":262," em":840," el":514,"ime":1522," ek":723,"imi":5114,"ip ":230," fe":444,"ind":2986,"ina":1004," fa":908,"imt":634," fu":1154," fs":638,"ino":317,"ijë":223," fr":355,"int":466," fo":1164," bë":626,"ins":260,"inf":213,"ine":1441," fl":312,"ing":422," fj":334," fi":2335,"inj":211,"ini":1146,"iq ":474," ga":620,"inu":257," i ":9701,"iko":537,"iki":386,"ike":2366," ci":2530,"ila":1202," da":481,"in ":7905,"ikt":220,"iku":640," do":799,"ilo":291,"ill":1867," dr":678,"ilm":536," de":1917,"ilj":319,"ili":1491," di":2033," dh":10408,"ile":229,"ima":293," ed":1307,"io ":303," du":817," dy":651,"hpr":194," vë":300," zo":209,"hpe":278," zy":211," za":245,"dës":713," zb":179,"hkë":454," zh":535,"hoq":360,"hor":539,"dër":1831," zg":360," të":19747,"hme":1293,"hul":218,"hua":438,"hty":181,"htu":748,"htr":565,"htm":182,"hti":611,"htj":309,"hte":3639,"hta":810,"hsh":200,"hro":441,"ht ":2187,"hqi":3061," ru":213," rr":1833," u ":1417," sa":775," se":1591," si":4496," sh":10305," sk":290," sp":362," so":432," qu":416," qy":665," t ":287," ra":1563," re":1537," ri":358,"htë":9010," në":16417," ro":441," pu":708," pr":3527," qe":1420," më":4730," os":1270,"hum":1047," kë":1907," or":779,"hur":1522," pe":1738," pa":4231,"hvi":461," pl":373," lë":638," po":2196," pi":398," pj":1282," së":2396," va":567," ve":3244," vd":524," vo":419," rë":271," vi":3777," vj":545," vl":199," ty":371," tu":272," us":358," që":3732," uj":303," ta":476," st":879," su":385," tr":1582," to":411," pë":7156," th":1109," ti":1065," tj":633," te":3252,"fes":283,"fer":357,"faq":704,"fam":375,"fak":216,"ezë":244,"ez ":343,"etë":2083,"ezu":183,"eza":184,"ezo":208,"eze":303,"ezi":227,"eta":1185,"ete":1425,"etj":331,"eti":2581,"eth":892,"eso":355,"est":534,"esv":178,"eto":1111,"etr":567,"etu":501,"eve":3023,"erë":1553,"evi":375,"esë":1317,"epe":182,"er ":1621,"eor":217,"ekë":226,"eqi":478,"es ":2822,"epu":459,"elë":206,"epr":500,"eri":4116,"erg":266,"ere":548,"era":1561,"erb":418,"et 
":8412,"emë":345,"esm":180,"esh":1754,"esi":1453,"ese":746,"eu ":239,"esa":635,"err":624,"ert":290,"ers":1252,"ern":432,"erm":871,"enë":512,"ero":390,"eki":248,"ekn":209,"eko":404,"egë":220,"eks":618,"ekt":1016,"eku":676,"en ":3312,"ela":426,"ele":1238,"eli":682,"ell":702,"elu":266,"emb":225,"ema":440,"eme":707,"emo":232,"emi":771,"emr":507,"ene":497,"ena":430,"end":3296,"enc":779,"eno":271,"eni":647,"enj":342,"ens":203,"ent":2477,"eog":217,"egj":763,"ego":445,"ej ":1063,"egu":355,"ek ":360,"eja":234,"el ":689,"ejt":732,"eka":189,"em ":322,"gju":704,"gjy":338,"gje":2061,"gji":3096,"gja":1196,"gim":207,"gaz":249,"gar":602,"gat":279,"gan":669,"ga ":5443,"ftë":453,"fus":270,"fut":207,"fun":698,"fra":187,"fri":223,"fsh":1041,"bër":688,"for":1268,"bët":491,"bëh":244,"fil":1245,"fik":505,"fin":217,"fis":186,"fit":614,"fiz":441,"fja":339,"da ":488," Çm":287,"de ":479,"dal":409,"dat":201,"dar":567,"dan":203," Ës":295,"ces":191,"ci ":188,"cia":487,"cil":2535,"cio":782,"ean":206,"eat":241,"ega":205,"edh":1894,"edi":471,"ede":284,"eda":183,"edo":295,"eci":308," çm":240,"dyt":203,"dy ":376,"dur":609," ës":7645,"dor":1271,"don":593,"dom":180,"dok":214,"dos":301,"dmi":238,"dod":448,"duk":778,"dua":191,"dri":384,"dra":292,"dre":608,"dry":602,"dro":361,"dha":308,"dhu":653,"dia":304,"dhj":875,"dhi":1463,"dhe":12079,"der":1468,"des":314,"det":819,"dh ":431,"deg":199,"del":210,"dek":243,"den":513,"dem":246,"di ":1814,"do ":585,"dhë":1093,"dje":856,"dim":1054,"din":518,"dio":362,"diq":424,"dis":945,"dit":1503,"dik":388,"rga":634,"ri ":4362,"rgj":700,"ret":2035,"res":1012,"rev":322,"rez":402,"rfa":431,"rbë":484,"rfs":234,"rg ":266,"rea":282,"rej":1398,"reg":1272,"reh":210,"rem":230,"ren":1060,"rek":369,"req":525,"rdo":695,"rdi":283,"rdh":548,"re ":5892,"rca":229,"rd ":234,"rap":224,"raq":331,"ras":547,"rat":1584,"rav":877,"rbi":290,"rba":177,"rbe":317,"raj":258,"rah":340,"ran":2020,"ram":546,"ral":554,"rak":530,"rab":220,"raf":580,"rad":1584,"rs ":444,"ror":1129,"ros":215,"nës":866,"nët":322,"rot":314,"rom":357,"ron":1148,"nën":960,"rop":794,"rov":207,"rod":677,"roc":243,"roj":419,"roi":191,"rol":305,"rok":233,"rof":239,"roh":382,"rog":307,"rne":248,"rmo":182,"rmu":349,"ro ":340,"në ":21951,"rmb":202,"rma":1098,"rme":349,"rmi":261,"rku":392,"rko":294,"rki":195,"rke":238,"rja":228,"rje":742,"riz":365,"rip":239,"rio":403,"rit":2857,"ris":2678,"riv":179,"riu":462,"rih":296,"rig":343,"rij":533,"ril":452,"rik":1578,"rin":1760,"rim":1881,"ria":1506,"rie":292,"rk ":231,"rtë":586,"rye":967,"rue":227,"rur":358,"rup":598,"rus":297,"rve":228,"rrë":463,"rsi":760,"rso":384,"rsa":288,"rsh":704,"rse":194,"rta":523,"rto":212,"rte":551,"rth":191,"rti":1091,"rua":870,"rtu":307,"rt ":743,"rmë":369,"rri":1024,"rrj":335,"rre":1446,"rra":326,"rru":264,"saj":699,"san":214,"sat":207,"sa ":1426,"rys":615,"sha":1190,"shm":1425,"sho":753,"shp":846,"shq":1997,"shr":422,"sht":17551,"she":1082,"shf":232,"shi":1551,"shk":3709,"si ":3797,"sje":269,"sid":277,"sia":412,"shu":1255,"sit":1207,"sir":214,"sis":1576,"sip":1484,"sin":587,"sio":712,"sim":613,"sik":262,"se ":3292,"ser":504,"set":189,"sh ":1239,"sen":315,"spo":232,"spe":251,"sot":202,"sov":1143,"son":618,"sor":814,"st ":714,"shë":1713,"sla":211,"ske":209,"sma":252,"sme":323,"stë":596,"sse":192,"ste":1041,"sta":1312,"sto":730,"sti":1254,"stu":543,"str":1367,"sua":229,"sue":178,"sur":369,"sve":238,"taj":547,"tal":816,"tav":260,"tat":899,"tas":209,"tar":4736,"tan":685,"te ":5516,"tbo":238,"ta ":2326,"pa ":427,"pe ":427,"par":1922,"pat":286,"pas":2622,"pak":267,"pan":389,"pi 
":351,"per":1941,"pet":184,"pes":328,"pla":231,"plo":258,"pje":1425,"pju":181,"pia":517,"pik":379,"pin":244,"pio":290,"pit":346,"poz":244,"lër":206,"por":810,"pop":489,"lëv":248,"lët":262,"lës":678,"pos":377,"lën":745,"pon":243,"pol":674,"lë ":644,"po ":983,"pta":1447,"pub":598,"pti":317,"pto":261,"pra":544,"pri":797,"pre":1386,"pro":1746,"pun":526,"pul":588,"qar":179,"qe ":553,"qet":282,"qev":185,"qer":357,"qen":825,"qed":256,"qi ":290,"qit":232,"qip":2962,"qis":408,"më ":5423,"mës":389,"mër":628,"mën":311,"qua":358,"quh":192,"qyt":799,"ra ":3887,"ncë":386,"ngj":549,"ngl":339,"ngu":279,"ngr":213,"ni ":1941,"nge":207,"nga":5420,"ndë":2245,"nen":1014,"ner":340,"net":653,"nes":253,"nev":297,"ng ":438,"nez":219,"nci":280,"nce":340,"nca":359,"ne ":2694,"ndu":806,"ndr":1255,"ndo":1605,"ndj":868,"ndi":3173,"nde":1242,"nda":1141,"nal":768,"nar":589,"nd ":1287,"nav":195,"nat":905,"nas":219,"na ":1164,"ntë":218,"nxh":177,"nuk":387,"num":496,"nua":315,"nto":514,"ntr":261,"nti":1877,"nta":460,"nte":1912,"nst":368,"nsi":197,"nt ":562,"nom":633,"non":199,"jës":1000,"jër":345,"nor":452,"nov":244,"një":7204,"jë ":6765,"no ":251,"ngë":383,"nji":251,"nje":980,"nja":288,"njo":1016,"nie":218,"nic":209,"nia":391,"niz":506,"niv":445,"nis":1563,"nit":1162,"nim":624,"nin":724,"nik":917,"ogr":753,"ogj":624,"oi ":722,"ohu":860,"ohe":1613,"oj ":252,"ojn":599,"oje":249,"oja":193,"ol ":185,"oci":316,"odh":931,"ode":218,"odi":416,"of ":206,"odu":227,"ofe":213,"obe":363,"otë":804,"ovë":1008,"ozi":268,"otu":316,"oti":443,"ote":245,"oto":318,"opë":263,"ost":299,"ota":201,"osh":522,"ose":1363,"oso":1152,"ovi":292,"orë":1117,"ova":484,"ove":273,"oqë":260,"opo":219,"opi":438,"ope":206,"os ":591,"opu":565,"kën":763,"okë":307,"or ":3887,"kët":1025,"kës":1506,"kër":189,"orm":1202,"onë":412,"orr":382,"orc":230,"ord":255,"ore":3570,"org":551,"ori":2679,"ort":629,"oru":235,"ot ":268,"ora":318,"ola":251,"on ":2800,"oli":1087,"oll":1071,"ole":276,"olo":874,"ohë":394,"oka":403,"ogë":224,"oku":205,"ona":1138,"ond":187,"one":812,"onj":684,"oni":2539,"ojë":384,"ono":475,"ons":340,"ont":1418,"oma":638,"kë ":735,"ome":460,"omb":682,"omi":782,"omp":538,"omo":183,"omu":542,"la ":1588,"le ":1504,"lan":809,"lam":463,"lar":790,"lat":987,"las":448,"lav":342,"lba":191,"kut":329,"kus":201,"kur":1434,"kup":473,"kun":397,"kul":958,"ky ":230,"kth":196,"kte":440,"ksi":671,"kuf":336,"kua":615,"ktr":411,"ktu":532,"kti":608,"kto":809,"llë":931,"lon":395,"hën":1076,"hëm":639,"lor":960,"hër":431,"log":828,"loj":558,"loi":197,"hës":946,"lot":202,"lmi":262,"ltu":283,"lua":941,"luf":367,"lue":177,"lsi":217,"li ":2003,"lez":179,"lev":346,"les":310,"let":699,"ler":367,"lem":300,"len":674,"lek":623,"lls":213,"llu":548,"hë ":1014,"lla":1048,"lle":383,"lli":2212,"llo":1314,"lm ":208,"lje":416,"ll ":881,"lja":178,"lit":1752,"lis":1010,"lir":324,"lio":179,"lin":3413,"lim":1223,"liz":412,"lic":188,"lid":353,"lia":455,"lib":286,"lik":872,"lig":270,"ma ":556,"maj":406,"mak":224,"mad":655,"mar":852,"mas":295,"mal":684,"man":1367,"mat":1185,"mba":779,"mbl":257,"mbi":849,"mbe":401,"mbr":294,"me ":7511,"mbu":225,"med":203,"met":1333,"mev":393,"mes":776,"mer":1044,"mel":355,"men":1333,"mbë":676,"lum":373,"mpj":180,"mpi":353,"ëtë":434,"mpo":182,"mon":405,"mor":732,"mos":301,"mri":623,"mra":263,"mua":310,"mta":664,"mur":234,"mul":232,"mun":1237,"muz":308,"ës ":6600,"ëpi":229,"ër ":5836,"mi 
":2352,"ëse":300,"ësa":205,"ërt":763,"ërs":761,"ërp":333,"ënë":345,"ëro":630,"ërr":212,"mje":546,"ërk":679,"ërm":698,"ërg":545,"ëri":1847,"min":2761,"ërf":795,"mil":488,"mim":764,"ërd":700,"mir":472,"ëra":457,"mis":535,"ërb":763,"ët ":1589,"mit":2427,"mik":541,"mij":213,"ëti":364,"ëto":188,"ëpë":198,"ëta":971,"ësh":8836,"ësi":2416,"ëso":446,"ësu":202,"ërë":456,"ëve":1277,"zua":506,"zyr":215,"zgj":372,"zi ":247,"zet":248,"ze ":310,"zan":199,"zak":278,"zat":320,"vës":631,"zon":428,"zoh":205,"vë ":592,"zmi":236,"zhv":458,"zim":386,"zik":591,"zis":248,"zit":331,"yrë":392,"yte":900,"ysh":673,"yrt":226,"yra":227,"yre":469,"za ":311,"ytë":280,"yes":421,"yer":268,"yeq":202,"të ":32538,"tëp":239,"tër":1782,"tët":229,"tës":1610,"tëv":188,"tën":440,"tëm":288,"xha":211,"xhi":228,"Çmi":287,"së ":5936,"sëm":198,"sën":372,"vro":528,"veç":250,"vil":586,"vin":191,"vic":177,"viz":481,"vit":3504,"vis":321,"vje":622,"rë ":3679,"vog":268,"rën":1295,"rët":460,"rës":1368,"rëv":444,"rëz":253,"vep":456,"ver":1289,"vet":682,"ven":1374,"vel":341,"vdi":416,"ve ":7283,"val":324,"var":488,"va ":428,"uzi":400,"urë":434,"ush":1610,"usi":271,"ust":434,"uti":207,"ute":336,"utb":206,"uto":619,"us ":390,"umë":772,"ut ":820,"ura":1239,"ure":247,"urg":207,"uri":1038,"uro":455,"unë":434,"urr":247,"urt":615,"qër":295,"qës":313,"ur ":4520,"upi":325,"upt":364,"umr":331,"umi":458,"uma":183,"umb":309,"ume":412,"që ":3460,"uni":521,"ujë":192,"und":1633,"una":206,"up ":226,"uku":221,"ukt":232,"uke":519,"ult":579,"uhë":641,"ull":1873,"uli":199,"ula":307,"un ":316,"uk ":422,"ujt":177,"ugo":283,"uft":506,"uhe":480,"uha":223,"udh":352,"udi":562,"ues":1871,"ufi":398,"ug ":248,"ua ":660,"uar":4550,"ual":287,"uan":427,"ubl":681,"uaj":694,"tyr":784,"tur":3146,"tul":217,"tua":1367,"tud":541,"tue":838,"tre":747,"tra":1541,"tri":1729,"tru":612,"tro":732,"tu ":717,"tme":342,"to ":593,"pë ":222,"toj":273,"toh":424,"pës":324,"tom":222,"ton":869,"tok":323,"tol":198,"për":8954,"tor":3712,"tij":916,"til":200,"tik":1967,"tit":3342,"tis":395,"tin":4094,"tim":1479,"tio":325,"thu":272,"tia":238,"tiv":962,"tje":1286,"tja":349,"thë":407,"tem":772,"ten":475,"tek":589,"tel":426,"th ":586,"tev":1136,"tet":4737,"tes":527,"ter":1775,"ti ":2991,"tho":197,"ths":192,"the":828,"thi":467,"tj ":257,"tha":760,"BA ":261,"zë ":417},"n_words":[1760559,2076420,1518161],"name":"sq"}
\ No newline at end of file
diff --git a/nlp_resource_data/langdetect/profiles/sv b/nlp_resource_data/langdetect/profiles/sv
new file mode 100755 (executable)
index 0000000..a648588
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/sv
@@ -0,0 +1 @@
+{"freq":{"D":81614,"E":57703,"F":65307,"G":58568,"A":101184,"B":88354,"C":71198,"L":68342,"M":88836,"N":55651,"O":30870,"H":85820,"I":53417,"J":41709,"K":67803,"U":32439,"T":73345,"W":29206,"V":48264,"P":64540,"S":183432,"R":54519,"Y":8819,"X":3563,"Z":5647,"f":573497,"g":796472,"d":1393951,"e":3122256,"b":426497,"c":439610,"a":2769748,"n":2605268,"o":1513455,"l":1576016,"m":1015154,"j":171593,"k":981769,"h":549646,"i":1975038,"w":38244,"v":702277,"u":641405,"t":2130744,"s":2008119,"r":2700083,"q":5367,"p":544314,"z":28076,"y":232177,"x":48501,"Å":5245,"Ö":9894,"é":11827,"å":315734,"ä":562602,"ü":3814,"ö":429328," l":92862," m":210837," n":78082," o":280066," h":123124," i":363557," j":42422," k":154387," d":271388," e":324754," f":359134," g":79790," a":307677," b":150256," c":22076," y":5622," u":95544," t":189989," v":181674," p":161981," s":494735," r":78305," J":40952," K":63008," H":83187," I":43524," N":52283," O":27251," L":64682," M":82331," B":82669," C":63320," A":84049," F":59275," G":56022," D":77148," E":54653," Z":5389," Y":8451,"и":3560,"о":3458," S":163518," R":51071," P":59385,"а":4143," W":28309," V":40400," U":30253," T":68207," å":25155," ä":214016," ö":25267," Å":5217," Ö":9837,"A ":15096,"F ":4011,"Da":13315,"Cl":3705,"Co":16319,"Ce":3786,"Ch":12336,"Ed":3462,"Do":5286,"De":40359,"Di":6082,"Fe":3321,"Fa":6453,"Eu":5433,"Er":5959,"En":12408,"El":5544,"Ge":7613,"Ga":6615,"I ":11291,"Fr":15828,"Fo":6429,"Fl":4338,"Fi":8854,"B ":3976,"C ":5688,"Au":4696,"Ar":9978,"As":4625,"D ":3210,"Ba":14125,"Ad":3948,"Am":3905,"An":15253,"Al":15279,"Bu":5535,"Br":13255,"Ca":15038,"Bi":5634,"Be":16801,"Bo":13961,"Bl":4691,"Ku":4607,"Gö":6526,"Kr":7039,"Ko":8264,"Le":10243,"Li":14030,"La":13793,"Lu":7481,"Lo":9958,"Me":11631,"Mi":11197,"Ma":32867,"Mu":4647,"Mo":11964,"Ni":7405,"Ne":10761,"Na":8237,"P ":3714,"Ny":3435,"No":15326,"Ol":4960,"Gr":10737,"Go":5035,"Gu":8716,"Ha":34281,"He":15981,"II":4483,"Hi":4225,"Ho":12929,"Hu":4943,"K ":4256,"In":13186,"Is":3759,"Ja":10130,"Je":5266,"Jo":15004,"Ju":4068,"Ka":19254,"Fö":6773,"M ":4536,"Ki":6491,"Ke":4271,"Up":4420,"Un":6136,"Ty":4282,"Tu":3610,"US":9839,"Tr":7442,"To":8743,"Th":13440,"Ti":6372,"Te":8278,"Ta":5863,"V ":6654,"Sy":4319,"St":38584,"Sv":20759,"TV":4696,"Su":6305,"Wo":3225,"Wi":9262,"Wa":7328,"We":4981,"Vi":10913,"Va":8115,"Ve":4995,"Pr":8839,"S ":6544,"Pe":11351,"Pa":14568,"Po":7593,"Pi":4124,"Or":4779,"Se":8850,"Sc":5830,"Si":8063,"Sh":4489,"Sk":9057,"Sp":7173,"So":11179,"Ru":3598,"Sa":15777,"Re":10230,"Ri":6924,"Ro":13031,"SA":10320,"Ra":8269,"b ":9647,"a ":475159,"Yo":5029,"Sö":3933,"Vä":9609,"bö":8648,"i ":355245,"fy":6327,"gd":12439,"ge":171880,"gf":3700,"ga":96208,"gb":3970,"fj":3473,"fl":21495,"ff":12688,"bå":4938,"fi":46296,"bä":6295,"fr":73976,"fu":7874,"ft":39725,"fo":44582,"j ":14814,"gy":3273,"he":67749,"ha":96551,"gn":17191,"gl":17131,"gj":3942,"gi":45303,"gh":13879,"gg":27749,"gu":21320,"gt":19887,"gs":51929,"gr":63119,"go":20646,"dt":3864,"du":15217,"dv":9506,"dy":5840,"g ":162227,"ea":25581,"eb":31218,"ec":33642,"ed":130488,"de":463243,"dd":77303,"dg":4171,"df":3528,"di":70271,"dh":3483,"dk":3943,"dj":7587,"dm":5988,"dl":16308,"do":35107,"dn":14977,"ds":66886,"dr":50264,"ew":8421,"ex":20287,"eu":8550,"ev":26002,"ey":12032,"fa":41395,"h ":210556,"fe":29350,"eh":9823,"eg":38958,"ef":28657,"ee":11526,"el":240548,"ek":41076,"ej":4317,"ei":17866,"ep":29857,"eo":14539,"en":790036,"em":80183,"et":324009,"es":162582,"er":632238,"ca":14258,"e 
":392431,"by":19363,"br":42376,"bu":29326,"bo":47200,"bl":35898,"bi":33086,"bb":11237,"be":121628,"db":6093,"da":116179,"f ":20176,"cu":4292,"ct":5938,"co":13763,"ck":84782,"ci":26831,"ch":224513,"ce":44300,"c ":6962,"az":4871,"ay":9160,"ba":47465,"d ":342945,"at":219537,"as":105952,"ar":493878,"ax":4752,"aw":3514,"av":157525,"au":25162,"ak":38031,"al":200396,"ai":14768,"aj":13462,"ap":42925,"am":154793,"an":490452,"ac":25353,"ad":157516,"aa":3218,"ab":17368,"ag":71089,"ah":8954,"ae":7341,"af":19685,"nu":26246,"nt":115132,"ns":230163,"nr":8077,"no":73752,"nn":76927,"jö":13694,"ny":12036,"nv":25830,"oe":5372,"of":28837,"oc":231970,"od":38865,"oa":7955,"ob":24414,"om":258987,"on":206933,"ok":32662,"ol":124858,"oi":5452,"kå":10126,"oj":3321,"og":35630,"kä":17405,"oh":11968,"ot":63544,"os":44373,"ov":30611,"ou":23986,"op":33718,"oo":9810,"or":224835,"r ":824959,"ow":7736,"kö":11089,"pe":96760,"pg":3698,"pa":50445,"pl":23211,"pn":3697,"po":41753,"ph":7115,"lä":54929,"pi":19978,"lå":14272,"lo":57125,"ln":12525,"lm":43916,"ll":251645,"ls":70931,"lp":6851,"lv":21024,"lu":27907,"lt":40778,"hö":19038,"ly":18029,"o ":47987,"md":3318,"ma":128458,"mb":42798,"mg":3435,"mh":5236,"me":195253,"mf":8701,"mk":4460,"ml":20883,"mi":57579,"mn":28583,"mm":63704,"mp":23791,"mo":40002,"mr":10561,"mt":14449,"ms":22455,"mu":35770,"my":8276,"p ":30112,"na":190258,"nb":8963,"nc":14129,"nd":274187,"ne":121340,"nf":12519,"ng":233628,"jä":20365,"nh":9404,"ni":141019,"nj":5464,"nk":26276,"nl":21248,"nm":4189,"ju":37966,"jo":14819,"gå":19638,"ki":37850,"kh":15721,"ke":98699,"ka":235170,"m ":249455,"fö":204508,"ky":14497,"gö":7789,"ks":31609,"kt":90094,"ku":21062,"kv":10463,"ko":98732,"kr":52042,"kl":32687,"km":8676,"kn":24158,"li":194145,"hå":8698,"lh":8848,"hä":12770,"lk":26056,"lj":22122,"le":171301,"ld":48550,"lg":7505,"lf":10995,"la":251597,"lb":23488,"n ":932062,"hr":6279,"ht":5130,"hu":27638,"hj":3586,"dä":13148,"då":7895,"hi":28029,"hn":5105,"ho":40609,"hl":4048,"id":81847,"ic":45251,"ib":12524,"ia":49113,"ig":122539,"if":22472,"ie":80770,"dö":42849,"hy":3755,"k ":153527,"ir":29485,"is":203197,"it":107910,"iu":7188,"iv":58134,"ix":3284,"ii":3814,"ik":122112,"il":185111,"im":21187,"in":352330,"io":82024,"ip":11363,"je":24984,"få":5419,"fä":10915,"iz":3965,"l ":174554,"ja":31220,"tä":25279,"xi":4781,"tå":13823,"xt":11670,"sö":12206,"z ":7095,"xa":4078,"xe":8556,"sä":22008,"wi":3916,"så":23262,"rö":18726,"y ":50286,"wa":8836,"we":6979,"vl":8639,"rä":38698,"rå":61767,"vi":89468,"vt":4439,"vu":11778,"vr":3344,"vs":16546,"vn":4095,"vo":9064,"uv":14989,"ve":165060,"vd":3502,"va":154942,"x ":11441,"ui":6645,"uk":16033,"ul":41627,"ue":11748,"ug":17372,"ur":68778,"us":77613,"ut":71499,"um":42001,"un":129135,"up":38948,"ty":33055,"tu":36176,"tt":213949,"tv":19150,"ub":16763,"ua":23575,"ud":29487,"uc":9950,"w ":8904,"to":108377,"tn":19214,"tm":5831,"tl":17707,"ts":67257,"tr":105466,"tg":11122,"tf":8472,"te":298793,"tk":5397,"tj":8416,"ti":267169,"på":66737,"th":27907,"v ":145502,"tb":14691,"ta":250228,"su":13835,"sv":52306,"ss":78482,"st":354837,"sy":19796,"sl":47405,"sk":296865,"sn":10814,"sm":23482,"sp":69363,"so":182040,"sr":7516,"sd":11605,"sc":12968,"sf":11899,"se":125434,"sh":21233,"sg":4215,"sj":15800,"si":100666,"nö":3871,"u ":12792,"sa":104819,"sb":15311,"rr":35803,"rs":138379,"rt":88500,"ru":68780,"rv":15260,"ry":24021,"rp":8643,"ro":113836,"rn":86552,"rm":33875,"rl":42446,"rk":70077,"rj":9549,"ri":267243,"nå":6703,"nä":22292,"rh":9656,"rg":57899,"rf":20073,"re":276377,"rd":70096,"rc":6855,"rb":28710,"ra":275879,"t 
":541739,"mö":6674,"qu":3576,"må":14078,"mä":17850,"s ":365755,"lö":8782,"pt":22670,"pu":12367,"pp":63825,"pr":69684,"ps":16974,"vä":52142,"zi":3697,"vå":20606,"za":3995,"yg":18262,"yf":3366,"yc":9359,"yd":14899,"ya":7829,"tö":15637,"yt":14189,"ys":34208,"yr":23125,"yp":6499,"yn":12472,"ym":10053,"yl":9018,"yk":5316,"å ":92466,"äc":6371,"Ös":5286,"ö ":8541,"én":3279,"åv":3406,"ån":81191,"åt":17902,"ås":6794,"år":43331,"åg":13520,"åe":3379,"ål":17121,"åk":8868,"åd":24022,"ät":25342,"äv":21618,"äx":8215,"äm":18322,"äl":34618,"än":82390,"äp":6744,"äs":34864,"är":272645,"äd":8120,"äg":24196,"äk":15670,"öv":17529,"öt":11908,"ör":194602,"ös":15048,"öp":9470,"ön":13721,"öl":7954,"öm":7511,"öj":4231,"ök":5353,"ög":10789,"öd":115283,"一":3570," Ga":6575," Ge":7551," I ":6565," Fo":6382," Fr":15807," Fi":8815," Fl":4322," Ha":34254," He":15951," Go":5001," Gr":10670," Gu":8675," Hu":4927," Ho":12906," Hi":4166," Je":5254," Ja":10108," Is":3740," In":13126," Fö":6755," Ka":19203," Ke":4195," Ki":6438," Jo":14976," Ju":4056," La":13680," Le":10167," Li":13966," Ko":8252," Kr":7023," Ku":4598," Gö":6518," Ma":32732," Mi":11127," Me":11596," Lo":9918," Lu":7453," Ne":10698," Na":8152," Ni":7391," Mo":11908," Mu":4615," Am":3890," An":15211," Al":15206," Ad":3934," Ba":14044," Au":4688," As":4526," Ar":9902," Be":16738," Bi":5612," Bl":4680," Bo":13888," Br":13201," Bu":5515," Ca":14547," Ce":3768," Ch":12294," Cl":3646," Co":16148," Da":13258," Di":6045," De":40274," Do":5127," Ed":3453," El":5523," Er":5935," En":12351," Eu":5425," Fe":3308," Fa":6390," Sö":3929," Wi":9211," We":4953," Wa":7296," Vä":9600," Yo":5022," Or":4762," Po":7530," Pi":4113," Pe":11321," Pa":14488," Ny":3423," No":15289," Ol":4952," Ra":8225," Ro":12950," Re":10191," Ri":6904," Pr":8806," Sy":4310," Sv":20662," TV":4398," Su":6295," St":38280," Ta":5840," Th":13403," Ti":6347," Te":8213," US":9725," Tr":7389," To":8642," Ru":3585," Sa":15732," Sh":4450," Si":8029," Sc":5776," Se":8798," So":11126," Sp":7111," Sk":9040," Va":8077," Ve":4954," Vi":10865," Tu":3571," Ty":4268," Un":6118," Up":4407," ja":13806," få":4608," fä":3840," in":71154," is":5201," ka":33444," fö":177054," gå":6683," ki":4333," ke":3269," jo":4366," ju":20755," ha":54753," he":14640," gi":5926," gr":27602," gu":3989," dö":38312," id":4924," dä":12563," då":6671," hi":6661," ho":7645," hu":13035," ne":4922," na":17184," my":5673," mu":11845," mo":16924," ok":9806," ol":7701," om":30114," kä":11926," oc":192923," of":14077," ny":4446," nu":5991," no":24830," le":9968," hä":8278," li":23241," la":18204," kv":5049," ku":9406," ky":7530," km":5693," kl":6892," kr":12841," ko":48113," me":100026," mi":15594," ma":40584," hö":10368," lo":3574," ad":5086," am":17761," an":46161," ap":9654," ak":3619," al":18398," av":131499," au":10184," ar":19850," at":35597," ba":19765," bi":15445," be":42377," bo":12025," bl":19693," by":8036," br":16393," e ":3326," et":57787," en":197679," el":30240," ef":10304," eg":6206," fe":13408," fa":16157," ex":7709," fu":4697," fr":64090," fo":25076," fl":15697," bå":3785," fi":26001," ge":16400," ga":6755," i ":270663," bö":6965," fy":5095," ce":5878," ci":5961," da":13590," do":7611," dr":6511," de":166176," di":13002," vä":22816," yt":3626," tä":6373," sö":6783," ru":4704," ry":5559," sa":28037," se":37040," sj":9848," si":31916," sm":3812," sl":17774," sk":41581," sp":27636," so":132908," mö":3254," ra":6370," re":30692," ri":11458," nå":5590," nä":11487," ro":9439," pu":3945," pr":32472," ps":3376," s ":6480," mä":6492," må":7724," 
or":14441," kö":4010," pe":10275," pa":12607," pl":6656," po":22401," lå":10106," lä":20032," rö":4188," så":14446," sä":12510," va":97104," ve":11661," rä":4287," vi":43226," ty":10671," tv":8751," tu":4474," ut":38370," ur":7073," up":19177," un":29257," ta":17457," sy":11768," st":66122," sv":36306," su":3592," tr":20781," to":9251," th":7205," på":64716," ti":85531," te":14430," Ös":5280," år":16994," åt":5457," än":4390," äl":3304," är":187240," äv":12511," ös":4997," öv":12751,"Fin":4121,"Eri":4081,"Eur":4760,"En ":5663,"Eng":3595,"Öst":5201,"Fra":6710,"Fre":3915,"Hel":4484,"Her":3363,"Han":20213,"Har":3270,"Gra":3275,"Ind":4121,"Hon":4948,"Alb":3593,"And":5658,"Car":7211,"Ber":6424,"De ":4186,"Det":11763,"Den":16516,"Dan":4412,"Cha":4718,"Cou":3952,"New":5646,"Nor":12355,"Per":3613,"Pet":3811,"Par":5960,"Pro":3506,"SA ":9788,"Joh":8659,"För":6368,"Kal":4090,"Kar":6529,"Göt":5300,"Lin":4919,"Man":3676,"Mal":3953,"Mar":11224,"Söd":3244,"Wil":4251,"Väs":5464,"Yor":3342,"Sve":19151,"Str":3346,"Sto":17434,"Sta":8398,"Ste":4189,"TV ":4653,"äga":3677,"äge":8468,"ägg":3687,"äck":6350,"änn":6181,"äns":11234,"ämn":7168,"äms":3895,"äng":10799,"änd":34964,"äpp":6344,"Sch":3849,"San":4701,"är ":211888,"älv":4078,"äll":14085,"äkt":7912,"äkn":3856,"äld":3913,"än ":11102,"äve":13254,"ävl":6172,"äxt":6157,"ärk":4071,"ärl":8853,"ärm":4235,"ära":8245,"ärd":6093,"äre":3225,"ärn":6279,"ärs":4830,"äst":24789,"ätt":18556,"åde":18255,"ågo":4233,"åna":9220,"ång":25948,"åll":6899,"ån ":40117,"åre":4622,"åt ":4155,"ård":7005,"år ":24938,"åte":4737,"ått":4014,"Upp":4062,"Tys":3259,"The":8834,"USA":9498,"bis":3915,"bil":16029,"bin":3462,"ble":7136,"bli":9670,"bla":13299,"bok":5630,"bol":12338,"bor":13470,"bbe":3314,"ban":15887,"bas":6749,"bar":10200,"beg":3230,"ber":59590,"ben":7083,"bel":10187,"bes":11143,"bet":17916,"ca ":4920,"ce ":7249,"bri":10671,"bro":7219,"bra":3701,"bre":3338,"bru":13260,"bur":4739,"bun":4581,"bum":12276,"by ":6960,"byg":8032,"aka":4483,"am ":15544,"ake":4072,"al ":27066,"ain":4963,"aj ":9347,"ags":6901,"agn":5084,"anv":11075,"anu":13080,"ano":4964,"ann":22378,"ant":22398,"ans":71523,"ane":7517,"anf":3613,"ang":11657,"ani":15707,"ank":9321,"anl":8379,"ap ":4055,"ana":13418,"anc":5238,"and":127973,"amt":9744,"amm":21193,"aml":16952,"amo":3480,"amn":18156,"amp":5088,"amh":4267,"ami":11713,"amf":3558,"ame":25638,"amb":4074,"ama":6369,"alv":3571,"alt":10698,"als":5130,"alo":3365,"alm":9277,"all":37991,"alk":5184,"ali":21293,"ald":6403,"ale":25866,"ala":19113,"alb":12779,"an ":132556,"akt":15324,"ad ":44388,"aft":3394,"aff":3221,"afi":4365,"aga":8161,"age":18047,"adm":3257,"adi":7918,"ade":78504,"ag ":17369,"ads":8986,"ack":8031,"ach":4677,"ace":4160,"ada":3703,"af ":3707,"at ":35834,"are":94208,"ard":11975,"arb":9854,"ara":27724,"aro":4452,"arn":18125,"arm":4137,"arl":13544,"ark":16487,"ari":37821,"arr":8387,"ars":21054,"art":35107,"asi":5423,"ase":5556,"ask":3632,"ar ":171511,"apa":8845,"ape":7645,"app":4146,"apr":9022,"as ":45411,"ava":3381,"avs":7927,"avi":4830,"ave":5436,"ay ":3818,"av ":125744,"ata":7543,"ast":21648,"ass":11560,"ato":8615,"ate":21603,"ati":48093,"att":63033,"ats":14038,"atu":6856,"aur":3255,"aug":8539,"jer":3396,"jen":6427,"fäl":3203,"fär":6347,"jan":14542,"je ":5239,"jor":7802,"itu":3337,"itt":20776,"ity":3491,"isk":74955,"ism":4171,"iss":12117,"ist":54624,"iv ":6421,"ita":12733,"ite":17827,"iti":20829,"ius":3596,"iva":11600,"ivi":7901,"ive":20061,"is ":19720,"ion":60198,"ir ":3453,"irk":5584,"isi":5287,"ish":4776,"ise":7902,"isa":8098,"ire":6030,"it ":10727,"ja 
":4918,"kil":7942,"kiv":4415,"kin":6276,"gån":6463,"går":10145,"kis":6751,"kho":12716,"kel":5730,"ken":19496,"kes":3520,"ker":29895,"ket":13180,"key":3901,"ke ":13529,"kra":8741,"kre":6291,"kt ":28682,"ksa":5304,"ksd":4444,"kro":5638,"kri":26131,"kot":3776,"kor":12173,"kon":20182,"kom":35824,"kol":9820,"ks ":3388,"kna":9166,"kni":12636,"klu":5282,"kla":15409,"kli":6794,"jul":9684,"jun":10738,"jur":5620,"kat":7694,"kar":20696,"kas":4078,"kap":15909,"kan":45226,"kal":21132,"kam":3425,"kad":6591,"ka ":105354,"för":137814,"föd":63483,"ha ":4594,"ham":7622,"han":27562,"hal":5140,"hav":3856,"har":31426,"had":5793,"he ":12764,"hel":8034,"het":17433,"her":10299,"hen":4180,"hem":4216,"då ":5689,"där":12388,"hin":3839,"his":5950,"gli":6286,"gla":6605,"gni":3551,"gna":6982,"gs ":13333,"gon":4582,"gor":4044,"gsk":4034,"gru":20067,"gra":18317,"gt ":15114,"gre":14703,"gst":6826,"gsm":3229,"gus":10096,"grä":4757,"ial":7807,"ian":12040,"ic ":4032,"ibl":3325,"id ":28142,"ibe":3733,"ia ":17620,"iet":8507,"iel":7300,"ien":33298,"ier":11523,"ies":3414,"ig ":23812,"ift":11636,"ick":11163,"ici":4587,"ich":7049,"ice":6107,"ie ":9770,"ica":5181,"ids":4482,"idr":4877,"idn":3610,"idi":9977,"ide":12497,"ida":10207,"il ":15253,"ika":38088,"ige":28072,"iga":25749,"igh":9261,"igi":4197,"igg":9635,"igt":12741,"ik ":16104,"ime":3935,"ind":16350,"ina":18896,"inn":24862,"ino":16003,"int":17796,"ins":22420,"inf":4654,"ine":12860,"ing":135978,"ini":12277,"inl":6319,"ink":4368,"inv":10239,"ike":30688,"ila":3896,"in ":36372,"ikt":12966,"iks":9950,"ilo":5583,"ill":91188,"ilk":6078,"ilm":10451,"ilj":12364,"ili":8683,"ild":15424,"ile":4047,"io ":6788,"ils":3516,"hol":17007,"hon":3609,"hri":3262,"hum":3706,"hus":7589,"huv":8614,"död":37434,"fes":6198,"fer":3302,"feb":8379,"fat":13524,"far":5867,"fam":7188,"fal":3610,"ext":4721,"exe":4418,"eta":15891,"ete":27600,"eti":4494,"esp":9596,"est":29943,"ess":19229,"ev ":6530,"etr":3912,"ets":17582,"ett":72343,"ety":4212,"ew ":5717,"eve":6119,"eva":3607,"evi":3537,"ey ":7588,"elä":5847,"er ":284777,"eor":5154,"es ":68641,"ept":9611,"epp":4218,"epr":4795,"erk":19481,"erl":7951,"eri":65274,"erg":20456,"erh":4470,"enä":3549,"ere":6711,"erf":4436,"era":63522,"erb":6250,"et ":167149,"esk":4221,"esi":7482,"ese":7369,"erv":5509,"err":9816,"ert":13225,"ers":49047,"ern":40094,"erm":7787,"ero":5248,"eki":3784,"eko":7026,"ekt":14604,"en ":561421,"ela":36480,"ele":19021,"eli":7809,"eln":6244,"ell":67565,"els":32415,"elt":6933,"emb":26969,"ema":5588,"eme":8121,"emm":3946,"emo":6706,"emi":7410,"emp":5979,"ene":8729,"enh":4318,"eng":9041,"enb":3805,"ena":11883,"end":16477,"eno":10317,"enn":10256,"eni":7932,"ens":78815,"ent":44100,"enr":3651,"ege":11795,"egi":9515,"egr":5273,"eis":3601,"ein":4280,"el ":33296,"em ":8949,"öte":5512,"gjo":3575,"öst":10660,"git":4157,"gis":8031,"giv":6533,"gin":4809,"gio":4718,"gic":3318,"gif":3672,"örs":34131,"öra":7478,"örb":6194,"örd":7646,"ghe":6517,"öre":25188,"örf":10182,"örj":5505,"örk":3758,"ggn":3889,"gge":11686,"gga":4221,"gi ":4509,"öpi":3748,"ör ":72432,"gen":70552,"get":15734,"ger":32986,"ges":7614,"gel":15568,"gde":4040,"ge ":21010,"ön ":5859,"öm ":3264,"gas":5062,"gar":34878,"gat":4458,"gan":12485,"ga ":26894,"ögs":4071,"ödr":4070,"bör":6225,"frå":39311,"frä":4616,"fte":17736,"fta":8409,"fun":3567,"ft ":9311,"fra":14521,"fri":9705,"for":23834,"fot":7918,"fol":8477,"fle":7024,"flo":3656,"fly":6056,"fic":5656,"fil":11844,"fik":3556,"fin":14513,"fis":3388,"öve":12758,"da ":24533,"dd ":64224,"de 
":113906,"dad":11525,"dal":5199,"dag":13716,"dat":8203,"das":4305,"dar":13228,"dan":22463,"dam":6624,"dda":3257,"dde":6028,"cks":6976,"ckh":12838,"ckn":5609,"ckl":6273,"öd ":37381,"öde":6673,"ödd":63162,"ch ":194706,"cer":9280,"cen":9841,"cem":8851,"cha":5427,"cia":6069,"ck ":16616,"cie":3837,"che":9375,"chi":3577,"cir":4840,"cke":18899,"cka":6286,"ed ":60806,"ebo":7090,"ebr":11024,"ean":3203,"eat":4659,"ea ":3260,"efo":3657,"eft":11252,"edl":4748,"edi":6332,"edd":3460,"ede":13539,"eda":22012,"edr":4279,"eck":13530,"eci":3498,"ece":9407,"dvä":3688,"dor":6126,"don":6243,"dom":6715,"ds ":21182,"dmi":3679,"dni":12167,"dst":6539,"duc":4844,"dri":7974,"dra":24350,"dre":6617,"dro":7925,"dsk":11908,"dia":4497,"der":69482,"des":48453,"det":49713,"dec":9237,"del":42235,"den":110439,"dem":7457,"dle":5138,"dla":5392,"dli":4421,"din":7458,"dio":6335,"dis":13125,"dit":3226,"die":4951,"dig":14250,"dju":3500,"näm":4493,"när":11567,"näs":3945,"rga":9150,"ri ":24498,"rgi":4148,"rge":9457,"rgs":6893,"ret":18000,"res":20396,"rev":5759,"rfa":11243,"rds":5163,"rg ":18322,"rea":6436,"red":13615,"reg":14934,"rem":4053,"ren":33831,"rek":11220,"rel":8037,"rer":8988,"rep":9910,"rda":6312,"rdn":3815,"rdi":6203,"rde":19138,"re ":105451,"rbu":4396,"rd ":16211,"rar":16692,"ras":12995,"rat":27144,"rav":4107,"rbe":10058,"rag":4677,"ran":40758,"ram":18971,"ral":14704,"rak":6953,"rab":3374,"raf":9108,"rad":32041,"rs ":30266,"rr ":4068,"rlä":3700,"ror":5868,"ros":5853,"rot":10704,"rom":7155,"ron":11508,"rop":10007,"rov":7683,"rod":8744,"roc":6434,"rol":8031,"rof":6006,"rog":7715,"rna":45582,"rne":7715,"rni":4580,"ro ":5673,"rma":11324,"rme":6990,"rli":7045,"rld":8530,"rle":4342,"rla":6287,"rn ":13803,"rks":6561,"rko":6401,"rki":4271,"rke":8992,"rka":21148,"rm ":4966,"rja":6006,"rl ":5718,"rio":4391,"rit":18193,"ris":26661,"riv":11699,"rig":24904,"någ":4585,"ril":11007,"rik":49805,"rin":29008,"rim":3215,"ria":11330,"ric":6800,"rid":7589,"rie":22621,"rif":3813,"rk ":11840,"rup":12524,"run":19578,"rum":7564,"ruk":6602,"rus":3564,"rva":5265,"rvi":3304,"ry ":6473,"rsk":18149,"rsi":7847,"rso":8438,"rsp":7925,"rsa":13567,"rse":4696,"rta":8444,"rst":26007,"rss":3568,"rte":14318,"rth":3501,"rti":15874,"rua":8438,"rts":3730,"rt ":25088,"rri":6493,"rre":7456,"rra":8312,"sak":4471,"sal":8723,"sam":44053,"san":6209,"sat":10892,"sar":8127,"sa ":10458,"rys":4892,"sho":4590,"shi":4912,"sju":4204,"sie":4171,"sid":6241,"sk ":90513,"sit":12645,"sis":10405,"sin":20425,"sio":10464,"sik":13865,"sig":10296,"sda":5839,"sde":3304,"se ":11017,"sch":5874,"ser":32108,"ses":3294,"set":6663,"sed":10233,"sep":9935,"sen":30980,"sel":3990,"spo":4913,"spr":13840,"slä":13781,"spe":39400,"spa":4154,"som":124363,"son":32242,"sor":7813,"skå":7923,"soc":6710,"st ":46043,"ss ":9668,"sli":3748,"slo":3319,"slu":6612,"sky":3403,"sla":13504,"sle":3801,"ski":13633,"skl":5724,"sko":16207,"skr":17282,"sku":3455,"skt":14119,"sfö":5604,"ska":106537,"ske":8526,"sjö":6673,"sni":4681,"sjä":3770,"sma":7643,"sme":3479,"stå":11548,"stä":10967,"syd":3945,"stö":8124,"sys":5192,"svä":4118,"sse":11502,"ssa":10371,"sso":16553,"ssl":3516,"ssi":9151,"sst":3551,"ssp":3681,"ste":53976,"sta":88293,"stn":5297,"sto":24174,"sti":35352,"stu":7051,"str":43893,"sty":3941,"sva":8262,"sve":36524,"tal":35934,"tag":10048,"tad":34944,"tav":3738,"tat":22284,"tas":6110,"tar":38387,"tan":25694,"tam":3270,"te ":29236,"tbo":6832,"ta ":57380,"pa 
":4951,"par":19834,"pas":4154,"pan":9819,"läg":9136,"lär":4866,"läp":5670,"län":20491,"läk":8254,"pen":15225,"per":22100,"pet":5946,"pel":41073,"pla":14119,"pin":6560,"lån":4645,"pis":3811,"låt":5580,"por":8039,"pop":3841,"pos":4574,"pol":17194,"pps":6631,"ppt":7044,"ppl":4401,"ppa":4036,"ppe":13206,"pp ":11986,"pub":4680,"pte":14474,"pru":4841,"psa":5456,"pri":16288,"pre":11182,"pro":27839,"prå":5570,"män":8000,"mäs":4442,"mål":5318,"mån":4648,"ra ":70040,"ngl":9266,"ngr":3616,"ngt":3834,"ngs":25507,"ni ":11042,"nge":55986,"nga":25508,"ngd":5985,"jäl":6437,"jär":7781,"nhe":3391,"neh":3788,"nel":8830,"nen":21817,"ner":30394,"net":14471,"nes":10240,"ng ":83505,"neb":3747,"ned":3616,"nce":6161,"ne ":14860,"ndr":19487,"nds":29703,"ndo":7016,"ndl":5881,"ndi":11752,"nde":95443,"nda":25018,"nal":16599,"nam":15523,"nan":10186,"nar":23024,"nad":14500,"nd ":63041,"nat":21915,"nas":10608,"na ":68634,"ny ":3572,"num":3857,"nus":4249,"nua":9868,"nty":4644,"nto":5785,"ntr":13553,"nti":10623,"nta":12228,"nte":31914,"nsp":4485,"nst":23393,"nss":4316,"nse":13545,"nsi":5055,"nsl":4609,"nsk":92387,"nsa":6209,"nri":3216,"nt ":21290,"ns ":54594,"nom":26142,"nor":19884,"nov":9987,"nne":21868,"nna":22185,"nno":3272,"nni":10650,"nns":9117,"nli":10002,"nn ":5037,"nla":5932,"no ":3529,"nke":3380,"ngå":3281,"nkt":4581,"nkr":3354,"nfö":4263,"nie":10939,"nia":3660,"niv":6000,"nis":23117,"nit":5715,"nio":3817,"nin":62497,"nik":4407,"ogr":10374,"ogi":6891,"ohn":3565,"kän":11729,"oha":5651,"kåd":7525,"ok ":3568,"ol ":3524,"och":189597,"oci":4767,"ock":32660,"ode":11255,"of ":8033,"odu":7243,"og ":7992,"oft":6611,"off":4118,"ofe":5148,"od ":6498,"obe":13311,"nvä":12582,"nvå":8371,"jör":3329,"köp":4602,"ote":5241,"ott":18376,"ots":4197,"oto":5671,"ost":8837,"ota":4879,"otb":6812,"osi":3701,"ose":3675,"oss":3221,"ovi":7790,"ove":12742,"oun":6352,"our":5919,"opp":7590,"ope":5756,"opa":4338,"os ":9800,"or ":33039,"ork":5444,"orm":13532,"orn":10771,"orr":11303,"ord":31398,"ore":8252,"org":22560,"ori":16030,"osa":3703,"ort":28485,"ors":16477,"ot ":11261,"ora":9653,"ola":11528,"on ":79327,"oli":25700,"oll":18015,"olk":10298,"ole":5201,"ols":3442,"olm":16608,"olo":12711,"oly":3210,"oka":3360,"om ":168644,"oke":3223,"okr":4246,"okt":9964,"ona":12877,"ond":7866,"one":32789,"ong":8629,"oni":7172,"ono":6812,"ons":23539,"ont":10940,"oma":9422,"ome":9673,"omb":3421,"omi":4970,"omm":30299,"omk":3355,"omp":5523,"omr":7897,"oms":4749,"op ":3482,"la ":37263,"le ":15487,"lde":11008,"lda":11000,"lds":6297,"ldr":3289,"lac":3318,"lad":21513,"lag":21635,"lan":79933,"lam":3972,"lar":35655,"lat":17536,"las":17797,"lba":3463,"ld ":9319,"lbu":12650,"kvi":3264,"kva":4810,"kus":3478,"kun":6039,"kul":5875,"kså":3945,"kta":4658,"kte":14929,"kti":15289,"kto":12389,"kyr":9176,"gör":5454,"ls ":8684,"lom":6295,"lor":5534,"lod":5577,"log":11812,"los":3900,"lot":3451,"lni":4907,"lme":6188,"lma":4184,"lms":5047,"lti":5165,"lub":5423,"lsk":12154,"lss":5166,"lst":11990,"lta":4364,"lte":4556,"lse":12155,"lsa":4733,"lt ":15274,"häl":3201,"här":4070,"li ":11521,"lev":10258,"les":8492,"let":19531,"ler":49986,"lem":6956,"len":25248,"lek":6522,"led":11323,"lls":18041,"llt":8125,"llv":5128,"lhö":4242,"lla":46043,"lle":53525,"llh":4880,"lli":11251,"lln":3548,"lkr":4019,"ln ":4441,"lke":7097,"lm ":18320,"lje":9323,"ll ":78820,"lja":3349,"lit":24473,"lis":16271,"lin":35763,"liv":5593,"lic":4558,"lia":6013,"lik":14533,"hål":7111,"lig":48482,"lie":9952,"ma ":10795,"maj":9292,"mar":31731,"mas":5427,"mal":5574,"man":40392,"mat":14610,"mbe":28393,"me 
":3701,"med":67946,"met":18659,"mes":8746,"mer":42686,"mel":15875,"men":33082,"lva":3742,"lve":7060,"lun":3206,"lut":8771,"lyg":4277,"hög":7787,"hör":6448,"mpi":3263,"mpe":6539,"mpo":3301,"ms ":6348,"mod":5091,"mon":5448,"mok":4020,"mor":4951,"mot":12947,"mt ":8137,"mst":8336,"mrå":8280,"mus":13621,"mun":18113,"mfö":3853,"min":18579,"mil":15426,"mis":6696,"mit":4808,"mli":14006,"mla":4634,"mn ":9157,"mni":3641,"mne":9027,"mmu":16790,"mma":26822,"mme":13993,"vå ":7032,"väg":7022,"vän":13490,"vär":10896,"väs":8080,"väx":7295,"vån":8847,"ytt":4640,"yta":3770,"yst":7068,"ysk":15364,"yrk":11273,"yra":3515,"yde":3835,"yck":8174,"ya ":3709,"ygg":9192,"xte":5064,"tör":12969,"täl":6340,"xem":4510,"tår":5717,"tär":4618,"täv":5108,"söd":5258,"så ":8428,"sån":9241,"sät":7762,"röm":5410,"rör":3335,"vs ":5015,"vud":9343,"rät":6983,"råk":6071,"vik":5560,"vil":12065,"rån":39221,"vin":14334,"råd":11141,"vid":23789,"vit":5788,"vis":16981,"vli":3664,"rän":8989,"räk":4195,"räm":4496,"räd":4698,"ver":61891,"vet":10570,"ven":64039,"vem":8723,"vec":5468,"ve ":6856,"val":12123,"van":15891,"var":99905,"vat":6662,"va ":9694,"uvu":9284,"usi":13675,"use":6334,"ust":23739,"utg":8418,"uti":3815,"ute":6086,"uta":8659,"utt":3405,"uts":5227,"utv":5038,"us ":21399,"ut ":10089,"ura":3453,"ure":6299,"urg":5379,"uri":5728,"urn":5209,"uro":7154,"urs":7342,"ur ":12946,"upp":32257,"umb":4012,"ume":9374,"unt":7357,"unk":5142,"uni":16744,"unn":5336,"und":51074,"ung":19464,"une":3661,"ukt":5611,"um ":17518,"ult":6645,"ull":6732,"uli":10267,"un ":12274,"ugu":9934,"ude":4661,"udi":4906,"uce":4932,"uds":4702,"udo":3784,"uar":18907,"ubl":5383,"ubb":6201,"två":7001,"typ":3540,"tyr":3920,"tys":8073,"ty ":8321,"trö":5020,"tve":6253,"trä":9790,"tur":14907,"tun":3943,"tud":5778,"tyd":4220,"ts ":26900,"tre":12611,"tt ":116227,"tra":36819,"tri":14508,"tru":8774,"tro":12216,"try":3989,"tse":5043,"tsk":5040,"tst":3437,"tta":33856,"tte":27085,"tti":9635,"ttn":5259,"tts":5893,"ttr":3221,"to ":4895,"tni":11444,"tjä":4558,"tna":3229,"tod":3739,"toc":13127,"tog":4722,"tob":9412,"tom":4310,"ton":13637,"tol":5901,"tor":39112,"til":75428,"tik":17763,"tif":5408,"tie":6576,"tig":8660,"tit":6965,"tis":30207,"tin":14073,"tio":41843,"thu":4634,"tia":4024,"tic":3207,"tid":19952,"tiv":14860,"tli":6668,"tla":6760,"tem":18447,"ten":56126,"tek":5853,"tel":9950,"teb":4880,"tec":5494,"th ":6931,"tex":3539,"tet":22695,"tes":9480,"ter":115723,"tgi":4603,"på ":62169,"ti ":13119,"the":7679},"n_words":[31862602,36956776,26222440],"name":"sv"}
\ No newline at end of file
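Each profile added in this change is a single line of JSON with three fields: "freq" maps 1- to 3-character n-grams to their corpus counts, "n_words" appears to hold the total number of 1-, 2- and 3-grams, and "name" is the language code (here "sv"). A minimal sketch of reading one of these files directly under Python 3; the path and the n_words interpretation are assumptions, not something this diff states:

import json

# Hypothetical path into a local checkout of this tree.
path = "nlp_resource_data/langdetect/profiles/sv"

# Python 3: open in text mode with explicit UTF-8 decoding.
with open(path, encoding="utf-8") as f:
    profile = json.load(f)

print(profile["name"])         # language code, e.g. "sv"
print(profile["n_words"])      # [31862602, 36956776, 26222440] for sv
print(profile["freq"]["en "])  # raw count of the trigram "en " (561421 above)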
diff --git a/nlp_resource_data/langdetect/profiles/sw b/nlp_resource_data/langdetect/profiles/sw
new file mode 100755 (executable)
index 0000000..e745ecb
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/sw
@@ -0,0 +1 @@
+{"freq":{"jer":348,"jen":305,"ji ":6234,"D":1805,"E":874,"F":1081,"G":1202,"A":4461,"B":2717,"C":2251,"L":1530,"M":12761,"N":2782,"O":860,"H":1677,"I":2605,"J":2641,"K":12188,"U":3120,"T":5185,"W":4730,"V":1116,"P":2090,"S":3343,"R":1632,"Y":517,"Z":395,"f":11048,"g":13829,"d":15034,"e":46694,"Feb":214,"b":19688,"c":9784,"a":289584,"n":90468,"o":57043,"l":42025,"m":53651,"j":21456,"k":76835,"h":32492,"i":164978,"w":60984,"v":3863,"u":57506,"t":40551,"s":35298,"r":27443,"p":13501,"z":18893,"y":38832,"x":501,"jar":185,"jan":137,"jaw":201,"é":167,"jim":1500,"jin":4267,"jil":163,"jij":492,"jia":221,"jib":3854,"ito":288,"itu":317,"itw":269,"isp":140,"ist":592,"ita":1061,"ite":213,"iti":334,"ivy":133,"iwa":2430,"ius":183,"ipo":224,"ipi":265,"is ":521,"ion":720,"iop":279,"ipa":165,"ipe":219,"iro":173,"iri":997,"isi":902,"ish":5756,"isa":694,"ire":164,"ira":314,"ja ":1529,"iyo":4644,"iye":227,"izo":242,"izi":413,"iza":568," l":8602,"kif":518," m":27935," n":19872," o":327,"kik":333," h":7652," i":9059,"kij":166,"kim":258," j":5212,"kil":389," k":27977," d":1010," e":802," f":914,"kia":390," g":257," a":6533," b":1252," c":2191,"kiw":279," y":17767," z":2257,"kin":442," u":4361,"kio":148," t":2402,"kip":379," w":34366," v":1482,"kis":520," p":2154,"kit":315," s":6097," r":837,"ki ":2193," J":2627," K":12017," H":1638," I":2128," N":2678," O":803," L":1487," M":12665," B":2646," C":2112," A":4277," F":1046," G":1172," D":1740," E":782," Z":375," Y":513,"и":142," S":3229," R":1588,"а":137," P":2015," W":4707," V":1031," U":3052," T":5117,"kea":156,"kem":150,"ke ":1988,"ku ":187,"kri":520,"kon":141,"koa":3734,"ko ":1214,"ل":165,"ا":240,"juu":155,"jul":257,"jum":177,"kaz":5045,"kaw":137,"kat":14149,"kar":374,"kas":316,"kan":2795,"kao":197,"kal":354,"kam":1048,"kad":160,"kab":375,"ka ":19783," Ga":196,"Da":365," Ge":229,"Co":364," Fr":177,"Ch":770," Ha":622," He":218," Go":142,"Do":469," Gr":177," Gu":142,"De":497,"Di":169,"Fe":311," Id":148,"Fa":160," Hu":173," Ho":177," II":154,"ha ":2668," Hi":392,"Ge":229," Ji":535,"Ga":198," Je":286,"I ":397," Ja":792,"Fr":177," Ir":284," Is":141," It":181," In":316," Ik":143," Il":224,"ham":522,"han":444,"hap":154," Ka":2225,"hai":238,"haj":163,"hak":611,"hal":314," Ke":708," Ki":3568,"har":1714,"has":255,"hat":148," Jo":255,"II ":207," Ju":691,"hag":267,"hab":181,"had":740," La":231," Le":207," Li":441," Ko":414," Ku":695," Kw":4009,"Au":181," Ma":4258," Mb":461,"Ar":475,"As":222," Mk":3388,"Ba":771," Mi":685," Mj":478," Me":615,"Af":445,"he ":544,"Ag":372," Lo":213,"Am":241,"An":463,"Ap":290," Lu":315,"Al":840," Ne":518,"Bu":429,"Br":278," Na":464,"Ca":592," Ni":435,"Bi":308," Mt":420,"Be":362," Mp":146," Mo":643,"Bo":282," Mu":471," Mw":545,"Ku":695,"Kw":4009,"Ko":415,"hez":299,"Le":210,"Li":441,"hes":336,"her":275,"hen":226,"hem":395,"La":231,"Lu":315,"Lo":213,"Me":621,"hi ":3880,"Mi":690,"Mj":478,"Mk":3388,"Ma":4263,"Mb":461,"Mw":546,"Mu":475,"Mt":420,"Mp":146,"Mo":643,"Ni":437,"Ne":518,"Na":466," Ap":290," Am":240," An":463," Al":833,"Ny":247," Ag":372," Af":443,"No":466," Ba":766,"Ok":277," Au":181," As":222," Ar":474," Be":362," Bi":308,"hio":2603,"Gr":177,"Go":143,"hin":1991,"him":244,"hil":432,"Gu":142," Bo":282,"hii":230," Br":278," Bu":429,"his":266,"hir":394,"Ha":622," Ca":582,"hiy":239,"He":219,"II":286,"Hi":393," Ch":768,"Ho":179,"Hu":173," Co":362,"K ":152,"Id":148," Da":365," Di":167,"In":317," De":495,"Ik":143,"Il":226,"Is":141,"It":181," Do":469,"Ir":284,"Ja":792,"Ji":536,"Je":286,"Jo":255,"Ju":691,"Ka":2234,"Has":225,"ho ":334," 
Fe":311,"Ki":3577," Fa":159,"Ke":708,"Us":172,"Ut":325,"Ur":181,"go ":920,"Un":355,"Uk":150,"Ul":189,"Ui":244,"Uj":249,"Uh":170,"Uf":251,"Uc":175,"Tu":237,"To":205,"Th":275,"Te":258," Wi":3377,"Ta":3841," We":188," Wa":1003,"St":260,"Su":178,"Wi":3380,"Wa":1003,"We":189," Zi":141," Za":152,"Vi":670," Yo":250,"Pr":150,"Pe":270,"goz":233,"Pa":858,"Po":195,"Pi":163,"gom":190,"gon":205,"gos":279,"gor":306,"Se":532,"gu ":424,"Si":424,"Sh":518,"So":239,"Ru":370,"Sa":668,"Re":188,"Ri":138,"Ro":385,"Ra":354," Po":195,"guj":253," Pi":163," Pe":270," Pa":857," Ny":247," No":466," Ok":277," Ra":354,"b ":211," Ro":385,"gwe":166," Re":188," Ri":138,"gwa":280,"guz":429," Pr":150,"a ":143240," Su":178," St":248," Ta":3838," Th":274,"Yo":250," Te":257," To":205," Ru":370," Sa":668," Sh":517," Si":421," Se":528," So":239," Vi":666," Tu":231,"Za":152,"Zi":141," Uc":175," Uf":251," Uh":170," Ui":243," Uj":249," Uk":150," Ul":189," Un":355," Ur":181," Us":172," Ut":325," ja":134,"iak":142,"i ":52347,"ian":874," ji":4522,"ias":364,"ge":1928,"iar":235," je":226,"ga":2900," im":145," in":3363," ik":274," il":4878,"fi":1075,"fr":504,"fu":1927,"fo":752,"ibl":142,"ibi":603," ka":16147,"gw":483," ki":3027,"he":2541,"ibu":4111,"ha":8898,"gl":145,"gi":1836,"gh":1233,"gu":1858,"iba":566," ju":300,"go":2336,"du":838,"dw":136,"g ":607," ha":1606,"ea":1091,"eb":539," he":144,"ec":251,"ed":686,"de":1841,"di":4816,"dh":617,"do":1639,"ia ":9119,"dr":203,"ew":912,"ex":163,"eu":261,"ev":332,"ey":739,"ez":1828,"fa":6104,"h ":704," id":219,"fe":174,"eh":737," hi":990,"eg":644,"ef":303,"ee":307,"el":2120,"ek":2577,"ej":155," ho":139,"ei":650,"ep":643,"eo":1165,"en":9965,"em":2423,"et":1296," hu":4749,"es":2258,"er":4147," nj":147,"ca":364," ni":9330,"e ":10467," ng":147," nd":690,"bw":843," nc":2455," na":6269,"br":408,"bu":5373,"bo":2905," mw":6857,"bl":321," mu":4335," mt":648," ms":331,"bi":2134," mp":280," mo":680," mn":1501,"be":1280," mm":157,"ifu":393,"da":3239,"f ":246,"ifo":606," of":164,"co":390," ny":523,"ck":301,"ci":283,"ch":7388,"ce":365,"ifa":585," le":184,"c ":192," li":859," la":7153," ku":5668,"ich":830," kw":2736," km":140,"ica":140," ko":150," me":184," mf":368,"az":6015,"ay":5308," mi":1257,"ba":6016," mj":5191," mk":1617,"d ":1205,"at":22079,"as":4908,"ar":9773," ma":3449," mb":469,"aw":1490," mc":155,"av":414,"au":1667," lu":341,"ak":14678,"al":8458,"idi":551,"ai":5267,"aj":1998,"ao":6210,"ap":5739,"ide":157,"am":9111,"an":29556,"ac":1224,"ad":3126,"ida":813,"aa":1773,"ab":2568,"ag":1596,"ah":1414,"ae":682,"af":1092,"nu":591,"nt":1270,"ns":4895,"no":1318,"nn":478," am":1335," an":488,"nz":5093," ai":153,"iin":242,"ny":7307," aj":134," ak":183," al":2589,"of":4380," au":941,"oc":308,"od":678,"oa":4118,"ob":631," at":195," as":220,"om":1846,"on":3853,"ok":2328," ba":679,"ol":1930,"oi":1488,"oj":1425,"og":855,"oh":360,"ija":140,"ot":1280," bi":222,"os":1066,"ov":580,"ou":534,"ije":137,"op":845,"oo":318,"or":2938,"iji":1232,"r ":1622,"ow":244,"oz":397,"oy":154,"pe":836,"pa":6921,"po":1264,"ph":151,"pi":2193,"ika":13864,"lo":1408,"lm":337,"Ida":135,"ll":791,"ls":182,"iga":224,"ii ":525,"lu":868,"lt":178,"igh":170,"igi":384,"ly":147,"o ":24303,"mc":173,"igo":169,"ma":8274,"mb":6660,"mh":261,"me":2630,"mf":564,"mk":1733,"ml":210,"mi":3477,"mj":5199,"mn":1546,"mm":321,"mp":578,"ihe":138,"mo":6079,"mr":140,"mt":753,"ms":447,"mu":6394,"mw":6988,"ihi":187,"p ":352,"na":23279,"nc":2788,"nd":5575,"ne":2353,"ng":6858,"ni":24361,"nj":567,"nk":135,"imo":196," es":141," en":369,"ju":713,"imf":161,"ime":354," 
el":223,"jo":133,"imi":180,"ki":6922,"kh":154,"ind":834,"ke":2748,"ina":8001," fa":353,"ka":45110,"imu":392,"m ":727," fu":177,"kw":3124,"ino":181,"ks":210,"kt":463,"ku":10532,"ins":133,"ko":5804,"ine":479,"ing":1959,"kr":669," fi":274,"ini":4598,"km":156,"li":17984,"le":2997,"ld":221,"lf":159,"la":14880,"lb":250,"iny":275,"n ":3144,"iko":612,"hw":492,"ht":198,"hu":6825,"iki":2488,"hi":11111," ch":2090,"hn":150,"ho":1180,"ila":4379,"id":1813,"ic":1403,"ib":5595,"ia":11251,"ih":490,"in ":378,"ig":1252," da":146,"if":1790,"ie":672,"iku":2496,"k ":628,"ilo":373,"ir":1982,"is":9376,"it":2904,"ill":288,"iu":466,"iv":385,"iw":2556,"ii":989,"ij":1580,"ik":19966," de":224,"ili":8251,"il":13887,"im":4832,"in":17333,"io":4395,"ile":321,"ip":1169,"ima":914,"je":934,"imb":2471,"io ":2960,"ji":17145,"iz":1362,"iy":4997," du":302,"l ":1018,"ja":2368,"z ":191,"wi":1773,"wo":202,"vy":671," za":1702,"y ":1239,"wa":56175," zi":456,"we":2203,"vi":1632,"vu":418,"vo":138,"uz":1451,"uw":2877,"uv":252,"uu":3068," ye":258,"ve":578," ya":17428,"va":328,"x ":213,"ui":563,"uj":4429,"uk":1643,"ul":2575,"ue":357,"uf":741,"ug":901,"uh":626,"ur":1919,"us":3274,"ut":2784,"um":5397,"un":5099,"uo":368,"up":1077,"ty":166,"tu":2287,"tt":391,"tw":473,"ub":1112,"ua":2111,"ud":534,"uc":476,"w ":435,"to":4407,"huk":345,"hul":146,"tl":220,"ts":343,"tr":455,"te":2280,"ti":12092,"th":999,"ta":14867,"su":644,"ss":500,"st":1842,"sw":308,"sl":142,"sk":865,"sm":139,"sp":289,"so":683,"sc":179,"se":5649,"sh":8151,"si":4764,"u ":13704,"sa":7736,"rr":220,"rs":467,"rt":620,"ru":2279,"ry":287,"ro":1786,"rn":619,"rm":257,"rl":223,"rk":320,"ri":8157,"rg":403,"re":3855,"rd":556,"rc":143,"rb":136,"ra":5018,"t ":1231,"s ":3025,"pt":348,"pu":357,"pw":193,"pr":381," sa":589," se":4480," si":369," sh":318," ra":432," ri":188,"hwa":473,"huo":175,"hum":2789,"hun":282,"hus":506,"hur":418,"huu":1333," pe":176," pa":632," pi":931," wa":33135," we":275," vy":396," wi":862," vi":1013," uc":144,"zi":8597,"ze":368,"za":8043," tu":189,"zw":257," us":165," ut":249," up":502," um":247,"zu":272," un":1571," uk":210,"zo":952," ul":573," uh":139," ta":1410,"ye":2395,"ya":24129,"yu":306," to":170," th":289,"yo":5888," te":201,"yi":4283,"Apr":266,"Asi":146,"Aru":195,"far":316,"fam":283,"fan":4203,"fal":292,"fa ":488,"eya":259,"Bah":237,"Bar":140,"eza":1136,"ezo":172,"ezi":237,"eta":229,"ete":154,"eti":253,"est":247,"ett":212,"ew ":355,"evi":165,"ewe":148,"ey ":361,"ewa":358,"er ":615,"epa":149,"es ":640,"ept":299,"eri":650,"ere":660,"era":456,"Afr":406,"esh":359,"ese":306,"esa":279,"eru":498,"Ago":254,"ert":152,"ers":339,"eku":184,"en ":297,"ela":204,"ele":786,"eli":360,"ell":177,"eo ":852,"emb":1055,"ema":157,"eme":314,"emi":276,"emu":365,"ene":704,"eng":671,"ena":283,"end":498,"eno":221,"eni":486,"ens":4087,"ent":441,"eny":1803,"Ali":478,"ege":351,"Ame":158,"ehe":647,"Ana":176,"el ":260,"eke":267,"eka":1754,"giz":193,"gir":232,"gin":349,"gid":165,"ght":136,"gha":925,"gi ":572,"gen":204,"ger":781,"ge ":611,"gaz":140,"gar":155,"gan":693,"ga ":1334,"Cal":307,"fup":194,"Bib":137,"fua":317,"fum":143,"fun":167,"fri":445,"fu ":810,"for":356,"fo ":342,"fil":269,"fik":168,"fiz":146,"da ":1525,"de ":752,"dad":386,"dae":220,"dar":151,"dan":305,"dam":173,"Des":272,"Dar":167,"Chi":216,"Chu":136,"Cha":300,"ch ":165,"cha":2430,"chu":596,"ck ":143,"che":571,"chi":3152,"cho":370,"ed ":154,"ebr":313,"ea ":663,"ei ":346,"efu":197,"edi":297,"ee ":156,"don":150,"dom":308,"dol":151,"dog":335,"dun":335,"dha":302,"dia":330,"dhi":240,"der":146,"deg":261,"del":152,"di ":2661,"do 
":429,"Dod":240,"diy":201,"din":291,"dis":387,"dik":302,"ri ":2373,"rez":420,"rea":148,"ref":154,"reh":266,"ren":163,"rek":1672,"re ":305,"rd ":213,"ras":256,"rat":173,"Ni ":218,"New":381,"rai":160,"ran":867,"ram":226,"rab":297,"rad":150,"ron":135,"rog":253,"rne":169,"rni":283,"ro ":593,"riw":166,"ris":508,"ril":300,"rik":1688,"rin":373,"ria":769,"rib":1011,"ric":160,"rk ":191,"ruf":262,"rum":452,"ruk":315,"rus":423,"ry ":194,"rse":228,"Nya":144,"rua":234,"rt ":160,"ru ":273,"sab":458,"sac":139,"san":482,"sas":180,"sa ":5643,"Nov":242,"sha":1745,"sho":271,"she":240,"shi":5099,"si ":1365,"siw":355,"sia":608,"shw":458,"shu":187,"sis":157,"sin":881,"sil":283,"sim":158,"sik":319,"sey":212,"ser":175,"set":147,"Okt":259,"seh":319,"sen":4083,"sem":335,"spa":151,"son":242,"su ":198,"st ":167,"sko":136,"ska":599,"so ":134,"ssa":198,"ste":192,"sta":295,"sto":444,"sti":401,"str":197,"swa":181,"tai":280,"taj":233,"tak":462,"tal":339,"taa":220,"tab":242,"taw":344,"tat":292,"tar":668,"tao":3872,"tan":641,"tam":288,"te ":507,"ta ":6480,"pa ":765,"pat":4120,"pak":235,"pap":248,"pam":300,"pan":895,"pi ":233,"ped":156,"Pap":368,"pia":789,"pil":189,"pin":267,"pis":162,"pit":144,"po ":743,"pte":287,"pri":298,"pwa":189,"Rai":176,"ra ":1932,"ngo":958,"ngi":1065,"ngu":1084,"ngw":363,"ni ":18823,"Iri":209,"nge":937,"nga":1742,"Ita":147,"neo":505,"nes":161,"ng ":405,"nch":2504,"ne ":911,"ndu":263,"ndo":574,"ndi":1835,"nde":1085,"nda":1162,"nak":251,"nal":257,"nam":1855,"nan":221,"nao":1457,"nap":185,"nac":183,"nad":288,"naf":402,"nai":158,"naj":196,"nd ":409,"nat":353,"nas":439,"nay":454,"na ":15738,"Jan":271,"Jam":281,"nya":1379,"Jer":215,"nye":1338,"nyi":4239,"nus":133,"nua":282,"Jim":174,"Jin":277,"nti":403,"nta":151,"nte":177,"nsi":211,"nsa":4269,"nt ":232,"ns ":140,"nne":236,"no ":948,"nji":138,"nja":269,"Joh":134,"nia":4199,"nis":530,"ogo":593,"ois":1291,"oji":173,"oja":1149,"Jul":285,"Jun":259,"odo":288,"of ":150,"ofu":134,"ofa":3991,"oa ":3810,"oan":188,"oba":375,"nza":3817,"nzi":1111,"Kai":144,"Kag":175,"Kal":167,"Kan":354,"Kat":474,"Kas":372,"Kar":232,"Ken":632,"ozi":165,"Kis":329,"Kir":165,"Kit":204,"Kin":148,"Kib":138,"Kia":309,"ote":378,"Kik":287,"Kil":453,"Kim":202,"oto":331,"Kig":295,"Kii":249,"ost":309,"ota":195,"ove":320,"opo":325,"os ":178,"or ":161,"Kon":197,"orn":300,"oro":673,"ore":188,"ori":369,"ort":147,"ora":378,"ola":427,"on ":838,"oli":431,"ole":357,"olo":331,"oka":1580,"oke":163,"oko":236,"oku":141,"ona":230,"ond":383,"one":151,"ong":860,"oni":784,"oma":766,"omb":303,"omi":249,"omo":182,"op ":143,"la ":8089,"le ":1011,"Kwa":3975,"laa":157,"lai":293,"lak":564,"lan":660,"lam":497,"lat":186,"lay":3727,"Kus":393,"lba":165,"kuz":236,"kuw":2713,"kuu":1305,"kut":1795,"kus":492,"kur":190,"kup":186,"kun":409,"kum":210,"kul":297,"kuj":187,"kwe":591,"kwa":2512,"kub":762,"kuf":233,"kuh":134,"kua":620,"kto":308,"lom":136,"loj":136,"lme":241,"Lin":225,"lug":350,"lu ":155,"li ":2787,"lez":192,"lew":193,"lev":140,"les":155,"leo":178,"lem":198,"len":254,"lek":133,"lo ":347,"lla":138,"lle":153,"lli":198,"ll ":147,"lit":241,"lis":337,"lip":257,"lio":738,"lin":627,"lim":922,"liz":411,"liy":4415,"liw":979,"lic":340,"lia":1497,"lik":2742,"lil":529,"lih":179,"lif":397,"ma ":2611,"mb ":139,"maa":449,"maj":397,"mak":522,"mad":206,"mae":140,"mag":342,"mar":439,"mas":613,"mal":159,"mam":161,"man":1055,"mat":406,"mba":3047,"mbi":361,"mbe":389,"mbo":2343,"me 
":516,"mbu":267,"mch":170,"met":211,"mer":252,"men":492,"mfa":152,"mez":387,"mfu":373,"Mei":250,"Man":216,"Mar":1940,"Mas":472,"Mag":282,"Mak":206,"Mac":287,"Mbe":273,"mpi":142,"mon":163,"moj":1127,"mpa":160,"Mor":279,"mu ":1602,"mtu":175,"mto":226,"Mic":182,"Mis":147,"msh":144,"mta":228,"mwe":383,"mwi":345,"Mko":3178,"mwa":6205,"Mku":138,"Mji":464,"muj":3839,"muz":374,"mhu":232,"Mtw":147,"mi ":359,"mji":5175,"min":192,"mil":749,"Mwa":460,"mit":295,"mia":630,"mik":321,"mo ":4413,"mku":1038,"mko":539,"mna":1501,"mmo":145,"Wik":149,"Wil":3077,"Wan":148,"zwa":252,"zi ":5785,"zai":249,"zaj":254,"zam":177,"zan":3194,"zal":783,"zar":173,"zo ":612,"zia":533,"zin":815,"zil":197,"zik":548,"zis":240,"一":303,"yof":3874,"yot":286,"za ":2981,"ye ":1320,"yen":237,"ya ":21762,"yar":252,"yan":567,"yao":167,"yam":250,"yak":657,"yo ":973,"yin":213,"yik":3954,"一一":144,"Tan":3407,"Tab":164,"Shi":315,"Sin":201,"Sep":283,"we ":401,"wez":265,"wen":1037,"wim":286,"wil":741,"Sal":197,"vyo":257,"wa ":33121,"wap":4111,"wan":3901,"wal":617,"wam":169,"wak":9923,"way":141,"wat":368,"war":238,"was":172,"wai":2667,"wah":176,"vu ":165,"vya":351,"vil":200,"vin":183,"vit":187,"vis":284,"Rom":180,"vem":244,"Vij":328,"uzi":743,"uza":470,"Uje":235,"uwa":2760,"uvu":174,"ush":417,"usi":1319,"use":183,"usa":176,"uu ":2892,"usu":216,"ust":207,"uso":141,"uti":211,"ute":137,"uta":560,"Uin":218,"utu":215,"uto":1436,"us ":536,"Ung":252,"ura":183,"ure":140,"uri":491,"uru":630,"unz":137,"Ula":150,"upa":554,"upi":311,"umu":162,"umi":484,"umo":2705,"uma":686,"umb":661,"ume":297,"uo ":238,"uni":940,"und":747,"una":1741,"ung":1193,"uku":302,"uko":457,"uki":429,"uka":247,"ulu":258,"uli":1405,"ule":192,"ula":478,"ukw":139,"uhu":267,"uji":4010,"uja":302,"Utu":261,"ugh":514,"ufu":352,"uhi":136,"ugu":137,"udi":174,"ubw":695,"uch":343,"ufa":176,"ufi":189,"ua ":369,"uat":317,"uar":494,"uan":690,"uba":185,"Uch":175,"ty ":146,"twa":450,"tur":369,"tun":270,"tum":424,"Ufa":219,"ts ":214,"tu ":896,"The":164,"tts":142,"to ":986,"tob":268,"tom":167,"ton":281,"tok":1553,"tol":482,"tor":246,"tik":8147,"tis":158,"tin":351,"tio":199,"thu":171,"tia":156,"tem":384,"ten":273,"tel":171,"th ":160,"ter":432,"ti ":2389,"the":225,"thi":213,"biw":209,"bis":191,"bil":315,"bin":256,"bo ":2326,"bli":173,"bor":262,"be ":229,"bam":230,"ban":516,"bal":619,"bah":147,"baa":227,"bab":179,"bay":333,"bar":432,"bao":277,"bi ":662,"ber":216,"bel":151,"bey":251,"bia":222,"ce ":176,"bu ":4649,"bru":221,"bur":149,"bun":177,"bwa":786,"aka":10583,"am ":337,"ake":1982,"aki":644,"aji":1355,"aju":170,"al ":304,"aja":293,"ain":393,"air":222,"ais":2933,"aif":267,"aid":437,"ahi":308,"aha":751,"agh":475,"agu":395,"aoi":1233,"anu":344,"anz":4756,"any":4453,"ano":638,"ann":141,"ant":323,"ans":490,"ane":261,"ang":1660,"ani":7747,"anj":260,"ana":4702,"anc":133,"and":2300,"amu":1047,"amo":1890,"amp":179,"amh":222,"ami":838,"ame":637,"amb":1658,"ama":1868,"ao ":4649,"alo":269,"alm":262,"all":133,"ali":5324,"ale":476,"ala":1026,"alb":152,"an ":1167,"akr":376,"aku":502,"ako":215,"aba":751,"abe":140,"abi":660,"abo":208,"abu":582,"ae ":291,"aad":302,"aan":389,"aal":140,"aam":185,"aar":236,"aa ":361,"afi":303,"ai ":477,"aga":223,"age":227,"afu":225,"aen":162,"ael":172,"afa":411,"ado":269,"adh":288,"adi":1538,"ach":840,"ada":637,"azo":205,"azi":5401,"aza":186,"ayo":638,"aya":4140,"aye":284,"ba ":2178,"are":1998,"ard":317,"ara":2057,"aro":249,"ari":3153,"aru":316,"art":243,"au ":993,"asa":1084,"asi":1169,"ash":895,"ask":665,"ar ":568,"apa":4869,"api":162,"apo":406,"as 
":271,"aut":148,"awa":1126,"awi":190,"ata":10070,"ast":167,"ass":197,"ato":634,"ate":225,"ati":9962,"ath":135,"atu":749},"n_words":[1316698,1560317,1165243],"name":"sw"}
\ No newline at end of file
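These profile files are the data the detector scores input text against at runtime. A minimal usage sketch under Python 3, assuming the vendored package keeps upstream langdetect's public API (detect, detect_langs and DetectorFactory are upstream names; this diff itself only adds the data files):

from langdetect import detect, detect_langs, DetectorFactory

# Fix the RNG seed so repeated runs give the same answer.
DetectorFactory.seed = 0

print(detect("Hii ni lugha gani?"))        # expected: 'sw' (Swahili)
print(detect_langs("Det här är svenska"))  # e.g. [sv:0.99...]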
diff --git a/nlp_resource_data/langdetect/profiles/ta b/nlp_resource_data/langdetect/profiles/ta
new file mode 100755 (executable)
index 0000000..facb43d
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/ta
@@ -0,0 +1 @@
+{"freq":{"D":612,"E":464,"F":392,"G":381,"A":1033,"B":541,"C":1226,"L":485,"M":882,"N":489,"O":332,"H":425,"I":870,"K":359,"T":782,"W":287,"V":311,"P":878,"S":1046,"R":508,"f":1073,"g":1998,"d":2759,"e":8290,"b":1281,"c":2926,"a":10092,"n":6488,"o":5986,"l":4436,"m":3185,"k":952,"h":3181,"i":7588,"w":661,"v":857,"u":3410,"t":6374,"s":4316,"r":6588,"p":2229,"y":1574,"x":422,"ித ":544,"ில ":698,"ிய ":7880," m":283," o":562," d":292," a":454," c":438," t":733," p":559," s":429," r":341," K":344," H":326," I":509," N":361," L":347," M":788," B":464," C":741," A":832," F":341," G":361," D":414," E":381," S":843," R":426," P":727," T":657,"ாங்":593,"ாசி":571,"ாகி":1226,"ாகு":5727,"ாகா":403,"ாக்":2294,"ாகக":726,"ாகப":554,"ாகம":1267,"ாகவ":971,"ாகங":337,"ாகத":594,"ானி":1010,"ாந்":804,"ாநி":2007,"ானத":779,"ாப்":821,"ான்":1998,"ாடு":3023,"ாட்":5816,"ாடல":810,"ாடி":363,"ாடக":761,"ாத்":727,"ாண்":1485,"ாதம":306,"ாது":396,"ாதா":507,"ாதி":637,"ாணப":442,"ாணத":365,"ாஸ்":323,"ிங்":711,"ிச்":643,"ிசை":617,"ிடி":332,"ிடப":478,"ிடம":356,"ிடத":463,"ய":68757,"ம":127919,"ன":81690,"ந":46905,"ப":128120,"த":166688,"ண":25132,"ாரா":410,"ாரி":765,"ா":101548,"ி":184542,"ஸ":4761,"ஹ":1196,"ார்":5370,"ழ":21652,"வ":79830,"ஷ":1166,"ர":109358,"ற":58813,"ல":91486,"ள":62504,"ஏ":2039,"எ":16082,"ஊ":889,"உ":10751,"ஈ":845,"இ":32538,"ஆ":19361,"அ":23958,"ஃ":562,"ாறு":598,"ஞ":2219,"ட":101059,"ாலத":613,"ஜ":3516,"ச":50507,"ங":15929,"ாலம":447,"ாற்":1321,"க":197565,"ஒ":11957,"ஓ":1288,"ஐ":1009,"ாயக":290,"ாமல":357,"ாமி":434,"b ":301,"ாம்":1576,"ோ":14983,"ொ":17674,"ை":61878,"்":437238,"ௌ":348,"ூ":11561,"ாயி":304,"ு":185888,"ீ":8590,"ே":20308,"ெ":21142,"ாரண":513,"a ":854,"ாரத":304,"ாய்":1383,"ாரம":364,"ிகா":518,"ாவட":2380,"ாவத":750,"ாவர":299,"ிகை":370,"ிக்":6769,"ாழ்":1243,"ிகோ":462,"ாவி":3762,"ாவா":383,"ாலு":670,"ாலி":445,"ாளர":1347,"ால்":3272,"ாலை":675,"ாளா":324,"ாளி":348,"ிகள":3325,"ிகழ":507,"ாள்":696,"ிகப":373,"i ":647,"ினை":666,"ge":282,"ினி":620,"ga":286,"ினா":801,"ினர":628,"ினம":287,"ிந்":854,"he":687,"ha":472,"g ":377,"ea":314,"ec":300,"ed":390,"de":481,"di":495,"ிப்":4537,"h ":377,"el":404,"en":840,"em":293,"et":386,"es":700,"er":1464,"ின்":17928,"ca":348,"e ":1989,"da":327,"f ":407,"ct":322,"co":377,"ch":435,"ce":352,"ியே":373,"ியூ":340,"ிரத":942,"ியு":993,"ிரம":574,"ியோ":394,"ியை":838,"ிரப":448,"ியவ":789,"ியல":3188,"ியர":810,"ியி":4273,"ியா":6620,"ியத":1203,"ிமு":497,"d ":748,"at":1135,"as":419,"ியம":1215,"ar":1055,"ியன":1057,"ிமை":522,"ியப":743,"ியக":532,"al":1246,"ai":378,"ap":295,"am":828,"an":1637,"ac":346,"ad":343,"ிமா":411,"nt":654,"ns":302,"of":412,"om":409,"on":1256,"ol":463,"ிடை":404,"ou":276,"or":868,"ிட்":1455,"ிடு":635,"r ":1101,"pa":374,"lo":330,"ll":406,"o ":326,"ma":480,"mb":415,"me":406,"mi":333,"na":737,"nd":662,"ne":446,"ng":642,"ni":455,"ித்":3054,"m ":787,"ிணை":304,"ிண்":482,"li":572,"le":592,"ிதி":409,"la":866,"n ":1450,"ht":310,"hu":372,"hi":390,"ic":779,"ia":502,"ig":354,"is":617,"it":600,"il":395,"in":1071,"io":687,"l ":875,"ிஸ்":689,"y ":837,"ve":327,"x ":278,"ur":632,"us":340,"um":519,"un":283,"tt":320,"to":401,"tr":385,"te":756,"ti":1167,"th":985,"ta":522,"st":661,"se":294,"si":396,"rt":301,"ro":534,"ri":933,"re":684,"ra":1084,"t ":1022,"ீடு":326,"ீட்":724,"s ":1609,"ிரி":3610,"ிரா":1282,"ிர்":1294,"ிறப":627,"ிரை":1621,"ிரே":484,"ிறந":629,"ிறத":2122,"ிரு":4047,"ிறா":398,"ிறி":463,"ிறு":1338,"ிலங":466,"ிலம":1354,"ிற்":1776,"ிலத":1747,"ிலா":649,"ிலு":1997,"ிலி":1089,"ில்":23900,"ிலை":1645,"ிலே":416," த":26595,"ிழக":839," ய":1289," ம":33424," ப":40065," ந":18582,"ிழம":471,"ிழர":540," வ":24461," ல":1016," 
ர":1685,"ிளை":610," ஸ":454," ஹ":795," ஈ":841," உ":10726," ஊ":885," எ":15892," ஏ":2016," அ":23901,"ிழி":439," ஆ":19313," இ":32484," ச":22093,"ிவந":792," ஜ":2159," ட":1284," ஐ":986," ஓ":1277," ஒ":11946," க":32960,"ிவர":337,"ிழ்":3205,"ிவம":324,"ிவி":1334,"ிவா":521,"ிவு":1106,"ீதி":457,"ீர்":719,"ீன்":337,"ுச்":849,"ுங்":895,"ீவு":404,"ீழ்":360,"ுக்":10659,"ுகழ":319,"ுகி":3737,"ுகா":398,"ுகள":4206,"ுட்":1213,"ுடை":558,"ுடி":1579,"ுடு":653,"ுடன":1030,"ுநா":474,"ுனி":297,"ுபட":358,"ுந்":3187,"ுன்":993,"ுனை":280,"ுப்":4254,"ுது":799,"ுதி":3266,"ுண்":554,"ுதல":2039,"ுதப":476,"ுதன":363,"ுத்":6198,"ுரி":516,"ுரு":740,"ுரை":511,"ுறி":2453,"ுறு":308,"ுறை":2894,"ுற்":571,"ுளா":391,"ுளி":404,"ுள்":6921,"ுமை":377,"ுமு":342,"ுமா":937,"ும்":36442,"ுரம":392,"ுவை":304,"ுவி":1320,"ுவா":2204,"ுவர":1344,"ுவம":286,"ுவன":802,"ுவத":1697,"ுழு":744,"ூடி":578,"ூட்":652,"ூன்":565,"ூரா":577,"ூரி":517,"ூர்":1687,"ூறு":472,"ூலம":428,"ூற்":526,"ூல்":531,"ூலி":289,"அக்":419,"அதி":733,"அதன":471,"அணி":287,"அணு":326,"அடி":812,"அப்":365,"அன்":299,"அனை":343,"ஆகி":979,"ஆகு":4632,"ஆக்":285,"அழை":700,"அவர":558,"ென்":1743,"அல்":2972,"அளவ":598,"ெப்":657,"அரு":474,"அறி":1258,"அமெ":613,"அமை":3884,"அரச":1350,"அம்":318,"ெண்":620,"ஆங்":1422,"ெட்":778,"ெடு":671,"ஆசி":448,"ஆண்":2706,"ஆட்":480,"ஆம்":1544,"ைய ":1236,"ஆய்":339,"ஆரம":608,"ஆறு":307,"ஆற்":386,"இக்":380,"ஆவத":1054,"ஆவா":643,"ேயே":307,"ேட்":292,"இங்":400,"இசை":860,"ேண்":284,"இடத":337,"ேதி":335,"இடம":403,"இடை":433,"இத்":877,"இது":3256,"ேசி":564,"ேசு":307,"ேசப":275,"இணை":776,"இதன":761,"இந்":4776,"ெயர":1260,"ெயற":334,"ெயல":776,"இப்":496,"இன்":581,"ெர்":351,"ெறு":383,"ெய்":2021,"ெரி":1478,"ெரு":1277,"இயற":542,"இயல":277,"ெல்":969,"இம்":396,"இரண":675,"ெளி":2069,"இயக":775,"ெற்":1979,"இயங":307,"இலக":714,"இலங":1277,"இரு":3631,"இரா":2331,"இவ்":582,"இவை":339,"இவர":1304,"ைப்":5756,"இல்":712,"ையை":1039,"ையே":430,"ையத":341,"ையா":3283,"ையி":4758,"ையு":2157,"ையம":618,"ைமை":414,"ைத்":2427,"ைந்":3238,"ைநக":408,"ைச்":1529,"ேளம":371,"ேளக":469,"ேற்":1275,"ேலி":402,"ேலு":410,"ேர்":1922,"ேறு":768,"ேரூ":552,"ேரி":362,"ைக்":5944,"ேவை":525,"ைகள":3265,"்க ":1447,"ைவு":308,"ைவி":438,"ைவர":377,"ொன்":519,"்ப ":344,"ோக்":753,"ொள்":838,"ொழு":386,"ொழி":2875,"ொல்":850,"ொலை":513,"ொற்":349,"்ள ":4240,"ொரு":2529,"்ற ":3387,"்ட ":3672,"ொகு":770,"ொண்":2337,"ொது":899,"்த 
":5466,"ொடு":300,"ொடங":359,"ொடர":1238,"ோப்":332,"ோன்":1334,"ோயி":659,"ோர்":868,"ோரி":632,"ோரா":279,"ோவி":367,"ோட்":1097,"ோடு":420,"ோது":603,"்கை":2757,"்கோ":938,"்கொ":643,"்க்":1264,"்கூ":637,"்கு":14044,"்கெ":383,"்கே":554,"்கா":4646,"்கி":8271,"்கள":15575,"்கல":1350,"்கர":1200,"்கவ":322,"்கப":3910,"்கம":1458,"்கத":1249,"்கண":471,"்கட":610,"்கங":350,"்சா":888,"்சி":4832,"்சு":884,"்சை":360,"எல்":437,"எழு":1211,"எடு":356,"எண்":595,"எதி":426,"எனப":826,"என்":8881,"்தா":3028,"்து":12325,"்தி":19739,"்த்":1974,"்தோ":339,"்தொ":367,"்தை":2288,"்தத":1008,"்ணி":349,"்தம":644,"்தப":1133,"்தன":629,"்தவ":959,"்தர":950,"்தல":971,"்பை":508,"்பே":287,"்பெ":966,"்போ":717,"்பொ":819,"்பி":4248,"்பா":4739,"்பூ":376,"்பு":5441,"்ப்":1248,"்பந":489,"்னை":343,"்பன":396,"்னி":1023,"்னா":524,"்பட":14770,"்னு":1025,"்பத":6302,"்பவ":714,"்பம":709,"்பர":1317,"்நி":304,"்நா":1754,"்பக":746,"்னர":478,"்ந்":2122,"்டத":3858,"்டன":373,"்டப":375,"்டம":1689,"்டர":578,"்டங":415,"்ச்":1088,"்தக":679,"்ட்":1036,"்டோ":535,"்டை":1115,"்டு":8476,"்டி":5785,"்டா":1951,"்டவ":480,"்வை":282,"்வத":502,"்வர":423,"்வி":886,"்வா":1151,"்வு":876,"்வே":640,"்ஸ்":867,"்மா":564,"்மன":338,"்யு":285,"்மை":1050,"்மு":410,"்யப":368,"்ளத":1941,"்லு":367,"்லூ":277,"்லி":796,"்லா":1037,"்றை":756,"்ளா":277,"்ளி":589,"்ளன":998,"்லை":579,"்றா":2083,"்றி":2742,"்று":6173,"்லத":2780,"்றழ":440,"்றவ":489,"்றம":405,"்றல":286,"்றத":1488,"்றன":1215,"ஐக்":380,"ஏற்":808,"ஊர்":281,"உரி":362,"உரு":1122,"உறு":409,"உலக":903,"உயி":570,"உயர":401,"உள்":3206,"உதவ":285,"உண்":440,"என ":651,"மக":2002,"மங":313,"மத":1505,"மண":816,"மட":614,"மன":2412,"யல":4462,"யற":1277,"யர":2780,"யவ":955,"ரக":668,"ரச":2387,"ரங":785,"ரட":422,"யா":11572,"யு":3982,"யூ":642,"ரண":1624,"யி":11562,"யீ":287,"ரத":2928,"ரப":1866,"யே":1284,"ரன":414,"யை":1942,"ரம":3652,"ய்":4254,"யோ":852,"மய":595,"மம":514,"மற":2418,"மர":1631,"மல":1338,"யக":1931,"யங":868,"மா":15034,"மி":8348,"மீ":1286,"மு":9054,"மூ":1942,"யத":2022,"மெ":1524,"மே":3645,"மை":8679,"யன":3058,"யப":1368,"மொ":2460,"மோ":314,"ம்":59753,"யம":2603,"பங":437,"னவ":812,"பக":2737,"னல":278,"னர":2255,"னம":1302,"ந்":20330,"நோ":770,"னப":1121,"பன":834,"னை":2914,"னே":371,"பந":774,"பத":7767,"பண":1265,"னு":2330,"னி":5915,"னா":2676,"பட":17323,"னக":412,"நவ":322,"நல":298,"நே":639,"நெ":1117,"நு":803,"நீ":1405,"னத":1823,"நூ":1307,"நி":7179,"னட":305,"நா":7474,"னங":575,"ப்":34661,"பொ":3990,"போ":3721,"பல":2780,"பள":417,"பழ":657,"பவ":1066,"னோ":401,"பம":782,"ன்":48370,"பய":2032,"பர":3306,"பற":996,"பூ":994,"பே":2341,"பெ":6101,"பை":873,"பி":11022,"பா":9941,"பு":9453,"பீ":476,"தை":3979,"தோ":1044,"தொ":4546,"தே":2805,"தெ":1540,"த்":34813,"தழ":321,"தவ":1763,"தூ":638,"தீ":1160,"து":39998,"தா":7230,"தி":33129,"தப":1845,"ணை":1673,"தன":4207,"தந":348,"தத":1637,"தள":614,"தல":4546,"தற":1613,"தர":2410,"தய":384,"ண்":13361,"தம":6359,"நட":1795,"நக":2290,"ட்":26642,"டோ":698,"டே":309,"டை":5277,"டெ":379,"டு":23773,"டி":13483,"தட":432,"ணா":409,"ணி":3340,"ணு":927,"தக":1336,"ணவ":477,"தங":477,"ணம":1051,"ணர":362,"ணத":619,"ணப":596,"ணக":587,"ணங":275,"ாக":19861,"ாங":594,"ாச":1598,"ாஜ":510,"ாட":11489,"ாத":4020,"ாண":3622,"ான":10191,"ாந":3098,"ாப":1499,"ார":8693,"ாற":2311,"ாம":3725,"ாய":2982,"ிக":14337,"ாழ":1765,"ாவ":8717,"ால":7588,"ாள":3674,"ாஸ":333,"ிங":712,"ிச":2403,"ிஞ":334,"ித":5697,"ிண":920,"ிட":5029,"ிம":2804,"ிந":1298,"ிப":5786,"ின":22037,"ிள":1636,"ிழ":6094,"ீக":398,"ிவ":5662,"ிய":32313,"ிர":16244,"ிற":8462,"ில":35051,"ீட":1354,"ிஸ":713,"ீச":331,"ீத":858,"ீன":904,"ுக":21093,"ீழ":591,"ீவ":685,"ீர":1646,"ுட":5430,"ுங":895,"ுச":1183,"ஸ்":4460,"ஹா":281,"வந":1310,"வன":1956,"ழை":1756,"வப":343,"வர"
:10633,"வம":1198,"ழ்":5992,"வள":1206,"வழ":1620,"வற":1169,"வல":1808,"வக":1767,"வச":597,"வங":465,"ழி":4553,"வட":4450,"ழா":328,"வத":5184,"வண":714,"ழு":3446,"வெ":3428,"வே":3105,"வை":3927,"வோ":475,"வ்":1486,"ங் ":289,"வா":8141,"வீ":991,"வி":15999,"வு":6267,"ஷ்":653,"ரோ":1002,"றப":1173,"ரை":4461,"றன":1468,"ர்":31916,"றம":626,"றத":3830,"ரூ":812,"ரு":27373,"ரீ":451,"ரே":1017,"றந":763,"ரெ":389,"றங":282,"ரா":8822,"ரி":12841,"ரல":936,"ரள":279,"ரர":859,"றக":313,"ரவ":1332,"லப":708,"லய":334,"லம":3186,"ற்":17148,"லர":750,"று":11549,"லத":5640,"றை":4508,"றா":2833,"றி":8018,"றல":363,"றழ":451,"லக":2902,"றவ":909,"லங":2178,"ளர":1895,"ல்":45274,"ளம":1268,"லோ":710,"ளப":432,"ளன":1049,"லை":8052,"லே":726,"ளத":2348,"லூ":633,"லு":4434,"லா":5224,"லி":6034,"ளங":419,"ளக":843,"லவ":809,"ழர":599,"ழல":340,"ள்":22645,"ழம":740,"ளை":6098,"ளி":11162,"ளு":3887,"ளா":3042,"ழங":730,"ழக":1857,"ளவ":1419,"எழ":1223,"எல":532,"கமா":1059,"கம்":3148,"என":11165,"எட":532,"எண":597,"எத":529,"கரத":596,"கரம":753,"கரா":588,"கரு":1925,"கரி":702,"கரை":390,"கர்":1591,"ஊர":477,"கற்":356,"கலை":1083,"உய":994,"உர":1677,"உற":823,"உல":1062,"உள":3281,"கலா":280,"கழக":436,"கல்":1081,"களை":4493,"கள்":9772,"களி":6714,"களா":1687,"ஈழ":284,"களு":3253,"உட":908,"உத":494,"உண":848,"இம":448,"கழ்":778,"இந":4780,"இன":1053,"இப":504,"இவ":2571,"இய":2152,"இர":7002,"இற":478,"இல":3110,"கவி":404,"கவல":360,"இட":1530,"இண":833,"இத":5637,"இங":400,"இச":1168,"ஆய":571,"ஆம":1554,"ஆற":784,"ஆர":1083,"ஆவ":1949,"இக":392,"கவு":1099,"ஆன":368,"ஆப":334,"ஆட":604,"ஆண":2834,"ஆங":1423,"அவ":1452,"ஆச":513,"அர":2426,"அம":5128,"ஆக":6177,"அழ":1041,"அள":802,"அல":3618,"அற":1466,"அந":302,"அப":488,"அன":910,"அண":903,"அத":1978,"அட":1678,"அச":379,"அக":783,"ஃப":444,"காண":1037,"காத":305,"காட":1197,"காக":926,"கிப":294,"கிர":1793,"கிற":2778,"கிய":4454,"கிழ":1101,"கில":2584,"கிள":303,"டங":1551,"டக":2338,"ஞர":411,"ஞ்":1568,"கான":927,"டம":3786,"காப":476,"டப":1246,"டன":1702,"காம":341,"கார":1102,"கால":2029,"டத":5604,"கிக":280,"காவ":696,"டா":3183,"கிட":434,"டவ":894,"டற":287,"டல":2022,"கின":3040,"டர":1996,"குள":472,"குர":499,"குற":3053,"ஜன":765,"கும":15556,"குழ":660,"குவ":564,"கூட":1198,"ஜெ":307,"ஜி":368,"ஜா":311,"குக":975,"கீழ":409,"ஜூ":479,"குட":1461,"குப":912,"குத":3019,"சட":699,"சந":447,"சன":838,"சத":750,"ங்":15912,"சம":2542,"சர":1035,"சப":743,"சல":387,"சீ":884,"சு":4739,"சா":3736,"சி":12172,"சை":2278,"செ":5006,"சே":2116,"கூற":724,"சூ":935,"ச்":6667,"சோ":703,"சொ":1085,"கெட":300,"சக":830,"சங":473,"க்":40970,"கைப":322,"கோ":3746,"கொ":4347,"கே":1313,"கை":5279,"கைய":1982,"கெ":595,"கைக":801,"கூ":2267,"கு":33794,"கீ":677,"கி":19030,"கா":10953,"கவ":2505,"கொல":275,"கொள":837,"கொண":2252,"கொட":399,"கம":4718,"கர":7242,"கற":456,"கல":3250,"கள":26914,"கழ":1431,"கன":1126,"கப":5340,"கட":3472,"கத":3423,"கண":2578,"கக":1180,"கச":448,"கங":1149,"ஒன":2135,"க்க":31923,"ஒர":8154,"கோர":492,"கோய":755,"கோவ":374,"ஓர":763,"கோட":700,"ஒல":300,"ஒள":283,"கோண":286,"ஏற":1035,"ஐக":383,"க்ட":361,"சக்":631,"்ட":31792,"்த":54197,"்ண":1523,"am ":356,"்ந":4822,"்ப":46615,"்ன":4757,"்க":63255,"்ச":10386,"al ":518,"ோம":420,"ோய":1038,"ோர":2106,"ோற":374,"ோல":725,"ோவ":577,"ோட":1840,"ோப":835,"ோன":1596,"ோத":1196,"ோண":311,"ொர":2700,"ொற":791,"ோச":380,"ொழ":3265,"ோக":1274,"ொல":1580,"ொள":880,"ொட":2489,"ொன":573,"ொண":2347,"ொத":1247,"ொக":1140,"and":321,"an ":334,"்வ":6141,"்ஸ":1010,"்ல":6654,"்ற":20507,"்ள":9314,"்ம":4103,"்ர":1658,"்ய":1630,"சு 
":1200,"ூன":663,"ூத":353,"ூர":3051,"ூற":1305,"ூல":1999,"ூழ":431,"ுந":4175,"ுண":1116,"ுத":14095,"ுய":369,"ுர":3253,"ும":39489,"ுன":1831,"ுப":5592,"ுவ":9858,"ூக":634,"ுழ":1268,"ுள":8293,"ுல":1485,"ுற":6692,"ூட":1699,"ைவ":1932,"ைந":3816,"ைப":6286,"ைய":15390,"ைம":945,"ேல":1655,"ேற":2141,"ேர":3972,"ேய":740,"ேவ":930,"ைக":9533,"ேள":1091,"ேன":443,"ேம":327,"ைத":2725,"ைச":1726,"ெள":2310,"ெல":1418,"ேக":639,"ெய":4586,"ெற":2754,"ெர":3220,"ெப":689,"ென":2012,"ேண":375,"ேத":757,"ேட":601,"சி ":2531,"ேச":2248,"ெக":289,"ெண":621,"ெட":1667,"சை ":644,"ச் ":3259,"ati":457,"ஒளி":281,"ஒலி":275,"ஒரு":7842,"ஒன்":2133,"கே ":463,"கை ":1667,"கா ":505,"ச ":506,"க ":8647,"கு ":5427,"கி ":1073,"கச்":348,"ா ":4799,"ி ":16257,"கங்":1149,"ீ ":453,"ு ":58822,"கக்":1119,"ே ":3658,"ை ":18418,"ோ ":1470,"் ":167861,"கன்":508,"கப்":5197,"கணி":938,"கண்":880,"கத்":2578,"கதை":393,"கணக":371,"கடல":480,"கட்":1973,"ட ":4634,"ண ":792,"த ":6809,"ஓர்":624,"ன ":8248,"ப ":465,"க் ":7275,"ர ":1232,"ய ":9777,"ம ":541,"ழ ":296,"ள ":4773,"ல ":2350,"ற ":3775,"வ ":559,"ஜன்":468,"ion":604,"டி ":2152,"டா ":281,"டு ":6809,"ஜூல":309,"டை ":967,"ட் ":1325,"சங்":473,"he ":350,"சத்":482,"சட்":614,"சமய":437,"ங்க":15531,"சம்":940,"சமூ":398,"சன்":386,"சப்":396,"சந்":419,"சர்":489,"சிவ":528,"சில":849,"சிற":1550,"சிர":415,"சிய":2735,"சிப":459,"சின":560,"சித":585,"சிங":311,"சிக":977,"சால":592,"சார":993,"சாத":575," of":373,"சென":474,"சேவ":338,"சைக":291,"சேர":1297,"செல":774,"செய":2741,"சும":401,"சுற":312,"சுர":362,"சுவ":666,"சீன":355,"ing":312,"சுக":389,"சுத":324,"சூழ":384,"ச்ச":3340,"சைய":843,"சொல":606,"சொற":288,"டக ":523," th":509,"ட்ப":853,"ட்ட":19814,"ட்ச":2725,"ட்க":1235,"டைய":2172,"டைப":576,"டைக":531,"ணி ":660,"er ":398,"es ":343,"ண் ":586,"து ":27911,"தை ":1771,"தே ":341,"தி ":2552,"தா ":680,"டங்":1549,"டக்":1238,"ஞர்":315,"ஞ்ச":1519,"டமா":725,"டமை":297,"டம்":2072,"டர்":1679,"டன்":1240,"டப்":1121,"டத்":4115,"டது":1165,"டிய":2991,"டிப":984,"டின":1240,"டித":607,"டிவ":1222,"டில":1216,"டிர":537,"டாண":290,"டிட":382,"டிச":401,"டிக":1256,"டாவ":366,"டார":303,"டுப":1408,"டும":4650,"டுத":2954,"டுவ":1115,"டுள":785,"டுக":5293,"டாக":695,"டல்":1207,"தாக":1315,"தான":1137,"தாள":370,"தால":691,"தாவ":511,"திக":2302,"திச":403,"தாய":342,"தார":1089,"தின":3651,"திப":796,"திட":432,"திவ":299,"திம":291,"தில":7958,"திற":783,"திர":5993,"திய":6938,"துக":2030,"தீவ":583,"துண":389,"துட":443,"துப":404,"துள":2935,"துற":954,"துர":579,"தும":1383,"துவ":2062,"தூர":370,"தேச":1067,"தெற":289,"தென":697,"தைக":641,"தேவ":512,"தைச":349,"தேர":391,"தைய":745,"தொட":1925,"தொக":1037,"தொழ":704,"தொல":527,"தப்":1587,"ண்ம":630,"ண்ப":578,"தமி":4632,"தமா":339,"ண்க":445,"ண்ண":1196,"ண்ட":9721,"தம்":1071,"தயா":275,"தற்":1515,"தலா":427,"தலி":372,"தல்":1648,"தலை":1494,"தர்":937,"தவர":662,"தவி":419,"த்த":29791,"ணங்":275,"ணத்":603,"த் ":4416,"ணக்":539,"ணுக":317,"ணிக":735,"ணிய":592,"ணின":373,"ணித":474,"ணைய":630,"தந்":298,"தனி":760,"தன்":2053,"தனை":537,"ணைக":298,"தது":557,"தத்":779,"ணம்":660,"ணப்":571,"தங்":477,"தகவ":361,"தக்":543,"ணர்":292,"ந்ந":363,"ந்த":19873,"ng ":303,"னம்":921,"னர்":1811,"பகு":1877,"னது":1015,"னத்":650,"நேர":433,"நெட":597,"னப்":1023,"நோக":445,"நிக":520,"நாள":908,"நாய":319,"நான":310,"நாத":286,"நாட":4702,"நிய":299,"நிர":619,"நிற":1236,"நில":3876,"நீர":548,"நீத":282,"நுட":372,"நூற":484,"நூல":812,"னக்":283,"னங்":574,"பற்":788,"பலர":285,"பர்":1261,"பரி":581,"பரப":390,"பயன":1636,"பம்":365,"of 
":362,"பவர":561,"பல்":1018,"பன்":334,"னைத":388,"பந்":594,"னைய":458,"னைவ":278,"னைக":358,"ன்ம":1125,"ன்ற":10571,"ன்ப":7799,"ன்ன":4301,"பமா":350,"ன்ட":278,"ன்க":958,"ன்ச":382,"னும":1622,"னார":414,"படம":801,"னிக":390,"னால":860,"படத":483,"படை":1050,"னின":559,"பட்":5520,"னிய":1758,"னில":287,"படி":920,"படு":7705,"னித":927,"பத்":1191,"பது":4499,"பதா":323,"பதி":845,"பண்":714,"பதற":308,"பணி":371,"பங்":436,"பக்":415,"னவர":288,"on ":583,"பல ":913,"ன் ":22050,"னை ":1086,"பட ":307,"னா ":289,"னி ":918,"ப் ":6813,"பை ":319,"நடி":497,"பு ":2795,"நடை":406,"பி ":577,"நகர":2167,"மணி":297,"ரே ":289,"மது":349,"மதி":359,"மத்":609,"ரு ":7941,"மட்":449,"ர் ":16957,"ரை ":1204,"றன ":856,"மன்":1232,"மனி":790,"மல்":385,"மலை":666,"யக்":1396,"மம்":421,"மரப":361,"மரு":359,"லக ":292,"மற்":2141,"றி ":716,"மாந":2194,"மான":2448,"மாத":610,"மாக":4202,"மிய":352,"மின":800,"று ":3977,"மாவ":2559,"மிக":1315,"மாற":977,"மார":743,"யங்":868,"யா ":1169,"ய் ":704,"யை ":1008,"யே ":849,"மகா":275,"ரா ":278,"ரி ":1486,"மங்":313,"மக்":1291,"மி ":296,"மே ":505,"மை ":1120,"ம் ":53178,"பின":1559,"பிய":742,"பிர":3751,"பிற":1054,"பில":819,"பிக":486,"பிட":1127,"பாத":646,"பான":683,"பாய":372,"பால":940,"பாள":428,"பார":981,"பாக":1005,"பாட":3059,"பாண":798,"பூர":411,"புத":1109,"புவ":296,"புல":660,"புற":357,"புர":1163,"பீட":289,"புக":1683,"பேர":1352,"பெண":307,"பெய":1296,"பெர":1861,"பெற":2100,"பேச":556,"போட":316,"பொற":395,"பொர":2127,"பொத":938,"போல":281,"போர":737,"போன":1011,"போத":864,"ப்ட":297,"ப்ப":26657,"ப்ர":488,"ளகர":469,"ளக்":350,"லம்":2464,"லமா":457,"லர்":460,"லப்":639,"ழ் ":1697,"ற்க":4226,"ற்ச":313,"ற்ப":2340,"ற்ற":9892,"றைக":742,"றைய":1266,"வே ":437,"ளத்":348,"ளது":1904,"லைக":1350,"லேய":338,"வை ":1638,"லும":2793,"லுக":305,"லிர":748,"லிய":1493,"லில":989,"லூர":479,"லுள":616,"வி ":735,"லிக":541,"லாற":423,"லாள":499,"லாம":957,"லிப":277,"லின":516,"வு ":1989,"லாக":721,"லான":565,"லாந":354,"ளங்":417,"லகி":465,"றம்":427,"ர்ச":956,"ர்க":5162,"ர்வ":1496,"ர்ம":805,"ர்ப":1555,"ர்ந":1909,"ர்த":1839,"ர்ட":360,"றப்":1150,"ள் ":12275,"ரைய":957,"றந்":718,"ரைப":1450,"றனர":342,"ளை ":2755,"ரைக":424,"றது":3133,"றத்":625,"லத்":2727,"லது":2796,"றுவ":1225,"றும":3125,"றுப":986,"றுத":532,"றுக":1202,"றில":535,"றிவ":770,"றிய":1917,"றின":604,"றிப":1027,"ழு ":279,"றித":312,"றார":526,"றிக":1316,"றாண":394,"றாக":1080,"ழி ":982,"லங்":2176,"லக்":1207,"றழை":438,"ரபு":329,"ரப்":990,"ல் ":34411,"ய்க":394,"ய்த":601,"ய்ய":1010,"ய்வ":739,"ரம்":2579,"ரமா":606,"ரர்":568,"ரலா":483,"யுள":362,"ரண்":815,"ரதே":555,"ரத்":1609,"ரது":278,"ளன ":701,"லை ":2437,"ரன்":291,"ளி ":635,"ராவ":406,"ரிக":2239,"ரால":435,"ராய":388,"ராம":869,"ரிச":541,"ரித":580,"ரிம":685,"ரின":1028,"ரிப":456,"ராக":2778,"ராச":370,"ராஜ":438,"ராட":1211,"ரான":615,"ருங":473,"ருட":752,"ருக":4077,"ருப":877,"ரும":2991,"ருத":1851,"ருந":3294,"ரிய":3104,"ரில":735,"ரிவ":1163,"ருவ":2848,"ருள":1674,"ரூர":620,"umb":290,"ரல்":362,"றங்":282,"மொழ":2149,"யப்":1295,"மைந":2631,"மைய":1873,"மைப":1460,"யன்":2673,"யமை":293,"யம்":1435,"ம்ச":279,"ம்ம":840,"ம்ப":4940,"யமா":620,"மூல":729,"மூன":356,"முழ":336,"மூக":412,"மீட":317,"மிழ":4631,"முற":1812,"மும":549,"முன":1115,"முத":1979,"முட":981,"முக":1304,"மென":349,"மெர":642,"றை ":1441,"மேல":805,"மேற":920,"மைக":943,"மேள":882,"யத்":1278,"யது":423,"லி ":729,"லா 
":378,"ரங்":785,"ரசி":802,"ரசு":618,"யும":2862,"யிர":1042,"யில":6388,"யார":713,"யாவ":2647,"யாள":533,"யாழ":607,"யிட":437,"யின":2554,"யிய":701,"யாக":2905,"யாட":446,"யான":1415,"tio":513,"thu":314,"யல்":3022,"யலா":554,"யலி":566,"யற்":1115,"யர்":1747,"ter":301,"the":291,"யவர":291,"யவற":284,"வும":1702,"வாக":2334,"வாத":306,"வாச":280,"விக":832,"வாழ":922,"வால":316,"வான":889,"வாய":566,"வார":1236,"வித":708,"விண":306,"வின":2946,"விச":326,"விட":1092,"வில":3171,"விள":1040,"விய":1955,"விர":904,"விற":467,"வுக":1344,"வீர":321,"ஸ் ":1708,"வ்வ":1164,"வேற":846,"வேத":466,"வெள":2232,"வைய":661,"வைத":379,"வைக":768,"வேல":442,"வற்":1078,"வரி":937,"வரு":1641,"வரா":523,"வரை":1143,"வர்":4658,"வலை":345,"வல்":686,"வளர":473,"வழங":567,"ழ்ந":1868,"ழ்த":442,"ழ்வ":520,"ழ்ப":759,"ழ்க":412,"வரத":404,"வரல":407,"வம்":690,"வழி":728,"வடக":309,"வடி":817,"ழிப":303,"ழிக":564,"ழுவ":343,"ழும":521,"வட்":2653,"ழிய":1078,"ழில":887,"ழுத":1586,"வதற":541,"வதா":350,"வது":3201,"வத்":327,"வப்":301,"ழைக":1109,"வந்":1239,"வனம":324,"ழைய":304,"வன்":676,"ள்ள":9236,"ள்க":555,"ழர்":543,"ழமை":586,"வகை":1129,"வங்":465,"ழங்":729,"ளாக":1168,"ளிப":395,"ளின":2302,"ளால":742,"ளிக":491,"ளில":4426,"ளிய":1106,"ளிவ":966,"ளுட":327,"ளுக":1636,"ளும":1077,"ளுள":663,"ளைக":897,"ளைப":388,"ளைய":1472,"லைவ":579,"லைய":1591,"லைம":331,"ளனர":279,"லைப":623,"லைந":485,"லைத":348,"ளப்":366,"ல்ல":5939,"ல்வ":1233,"ல்ப":618,"ல்ந":316,"ல்க":1857,"ளம்":661,"ளமா":505,"ளர்":1471,"ழகத":295,"ளவை":282,"ழக்":933,"ளவு":424,"ளவி":419," நட":1718," நக":1501," நா":4504," நி":4279," நெ":933," நே":568," நூ":1176," நீ":1214," நு":519," நவ":316," பட":1904," பண":1105," பத":1073," பன":342," நோ":691," பக":1834," பங":296," பு":2928," பா":4041," பி":5649," பூ":498," பெ":4588," பே":1964," பற":718," பர":1247," பய":1942," பழ":608," பல":2289," பொ":3056," போ":2704," மன":1180," மட":504," மண":418," மத":890,"ிக ":435," மக":1685," மை":493," மே":3012," மெ":772," மொ":2003," மு":6572," மீ":747," மி":1990," மா":7055," மூ":1353," மர":989," மற":2304," மல":556,"ால ":279," டி":434,"ார ":281," தட":289," தக":507," தர":734," தற":353," தல":1342," தம":4737," தய":281," தன":1258," து":1451," தீ":745," தூ":310," தி":4210," தா":1353," தெ":1340," தே":1470," தொ":4079," தோ":559,"�":1175," ஸ்":417,"ான ":5124,"ாண ":290,"ாத ":411," ரா":553," யா":744," வட":1713," வண":356," வத":741," வக":1085," வச":354," வர":2716," வல":690," வழ":1368," வள":796," வந":384," வி":5451," வீ":639," வா":2177," வை":533," வே":1627," வெ":2854," உண":846," உத":491," உட":908," ஈழ":284," உள":3279," உல":1058," உற":820," உர":1675," உய":988," இண":833," இத":5634," இட":1528," இங":400," இச":1164," ஆவ":1943," இக":391," ஆய":570," ஆம":1553," ஆற":781," ஆர":1069," ஆப":332," ஆன":365," இவ":2571," இல":3087," இற":478," இர":6995," இய":2150," இம":448," இப":504," இன":1051," இந":4772," எட":530," எத":528," எண":593," என":11142," ஊர":477,"ாக ":5165," அக":780," அச":377," அட":1678," அத":1971," அண":901," அந":301," அன":909," அப":485," அம":5119," அர":2421," அற":1466," அல":3607," அள":796," ஆக":6168," அழ":1038," அவ":1449," ஆங":1422," ஆச":512," ஆட":602," ஆண":2832," சர":400," சம":1684," சந":326," சட":638," சங":283," சக":625," ஜூ":466," ஜெ":287," ஜன":633," சோ":491," சொ":885," சா":1798," சி":3884," சீ":724," சு":1865," சூ":738," செ":4613," சே":1783," ஒர":8153," ஒன":2134," ஏற":1035," ஐக":383," எழ":1219," எல":516," கவ":401," கு":5595," கூ":1517," கா":3988," கி":3235," கீ":461," கொ":3490," கோ":2006," கை":284," கே":530," கண":2002," கத":414," கட":2692," கன":550," கழ":306," கள":362," கல":1562," கர":2268," ஒல":300," ஒள":281," 
ஓர":762,"ஸ்க":347,"ஸ்ட":597,"ஸ்த":763},"n_words":[2733895,2995227,2314467],"name":"ta"}
\ No newline at end of file
diff --git a/nlp_resource_data/langdetect/profiles/te b/nlp_resource_data/langdetect/profiles/te
new file mode 100755 (executable)
index 0000000..2a23704
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/te
@@ -0,0 +1 @@
+{"freq":{"ొరి":395,"D":266,"E":177,"F":114,"G":196,"A":519,"B":275,"C":368,"L":194,"M":322,"N":270,"O":138,"H":192,"I":350,"J":111,"K":142,"U":147,"T":316,"V":136,"P":450,"S":521,"R":276,"f":670,"g":908,"d":1461,"e":4276,"b":801,"c":1375,"a":5389,"n":3366,"o":3005,"l":2162,"m":1421,"j":112,"k":391,"h":2159,"i":3899,"w":466,"v":490,"u":1533,"t":3619,"s":2456,"r":3191,"p":1106,"్వ ":163,"y":766,"x":252,"్ల ":671,"్ర ":1330,"ొని":112,"ొన్":374,"్య ":1153,"్మ ":263,"ొదల":145,"ొదట":235,"ొత్":201,"్ప ":113,"్న ":986,"్ద ":312,"ొట్":128,"్ధ ":159,"్త ":383,"్థ ":302,"్ణ ":129,"ొక్":438," l":107," m":164," o":442," h":116," i":265," e":97," f":148," a":447," b":182," c":254,"్క ":414," t":666," w":134," p":388," s":269," r":221," J":106," K":133," H":173," I":300," N":209," O":104," L":173," M":286," B":241," C":316," A":428," F":98," G":181," D":227," E":138," S":425," R":253," P":381," V":124," U":129," T":274,"ొంద":344,"ొండ":319,"ل":112,"ఉద్":166,"ا":171,"ఉన్":1055,"ఉత్":337,"ఉపయ":174,"్కా":196,"్కి":134,"్గం":289,"్కు":347,"్కృ":177,"్కర":105,"్కల":130,"్కడ":197,"Ma":108,"Na":100,"र":112,"ा":126,"In":167,"Th":137,"Ra":122,"b ":223,"a ":692,"i ":282,"ge":141,"ga":105,"he":507,"ha":394,"gh":206,"ోబర":105,"ోయి":127,"g ":144,"ea":167,"ec":109,"ed":292,"de":174,"di":203,"h ":262,"ోమీ":223,"el":221,"en":398,"em":129,"et":241,"es":437,"er":625,"ca":177,"e ":1121,"ౌ":562,"్":81970,"ై":3997,"ొ":4527,"ోర్":149,"ో":18200,"ె":10608,"ే":14569,"ీ":10360,"ు":57347,"ూ":6654,"be":157,"ృ":1348,"da":157,"f ":323,"ct":100,"co":201,"ci":133,"ch":165,"ce":190,"ఘ":447,"ఛ":149,"చ":15206,"c ":186,"జ":11057,"ట":14914,"ఞ":164,"ఐ":184,"ఓ":160,"ఒ":2543,"క":36325,"గ":19947,"ఖ":1770,"ఈ":2196,"ఉ":3268,"ఊ":158,"ఎ":1173,"ఏ":730,"ం":43411,"అ":7490,"ఆ":3748,"ఇ":3357,"హ":4682,"ోలు":99,"స":22838,"ay":103,"ి":69802,"ా":61461,"d ":570,"at":690,"ళ":2227,"as":298,"ల":47610,"ar":638,"ర":61617,"ష":5311,"శ":7224,"వ":26890,"ప":26129,"ఫ":1064,"al":617,"న":46878,"ai":134,"మ":31451,"య":19501,"ap":114,"బ":7288,"am":273,"భ":4603,"an":764,"ac":150,"ణ":4171,"ad":159,"ఠ":238,"డ":16340,"ab":131,"ద":24907,"ag":119,"ధ":5601,"త":28253,"థ":2033,"nt":373,"ns":115,"of":309,"ోత్":140,"om":163,"on":653,"ol":158,"ot":144,"os":117,"ou":169,"op":107,"or":408,"r ":477,"pe":126,"pa":178,"ph":109,"lo":168,"ోదా":327,"ll":190,"o ":201,"ma":274,"mb":260,"me":185,"mi":151,"p ":107,"na":453,"nc":108,"nd":442,"ne":251,"ng":261,"ni":232,"ోనే":164,"ోనూ":131,"ోని":1569,"ka":99,"m ":247,"li":262,"le":271,"la":311,"n ":759,"ht":245,"hu":238,"hi":188,"ho":127,"id":115,"ic":417,"ia":224,"ig":225,"ie":147,"ir":142,"is":387,"it":280,"iv":131,"il":208,"in":615,"io":345,"ోపా":108,"l ":472,"ోజక":467,"ోజు":1239,"y ":444,"vi":97,"ve":202,"va":103,"x ":181,"ul":111,"ur":199,"us":218,"um":308,"un":137,"tt":129,"w ":99,"to":206,"tr":177,"te":479,"ti":559,"th":697,"ta":313,"ss":111,"st":281,"se":152,"sh":199,"si":231,"u ":109,"sa":101,"rs":111,"rt":125,"ry":99,"ro":238,"rn":149,"ri":529,"re":343,"ra":524,"t ":648,"s ":985,"px":158," అ":7472," ఇ":3349," ఆ":3719," ఉ":3240," ఈ":2188," ఊ":158," ఏ":725," ఎ":1157," ఐ":178," ఒ":2539," ఓ":155,"ొలి":167," క":10463," ఖ":358," గ":5736," ఘ":134," చ":6063," జ":5157," ట":497,"ొల్":112,"ోకి":106,"ోక్":172,"ోగి":184,"ya":98,"అసె":136,"అల్":163,"అరబ":119," డ":594," త":4637,"అర్":220," ధ":398," ద":3929," న":6927," ఫ":510," ప":14617," భ":2487," బ":2399," య":1063," మ":10243," ర":6209," ల":4976," వ":10214," శ":2160," ష":216," స":10032," 
హ":1355,"అమె":140,"అయి":199,"అభి":134,"అప్":103,"ఆధా":126,"్స్":527,"్సు":127,"్సి":119,"్సవ":107,"్ష్":173,"్సర":1414,"ఇంట":99,"ఇండ":128,"ఇంద":101,"ఇంక":425,"అక్":257,"్మక":121,"్మం":100,"్రం":1049,"్మీ":100,"్మి":779,"్మా":335,"్యక":335,"్యం":700,"్రజ":278,"్యా":2178,"్యు":431,"్యల":131,"్యవ":431,"్రక":927,"్యప":108,"్యమ":477,"్యత":143,"్యన":109,"్ధి":325,"్ధా":136,"్నద":482,"్ని":1493,"్నూ":99,"్నా":798,"్పట":140,"్పడ":119,"్పా":169,"్పి":132,"్పు":480,"్వత":158,"్శక":248,"్వవ":145,"్వర":328,"్వన":132,"్వే":159,"్వి":140,"్వా":901,"్వహ":104,"్షణ":127,"్షి":442,"్యే":161,"్రధ":337,"్రప":534,"్రత":526,"్యూ":276,"్రద":990,"్రయ":138,"్రల":146,"్యో":108,"్రమ":1464,"్రభ":230,"్రస":657,"్రహ":283,"్రవ":528,"్రి":1462,"్లం":396,"్రీ":1126,"్రు":244,"్రా":3971,"్రే":209,"్రె":472,"్రో":201,"్లా":2320,"్లి":708,"్లీ":237,"్లు":538,"్లూ":142,"్లె":187,"్లో":427,"్వం":342,"్ళు":159,"అధి":174,"అనగ":184,"్జా":141,"అదే":167,"్టమ":140,"్టణ":417,"అన్":250,"్ఞా":122,"అనే":708,"అని":495,"అను":265,"్గొ":101,"్గా":358,"్గర":100,"్గమ":101,"్చు":270,"్చి":657,"్చే":115,"అతి":194,"అత్":202,"్తు":1013,"్తూ":214,"్తి":885,"్తా":732,"్థల":171,"్త్":578,"్ణా":297,"్థం":104,"్ణు":99,"్తర":228,"ఆంధ":768,"్దు":149,"్ది":135,"్నం":164,"్దా":110,"్ధం":145,"్థా":493,"్థి":196,"ఆంగ":412,"్టి":519,"్టా":168,"్టు":592,"్టీ":169,"్టె":145,"్టే":107,"్టో":130,"్ట్":1367,"్తం":168,"్తక":101,"్డు":210,"్డి":275,"్డా":104,"ఇస్":134,"ఇవి":157,"ఈయన":143,"ఉంద":273,"ఉండ":260,"ఉంట":317,"ఇక్":152,"ఆస్":106,"ఆయన":107,"ఆర్":219,"ఇది":937,"ఇతన":164,"ఇతడ":125,"ంగు":105,"ంగా":1620,"ంగీ":190,"ంగ్":842,"ుబ్":104,"ంచం":115,"ంచడ":196,"ంచబ":269,"ురం":244,"ుమా":305,"ంచి":2098,"ంచా":900,"ంచు":217,"ంచే":428,"ునర":115,"ును":308,"ుని":447,"ంకట":194,"ున్":877,"ంకా":441,"ంగం":127,"ంకు":100,"ంక్":110,"ుపు":220,"ంకే":111,"ుప్":149,"ంగల":110,"ంట్":162,"ుతా":153,"ుతు":406,"ంటూ":289,"ంటు":309,"ంటి":346,"ంటా":630,"ంటే":263,"ండవ":146,"ండర":435,"ండల":1528,"ండా":227,"ుత్":335,"ుతో":165,"ండు":332,"ండి":934,"ుదల":235,"ండే":174,"ండ్":373,"ంతం":178,"ునక":539,"ుద్":398,"ుట్":223,"ుటు":199,"ుడి":231,"ుడై":99,"ుడు":1669,"ూడా":424,"ెం ":180,"ుస్":362,"ువు":534,"ువా":295,"ులన":225,"ులక":309,"ుర్":295,"ురి":204,"ురు":297,"ురా":315,"ులో":1709,"ుల్":297,"ులు":2048,"ులల":295,"ీపు":398,"ీని":626,"ీన్":101,"ుంట":481,"ుంచ":300,"ీటి":186,"ీడా":155,"ుంద":899,"ుండ":917,"ుంబ":248,"ీటర":270,"ీసు":107,"ీస్":140,"ుగు":1331,"ుగా":707,"ుఖ్":339,"ీవి":185,"ీవు":115,"ుక్":186,"ుకొ":218,"ుకో":172,"ుకు":711,"ీలు":116,"ీరు":184,"ీర్":177,"ీరా":161,"ీరి":154,"ేక ":252,"ృతి":120,"అంద":240,"ృత్":158,"అంత":284,"అంట":498,"ృద్":113,"ేట ":153,"ృష్":430,"ూబ్":222,"ంవత":1405,"ూర్":913,"ూరు":920,"ూరి":178,"ూమి":99,"ంస్":461,"ూరం":247,"ూడి":137,"ూన్":122,"ంనగ":106,"ంధి":102,"ంద్":655,"ందు":615,"ందీ":121,"ందూ":127,"ంది":3900,"ందా":120,"ందర":242,"ంత్":325,"ంతో":223,"ంతి":125,"ంతా":223,"ంతు":154,"ంతమ":531,"ంతర":294,"ంప్":193,"ంబం":197,"ంధ్":823,"ంబ్":147,"ంబర":346,"ంబా":123,"ంలో":2499,"ెక్":232,"ెగొ":368,"ెట్":682,"ేంద":485,"ెద్":322,"ెను":111,"ెన్":295,"ెడ్":327,"ైన ":1371,"ెంక":174,"ెండ":749,"ెంట":159,"ెంద":1703,"ెంబ":376,"ేయబ":121,"ేర్":242,"ేరు":810,"ేశమ":156,"ేవా":233,"ేవి":124,"ేవు":99,"ేశం":473,"ేశ్":948,"ేషన":164,"ేశా":391,"ేసు":136,"ేసే":128,"ేస్":215,"ేసి":385,"ైదర":183,"ైనా":98,"ెప్":281,"ెర్":117,"ంత ":201,"ెరి":197,"ెరు":117,"ండ ":260,"ెళ్":227,"ెల్":334,"ెలి":127,"ెలు":1031,"ెస్":269,"ంచ ":118,"ేట్":100,"ేదా":763,"ేది":324,"ేత్":212,"ేపల":115,"ేని":152,"చక్":98,"జీ ":270,"జు ":869,"చడం":113,"చడా":97,"జ్ ":143,"చబడ":285,"చయి":169,"చర్":162,"చరి":182,"టం 
":155,"చిమ":219,"చిన":1351,"చిత":839,"చాయ":132,"చార":671,"చాల":242,"చాడ":421,"చిం":529,"చుట":109,"చుక":184,"చెర":102,"చేత":111,"చేశ":113,"చేస":711,"చేయ":279,"చేర":138,"చెం":1703,"చెప":157,"టక ":161,"చ్చ":739,"జకవ":368,"జట్":132,"జయం":110,"జయన":97,"జనా":117,"జనవ":101,"జన్":546,"జరి":198,"జరు":168,"జర్":124,"జిల":1917,"జాత":476,"జాబ":132,"టు ":571,"డం ":328,"టీ ":163,"టి ":1355,"ట్ ":976,"జూన":102,"జుల":578,"టే ":302,"జీవ":381,"జ్య":270,"జ్ఞ":163,"కలవ":99,"కలద":130,"కల్":174,"కలు":183,"కలి":375,"కళా":130,"కవర":379,"కవి":191,"కమై":100,"కము":112,"కరణ":169,"కరి":149,"కరీ":108,"కర్":483,"కృష":374,"కృత":281,"కూర":117,"కేం":355,"కెట":368,"కేత":124,"కొల":164,"కొన":532,"కొత":121,"కొం":355,"కాక":237,"కాన":220,"కిం":115,"కాం":219,"కిల":278,"కుం":288,"కాల":753,"కావ":146,"కాశ":150,"కార":1413,"కువ":215,"కుల":554,"కుమ":181,"కూడ":482,"గంల":249,"కున":307,"కుడ":387,"కుట":214,"క్త":493,"క్ట":234,"క్ర":1156,"క్ష":1241,"క్య":503,"క్స":190,"కోట":132,"కోవ":118,"కోస":136,"క్క":1351,"గణి":99,"చు ":184,"గాం":158,"గిం":251,"చి ":1132,"గల్":120,"ఖ్య":557,"గరం":165,"గము":239,"గర్":467,"గోద":304,"జక ":102,"గ్గ":278,"గ్ల":521,"గ్ర":2803,"గొర":375,"గొం":139,"గుం":380,"గీత":230,"గిన":273,"గిర":115,"గిల":409,"గాన":344,"గాల":538,"గార":322,"గుర":382,"గుల":283,"చే ":527,"గుత":171,"చంద":164,"The":111,"ఒకట":320,"ఈ ":1802,"కడ ":129,"ఆ ":194,"ం ":8664,"జ ":124,"ట ":666,"చ ":142,"క ":4039,"ఖ ":417,"గ ":279,"కా ":636,"కి ":2878,"కీ ":159,"గం ":342,"కు ":3007,"కే ":115,"కల ":176,"ో ":9069,"ొ ":154,"్ ":12057,"ూ ":882,"ే ":3256,"ె ":384,"కంగ":154,"ై ":627,"కంప":118,"క్ ":597,"al ":302,"కటి":342,"ప ":310,"కడప":102,"గి ":200,"ర ":2230,"య ":2100,"and":212,"గు ":978,"మ ":914,"an ":162,"భ ":236,"త ":2433,"ణ ":915,"గ్ ":310,"కని":99,"డ ":765,"కన్":185,"న ":8370,"గే ":100,"ధ ":351,"ద ":698,"కత్":143,"థ ":364,"గల ":283,"ి ":25169,"ు ":28831,"ీ ":2596,"ా ":10070,"గర ":98,"ళ ":256,"ara":113,"గా ":3484,"as ":101,"ల ":3843,"ష ":236,"స ":172,"వ ":1710,"ate":158,"ati":274,"శ ":151,"ఎక్":197,"ఒక ":1730,"ఎన్":170,"ఏర్":182,"ఏప్":108,"కం ":201,"పతి":175,"పత్":270,"పని":184,"పదా":153,"బ్ ":398,"పద్":166,"పబడ":136,"పర్":323,"పరి":615,"పయో":173,"is ":130,"ion":314,"మం ":434,"బీ ":112,"పడి":153,"పట్":664,"పటి":143,"నుల":119,"నుమ":118,"నుం":937,"పండ":161,"పంచ":381,"నిష":106,"నిస":137,"నుక":147,"నేద":234,"నేక":128,"నెల":200,"ఫ్ ":137,"న్స":247,"న్న":4348,"న్య":363,"న్మ":479,"నర్":180,"నల్":184,"నవల":101,"నవర":120,"నవి":505,"నవా":119,"నాన":204,"నాల":394,"నిక":2247,"నామ":117,"నాయ":625,"నార":407,"నాగ":146,"నాట":222,"నాడ":330,"నిం":101,"నిమ":603,"నిప":174,"నిన":273,"నివ":153,"నిల":173,"నియ":723,"నిర":621,"నిజ":109,"నిచ":107," In":167,"పై ":288,"ధుల":101," Ma":108,"he ":336,"నటి":194,"నటు":148,"ధాన":583," Na":99,"ధిక":249,"ధాల":110,"ధార":372,"ధిం":266,"ధ్ర":813,"ధ్య":539,"ధ్వ":128,"నము":174,"నప్":98,"నది":1069,"ప్ ":131,"నకు":648,"ధర్":117,"ద్ద":912,"ద్ధ":740,"ద్య":705,"ద్ర":1024,"ద్వ":366," Ra":122,"పు ":1089," Th":135,"పి ":170,"నగర":694,"నగా":212,"దుర":131,"దుల":359," in":168,"ic ":167,"దుక":199,"నంల":98,"దీన":642,"నంద":215,"నంత":121,"దూర":349," of":288,"నంగ":97,"దిన":1883,"దాల":166,"దార":401,"దిగ":97,"దావ":310,"దిక":108,"దాన":215,"దాయ":205,"దిం":248," an":154,"igh":175,"ing":133," co":127,"in ":172,"దేశ":1819,"దేవ":424,"దరా":213,"దరి":108,"దరు":98,"దర్":467,"దలై":227,"దము":121,"ht ":153," ri":133," px":158,"hum":206," th":488,"యక్":252,"మల్":108,"యకు":188,"ళం 
":125,"మహా":326,"మహబ":214,"మస్":105,"భ్య":159,"మర్":164,"మము":1248,"మరి":1583,"్య":7206,"్ర":17902,"్భ":147,"్మ":2496,"్ళ":500,"్ల":6272,"్ష":1432,"్స":2574,"్వ":3316,"్శ":403,"భూమ":101,"ొత":206,"ొన":623,"ొద":448,"er ":201,"ొడ":148,"ొట":141,"ొక":526,"ోత":253,"ోద":489,"ోధ":191,"ోన":2053,"ోప":279,"ోజ":1789,"es ":247,"ోట":326,"మన్":141,"ోడ":255,"ొల":420,"ోక":416,"ోగ":536,"ొప":97,"ొమ":110,"ొర":534,"మధ్":230,"ోహ":103,"ోస":258,"ోష":120,"ోవ":239,"ోళ":97,"ోల":348,"ర్ ":2488,"ోర":356,"ోయ":204,"ోమ":362,"ోబ":166,"్ప":1717,"్బ":390,"్థ":1504,"్ద":1267,"్ధ":1039,"్న":5055,"్డ":878,"్ణ":861,"్త":4966,"మద్":165,"్ఞ":163,"్ట":4282,"్చ":1364,"్జ":379,"్క":2197,"్గ":1283,"ౌర":127,"ెక":273,"ెగ":454,"ెట":731,"ేం":571,"ెడ":401,"ెన":560,"ెద":446,"ెబ":128,"ెప":323,"ెర":576,"ెయ":109,"ేక":542,"ెల":1742,"ెళ":238,"ెస":311,"ేజ":151,"ేడ":137,"ేట":442,"ేద":1312,"ేత":579,"ేమ":144,"ేన":288,"ేప":266,"ైక":118,"ేవ":789,"ేశ":2277,"ేయ":473,"ేర":1324,"రు ":4219,"ేల":302,"ైట":119,"ేస":935,"ేష":349,"ైద":293,"ైప":117,"ైన":1789,"ైల":172,"ent":124,"ైర":115,"రీ ":399,"లం ":787,"ొం":865,"ుప":753,"ుబ":195,"ుమ":706,"ుర":1675,"ుడ":2134,"ుత":1344,"ుణ":146,"ుద":870,"ున":2892,"ూచ":102,"ూట":134,"ూడ":838,"ుల":6256,"ుళ":166,"ూక":114,"ువ":1504,"భుత":182,"ుష":187,"ుస":535,"ూబ":239,"ూప":393,"ూర":2460,"ూమ":147,"ూత":151,"యంగ":158,"భివ":99,"ూన":263,"యంల":180,"ూల":418,"యంత":161,"భావ":218,"ృత":512,"ృద":127,"భాష":346,"ృష":487,"భాగ":404,"ెం":3543,"భార":1164,"ght":162,"రా ":340,"రి ":1816,"బ్ద":180,"బ్య":153,"బ్ర":421,"బ్బ":268,"బ్ల":214,"మంల":137,"మంత":183,"మంద":199,"మండ":1520,"మంచ":160,"బూబ":215,"యి ":1015,"యా ":446,"యు ":1545,"బహు":111,"రత ":484,"రణ ":257,"బాల":182,"బాద":326,"చం":331,"చక":136,"గూ":124,"గు":2649,"గీ":290,"గి":1482,"గా":5233,"గ్":4119,"గో":680,"గొ":668,"గే":237,"చూ":162,"చే":2135,"చె":2255,"చి":4513,"చా":1586,"చు":758,"జం":238,"చీ":143,"జక":593,"చ్":872,"చన":284,"చడ":226,"చల":202,"చబ":291,"చర":425,"చయ":212,"జీ":767,"జు":1617,"జూ":225,"జా":1179,"జి":2298,"బర్":394,"జె":108,"జే":138,"జ్":689,"జట":138,"జన":1034,"జల":247,"జర":556,"జయ":406,"జమ":127,"ఞా":122,"టణ":417,"టన":141,"టమ":207,"టర":627,"టల":211,"టా":1086,"టం":232,"టక":349,"బరు":119,"ఏర":193,"ఒక":2332,"కప":131,"కన":498,"కథ":162,"కళ":218,"కల":1454,"కర":1217,"కమ":420,"కత":246,"కడ":370,"ఖం":105,"కట":682,"కం":721,"గన":109,"గమ":341,"ఖ్":606,"గల":557,"గర":1021,"గవ":148,"గడ":113,"ఖా":134,"గణ":180,"గత":148,"కె":553,"కే":816,"కొ":1436,"కో":857,"క్":5977,"కవ":683,"మ్ ":245,"కా":4315,"కి":3602,"గం":786,"కీ":401,"కు":5487,"కూ":725,"కృ":665,"ఇవ":231,"ఉం":885,"ఇస":145,"ఈయ":144,"ఉత":340,"ఉద":238,"ఉన":1071,"ఉప":428,"ఊర":103,"యన ":254,"మే ":227,"బడు":128,"బడి":519,"బడ్":106,"మె ":112,"ఎం":154,"ఎక":206,"ఫిబ":101,"ఎన":226,"ఎల":121,"ఏప":110,"ంట":2314,"ండ":4890,"ంచ":4715,"ంజ":258,"ంఖ":105,"ంగ":3503,"ంఘ":151,"ంక":1284,"ము ":3877,"ంస":547,"ంహ":127,"ంశ":194,"ంవ":1428,"ంల":2578,"ంభ":203,"మీ ":169,"రం ":1690,"ంబ":1040,"ంప":925,"ంన":133,"ంధ":1232,"ంద":6277,"ంత":2617,"అం":1210,"మి ":320,"అక":311,"మా ":484,"అత":486,"ఆం":1221,"అడ":112,"ఆగ":134,"అవ":290,"అస":267,"ఇం":883,"అప":148,"అద":293,"అధ":264,"అన":2095,"అల":293,"ఆక":150,"అభ":173,"అమ":349,"అయ":276,"అర":448,"ఆస":172,"ఇద":1021,"ఇత":411,"ఆద":158,"ఆధ":194,"ఇక":159,"ఆల":169,"ఆర":366,"ఆమ":98,"ఆయ":142,"ed 
":203,"హా":889,"హీ":131,"హి":1070,"హై":216,"హు":207,"హ్":323,"సి":2638,"సా":1884,"సహ":150,"సే":373,"హన":103,"సె":515,"హద":248,"సూ":343,"సు":1296,"సీ":269,"హర":312,"హమ":119,"స్":7213,"సో":169,"హబ":217,"సై":168,"ీన":965,"ీప":582,"ీమ":216,"ీయ":835,"ుం":2995,"ీడ":294,"ీత":443,"ీద":226,"ప్ట":148,"ుచ":224,"ీస":356,"ుజ":115,"ుట":570,"ీర":995,"ీల":543,"ుఖ":811,"ీవ":537,"ుక":1639,"ుగ":2452,"ిప":1339,"ిధ":481,"ప్ర":6213,"ిన":7334,"ిభ":310,"ిమ":1311,"ిబ":193,"ీం":175,"ప్ప":767,"ిడ":512,"ిట":428,"ిద":1195,"ిణ":298,"ప్త":145,"ిత":2784,"ిహ":142,"ిస":1526,"ిష":595,"ీట":504,"ిల":4318,"ిర":1588,"ియ":3643,"ిశ":675,"ివ":1160,"ీక":548,"ిళ":268,"ాప":991,"ాబ":791,"ాభ":232,"ాద":1312,"ాధ":720,"ాన":5606,"ాణ":587,"ాత":1866,"ాథ":104,"ాట":1172,"ిం":5711,"ాడ":1873,"ిజ":737,"ాష":1417,"ిగ":1743,"ాశ":424,"ిచ":498,"ాహ":545,"ాస":1648,"ాళ":307,"ాల":7697,"ావ":1502,"ిక":5536,"ాయ":2657,"ామ":3420,"ార":9423,"ాం":2393,"ాజ":1333,"ాచ":406,"ాగ":1102,"ాఖ":177,"ాక":1433,"ళ్":660,"ళా":226,"ళు":204,"వం":1022,"ళి":199,"లూ":372,"లె":853,"లే":1247,"లై":507,"లొ":182,"లో":10420,"ల్":5828,"లల":1531,"లవ":368,"పొం":272,"లస":257,"లి":3093,"లా":4226,"లు":7313,"లీ":875,"లన":1389,"లద":177,"లత":291,"లర":153,"లమ":664,"లయ":457,"లభ":102,"లప":338,"లగ":164,"లక":1319,"ళం":168,"రె":1156,"రే":571,"లం":1397,"రీ":1644,"రు":6712,"రూ":370,"ర్":10720,"రై":236,"రొ":149,"రో":1788,"రవ":803,"రర":106,"రల":270,"రి":8236,"రా":9645,"రహ":470,"రస":1070,"పోయ":117,"సల":125,"సర":1986,"సవ":160,"సన":295,"షే":125,"సమ":856,"ష్":2463,"సభ":337,"షు":200,"షి":679,"హం":127,"సత":144,"షా":261,"షల":199,"షన":300,"శే":116,"శో":101,"శ్":2342,"శా":1493,"శి":616,"శీ":111,"సం":3277,"శు":134,"షణ":177,"శర":141,"శమ":213,"వ్":1485,"శప":105,"వై":429,"వే":1068,"వె":798,"పెర":121,"వృ":262,"శత":192,"పెద":371,"వు":1823,"వి":5005,"వీ":727,"వా":3654,"పెట":114,"వహ":409,"వస":762,"వవ":165,"వల":675,"శక":362,"వమ":143,"వర":2599,"వయ":126,"వబ":126,"పేర":775,"వద":165,"వన":469,"శం":714,"వడ":233,"వత":1750,"వచ":500,"పేట":181,"వక":188,"వగ":122,"పర":1387,"పయ":194,"పబ":173,"పుడ":183,"పుట":123,"పశ":239,"పవ":130,"పల":696,"పీ":121,"బం":418,"పు":3004,"పున":212,"పా":2599,"పి":1196,"పుల":197,"పై":426,"పుర":635,"పె":871,"పే":1148,"పూ":679,"పో":542,"పొ":493,"ప్":7624,"ఫి":208,"బంధ":198,"బడ":901,"ఫా":98,"పుక":147,"నప":317,"నన":175,"ధ్":1531,"నమ":463,"నల":465,"నర":396,"నవ":1165,"నస":228,"నా":3446,"ని":11511,"నూ":449,"పం":874,"నీ":780,"ను":3710,"నె":404,"నే":1616,"నై":122,"నో":217,"న్":7783,"పక":194,"పట":848,"పడ":347,"పత":560,"పద":604,"పూర":466,"పన":354,"మస":145,"మహ":733,"యక":627,"మవ":108,"మల":476,"మర":2137,"మమ":1293,"భ్":184,"మయ":233,"మ్":1298,"యమ":802,"మో":201,"యబ":173,"మొ":695,"యప":181,"మై":1143,"యన":1189,"మే":512,"మె":642,"యత":237,"మూ":561,"యణ":150,"ము":8249,"యడ":124,"రం":3512,"మీ":939,"మి":2427,"మా":3556,"రజ":343,"యా":3425,"యస":173,"రచ":538,"రక":1588,"యవ":582,"రగ":211,"యర":125,"యల":345,"రభ":250,"రబ":182,"యో":1070,"రయ":209,"రమ":3193,"య్":536,"యే":292,"రన":144,"యొ":325,"రప":785,"రత":1738,"రధ":361,"రద":1093,"యి":2020,"రణ":774,"యూ":387,"యు":2564,"భజ":115,"బహ":139,"ఫ్":329,"బల":111,"బర":571,"బె":226,"పిల":285,"బో":143,"బి":436,"బా":1142,"బూ":263,"బు":283,"మం":2740,"బీ":198,"భవ":117,"పిం":315,"పాడ":180,"పాట":306,"బ్":1702,"పాక":97,"మక":263,"పార":352,"మధ":257,"మద":259,"పాల":798,"మన":607,"భా":2350,"యం":1483,"భి":337,"మణ":175,"పాత":213,"భూ":233,"భు":226,"పాద":161,"మత":260,"తం":1159,"డె":175,"డై":152,"డే":590,"డూ":98,"డు":4437,"డ్":1447,"తగ":160,"తక":320,"ణు":288,"ణి":460,"తడ":168,"థం":125,"ణా":811,"పశ్":219,"ణమ":366,"డక":111,"డం":469,"టీ":380,"టి":2635,"టూ":417,"టు":1542,"టె":412,"టే":482,"టో":219,"ట్":4408,"డల":1645,"డవ":326,"డి":31
30,"ణం":589,"డీ":118,"డా":1416,"డన":117,"డప":181,"యం ":918,"డర":467,"డమ":180,"దూ":569,"ది":7459,"దా":2559,"దు":1821,"నం":1132,"దీ":937,"దశ":127,"దల":498,"దవ":129,"దమ":252,"దర":1036,"దన":184,"నద":1194,"నత":159,"ధు":301,"నడ":153,"ధి":1062,"ధా":1185,"నట":452,"నగ":969,"నక":819,"ధర":205,"ధమ":185,"ద్":4147,"దో":106,"దై":97,"పల్":507,"ధన":195,"దే":2544,"తి":3113,"తా":2188,"తు":2234,"తీ":1022,"దం":398,"తూ":504,"తవ":124,"ణ్":199,"తమ":1218,"తయ":143,"తర":1102,"తల":412,"తత":106,"తద":425,"తన":727,"తప":172,"ధం":257,"థి":222,"దట":256,"థా":607,"దగ":155,"త్":7311,"దక":372,"థల":206,"తె":1297,"తే":283,"తొ":248,"తో":1099,"తన ":222,"re ":103,"తమ ":119,"తర ":195,"rna":99,"డర్":430,"rig":147,"డలం":220,"డలా":772,"డలమ":349,"తి ":1458,"తా ":207,"తు ":133,"దం ":185,"తీ ":111,"డాక":183,"తూ ":177,"ణంగ":137,"డిస":137,"డియ":171,"డిప":137,"డిన":531,"డిగ":126,"డాన":284,"డిం":217,"డున":104,"డుత":225,"డుద":220,"తే ":137,"డుగ":154,"టుం":409,"టుడ":112,"టుక":107,"టుల":128,"టాన":112,"టిం":317,"టార":521,"టాయ":145,"టిక":258,"టిన":213,"టిల":128,"టెం":107,"టూర":324,"టెస":126,"ట్ట":1520,"ట్న":97,"ట్ల":382,"ట్ర":1237,"టోబ":103,"థం ":106,"ణి ":141,"ణా ":186,"డే ":362,"టణం":170,"టణమ":136,"ఞాన":102,"డ్ ":409,"టర్":527,"తం ":582,"px ":156,"డప ":101,"డవ ":125,"డల ":98,"డు ":3436,"ణం ":367,"డి ":1391,"డా ":647,"దక్":263,"ng ":118,"దగ్":111,"nal":119,"ని ":5553,"nd ":184,"నా ":456,"nat":122,"తొల":175,"త్స":1562,"త్వ":496,"త్ప":99,"త్య":731,"త్ర":2659,"త్మ":140,"త్త":1214,"న్ ":2086,"nte":106,"నీ ":393,"ను ":1891,"నూ ":221,"థాన":257,"థాప":165,"దటి":194,"నే ":783,"తలు":105,"తర్":233,"తరా":100,"తరు":178,"తయా":124,"of ":287,"తని":130,"తను":162,"తదే":370,"ద్ ":290,"ణ్య":125,"తము":568,"తరం":98,"తమి":191,"తమై":176,"తూర":292,"తీస":125,"తీర":136,"తీయ":499,"తుం":576,"తువ":193,"తుల":549,"తున":363,"తుడ":101,"తుత":113,"తెల":1095,"or ":106,"ధి ":298,"on ":261,"తిం":103,"తాన":260,"తాబ":131,"తాయ":183,"తార":646,"తాల":289,"తిక":313,"తిప":135,"తిన":244,"తిల":128,"తిర":258,"ona":120,"ణము":230,"ణాట":124,"ణాన":134,"ణాల":199,"దే ":212,"తడు":135,"నం ":570,"దీ ":144,"దు ":735,"దూ ":152,"దా ":781,"ది ":4694,"mb ":192,"డ్ర":225,"డ్డ":567,"డైన":112,"త్ ":276,"తో ":862,"తంత":146,"తంల":160,"తంగ":162,"ధం ":174,"హైద":184,"హ్మ":154,"హరి":153,"హిం":546,"హాస":146,"హిత":221,"హార":226,"ాం ":154,"ాల ":1055,"ార ":189,"ాయ ":112,"ామ ":130,"ాష ":111,"ిక ":816,"ాడ ":161,"ాన ":423,"ాణ ":106,"ాత ":349,"షిణ":213,"షియ":100,"సత్":128,"ష్ణ":466,"ష్ట":1410,"ష్మ":151,"ష్య":127,"శ్వ":481,"శ్ర":732,"శ్చ":253,"సూర":115,"సుమ":114,"సుప":118,"సుల":177,"సుక":241,"సిన":1016,"సెప":103,"హదా":155,"సెం":276,"సేవ":130,"స్వ":634,"స్స":166,"స్ట":634,"స్త":2396,"స్క":441,"స్య":121,"స్ల":199,"స్థ":1195,"స్ప":97,"హబూ":215,"సర్":165,"సరా":545,"సరి":163,"సరం":158,"సము":220,"సమా":187,"సరమ":805,"సిం":294,"సాగ":118,"సార":233,"సాయ":137,"సామ":225,"సాల":116,"సాధ":336,"సిద":483,"సాహ":220,"సాం":202,"ాషల":143,"ిగా":515,"ిగి":822,"ాశి":102,"ాష్":1035,"ాసన":172,"ాస్":763,"ాసి":162,"ాసు":140,"ాహ్":114,"ాహి":212,"ిచే":186,"ాల్":490,"ాలె":525,"ాలో":374,"ాలి":340,"ాలా":319,"ాలు":1799,"ికం":141,"ాళ్":125,"ికల":174,"ావర":347,"ిక్":462,"ికె":356,"ికి":2410,"ికా":407,"ాశం":101,"ికీ":129,"ావు":393,"ావి":174,"ావా":119,"ాయక":252,"ామా":422,"ామి":301,"ువ ":165,"ాయణ":146,"ాము":207,"ామీ":105,"ారం":644,"ాయన":97,"ామ్":178,"ాయల":128,"ాయా":112,"ాయి":1156,"ారణ":247,"ాయు":174,"ారత":1096,"ారమ":516,"ాలం":253,"ారు":2695,"ారా":731,"ారి":987,"ారె":142,"ార్":1699,"ాలక":349,"ాలన":602,"ాలల":704,"ాలయ":271,"ాపా":110,"ాపి":155,"ాపు":243,"ుల 
":664,"ాప్":131,"ాబ్":203,"ామం":407,"ాబా":337,"ాబి":105,"ాభా":138,"ామమ":1222,"ాయం":147,"ాధి":274,"ాధా":195,"ాద్":200,"ాది":262,"ాదా":163,"ాదు":323,"ానం":337,"ాన్":1330,"ాని":2256,"ానా":239,"ాను":249,"ానీ":138,"ానమ":140,"ానవ":144,"ాణి":118,"ాణా":137,"ాతం":140,"ాత్":473,"ాతి":287,"ాతీ":294,"ాటి":344,"ాటక":227,"ింద":1127,"ింప":204,"ింహ":109,"ున ":292,"ాణం":105,"ాడు":1280,"ాటు":186,"ాట్":155,"ించ":3485,"ింగ":388,"ాచా":147,"ాజక":97,"ాజ్":274,"ాజు":235,"ాజీ":183,"ిస్":1152,"ిసె":112,"ిసి":108,"ిశ్":326,"ిష్":330,"ివర":212,"ీకా":119,"ివి":228,"ివా":253,"ివృ":121,"ిలు":223,"ిలి":585,"ిల్":2371,"ిలో":795,"ీకర":173,"ిరి":250,"ిరా":106,"ిర్":662,"ిరు":214,"ిమా":662,"ిమి":102,"ియన":542,"ియా":466,"ియు":1543,"ియో":582,"ిభజ":97,"ిబ్":136,"ిభా":151,"ిప్":171,"ిపో":97,"ిపా":281,"ిపి":237,"ిన్":485,"ినే":113,"ినా":300,"ినీ":170,"ిని":1388,"ినవ":476,"ిధ్":101,"ినద":304,"ిద్":918,"ిది":107,"ితో":97,"ిత్":1390,"ితు":127,"ితా":191,"ితి":106,"ితమ":107,"ితం":125,"ీంన":105,"ిడి":106,"ిడు":224,"ిట్":149,"ిజ్":98,"ిజయ":327,"ిమ ":204,"ిన ":3712,"ిధ ":97,"us ":116,"umb":214,"ిత ":340,"ిణ ":131,"ాగా":294,"ాగం":166,"ాకు":548,"ాకా":261,"ాక్":268,"ాగమ":122,"ీయ ":614,"ుఖ ":363,"ాంప":98,"ాంత":945,"ాంచ":129,"ాంక":177,"ాంగ":129,"ాండ":272,"ాంట":124,"tio":240,"thu":204,"tic":109,"ీద ":119,"ter":183,"ీత ":149,"the":294,"రెగ":378," టె":198,"రెడ":266,"రెం":338," డి":256," తర":402," తల":122," తయ":126," తమ":296," తన":208,"రుల":346,"రువ":337," తీ":259,"రూప":260," తు":102," తూ":204," తా":347," తి":295," దర":275,"రిస":226,"రీక":244,"లంక":104,"రిశ":167,"లంగ":98,"రియ":2151,"రిల":238," దా":338," ది":263," నం":121," దీ":636," దు":100," దూ":346,"రీడ":197," త్":129," దక":226,"రుగ":346," తే":107," తె":1239,"రుక":143,"లంల":221," తో":176,"రీల":108," తొ":234,"రీర":165,"రుప":202,"రుద":98," దగ":103,"రుత":145,"రుడ":402," ని":1694," నా":986," నే":224," నె":256," ను":1002," నీ":159," పం":410," ధ్":128," నవ":245," నల":140," నర":118," నట":342," నద":241," ద్":355," దే":830," నగ":454," ధర":125," పు":828," బం":140," పి":453," పా":1265,"ర్జ":254,"ర్చ":276," పూ":375,"ర్గ":822," పై":166," పే":871,"ర్క":210," పె":701," పర":985," పల":194," పశ":232,"ర్ల":676," పట":506,"ర్భ":97,"ర్మ":671,"ర్య":611,"ర్ర":164," పత":150,"ర్వ":752,"ర్శ":394," పద":483,"ర్ష":165," పన":214,"ర్స":153,"ర్డ":192,"ర్త":647,"ర్ణ":379," న్":102,"ర్ట":234,"ర్ప":512,"ర్ద":134,"ర్థ":298,"ర్న":217,"ర్ధ":247," బె":166," మం":1840," బు":104," బా":514," బి":196," బహ":138," ఫి":158,"రోజ":1261," పో":317," పొ":389," ప్":5580," మె":212," మే":243," మొ":630," మీ":304," రం":324," ము":950," మా":1189," మి":579," మూ":307," మహ":682," మర":1780," మల":118," మద":144," మధ":241," మన":339," భా":1788," భూ":198," మత":120," బ్":391," రక":185," రచ":380," యా":130," యు":226," యొ":307," రహ":158," రా":2479," రి":152," రూ":154," రే":110," రె":413," రో":1305," లక":229," లా":183," లి":124," లీ":414,"లక్":278," లే":936," లో":2471," వం":288,"లకు":713," వచ":271," వర":687," వల":223," వన":123," శత":174," వృ":111," వు":163," వీ":487," వి":2741," వా":1113," వహ":154," వస":211," శర":115," వ్":920," వే":517," వై":295," వె":707," శి":276," శా":559," సం":2964," శ్":619," సభ":227," సమ":725," సర":319," సత":113," సో":126," సై":131," స్":1289," సహ":136," సా":1231," సి":984," సూ":295," సీ":113," సు":551," సె":214," సే":170," హా":151," హి":411," హై":215,"శం ":350,"లదు":145,"లనా":127,"లను":769," అద":293," అన":2095," అధ":264," అప":148," అమ":347," అభ":171," అర":447," అయ":275," అల":292," ఆక":148," అవ":290," ఆగ":133," అస":267," ఇం":883," అక":309," అత":484," ఆం":1219," అడ":112,"లతో":261," అం":1209,"లము":453," ఉప":425," ఉత":340," ఉద":238," ఉన":1059," 
ఈయ":143," ఉం":878," ఇస":144," ఇవ":231," ఇత":411," ఇద":1021," ఆస":171," ఆల":168," ఇక":159," ఆమ":98," ఆయ":141," ఆర":349," ఆద":158," ఆధ":194," ఎన":222,"లపా":106," ఎక":205," ఎం":151," ఊర":103,"లయం":174,"లసి":100,"వి ":1063," ఒక":2330," ఎల":121," ఏప":110,"వు ":683," ఏర":193," కడ":121," కన":324," కథ":133," కళ":192," కల":950," కర":453," కమ":146," కవ":208," కూ":537," కు":935," కృ":305,"లలో":1368," కా":1477," కీ":118," కి":608," కొ":916," కో":463," కె":104," కే":473," క్":1409," కం":234,"లవు":100,"లున":100,"లుప":124,"లుగ":1377," చం":138,"లీప":400," చర":227,"లిస":141,"లియ":176," గల":204," గణ":98,"లూర":257," గొ":123," గ్":2511," గో":479," గు":835,"లుస":156,"లువ":181," గా":766,"లాం":234," జీ":342," జూ":200," జి":1930," జా":544,"లిప":160,"లిన":457,"లాల":533," చి":1236," చా":356," జం":150," చూ":133,"లిచ":103,"లిక":169," చే":1234," చె":2126,"లిగ":302,"లిం":322," జర":424,"లాన":792," జట":132,"లాక":349," జన":782,"మాన":550,"మిం":658,"మాణ":180,"మాత":176,"మాజ":244,"మిత":118,"మిన":106,"మాల":362,"మార":804,"మిగ":392,"మిక":194,"మూర":145,"మూల":144,"ముస":98,"మూడ":128,"మీద":160,"రంభ":126,"రంగ":724,"మిళ":192,"మీట":267,"ముం":165,"మున":805,"ముద":263,"ముల":1642,"ముగ":175,"ముఖ":794,"రంల":639,"లన ":189,"మెర":163,"మొద":382,"యబడ":162,"మైన":961,"యన్":580,"యమై":151,"యము":343,"మ్య":156,"మ్మ":696,"రకు":296,"రకా":757,"యలు":117,"లి ":884,"లా ":1657,"రక్":177,"యవా":98,"యవస":290,"యాం":168,"రజల":129,"రజా":121,"లు ":5068,"రచయ":170,"లీ ":280,"రచి":149,"యాత":175,"యిం":162,"యాన":300,"యాప":194,"యిత":265,"యిన":317,"యాల":977,"యాయ":147,"యార":226,"యాస":145,"లె ":114,"యున":120,"యుద":115,"యుడ":244,"రణం":198,"లై ":139,"యుల":185,"రతద":370,"రత్":271,"లొ ":129,"రతి":294,"రతీ":189,"లో ":7792,"రదా":133,"రదే":797,"యేక":110,"ల్ ":1097,"రధా":328,"రపం":261,"యొక":313,"యోగ":322,"రప్":212,"యోజ":484,"రబ్":122,"రభు":164," న ":334,"య్య":475,"రమా":132,"రము":2455,"రమై":178," వ ":948,"రలో":108,"రవే":152,"రవర":163,"రసి":474,"రసా":125,"రహద":155,"రహ్":108,"ళు ":149,"రస్":344,"వం ":351,"రాం":782,"రాష":1004,"రిగ":429,"రాశ":107,"రిచ":101,"రాస":176,"రాల":911,"రావ":479,"రిక":1063,"రాయ":398,"రామ":2348,"రార":157,"రిమ":109,"రిన":146,"రిప":316,"రిత":332,"రీం":118,"రాజ":925,"రాచ":155,"రాక":131,"రాబ":238,"రాన":501,"రాత":218,"రాణ":222,"రిం":589,"సి ":308," ఈ ":1799,"వవి":98,"శకు":169,"వల్":164,"వలన":113,"వర్":961,"వరి":638,"వరా":124,"వరం":218,"వరక":228,"వబడ":120,"వని":123,"�":414," ఆ ":193,"వేశ":97,"వెన":102,"వేత":144,"వేద":126,"వేర":137,"వెల":135,"వెళ":168,"స్ ":1102,"శతా":121,"వెం":227,"వుల":548,"వృద":108,"వృత":103,"విభ":250,"విశ":387,"విష":171,"విస":184,"విల":124,"వివ":223,"వీట":109,"వీర":311,"వుడ":102,"సే ":113,"వున":206,"వాట":147,"వాడ":389,"విం":186,"వాత":351,"వాద":217,"వాన":154,"వాయ":104,"వార":1074,"వామ":220,"విక":160,"వాల":287,"వాస":216,"విజ":433,"విడ":296,"విత":252,"విన":171,"విధ":335,"విద":344,"వస్":529,"వహి":267,"సు ":228,"శము":175,"వ్య":1048,"వ్ర":129,"వ్వ":183,"షన్":215,"సంక":132,"శివ":165,"సంగ":237,"సంఘ":104,"సంవ":1416,"సంస":438,"సంబ":140,"సంప":159,"శాఖ":118,"శాన":154,"శాల":347,"శార":99,"శాస":530,"ల్ప":168,"ల్ల":3746,"ల్గ":136,"లేద":781,"వ్ ":114,"లేక":128,"లెం":508,"లోమ":231,"లోన":1869,"లోక":284,"లైన":312,"వంత":146,"వంట":147,"వంశ":116,"సం ":188,"వగా":101,"శ్ ":738,"ళ్ళ":500,"ళ్ల":102,"శంల":250,"వత్":1433,"సభ ":189,"ష్ ":119,"షి ":144,"వచ్":413},"n_words":[958347,1064684,731588],"name":"te"}
\ No newline at end of file
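Each profile file added in this commit is a single line of UTF-8 JSON with three fields: "freq" maps 1- to 3-character n-grams to their corpus counts, "n_words" lists the per-length n-gram totals (1-grams, 2-grams, 3-grams), and "name" is the language code ("te" for the Telugu profile just shown). A minimal stand-alone sketch of reading such a file with only the standard library follows; the reader function and path are illustrative, not part of langdetect's API:

    import json

    def read_profile(path):
        # Profiles are single-line JSON; under Python 3 the encoding
        # must be spelled out, since the files are UTF-8 without a BOM.
        with open(path, encoding='utf-8') as f:
            profile = json.load(f)
        return profile['name'], profile['freq'], profile['n_words']

    name, freq, n_words = read_profile('nlp_resource_data/langdetect/profiles/te')
    print(name)              # 'te'
    print(freq.get('ఇది'))   # 937 -- the count of one 3-gram from the data above
    print(n_words)           # [958347, 1064684, 731588]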
diff --git a/nlp_resource_data/langdetect/profiles/th b/nlp_resource_data/langdetect/profiles/th
new file mode 100755 (executable)
index 0000000..2852f43
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"D":2767,"E":2382,"F":1744,"G":2229,"A":4566,"B":2837,"C":4944,"L":2537,"M":3661,"N":2375,"O":1879,"H":2029,"I":2684,"J":1202,"K":1239,"U":982,"T":3940,"W":1428,"V":1153,"P":3623,"S":5025,"R":2458,"f":4548,"g":7700,"d":10407,"e":36793,"b":4586,"c":13011,"a":35754,"n":27098,"o":27360,"l":17507,"m":9775,"k":3233,"h":11517,"i":30062,"w":2927,"v":3191,"u":11836,"t":23508,"s":19258,"r":25409,"p":8180,"z":1147,"y":6708,"x":1329,"ี่ก":1219,"ี่ค":798,"ี่จ":1624,"ี่แ":1639,"ี้ย":1300,"ี่เ":7102,"ี่ไ":1804,"ี่ใ":2548,"ี่ท":1342,"ี่น":1525,"ี่ต":2048,"ี่ม":5943,"ี่ย":5902,"ี่ป":3449,"ี่พ":1219,"ี้จ":758,"ี่ส":5079,"ี่ร":1809,"ี่ห":1170,"ี่อ":2340,"ู่ ":1041,"ี้เ":1017," l":799," m":1059,"ึกษ":2877," o":2241," h":734," i":844," d":1129," e":750," f":963," a":1997," b":936," c":1950," t":2090," p":1738," s":2098," J":1111," K":1075," H":1671," I":1700," N":1646," O":1116," L":1847," M":2970," B":2345," C":3940," A":3318," F":1429," G":1940," D":1996," E":1607," S":3963," R":1952," P":2925," W":1239," V":824," T":2982,"ีปร":905,"ีนา":987,"ีวิ":1802,"ิ่ง":2648,"ีลั":1263,"ีรถ":1177,"ิโล":1164,"ีย์":1013,"ียบ":1082,"ียน":6852,"ียร":1356,"ียม":1021,"ียว":3305,"ียก":3983,"ิเม":917,"ียง":6904,"ิเว":1991,"ีฬา":1237,"ิ่น":1155,"ิ่ม":2587,"ิ้น":1155,"Co":1009,"Ch":760,"ือ ":11318,"ิทย":5393,"ิทธ":1425,"ินส":994,"ินแ":951,"ินเ":2159,"ินท":2179,"ิธี":1094,"ุด ":981,"Ma":1030,"ำแห":1099,"ิยม":1454,"ิมพ":1364,"ำเภ":2344,"ำเน":1700,"ีขน":839,"ีกา":2681,"ิภา":937,"ำให":1486,"ิร์":1529,"ิริ":860," ศ ":10355,"ีคว":2479,"ิย์":865,"ิยา":1638,"ิลป":1713,"ิวเ":1224,"Th":1304,"ิศา":1224,"ิสต":1802,"ิษั":1436,"ีชื":1806,"ิหา":1129," พ ":6930,"a ":4409,"ีที":1523,"i ":1629,"ge":1151,"he":3360,"ha":1753,"g ":1914,"ea":1472,"ec":1050,"ำตั":743,"ed":1616,"de":1807,"di":1292," ค ":3588,"h ":1481,"ำดั":807,"el":1781,"en":3747,"em":1263,"et":1470,"es":2908,"er":5983,"ca":1761,"e ":9681,"be":757,"da":1163,"f ":1782,"ct":1070,"co":1453,"ci":974,"ch":1872,"ce":1660,"ำนั":1183,"ำนา":1182,"c ":1262,"ำนว":1889,"ำบล":1639,"d ":3285,"at":4029,"as":1743,"ar":4079,"al":3985,"ai":1077,"ap":790,"am":1525,"an":5743,"ac":1436,"ad":868,"ag":1036,"ae":972,"nt":2832,"ns":1221,"no":1065,"ิกส":1152,"of":1717,"oc":793,"od":802,"om":1831,"on":5797,"ol":1972,"ot":991,"os":1228,"ou":1259,"op":973,"or":3612,"r ":3528,"pe":1355,"pa":819,"po":838,"ph":815,"lo":1494,"ll":1602,"o ":1832,"ma":1651,"me":1665,"mi":1301,"p ":991,"na":2372,"nc":1198,"nd":2063,"ne":2324,"ng":2747,"ni":2058,"าใน":1710,"m ":1949,"li":2285,"le":2488,"la":2669,"n ":6518,"ht":776,"hi":1241,"าแล":1356,"ho":994,"id":1548,"ic":3537,"ia":2041,"ig":855,"ie":1170,"k ":958,"ir":922,"is":2584,"it":2279,"il":1527,"าเร":879,"in":5078,"าเล":858,"io":2597,"าเป":2477,"ิกา":4876,"l ":3592,"ิงเ":1081,"ำว่":911,"y ":3290,"ve":1463,"ul":919,"ur":1530,"us":1967,"ut":915,"um":918,"un":1108,"ty":767,"tu":815,"tt":965,"ิงห":822,"to":1767,"ำหร":2147,"tr":1441,"te":3369,"ำหน":2147,"ti":3875,"th":2569,"ta":2328,"ss":1290,"st":2642,"so":847,"se":1374,"si":1814,"่":261149,"้":168823,"๊":2314,"๋":1031,"์":89548,"เ":290692,"rs":890,"แ":101902,"rt":1263,"โ":60108,"ใ":72132,"ไ":49049,"ๆ":5416,"ry":1040,"็":73338,"ro":2585,"ู":59743,"ุ":70364,"ื":81927,"ri":3400,"ึ":35498,"ี":202861,"ิ":162696,"re":2827,"ำ":45160,"า":458974,"rd":813,"ั":226972,"ะ":131072,"ฮ":6759,"ra":3541,"ฯ":856,"t 
":3621,"ฬ":1901,"ิชา":1303,"อ":306842,"ส":153652,"ห":123525,"ศ":53898,"ษ":33643,"ว":181464,"ฤ":7018,"ล":179449,"ย":192630,"ร":390285,"ภ":30986,"ม":229909,"ฝ":4270,"ผ":20687,"ฟ":16906,"พ":93163,"น":423152,"ธ":26604,"ป":143207,"บ":116540,"ต":148116,"ด":152617,"ท":165794,"ถ":31675,"ฑ":2084,"ฐ":9273,"ณ":25387,"ฒ":2670,"ญ":19027,"s ":7041,"ฎ":2682,"ฏ":3414,"จ":90844,"ฉ":5998,"ช":80847,"ซ":33709,"ค":113272,"ฆ":1276,"ง":265772,"ก":280773,"ข":76134,"ิดา":785,"ิดห":1109,"ิติ":1042,"ิตา":823,"ิตร":1253,"ิดเ":1452,"ิตย":978,"ิดต":796,"ิดจ":807,"ิดข":1153,"ิดก":963,"าพั":805,"ามก":1469,"ามค":978,"ำกั":1274,"ามป":1047,"ายค":1118,"ามร":1484,"ายก":2414,"ายข":1115,"ามห":1771,"ายช":1084,"ามส":2270,"ายท":1440,"ายถ":2265,"ามี":784,"ายต":803,"ามา":3948,"ายน":2971,"ารจ":1307,"ายว":810,"ามแ":1031,"ารค":1595,"ารก":1847,"ามเ":3279,"ารข":1167,"ายอ":907,"ายส":1187,"ายห":1214,"าบั":892,"าปร":879," ผ":2939," ฟ":1382," พ":16963," น":7737," ธ":1538," ป":9774," บ":5180," ต":11154," ด":4226," ท":12249," ถ":2578," ณ":758," จ":10643," ช":5003," ซ":8285," ค":16867," ก":16644," ข":5596,"าพร":989,"าพย":2192,"ิ์ ":838,"าที":3260,"านก":1639,"านข":1418,"านค":1839,"าทส":973,"านน":858,"านท":1970,"าธิ":1501,"านต":791,"าธา":1105,"านใ":891,"านเ":2380,"านแ":1122,"านห":886,"านอ":1049,"านส":931,"านี":3469,"านา":1107,"านั":986,"าณา":1725,"าดเ":977,"าตุ":736,"าติ":3417,"าตั":740,"าตร":1001,"าดใ":893,"ี้ ":1390,"ี่ ":11834,"าหล":992,"าหา":1172,"าอย":985,"าอา":792,"าอั":1363," ล":3504," ว":6837,"ั่ง":3130," ม":16388," ภ":4698," ร":8182," ย":2545," อ":16304,"าศา":1885," ฮ":1180," ศ":11904," ห":17474," ส":15924,"าศั":834,"าวเ":1521," ๆ":1601," ไ":6086," ใ":10228," โ":13479," แ":27651," เ":59355,"าวิ":2918,"าวอ":979,"าสา":971,"ั้น":6620,"าสน":1688,"ั้ม":1016,"ั่ว":2312,"าษา":8207,"าสต":5817,"ั่น":1240,"ั้ง":16337,"าร์":6532,"ารแ":3430,"ารใ":1567,"ารเ":5283,"ารี":821,"ำคั":1775,"ารา":3089,"ารอ":1573,"ารส":2796,"ารศ":1222,"ารว":1014,"ายใ":2848,"ารร":1717,"ายแ":1390,"ารย":1381,"ายเ":3128,"ารพ":1130,"ารป":2397,"ารน":1040,"ารบ":1445,"ารท":2651,"ารต":1493,"ารถ":2935,"ายา":887,"ารณ":2161,"ารด":1027,"าลี":979,"าลั":2863,"ากค":746,"ากก":2663,"ากน":1203,"ากท":1139,"ากฏ":908,"ากอ":816,"ากส":915,"ะเจ":2079,"ากล":1523,"ากร":2608,"ากม":872,"ะเก":848,"ับอ":925,"ับส":1520,"ับเ":2350,"ับแ":746,"ันด":1864,"ันต":2569,"ันท":6775,"ันน":1059,"ันธ":3528,"ับก":2418,"ับค":1139,"ันย":781,"ันว":1287,"ันส":772,"ันอ":2900,"ับท":1243,"ับป":1271,"ันใ":1276,"ับร":1129,"ันแ":786,"ันเ":2354,"ันไ":773,"ันก":1116,"ัตร":1718,"ัดเ":1458,"ัตถ":1084,"ัดอ":823,"ัดส":1175,"ัติ":3172,"ัตว":1677,"ัดก":1357,"ัณฑ":1093," ๆ ":1525,"ัฐอ":1242,"ัฒน":2389,"ึง ":1975,"ีย ":3548,"าชน":1229,"าชว":1452,"าชอ":929,"าชิ":1510,"าชา":1285,"าชก":821,"ัวเ":1510,"ัศน":1087,"าจั":1830,"าจา":3547,"างๆ":1330,"ัวอ":898,"างเ":3767,"างไ":829,"ัวล":1090,"างใ":893,"างแ":1340,"างอ":972,"างส":1811,"างห":1367,"างว":1756,"างม":850,"างร":1234,"างป":1308,"างต":1708,"างท":1838,"างด":961,"างช":741,"างจ":1536,"างค":2031,"างข":1895,"างก":3719,"ัลบ":846,"ะได":777,"าคา":932,"าคม":4556,"าขา":1162,"ะเล":1717,"ากเ":1543,"ะเภ":1470,"าขอ":2080,"ีน ":1380,"ะเท":8450,"ากั":1026,"ากา":3774,"ัมพ":821,"ะเป":3470,"ะทร":812,"ะทา":1011,"ะที":1426,"ะทำ":752," เก":2484," เข":1752," เค":1359," เจ":1771," เช":2450," เซ":916," เด":1861," เท":1287," เน":1184," เป":26794," เพ":2641,"ะบบ":2022,"ะนา":1109,"ะปร":1082,"ะบา":1151,"ิต ":1048,"ิด ":1226,"ิง ":1118,"ะดั":1976,"ะดู":960,"ีก ":1051,"ะวั":4431," อย":1707,"ังเ":1838," อำ":1631," อิ":974," อั":2991," อา":2413,"ะสา":867," ออ":1734,"ิม ":803," วั":2877," 
วิ":1583,"ะอง":1516,"ัญญ":1369,"ะหว":2921,"ัญช":920," สถ":1382," สม":1526," หน":1504," สุ":760," สำ":1162," สา":2037," สิ":1143," สั":1226," ส่":1561," หร":10471," หล":1146," หม":2524,"ักด":1151," รว":890,"ักษ":6831," ระ":1251," รั":1312,"ักร":4702," รา":1609,"ะพุ":1098,"ะพั":826,"ักจ":822,"ักก":1221,"ะมา":2026,"ะมี":2632,"ะยั":766,"ะยะ":834,"ิน ":3149,"ักเ":1448,"ะยา":744," ภา":4274,"ะรา":2815," มี":8842," มั":898," มา":1778," มิ":1110,"ัจจ":2448,"ังส":1789,"ังห":4456," มห":1285,"ังก":5613,"ังค":1611,"ังจ":853,"アアア":748," พร":4751," พฤ":1063," ผู":1667," ปล":779," ปร":4492," ปี":1463," ปั":1211," บร":1264," บา":1052," นิ":1151," นั":2273," นา":1380," ทร":1302," ที":5982," ทำ":956," ทั":838," ทา":1100," ต่":814,"าล ":975," ถน":741," ตา":1753," ตั":4204," ตำ":1120," ตร":740,"าร ":2963,"าว ":2585,"ิก ":1504,"ะจั":762,"ะจำ":1148," ชา":799," ชื":1329," ซึ":6040," จำ":738," จา":1386," จั":3956," จะ":808,"ะชา":1877," คื":4533," คำ":751,"ะกอ":2553," คว":1268," ขอ":3435," ก่":1078," คร":1482," คน":1274,"ะคร":1889," กิ":1156," กั":1618," กา":4296," กล":1472," กร":3165,"ะกู":837,"ะกา":2005,"เคร":3765,"เคย":899,"เข้":3046,"เคล":946,"เกี":2538,"เกา":1880,"เกิ":4857,"เขต":2529,"เกล":1212,"เกม":1559,"เกร":925,"เขี":2819,"เขา":2400,"เก็":770,"ใน ":1513,"เซี":1618,"เจ้":6297,"เฉี":1063,"เชื":1779,"เชิ":917,"เชี":2100,"เช่":2859,"เซล":810,"เงิ":900,"เฉพ":1150,"ู้เ":1092,"ู่ห":1111,"ู้จ":1153,"ู่ท":2435,"ู่บ":1069,"ู่ใ":3858,"ู่เ":814,"ู้ท":742,"ア":1141,"ุ่น":2796,"ุ่ม":4128,"ุ่ง":863,"ูมิ":1515,"ูปแ":1059,"ูนย":864,"ื่อ":28338,"ุษย":1008,"ื่น":2592,"ื้อ":3123,"ื้น":2966,"ุรี":1349,"ุริ":781,"ุทร":864,"ุทธ":2795,"ุบั":2386,"ือเ":2326,"ือท":952,"ือน":2586,"ือส":1106,"ืออ":845,"ือว":1045,"ุดท":873,"ุดใ":860,"ุตร":863,"ุงเ":1324,"ึ่ง":15334,"ึ้น":5621,"ือด":804,"ือง":6868,"ือข":1238,"ือก":2818,"ุคค":864,"้า ":2891,"็กซ":748,"็จพ":2426,"็นเ":5516,"็นแ":1460,"็นโ":1337,"็นก":3439,"็นช":1716,"็นค":1866,"็นจ":1055,"็นผ":1694,"็นบ":1033,"็นป":1559,"็นภ":2128,"็นม":771,"็นพ":2607,"็นต":2314,"็นน":1867,"็นท":2718,"็นอ":2614,"็นห":2383,"็นส":4810,"็นร":2205,"็นว":1944,"่ขอ":746,"่งช":1246,"่งข":3362,"่งก":1222,"่งป":1000,"่งท":1841,"่งต":1001,"่งแ":1232,"่งเ":5701,"่งใ":2695,"่จะ":1049,"่งส":1203,"่งห":867,"่งอ":1487,"่งม":1795,"่งร":800,"็อก":867,"่ตั":1043,"่ที":2658,"่ทา":851,"่นด":850,"่นเ":1175,"่น้":1389,"่ปร":1522,"่ปุ":1738,"ไทย":5121,"ไม้":1526,"ไม่":4922,"่น ":4848,"ไฟฟ":846,"一":1792,"ไว้":1146,"่อ ":2927,"้ง ":1525,"่า ":8977,"้น ":3595,"้ว ":1138,"โบร":910,"โปร":1570,"ใกล":918,"โดย":14241,"โทร":1350,"ใช้":8306,"โลเ":1118,"โลก":2916,"โรม":847,"โรง":1846,"็ก ":901,"็น ":2078,"ใต้":3035,"ในช":1757,"ในต":1018,"ในค":959,"ในก":4474,"ในส":3229,"ในห":971,"ในอ":1785,"ในว":1980,"ในป":5976,"ในบ":1100,"ในท":1717,"ในร":2523,"ในพ":1416,"ในภ":1762,"ในแ":1213,"ในเ":3488,"ให้":6947,"่ง ":3045,"ได้":12204,"ใหม":1526,"ใหญ":3818,"เป็":52874,"เบิ":951,"เบี":820,"เปิ":978,"เปล":1464,"เท่":1160,"เธอ":1279,"เนิ":1467,"เนื":3043,"เนี":1468,"เต็":792,"เทค":856,"เทอ":765,"เทศ":9046,"เที":1082,"เทพ":2509,"เดอ":1340,"เดี":3292,"เดิ":2706,"เดื":969,"เตอ":2325,"เด็":3070,"เอ็":1297,"เอเ":1135,"เอง":1028,"เห็":854,"เอก":1715,"เหล":3579,"เส้":1396,"เหม":1208,"เสี":3555,"เหต":864,"เหน":2585,"เสร":829,"เศษ":814,"เศส":1255,"เวี":1017,"เวล":1640,"เล็":2263,"เล่":1978,"เวณ":1738,"เลี":1467,"เลื":1233,"เร็":978,"ไป 
":836,"เลข":1395,"เริ":2046,"เรี":7952,"เรื":4783,"เยอ":752,"แข่":1582,"เมร":2010,"แก่":1962,"แก้":737,"เมื":12490,"แกร":866,"เภอ":2333,"เมต":1938,"เภท":1385,"เพล":3180,"เพร":748,"เพี":982,"เพื":3833,"แพท":735,"แผ่":1298,"แนว":1987,"แบบ":4625,"แปล":1818,"แบ่":978,"แดน":987,"แตก":800,"แต่":6839,"แทน":1132,"แดง":995,"แอน":801,"แสด":2090,"แห่":6124,"แหล":799,"แหน":1000,"และ":28762,"แลน":1347,"แล้":2088,"แยก":1197,"แรก":3318,"โคร":1607,"แม่":2248,"แรง":831," แล":16653," แม":1055," โค":742," แอ":790," แห":1041," โด":7452," เว":860," เล":918," เร":2527," เม":4395," เอ":1986," เส":1106," แต":2750," ได":1910," ไม":1165," โร":1135," ใช":876," ใน":8170,"์ใน":1573,"์เน":739,"์เป":837,"์เร":776,"์แล":1851,"์ขอ":1461,"์กา":933,"้ใน":1845,"์ที":2950,"์ตู":1145,"์ปร":767,"้นม":919,"้นส":1040,"้นต":741,"้นท":2678,"้นก":769,"้ที":841,"้ปร":770,"้นไ":757,"้นใ":1235,"้นเ":2203,"่าเ":2240,"アア":942,"่าน":3867,"่าว":1950,"่าย":3313,"่าก":964,"่าง":12952,"่สุ":2074,"่สา":868,"่อก":1359,"่อข":925,"่อม":2720,"่อย":2839,"่อท":791,"่อต":2073,"่อป":1191,"่อน":4061,"่อง":9204,"่อเ":3452,"่อใ":1119,"่ออ":896,"่อว":3681,"่อส":1953,"่วน":5177,"้งอ":3416,"้งส":845,"้งห":1146,"่วง":2102,"้จะ":789,"้จั":1225,"่วม":1638,"้งเ":1096,"่วย":1812,"้งแ":3344,"่วไ":1216,"่ยว":2420,"่ยม":1269,"่รู":752,"้งข":959,"้งท":1116,"่ละ":738,"้กั":1036,"่มา":795,"่ยน":1164,"่มี":6169,"้เป":2320,"้แก":1074,"้เก":745,"้าอ":1154,"้าส":795,"้าห":1044,"้าแ":781,"้าเ":1817,"้าท":1477,"้าร":977,"้าย":2779,"้าม":1703,"้าว":1169,"้าน":6510,"้าง":6437,"้อน":1455,"้อย":2449,"้อม":2269,"้อง":6049,"้ว่":734,"้วย":5521,"่ให":733,"่ได":1564,"่ไม":926,"่ใช":1862,"่ใน":4343,"้รั":2515,"่เก":1904,"่เข":763,"่เร":861,"่เป":2235,"้มี":1570,"่เห":1028," Ma":1010," Ch":742," Co":992," Th":1261," of":1521," co":752," th":1128,"�":5212,"กา ":1384,"กร ":1188,"ก่ ":760,"一一":774,"ของ":31894,"ก่อ":3663,"ขัน":1726,"ขาย":803,"ขาว":1067,"กี่":1664,"กีย":867,"กิโ":1154,"ค์ ":769,"กีฬ":1239,"กิน":769,"กิด":3855,"กิจ":1573,"กำห":759,"กูล":885,"กุล":1306,"กุม":773,"ขนา":2496,"กใน":2081,"กแล":866,"กแบ":824,"กเร":871,"กเม":773,"กเป":1534,"กเฉ":1039,"กเข":1098,"ข่ง":1611,"ข่า":854,"ข้อ":2417,"ข้า":4839,"คณะ":1457,"ขีย":2776,"ขึ้":5628,"คคล":827,"กจะ":954,"กจา":1248,"กคร":1721,"กกา":2113,"กกั":1063,"กกว":874,"คน ":1108,"กขอ":2098,"กซ์":975,"คร ":1077,"คม 
":4347,"กล้":2502,"กล่":1300,"กษร":1944,"กว้":773,"กว่":3702,"กษณ":3331,"กษั":896,"กษา":3668,"กรม":2005,"กรณ":1173,"กรา":2022,"กรี":1057,"กรุ":2471,"กระ":5410,"กรร":3527,"กรว":929,"กฤษ":3489,"กลุ":3431,"กลั":865,"กลา":3035,"กัด":1452,"กัน":7783,"กับ":9982,"กาศ":2249,"กาล":1237,"การ":39240,"กาย":2168,"กาะ":1419,"กษ์":1004,"กอง":1215,"กอบ":2433,"กอา":822,"กที":2316,"กต่":822,"กติ":1015,"The":934,"กตั":798,"กดิ":842,"กมา":1372,"กรก":1149,"กภา":827,"กปร":1145,"งค์":4532,"งงา":824,"งจั":1172,"งจา":3155,"งชา":2378,"งกร":1669,"งกฤ":3370,"งกล":1847,"งกา":5753,"งกั":3274,"งขึ":1653,"งขั":1578,"งขอ":4624,"งข้":745,"งคว":1083,"งคล":878,"งคร":2365,"งต่":740,"งตั":1510,"งตะ":916,"งนั":904,"งที":5394,"งทา":994,"งทั":1042,"งปร":4913,"งนี":1331,"งผู":776,"งปี":968,"งด้":1068,"งสม":789,"งศ์":3006,"จจุ":2329,"งวั":2363,"งอย":4387,"งออ":847,"งอั":937,"งอา":1391,"งหน":1700,"งสุ":772,"งสื":1073,"งสิ":892,"งหม":1633,"งสร":855,"งสั":912,"งสา":1650,"งหล":2008,"งหว":4124,"งหา":876,"งภา":1562,"งมี":2826,"งมา":1681,"งพร":2199,"งรา":1862,"งรั":1304,"งระ":937,"จพร":2446,"งาน":5529,"งิน":899,"คลอ":868,"คลื":949,"คล้":1302,"ควา":10480,"ครง":1144,"ครอ":2563,"ครั":3903,"ครา":1915,"คริ":2019,"ครี":810,"ครื":2934,"คุณ":1069,"คือ":7513,"คาร":1456,"คัญ":1773,"คิด":898,"คำว":930,"คอม":1178,"งๆ ":1126,"al ":2103,"ค่า":1374,"and":1056,"an ":1298,"คู่":872,"ค์เ":768,"ค์ก":947,"ati":1494,"ค้า":1139,"ชวง":936,"ชอา":798,"ชาว":3493,"ชาย":1746,"ชาต":3190,"ญ่ ":976,"ชาช":786,"ชัย":783,"ชาก":1050,"ชัน":900,"ชิง":1090,"ชิก":963,"ชั้":1496,"ชั่":737,"ชีย":1882,"ชีว":1797,"ชุด":1385,"ชุม":779,"ชื่":9015,"ช่ว":2205,"ช่อ":1132,"ช่น":2842,"ซอร":818,"ช้ใ":1180,"ช้เ":1405,"ซีย":1624,"ซึ่":8007,"งไม":762,"งไป":735,"งได":1184,"งให":1499,"งเห":1953,"งเส":869,"งแต":2125,"งเพ":1132,"งเป":5244,"งเศ":1374,"งเล":753,"งเร":2306,"งเม":1299,"งเท":2214,"งเด":1083,"งเก":2093,"งเข":1000,"งเค":853,"จริ":1485,"งใต":746,"งใน":4860,"งโด":1508,"งแร":2003,"งแล":1588,"จอม":844,"จัก":4523,"จะม":1426,"จัง":4135,"จึง":1351,"ซ์ ":949,"จีน":2020,"จุบ":2333,"จุด":1489,"จัน":795,"จาก":13356,"จะเ":1881,"จัด":3836,"จาร":1224,"จำน":1462,"จิต":1255,"ฉพา":1147,"ชกา":919,"จ้า":6405,"ชนิ":2672,"ชนะ":817,"ฉีย":1060,"is ":767,"ion":2025,"ญิง":1134,"ญี่":1736,"ญญา":1273,"he ":1580,"ฐอเ":1010,"ดย ":1795,"ic ":1027,"ia ":801,"ica":989,"ine":780,"ing":1208,"in ":877,"ต้ ":1080,"ติ ":2277,"er ":1893,"es ":1392,"ด้ ":1252,"ด์ ":1553,"ent":1216,"ฒนา":1571,"ตร ":2238,"ดา ":815,"ดี ":1229,"ฐาน":2388,"ณ์ ":911,"ดีต":1119,"ดำเ":759,"ดิม":1868,"ตถุ":806,"ดิ์":821,"ท์ ":857,"ดีย":3255,"ดูก":1062,"ดือ":962,"ดับ":3761,"ดัง":1352,"ดาร":853,"ดาว":2014,"ดิน":2900,"ดหน":1239,"ดยก":793,"ดยเ":1721,"ดยม":1638,"ดยส":889,"ดยอ":756,"ดยท":955,"ดนต":1392,"ดที":1456,"ดตั":945,"ed ":825,"ทศ ":1067,"ณาจ":1453,"ณะเ":897,"ทย ":2718,"ดจา":942,"ดขึ":1374,"ดขอ":1317,"ดกั":890,"ดกา":1277,"ต์ ":1632,"ุ ":1109,"นี ":1305,"นา ":1502,"ี ":16016,"ิ ":4510,"ำ ":2043,"re ":749,"า ":25017,"ต้น":2996,"ต้อ":1614,"ต่ง":1112,"ต่อ":4486,"ต่า":3505,"ต่ล":740,"ู ":1781,"นๆ ":762,"่ ":16723,"้ ":5526,"ต์ศ":958,"ๆ ":4266,"ry ":820,"ถาป":753,"ถาน":3944,"ทธศ":829,"ถือ":1350,"ทธิ":2039,"ถูก":2600,"ถึง":6398,"น์ ":1653,"ถิ่":814,"์ ":28985,"ดเม":981,"ดเล":1139,"ดเป":846,"ตรง":1229,"ตรก":784,"ดใน":1781,"ตรี":2898,"ตริ":1006,"ตรา":1575,"ตระ":1105,"ตร์":7354,"ดให":1327,"ตวร":760,"ด็จ":2802,"ตว์":1614,"ด้แ":1081,"ด้เ":1187,"ด้า":3056,"ด้ว":5447,"ด้ร":2493,"ตอร":2348,"ตอน":1918,"ตัว":7601,"ตะว":2896,"ติก":1508,"ติน":757,"ติด":1536,"ตั้":8579,"ตาล":1104,"ตาร":1322,"ตาม":4567,"ตำบ":1639,"ธ์ 
":976,"ติเ":954,"ติแ":866,"ติศ":1101,"ตำแ":988,"ถนน":2048,"ตุล":748,"นะ ":754,"ตูน":1178,"นขอ":3384,"นข้":932,"นคว":826,"ธรร":2826,"นคร":3912,"นกร":1617,"นกล":2158,"นกา":7632,"นกั":1355,"นฐา":769,"นช่":1290,"ท้อ":1115,"นชื":1549,"นชา":864,"ท่า":2380,"ท้า":1066,"นจา":1033,"นจั":1141,"นจะ":747,"ข ":896,"นธร":779,"นธุ":1127,"นทร":2489,"นทั":791,"นทา":2750,"นที":13368,"นต้":1225,"นถึ":809,"นต์":1445,"ธิ์":953,"นด้":1065,"ก ":14487,"นตร":4843,"นติ":835,"นตั":2147,"นตา":888,"นด์":1948,"ธิก":846,"นตก":1337,"นดั":1314,"นดิ":1223,"ธาร":1184,"ง ":27727,"ค ":5823,"ธาน":978,"นภา":4234,"บกา":2494,"ช ":2112,"นพื":1006,"จ ":1697,"นพร":3282,"นผู":1561,"นปี":3086,"นปร":4565,"นปั":802,"นบุ":863,"นบร":1204,"ธุ์":901,"นนิ":1044,"นนี":1085,"นนา":1321,"นนั":1485,"ญ ":928,"นธ์":2163,"ทรง":2910,"ทยา":5565,"ทรท":914,"ทรา":963,"ณ ":3143,"ทร์":996,"ถไฟ":2170,"ทพม":811,"ต ":4123,"ด ":8336,"ท ":2444,"น ":40656,"ทสม":792,"ทหา":866,"ป ":2071,"ทอง":1146,"บ ":5246,"ทอร":781,"ทศอ":828,"ฟ ":941,"ทวี":828,"พ ":8598,"ทศไ":1737,"ทศเ":766,"ม ":15299,"ทิน":1259,"ทั่":2123,"ทั้":3993,"ย ":16564,"ร ":10052,"ทำใ":1466,"ทิศ":1076,"ล ":7227,"ว ":7449,"ทัศ":1092,"ทาง":10997,"ทะเ":1496,"ทัพ":825,"ทาน":903,"ศ ":12299,"ษ ":3178,"ส ":5552,"ปี ":4823,"ทุก":1239,"อ ":17206,"ที่":56152,"ะ ":8344,"ปฏิ":1862,"บอล":946,"ng ":1471,"บิน":950,"ne ":979,"บาล":1707,"บั้":776,"บาง":2680,"บัน":3828,"บาท":1409,"nd ":964,"บัต":1362,"บัญ":812,"บวน":840,"นไป":997,"นไม":765,"นให":1732,"นได":1498,"บรา":1706,"บริ":5195,"นใน":4356,"น้ำ":5368,"น้า":3676,"น้อ":1479,"น่า":1206,"น่ว":1000,"น่ง":989,"ปรา":2156,"ประ":25303,"ปริ":937,"nt ":807,"บุค":937,"บีย":817,"บุร":1542,"นหม":998,"นสั":1468,"นสา":1759,"นสำ":796,"นสี":845,"นหน":3532,"นสุ":1261,"นหล":1601,"นส่":1409,"นสถ":1319,"นสม":1639,"นอย":1426,"นอั":1719,"นอา":1493,"นออ":2019,"นอก":1327,"นอง":1083,"นอิ":1016,"of ":1513,"นับ":1179,"นาง":1305,"นาค":1491,"นาด":2642,"นัง":1602,"นัก":6595,"บคว":838,"นยา":1119,"นยุ":813,"นมา":2249,"นมี":1207,"บขอ":813,"นรู":1331,"นรา":1843,"นระ":2096,"นรั":1125,"นย์":888,"นวน":2013,"นวง":1415,"นว่":1138,"นวา":814,"นวิ":1095,"นวั":1960,"บปร":1255,"บบเ":871,"ปกค":1297,"นเค":1189,"นเก":1640,"นเข":1425,"นเจ":957,"นเด":1933,"นเป":2635,"นเพ":1597,"นเท":2022,"นเร":1822,"นเม":2536,"นเส":1392,"นเว":905,"นเอ":1413,"นเห":886,"นแบ":783,"นแร":793,"นแล":1965,"นแห":1112,"นโด":2101,"บรม":1009,"บรร":1513,"นโล":1115,"นั้":3921,"นาม":2497,"นาย":2144,"นาน":1674,"นิน":1076,"นิด":3216,"นิก":1334,"นิว":770,"บด้":1147,"นิย":2396,"นีย":1820,"นีร":1196,"on ":2708,"บที":1359,"นี้":6260,"นือ":2427,"นึ่":6892,"นื้":1785,"นื่":1254,"นุษ":996,"le ":881,"ป่า":900,"ป็น":52765,"ผ่น":1156,"ฝรั":1300,"ผู้":7889,"ปิด":1097,"ปาก":1194,"ปัจ":2459,"บ้า":2101,"ปลา":2984,"ปลี":1161,"บ่ง":1079,"ผลง":1088,"ผลิ":1382,"ปแบ":1026,"ปุ่":1831,"พรร":1714,"พยา":839,"พยน":2010,"พมห":762,"ยน ":4029,"มี ":1149,"มา ":896,"ฝ่า":781,"ยม ":948,"พลั":814,"พวก":739,"พลง":2750,"พระ":17707,"พรา":866,"ยง ":1443,"ผ่า":1411,"ฟฟ้":828,"รม ":1362,"พัฒ":1494,"ม่ ":849,"พัน":3666,"พาะ":1326,"ยว ":1115,"พื่":3826,"พูด":1126,"พื้":2671,"ยา ":1376,"พิธ":838,"พิม":1132,"พิว":738,"พีย":958,"พุท":2067,"มขอ":1433,"มกร":823,"มกั":1433,"มกา":1276,"ฟ้า":1943,"มชา":782,"ลก ":1247,"ภอเ":800,"ลง ":792,"ย์ ":2750,"ระ ":742,"รี ":2785,"รา ":766,"ยขอ":1509,"มมา":901,"มริ":2047,"มรา":849,"ยงเ":962,"ยงใ":1176,"ยชน":801,"มหม":966,"มอง":738,"มหา":5731,"มาช":1046,"มาจ":2680,"มัย":2145,"มาก":4211,"มัน":2325,"มัก":1644,"ภาค":3301,"ฤษ ":2433,"ภาพ":5887,"ภาย":1934,"ภาษ":8339,"มตร":1922,"ร์ 
":8527,"มทั":835,"มที":1983,"มนต":905,"มนุ":1036,"ภูม":1417,"มปร":865,"มพิ":882,"มพร":946,"มพ์":1080,"ยกา":2466,"มภา":858,"ยกั":787,"ยกว":1726,"ยกร":1291,"ยาน":1202,"ยาล":2584,"ยาว":2212,"ยาม":807,"ยาย":2092,"รณร":750,"ยาศ":1408,"รณ์":2303,"ยัง":2948,"ยาก":955,"วง ":1110,"ยอด":1124,"ยอร":940,"ยอย":754,"ม่เ":922,"ม่อ":742,"ยสา":1028,"รจั":932,"ม่น":1356,"ม่ม":1196,"ยวก":2166,"รงเ":2099,"มใน":892,"รงก":1367,"รกเ":741,"มเห":813,"รขอ":1042,"ยมี":1590,"รกร":1251,"มเก":770,"มเด":3008,"มเป":1110,"ลี ":1049,"ยปร":756,"มู่":1176,"ลา ":1161,"มูล":1423,"มื่":6537,"ละ ":3400,"มุท":857,"มือ":8088,"ยนต":2593,"ยที":1815,"ยทั":824,"ยถึ":2220,"มีเ":1932,"มีอ":1345,"มีล":1533,"มีห":1060,"มีส":1890,"มีผ":1122,"มีพ":1283,"มีร":1326,"มีท":746,"มีน":1210,"มีป":899,"มีค":3125,"มีช":2478,"มีข":1119,"มีก":2644,"มาเ":1480,"มาร":4462,"มาย":5367,"มาต":1157,"มาณ":1914,"ราบ":751,"ราน":740,"ราณ":897,"ราว":1444,"รั่":1334,"รั้":3560,"ราม":1910,"ราย":2763,"ราะ":1374,"ริญ":758,"ริก":3434,"ริง":1138,"ริน":744,"ริษ":1438,"ริส":1925,"ริม":1124,"ริย":2233,"ริเ":2100,"รีส":866,"ริ่":2037,"รีย":9363,"ระก":4332,"ระจ":2540,"ระช":2359,"ระด":3552,"ระน":1422,"ระบ":4735,"ระท":2215,"ระธ":815,"ระย":1763,"ระม":3043,"ระร":2751,"ระพ":1841,"รัก":1669,"ระส":2582,"รัช":1049,"ระห":3447,"ระว":1743,"รัฐ":5336,"ระอ":2225,"รับ":6837,"ระโ":884,"ราค":879,"ระเ":13435,"ราก":1663,"รัส":1059,"ราช":9495,"ราจ":779,"ราง":1497,"ย่า":4021,"ย่อ":1669,"วย ":1162,"รอง":3048,"รอบ":1542,"รอน":750,"รวม":2782,"รวง":825,"รษท":797,"รศึ":1157,"้ใ":2847,"้ไ":1284,"้แ":2652,"้เ":7277,"ยแล":953,"รรค":1151,"้ำ":5903,"้า":37643,"รรด":1433,"้อ":17505,"รรณ":1404,"้ห":1048,"้ส":2215,"์จ":1166,"์ช":1393,"วน ":1214,"์ค":1626,"์ก":3825,"์ข":1876,"รมั":970,"์ว":1155,"์ล":906,"์ส":3129,"์ห":969,"์ศ":1201,"์พ":863,"์ร":1348,"์ม":2202,"์ท":4622,"์ต":2998,"์ด":2081,"์ป":1557,"์บ":1254,"์น":1942,"์ไ":1599,"รรษ":1352,"รย์":876,"รรม":6124,"์เ":8295,"์แ":4502,"์โ":1808,"์ใ":2084,"ยใน":2770,"์อ":1673,"รพร":845,"ยเป":1383,"แอ":2776,"แส":3385,"โซ":1292,"แห":8308,"แล":33169,"แว":982,"โจ":1094,"แย":1484,"แม":4364,"โค":3872,"แร":4857,"โพ":1125,"ยุโ":733,"ใก":924,"โม":2045,"รปก":742,"โบ":1679,"โป":2206,"โท":2397,"โน":2242,"โด":16368,"โต":2472,"โอ":4042,"ใช":8587,"ใจ":1545,"โล":5940,"โร":5880,"โย":1454,"ไข":762,"ไก":791,"ใน":43294,"ใบ":873,"ใต":3040,"ใด":738,"ให":12321,"ไซ":1010,"ไฟ":3494,"ไม":7091,"ไร":1255,"ได":12875,"ไต":1235,"ไท":6054,"ไป":6850,"ไอ":1157,"ไว":1681,"ไล":879,"ยู่":11314,"ไห":1089,"รปร":1489,"็ก":4152,"็ง":850,"็จ":3701,"็ต":1012,"็ด":1111,"็บ":1365,"รดิ":1521,"็น":54742,"่ง":35546,"่ค":1361,"็ม":1660,"่ข":1323,"่ก":2880,"่จ":2216,"่ช":990,"็อ":1898,"่ด":1050,"่ต":3007,"่ถ":906,"่ท":5172,"่น":17313,"รที":1499,"่ป":4265,"่บ":1847,"่ผ":743,"่พ":1708,"่ม":15356,"้ข":1096,"้ก":2888,"่ร":2826,"้ค":1674,"่ย":6368,"่ล":1168,"ยุค":1712,"้จ":2773,"่ว":14132,"้ง":18710,"่ส":6307,"่ห":2814,"รถไ":2194,"้ช":1217,"่อ":46408,"่า":42021,"้ด":1042,"รทำ":739,"้ท":1936,"้ต":1075,"รทั":1036,"้ป":1226,"้บ":1138,"้น":24081,"๊ก":920,"้พ":1364,"่ใ":7390,"้ร":3437,"่โ":798,"่แ":2495,"้ย":1840,"่เ":10182,"้ม":4709,"้ว":10098,"่ไ":2895,"้ล":760,"เอ":9583,"เฮ":1007,"ศส 
":746,"เศ":2656,"เส":8693,"เห":10183,"เล":12934,"เว":8237,"แก":4707,"เภ":3987,"เม":21131,"แข":3088,"เย":2348,"เร":18471,"แค":2187,"แพ":2260,"โก":2046,"แบ":6575,"แน":2938,"แผ":2002,"แป":2594,"แถ":810,"แต":8070,"แท":1899,"แด":2231,"เช":8753,"เซ":6021,"เจ":10004,"เฉ":2866,"เง":1146,"เค":8385,"เข":11528,"เก":16975,"เฟ":1048,"เพ":11355,"เผ":802,"เป":58033,"เบ":4728,"เน":8994,"เธ":1416,"เท":18401,"เต":6088,"เด":13846,"ลที":1395,"ูเ":2044,"ู่":14654,"ู้":10588,"ูป":4480,"ูน":2921,"ูร":1466,"ุโ":1437,"ูม":1757,"ุเ":849,"ูล":3073,"ุ่":7901,"ุ้":931,"ุ์":914,"ูต":1281,"ร่า":1627,"ูด":1928,"ร่ว":1692,"ื้":6123,"ุส":1052,"ื่":31115,"ุษ":1254,"ูง":2041,"ุล":3040,"ุร":4653,"ูก":5828,"ุม":4112,"ุบ":2913,"ุป":1128,"ุธ":955,"ุน":3561,"ร้า":4429,"ุต":2413,"ุด":8113,"ุท":4856,"ุณ":1621,"ุญ":872,"ือ":41365,"ร้อ":2730,"ึ้":5783,"ึ่":15365,"ุค":2816,"ุง":2994,"ุก":3888,"ุข":1229,"ืน":1145,"ี่":66161,"ี้":8515,"ีเ":7027,"ีใ":1123,"ีแ":2432,"ีโ":1682,"ีอ":2953,"ิ์":1865,"ึง":8316,"ีล":2237,"ีว":3304,"ีส":4174,"ิ้":2507,"ิ่":6446,"ีฬ":1240,"ีห":1469,"ีพ":2697,"ึก":5012,"ิเ":7222,"ีม":1954,"ิแ":1214,"ีย":34404,"ิโ":3549,"ีร":4629,"ีต":2652,"ีท":2505,"ีน":5775,"ีบ":2030,"ีป":2156,"ีผ":1219,"ิส":6204,"ิห":1497,"ิศ":3177,"ิษ":2523,"ีช":3206,"ีจ":1149,"ิว":4515,"ร์ส":1173,"ำใ":1865,"ิล":5788,"ำไ":839,"ีด":1673,"ิอ":948,"ิป":1641,"ร์เ":3538,"ิบ":2835,"ิน":18424,"ิธ":2036,"ิท":9171,"ิต":10282,"ิด":13442,"ิร":3582,"ีค":3922,"ำแ":2282,"ิย":5047,"ีข":2272,"ำเ":6458,"ิม":7019,"ีก":7455,"ิภ":1128,"ิพ":1917,"ร์แ":1733,"ร์โ":778,"ำว":1118,"ิจ":3243,"ิช":2649,"ำส":738,"ิค":1757,"าโ":3139,"ำร":1745,"าใ":3181,"ำล":1253,"าไ":2775,"ิง":7259,"ำห":4498,"ิญ":1127,"ำท":973,"ำบ":1971,"ำน":4890,"ำด":1025,"ำต":1664,"ิก":12850,"าแ":4055,"าเ":12583,"ำม":1274,"ำป":747,"ำพ":760,"าล":10057,"าย":37411,"ำค":2190,"าร":63897,"าศ":5622,"าษ":8889,"ั่":6714,"าว":19634,"ำจ":874,"าอ":4975,"าส":13760,"ั้":24167,"าห":5552,"าะ":4602,"าต":8268,"าด":6631,"าณ":5371,"าน":35220,"าธ":2890,"าท":8396,"าถ":784,"าผ":775,"ร์ก":1498,"าป":2833,"าบ":4733,"าม":30146,"ำก":2269,"าภ":1081,"าฟ":1063,"าพ":8053,"ะเ":24505,"าข":3951,"ัม":2974,"ะแ":2571,"ัย":8749,"าค":10848,"ะโ":2150,"ะใ":1602,"ะไ":1960,"ัล":3755,"าง":41529,"ัว":11006,"าจ":10067,"ัศ":1321,"าช":13729,"ัส":4036,"าซ":1724,"ร์ด":1551,"าญ":933,"ร์ต":2154,"ร์ท":1160,"ัฒ":2408,"ัด":14375,"ัณ":1273,"ัต":9709,"ัท":2203,"ร์ม":749,"ับ":25368,"ัน":39721,"ัพ":2281,"าก":31704,"ฮั":845,"ฮิ":902,"ฮา":872,"ะช":3044,"ะจ":3682,"ะข":1220,"ะก":7907,"ะค":3630,"ะธ":897,"ะน":3483,"ะบ":5762,"ะป":1992,"ะพ":3693,"ะด":4349,"ะต":2884,"ะถ":831,"ะท":5514,"ัช":1701,"รเล":875,"ะห":5596,"ะส":5803,"ะอ":4641,"ัญ":5583,"ัฐ":5386,"ะม":6798,"ัก":25499,"ะภ":863,"ะร":4376,"ัค":775,"ะย":3056,"รเม":755,"ะล":1194,"รแข":1253,"ัจ":2657,"ะว":5659,"ัง":23502,"อป":2561,"รเป":876,"อน":18187,"อบ":7490,"อท":2655,"อธ":1085,"อต":3868,"อถ":749,"ฬา":1653,"อด":5925,"ห์":1891,"อห":2050,"อส":6091,"อว":5778,"อล":4526,"อร":15873,"อย":21178,"อม":9516,"อภ":865,"อฟ":1564,"อพ":1610,"อี":4000,"อื":1874,"อุ":3605,"อั":11299,"อา":13455,"อำ":3262,"อิ":6605,"ออ":10729,"อะ":2163,"ฮอ":754,"อ่":1463,"อ็":1408,"อโ":1451,"อแ":1805,"อไ":1254,"อใ":1838,"อเ":12919,"ษ์":1229,"สอ":3466,"หญ":5078,"us 
":1226,"สห":2331,"ลงา":1081,"สั":9793,"สะ":1590,"สี":9218,"หต":869,"สิ":7137,"สำ":6109,"สา":15975,"สู":4642,"หน":20409,"สุ":8301,"สื":2761,"หม":12564,"สเ":3613,"สแ":834,"หย":1301,"หล":16331,"สโ":1107,"หร":18933,"ส่":6557,"ส้":1769,"หว":8188,"ส์":4529,"หอ":742,"หา":12502,"หั":3338,"หิ":1200,"อก":17318,"อข":2506,"อค":2179,"อง":70500,"อจ":1268,"ห็":854,"อช":1235,"ห่":6955,"ห้":8276,"ษฐ":1153,"ศอ":967,"ษฎ":870,"ว์":2314,"ว้":3043,"ศส":1760,"ศษ":814,"ว่":16781,"ษท":1007,"ศึ":3055,"ศิ":2300,"ศา":8110,"ษณ":3808,"ศั":2984,"สก":2168,"ศู":867,"ศไ":1794,"สง":2785,"ษร":2008,"ศเ":1312,"ษย":1193,"ศ์":3007,"สถ":6089,"สน":4818,"ษา":12813,"รใน":1139,"ษั":2349,"สต":9741,"สด":3309,"สภ":1290,"สม":10925,"สว":2702,"สร":6497,"สล":1065,"รใช":756,"วย":9723,"วม":5477,"วร":6029,"วล":3904,"วท":1967,"วบ":1401,"วน":11532,"วป":795,"รูป":4257,"วณ":1942,"วด":2173,"วต":1183,"วง":11814,"วจ":1546,"ล็":3380,"รุ่":859,"วช":1103,"ล่":6051,"ล้":7758,"ล์":1887,"ศร":2364,"วโ":1470,"วแ":1429,"วเ":5629,"ศว":1021,"วไ":2301,"วใ":947,"ศน":1526,"วั":23102,"วา":16144,"วะ":947,"วี":3571,"ศท":825,"ศต":1496,"วิ":15211,"วส":1841,"ศจ":1019,"วอ":3830,"วห":1387,"รื่":5863,"ลย":1821,"ลม":2014,"ลร":753,"ลบ":1840,"ลน":3276,"ลป":2282,"ลต":1315,"ลท":2213,"ลด":1361,"รือ":16002,"ร์":31431,"ร็":1969,"ฤษ":4446,"ร่":5273,"ร้":7400,"รใ":2364,"รไ":1004,"ลง":7031,"รุง":2477,"ลจ":865,"ลใ":839,"ลโ":860,"วค":1204,"ลแ":1254,"ลเ":4918,"วข":1721,"วก":5218,"ลู":2435,"ลุ":4907,"ลื":3984,"ลี":7428,"ลำ":2409,"ลิ":7229,"ลั":14699,"ลา":17258,"ละ":32443,"ลอ":5473,"ลห":915,"ลส":1856,"ลว":3836,"ลล":2565,"ยา":16472,"รณ":5716,"รด":4057,"ยิ":1263,"รต":2974,"รถ":6175,"ยี":1186,"รท":5128,"ยื":1006,"รน":2627,"ยุ":5509,"ยู":12722,"รบ":2528,"รป":3771,"รพ":2742,"ยเ":7850,"รม":12636,"รย":2421,"ยแ":3243,"รค":3841,"มโ":1965,"ยร":3391,"มแ":3080,"ยล":1160,"มไ":1232,"มใ":1839,"รจ":2340,"ยว":7799,"รง":8918,"ม่":10097,"รช":1118,"ยศ":1015,"ยห":1973,"ม้":2616,"ยส":4483,"ยอ":5464,"ยั":4025,"ยะ":1489,"รี":20028,"ริ":20674,"รู":7587,"รื":21925,"รุ":4970,"รแ":4843,"รโ":1568,"ลค":797,"ลก":4719,"รเ":8258,"ลข":2218,"รล":1190,"ยไ":1866,"ยใ":4743,"รร":16031,"ยโ":1499,"รส":5411,"ย่":5792,"รษ":2069,"รศ":2322,"รว":7099,"รอ":8553,"ย์":8004,"รห":1918,"รา":29845,"รั":23940,"ระ":59662,"มต":4461,"มถ":925,"มท":4160,"ภั":1399,"มณ":1076,"ภา":21089,"มด":2959,"มพ":4702,"ยก":11626,"มภ":1402,"มน":5654,"ภู":2200,"มบ":2018,"มป":2831,"มผ":871,"มจ":1684,"มง":1139,"มข":2191,"มก":5523,"มค":2300,"ภอ":2334,"มช":2595,"ฟ้":2049,"มื":14762,"ยน":14895,"มุ":2792,"มี":30856,"ยถ":2445,"ยท":4014,"ยด":1555,"ยต":2395,"มิ":5697,"มั":8313,"มา":29552,"ยภ":836,"รก":8900,"มเ":11478,"ยม":7097,"รข":1777,"ยพ":1762,"ยบ":2166,"มู":3200,"ยป":1662,"มศ":1140,"มว":3354,"ยจ":1218,"ยง":8717,"มล":1770,"ยค":2918,"รู้":1907,"มร":6136,"มย":1297,"ยข":2004,"มม":2832,"มะ":1294,"มอ":4804,"มห":8473,"มส":4913,"ยช":2472,"ลาย":5187,"ฝร":1302,"ผ่":2887,"ฝั":851,"ลาด":921,"พท":1912,"พน":931,"ลาน":856,"พบ":1789,"ปแ":1689,"ปเ":1359,"ผล":4309,"ป้":1280,"ป็":53080,"ป่":1217,"ลำด":791,"ลำต":811,"ผิ":1067,"ผู":7995,"ฟร":1492,"ฟล":850,"ฟฟ":1043,"พเ":1196,"ลิม":935,"ฟอ":994,"พ์":1176,"ฟิ":1569,"ภท":1390,"ลิต":1534,"ฟุ":809,"พฤ":1487,"พล":6364,"พย":3936,"พร":23104,"ลีย":1437,"พม":1303,"พอ":1240,"ฝ่":810,"พว":940,"พี":2676,"พิ":6438,"พา":3798,"พั":6166,"พู":1636,"พุ":2493,"พื":7226,"นย":4074,"ลี่":2431,"บข":1492,"นม":6695,"บค":3089,"นร":8383,"นล":2573,"นศ":1853,"บจ":1007,"นว":11694,"นบ":4831,"ธุ":1517,"นน":9487,"นผ":3027,"นป":10367,"นพ":7135,"นภ":4756,"บก":4476,"นฟ":756,"นั":15362,"นา":19798,"บด":2422,"นำ":2990,"บต":1487,"นิ":13549,"นี":13301,"นึ":7032,"บท":4278,"นื":5497
,"บน":4184,"นุ":3598,"นส":15743,"นห":8286,"ธ์":2434,"นอ":14760,"นะ":2415,"ธร":3542,"นค":9380,"ลือ":2681,"ธย":895,"ทเ":1085,"นข":6066,"นก":17019,"นจ":5539,"นง":887,"ทุ":2331,"ธา":3053,"นด":8599,"ธั":762,"นท":23586,"นธ":5365,"นต":15763,"ธิ":5695,"นถ":1559,"ธี":1300,"ท้":2426,"นซ":2369,"ธศ":832,"นช":5687,"ท่":3112,"นฐ":772,"ท์":2028,"ธอ":1358,"บ้":2221,"ปส":1027,"บ่":1386,"บใ":1319,"ปร":33042,"บโ":1424,"บไ":1667,"ปล":7500,"ลื่":1081,"บแ":1614,"บเ":5338,"ลูก":1704,"ปุ":2101,"ผน":738,"ปิ":2900,"ปี":8407,"ปะ":1033,"ปั":4469,"ปา":3048,"ปอ":1507,"น่":3725,"น็":766,"นๆ":1075,"บว":2339,"นไ":5452,"บล":3184,"นใ":7373,"นโ":6749,"บร":11830,"นแ":9829,"นเ":26787,"บม":1153,"บภ":734,"ปก":3047,"บพ":1205,"ลุ่":3574,"บป":2025,"บบ":7643,"ปน":1077,"บุ":4548,"บู":1144,"ปท":1302,"บิ":3104,"ปต":759,"บี":2069,"บา":8404,"ปด":805,"บั":9157,"ปฏ":1873,"บอ":4696,"น์":3647,"บส":3398,"น้":10964,"บห":1553,"ด็":3505,"ตว":2762,"ตล":1318,"ดไ":896,"ตส":1012,"ด้":20496,"ด่":844,"ตะ":4134,"ตอ":5825,"ด์":2932,"ติ":15034,"ตำ":3584,"ตา":9651,"ตั":18129,"ดื":1116,"ดิ":9252,"ตต":1078,"ดี":8816,"ตถ":1332,"ดุ":891,"ตน":2338,"ตบ":1002,"ดู":2853,"ตร":23291,"ดโ":963,"ดใ":3358,"ดเ":6934,"ดแ":2124,"ตย":2013,"ดล":1605,"ดว":1797,"ดห":2122,"ดส":2822,"ดอ":4967,"ณ์":3582,"ดั":6569,"ดำ":2067,"ดา":6669,"ดด":953,"ณิ":917,"ดต":2322,"ณี":962,"ดท":2962,"ดน":4803,"ดป":1230,"ดพ":988,"ตก":3537,"ดม":1612,"ดย":15171,"ดร":2790,"ทอ":3103,"ทห":1219,"ทส":1351,"ทศ":9520,"ทว":1599,"ที":59203,"ทิ":4236,"ทำ":5708,"ทา":13110,"ทั":9949,"ทะ":1722,"ทพ":2598,"ทน":2186,"ถุ":1404,"ทบ":1025,"ถู":2630,"ถึ":6411,"ทธ":4848,"ถื":1366,"ถไ":2196,"ลอง":1534,"ธง":1259,"ทร":10558,"ทย":11986,"ต้":8814,"ต์":5183,"ต่":15088,"ต็":806,"ลอด":966,"ถา":5861,"ถิ":1257,"ตู":1933,"ตี":1997,"ตุ":3328,"ถน":2338,"ตแ":903,"ทค":1361,"ตเ":2007,"ฐา":2537,"ฐอ":1245,"ฐม":814,"ละค":2575,"ฏิ":1907,"ละก":1772,"ละน":936,"ณา":2378,"ณะ":4781,"ละท":1069,"ละต":1127,"ดช":1164,"ดจ":1760,"ดง":3356,"ดค":1484,"ณร":978,"ละส":1767,"ดข":3245,"ดก":3268,"ลัง":3957,"ละร":817,"ละม":1718,"ลัก":5507,"ละพ":787,"ละป":964,"ณฑ":1856,"ลับ":937,"ละอ":1448,"ละไ":841,"ลาง":2453,"ลาก":914,"ละเ":4488,"ละแ":849,"ฒน":2389,"ลัย":2733,"ซั":988,"ซา":2350,"ซอ":2165,"ซู":753,"ซี":4385,"ซึ":8304,"ซิ":3377,"ญช":978,"ญญ":1807,"ซ์":1764,"ญิ":1154,"ญา":2270,"ญี":1753,"ญ่":3821,"งิ":983,"งา":6266,"งอ":10609,"งห":11834,"งส":12858,"งศ":4623,"จจ":3207,"งว":4781,"งๆ":1493,"งใ":8125,"ลล์":930,"งไ":4482,"งแ":10959,"งโ":5091,"จร":2901,"จม":901,"งเ":26671,"จพ":2589,"ลวง":2170,"จน":3023,"จี":2856,"จิ":3604,"จึ":1361,"จั":14501,"จะ":8562,"จำ":4839,"จา":15882,"จอ":2342,"ฉล":792,"จเ":946,"ฉพ":1153,"ชก":1650,"จุ":5283,"ฉา":1061,"ชน":7089,"ฉี":1150,"จ้":6665,"ชย":735,"ชร":794,"ชว":2061,"ชบ":758,"ชม":820,"ชา":13533,"ชี":5071,"ชิ":4498,"ชื":9652,"ซน":1061,"ชุ":2369,"ชส":1160,"ชอ":1761,"ชั":4493,"ซล":1003,"ช่":6714,"ช้":8872,"คน":5647,"คต":1444,"คม":6449,"คร":21353,"คย":1040,"tio":1636,"ข่":2813,"ข้":7740,"คค":1249,"ลบั":843,"คณ":2195,"คู":1293,"คุ":2351,"คื":7785,"คี":1217,"ลนด":1156,"คิ":2002,"คโ":1144,"คเ":1342,"คว":12568,"คล":7051,"คา":4157,"คำ":3659,"คั":3422,"คอ":3384,"งข":9913,"งก":19903,"งค":13884,"ค่":1995,"ค้":2249,"ค์":5026,"งบ":3074,"งน":4757,"งท":10771,"งพ":4931,"งผ":1609,"งป":7671,"งย":2006,"งม":7023,"งภ":1825,"จก":1261,"งล":2323,"งร":6456,"งช":5175,"งง":1006,"งจ":7033,"งซ":1220,"งต":6752,"งถ":1603,"งด":3806,"กภ":1049,"กพ":1569,"กป":1954,"กน":2935,"กบ":1539,"กท":4244,"กต":4649,"กด":2721,"กฏ":948,"กฎ":1349,"กซ":2148,"กช":1615,"กจ":3262,"กง":1224,"กค":3642,"กข":3351,"ter":1286,"กก":5870,"กู":1343,"กเ":10000,"กั":20495,"กา":51462,"กำ":3149,"กิ":9995,"ข
ต":2558,"กี":4840,"ขน":4066,"กุ":3321,"กส":5176,"กษ":11940,"กห":1741,"กอ":8499,"กะ":1354,"กย":1271,"กม":4662,"กฤ":4112,"กร":27767,"กล":15020,"กศ":807,"กว":6893,"ขี":3132,"ขา":5865,"the":1233,"ขุ":925,"ขึ":5668,"ก์":773,"ขอ":32858,"ก๊":859,"ก้":1794,"ขั":2842,"กไ":1756,"กใ":2974,"กโ":1476,"กแ":3798,"ก่":6422,"ก็":3535,"ขว":1297,"วกั":2290,"วขอ":818,"ลเม":1412,"ษา ":1133,"ล่น":1147,"ล่า":2925,"ล้ว":2260,"ล้อ":942,"ล้า":3112,"วงก":746,"วงศ":3050,"ล็ก":2525,"วนใ":973,"วนห":850,"วนก":966,"วที":983,"วยเ":735,"วรร":3044,"วร์":805,"วยก":1046,"วมก":896,"วอร":1058,"วลา":1487,"วละ":990,"ส์ ":2631,"วีย":1077,"ศตว":752,"วิน":754,"วิต":1559,"วิท":5574,"วิช":1485,"วาม":10406,"วาง":1213,"วัล":909,"วัน":10402,"วัต":2453,"วัด":5975,"วัฒ":910,"วัง":868,"ศน์":1136,"วเต":851,"ศรี":1384,"วไป":1324,"อก ":1672,"ว่า":16578,"ว้น":750,"ว้า":1054,"อง ":7392,"ศัย":746,"ศัก":1311,"ษณะ":2477,"ศาส":7153,"ศึก":3054,"ษณ์":883,"ศิล":1821,"ษที":956,"ศูน":857,"ษย์":1021,"ศไท":1672,"สกุ":748,"อน ":3168,"สงค":1388,"อม ":778,"อย ":1240,"ษาก":925,"ษัต":893,"ษัท":1443,"สตร":5931,"ษาอ":1852,"ษาเ":921,"สดง":2076,"สนา":1982,"สต์":1797,"สถา":5432,"สมเ":2557,"สมุ":1016,"สมา":1528,"สมั":2283,"สภา":1234,"สัญ":1359,"สัง":2218,"สัต":1750,"สัน":1365,"สาข":1007,"สัม":975,"สำน":1154,"สิง":944,"สาธ":1138,"สาย":2246,"สำค":1783,"สาร":3032,"สาม":4593,"สาว":795,"สอง":2064,"หญิ":1133,"สหร":1602,"หญ่":3811,"สร้":3626,"หมา":5219,"หมื":980,"หมู":1311,"หม่":2082,"หรื":14127,"หรั":3559,"สเต":1342,"หมด":1024,"สุร":1490,"สูง":1909,"สือ":1352,"หนด":759,"สุด":3697,"หน่":2681,"หน้":3646,"หนั":1888,"สูต":893,"หนื":2453,"หนึ":6865,"สู่":776,"สำเ":741,"สิท":1099,"สิน":1011,"สำห":2054,"สิ่":1600,"สีย":3370,"หตุ":849,"สี่":1091,"สีเ":803,"สุข":808,"หิน":734,"หาว":2346,"หัว":2009,"หาร":3693,"หาน":1005,"หาก":917,"ส้น":1351,"ส่ว":5073,"ส่ง":1119,"หว่":2958,"หวั":4202,"หล่":1190,"หลั":4230,"หลื":1267,"หลา":3254,"หลี":1577,"หลว":2304,"องจ":2432,"องค":5587,"องข":1511,"องก":4220,"องน":1770,"องท":3322,"องด":1078,"องต":1671,"องช":1455,"องว":1091,"องร":2706,"องพ":2170,"องม":1890,"องภ":735,"องป":3285,"องบ":1250,"องส":4135,"องห":2070,"องอ":2544,"อกเ":2315,"อกแ":903,"อขอ":1542,"อกจ":1222,"อกอ":862,"อกั":828,"อกา":1632,"อตั":1635,"อดี":1193,"อนก":910,"อที":1522,"ห่ง":6109,"ัก ":1014,"องโ":2181,"องใ":1090,"องไ":1059,"องเ":6096,"องแ":2259,"ห็น":791,"ห้เ":1731,"ัง ":1312,"ัน ":5497,"อมา":1234,"อมพ":870,"ับ ":2373,"อมเ":1000,"อมู":1073,"อย่":3805,"อยู":11352,"อร์":11720,"อวั":2718,"อวิ":799,"าก ":2111,"อว่":1274,"อนด":815,"อธิ":902,"อนท":1025,"อนุ":1065,"อบด":1116,"อนไ":741,"อนเ":1384,"ัด ":1246,"อิส":796,"อิน":2322,"อำเ":2340,"อีก":2134,"อำน":838,"อาห":1406,"ัส ":1055,"อาศ":740,"อาร":1724,"อาจ":1975,"อัล":1451,"อาณ":1739,"าช ":1056,"อื่":1772,"ออก":6565,"ัย ":1671,"อสา":832,"อัง":3552,"อาก":1660,"อัน":2487,"ัว ":1389,"อัก":2043,"าง ":4454,"อ่า":789,"าม ":1873,"าย ":3426,"าณ ":1847,"อเม":2930,"อเป":1207,"าน ":3701,"อเส":824,"อเร":1957,"อเช":951},"n_words":[7308152,7320273,4252865],"name":"th"}
\ No newline at end of file
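Once the profiles directory is populated, detection goes through the package's top-level helpers. The sketch below shows typical usage of the converted Python 3 package, assuming nlp_resource_data/langdetect is importable as langdetect; the sample strings and expected labels are illustrative. Pinning DetectorFactory.seed makes the otherwise randomized detector deterministic:

    from langdetect import DetectorFactory, detect, detect_langs

    # The detector samples n-grams at random; a fixed seed makes
    # repeated runs reproducible.
    DetectorFactory.seed = 0

    print(detect(u'สวัสดีครับ'))                # expected 'th', matched against the Thai profile above
    print(detect_langs(u'Ito ay isang wika'))   # e.g. [tl:0.99...]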
diff --git a/nlp_resource_data/langdetect/profiles/tl b/nlp_resource_data/langdetect/profiles/tl
new file mode 100755 (executable)
index 0000000..d68c2b3
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"D":3787,"E":3422,"F":1488,"G":3274,"A":19564,"B":7360,"C":4965,"L":4772,"M":7066,"N":4375,"O":1801,"H":3508,"I":8185,"J":1743,"K":5752,"U":1449,"T":5401,"W":1008,"V":1273,"Q":420,"P":11919,"S":10977,"R":3177,"Y":471,"X":334,"Z":463,"f":2669,"g":178562,"d":29955,"e":64572,"b":36938,"c":9968,"a":433329,"n":268000,"o":112013,"l":93919,"m":59846,"j":824,"k":54159,"h":28813,"i":161924,"w":15439,"v":3263,"u":56864,"t":84874,"s":113569,"r":58943,"q":542,"p":56795,"z":2158,"y":61992,"x":703,"Fil":231,"í":225,"é":238,"á":213,"ü":252,"ā":268,"Est":484,"Eng":311,"Ene":272," l":8609," m":26008," n":63523," o":8411," h":4923," i":23816," k":20018," d":6208," e":1886," f":514," g":3652," a":41230," b":12249," c":1285," y":708," u":3275," t":9124," w":1107," v":245," p":25524," s":37805," r":2505," J":1713," K":5573," H":3377," I":7706," N":3834," O":1616," L":4623," M":6813," B":6692," C":4389," A":18851," F":1372," G":3134," D":3532," E":3171," Z":430," Y":427," X":260," S":10261," R":2927," Q":408," P":11641," W":933," V":1041," U":1379," T":5149,"ا":230,"A ":369,"Da":1003,"Co":1457,"Ce":320,"Ch":481,"Do":225,"De":356,"Di":1582,"Fe":231,"Eu":252,"Es":754,"En":687,"Em":287,"Ge":222,"Ga":499,"I ":336,"Fr":271,"Fi":327,"C ":400,"Au":300,"Ar":695,"As":627,"Ba":3221,"Ay":1525,"Ag":601,"BC":219,"Ab":447,"Ad":339,"Am":736,"BN":257,"An":10961,"Ap":381,"Ak":282,"Al":957,"Bu":508,"Br":412,"Ca":1287,"Bi":744,"Be":555,"Bo":628,"Hil":266,"Ku":269,"Kr":232,"Ko":739,"Le":701,"Li":502,"N ":411,"La":1279,"Lu":1605,"Lo":364,"Me":527,"Mi":786,"Ma":4128,"Mu":386,"Mo":519,"Ni":284,"Ne":669,"Na":1605,"No":859,"Ok":212,"Ol":246,"Gi":363,"Gr":1131,"Go":284,"Gu":257,"Ha":926,"He":607,"Hi":663,"Ho":302,"Hu":700,"Im":288,"In":3099,"Ik":390,"Il":402,"Is":900,"It":1478,"Ir":258,"Ja":583,"Jo":504,"Ju":288,"Ka":3627,"Hap":403,"Ki":336,"Un":1045,"Tu":257,"Tr":725,"Ts":305,"To":291,"Th":568,"Ti":1025,"Te":370,"Ta":1438,"St":601,"Su":844,"Wi":302,"Wa":226,"Vi":399,"Va":226,"Pu":319,"Pr":784,"S ":239,"Pe":753,"Pa":3232,"Po":515,"Pi":5199,"Ph":519,"Or":329,"Se":751,"Sc":225,"Si":3949,"Sh":296,"So":479,"Sa":2369,"Re":1018,"Ri":307,"Ro":908,"Qu":349,"Ra":347,"Gre":522,"Gri":303,"b ":919,"a ":94136,"Sü":216,"Za":253,"i ":12212,"gd":846,"ge":1642,"ga":29192,"gb":804,"Ing":2213,"fi":306,"fo":309,"gy":549,"he":2166,"ha":12797,"gn":612,"gm":735,"gl":3792,"gk":3515,"gi":5586,"gh":1271,"gg":2300,"gu":3141,"gt":1399,"gs":3014,"gr":1126,"gp":2172,"go":3120,"dt":243,"du":1243,"dy":626,"g ":113439,"ea":1899,"eb":1161,"ec":685,"ed":1334,"de":3129,"di":5171,"do":3993,"Ilo":317,"dr":607,"ew":354,"ex":243,"eu":223,"ev":449,"ey":804,"ez":509,"fa":228,"h ":1453,"Ind":341,"fe":234,"eh":1900,"eg":2092,"ee":503,"el":4423,"ek":1455,"ei":449,"ep":1032,"eo":1328,"Imp":228,"en":9846,"em":2892,"et":2497,"es":7771,"er":9807,"ca":1369,"Ika":371,"e ":10545,"by":545,"br":2325,"bu":3483,"bo":2508,"bl":1265,"bi":7420,"be":1655,"da":7330,"f ":892,"cu":348,"ct":832,"cr":267,"co":1527,"ck":419,"ci":1114,"ch":1408,"ce":1134,"cc":244,"c ":679,"az":285,"ay":33855,"ba":16262,"d ":6470,"at":29146,"as":20781,"ar":16773,"aw":10001,"av":702,"au":1712,"ak":11317,"al":29791,"ai":3152,"ao":3807,"ap":8378,"am":13495,"an":101085,"ac":1440,"ad":5449,"aa":5389,"ab":8765,"ag":22898,"ah":9461,"ae":824,"nu":2551,"nt":6347,"ns":5167,"no":9894,"nn":491,"ny":2397,"of":922,"oc":711,"od":3388,"oa":467,"ob":1692,"om":3049,"on":28949,"ok":1666,"ol":5417,"oi":423,"og":2491,"oh":829,"ot":1896,"os":4877,"ov":562,"ou":1121,"op":3555,"oo":5633,"or":6366,"r 
":5313,"ow":489,"oy":1082,"pe":2826,"pa":25955,"pl":758,"po":5042,"ph":416,"pi":11096,"lo":7059,"lm":376,"ll":2040,"ls":232,"lp":216,"lu":4808,"lt":766,"ly":1884,"o ":35938,"ma":23630,"mb":3361,"mg":9391,"me":3130,"mi":5214,"mm":511,"mp":3371,"mo":2915,"mu":4995,"p ":2357,"na":50921,"nc":944,"nd":5861,"ne":3717,"ng":124226,"ni":11322,"nl":1083,"ki":5108,"kh":660,"ke":653,"kb":240,"ka":28062,"m ":2592,"ky":234,"ks":1102,"kt":1232,"ku":3832,"ko":5455,"kr":397,"kl":2445,"li":16771,"le":7298,"ld":536,"la":41710,"lb":521,"n ":41545,"hr":229,"hu":1485,"hi":7425,"hn":275,"ho":2024,"id":2936,"ic":2306,"ib":3763,"ia":4281,"ih":1209,"ig":7316,"if":239,"ie":1122,"hy":251,"k ":4018,"ir":2572,"is":21654,"it":15297,"iu":510,"iv":697,"iw":1139,"ii":644,"ik":10689,"il":18574,"im":4535,"in":32448,"io":2262,"ip":8303,"iz":356,"iy":6303,"l ":8749,"ja":305,"z ":468,"wi":4192,"wo":282,"ws":238,"y ":26266,"wa":7331,"we":663,"vi":929,"vo":293,"uz":222,"uy":540,"uw":938,"uu":278,"ve":1065,"va":717,"x ":270,"ui":867,"uk":2223,"ul":10829,"ue":1048,"ug":2281,"uh":1222,"ur":4663,"us":3997,"ut":2718,"um":4571,"un":11124,"uo":841,"up":1559,"ty":936,"tu":5349,"tt":423,"tw":245,"ub":2276,"ua":2314,"ud":744,"uc":353,"w ":2307,"to":12113,"tn":494,"tl":670,"ts":639,"tr":2947,"te":5598,"ti":12280,"th":1499,"ta":23206,"su":2916,"ss":697,"st":7061,"sy":6204,"sl":357,"sk":592,"sm":643,"sp":948,"so":5456,"sc":415,"se":6243,"sh":1082,"si":9719,"u ":1011,"sa":50153,"rr":466,"rs":1459,"rt":2397,"ru":1218,"ry":2421,"rp":226,"ro":6522,"rn":1128,"rm":690,"rl":534,"rk":708,"ri":11065,"rg":616,"re":7270,"rd":981,"rc":435,"rb":496,"ra":14328,"t ":17794,"qu":482,"s ":20143,"py":238,"pt":257,"pu":5533,"pp":386,"pr":1443,"Hul":231,"za":466,"zo":453,"ye":2458,"ya":16851,"yb":251,"yu":966,"yt":264,"ys":635,"yr":507,"yo":11759,"yn":980,"Ara":274,"Apr":218,"Asy":352,"Ayo":1506,"Bag":274,"Ban":328,"Bay":1488,"Bat":317,"Abr":300,"Adi":220,"Ago":307,"BN ":257,"Ale":251,"Alt":229,"Ame":579,"Ang":10467,"Car":262,"Bib":239,"Com":233,"Col":584,"Dis":592,"üd":216,"Nat":309,"New":234,"Nag":476,"Nor":372,"Nob":295,"Pin":243,"Pil":4628,"Phi":298,"Per":237,"Pas":236,"Par":288,"Pag":236,"Pan":1029,"Pam":480,"Pal":436,"Pro":250,"Pra":302,"Que":246,"Isa":506,"Ita":502,"Ito":837,"Jam":280,"Jos":222,"Kab":349,"Kag":252,"Kal":869,"Kan":214,"Kat":442,"Kas":598,"Kar":312,"Kon":276,"Leo":280,"Lat":224,"Lun":1069,"Man":377,"Mal":287,"Mar":952,"May":1105,"Mat":220,"Min":268,"Süd":214,"Zam":212,"一":621,"一一":311,"Sur":412,"Sta":256,"Tag":668,"Siy":420,"Sil":411,"Set":224,"Si ":2472,"Sam":277,"Sal":320,"San":795,"Sa ":539,"Rey":251,"Rep":257,"Rom":461,"Uni":749,"The":357,"Tim":465,"Tin":234,"Tre":501,"Tsi":277,"bis":856,"bit":236,"bil":2673,"bin":1170,"big":1028,"bo ":562,"bli":613,"bla":330,"bol":310,"bon":590,"ban":3506,"bak":379,"bal":915,"bag":1029,"bah":2518,"bae":247,"bab":1067,"bay":2496,"baw":340,"bat":1046,"bas":870,"bar":318,"bi ":266,"ber":655,"bel":279,"bib":346,"bid":220,"ca ":245,"can":254,"ce ":518,"bri":332,"bra":317,"bre":1519,"buo":641,"bul":259,"bun":360,"bum":222,"buh":438,"but":324,"bye":332,"aka":4840,"am ":867,"aki":2053,"akh":385,"al ":4667,"ail":347,"ain":942,"ais":377,"ak ":1818,"aig":364,"ahi":1712,"ahu":567,"aho":893,"aha":5905,"agk":1800,"agl":815,"agm":409,"agg":261,"agh":599,"agi":2389,"ags":1175,"agt":890,"agu":765,"ago":873,"agp":2086,"anu":622,"any":1534,"ano":2033,"ant":1925,"ans":2023,"ane":370,"ang":54220,"ani":2704,"anl":847,"ap 
":1248,"ana":5631,"anc":342,"and":2377,"amu":411,"amo":399,"amp":1054,"ami":2241,"ame":660,"amb":1287,"ama":6244,"ao ":2560,"aly":681,"alu":885,"alo":1422,"alm":256,"all":403,"ali":4268,"ale":940,"ala":15201,"alb":295,"an ":25875,"aku":295,"akt":378,"ako":347,"akl":349,"aba":5223,"abe":285,"abi":1848,"abo":410,"abu":558,"ae ":378,"aca":217,"aaa":370,"aan":2340,"aal":228,"aas":476,"aar":1310,"ad ":1547,"aga":6180,"agb":737,"agd":421,"ado":1685,"adi":344,"ade":215,"ag ":2962,"ada":1065,"ayo":1284,"ayn":676,"ays":426,"ayr":437,"ayu":244,"ayb":228,"aya":7316,"ba ":1007,"at ":11450,"are":434,"ard":406,"arc":218,"ara":6094,"aro":928,"arl":281,"ark":258,"ari":2740,"ars":347,"art":1318,"asa":4127,"ary":1252,"asi":1053,"ase":1923,"aso":401,"aon":1107,"ar ":1507,"apa":4168,"api":926,"apo":968,"apu":525,"as ":8041,"avi":292,"ay ":22870,"awa":4711,"awi":3261,"ata":9859,"asu":540,"ast":1119,"asy":2814,"atl":420,"ato":908,"ate":772,"ati":3674,"ath":277,"aw ":1870,"atu":1238,"aun":478,"aug":319,"itn":327,"ito":5794,"itu":365,"ity":376,"üdt":214,"ism":492,"ist":2727,"isy":1436,"ita":2453,"ite":522,"iti":1834,"iwa":1084,"ius":321,"ive":465,"ipo":219,"ipp":279,"ipi":5953,"is ":1468,"ion":1384,"ipa":894,"iro":325,"iri":318,"isi":1179,"ish":487,"ise":486,"isa":12350,"ire":294,"ira":1126,"it ":3154,"iyo":1973,"iya":3692,"iye":538,"kik":335,"kil":1497,"kin":1435,"kip":244,"kit":510,"ki ":400,"kha":550,"koy":367,"kop":294,"kon":1105,"kom":504,"kol":972,"ko ":1504,"kla":2111,"kay":687,"kat":4082,"kau":481,"kar":1361,"kas":2326,"kap":1524,"kan":3294,"kal":2502,"kam":987,"kak":1114,"kah":780,"kai":697,"kag":306,"kad":618,"kab":2686,"kaa":306,"ka ":4085," Ga":496," Ge":219," Fr":268," Fi":324," Ha":926," He":600," Go":283," Gr":1130," Gu":254," Gi":361," Hu":696," Ho":300,"ha ":509," Hi":657," Ja":583," Ir":258," Is":900," It":1471," Im":284," In":3092," Ik":389," Il":402,"ham":439,"han":3970," Ka":3625,"hal":1546," Ki":335,"har":726,"has":418,"hat":613," Jo":502," Ju":282,"hah":296,"hag":932,"hab":327," La":1273," Le":698," Li":486," Ko":738," Kr":232,"hay":2401," Ku":269," Ma":4113," Mi":784," Me":525,"he ":726," Lo":362," Lu":1602," Ne":660," Na":1601," Ni":283," Mo":516," Mu":380,"her":391,"hen":301,"hi ":256," Ap":381," Am":732," An":10945," Ak":280," Al":957," Ag":601," Ad":338," Ab":444," Ba":3213," Ay":1524," Au":300," As":625," Ar":684,"hig":262," Be":553," Bi":731,"hip":233,"hin":2102,"him":317," Bo":624,"hil":1124," Br":411," Bu":508,"hit":252,"hiy":1822," Ca":1275," Ce":318," Ch":477," Co":1435," Da":1001," Di":1559," De":354," Do":218," Es":753," En":683," Em":287," Eu":251," Fe":229,"gma":404,"go ":1131," Sü":216,"gle":2258,"gli":561,"gla":763," Wi":299,"gko":362," Wa":224," Za":253,"gna":429,"gmu":288,"gpu":1231,"gpa":814,"gon":726,"gos":419,"gor":456,"gsa":661,"gsi":395,"gra":396,"gre":308," Or":329," Po":510,"gui":242," Pi":5194,"gum":355," Ph":516,"gul":619," Pe":751," Pa":3220,"gsu":217,"gso":1676," No":856," Ol":245," Ok":212,"gta":876," Ra":342," Qu":347," Ro":902," Re":1013," Ri":307," Pr":781,"gus":319," Pu":319,"gun":828," Su":840," St":567," Ta":1427,"gya":341," Th":565," Ti":1025," Te":369," Tr":725," Ts":304," To":289," Sa":2358," Sh":292," Si":3946," Sc":224," Se":748," So":478," Va":226," Vi":396," Tu":256," Un":1043,"ial":214,"ian":1841," ip":821," im":295," in":1205," ik":2724," il":559,"ic ":391," is":12317," it":4118,"ibl":298,"ibi":852," ka":12843,"ibo":539," ki":1772,"id ":624,"iba":1373,"ibe":241," ha":2342," he":266," gi":1221," gr":273,"ia ":1668," gu":642," ib":1270," 
hi":1628," hu":502," ni":3203," ng":29501," ne":259,"ien":313," na":26725," mu":2405,"ig ":1084," mo":343," of":748," no":3658," le":482,"ict":361," li":1169," la":4719," ku":1926,"ich":298," kl":1518,"ica":454," ko":1618," me":432," mg":9378," mi":835," o ":6532,"ido":754," ma":12562," lu":1621,"ide":429,"ida":693," lo":567," ag":287," aa":479," an":11487," ap":351," ak":588," al":688," aw":224," ar":1782," at":8491,"iit":294," as":396," ba":6398," ay":15608,"il ":1019," bi":3286," be":279," bo":263," bl":303," bu":1574," ca":215,"im ":631,"ika":5845,"igd":340,"ige":351,"iga":3450,"igm":226,"igi":608,"iha":656,"ihi":488,"ik ":530,"imo":757," es":477," em":253,"imp":821," el":218,"ime":284," ek":212,"imi":396,"ip ":377,"ind":1253,"ina":12544,"imu":413,"ino":1637,"int":735,"ins":494,"ine":925,"ing":7215,"ini":1991," ga":1201,"inu":1256,"iko":1955," co":595,"iki":665,"ila":8566,"in ":3710," da":2205,"iku":803,"iks":322,"ilo":675,"ill":466," de":1318,"ili":6551," di":1938,"ima":551,"imb":583,"io ":525,"ily":587," du":359,"hol":219,"hon":897," ye":394,"hul":550," sa":28976," se":2207," si":4160," so":365," t ":331," re":1237," ri":835," pu":1239," pr":891," s ":272,"hum":213," op":300," or":499," pe":850," pa":17344," pl":235," po":2264," pi":2508," wa":403," wi":495," tu":1581," ur":1186," up":417," um":236," un":787," ta":3807," su":1227," tr":542," th":440," ti":1513," te":623,"eyn":227,"eta":377,"eti":227,"esp":220,"eso":216,"est":873,"ess":243,"esy":238,"eto":217,"etr":240,"ety":252,"ey ":259,"er ":1701,"es ":4679,"epu":270,"eri":1261,"ere":336,"era":1743,"erb":250,"et ":365,"esi":397,"ery":584,"ert":284,"ers":877,"ern":685,"erm":213,"ero":994,"eks":315,"ekt":395,"en ":605,"ela":487,"ele":614,"eli":929,"ell":447,"eo ":676,"emb":1031,"ema":617,"eme":244,"emo":250,"emi":251,"emp":247,"ene":540,"eng":2031,"ena":338,"end":702,"enc":265,"ens":1983,"ent":2381,"ego":798,"ege":495,"ehi":1394,"el ":1262,"eka":275,"gka":2797,"git":1186,"gis":283,"gil":253,"gin":2170,"gha":696,"ggi":233,"gga":1607,"gi ":930,"gen":305,"gda":265,"gdi":448,"ge ":872,"gbi":301,"gba":311,"gag":358,"gah":212,"gas":721,"gar":389,"gat":1029,"gaw":951,"gay":1184,"gam":1160,"gal":2016,"gan":8911,"gap":698,"ga ":10893,"da ":1215,"de ":630,"dad":487,"daa":229,"dal":1278,"dai":337,"dag":294,"dah":789,"dat":526,"dar":615,"dan":716,"dam":364,"cti":321,"co ":247,"com":405,"ch ":248,"cha":297,"cia":252,"ck ":232,"che":214,"ed ":364,"ebr":372,"ean":218,"ear":435,"eap":318,"ea ":269,"ega":369,"edi":305,"dya":287,"dor":701,"don":511,"dos":801,"dti":217,"dul":227,"duk":249,"dia":320,"der":377,"des":307,"del":555,"dek":245,"den":404,"di ":862,"do ":1342,"diy":230,"din":1025,"dis":689,"dit":243,"dig":691,"rga":250,"ri ":1883,"res":909,"rea":358,"reg":609,"reh":760,"ren":823,"rel":229,"rer":338,"re ":1785,"raw":1292,"rd ":279,"rap":411,"ras":562,"rat":708,"rag":342,"ran":3019,"ram":928,"ral":1710,"rab":295,"raa":290,"rad":734,"rs ":234,"ros":487,"rot":255,"ron":875,"roo":656,"rop":486,"rod":249,"rol":389,"rna":412,"rne":221,"ro ":2000,"rma":307,"riy":449,"rit":895,"ris":641,"rig":228,"ril":614,"rik":932,"rin":2120,"ria":1493,"ric":296,"rie":322,"rk ":265,"rya":408,"rup":247,"rus":239,"ry ":568,"rsi":370,"rso":362,"rte":436,"rti":1012,"saa":543,"sab":432,"sag":340,"sah":246,"sak":534,"sal":1402,"sam":846,"sap":373,"san":13859,"sas":644,"sar":588,"say":842,"sa ":29066,"ryo":1185,"shi":267,"si ":909,"siy":1352,"sid":252,"sia":279,"sit":454,"sis":871,"sip":588,"sin":2020,"sil":886,"sim":530,"sik":667,"sig":229,"se ":703,"ser":748,"ses":480,"sh 
":511,"sen":3264,"spe":263,"spa":238,"son":453,"sod":1704,"st ":395,"ss ":216,"sla":213,"smo":455,"so ":2182,"sye":492,"sya":1319,"syo":3972,"syu":310,"ste":723,"sta":2233,"sto":847,"sti":1338,"str":1197,"sub":226,"sul":377,"sum":409,"suk":222,"sun":606,"sus":423,"tak":378,"tal":1825,"tag":2356,"taa":516,"tab":313,"tad":729,"tay":910,"taw":1462,"tat":2169,"tas":1294,"tar":426,"tap":454,"tao":2928,"tan":3973,"tam":283,"te ":1421,"ta ":2620,"pa ":793,"par":2056,"pat":1511,"pas":473,"pay":244,"paa":836,"pab":282,"pag":5944,"pah":588,"pak":582,"pal":1553,"pap":964,"pam":2183,"pan":7375,"pi ":308,"per":1160,"pel":619,"pla":312,"pik":487,"pil":381,"pin":8127,"pis":517,"pit":481,"por":477,"pop":1613,"pos":639,"pon":910,"pol":471,"ppi":245,"po ":494,"pua":1158,"pub":334,"pri":362,"pre":283,"pro":666,"put":222,"pun":775,"pul":2278,"ra ":2777,"ngo":322,"ngi":702,"ngl":2579,"ngk":1509,"ngu":1141,"ngr":246,"ngs":1763,"ni ":1903,"nge":218,"ngg":1999,"ngh":357,"nga":5141,"nel":268,"ner":635,"net":328,"nes":773,"ng ":107416,"nce":344,"ne ":832,"ndu":336,"ndo":855,"ndi":1104,"nde":357,"nda":2122,"nak":3214,"nal":1987,"nam":1069,"nan":5284,"nao":281,"nap":1249,"nar":667,"nad":394,"nag":3410,"nah":1201,"nai":343,"nab":490,"nd ":696,"nau":287,"nat":2025,"nas":5521,"nay":628,"naw":416,"na ":22155,"nya":1473,"nul":380,"num":251,"nun":621,"nus":223,"nut":274,"nub":254,"nto":1170,"ntu":218,"ntr":369,"nti":1113,"nta":1619,"nte":1041,"nsy":292,"nso":1561,"nst":288,"nse":338,"nsi":398,"nsa":1639,"nt ":493,"ns ":315,"nod":444,"noo":3554,"nom":260,"non":1292,"nla":384,"no ":3206,"nlu":471,"nid":488,"nib":262,"nia":263,"niy":508,"niw":515,"niv":230,"nis":952,"nit":1670,"nim":327,"nin":947,"nik":402,"nil":1831,"ogr":242,"ohi":487,"ok ":961,"ol ":1113,"oby":351,"ode":251,"of ":743,"og ":1738,"ob ":490,"od ":2512,"obe":281,"nyo":656,"oto":243,"ost":571,"ota":234,"osi":296,"ose":518,"oso":256,"oy ":861,"oun":298,"opo":245,"opi":377,"ope":331,"os ":2497,"opu":1605,"oon":4273,"ook":406,"oob":454,"or ":1386,"ork":236,"orm":244,"oro":293,"ord":364,"ore":565,"org":297,"ori":875,"osa":225,"ort":603,"ory":303,"ot ":787,"ora":483,"ola":327,"on ":10511,"oli":798,"oll":560,"ole":828,"olo":1158,"ona":1108,"ond":258,"one":582,"ong":14203,"oni":381,"ono":505,"ons":429,"ont":363,"ony":253,"oma":662,"ome":348,"omi":484,"omm":343,"omp":399,"omo":218,"op ":492,"la ":7051,"le ":657,"laa":472,"lab":1035,"lad":618,"lah":600,"lag":1451,"lal":5124,"lak":2054,"lan":8966,"lam":1407,"lap":417,"lar":1484,"lat":1650,"las":3731,"law":4043,"lay":1014,"lba":235,"ld ":260,"kuy":222,"kun":1182,"kum":367,"kul":1301,"ksy":373,"ksi":218,"ktu":302,"kto":418,"lon":1346,"loo":506,"lor":222,"loh":474,"log":979,"los":411,"lto":257,"lug":603,"li ":620,"les":2603,"lem":360,"len":771,"leh":645,"leg":553,"lea":415,"lo ":1738,"lla":492,"lle":760,"ll ":320,"lit":1575,"lis":938,"lip":5499,"lin":1820,"lim":971,"liy":225,"lic":224,"lid":294,"lia":495,"lib":291,"lik":1738,"lil":346,"lii":293,"lig":604,"lih":227,"ma ":1406,"maa":656,"mab":485,"mah":1161,"mai":353,"mak":856,"mad":287,"mag":1774,"map":283,"mar":866,"mas":904,"mal":1856,"mam":1144,"man":4514,"may":4380,"mat":2454,"mba":1424,"mbr":990,"mbo":357,"me ":332,"med":215,"met":267,"mes":455,"mer":827,"men":704,"luk":293,"lup":252,"lun":1075,"lum":601,"lut":234,"lus":323,"lur":429,"lya":1073,"lyo":437,"mpi":405,"mpe":729,"mpo":376,"mpu":307,"mog":636,"mon":703,"mot":220,"mpa":1165,"mus":340,"mut":297,"mul":2510,"mun":975,"mga":9380,"min":1128,"mil":657,"mis":408,"mit":1305,"mik":544,"mo 
":604,"mmu":273,"zon":372,"yun":445,"ysa":370,"yro":432,"yos":430,"yon":8581,"yea":322,"yeg":287,"yen":248,"yem":924,"ya ":5481,"yag":346,"yar":349,"yan":8889,"yal":500,"yo ":2106,"yna":235,"yni":658,"wit":490,"wig":2597,"wik":476,"wa ":1340,"wan":2646,"wal":851,"wak":321,"wat":242,"war":240,"wag":936,"ver":481,"ve ":253,"va ":222,"uya":331,"uwe":288,"uwa":445,"usi":309,"usa":537,"usy":243,"usu":363,"ust":456,"uti":335,"ute":297,"uta":417,"utu":829,"uto":373,"us ":1413,"ura":1051,"ure":277,"uri":1726,"uro":457,"uny":224,"uon":264,"upa":766,"ur ":441,"upo":374,"ump":364,"umu":925,"umi":469,"uma":1769,"umb":268,"uly":233,"uo ":439,"unt":520,"unu":378,"uni":882,"uno":1281,"und":796,"una":2530,"ung":3950,"uku":387,"uko":610,"um ":503,"uka":453,"ulu":1033,"ult":291,"ulo":1229,"uli":710,"ula":6857,"uin":299,"ugn":288,"uga":1124,"uha":936,"ubo":309,"ubr":217,"ubu":535,"ue ":245,"uez":255,"uan":1897,"ubi":307,"ubl":385,"uba":263,"tye":228,"ty ":596,"tur":814,"tut":349,"tul":834,"tuk":407,"tun":595,"tum":594,"tub":614,"tra":868,"tri":838,"tro":879,"to ":7587,"tna":387,"tom":241,"ton":2028,"tol":467,"tor":922,"til":777,"tik":1397,"tig":242,"tir":721,"tit":772,"tis":888,"tin":3062,"tim":775,"tip":230,"tio":1023,"tib":304,"tid":407,"tiy":331,"tlo":414,"tem":511,"ten":352,"tel":392,"th ":280,"tes":226,"ter":1925,"ti ":555,"the":612,"tha":237},"n_words":[2110634,2489828,1864789],"name":"tl"}
\ No newline at end of file
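Each profile added in this change is a single JSON object with three fields: "freq" maps every 1- to 3-character n-gram to its raw count in the training corpus, "n_words" holds the total number of 1-, 2- and 3-grams seen (here [2110634, 2489828, 1864789] for the Tagalog profile above), and "name" is the language code ("tl"). The following is a minimal Python 3 sketch for inspecting one of these files; it uses only the standard library rather than langdetect's own detector_factory loader, and the helper name load_profile is ours, not part of the package:

import json

def load_profile(path):
    # Profiles are one JSON line each; Python 3 needs an explicit
    # encoding since the n-gram keys contain non-ASCII characters.
    with open(path, encoding='utf-8') as f:
        return json.load(f)

profile = load_profile('nlp_resource_data/langdetect/profiles/tl')
print(profile['name'])       # 'tl'
print(profile['n_words'])    # totals of 1-, 2- and 3-grams in training data
print(len(profile['freq']))  # number of distinct n-grams with counts

Note the explicit encoding='utf-8': under Python 2 these files were read as raw bytes, which is one of the behaviours this conversion has to handle.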
diff --git a/nlp_resource_data/langdetect/profiles/tr b/nlp_resource_data/langdetect/profiles/tr
new file mode 100755 (executable)
index 0000000..ed84e9b
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/tr
@@ -0,0 +1 @@
+{"freq":{"D":20468,"E":16965,"F":15212,"G":16978,"A":46399,"B":33447,"C":16597,"L":11614,"M":29609,"N":10463,"O":12233,"H":17902,"I":6437,"J":5964,"K":30894,"U":5006,"T":27864,"W":5022,"V":7330,"P":19836,"S":32251,"R":13212,"Y":12032,"Z":2430,"f":59766,"g":101779,"d":361700,"e":798707,"b":176889,"c":86462,"a":1029574,"n":718304,"o":261031,"l":623439,"m":275808,"j":10372,"k":334154,"h":85819,"i":785261,"w":7611,"v":106213,"u":275921,"t":334795,"s":315971,"r":646028,"q":1181,"p":80800,"z":105754,"y":255623,"x":3410,"²":1600,"Ç":4715,"Ü":2583,"Ö":3499,"î":1753,"é":1705,"ç":69086,"â":3605,"ü":157542,"ö":58180,"ğ":69432,"ı":399538,"İ":20804,"ş":111613,"Ş":5518," l":7220," m":26771," n":32386," o":55007," h":21337," i":59190," k":78855," d":82641," e":36355," f":17358," g":42591,"р":1499," a":67898,"с":1190," b":117978,"т":957," c":5691," y":76657," z":4398," u":11497," t":53835," v":55379," p":15531," s":57822," r":8817," J":5925," K":30750," H":17802," I":6388," N":10386," O":12150," L":11515," M":29458," B":33275," C":16419," A":46229," F":15133," G":16827," D":20328," E":16897,"л":1101," Z":2402,"к":1097," Y":11995,"и":2029,"о":2304,"н":1554," S":31973,"в":1191," R":13120," P":19696,"а":2550," W":4958," V":7287,"е":1678," U":4989," T":27682," ç":18143," ö":14326," ü":15280," Ç":4706," Ö":3487," Ü":2581," ı":6625," İ":20776," ş":10933," Ş":5509,"ي":1130,"ل":1185,"ن":965,"ا":2068,"A ":2665,"Da":3349,"Cu":2398,"Co":2920,"Ce":1839,"Ch":2248,"Do":2641,"De":6139,"Di":2765,"Fe":2199,"Fa":2297,"Ey":1528,"Er":2288,"Es":1409,"En":2354,"El":1347,"Ek":1512,"Ağ":1411,"Ge":3246,"Ga":2711,"Bü":1152,"I ":2251,"Bö":1239,"Fr":4109,"Fo":1223,"Fi":2407,"B ":1016,"C ":1464,"Av":3034,"Ar":5311,"At":1678,"As":1760,"D ":3341,"Ba":8245,"Ay":1472,"Af":1012,"Ab":1087,"Ad":2089,"Am":3467,"An":5110,"Ak":1709,"Al":8286,"Bu":5544,"Br":2172,"Ca":3556,"Bi":5816,"Be":4617,"Bo":2818,"Ku":5039,"Gö":1216,"Kr":1520,"Ko":3871,"Le":1961,"Li":3051,"Gü":3195,"La":2872,"Lo":1867,"Me":5154,"Mi":3881,"Ma":11016,"Mu":2388,"Mo":2807,"Ni":2456,"Ne":2578,"Na":1799,"P ":982,"No":1828,"Ok":1001,"Ol":1014,"Oc":1091,"Gi":1074,"Gr":2260,"Go":1145,"Ha":7669,"He":2472,"Hi":1980,"Ho":2868,"Dü":2155,"In":1300,"Ja":2318,"Jo":1650,"Ka":10073,"M ":1217,"Ki":1749,"Ke":1920,"Ul":1131,"Tu":1634,"Tr":1523,"To":2482,"Th":2500,"Ti":1452,"Te":4160,"Ta":4376,"V ":1033,"St":3068,"Su":2048,"Wi":1541,"Wa":1156,"Vi":2393,"Va":1979,"Ve":1434,"Pr":2222,"S ":1244,"Pe":2037,"Pa":5059,"Kü":1314,"Po":6260,"Pi":1406,"Os":2243,"Or":2331,"Kö":1204,"Se":4094,"Sc":1963,"Si":3179,"Sh":999,"Sp":940,"So":3019,"Ru":3002,"Sa":7772,"Re":2740,"Ri":1066,"Ro":3278,"Kı":2617,"Ra":1699,"Mü":1802,"b ":1498,"a ":176902,"Ye":1685,"Tü":7528,"Ya":4483,"Yo":1238,"Yu":2386,"bö":7750,"i ":134793,"ge":29308,"ağ":18792,"bü":7597,"ga":7799,"fl":1604,"ff":1053,"fi":9181,"fr":3323,"fu":6151,"ft":2020,"fo":3898,"he":11849,"ha":24248,"cü":3165,"gl":1385,"gi":16689,"gh":1246,"gu":5867,"gr":6009,"go":2702,"du":14570,"dy":2265,"g ":6921,"ea":4032,"eb":5387,"ec":6696,"ed":29273,"de":95875,"dd":2946,"di":58969,"dl":2907,"do":12268,"dr":3254,"ew":1275,"eu":1087,"ev":14605,"ey":30087,"ez":8326,"fa":8497,"h ":5851,"fe":6028,"eh":5944,"eg":3973,"ef":3639,"ee":1898,"el":56887,"ek":41993,"aç":5873,"ei":4450,"ep":3432,"eo":3151,"en":100462,"em":27716,"et":47279,"es":52024,"er":131153,"ca":18661,"e ":189222,"br":3182,"bu":23594,"bo":7205,"bl":1863,"bi":65897,"be":19027,"da":97587,"f ":5395,"cu":6717,"ct":1404,"cr":1803,"co":3001,"ck":3008,"ci":12969,"ch":6435,"ce":16707,"c 
":1516,"az":19018,"ay":42685,"ba":34644,"d ":11781,"at":38962,"as":49850,"ar":157561,"av":10941,"au":2292,"ak":57066,"al":73836,"ai":6322,"aj":1917,"ap":20439,"am":37584,"an":184545,"ac":10260,"ad":37380,"aa":2080,"ab":13369,"ag":3250,"ah":16357,"ae":2002,"af":13702,"nu":18358,"nt":17557,"ns":13849,"nr":3598,"no":7521,"nn":3019,"nz":1833,"ny":12611,"hı":1036,"iğ":11558,"of":3409,"oc":3225,"od":4927,"ob":3075,"om":13542,"on":43168,"ok":10977,"kç":3190,"ol":59486,"oi":1195,"oj":3578,"og":3689,"oh":1112,"m²":1589,"ot":7864,"os":10639,"ov":4695,"ou":3530,"op":8235,"oo":1893,"or":32678,"r ":170339,"ow":1477,"oz":2422,"kö":4303,"oy":8048,"pe":6240,"kü":6018,"pa":15264,"pl":7151,"pm":1729,"po":6918,"ph":1672,"pi":5799,"lç":5891,"lo":10260,"lm":31640,"ll":28962,"ls":3083,"fı":9951,"lu":32826,"lt":9362,"ly":5619,"o ":14289,"mc":2028,"md":5246,"hü":1497,"ma":67090,"mb":3028,"eş":11455,"mh":2240,"me":45214,"iç":12229,"ml":15462,"mi":33126,"mm":2739,"mp":4822,"mo":6599,"ms":3576,"mu":11366,"gı":2023,"my":1356,"p ":11912,"na":47407,"nb":3039,"nc":18538,"nd":102802,"ne":51401,"nf":1257,"ng":12672,"ni":53546,"nk":5100,"nl":28972,"nm":10641,"dı":37931,"ki":41309,"eğ":6350,"ke":25138,"ka":45970,"m ":32916,"ky":1403,"gö":9935,"ks":8093,"kt":21304,"ku":22990,"ko":13147,"kr":3294,"kk":1610,"kl":25669,"km":5012,"kn":1431,"li":71923,"lk":10655,"le":122046,"ld":15695,"lg":12154,"lf":964,"la":144587,"gü":8845,"lc":1649,"lb":3731,"n ":243619,"hr":4105,"bı":1392,"ht":2266,"hu":4538,"hi":16646,"hn":1197,"ho":2852,"hl":3714,"hm":1584,"id":21397,"ic":9431,"ib":5859,"dü":13708,"ia":8374,"ih":7127,"ig":4782,"aş":24105,"if":4811,"ie":4567,"dö":3928,"k ":83031,"ir":107452,"is":40125,"it":22698,"iu":1112,"iv":5224,"cı":8286,"ii":1103,"ij":1085,"ik":40360,"eç":4865,"il":86504,"im":27783,"in":138518,"io":5300,"ip":8100,"je":1713,"ji":4659,"iz":21969,"iy":28989,"l ":42232,"ja":1035,"tç":1871,"ğlu":963,"rı":43249,"sö":2312,"z ":18621,"sü":4852,"oş":947,"wi":1830,"sç":1475,"pı":8243,"vv":1012,"vy":1154,"y ":15418,"wa":1412,"rü":8173,"ğla":2983,"vl":4067,"rç":3762,"vi":9343,"nş":1036,"vu":2607,"vr":5677,"vo":1424,"uz":10570,"uy":4807,"uv":2249,"ve":59460,"va":15490,"x ":1999,"ui":1273,"uk":6582,"ul":36937,"ue":3947,"oğ":10206,"ug":2004,"ğit":1160,"uh":1861,"ur":35478,"ğin":3622,"us":21592,"ut":7622,"um":12243,"un":39859,"up":6378,"ty":1262,"tu":11035,"tt":7164,"nı":44489,"ub":5081,"ua":1900,"ud":5831,"uc":4903,"w ":1459,"to":17485,"tm":6406,"tl":13327,"ts":2378,"tr":10162,"te":52587,"pç":1206,"tk":2785,"ti":53158,"th":4577,"v ":2143,"tb":2499,"ta":68248,"su":11906,"sv":930,"ss":3728,"st":34655,"sy":6610,"mı":21024,"sw":1258,"sl":8558,"sk":7111,"sn":1024,"sm":5546,"sp":5003,"so":11037,"sc":1810,"se":25089,"sh":2001,"si":63532,"rz":1185,"u ":42818,"sa":39618,"nü":10410,"rr":2777,"rs":8400,"rt":19992,"ru":21036,"rv":1617,"lı":53226,"ry":3722,"rp":1215,"ro":18824,"rn":5785,"rm":14124,"rl":25348,"rk":23839,"nç":1698,"rj":994,"ri":89903,"rh":1272,"rg":7962,"rf":1575,"re":45975,"rd":23811,"rc":3479,"rb":3537,"mü":12609,"ği ":7346,"ra":89176,"t ":36013,"kı":16796,"ğer":2592,"iş":22142,"lü":12025,"s ":31408,"pt":3720,"pu":1218,"pr":5018,"ps":1980,"ğun":2143,"ğus":1809,"ğum":954,"zı":6859,"zü":1410,"yı":27954,"yü":11063,"ğu 
":5997,"zö":2165,"ğlı":7152,"ğre":1586,"uş":11784,"yâ":1423,"yö":4435,"ğiş":1548,"uğ":6358,"zg":2173,"rş":2653,"zi":13389,"zc":5363,"zd":2307,"ze":21253,"za":12673,"tı":26503,"zy":1958,"zu":2578,"zo":2918,"zm":3788,"zl":5972,"yg":2621,"ye":34397,"yd":5178,"tü":11302,"ya":90167,"yb":1899,"tö":1628,"sı":43368,"yv":967,"yu":8007,"ys":1270,"yr":3168,"yo":15057,"yn":5330,"ym":1117,"yl":13689,"yk":1018,"uç":1551,"yi":7908,"² ":1587,"ğı ":9316,"ğın":3141,"Çi":1350,"Ça":1454,"âl":1634,"Ün":1471,"Öz":1299,"çı":7148,"çü":4634,"î ":1515,"ço":5321,"çm":1158,"çl":3470,"çi":13394,"çe":14606,"ça":9969,"ç ":5469,"üş":3641,"üğ":1561,"üç":3404,"ün":27564,"üm":12601,"ül":12799,"üs":5069,"ür":27019,"üp":974,"üt":2907,"üz":15474,"üy":5418,"üc":2767,"üf":4960,"üd":3476,"öğ":1623,"ük":10837,"ü ":14372,"öy":3363,"öz":7289,"ör":11792,"ös":1882,"ön":12083,"öl":13407,"ök":2015,"ğ ":1364,"ğu":12250,"ğr":3621,"ğe":3325,"ğd":979,"ğa":3324,"ğl":11320,"ği":14714,"ğı":15723,"ğü":1540,"ığ":10531,"ış":19918,"ı ":102139,"İn":4248,"İm":1089,"İl":1863,"ın":99943,"ım":16215,"ıp":1333,"ıl":42504,"ık":17416,"ıf":1477,"ıb":1069,"ıd":8898,"ıc":3354,"İt":1484,"İr":962,"İs":7391,"ıy":7698,"ız":6701,"ıs":11942,"ıt":2777,"ır":43940,"şt":19651,"şu":2786,"şi":12692,"şl":8667,"şk":4781,"şm":7053,"şa":13302,"şe":7623,"Şu":1166,"Şa":2212,"Şe":1159,"ş ":18890,"şı":10920,"şü":1419," Ga":2694," Bü":1149," Ağ":1410," Ge":3227," Bö":1239," Fo":1207," Fr":4097," Fi":2393," Ha":7643," He":2466," Go":1137," Gr":2225," Gi":1067," Dü":2153," Ho":2861," Hi":1971," Ja":2310," In":1293," Ka":10047," Ke":1904," Ki":1728," Jo":1635," Gü":3188," La":2844," Le":1944," Li":3034," Ko":3863," Kr":1518," Ku":5027," Gö":1215," Ma":10963," Mi":3862," Me":5127," Lo":1852," Ne":2557," Na":1779," Ni":2442," Mo":2790," Mu":2369," Am":3459," An":5094," Ak":1699," Al":8247," Af":1009," Ad":2080," Ab":1075," Ba":8219," Ay":1468," Av":3029," At":1662," As":1754," Ar":5292," Be":4599," Bi":5804," Bo":2788," Br":2154," Bu":5532," Ca":3515," Ce":1828," Ch":2228," Co":2886," Cu":2385," Da":3328," Di":2745," De":6126," Do":2600," El":1340," Ek":1512," Es":1407," Er":2282," En":2338," Ey":1526," Fe":2195," Fa":2283," Wi":1521," Wa":1142," Yu":2383," Yo":1225," Tü":7497," Ya":4478," Ye":1681," a ":1377," Kö":1204," Os":2235," Or":2329," Po":6216," Pi":1404," Pe":2029," Pa":5030," Kü":1311," No":1821," Ol":1012," Ok":999," Oc":1088," Ra":1684," Mü":1801," Kı":2610," Ro":3268," Re":2723," Ri":1059," Pr":2202," Su":2042," St":2970," Ta":4359," Th":2488," Ti":1443," Te":4145," Tr":1513," To":2461," Ru":2999," Sa":7750," Sh":981," Si":3171," Sc":1940," Se":4078," So":2983," Va":1973," Ve":1421," Vi":2383," Tu":1612," Ul":1129," im":933," in":10201," ik":4076," il":20054," is":5383," ka":23639," ki":5592," ke":6355," eğ":1244," dı":1534," ha":11824," he":4045," gi":4830," gr":3277," dö":3823," dü":5597," id":1007," aş":930," hi":2120," ni":6427," nd":4080," ne":3117," na":1507," mu":1078," mo":2617," ok":1391," ol":35321," on":2426," of":1496," nu":1915," no":1087," le":1165," li":3624," gü":6331," la":1347," ku":18460," gö":9671," km":3282," ko":8297," me":8098," mi":3110," iç":9390," hü":1380," ma":6917," ad":10374," am":2886," an":8617," ai":1874," ak":2362," al":14807," ar":9911," at":1881," as":2441," d ":1776," ba":23010," ay":4744," bi":52706," be":11424," bo":2633," bu":15855," ca":1538," e ":1478," er":1285," et":4327," es":3101," en":6931," el":3328," ek":1681," aç":1971," fe":1277," fa":4483," ey":3388," ev":1229," fu":1090," fr":1901," fo":1846," fi":5962," ağ":1494," 
ge":16462," bü":4020," ga":1355," i ":957," bö":7468," ci":1625," da":23947," do":7108," de":26605," di":9385," ed":6003," du":2416," za":2588," yo":2330," sı":5340," ye":11522," tü":4969," ya":37365," sö":2004," sü":3398," yı":13517," yü":5685," yö":4365," nü":5050," sa":17392," se":8759," si":6463," sp":996," so":7754," kı":5201," ra":1785," mü":4235," re":3088," ro":2639," pr":3082," iş":3101," ot":2226," or":5403," oy":4366," kö":3581," pe":1612," kü":2525," pa":4625," pl":965," po":2333," pi":1501," va":2885," ve":49979," uz":2734," uy":2267," vi":955," nı":8640," tu":1641," un":2824," ul":1616," ta":27334," st":1812," su":2833," to":3720," th":1268," ti":1727," te":11491," İt":1484," İs":7380," İr":961," ın":4768," İn":4243," İl":1857," İm":1089," Çi":1348," Ça":1452," Öz":1296," Ün":1468," çe":4156," ça":3855," ço":4379," çi":1440," çı":3500," öl":1632," ön":4253," ör":1000," öz":4179," ür":2793," üs":1211," ün":2173," ül":2545," üz":3783," öğ":1347," üç":1563," şi":1684," şe":4952," şa":3567," Şa":2210," Şe":1156," Şu":1166,"İst":2249,"İta":1353,"İsp":2587,"İng":3329,"İmp":936,"ıca":1274,"ıda":1227,"ılı":15802,"ısa":1923,"ıra":2874,"ırl":2487,"ırm":2227,"ını":15301,"ımı":5151,"ıya":1118,"ırı":3492,"ız ":2539,"ıl ":1920,"ıdı":7570,"ık ":8525,"ıcı":2074,"ıla":12183,"ın ":32401,"ıld":3344,"ılm":5978,"ıll":2939,"ım ":3853,"ıka":2439,"ıkl":3108,"ıkt":1193,"ınl":2820,"ıp ":1063,"ınd":38554,"ına":8579,"ınm":927,"ıma":1331,"ımc":938,"ıml":3642,"ıs ":2159,"ır ":29899,"ıyı":1283,"ızı":1437,"ısı":6837,"ıyl":4411,"ızl":1239,"Çin":1025,"ığı":10132,"ış ":6594,"ışt":5588,"ışa":989,"ışm":2843,"ışı":2617,"Fil":1047,"Eyl":1137,"Eki":1120,"End":927,"Ağu":1069,"Gen":1382,"Gal":926,"Böl":1142,"Fra":3208,"II ":1197,"Haz":1331,"Her":938,"Hal":1040,"Har":940,"Dün":1753,"Hol":1623,"Ara":2840,"Avr":1832,"Bar":1214,"Bat":956,"BD ":2271,"Ada":1186,"Alm":3781,"Ame":2710,"Ana":1277,"Ant":1215,"Bu ":2451,"şıl":1169,"şık":1556,"şın":2204,"şı ":3074,"Bel":1045,"Bil":1105,"Baş":989,"Bir":3328,"Cum":2030,"Dev":1876,"Den":1321,"Cha":1022,"Doğ":1141,"Nis":1117,"Oca":1015,"Ort":943,"Osm":1787,"Par":1928,"Pro":985,"Por":4336,"Kan":1554,"Kas":1322,"Kar":2603,"Kon":1226,"Kra":1061,"Kur":1360,"Kuz":1311,"Gün":1851,"Mer":1131,"Man":1052,"Mar":3200,"May":1678,"Mil":1269,"çok":3674,"çeş":1324,"çla":1690,"çim":1273,"çin":7152,"Yun":1817,"Tür":7269,"Yar":1131,"Sta":1148,"Tem":1343,"Rus":2328,"Sch":1519,"Sav":1634,"San":1732,"Rom":1453,"Ulu":930,"The":1923,"ça ":3264,"çal":2740,"çe ":2241,"çes":2357,"çer":2328,"çev":1541,"çek":2799,"çi ":1341,"biy":1472,"bit":1248,"bir":44645,"bil":11436,"bin":1380,"baş":7621,"şa ":1752,"bol":2761,"şam":1786,"şan":2545,"şar":3761,"boy":1344,"ban":1510,"bak":1768,"bal":1557,"baz":1091,"bay":1165,"azı":4805,"bat":3330,"bas":2581,"bar":2153,"Şub":979,"bi ":3364,"ber":3678,"ben":1194,"bel":7483,"bes":1722,"bağ":8489,"şla":4267,"şle":3059,"şma":4426,"şme":1364,"ca ":7674,"car":1245,"can":1925,"cak":4366,"ce ":9532,"bra":943,"bu ":5377,"şek":1911,"şeh":2781,"şi ":1181,"bur":1706,"bul":11239,"bun":1530,"buc":1673,"şid":1048,"şit":1478,"şir":1286,"şin":976,"şim":2323,"şil":1127,"şik":1949,"şki":1113,"şke":1220,"şka":1851,"aka":3723,"am ":5523,"aki":6742,"adı":13553,"al ":11243,"ail":1471,"air":932,"ait":1247,"acı":3398,"ak ":21809,"abı":1039,"ahi":5926,"aha":4061,"anu":1353,"any":6219,"ano":1022,"ann":1052,"anm":4730,"ant":4742,"ans":6131,"ane":2508,"ang":2190,"ani":4880,"ank":1605,"anl":12840,"ap 
":1282,"ana":13099,"anb":2176,"anc":4224,"and":10443,"aml":2039,"amp":1789,"ami":2705,"ame":1745,"ama":13353,"aly":2496,"afı":8781,"alt":4529,"alm":2361,"all":3746,"alk":2064,"ali":6616,"ald":2079,"ale":5493,"ala":14214,"alb":2602,"an ":84156,"aks":970,"akt":8868,"akk":924,"akl":5036,"aba":3739,"abe":1525,"abi":3498,"abu":1168,"ae ":1012,"aca":2693,"ad ":1275,"ştu":4101,"şti":6965,"afi":1276,"ştı":7023,"ah ":1276,"ado":1356,"adl":2298,"adi":1602,"add":1987,"ade":4140,"ady":944,"ada":8970,"azi":2552,"azl":1262,"atı":8461,"aze":1261,"arş":2391,"aza":4710,"az ":2571,"ayn":2937,"ayl":1883,"ayr":2190,"ası":27693,"arı":35962,"aya":12842,"ayd":1055,"aye":1092,"âle":1449,"ba ":1088,"ayı":12386,"akı":5066,"at ":6328,"are":4111,"ard":9709,"arc":1066,"ara":45240,"arm":1435,"arl":5899,"ark":6902,"ari":9836,"alı":12304,"ars":1642,"art":5112,"asa":4734,"ary":1104,"asi":2326,"ask":1950,"ar ":19629,"apa":2097,"apm":1588,"apl":1156,"apo":1295,"apt":1792,"as ":3211,"ava":5024,"avr":943,"arç":1233,"avi":1237,"ay ":3414,"avu":1433,"apı":7957,"ata":3117,"ast":3088,"asy":2788,"amı":5927,"atm":1041,"apç":991,"atl":2228,"atr":1379,"ato":2437,"ate":2841,"ati":5438,"att":1422,"anı":18491,"Üni":1310,"Şar":1029,"ji ":1449,"jis":950,"itl":1849,"öğr":1364,"ito":956,"cı ":3493,"ism":1455,"ist":11668,"ita":3780,"ite":3995,"iti":3699,"cıl":1667,"üfu":4682,"iva":1157,"ive":2871,"ilç":2129,"is ":3578,"ion":3161,"ir ":71671,"irm":3093,"irk":1523,"irl":6644,"iri":12312,"isi":13092,"ise":2824,"isa":3166,"ire":3266,"ira":2633,"ird":1851,"it ":3783,"ünl":1704,"üni":930,"ünc":1623,"ünd":2862,"üne":6398,"üml":1023,"üme":1310,"ült":1479,"ür ":5331,"üny":3730,"iyi":1021,"ül ":1714,"iyl":2890,"iyo":3774,"iya":6327,"iye":14035,"üdü":2475,"ük ":5512,"cıy":983,"iz ":6631,"üle":2031,"ülk":2638,"üll":944,"üks":1988,"ün ":4631,"izm":2299,"izl":1160,"izi":3733,"izc":3709,"ükl":1155,"iza":1126,"üm ":2100,"kim":2642,"kil":4381,"kiy":3334,"kiz":4406,"kin":3419,"kis":1066,"kit":1608,"km ":1709,"ki ":15464,"eğe":1107,"eği":4944,"kel":2350,"ken":7944,"kes":1806,"ker":2097,"ket":3347,"kez":3142,"ke ":1164,"kra":1708,"kiş":2390,"kse":2982,"klı":3118,"km²":1573,"kor":1133,"kon":4434,"kom":1557,"kol":1873,"kle":9482,"kla":8235,"kli":3448,"dız":949,"dıy":1257,"dır":19814,"dın":2440,"dı ":5738,"kaz":1358,"kay":2732,"kat":3255,"kar":8976,"kas":2234,"kap":1914,"kan":7174,"kal":4961,"kam":982,"kad":3577,"kab":1637,"dış":1185,"ka ":4513,"dığ":4219,"ha ":3071,"cü ":1507,"ham":1081,"han":2727,"hak":1219,"hal":3743,"hav":968,"har":3475,"has":1125,"hat":1036,"haz":1057,"hay":1743,"he ":3168,"her":2799,"hen":1336,"hem":1060,"hi ":1444,"hip":4144,"hin":2793,"hil":1264,"hir":2302,"hle":2209,"ağı":5375,"gru":2866,"gra":2358,"gul":1221,"gue":2075,"dül":2427,"ian":1361,"dün":2104,"dür":3900,"ibi":3243,"düz":2369,"id ":1061,"iba":969,"ia ":4696,"aş ":1969,"ig ":1399,"ici":2058,"ich":1442,"ice":1186,"ie ":1088,"ica":1674,"idi":13477,"ide":3373,"ida":1813,"if ":1449,"düş":1411,"il ":5350,"im ":8509,"ika":5895,"aşa":4663,"aşl":3782,"aşm":1272,"aşk":2892,"aşt":1881,"ihl":1333,"ihi":3044,"ik ":16892,"iml":3173,"imp":924,"ime":2281,"imd":1567,"imi":5215,"ip ":4274,"inc":5308,"ind":30574,"ina":3996,"aşı":5939,"ino":995,"int":1436,"ins":2898,"ine":16354,"ing":3240,"ini":20786,"inl":1777,"iko":990,"ikl":5637,"iki":4685,"eçi":1614,"eçe":1013,"ila":2643,"in ":47078,"ikt":2801,"ilo":1062,"ill":5588,"ilk":4218,"ilm":10420,"ilg":3153,"ili":20573,"ild":2510,"ile":24870,"ima":2176,"io 
":955,"ily":1842,"hri":2688,"hur":2425,"dör":1115,"dön":2425,"fes":1014,"fer":1326,"far":1401,"eyâ":1363,"fa ":1026,"eyb":1488,"eya":8242,"eyl":966,"eyi":4871,"eyd":2007,"eye":3542,"ez ":1852,"ezo":1005,"ezi":2727,"eta":1696,"ete":2836,"eti":16248,"etm":3594,"etl":4019,"etk":1732,"est":3861,"ess":1296,"esw":1087,"ev ":1032,"etr":1981,"ett":2719,"eve":1133,"eva":954,"evl":3752,"erç":1693,"evi":3588,"evr":2587,"ey ":4964,"er ":28073,"es ":5122,"erk":5212,"erl":5910,"eri":48739,"erg":2108,"ere":9775,"erc":963,"erd":6317,"era":4204,"erb":1053,"et ":9016,"açı":2094,"esk":1828,"esl":1542,"esm":989,"esi":30715,"ese":2043,"esa":1128,"ert":1494,"ers":4580,"ern":2620,"erm":2340,"eki":11678,"ekl":5425,"açl":1635,"ekn":1222,"eko":1029,"eks":1790,"ekt":6067,"en ":46837,"ela":1161,"eld":2163,"ele":20479,"eli":10508,"elm":1130,"ell":5734,"els":966,"ema":2676,"eme":6814,"eml":3228,"emm":1228,"emi":6693,"ene":8543,"eng":1269,"ena":1544,"end":6506,"enc":1546,"enm":2094,"enk":1143,"enl":4600,"eni":13441,"ens":1925,"ent":5851,"enz":1180,"egu":1647,"ehr":2303,"ehi":2131,"ek ":9883,"aç ":1092,"ein":1910,"el ":11201,"eke":1777,"eka":935,"em ":2604,"öst":1805,"gis":1807,"gir":1360,"gil":5370,"önü":972,"geç":2375,"gin":1021,"gib":2269,"ört":1016,"öre":4482,"ölü":2891,"ölç":2720,"gi ":2386,"ör ":949,"gen":4928,"ger":3542,"ges":4599,"ağa":1186,"gel":7731,"ağl":9979,"önc":1623,"öne":7107,"ge ":2812,"ağ ":1140,"gaz":1147,"ölg":6551,"gar":1277,"büy":3008,"büm":2701,"gan":1892,"böl":7461,"fus":4728,"fut":963,"öyü":1007,"fre":2029,"for":2226,"öze":3441,"özl":926,"örü":2439,"fil":4052,"da ":52350,"de ":46250,"dak":5123,"dal":2767,"dah":2619,"das":1177,"dar":3775,"dan":21526,"dam":1680,"day":1218,"dde":1533,"cul":955,"cus":1172,"cre":977,"cu ":1846,"ch ":1055,"ces":998,"cek":1367,"cel":1540,"ci ":4802,"ck ":1696,"che":1266,"chl":1164,"cil":1965,"cis":1558,"cin":1363,"ed ":1577,"ebe":1387,"ebi":2611,"efe":1034,"edi":16774,"ede":8053,"eda":1066,"eci":1467,"ece":3057,"dyo":1078,"dur":5671,"duğ":3327,"don":972,"dol":1859,"dok":977,"diğ":3682,"doğ":5145,"dra":1187,"dlı":1288,"du ":2273,"dağ":1454,"der":6369,"des":1911,"dev":2820,"ded":1153,"del":2710,"dek":4272,"den":20219,"dem":1663,"di ":6071,"dla":1190,"do ":1033,"diz":1879,"diy":5465,"din":3013,"dir":24487,"dis":2432,"dik":1433,"dil":6616,"değ":2872,"rga":1818,"ri ":19287,"rgi":1489,"rge":959,"ret":5334,"res":4290,"rev":1391,"rdu":2054,"rg ":1524,"rec":1008,"red":1084,"reg":1836,"ren":6966,"rek":4543,"rel":1942,"rda":4941,"rdi":3615,"rde":5880,"re ":11153,"ray":2290,"müz":3407,"rd ":1472,"rap":1757,"rar":2927,"ras":9820,"rat":4222,"rbi":1042,"rba":1205,"mün":3661,"ran":10600,"ram":4044,"ral":5875,"rak":15812,"rab":1433,"raf":10463,"rad":2346,"rac":1216,"rs ":1268,"ros":1512,"rot":967,"rom":2121,"ron":1936,"rol":2443,"rkç":1463,"rog":1323,"rna":1248,"rne":1926,"ro ":1843,"rma":6313,"rme":4031,"rmi":1407,"rlu":1118,"rli":4427,"rle":9208,"rla":6927,"rki":3194,"rkl":1461,"rke":5713,"rka":1543,"rdı":3751,"riy":3959,"rit":1940,"ris":5765,"rih":5101,"müş":1050,"raş":1079,"ril":8019,"rik":4914,"rin":23896,"rim":3407,"ria":1426,"rdü":1150,"ric":1498,"rid":4314,"rk ":5291,"lıl":999,"lık":8208,"lın":9443,"lım":1113,"lır":2906,"rya":1228,"rup":3018,"run":1284,"rum":2945,"rul":5547,"ry ":1364,"rsi":2903,"rsa":1025,"rta":4786,"rte":4662,"rti":1858,"lı ":21277,"rub":2412,"rt ":3885,"rkı":3548,"ru ":1610,"rlı":1952,"sab":1070,"sad":1069,"nüf":4498,"sah":5074,"sal":4864,"nüm":1062,"san":6337,"nün":1717,"sat":1316,"sar":2328,"say":5210,"sav":1790,"sa ":4002,"nü 
":1409,"lış":2586,"lığ":4204,"si ":18237,"sağ":1738,"siz":1136,"siy":3676,"sid":3408,"sia":1857,"sit":3495,"sis":2770,"sin":18799,"sil":2083,"sim":2963,"sik":1925,"se ":2953,"ser":3758,"ses":2131,"sen":2086,"sem":1040,"sel":4229,"sek":2392,"spo":1131,"spa":2896,"son":6601,"su ":5315,"st ":3445,"slu":1935,"sla":3121,"sle":1769,"ski":2726,"ske":1671,"sma":2231,"smi":1955,"mın":4103,"swi":1113,"stü":1161,"sya":2595,"syo":3350,"ste":9865,"sta":9233,"sto":2278,"sti":3825,"stl":934,"str":1911,"mı ":4565,"sun":2855,"tak":3619,"tal":5550,"tab":2400,"tad":4726,"tay":1623,"tat":1503,"tas":2835,"tar":17331,"tap":1242,"tan":13930,"tam":2113,"te ":6912,"tbo":1793,"ta ":6535,"mış":10404,"pa ":2055,"par":4696,"kül":1832,"küm":1078,"pan":3919,"per":2301,"küç":956,"pla":3634,"ple":961,"plu":1414,"lçe":2541,"piy":2262,"por":1907,"pon":1081,"pol":1613,"lçü":2547,"pti":1099,"pra":1013,"pro":2802,"ptı":1375,"lü ":3559,"lüm":2580,"lül":1275,"lük":1015,"iş ":5374,"işi":4832,"işk":1088,"işl":2771,"işt":5462,"kı ":2158,"kıl":1124,"kım":2874,"kıs":2851,"kın":2434,"kıy":1259,"ra ":9857,"mü ":1475,"ngi":4542,"ni ":12017,"nge":1612,"ncü":1344,"nel":4863,"nek":1315,"nen":2962,"nem":4576,"ner":2342,"net":4444,"nes":2204,"ng ":2974,"ned":1504,"ney":4102,"nci":4377,"nce":5236,"nca":4009,"ne ":19615,"nbu":2438,"ndu":1472,"ndr":1044,"ndo":1041,"ndi":6541,"nde":35967,"nda":48013,"ncu":1481,"nak":1647,"nal":3192,"nam":1052,"nan":13136,"nar":2141,"nad":2145,"nd ":2524,"nat":3318,"nas":1441,"nay":1104,"na ":14803,"muş":4297,"nya":10222,"nun":5903,"nus":1510,"nuc":968,"nto":981,"ntr":1143,"nti":4165,"nta":1967,"nte":3585,"nmı":2809,"nst":987,"nse":1596,"nsi":1251,"nsa":3394,"nu ":4463,"nlı":4882,"nra":2468,"nt ":2440,"niş":970,"ns ":1946,"nlü":1339,"nom":2114,"nne":1221,"nme":1944,"nma":4498,"nmi":973,"nli":2503,"nla":14247,"nle":4649,"nlu":1296,"nka":1369,"ndı":4155,"nic":987,"ndü":1181,"niy":1052,"niz":3806,"ncı":1170,"niv":2157,"nis":1976,"nir":2294,"nim":1125,"nin":20306,"nik":2175,"nil":944,"ogr":1717,"ok ":4093,"oji":2722,"ol ":4011,"ock":1051,"ode":1635,"of ":1645,"iği":9467,"iğe":1929,"obi":987,"nsı":2063,"köy":1994,"oyu":4442,"oyn":1402,"oto":3517,"osy":1120,"ost":1210,"osu":1204,"ovi":1205,"ova":1326,"opl":2685,"os ":3304,"çıl":1025,"çık":4300,"or ":3792,"ork":1035,"orl":1782,"orm":2268,"ord":2039,"ore":1088,"org":1822,"ori":2004,"ort":8149,"oru":2784,"m² ":1578,"ora":1605,"ola":24963,"old":3573,"olc":925,"on ":12904,"oli":2380,"oll":2123,"ole":1383,"ols":1253,"olm":3895,"olo":4194,"olu":8661,"om ":1843,"kçe":1715,"okt":974,"oku":2095,"ona":2977,"ond":1625,"one":1566,"oni":1789,"onl":1840,"ono":2239,"onr":2464,"ons":1619,"ont":1468,"onu":7635,"ony":1563,"oma":3785,"ome":1724,"omi":1625,"omo":1197,"la ":11386,"le ":19263,"lde":3058,"ldi":2187,"ldu":3454,"lab":1412,"lac":926,"lad":1565,"lah":1109,"lak":1159,"gün":5583,"lan":43556,"lam":9121,"lar":57894,"lat":2773,"las":2417,"lay":4711,"ld ":958,"kuz":3055,"kur":7540,"kul":7562,"kta":6976,"kte":6402,"ksi":2396,"ktr":1368,"kti":2217,"gös":1720,"gör":5828,"ktı":2504,"lon":1166,"liğ":4787,"loj":2682,"lmi":5563,"lme":4109,"leş":5292,"lma":11588,"lmu":3175,"lst":1073,"lmı":3998,"lta":1114,"lte":1773,"lu ":7438,"llı":1332,"liş":3210,"lt ":1120,"lge":7089,"lgi":3114,"li ":16772,"lbü":2715,"lga":999,"ley":2404,"lev":1310,"les":3725,"let":9791,"ler":45886,"lem":4975,"len":16099,"lek":2724,"led":5291,"lec":1154,"lo ":939,"lla":11633,"lle":6955,"lli":5594,"lke":3179,"lm ":1714,"ldı":4073,"ll 
":1428,"lit":1562,"lis":4164,"lir":4755,"lin":9226,"lim":4866,"liz":3501,"liy":1446,"lid":1846,"lia":946,"lk ":5135,"lik":9172,"lil":1002,"laş":3620,"ma ":9869,"mac":2119,"mak":8947,"mad":3045,"mar":2763,"mas":9043,"mal":4481,"mam":1405,"man":15375,"may":3231,"mat":2471,"me ":6043,"mda":1303,"mde":1460,"mdi":1706,"med":2057,"eş ":1039,"met":4942,"mes":5862,"mer":6138,"mel":3135,"men":5922,"mek":5999,"maç":1261,"mey":2238,"çüm":2197,"çük":1139,"luk":1924,"lup":1601,"lun":8327,"lum":1008,"lus":2340,"fın":8889,"lya":3819,"luğ":2080,"ltı":2446,"luş":5045,"mpi":1862,"mod":1329,"mon":1220,"mpa":1588,"mu ":1170,"miş":7571,"mun":1116,"muz":1291,"mhu":2129,"eşm":1123,"eşt":1717,"mi ":6405,"eşi":5221,"min":6521,"mil":2577,"mir":1395,"mis":1028,"mcı":1046,"mit":1204,"mid":2070,"mik":1507,"mlu":936,"mli":2428,"mle":5174,"mla":5844,"içi":8066,"içe":2201,"mmu":1223,"uğu":5772,"tı ":4071,"zun":1539,"tıl":3307,"tın":2882,"tır":10360,"tıs":1266,"zyo":962,"tığ":1565,"zi ":3100,"zet":1053,"zey":4798,"zen":2488,"zel":3272,"zer":6678,"ze ":1245,"zce":3650,"zde":1081,"zam":2429,"zan":2260,"zak":1080,"zar":3022,"zon":1273,"zme":978,"rşı":2081,"zla":2044,"zgü":956,"zle":1804,"zin":1617,"zik":2532,"zir":1408,"zis":1290,"yum":1152,"yun":4454,"sı ":14744,"ynı":1581,"ylü":1204,"yol":2904,"yor":1353,"yon":7554,"yrı":1788,"sıd":1928,"sıl":1373,"sım":1355,"sır":2328,"sın":16353,"sız":2708,"sıy":986,"ye ":8402,"yda":1138,"yed":1753,"yes":4583,"yer":7744,"yen":3683,"yel":1262,"yet":5659,"ya ":30890,"rış":2333,"yba":1101,"yaz":5444,"yay":4498,"yat":3352,"yar":6010,"tür":5785,"yas":4408,"yap":11061,"tün":1161,"yan":10579,"yal":5013,"tüm":1022,"yak":2748,"ydı":961,"yla":6744,"yle":4770,"yo ":1109,"yna":2824,"yi ":2354,"ygu":1552,"yin":3381,"yaş":2680,"tör":1578,"rı ":14655,"rım":1734,"rın":16497,"rıl":3423,"rıs":1452,"sür":2901,"söz":1499,"sça":1363,"wig":1137,"rü ":1492,"rün":2219,"rül":1328,"vru":1839,"vri":1244,"vre":1449,"vra":964,"pıl":3787,"pım":2266,"vil":989,"vaş":2523,"viz":960,"vis":1122,"rça":1249,"rçe":1701,"vle":3463,"vi ":1409,"vey":5886,"ver":8628,"vet":1190,"ven":1213,"ve ":40015,"val":1203,"van":2336,"var":3116,"va ":2329,"uzu":1436,"uze":4115,"uyu":1005,"uza":1667,"uyg":1403,"uya":1017,"uz ":2525,"usç":954,"uva":1017,"usl":3168,"usa":1561,"usu":5699,"ust":2940,"utb":1431,"us ":4706,"ut ":1302,"ura":2627,"urd":1061,"urg":1888,"uri":2897,"urm":935,"uro":982,"urt":1354,"uru":9050,"upa":2517,"ur ":10047,"umu":1897,"umh":2127,"uml":1645,"uma":1385,"unu":6201,"unl":2635,"unm":1881,"unc":2040,"und":5507,"una":8329,"up ":2721,"ukl":1910,"um ":2957,"ulu":12477,"ult":959,"ulm":2979,"ull":6976,"ula":6334,"un ":10772,"uk ":2310,"ul ":3821,"uha":1114,"ucu":1955,"udi":996,"ubu":2444,"uca":2065,"oğu":5104,"oğr":1470,"ues":1785,"udu":3053,"oğa":1679,"oğl":957,"uba":1499,"tur":6494,"nır":2256,"nıl":5540,"nın":19519,"nım":1872,"nıf":1036,"tre":1553,"tra":2671,"tri":1839,"tro":3501,"tte":1278,"tti":2653,"nı ":11130,"tme":3621,"tma":1695,"to ":1507,"tiğ":1447,"tos":1656,"tom":1446,"ton":2840,"tol":991,"tor":3257,"top":3422,"til":2825,"tik":5330,"tif":1158,"taş":2105,"tir":10735,"tis":1905,"tin":8295,"tim":3897,"tio":1845,"tic":1549,"tid":1024,"tiy":1388,"tki":2141,"pça":1027,"tli":1970,"tla":3863,"tle":6323,"tem":6048,"ten":3191,"tei":1620,"tek":7857,"tel":3048,"ted":3509,"th ":1012,"tes":4105,"ter":10441,"ti ":6632,"the":1663,"üşü":1357,"üğü":1444,"zı ":1890,"zıl":2487,"üç ":948,"zöl":2079,"üçü":1824,"yı 
":2418,"yım":2005,"yıl":14867,"yın":3653,"yıs":2728,"yük":5408,"yüz":3683,"yön":3982,"üzö":2079,"üyü":3989,"üzi":2596,"üze":7514,"üye":1004,"ütü":1249,"üsü":1036,"ürü":4447,"ürk":7688,"ürl":1211,"üre":5286,"ümü":6554,"ülü":2192,"üs ":1086,"üst":1769,"ünü":3990,"uş ":2901,"uşu":1743,"uşt":4122,"uşa":1668,"yâl":1370},"n_words":[9192208,10449574,7620193],"name":"tr"}
\ No newline at end of file
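On the detection side, these raw counts are turned into per-language n-gram probabilities by normalising each count against the matching total in "n_words" (index = gram length - 1). The snippet below is a toy illustration of that idea, not langdetect's actual detector, which additionally applies alpha-smoothing, caps the gram length, and averages over randomised trials; the floor constant 1e-9 here is our own stand-in for unseen grams:

import json
import math

def ngram_logprob(text, profile, n=3):
    # Score a string against one profile: sum of log relative
    # frequencies of its character n-grams.
    freq, totals = profile['freq'], profile['n_words']
    score = 0.0
    for i in range(len(text) - n + 1):
        gram = text[i:i + n]
        # count / total-for-this-gram-length, with a small floor so
        # unseen grams do not drive the log to -infinity
        p = freq.get(gram, 0) / totals[n - 1]
        score += math.log(p + 1e-9)
    return score

profiles = []
for lang in ('tl', 'tr'):
    path = 'nlp_resource_data/langdetect/profiles/' + lang
    with open(path, encoding='utf-8') as f:
        profiles.append(json.load(f))

text = 'üzerinde yer alan'
best = max(profiles, key=lambda p: ngram_logprob(text, p))
print(best['name'])  # expected: 'tr'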
diff --git a/nlp_resource_data/langdetect/profiles/uk b/nlp_resource_data/langdetect/profiles/uk
new file mode 100755 (executable)
index 0000000..e80f203
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/uk
@@ -0,0 +1 @@
+{"freq":{"D":2686,"E":45335,"F":1931,"G":2251,"A":4545,"B":2792,"C":4259,"L":2592,"M":3964,"N":23611,"O":1971,"H":1743,"I":29162,"T":3063,"V":2399,"P":3688,"S":26730,"R":2282,"X":2695,"f":3088,"g":5064,"d":7920,"e":26938,"b":4218,"c":9825,"a":26142,"n":18042,"o":19695,"l":12837,"m":7697,"k":2703,"h":7896,"i":22805,"w":2062,"v":2426,"u":10520,"t":16747,"s":15016,"r":19222,"p":6493,"y":4268,"x":1842,"́":45988,"ь":314859,"ю":121712,"я":259056,"ш":55425,"щ":34352,"ф":70318,"х":152145,"ц":190380,"ч":157844,"р":767814,"с":616542,"т":784384,"у":465611," t":1584,"є":83027,"ї":138148,"і":1047876," p":1613,"Є":3706,"І":14787,"Й":2127,"Л":22219,"К":34440,"Н":39528,"М":28217,"П":47409,"О":15488,"Б":23123,"А":35851,"Г":15610,"В":31676,"Е":8630,"Д":40073,"З":11772,"Ж":3520,"Ш":12613," I":24900,"Ю":2003,"Я":3891," L":1894," M":2910," B":2299,"Т":14847," C":3270,"У":18037,"Р":27850," A":3247,"С":43546," F":1599,"Ц":5942,"Ч":5807," G":1797,"Ф":32198," D":1850,"Х":6705,"л":484150,"к":541520,"й":174507," X":1787,"и":768429,"п":339521,"о":1248992,"н":1274404,"м":381536,"г":226618," S":3616," R":1762,"в":579991,"б":162019," P":2630,"а":1251055,"з":257293,"ж":95383,"е":806087," T":2138,"д":454853,"Ґ":1990,"ґ":4623," А":31736," Б":22701," В":30948," Г":15174," Д":39397," Е":7917," Ж":3460," З":11194," Й":2051," К":32754," Л":21825," М":27530," Н":36830," О":14175," П":46014," Є":3619," І":13525,"EE":21655,"I ":2684," б":36681," а":43891," г":34855," в":154625," е":34325," д":117491," з":113779," ж":8768," й":6819," л":26339," к":61923," н":123337," м":87814," п":148797," о":74913," Р":21137," С":39028," Т":13848," У":16956," Ф":31501," Х":6271," Ц":5651," Ч":5720," Ш":10618," Ю":1964," Я":3806," ї":5196," і":83499," є":13641," т":92032," у":89129," р":101219," с":120858," ц":13483," ч":24594," ф":40198," х":11362," ш":8917," щ":18497," ю":2119," я":36739," Ґ":1984,"E ":21977,"NS":21624,"II":2127,"IN":21640,"SE":21638,"a ":4767,"i ":1589,"he":1740,"el":1571,"en":2535,"es":2308,"er":4659,"e ":6591,"d ":1983,"at":2458,"ar":2758,"al":2232,"an":3512,"ac":1574,"nt":1885,"on":3299,"or":2694,"r ":2633,"o ":1648,"na":1574,"ni":1555,"li":1877,"le":1913,"la":1826,"n ":4271,"ic":2243,"ia":1621,"is":1891,"in":3535,"io":1638,"l ":1929,"y ":1830,"us":2293,"um":1631,"te":2376,"ti":2183,"th":1945,"ta":1627,"st":1645,"ro":2012,"ri":2866,"re":1930,"ra":2584,"t ":3059,"s ":6868,"́ ":2290,"́в":3741,"́д":2078,"́к":2065,"́й":3031,"́м":2120,"́л":4305,"́н":8997,"́р":4428,"́с":2026,"́т":2528,"А ":2905,"В 
":2961,"ьє":2885,"юв":4387,"юд":3425,"юр":3284,"ют":14456,"юч":4396,"ює":2722,"яд":6152,"яг":3108,"яв":2990,"юю":1542,"ян":10771,"ям":9107,"ял":2194,"як":26414,"яз":4329,"ях":3443,"ят":9738,"яр":2263,"яч":4477,"яє":2354,"ші":2899,"щи":3380,"ще":5720,"ща":2636,"що":19501,"щі":1589,"ьк":115778,"і́":5805,"ьб":1841,"ьв":2256,"ьш":4727,"ьс":24456,"ьт":4512,"ьн":50183,"ьм":4831,"ьп":5070,"ьо":12701,"фі":13912,"хи":2735,"хн":7629,"хо":17158,"хр":2067,"ху":3902,"ха":7837,"ци":27077,"хі":11714,"цт":3698,"цу":23008,"це":16672,"чл":2502,"чн":59778,"чо":6493,"ці":93529,"чи":16012,"чк":3842,"чу":3146,"ць":14520,"че":19603,"ця":6177,"ча":25750,"цю":1804,"шо":7889,"шн":3282,"шк":3613,"шл":2107,"ши":11930,"шт":3498,"шу":1879,"чч":1777,"чі":4713,"ше":8200,"ша":4745,"ск":15952,"см":3119,"сл":40211,"со":21185,"сн":22933,"сп":20278,"св":10260,"се":44728,"си":21636,"рі":55323,"рш":6088,"рю":2898,"са":14767,"ря":9090,"рр":2512,"рс":15180,"рт":37524,"ру":28810,"рх":8486,"рц":1712,"тн":17002,"тл":3183,"тк":8278,"тс":3967,"тр":48477,"то":75584,"те":71719,"тв":21013,"ти":129473,"сі":43933,"сь":82109,"та":153881,"ся":27435,"сю":2514,"су":15433,"сс":3485,"ст":199868,"сх":2897,"сц":3318,"ур":24405,"уп":9084,"ут":35010,"ус":11964,"ум":8171,"ул":17666,"ун":34351,"ті":36808,"уз":30153,"ук":20646,"уд":13417,"уг":6510,"уж":3934,"тю":2714,"уа":9949,"тя":7369,"уб":6901,"ув":18706,"ть":44694,"тт":6769,"ту":74115,"фу":3164,"фт":2346,"фр":25365,"фо":9781,"фе":6303,"ує":8184,"фа":4406,"ую":4341,"уч":6062,"уш":1880,"ух":4146,"іш":7237,"іц":26077,"іч":43837,"іє":28690,"їв":4260,"ію":3757,"ія":30142,"ін":70416,"ім":15479,"іл":34864,"ік":38962,"ій":44252,"із":21994,"іж":6237,"іх":3168,"іф":1617,"іт":52153,"іс":42072,"ір":18028,"іп":25092,"іо":58208,"ів":71042,"іг":5632,"ід":79007,"іа":11156,"іб":26830,"її":1916,"ії":55045,"їн":18326,"їх":2476,"єю":25186,"єм":3400,"єн":2727,"єт":11907,"єр":2623,"єд":3075,"єк":1686,"єв":4497," IN":21609,"а ":272631,"Р ":3804,"У ":2439,"Єв":1986,"Ів":2523,"Ін":2522,"Іл":1807,"к ":38380,"й ":109949,"Ле":2968,"Ла":4470,"Ку":2193,"Ко":9293,"м ":60876,"Кр":3269,"Ки":4604,"Ка":8275,"л ":10034,"Йо":1971,"На":28658,"Не":1779,"Мі":3387,"Мо":5475,"о ":171358,"Ма":9118,"Ль":1829,"Ми":3400,"Лі":2822,"Ме":3642,"н ":48334,"Ло":1601,"Лу":5897,"Па":5129,"Пе":6590,"По":6892,"с ":17750,"р ":43282,"Ос":1646,"Ор":1949,"Ні":1710,"Ол":3009,"Но":2027,"п ":4596,"в ":98488,"Ам":1689,"Ан":4049,"Ак":1666,"Ал":7021,"Ав":2330,"Ба":3984,"Ар":5755,"б ":28913,"Во":3994,"д ":30432,"Ве":7201,"Ви":2444,"Бі":2125,"Га":3476,"Бо":3837,"г ":7123,"Бр":2821,"Бе":3301,"Ва":4390,"Бу":4720,"Дж":1933,"Де":3400,"До":4198,"ж ":10290,"Ві":6492,"Ге":2702,"Гр":3253,"Го":2499,"е ":51777,"Да":23258,"и ":147695,"За":6302,"з ":38125,"Ен":2243,"ь ":77560,"е́":6375,"Ша":6268,"Ше":1818,"и́":5895,"ю ":78537,"я ":164366,"Ст":3877,"Су":1648,"Та":2989,"Сі":1751,"Те":3497,"То":1720,"ф ":2068,"Тр":1849,"Ук":9250,"х ":93221,"РС":2535,"Пр":9085,"Пу":2178,"Ра":2736,"Ре":2944,"Пі":12505,"Ри":1682,"СР":4182,"т ":74217,"Ро":9385,"Ру":1597,"СШ":1868,"Са":5497,"Св":1748,"Си":1634,"Се":8345,"Со":4417,"у ":182563,"Це":4350,"ш ":3438,"а́":12498,"Че":2490,"ША":1865,"Фр":25493,"Фе":1857,"Ха":2266,"ч 
":13144,"ль":97373,"лю":13039,"мб":2910,"ма":43992,"ля":27148,"ме":56645,"лі":100876,"ми":33489,"мл":2674,"мк":1772,"лл":4063,"ло":47531,"лу":11894,"ла":56466,"ле":64714,"лк":3429,"ли":41628,"кі":53446,"км":1768,"кн":2648,"кл":18117,"кр":33069,"кс":9526,"ко":160847,"кт":21418,"ку":30802,"кц":4113,"ка":63693,"ки":83769,"кв":7446,"ке":9654,"йн":15208,"йо":12518,"йк":1565,"йл":1986,"йм":3040,"йс":17098,"ия":1553,"ищ":6034,"иш":3403,"иї":2680,"у́":2267,"иє":2892,"йд":1750,"йб":2838,"ип":31662,"им":32760,"ин":52951,"ик":67385,"ил":14249,"ий":79035,"иц":14181,"ич":30391,"иф":2474,"их":75898,"ит":52472,"ир":15232,"ис":66975,"ри":69723,"рк":9097,"рл":2627,"рм":12337,"рн":24183,"ро":132547,"рп":3819,"ра":159212,"рб":3520,"рв":5932,"рг":12811,"рд":11789,"ре":97753,"рж":5968,"пі":30461,"пр":61701,"пт":2856,"пс":1790,"пу":8363,"ої":42339,"пи":15610,"пн":6007,"по":70389,"пл":11153,"ою":42263,"оя":1743,"па":69692,"оє":2366,"пе":54812,"ощ":2597,"ош":4991,"оч":9700,"оц":9082,"ос":101143,"ор":94904,"оп":23531,"оо":1738,"ох":8570,"оф":6129,"от":23405,"ок":38039,"ол":59373,"ом":93606,"он":134604,"ож":13014,"ні":182081,"оз":26801,"ой":3869,"ов":119233,"ог":97492,"од":63564,"ое":3185,"ню":3932,"ня":95444,"об":38077,"нь":43392,"нц":54211,"нш":5309,"нч":1583,"нт":57484,"нс":58539,"нф":2536,"ну":21092,"но":153491,"нн":89867,"нк":13365,"мі":70237,"ни":149376,"не":44107,"нг":6365,"нд":19006,"на":216248,"му":49313,"мс":3566,"мп":12768,"мо":32397,"мн":5261,"мм":1713,"ге":9243,"ві":92696,"ги":4209,"гн":2639,"го":97928,"гл":6748,"гр":20635,"гу":9031,"дв":4924,"дб":1844,"да":33332,"вд":7688,"ве":48786,"вж":2823,"бі":16889,"ви":75994,"вк":5403,"вл":11314,"вн":39402,"є ":23248,"во":52805,"вп":2102,"вр":5405,"вс":21071,"ву":11922,"вт":7129,"вч":4263,"вц":1693,"га":24943,"вя":2583,"би":5176,"аї":20255,"ає":15656,"бе":15560,"бр":8733,"бн":4172,"бо":24369,"бл":17974,"бк":1672,"бу":17942,"бс":1904,"ва":79944,"ад":39043,"аж":6697,"аз":20777,"аб":18055,"ав":68705,"аг":13743,"ам":54378,"ан":186015,"ап":12553,"ай":22198,"ак":29485,"ал":112843,"ах":20860,"аф":6570,"ач":13762,"ац":41300,"ас":69626,"ар":92013,"ау":6764,"ат":76559,"ба":13923,"аю":6775,"аш":6416,"зт":3423,"зр":3339,"зп":3833,"зу":8061,"зк":2540,"зи":11355,"жі":2914,"зо":12242,"зн":22505,"зм":7721,"ив":25953,"иг":5812,"иб":4671,"иж":2800,"зі":9568,"из":10688,"ид":11644,"зь":26870,"жо":4282,"жу":4654,"еї":6545,"жи":9686,"жн":10415,"за":70302,"зб":4513,"зв":12953,"зг":1786,"зд":4686,"зе":9240,"еф":2808,"ет":48754,"ес":21933,"ер":123318,"еп":28664,"ї ":106019,"ео":7564,"ен":200818,"ем":25862,"ел":52238,"ек":44785,"ей":12736,"ез":15415,"ді":60214,"еж":8691,"же":36749,"ея":2396,"жа":10671,"еч":5677,"еш":1971,"ех":4631,"ец":12416,"дс":7505,"др":11609,"ду":13511,"дн":33993,"дм":3635,"дп":3899,"і ":217878,"до":70187,"ди":35057,"дл":9508,"дк":6162,"де":83166,"дз":2175,"гі":41117,"о́":9048,"дж":30353,"еб":4669,"ев":15708,"ег":29992,"ед":62088,"дя":4582,"еа":4570," ар":4635," ба":6422," аб":10662," ав":3504," ад":2033," ал":3311," ак":3721," ан":4873," ам":2325," бу":8984," ва":2979," бе":6626," бр":1983," бо":3438," бл":2347," ву":2932," га":4235," бі":6025," ви":31633," ве":7581," во":6259," є ":4843," вс":3250," вл":2695," дв":2986," да":3768," гу":2423," го":6549," гр":11679," ге":4251," ві":52058," до":45102," і ":33511," др":3031," де":37135," гі":3754," ди":4078," дл":8069," ел":3252," ек":24873," ді":8202," зд":2257," зе":1996," за":56665," зв":4345," зб":3336," жо":2604," жи":2736," зм":2726," зо":3274," зн":5228," зі":1585," йо":3069," ка":7109," кв":3523," кр":6792," 
ко":25840," кн":1684," кл":3559," ку":3737," ла":4131," кі":5040," ли":5653," ле":3016," ме":10350," лі":7010," ми":2991," лю":4680," ма":12887," мо":13377," му":26214," ни":3719," мі":18386," не":11146," на":101285," но":3271," ок":3237," оз":1668," ні":2363," од":9316," об":14076," от":1642," ор":5923," ос":30485," оп":3801," по":44438," пл":4775," пи":3225," пе":16844," па":8207," Ре":2937," Пі":12498," Ра":2732," Ро":9378," Ри":1682," Пу":2177," Пр":9060," Пе":6585," Па":5105," По":6869," Ос":1642," Ор":1944," р ":1902," Те":3490," Сі":1746," То":1716," Тр":1847," Ст":3864," Су":1643," Та":2975," Св":1745," Си":1629," Се":8339," Со":4411," у ":67844," Ру":1597," СШ":1865," Са":5492," Фр":25493," Фе":1855," Ук":9235," Це":4345," Ха":2264," Ше":1818," Ша":6265," Че":2486," я ":1678," Ба":3980," Ар":5747," в ":34140," Ан":4045," Ам":1689," Ал":7017," Ак":1665," Ав":2326," Ва":4387," Бу":4716," Бо":3831," Бр":2819," Бе":3297," а ":3713," Дж":1930," Де":3390," До":4191," Ен":2238," з ":28463," Га":3472," Ве":7185," Ви":2437," Бі":2122," Во":3987," Да":23253," Ге":2696," Ві":6479," Го":2496," Гр":3249," Йо":1970," Ки":4602," Ка":8259," За":6236," й ":3420," Мо":5471," На":28633," Не":1775," Мі":3382," Но":2025," Ол":3009," Ні":1705," Ко":9278," м ":2294," Кр":3260," Ку":2189," Ла":4467," Ле":2966," Ло":1600," Лу":5896," Ль":1829," Ма":9106," Лі":2816," Ме":3636," Ми":3397," У ":1855," Єв":1986," Ів":2522," Іл":1806," Ін":2499," В ":2749,"Шам":2267,"Шар":2655,"INS":21596,"Кар":1691,"Кор":2456,"Киї":2310," єк":1604," єд":2480," їх":2188," іс":5151," із":3768," ім":4088," ін":33850," її":1897,"Луа":4691," ра":11678," ре":33133," пі":17870," ри":2122," ро":38150," пр":49254," св":7646," рі":9410," си":9133," се":11547," сл":3167," см":1572," ск":6896," сп":12079," со":4130," ру":3107," са":3132," сі":4244," ти":4115," тв":2718," те":13347," то":7539," тр":8770," сх":2630," ст":41843," су":7442," сю":1727," та":50935," ук":7367," ті":1840," ус":2008," ут":2182," ун":1655," фо":4647," фр":23569," фу":2570," фа":2336," уч":1717," хр":1639," хо":1776," ху":1552," фі":5255," ха":2328," ци":1770," це":7308," чо":1959," чл":2302," чи":4976," ці":2356," че":5747," ча":8926,"Льв":1624,"Мар":4041," шт":1850," що":17557," ян":1885," яз":3165," як":23816," ят":2201,"Мик":1636,"Мон":1603,"Нас":21926,"ад ":3964,"ав ":5473,"EE ":21630,"ам ":4361,"ан ":7774,"ак ":2945,"ал ":2859,"ай ":1706,"Оле":2078,"авч":1579,"авт":2891,"ага":5972,"аві":2505,"аго":2958,"ада":8317,"ади":3697,"аде":2606,"аду":2146,"адс":2241,"адо":2036,"адм":1600,"адя":2411,"аді":3733,"би ":2028,"ажа":1619,"або":10777,"абе":1768,"ава":4168,"авн":9017,"авл":3400,"авс":2814,"ає ":8442,"аво":4040,"аве":24071,"ави":3333,"бо ":10197,"ало":3744,"алу":3259,"ала":5763,"али":6052,"акі":1706,"але":7492,"амо":2377,"амп":2753,"ама":2782,"аль":45514,"ами":10086,"аме":25683,"алі":34159,"анн":20969,"ано":8600,"ану":2414,"анс":11789,"ант":9369,"анц":47104,"ань":4811,"ана":9630,"анд":6858,"анг":2311,"ани":9657,"амі":2404,"ане":2623,"анк":2826,"ані":36761,"азу":2305,"азо":3692,"ази":2579,"азв":4425,"аза":2057,"азі":1724,"айс":1694,"айо":6278,"айн":1941,"айб":2377,"акт":6740,"ако":7371,"ака":2878,"ах ":10031,"Пар":1681,"ас ":3290,"ар ":5874,"ат ":4127,"СР ":2617,"ба 
":1557,"Пет":1744,"Пер":2277,"Пол":2322,"РСР":2369,"При":2113,"Про":3790,"Пуа":1541,"Пік":2365,"Пір":3646,"Рос":2415,"Роз":1581,"Рон":3384,"Пів":4853,"Аль":5551,"Сан":1563,"США":1862,"Сер":1775,"Сен":4479,"Ста":1577,"Ард":2815,"Тер":1748,"Вол":2219,"Вер":4116,"Вел":1833,"Укр":9121,"Бур":2431,"Фра":24714,"Дан":22031,"ША ":1853,"а́н":3396,"Цен":2677,"Чер":1615,"NSE":21595,"лам":1827,"лан":5122,"лас":12503,"лат":2370,"ма ":7369,"ля ":13589,"лав":3656,"лад":10782,"кці":3993,"ль ":10569,"кул":3808,"кур":1627,"кою":25572,"кої":15644,"кре":3214,"кра":21169,"кри":4236,"кро":1703,"лу ":3819,"кса":1966,"кте":2165,"кти":3833,"кто":5082,"ктр":2798,"кту":3015,"кла":11761,"ло ":6223,"клю":1692,"клі":1698,"ког":16518,"ков":13165,"ком":15826,"кон":32464,"коп":2254,"кор":9858,"кос":2132,"кож":4742,"кол":8207,"ким":4246,"кий":27960,"ких":8957,"кві":3598,"ле ":3371,"кі ":7394,"ли ":7619,"кер":1599,"ква":2483,"ках":2083,"кат":2360,"кар":5682,"кам":2944,"кан":6220,"кал":2575,"каз":1777,"кад":1810,"ла ":13040,"Іва":2158,"йсь":13547,"кт ":1727,"ку ":18769,"йна":1852,"йно":3774,"йни":5036,"йов":2120,"йог":2770,"йон":6057,"ко ":7248,"иїв":2473,"ки ":39616,"ке ":4049,"йбі":2012,"од ":2452,"нах":3061,"нац":23841,"нау":3625,"наф":1792,"нач":7238,"ог ":2185,"нан":4567,"нам":2788,"нал":31101,"нат":2955,"нас":4427,"нар":6454,"нап":3620,"над":4116,"нак":1904,"най":5486,"наз":4802,"нде":1705,"нда":2557,"ож ":4256,"нгл":1926,"неї":3826,"нен":4632,"нер":5081,"нес":1886,"нет":1977,"нец":1734,"нев":2650,"нез":1549,"нді":4444,"ні ":100559,"ндр":3680,"мії":2239,"ник":15713,"ний":35532,"ок ":8024,"мір":2620,"міс":12191,"мік":1956,"мін":9879,"міч":24763,"між":4232,"нь ":34487,"ня ":85321,"ню ":1561,"ов ":5346,"нав":26133,"об ":3066,"мпо":1641,"нт ":27692,"мпе":2663,"мпа":3905,"ну ":12904,"мпі":1622,"мсь":1988,"мун":23434,"муз":3552,"ліз":3946,"лій":2290,"лік":3856,"лід":25282,"лів":5148,"літ":32189,"ліс":3353,"лін":5016,"лії":2011,"мис":2927,"мир":2021,"но ":17011,"мно":2222,"мод":1683,"мог":2124,"мов":8204,"мож":2842,"мон":3152,"мол":2071,"мор":4169,"нс ":2855,"має":2263,"ляє":1872,"мац":1743,"мал":3075,"мад":2394,"ляд":1770,"мат":7712,"мас":1664,"ляр":1752,"мар":2087,"лян":1603,"ман":6730,"люч":1579,"маг":1762,"люд":2454,"лют":2538,"мет":6826,"мен":33354,"ни ":22900,"мер":6133,"меж":1784,"мі ":2838,"не ":13395,"льп":5070,"льн":50159,"льо":2192,"на ":73406,"льм":2015,"льк":3474,"льш":4684,"льт":4502,"льс":6409,"му ":17492,"лок":1590,"лог":8013,"лод":3460,"лор":1601,"лос":2987,"лот":1979,"лом":2920,"лон":2251,"лов":10255,"луж":1851,"ків":10057,"кій":3619,"кіл":3020,"кіп":21719,"кін":3986,"кіс":1867,"лив":4838,"лиз":2163,"лик":4841,"лі ":6988,"леж":2754,"ми ":23542,"лен":36965,"лем":3035,"лек":7457,"лиц":3452,"лиш":2141,"лис":4376,"лин":3727,"лип":2133,"пат":1536,"пад":4424,"пал":24506,"пан":5073,"пар":26316,"ре ":2016,"ра ":10978,"пис":6483,"пла":3785,"пле":2508,"пло":1933,"ро ":4356,"пед":22902,"ри ":9926,"пер":22098,"печ":1592,"ори":10852,"опі":2491,"орд":2209,"оре":6973,"орг":5783,"орс":3875,"оро":12661,"орм":6382,"орн":2519,"опу":1571,"ора":4398,"опе":4273,"опи":2385,"опо":5348,"опа":3672,"осі":28049,"оте":2368,"оти":4264,"ото":4402,"отр":2511,"ота":2076,"орі":12506,"оси":1994,"оск":2565,"осл":27847,"осн":4625,"осо":4627,"осп":1773,"ост":18846,"ору":3825,"орт":4236,"оря":2411,"оми":3494,"олі":11097,"оме":3420,"ома":8391,"оля":2505,"олю":1788,"оль":6830,"олу":1961,"по 
":3091,"оло":19189,"оле":3051,"оли":6076,"окі":1858,"ола":2363,"окр":3962,"оку":9695,"око":4015,"оні":30331,"онс":3562,"онт":3739,"ону":5669,"они":2616,"омі":28058,"оно":28802,"онн":3725,"она":36879,"онд":1886,"оне":2765,"омо":4256,"омп":5253,"ому":14955,"оча":2902,"очи":1547,"оці":5589,"офе":1770,"оце":2765,"офі":2407,"охо":3822,"оба":1591,"нят":1850,"ням":4193,"ова":23572,"обу":2657,"обр":3128,"обо":4431,"обн":1797,"обл":9987,"оби":2069,"ою ":40879,"ньо":6069,"па ":1808,"оки":2050,"оке":1657,"ока":3381,"ожн":3109,"озв":2698,"нів":6803,"ніз":4328,"ози":2146,"оза":1905,"озт":3392,"ніт":1771,"ніс":9671,"ніц":22879,"ніш":2881,"ніч":5481,"нік":1930,"ній":8198,"озм":1625,"нім":3177,"озн":3472,"озр":1991,"озп":1689,"нії":3375,"нія":2921,"одн":11088,"оди":15822,"одж":2752,"огі":6272,"оду":4615,"одо":6110,"ої ":40186,"пи ":6419,"оді":8902,"оже":2141,"обі":3439,"ове":4729,"овл":2741,"ови":27036,"ово":14293,"овн":9596,"овт":2617,"ову":5401,"овс":3796,"ога":1966,"ові":15249,"ого":77853,"огр":5122,"ода":5921,"оде":2118,"ної":16682,"ною":5373,"нос":8914,"ноп":1707,"ном":33988,"нок":2048,"нні":5141,"ног":41554,"нов":14623,"ння":64956,"нно":4474,"ор ":9067,"нни":8535,"нна":3011,"SEE":21595,"нко":3013,"он ":7906,"нку":1909,"нка":2370,"ом ":22352,"ним":10232,"нин":2727,"нич":2031,"них":49028,"ниц":6236,"нши":2462,"нці":27792,"нцу":22511,"нув":2948,"нті":3002,"нсь":25045,"нта":5969,"нте":3274,"нти":4476,"нту":1658,"нто":2721,"ох ":2654,"нтр":7071,"нст":25118,"сам":2553,"рям":1886,"сан":2886,"ряд":3770,"сво":2566,"свя":1631,"сві":5195,"сі ":3512,"сел":26979,"ти ":13731,"сен":1784,"сер":9261,"рів":9362,"рід":3016,"ріа":3973,"рій":3132,"різ":4766,"ріо":2440,"ріш":1652,"річ":3119,"рія":3137,"сис":3972,"сит":2195,"рії":5126,"син":2791,"сил":2927,"ска":1987,"сли":1774,"сла":3451,"ско":3258,"скл":4964,"слі":25511,"сля":1724,"слу":2674,"сло":3957,"то ":6783,"сни":4268,"сня":2333,"соб":4324,"сов":3112,"сні":1707,"сок":1638,"сно":9584,"тр ":4211,"сну":2111,"спе":2794,"сор":1743,"сон":2424,"соц":1647,"ту ":33552,"спі":4109,"спу":1543,"спо":6481,"спр":2669,"су ":3870,"роц":5357,"рот":5051,"роф":2524,"роп":4979,"рос":8219,"ст ":6153,"рпн":1909,"рсь":7310,"рта":23604,"рст":1652,"рти":2701,"рси":2327,"рух":1575,"рту":2197,"рті":2581,"рук":2570,"руг":2655,"руд":3764,"руп":2957,"рус":1928,"рхі":2138,"рхн":4291,"рши":1950,"сь ":1944,"та ":53895,"ся ":24676,"рад":5772,"раж":1608,"раз":3483,"рав":11695,"рам":5248,"ран":55851,"рай":6264,"рак":4056,"рал":6105,"рах":2738,"раф":3312,"рац":4988,"рас":1585,"рат":9434,"раї":18701,"рі ":5250,"рде":3344,"ргі":2098,"реб":1606,"рев":3520,"рег":26086,"ред":9949,"реа":2012,"рет":3016,"рес":6263,"си ":1816,"рен":12198,"рем":5175,"рел":2059,"рек":2766,"рез":6938,"рді":3024,"реж":3159,"ржа":4954,"реч":1677,"рец":2506,"рвн":2312,"рга":5371,"ргу":2442,"рим":4841,"рин":4051,"рик":6384,"рил":1738,"рий":1712,"рич":3064,"рит":7476,"рир":1816,"рис":11068,"рка":1676,"пів":7244,"під":9422,"риб":1872,"риг":1977,"рив":2681,"риз":3342,"піл":3701,"піс":2425,"рмі":3148,"рни":5921,"рне":1575,"рна":4926,"рок":13067,"рол":3802,"ром":8841,"рон":6202,"роз":14973,"рні":3617,"ров":13779,"рог":3625,"род":18027,"роб":8445,"рно":5153,"рко":1653,"рма":4259,"пра":9765,"при":17085,"пре":5034,"про":26547,"ру ":4801,"поп":1576,"пор":8542,"пос":5211,"пот":2142,"пох":2319,"поч":2731,"пош":1607,"рт ":1813,"под":5278,"пов":10451,"пня":3871,"пон":3944,"пом":2310,"пол":10620,"пок":1979,"поз":3035,"пуб":2176,"пус":1543,"пря":1882,"са 
":2954,"вар":4866,"ват":3699,"вач":1754,"ває":1673,"вав":2297,"ван":27620,"вал":6746,"важ":2723,"га ":2428,"бут":2028,"бул":3726,"бур":1929,"буд":3912,"був":3407,"́н ":1990,"вся":2929,"вто":3926,"втн":2139,"вст":3544,"всь":11822,"гу ":1846,"вро":2652,"вою":2829,"вої":4164,"вол":4005,"вні":6649,"вод":5394,"вог":4666,"вов":1600,"вня":4991,"вор":9030,"вос":3392,"вом":2439,"вон":1910,"вни":11918,"вне":1755,"вна":3546,"вно":8046,"влі":1867,"вля":2045,"вле":3353,"вла":2838,"го ":74789,"вка":1932,"вищ":4417,"вич":10270,"виз":2384,"вий":5063,"вил":1963,"вик":6641,"вин":5363,"вим":3289,"вип":2153,"вис":3214,"вир":3300,"вит":2461,"вих":6912,"вив":1786,"виг":1941,"вид":7090,"біл":7304,"вец":2207,"вер":12033,"ги ":1955,"вел":4069,"вед":23850,"ві ":8320,"вде":5262,"ва ":20184,"ают":4986,"баг":2186,"аці":38441,"ашо":3325,"аук":3811,"аті":2815,"афт":2014,"ахо":3481,"афі":2408,"ача":3308,"аче":5139,"ахі":3064,"апр":2675,"апа":1903,"апо":2274,"апи":1583,"арх":2803,"арс":2742,"арт":27902,"аре":1829,"ард":4062,"ара":11795,"арн":4172,"аро":9064,"ари":4536,"арк":3550,"аст":19420,"ата":3063,"аси":3444,"арі":4603,"асе":23473,"асл":1823,"асо":2485,"асн":7019,"ату":6561,"ать":1702,"ате":4441,"асі":1565,"ати":34490,"атк":2377,"атн":2400,"́ль":1996,"ато":8833,"атр":2250,"бол":1926,"бор":3014,"бни":2582,"бро":2133,"ву ":2725,"бра":3585,"блі":3564,"бла":6820,"бли":3503,"бле":1613,"во ":6738,"ви ":5236,"аєт":5513,"бер":8329,"без":3385,"аїн":17837,"ве ":2027,"дає":2865,"дач":1562,"дан":4800,"дар":3127,"дат":2935,"дал":2067,"дав":3568,"дем":2009,"ден":35742,"дер":7440,"деп":22255,"дже":27277,"ей ":5110,"дво":2066,"ез ":2842,"ді ":6167,"дсь":3872,"дст":2708,"дрі":2262,"дро":2499,"дру":2324,"дра":1728,"дпо":2072,"ет ":25079,"дко":1659,"ен ":10176,"ем ":4029,"див":2382,"гіч":3315,"дин":11940,"дит":3892,"гії":2091,"о́в":1799,"гіо":23749,"гір":2225,"гід":1618,"дня":2562,"доб":2151,"дні":5130,"дов":8472,"дос":25281,"дор":2173,"док":2841,"дон":2228,"дом":6507,"дна":4835,"дмі":2603,"дни":7067,"дне":1547,"дно":10257,"ер ":7534,"для":8246,"да ":6361,"газ":2730,"гал":4240,"гат":2475,"вят":1631,"ган":6922,"де ":6060,"вул":1831,"вує":1869,"вча":1606,"вче":1615,"гол":4270,"гос":1776,"гор":5348,"гов":2480,"год":1736,"гру":5182,"ду ":6755,"гро":3158,"гра":8609,"гре":2241,"гун":2442,"ген":3541,"ди ":6794,"гео":1703,"вів":2194,"вік":22506,"віл":2297,"вій":5760,"від":28365,"віт":10410,"вір":2465,"віс":2374,"він":3115,"гля":1806,"до ":11544,"жав":4567,"за ":31382,"жит":2833,"жив":2319,"жин":1953,"жі ":1618,"жен":31144,"зу ":2376,"жно":2243,"жни":2399,"жна":2353,"жні":1682,"жов":2516,"ежа":1636,"ежн":1674,"ежи":2240,"едс":1966,"еї ":5804,"еді":22755,"дія":9605,"діє":22354,"дії":2193,"діл":4733,"езн":2478,"езп":2124,"дів":4952,"дій":3323,"ева":2033,"еви":2106,"еат":1536,"дян":2535,"еда":2858,"еде":24252,"еди":1957,"егі":24559,"едо":1597,"едн":2110,"евн":1571,"же 
":2405,"ево":2649,"еві":2180,"ент":38060,"енс":2476,"енц":1664,"енк":2033,"ени":9106,"ено":4866,"енн":55011,"ена":6246,"емі":4010,"ене":8942,"енд":1893,"еор":1718,"ені":27559,"ень":28255,"епа":22831,"ерш":4377,"ерх":5234,"ерп":2312,"ерс":4919,"ерт":3088,"ерм":2838,"ерн":8048,"еро":3883,"ери":8614,"ерк":2529,"ерд":2064,"ерг":2332,"ерж":5295,"ере":27737,"ера":11283,"ерв":4795,"ерб":1797,"ейс":2515,"еко":24801,"ект":7509,"екс":5740,"ели":5579,"ело":1860,"еле":29499,"ела":1663,"емл":1627,"емо":2244,"емн":1709,"еми":2190,"елі":3500,"ель":6771,"еме":2758,"ема":4374,"ехн":2533,"ець":7827,"еці":2099,"ері":9985,"есн":2797,"есп":2017,"есо":1640,"ест":4336,"ета":3510,"есі":2297,"ети":3023,"ете":1577,"етр":4098,"ето":3229,"ету":1803,"ива":5433,"иді":1970,"иго":1963,"ида":2853,"ивс":1559,"иво":3293,"ивн":5798,"иві":2780,"икл":3021,"ико":10700,"ики":26994,"ика":9412,"изь":1617,"изн":4148,"ині":5661,"имі":1780,"ини":11405,"инн":2229,"ино":4309,"инс":2389,"ину":2117,"ина":9917,"ими":8406,"илі":2149,"имо":2293,"има":1943,"иль":2368,"икі":3590,"или":2166,"ило":1638,"ила":2574,"исе":1748,"иса":1892,"ист":43720,"исо":2640,"исл":3795,"иск":1974,"ити":3061,"ите":2660,"ита":3840,"ися":2391,"ись":3153,"иту":23904,"итт":1716,"ито":5821,"ипа":23599,"ипн":2085,"ире":1798,"иро":6079,"ихо":1721,"ицт":1948,"иць":3344,"ить":5744,"ище":2694,"иці":4564,"ичи":1676,"ичн":15682,"иця":3075,"ича":2349,"ка ":31484,"ив ":2824,"зав":2331,"заб":1710,"заг":2313,"ид ":1647,"зви":3864,"зва":3561,"зац":3958,"зах":3661,"зас":5087,"зап":2437,"зан":2419,"зал":3333,"зак":3350,"ий ":76977,"зер":2249,"зем":2575,"зді":2313,"зі ":4174,"зик":1993,"ик ":11149,"ин ":8446,"им ":13202,"зич":2000,"зна":12042,"зни":3492,"змі":3801,"зно":1802,"зня":3181,"зов":3243,"зон":2606,"зпе":1731,"зпо":1609,"зро":1734,"зта":3312,"их ":71895,"ич ":8041,"зьк":25702,"ьме":1976,"ьна":3031,"ьни":9629,"ьно":31998,"ька":11941,"ьке":2474,"ьки":32360,"ько":60206,"ькі":5969,"ьту":1942,"ься":17960,"ьсь":5340,"ьог":3861,"ьов":1687,"ьні":3450,"ьпи":4442,"як ":5568,"ям ":4621,"ює ":1572,"юва":4241,"юр ":1687,"er ":1536,"яка":3149,"яки":6457,"яко":4501,"які":4711,"яч ":2564,"юто":2149,"ях ":2449,"ють":10792,"ючи":2094,"янс":3724,"ями":2547,"ять":2425,"яти":2263,"уча":3150,"уєт":3972,"фес":1886,"фер":1827,"уют":3197,"фун":1616,"фра":23347,"фор":6458,"фік":1922,"філ":3022,"фіз":1799,"це ":3714,"хан":1700,"хар":2307,"хні":3188,"хов":2787,"ход":8360,"хня":2286,"сто":20885,"стр":14612,"ств":9052,"сте":10268,"сти":58155,"ста":42984,"сті":17422,"стя":1612,"стю":2000,"сть":9349,"сту":4755,"сце":2414,"ть ":23970,"тю ":2076,"ськ":76644,"тя ":4033,"сьм":2215,"сюр":1594,"ув ":2334,"тав":4291,"так":7239,"тал":7016,"там":24076,"тан":13432,"тат":26660,"тах":1715,"тар":4879,"таш":3389,"тво":10485,"тва":6033,"тех":2451,"тец":1853,"тем":5996,"тел":3217,"тен":2843,"тер":16300,"теп":1791,"тет":24922,"тек":2381,"тей":1760,"ті ":19387,"січ":2428,"тив":7452,"сій":4342,"ук ":2255,"сів":2280,"сіб":22500,"тка":1831,"тич":9159,"сії":1716,"тий":2070,"тин":8122,"тик":26054,"тил":2041,"тир":1541,"тис":25636,"тип":1715,"тит":24505,"тку":2518,"тко":2210,"тла":1548,"ур ":1549,"тно":2899,"тні":2826,"тод":1678,"ток":3298,"тол":4612,"тов":10097,"тог":4926,"тня":4250,"тни":4639,"тре":1761,"тра":13299,"три":6247,"тор":19453,"тос":2194,"том":6059,"тон":3478,"топ":3418,"тою":1721,"тсь":3003,"тро":10751,"тру":4586,"трі":6027,"тув":1637,"туп":2434,"тур":8289,"ття":4038,"тут":22979,"тьс":17965,"ує 
":4149,"ува":15273,"уго":1668,"уар":5114,"уат":1738,"убл":2373,"узь":23065,"узе":1844,"тій":2025,"узи":2433,"тіл":1694,"тів":6962,"уді":1635,"удо":3223,"удн":2817,"уме":1763,"уль":5593,"уля":1849,"ули":2646,"уло":1791,"ула":2199,"укт":2271,"укр":7476,"уко":2812,"упн":1546,"ура":2553,"ург":3415,"ури":2733,"упа":1808,"унк":2435,"уні":25084,"умо":1836,"унд":2657,"уту":22317,"уст":2645,"утв":1947,"урн":4525,"уро":1539,"що ":17655,"шов":4224,"ших":3225,"шир":2574,"ший":2090,"ще ":2338,"шен":2816,"ші ":1719,"шта":1833,"щин":2002,"щен":2787,"цен":4287,"чи ":3745,"цев":1886,"цес":2327,"цер":1781,"ці ":10080,"хід":5700,"цип":23018,"ць ":4743,"ця ":4768,"ча ":2185,"цуз":22494,"цтв":3628,"ців":2368,"ціа":3468,"ціо":25893,"цій":6921,"ція":7173,"чен":10697,"чер":4797,"чі ":2791,"чле":2375,"чка":1597,"чин":4805,"ціє":1783,"ції":31741,"чис":2289,"цьк":8704,"ша ":1587,"ше ":3944,"чає":2804,"час":12463,"чат":2049,"чай":1572,"ща ":1848,"чні":4741,"чна":4519,"чня":2280,"чни":34595,"чно":11306,"us ":1758,"ію ":3147,"їв ":1974,"іал":5781,"іан":1803,"ія ":24064,"іде":1874,"ідж":22706,"ідк":2690,"ідн":11513,"ії ":54256,"ідр":2425,"ідп":3775,"ідо":5503,"івд":5855,"іве":2535,"ібн":1843,"івс":5463,"івн":8627,"ійн":9751,"ійс":12117,"іжн":2207,"ізо":1615,"ізн":5205,"ізм":1906,"ізи":1715,"іза":4100,"інд":1760,"іне":2360,"іна":3489,"інн":4758,"іно":3712,"імі":1726,"інф":1596,"інц":2809,"інс":24263,"інт":1810,"іль":17941,"іме":3447,"іля":2402,"ілі":1831,"імп":2489,"іле":1779,"ікі":22043,"іло":3466,"іка":6429,"іки":2076,"іко":1702,"ісц":2429,"іст":27606,"ісл":1997,"існ":3890,"іре":3719,"ірн":2678,"іпе":22100,"іні":5468,"іод":1626,"інш":3831,"інь":1631,"іон":51386,"ітн":4596,"іто":2264,"їх ":2010,"ітт":1981,"іту":1943,"іта":3402,"іте":26865,"іти":4695,"ішн":1712,"іше":2070,"ічн":39482,"іці":2111,"іци":22860,"іб ":22595,"ів ":41252,"ід ":15090,"із ":3976,"іж ":3000,"ій ":20369,"ік ":2836,"ім ":3593,"ін ":5333,"ір ":2760,"іх ":2419,"єю ":25174,"єдн":2196,"єкт":1631,"єть":11566,"її ":1912,"ією":24943,"іяч":2557,"ївс":1781,"їнс":8303,"їни":6169},"n_words":[15331232,17151725,12469252],"name":"uk"}
\ No newline at end of file
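Note (not part of the patch): each profile above and below is a single-line JSON document sharing one schema — a "freq" map of raw 1- to 3-gram counts, an "n_words" array of three totals, and a "name" language code. A minimal Python 3 sketch that parses one profile with the standard json module follows; the file path is hypothetical and nothing beyond the keys visible in the data is assumed.

    # Minimal sketch: inspect a langdetect profile file.
    # Assumes only the schema visible in the data:
    #   {"freq": {...}, "n_words": [...], "name": "..."}
    # The path below is hypothetical.
    import json

    with open("nlp_resource_data/langdetect/profiles/uk", encoding="utf-8") as f:
        profile = json.load(f)

    print(profile["name"])       # "uk"
    print(profile["n_words"])    # three totals, one per n-gram length (1..3)
    print(len(profile["freq"]))  # number of distinct 1- to 3-grams
    # Keys of length n are n-grams; e.g. profile["freq"].get("ння")
    # returns the trigram count 64956 seen in the data above.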
diff --git a/nlp_resource_data/langdetect/profiles/ur b/nlp_resource_data/langdetect/profiles/ur
new file mode 100755 (executable)
index 0000000..0c11cc7
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/ur
@@ -0,0 +1 @@
+{"freq":{"ٹ":7565,"پ":19909,"ٰ":348,"و":91887,"ي":164757,"ً":561,"َ":343,"ُ":600,"ـ":316,"ف":15396,"ق":19382,"ك":314,"ل":61015,"م":85213,"ن":73500,"ه":210,"ّ":218,"ِ":1208,"خ":10520,"د":42999,"ج":29799,"ح":15399,"ت":63424,"ث":2772,"ب":46160,"ئ":12622,"ا":210246,"ؤ":741,"آ":6835,"ء":3457,"غ":3607,"ع":26594,"ظ":4173,"ط":9560,"ض":4853,"ص":11646,"ش":17043,"س":59068,"ز":13283,"ر":100269,"ذ":2393,"،":8584,"؛":310,"ہ":81948,"ۃ":194,"ۂ":442,"ے":72723,"ۓ":1226,"۔":21285,"گ":13942,"ک":96247,"ھ":18217,"ں":33709,"چ":6431,"ڈ":3486,"ڑ":2986,"ژ":212," ،":2410," ۔":1318," ہ":33487," ھ":395," گ":5518," ک":67994," ن":12353," ل":8072," م":43729," ق":5408," ك":206," ف":4429," ي":9396," و":11189," ص":3954," ش":8375," ط":3503," ض":1387," ر":7523," ذ":1057," س":22509," ز":3628," ع":9286," ظ":270," غ":946," ا":57235," ء":2548," آ":6281," ج":19940," ح":6228," خ":5235," د":11120," ب":20334," ت":17083," ث":374," چ":3475," ڈ":1133," ٹ":932," پ":14925,"کا ":9732,"کت ":188," ، ":2340," ء ":2420," و ":1367," بھ":3806," جن":2286," حا":1179," جل":205," بہ":901," جم":727," جي":597," جو":3440," جد":255," جس":2699," جز":358," جر":340," اے":201," اہ":629," جا":6670," جب":836," تي":855," اک":704," بڑ":974," اگ":491," خل":943," تہ":199," تھ":4319," خي":280," خو":966," دا":1316," خص":169," خر":184," خد":283," خط":402," حق":234," حي":633," تک":1112," حم":243," خا":1564," حر":509," حس":437," حد":214," حض":717," حص":647," بے":210," بن":2795," بل":1071," بغ":190," بع":1385," اُ":270," بر":1803," اي":8027," او":11360," بد":254," بح":524," اق":467," اف":961," ال":4248," با":2739," ان":6816," ام":1333," اط":314," اع":876," اد":879," ار":1330," از":241," اس":10716," اش":424," اص":842," اض":219," اب":1381," ات":313," اث":175," اج":297," اح":507," اخ":805," تو":1190," بچ":211," تن":378," تم":482," تق":681," تف":194," تع":1588," تش":261," تص":444," تر":1553," تخ":375," تج":367," تح":884," تا":1192," تب":411," اپ":1467," اٹ":312," بو":485," بي":1555," آپ":775," آل":254," آن":292," آي":279," آخ":168," آر":246," آت":368," آج":212," آئ":402," آب":1319," آف":180," آس":249," آز":331," عہ":219," سے":9513," شک":419," طو":1200," ظا":186," عظ":314," عر":1083," عد":298," عث":218," عا":1056," عب":533," شہ":1939," عي":293," عل":2998," عم":1153," عن":249," عو":213," غي":270," سع":194," سط":261," دھ":223," سف":224," سي":1366," سو":980," سم":787," دہ":248," شا":2109," سن":832," سل":1069," شر":734," شخ":260," شع":372," شي":299," شم":1497," صا":255," صر":324," رک":1032," صح":380," صد":878," صل":252," صف":254," صو":1376," رہ":1328," ضر":190," ضل":1004," طا":301," طب":638," سک":803," طر":1051," دس":404," در":2025," دي":2436," دو":2534," دن":789," دل":217," ذر":344," جگ":269," حک":746," ذي":205," جہ":593," را":903," جھ":293," رس":334," ري":940," زر":261," رق":405," رو":1485," زب":1015," زا":201," رن":190," زي":935," سر":1466," زم":612," سب":960," سا":2953," زن":311," ست":266,"گھر":200,"گہ ":210," ہے":16945,"گوں":240,"گيا":1207," ٹا":182," پڑ":329," پن":536," پو":662," پي":2142," پا":3033," پت":168," پر":5135," پش":170," پس":187," ٹي":317," لف":878,"ں، ":508," لغ":168," لح":265," لئ":237," لا":1034," مل":1468," مق":1520," مف":313," مغ":611," مع":1708," مط":921," مض":238," مص":513," مس":1572," مش":1902," مر":2119," مز":309," مد":642," مذ":261," مح":1355," مخ":914," لي":2681," مث":406," مج":498," لو":614," مت":1246," ما":1837," نف":215," نق":288," نم":568," نظ":923," نس":373," نش":178," مم":601," من":1456," نا":2714," نب":179," مو":1833," مي":17546," نج":183," وا":3957," لڑ":168," نو":730," ني":646," فض":168," فر":1011," فا":763," فت":168," قس":291," قص":318," 
فل":520," قا":1019," فن":193," فو":398," قب":691," في":493," قد":579," قر":855," فٹ":171," قل":240," قو":565," قي":355," وہ":1582," يہ":4254," لک":550," وج":661," لگ":411," وس":430," وز":353," ور":376," وغ":293," ول":235," وق":563," وف":272," مک":558," وي":428," لے":206," مگ":362," مہ":365," يا":2941," نک":275," نگ":248," يع":665," نہ":1220," يو":1100," نے":2696," گھ":362," ہو":7614," ہم":409," ہن":745," ہي":5912," ہز":243," ہر":384," ہا":430," ہج":240," کت":582," کر":5496," کس":1214," کش":187," کل":1235," کن":341," کم":719," کو":6421," کي":15836," کا":11009," کئ":348," کہ":5408," کھ":713," گا":490," گئ":863," کے":17634," کچ":321," گن":214," گل":202," گي":1388," گو":519," گر":807," ۔ ":1018," پہ":1353," پھ":659," چت":240," چا":498," چل":252," چي":492," چو":467," چن":303," ڈا":359," ڈي":297," چھ":653," چک":241,"کٹر":175,"کيے":174,"کي۔":282,"کلا":210,"کلو":610,"کلي":168,"کيا":2343,"کيو":230,"کيم":299,"کين":186,"کيل":348,"کمي":186,"کمل":193,"کنا":202,"کوئ":495,"کور":176,"کوم":779,"کرا":507,"کتے":245,"کسي":1209,"کست":1507,"کزي":242,"کري":262,"کرت":1247,"کرد":421,"کرن":1152,"کرک":265,"کار":1107,"کائ":219,"کئي":281,"کان":309,"کام":558,"کال":425,"کتي":199,"کثر":209,"کتا":937,"کم ":404,"کل ":646,"کن ":715,"کي ":12997,"کو ":5510,"کر ":2045,"کز ":207,"کس ":278,"گور":172,"گري":1479,"گرو":225,"گرد":220,"گرا":170,"گاہ":224,"گئے":444,"گار":222,"گئي":576,"گي ":628,"کہل":354,"کہت":572,"کہا":1480,"کھا":567,"کھت":425,"کھو":307,"کھن":402,"کھي":483,"گر ":1024,"گا ":209,"کے ":18223,"کچھ":310,"کھ ":350,"کہ ":4209,"ا، ":388,"ھا۔":1135,"ھائ":250,"ں۔ ":2401,"ھان":195,"ھار":432,"عہد":221,"ھتے":283,"پان":548,"پار":326,"پاس":169,"پائ":259,"ہ، ":321,"پي ":246,"ئي ":3847,"ات ":5026,"اح ":512,"اج ":434,"ئم ":385,"پرو":236,"پري":221,"ئل ":289,"اب ":1615,"اء ":706,"اؤ ":172,"پاک":1528,"پنج":401,"پنا":177,"ئع ":214,"پور":484,"پني":552,"ئش ":369,"ھي ":3674,"ئر ":279,"بت ":362,"فر ":258,"ان ":9264,"با ":254,"فظ ":915,"ہم ":759,"اً ":534,"ہو ":977,"پنے":725,"اف ":395,"پيش":361,"پيد":1076,"ام ":5824,"ال ":3321,"ہي ":1279,"اق ":313,"اظ ":454,"اع ":255,"ہائ":399,"ہات":210,"ہار":307,"ار ":4602,"اخ ":174,"فت ":347,"اد ":2459,"ہاں":875,"اص ":271,"از ":1002,"اس ":6443,"ہان":212,"ارے":550,"اطا":177,"ھوا":216,"اسک":1125,"ت، ":322,"بي ":1989,"اضي":270,"بو ":188,"اصط":318,"اصل":1371,"ارہ":558,"اسے":795,"اعت":501,"اعد":311,"اعر":317,"ھوٹ":341,"ھيل":511,"اطي":187,"ھنے":355,"اعظ":290,"ھوں":204,"اعل":259,"قع ":1257,"فار":420,"ھيں":343,"فات":329,"فاظ":273,"ادا":654,"اخل":199,"اتھ":922,"ھي۔":577,"احي":174,"اخت":714,"احم":281,"احت":209,"ئي۔":457,"ہا ":1654,"ارا":821,"ادي":1677,"ئيں":363,"ادل":266,"ادب":182,"ادت":200,"بق ":617,"ادر":214,"اتے":523,"ازي":292,"است":1978,"اري":1870,"ارن":318,"ارو":652,"ہت ":499,"ارف":181,"ارس":382,"بل ":520,"ارد":993,"ارت":911,"ارک":342,"اشي":236,"اسم":259,"ادہ":769,"اسل":804,"اشا":172,"اسي":947,"ارٹ":196,"بن ":873,"بع ":355,"ائد":205,"ائر":395,"ائش":451,"ائع":219,"ائن":582,"ائم":404,"ائل":355,"ائي":3099,"ئنس":292,"ابت":325,"ابر":254,"اؤں":202,"ہد ":227,"ابي":488,"ابل":415,"ابق":678,"ابو":358,"ابن":262,"اتح":175,"اتا":3152,"ہر ":2189,"اثر":224,"ؤں ":253,"اتي":1903,"قت ":631,"ائے":1094,"اجا":187,"بر ":849,"في ":604,"عظي":276,"عظم":356,"پڑھ":180,"عري":274,"عرو":255,"عرب":766,"عرا":186,"عدا":424,"عدد":310,"عثم":220,"ظيم":431,"عبد":345,"عات":242,"شہو":678,"شہر":1734,"عال":536,"عام":681,"عاش":251,"عار":177,"صے ":201,"پہن":208,"پہل":895,"غرب":546,"طہ 
":206,"عيا":204,"عيس":213,"پھر":309,"پھي":228,"عمل":531,"عمو":341,"عمي":283,"عمر":256,"غان":180,"عني":881,"غاز":188,"علا":1570,"علق":602,"علي":1229,"علو":368,"عما":1154,"علم":738,"آتا":178,"آئي":238,"آبا":1188,"آخر":190,"آزا":306,"ھر ":618,"آن ":225,"ھا ":1233,"غير":796,"آيا":202,"عے ":290,"آپ ":665,"عہ ":760,"جسم":317,"جزي":258,"خت ":294,"جرا":223,"جري":262,"شعب":197,"جزا":198,"جرم":182,"بکہ":367,"ثيت":188,"ذہب":185,"شما":1408,"جبک":363,"اہم":638,"جائ":411,"اہل":181,"جات":4030,"اہو":304,"جاب":370,"جاس":338,"اہي":375,"جار":388,"جان":1341,"جام":298,"رے ":1358,"بہت":604,"رکز":477,"بہا":219,"صبہ":185,"جما":288,"رکا":241,"صحا":176,"رکي":303,"صدر":477,"حال":384,"جنو":753,"ہے۔":9056,"صدي":342,"حاد":180,"حاص":632,"حاظ":206,"جمع":225,"جمو":257,"شيا":452,"خط ":169,"بھي":3076,"بھا":585,"ضي ":277,"صطل":323,"جسے":332,"رکھ":862,"صرف":317,"جسک":274,"زہ ":268,"طب ":280,"تک ":956,"خي ":172,"حرک":215,"حضر":635,"حصي":251,"دت ":251,"حدہ":191,"سٹي":260,"حسا":181,"دا ":1143,"حسن":171,"سٹر":203,"حرا":239,"حري":437,"طح ":229,"تہ ":472,"تھ ":814,"صوص":325,"ہے،":923,"صول":176,"حدي":195,"صور":542,"صوب":1118,"رہي":215,"رہن":216,"جمہ":270,"رہا":394,"جنگ":528,"جيس":335,"رہت":193,"صلا":189,"جود":775,"ضرت":619,"ضرو":182,"خان":852,"خاص":306,"خار":176,"حمد":801,"حقي":267,"تے ":3295,"رہے":389,"صيل":310,"دس ":176,"در ":1119,"دد ":492,"حصہ":359,"دن ":286,"ٹا ":239,"دو ":1561,"ظر ":265,"خصو":323,"ر، ":449,"دي ":2904,"حير":216,"ضلع":1009,"حيا":329,"ختل":505,"حيث":191,"ختي":341,"سے ":11230,"طان":608,"طال":493,"طاب":536,"طبي":334,"دل ":339,"دم ":215,"خلي":574,"سکت":863,"تہا":216,"سکا":323,"سکي":300,"خلا":443,"سکو":402,"دان":840,"دال":423,"تھے":1383,"دائ":580,"دار":1737,"داز":181,"داد":564,"طرح":391,"طرز":173,"طرف":300,"خوا":291,"خود":282,"طري":194,"ظم ":393,"شہ ":199,"سکے":358,"خيا":180,"عت ":511,"تھي":1189,"عد ":1126,"تھا":1925,"رج ":392,"ٹي ":724,"رت ":2032,"رد ":384,"طلب":258,"طلا":420,"عض ":255,"رح ":408,"طور":1195,"ظام":601,"طنت":356,"شکل":258,"را ":761,"دست":238,"رب ":643,"ظاہ":223,"درس":185,"درج":475,"درا":374,"دري":711,"جہ ":949,"درم":359,"درو":190,"دون":230,"ظري":241,"صہ ":469,"دور":724,"دوس":950,"دني":624,"دوں":249,"رف ":993,"ديو":334,"ديم":368,"دين":825,"ديل":228,"ديا":819,"ديت":278,"ديد":212,"ٹر ":1167,"رس ":290,"عي ":320,"دما":182,"جے ":273,"اقو":437,"اقي":177,"اقت":219,"اقا":176,"افي":307,"اقع":1323,"الق":182,"الف":464,"الل":695,"الي":1802,"ان،":219,"امت":248,"اما":469,"الن":229,"الم":986,"الو":429,"امر":725,"الج":351,"الت":351,"الب":611,"الا":1303,"الس":179,"الر":180,"الد":643,"الح":530,"الع":407,"ري ":3579,"جگہ":221,"افر":440,"افت":417,"ٹري":255,"انے":1223,"ايم":221,"انہ":1162,"ايو":177,"ايس":1069,"انگ":1569,"ايش":229,"امہ":269,"ايا":902,"ايت":316,"انک":290,"اير":272,"، ":8315,"بحي":221,"اوہ":305,"رو ":291,"بحر":243,"اقے":283,"ديگ":275,"اني":2788,"انو":1426,"ديک":357,"رق ":406,"باً":193,"اقہ":415,"بان":1448,"بال":507,"اند":1077,"باد":1658,"باز":180,"بار":1042,"انس":905,"امن":202,"انا":720,"انب":309,"امو":288,"انت":736,"بات":454,"امي":1316,"انج":263,"امل":979,"بائ":435,"الے":826,"بتد":219,"اوي":243,"انڈ":193,"انچ":276,"الہ":394,"اون":198,"اول":497,"اوق":178,"اور":10590,"الک":535,"رم ":316,"ذري":244,"اوا":211,"تا ":6076,"حکو":764,"حکم":277,"رحد":298,"ردو":947,"ردي":295,"ردا":370,"رتے":586,"رتا":498,"ايک":5551,"بدا":306,"راچ":302,"ربع":318,"بدي":235,"برا":685,"ربي":855,"ذيل":186,"راہ":513,"برط":276,"برق":294,"بري":212,"رتي":676,"راک":232,"جہا":530,"تر 
":480,"ران":1556,"ربا":206,"راع":230,"راف":173,"رام":330,"رال":643,"راب":283,"رائ":562,"راج":355,"رات":605,"راث":218,"رار":318,"راد":611,"راص":190,"راس":245,"ہيں":6226,"بعد":1053,"تح ":170,"ہو۔":237,"جھي":195,"بعض":255,"رآن":223,"بني":578,"بنا":831,"ہوگ":285,"ہوں":722,"بند":385,"ا ":36261,"بلن":294,"ہنچ":177,"بلو":291,"بلي":191,"ہلے":442,"سر ":306,"بلا":185,"ب ":6862,"ء ":3203,"ہوئ":1384,"ہور":1098,"ہوت":2264,"ہوا":926,"ہون":793,"ہلي":355,"ہند":711,"ہمي":171,"ؤ ":175,"زي ":1743,"رطا":394,"ح ":1752,"ھے ":736,"ہلا":565,"خ ":788,"رسٹ":177,"د ":10666,"بوں":222,"بيٹ":246,"ست ":854,"ذ ":232,"بين":440,"بيل":268,"سا ":510,"رست":341,"بيع":188,"ت ":16193,"بيا":321,"بير":253,"رسي":336,"سب ":865,"رجہ":199,"ث ":408,"بول":436,"ج ":1790,"بلک":176,"سم ":677,"دہ ":2099,"تي ":4132,"سل ":210,"اک ":371,"ريہ":436,"تو ":819,"ٹلي":171,"ريک":876,"دھ ":232,"روں":847,"رپ ":187,"ريع":323,"رين":690,"ريل":294,"ريف":299,"ريق":376,"رنے":958,"تم ":196,"ريا":1581,"ريب":696,"ريخ":486,"ريت":171,"ريز":1341,"ريش":173,"رنگ":273,"روف":378,"رون":314,"روم":315,"روع":324,"رور":309,"روز":221,"روس":205,"روا":481,"اپن":1352,"ہزا":285,"ہري":293,"زبا":959,"زار":465,"رند":247,"زاد":421,"ہرا":229,"رمي":582,"رمن":177,"رنا":383,"زائ":318,"رما":229,"اٹل":178,"ہتے":651,"ثر ":284,"؛ ":306,"سط ":176,"رقي":620,"رقب":303,"ہجر":225,"ئے ":2320,"تبد":215,"تان":2396,"تبا":381,"تار":774,"تاب":521,"اچي":318,"دے ":517,"سن ":294,"ہے ":7219,"اں ":1697,"سو ":217,"تحر":360,"شت ":170,"تحا":210,"تحص":253,"سي ":3401,"تدا":329,"اہ ":1042,"تري":474,"ترا":469,"جا ":352,"تصا":242,"جب ":430,"تصو":215,"ترک":411,"اڑي":213,"ستہ":193,"تظا":196,"سري":335,"تعا":188,"سرح":298,"سرا":421,"تعم":1291,"تعل":965,"تعد":362,"دگي":389,"جد ":384,"شن ":295,"زما":353,"سام":409,"سال":748,"سان":757,"زند":314,"جس ":1740,"سائ":617,"زمي":343,"سات":868,"ساب":328,"ستا":2494,"تقا":197,"سجد":245,"تقر":347,"تقس":240,"ستع":1050,"زيا":694,"ستي":192,"ستو":208,"زيد":178,"اے ":199,"زير":703,"تمل":297,"ا۔ ":2384,"صر ":373,"تلف":484,"تما":488,"اۓ ":304,"سلا":945,"تين":302,"سمج":250,"سلي":236,"سمب":188,"سلم":718,"سما":225,"تيس":201,"تيا":780,"سلط":553,"سلس":275,"توا":248,"شي ":269,"سطح":252,"تيں":193,"پر ":4411,"اکي":196,"اکا":303,"اکس":1424,"سرے":373,"توں":404,"صد ":229,"ٹے ":294,"اگر":397,"جن ":581,"بڑا":364,"رک ":436,"بہ ":1879,"اکھ":210,"رہ ":1976,"جي ":406,"شرق":569,"شرو":345,"شري":335,"صل ":1226,"جو ":2871,"بڑي":256,"حت ":262,"ئے۔":600,"بڑے":246,"حد ":400,"ھے۔":835,"سوي":178,"شتر":182,"سوا":232,"سور":248,"شاع":346,"شام":808,"سمن":228,"شائ":191,"سند":323,"شاخ":186,"سمي":191,"ثلا":193,"شخص":263,"ثما":224,"ذکر":233,"اہر":591,"اہد":216,"سين":345,"سيم":308,"شاہ":679,"شتم":303,"سيد":218,"بے ":441,"سيا":755,"ڑا ":500,"ٹھ":533,"پت":249,"پا":3394,"پس":366,"پش":179,"پر":5467,"ٹے":304,"گ ":1702,"ٹک":243,"پڑ":366,"پو":978,"پن":2069,"پي":2662,"پل":260,"ٹي":1294,"ٹو":369,"ٹن":221,"ٹل":297,"ٹر":1769,"ٹا":643,"ک ":9401,"لے":2164,"يں،":396,"مگ":395,"مک":804,"نڈ":840,"وي":2835,"ي،":686,"نچ":605,"وو":294,"يب":1474,"يا":18357,"مہ":1477,"يئ":237,"يع":1465,"يز":2169,"يس":3006,"نگ":3438,"يش":1424,"يص":226,"يخ":630,"يد":2958,"ير":4679,"يت":2515,"يث":342,"يج":682,"نک":1176,"يح":396,"ين":5944,"يو":3744,"وچ":363,"نھ":192,"يق":966,"يم":3458,"نہ":3293,"يل":3575,"يف":992,"وپ":411,"وٹ":898,"وڑ":349,"نے":8036,"وڈ":316,"يٹ":1722,"وک":763,"پ 
":1225,"يٰ":198,"وگ":857,"وہ":2577,"يچ":216,"يڈ":589,"وں":7039,"يپ":229,"و۔":276,"يک":8185,"يگ":581,"يہ":6841,"يں":24637,"ي۔":1652,"يۓ":606,"يے":1374,"فع":234,"فض":206,"فظ":975,"فر":2018,"فس":304,"فت":956,"فا":1934,"يہا":454,"قع":1462,"قط":251,"قص":525,"قس":674,"قر":1456,"قد":958,"قت":918,"في":1664,"قب":1358,"فو":716,"قا":2876,"فن":259,"فل":626,"فق":170,"فٹ":222,"قي":2110,"ل،":228,"قل":641,"قو":1321,"لق":994,"لف":1951,"لط":721,"يں۔":3302,"لغ":238,"لع":1546,"لد":859,"لج":430,"لح":953,"لز":212,"لس":945,"لر":265,"لئ":340,"لا":9180,"لت":997,"لب":1083,"مع":2282,"مغ":639,"مص":529,"مض":257,"مط":1001,"مف":327,"مق":1607,"مل":3799,"مت":2835,"لو":3381,"مج":828,"لي":9899,"ن،":378,"مث":441,"لم":3135,"لل":728,"مب":937,"لن":899,"ما":9656,"مز":517,"مر":3546,"مش":2018,"مس":1772,"مخ":947,"مح":1397,"مذ":278,"مد":1699,"نظ":1248,"نع":313,"نل":169,"قہ":970,"نم":856,"نق":463,"نف":515,"ے، ":1325,"نج":1093,"مي":23019,"نت":1838,"مو":3519,"نب":733,"نا":7139,"من":2588,"فہ":638,"مم":631,"نص":409,"نش":418,"نس":2278,"نز":337,"نر":275,"ند":4576,"قے":375,"مپ":203,"وئ":2114,"وا":9079,"نن":295,"نو":3979,"ني":8247,"و،":187,"ٹ ":1461,"لڑ":176,"وغ":354,"نٹ":588,"وع":906,"وق":1147,"وف":901,"ون":4062,"ول":2928,"وم":3387,"لہ":2193,"وت":3089,"وب":2645,"ود":2057,"لک":2192,"وح":206,"وج":2206,"لگ":620,"وس":2774,"وز":895,"ور":18253,"وط":241,"وض":250,"وص":445,"وش":613,"يکھ":326,"ڑ ":321,"يکن":476,"يکي":320,"يگر":286,"چ ":481,"يکہ":301,"ڈ ":843,"خو":1180,"دت":297,"دا":6455,"دب":338,"خي":646,"خل":1333,"خم":176,"تہ":894,"تھ":5631,"خط":526,"خر":474,"خد":295,"خص":603,"دو":4578,"ر،":467,"دي":7480,"دف":241,"دل":705,"دم":634,"ذا":327,"دن":1041,"تے":3362,"دع":168,"دد":552,"در":3958,"دش":266,"دس":695,"جي":1296,"جو":4658,"حت":458,"جن":2503,"حا":2227,"حب":298,"جل":349,"بہ":2825,"جم":1355,"بھ":4084,"ا۔":3031,"اۓ":335,"اے":213,"جس":2866,"جز":533,"جر":1045,"جد":723,"بک":601,"خت":1742,"حي":1318,"تک":1171,"حم":1392,"خا":1967,"خب":324,"حو":294,"حق":534,"حل":497,"حض":792,"حص":974,"بے":458,"حر":1330,"حس":621,"حد":976,"تف":294,"تم":1316,"تل":864,"تق":1058,"تو":2257,"بچ":213,"ثا":431,"تن":759,"تج":398,"تح":1528,"تر":2642,"تخ":631,"تد":422,"اڑ":396,"تش":356,"تص":699,"تس":246,"تظ":210,"تع":3085,"اں":1731,"ثل":246,"جا":8260,"ثم":243,"اہ":3738,"جب":952,"ثي":412,"تي":6367,"اک":3503,"ثر":502,"اگ":766,"بڑ":994,"ئے":3028,"ؤں":259,"بغ":203,"بع":1847,"بن":3272,"بم":183,"بل":2052,"بق":773,"بد":925,"اً":548,"بج":198,"بح":566,"بت":774,"اي":9647,"او":12882,"بط":281,"بص":181,"بز":199,"بس":224,"اُ":281,"بر":3415,"اپ":1892,"اٹ":526,"تا":10999,"اچ":510,"تب":1015,"بو":1546,"ت،":328,"بي":4832,"ئد":211,"ئر":536,"ئش":454,"ا،":402,"اء":804,"اؤ":588,"ئل":407,"ائ":7576,"ئم":430,"ئع":221,"از":2184,"ار":13604,"اد":7175,"اض":766,"اص":2408,"اش":1157,"اس":13319,"ات":12391,"اب":5157,"ئن":668,"اخ":1510,"اح":1946,"اج":1405,"اث":517,"ئي":5602,"اف":2468,"اق":3776,"ام":10852,"با":7594,"ان":23472,"ال":15347,"اع":2513,"اغ":318,"اط":844,"اظ":619,"آپ":794,"آئ":413,"آب":1377,"آت":374,"آج":214,"آخ":195,"آر":259,"آس":262,"آز":333,"آف":182,"آل":275,"آم":177,"آن":518,"آي":281,"عے":301,"ٰ 
":195,"عہ":1012,"طہ":234,"غي":910,"غل":246,"عي":1223,"غر":737,"صے":202,"عق":239,"عل":4649,"صہ":487,"عم":2601,"غا":645,"عن":1246,"عو":588,"عث":370,"ظي":492,"عت":818,"عد":2145,"عز":175,"عر":1942,"عظ":634,"عض":358,"عا":2408,"عب":841,"ظم":515,"شہ":2846,"ظا":851,"طن":541,"سہ":287,"طل":743,"سے":11317,"ظر":549,"شک":686,"طي":706,"طو":1493,"ضم":182,"زہ":344,"ضل":1190,"رے":1402,"طر":1375,"ضي":496,"سک":2597,"طح":262,"طا":2021,"طب":814,"ضو":288,"سپ":375,"صل":1848,"صف":401,"صط":357,"سٹ":824,"ضر":887,"صو":2504,"رہ":3575,"صن":434,"ضا":582,"صي":766,"دے":538,"شع":464,"رک":3328,"صح":404,"صد":1147,"صر":962,"رگ":411,"شم":1789,"ذہ":258,"صا":794,"شن":774,"صب":345,"شو":358,"شي":1319,"سع":252,"سط":727,"دگ":457,"سف":448,"دھ":774,"رپ":455,"رٹ":425,"رڈ":213,"سي":6156,"شت":955,"رچ":305,"سو":1823,"شا":3056,"سن":1275,"دہ":2389,"سم":1996,"سل":3485,"شر":1691,"شد":243,"شخ":304,"ذک":274,"سب":1244,"سا":5309,"زن":603,"ست":5770,"زو":460,"زم":950,"زل":234,"سر":2705,"سج":282,"زي":3808,"دک":188,"رس":1755,"رش":298,"رر":201,"رز":482,"جے":273,"رط":461,"رص":252,"رض":357,"رل":427,"رق":1634,"رف":1286,"رو":5304,"زب":1100,"رن":2342,"زا":1886,"رم":1656,"ري":12753,"زر":534,"ذر":554,"جگ":285,"رآ":320,"جھ":599,"جہ":1619,"رب":2493,"را":9267,"رت":4343,"رج":1190,"ذي":412,"رخ":287,"رح":1014,"حک":1119,"رد":2466,"ف ":3475,"ع ":4219,"ڑي ":543,"غ ":230,"ص ":761,"ض ":697,"ط ":839,"ظ ":1429,"ر ":38086,"ز ":2573,"س ":11232,"ش ":1550,"ِ ":847,"ً ":542,"ي ":57056,"ن ":18794,"و ":15262,"ق ":2735,"م ":14225,"ل ":13265,"ينہ":361,"ينے":248,"يوں":1103,"وہ ":2016,"يقي":265,"يلا":274,"يلي":664,"يما":422,"يلو":184,"ينا":221,"نہي":1033,"يمي":391,"نہو":438,"يٹ ":181,"يني":579,"يقہ":243,"وچس":192,"يور":512,"يوا":288,"يون":715,"ينڈ":257,"وں ":6861,"ہے":17450,"يٰ ":189,"وٹي":197,"يعے":209,"وگ ":226,"يرہ":738,"يشي":298,"يشن":306,"يسي":670,"نگي":216,"نگل":243,"يسو":203,"يسر":205,"ے،":1342,"نگر":1405,"يزي":1326,"نگا":269,"يسا":524,"يرو":314,"يري":407,"يعن":649,"يسے":586,"۔ا":238,"يثي":187,"يا۔":1172,"نکا":269,"ے۔":10861,"يتي":187,"ياں":353,"يرا":565,"نکہ":250,"يدا":1270,"نے ":7909,"يال":283,"يان":856,"يبا":352,"يام":192,"ياس":748,"يار":881,"ياد":1222,"ياض":232,"ياء":241,"يات":2312,"ياب":173,"يائ":508,"کچ":345,"کٹ":398,"کي":17718,"ي۔ ":1247,"کس":3186,"کش":375,"کر":6688,"کز":505,"کث":328,"کت":1905,"کو":8220,"کن":1311,"کم":1336,"کل":1933,"کئ":359,"کب":357,"کا":13105,"يۓ ":585,"يے ":1282,"گہ":320,"گھ":468,"گل":520,"گن":309,"گو":1092,"گي":2325,"گز":223,"گر":3536,"گئ":1042,"گا":1385,"کے":18417,"کھ":2829,"کہ":6908,"ں،":524,"گے":188,"يہ ":6011,"يڈي":227,"ھے":1744,"ہز":291,"ہر":3373,"ہج":298,"ہت":1501,"ہد":644,"ہا":4675,"ہب":255,"ہي":8193,"ہل":1738,"ہم":1207,"ہن":1510,"ہو":9694,"ں۔":3376,"ھم":206,"ھل":258,"ھي":5556,"ھو":1393,"ھن":692,"ہ،":328,"ھر":965,"يں ":20810,"ھا":3850,"ھت":572,"ں ":29650,"ڈر":227,"چي":975,"چن":411,"چو":627,"وہا":172,"ڈا":533,"چک":371,"چہ":230,"چھ":1116,"ڈو":318,"ڈي":856,"پھ":684,"پہ":1407,"چس":237,"چا":849,"چت":269,"چل":282,"ڑا":619,"ۃ ":184,"ۂ ":441,"ڑے ":367,"يٹر":982,"ڑي":670,"يک ":5968,"ھ ":2275,"چے":173,"ہ ":29676,"وگو":199,"ڑک":206,"ڑھ":467,"ڑے":384,"ے ":59880,"ۓ ":1150,"۔ ":15851,"و۔ ":200,"وز ":174,"ور ":14043,"ود ":1161,"ڈي ":274,"لگ ":206,"وس ":423,"چين":228,"نما":532,"وع ":503,"نيا":1482,"نوي":380,"نون":235,"نور":347,"نوب":664,"نوا":373,"نٹ ":234,"وف ":378,"نيہ":422,"نيو":458,"نوں":1111,"وم ":918,"لہ ":1961,"ون ":1017,"چند":174,"ول ":1088,"نڈ ":285,"وي ":1068,"ي، ":669,"نچ ":177,"مغر":519,"معل":267,"معن":336,"معر":258,"مشہ":684,"معا":596,"چست":201,"مقا":854,"مقد":219,"چان":177,"ملت":253,"ملا":357,"چتر":233,"قے 
":368,"منا":208,"نائ":404,"مند":339,"نات":285,"منت":213,"نار":304,"مما":299,"ملي":227,"موا":174,"ملک":619,"موج":660,"مور":225,"موس":304,"موع":240,"نام":2044,"نان":513,"ناي":217,"نتق":217,"نتظ":204,"نتخ":209,"مون":289,"موم":337,"مول":250,"ميل":269,"ميد":233,"مير":641,"ميا":812,"ميت":203,"نتي":206,"نجا":562,"مين":762,"ميٹ":952,"موں":292,"ندا":677,"ميں":16302,"ندو":448,"ندر":713,"ندي":569,"چار":237,"نسا":448,"چي ":376,"وا ":736,"ندگ":273,"ندہ":249,"نسي":397,"نسل":289,"ندھ":311,"وب ":577,"وت ":377,"نظا":416,"نظر":493,"وج ":244,"نظي":172,"لک ":878,"ونا":319,"ومي":882,"ونس":207,"وما":454,"ولي":616,"ولو":168,"ومت":782,"يع ":214,"ولا":396,"وقت":472,"وفا":282,"يش ":423,"يس ":618,"نگ ":881,"ويں":364,"يق ":261,"يف ":611,"مکم":188,"ونے":813,"نڈي":216,"چہ ":203,"ونک":263,"چھ ":368,"وني":567,"وٹ ":170,"ونو":241,"وڈ ":203,"يو ":296,"نہ ":1535,"يم ":2131,"ين ":3424,"مگر":341,"يل ":1806,"لے ":2120,"وئے":873,"واں":205,"وتي":759,"وتا":1184,"وبہ":880,"وجي":169,"وجو":858,"وا۔":228,"وار":699,"واز":279,"واد":234,"واج":227,"وئي":1076,"واب":205,"وائ":356,"وبي":420,"واي":171,"واق":1340,"وال":2546,"وان":598,"وبا":263,"وام":369,"وري":875,"مہ ":911,"وست":275,"لگا":240,"وزي":317,"يا ":8493,"وسر":713,"وسط":295,"ورپ":286,"ودہ":176,"وسي":366,"يب ":717,"ورہ":195,"لکي":197,"وتے":500,"ودي":279,"ورا":750,"وجہ":615,"ورس":204,"ورت":506,"لکہ":196,"ورن":218,"لکھ":453,"وغي":292,"يد ":1164,"ير ":2092,"يز ":500,"يت ":1838,"يج ":176,"يح ":219,"يخ ":423,"لد ":289,"قصب":216,"لت ":358,"لا ":1251,"قسم":285,"لب ":395,"قسي":239,"�":423,"لق ":599,"لف ":570,"چھو":396,"قوں":260,"ما ":498,"لم ":1252,"لع ":962,"قيق":283,"قيا":359,"قوم":305,"قوا":311,"قل ":209,"فرا":633,"فرو":181,"فري":361,"قي ":950,"ل، ":225,"فيص":180,"قبہ":284,"قدر":190,"قدي":366,"قري":696,"قرآ":222,"قرا":218,"قال":197,"قائ":507,"قاب":316,"قات":245,"فلم":218,"فوج":255,"قبو":176,"قبل":223,"قان":205,"قبا":188,"قام":703,"فٹ ":202,"لما":635,"ماع":315,"لمي":331,"مات":656,"مار":1346,"ماد":258,"لند":373,"مائ":379,"لفظ":904,"لفا":303,"نس ":426,"لعہ":239,"ند ":917,"لطا":204,"مي ":1920,"لطن":355,"مطل":234,"ني ":4359,"و، ":185,"مطا":719,"مصن":197,"مصر":194,"مرک":583,"نو ":281,"مذہ":183,"مشر":609,"مسل":679,"مشت":387,"مست":254,"مسج":242,"مسا":197,"قہ ":928,"مري":646,"مجھ":254,"مرا":743,"مرب":331,"مرت":186,"مدي":169,"ليے":1014,"ليۓ":470,"مدد":185,"ليک":538,"ليہ":571,"مخت":598,"لوگ":423,"محم":603,"لوں":448,"لوچ":226,"لين":425,"مجم":256,"لنے":191,"ليت":222,"متي":175,"ليا":836,"ماہ":260,"ليم":656,"مثل":227,"لوي":317,"لوم":894,"للہ":672,"متح":211,"متع":399,"مال":2414,"مام":560,"مان":1742,"مبا":175,"ماي":231,"مبر":417,"مر ":315,"مد ":873,"لو ":198,"ے۔ ":8070,"مت ":1234,"لي ":3595,"ن، ":362,"لسل":386,"نب ":260,"نا ":1656,"من ":398,"فہ ":313,"نت ":572,"لدي":343,"لحک":303,"مل ":1925,"لتي":176,"لاک":279,"لئے":321,"لاہ":228,"لتا":182,"لحا":247,"لاح":555,"لاد":170,"لاز":192,"لائ":397,"لات":751,"لاق":1241,"لاف":367,"مع ":244,"لاو":376,"لام":1294,"لان":427,"لبا":312},"n_words":[1602570,1999510,1324903],"name":"ur"}
\ No newline at end of file
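Note (not part of the patch): the three "n_words" totals appear to be the overall counts of 1-, 2-, and 3-grams respectively, so a raw count converts to a relative frequency by dividing by the total for its own length class. A short Python 3 sketch under that assumption, using only the ur profile shown above (hypothetical path):

    # Sketch, assuming n_words[i] is the total number of (i+1)-grams.
    import json

    with open("nlp_resource_data/langdetect/profiles/ur", encoding="utf-8") as f:
        p = json.load(f)

    def rel_freq(ngram: str) -> float:
        """Relative frequency of an n-gram within its own length class."""
        return p["freq"][ngram] / p["n_words"][len(ngram) - 1]

    # Trigram "کا " has raw count 9732 in the data above; this divides it
    # by the third total, 1324903.
    print(rel_freq("کا "))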
diff --git a/nlp_resource_data/langdetect/profiles/vi b/nlp_resource_data/langdetect/profiles/vi
new file mode 100755 (executable)
index 0000000..10c3d89
--- /dev/null
+++ b/nlp_resource_data/langdetect/profiles/vi
@@ -0,0 +1 @@
+{"freq":{"D":18934,"E":10094,"F":9985,"G":23207,"A":42579,"B":48257,"C":72224,"L":45665,"M":47574,"N":70917,"O":10000,"H":57302,"I":15743,"J":5371,"K":22108,"U":5400,"T":104616,"W":8071,"V":30021,"Q":14744,"P":61692,"S":43824,"R":17604,"Y":3509,"X":6207,"Z":3364,"f":32992,"g":525146,"d":205922,"e":428748,"b":182777,"c":671453,"a":692878,"n":1382200,"o":437498,"l":442210,"m":420259,"j":4762,"k":100510,"h":937660,"i":759263,"w":22119,"v":178461,"u":410719,"t":952446,"s":286409,"r":444790,"q":33677,"p":174320,"z":16996,"y":192811,"x":29082,"²":689,"Î":330,"É":501,"Á":3556,"Â":2373,"Ý":1288,"ß":648,"Ú":1274,"Ô":1751,"í":49339,"ì":32480,"ê":66073,"é":38397,"è":3862,"ç":233,"ä":808,"ã":19808,"â":112923,"á":116051,"à":337783,"ü":3187,"ý":5197,"ú":10002,"ù":30808,"ö":2319,"ô":98885,"õ":1117,"ò":13544,"ó":78081,"ñ":390,"đ":198318,"Đ":41649,"ă":37793,"ā":557,"ĩ":9562,"ī":249,"ō":772,"ũ":5403,"ū":650,"ư":147894,"ơ":25607,"́":204,"ο":236,"ι":146,"λ":139,"α":217,"ς":203,"ρ":170," l":242575,"ь":159," m":173509," n":244497,"я":194," o":12009," h":134731," i":13655," j":644,"ы":142," k":76050," d":93354," e":7007," f":8319," g":49741,"ч":190,"р":638,"с":522," a":22488," b":142456,"т":441,"у":252," c":262807," y":1958," x":17955," z":365," u":1259," t":566783," w":4693," v":156401," q":28586," p":59115," s":94989," r":14869," J":5319," K":22031," H":57162," I":15633," N":70771," O":9935," L":45508," M":47408," B":48077," C":71972," A":42454,"С":156," F":9904," G":23091," D":18825," E":10050,"л":506,"к":551," Z":3238,"й":211," Y":3492,"и":885," X":6141,"о":889,"н":659,"м":235,"г":168," S":43565,"в":508," R":17493," Q":14713,"б":149,"а":1192," P":61533," W":8016," V":29925," U":5377,"е":792,"д":263," T":104253," á":1499," â":3662," í":541," ô":2220," ý":503," ă":627," Đ":41607," đ":198156," Â":2373," Á":3556," É":500," Î":330," Ô":1748," Ú":1274," Ý":1277," ư":1144,"ي":271,"ل":264,"م":187,"ن":142,"ا":398,"ر":178,"A ":2159,"F ":494,"Da":2569,"Cu":1591,"Cy":963,"Cl":1245,"Co":10944,"Cr":1746,"Ce":2576,"Ch":19039,"Ci":1005,"G ":637,"Ec":908,"Ed":361,"Ea":530,"Du":1267,"Dy":152,"Do":2470,"Dr":1247,"De":2217,"Di":2894,"Bà":792,"Bá":858,"Fe":902,"H ":455,"Fa":1831,"Eu":2126,"Ex":464,"Er":617,"Et":367,"Es":894,"En":771,"Em":460,"Ep":314,"Ei":335,"El":872,"Cá":2732,"Ge":2583,"Câ":290,"Cà":157,"Ga":3052,"I ":2174,"Fu":749,"Fr":2327,"Bí":387,"Bì":1773,"Fo":1217,"Bé":191,"Fl":1104,"Fi":1024,"B ":642," С":156,"C ":1227,"Av":428,"Au":2997,"Ar":5387,"Aq":1658,"At":1067,"As":1434,"D ":745,"Ba":13610,"Az":1190,"Ay":139,"Ae":265,"Af":962,"Ag":750,"Ah":192,"Ab":577,"Ac":1193,"Ad":611,"Am":2029,"An":10483,"Ap":626,"Ai":1117,"Ak":164,"Al":6955,"By":154,"Bu":3220,"Br":4012,"Ca":14082,"E ":3496,"Bh":176,"Bi":2488,"Be":3517,"Bo":4388,"Bl":876,"Gò":247,"Ku":658,"Gö":233,"Ky":391,"Kn":144,"Kl":231,"Kr":781,"Ko":952,"Hã":340,"Hà":3871,"Há":6912,"Le":5532,"Li":5716,"N ":1502,"La":11255,"Lu":1591,"Hó":455,"Hò":671,"Ly":614,"Bư":341,"Hé":321,"Lo":11096,"Hì":160,"Me":5622,"Mi":7578,"Cơ":380,"O ":859,"Ma":14564,"Cư":440,"My":979,"Mu":2726,"Mo":6001,"Nh":15252,"Ni":3602,"Ng":8870,"Ne":5246,"Na":14732,"P ":1136,"Dư":2196,"ưu ":1732,"Ny":327,"Nu":290,"No":7260,"Ok":174,"Ol":595,"Om":201,"On":483,"Oh":203,"Oi":748,"Oc":485,"Od":183,"Oe":270,"Oa":270,"Ob":773,"Gi":6635,"Gh":326,"Gl":496,"Gr":4171,"Go":1400,"Gu":2462,"Gy":286,"Cô":3480,"Có":402,"Cú":325,"J ":219,"Ha":7544,"Dâ":1283,"He":2483,"Hi":2709,"Ho":13976,"Hu":4398,"Hy":1927,"Dô":340,"K 
":324,"Ib":265,"Id":176,"Ic":220,"Im":190,"In":3623,"Il":428,"Is":1691,"It":2092,"Ir":1003,"Ja":1797,"L ":642,"Ji":230,"Je":710,"Jo":1104,"Ju":1037,"Ka":2594,"M ":626,"Kh":4970,"ưng":2590,"Ki":2990,"Ke":1180,"Ut":638,"Ur":404,"Um":166,"Un":919,"Uk":408,"Ul":140,"Ug":456,"W ":203,"Ty":201,"Tw":155,"Tu":2972,"Tr":21381,"To":2690,"Th":42787,"Ti":3875,"Te":2952,"Ta":3822,"V ":758,"Sw":579,"Sy":613,"St":3687,"Su":1921,"Wo":921,"Wi":1647,"Wh":297,"Sé":358,"Sè":329,"Wa":2024,"Sâ":534,"Sá":354,"We":2017,"Sà":247,"Y ":322,"Lư":806,"Vo":1915,"Vu":318,"Vi":16219,"Ré":208,"X ":401,"Va":2214,"Ve":2267,"Uy":394,"Lă":215,"Mã":1030,"Má":214,"Lý":664,"Lü":151,"Mé":245,"Pt":637,"Pu":1475,"Pr":3920,"Ps":557,"S ":1842,"Py":3299,"Pe":2603,"Là":800,"Lã":279,"Pf":1720,"Lâ":833,"Pa":7810,"Lé":142,"Pl":1384,"Po":3931,"Lê":927,"Pi":2275,"Ph":31098,"Os":655,"Ot":301,"Ou":271,"Ov":238," ا":171,"Op":394,"Or":2938,"R ":429,"Kô":237,"Se":3444,"Sc":3122,"Si":2745,"Nă":720,"Sh":1345,"Sm":208,"Sl":522,"Sk":307,"Sr":367,"Sp":2000,"So":3317,"Ru":1170,"Nô":330,"Ry":155,"Nó":11015,"U ":337,"Hư":909,"Sa":11442,"Re":2085,"Ri":2194,"Rh":4441,"Ro":3835,"Qu":14208,"Mô":839,"T ":649,"Mù":252,"Ra":1904,"Mü":183,"Sơ":2030,"Sư":226,"Yê":502,"Xã":2079,"Wü":541,"Xô":549,"ưa ":1620,"Xí":179,"Vĩ":456,"b ":1444,"a ":226759,"Tư":1027,"Vũ":589,"Tù":160,"Tú":283,"Tô":962,"Xy":153,"Nư":140,"Ye":349,"Ya":435,"Yp":239,"Yo":874,"Sĩ":507,"Yv":187,"Yu":292,"Mư":225,"Só":240,"Sô":413,"Tâ":9624,"Tà":542,"Xe":229,"Tá":302,"Sü":308,"Xa":216,"Tê":676,"Xi":420,"Tò":164,"Xu":1093,"Tí":259,"Vù":202,"Vă":1172,"Za":1272,"Và":236,"Ze":739,"Zh":185,"Zi":458,"Vâ":484,"Tĩ":188,"Zu":199,"Võ":191,"i ":212782,"bó":2113,"cà":270,"gd":295,"cá":22579,"ge":11280,"câ":2231,"bú":210,"ga":12467,"bé":224,"fl":1616,"bã":190,"fg":215,"ff":1242,"fi":2497,"fs":160,"fr":3324,"fu":1164,"ft":684,"fo":4267,"bê":1228,"bí":976,"bì":10910,"j ":155,"cù":1871,"cú":386,"gy":681,"có":47444,"gw":161,"cô":5046,"dâ":28874,"dã":502,"dà":2537,"he":41577,"hb":191,"ha":43156,"gn":5754,"gm":577,"gl":1738,"gk":293,"gi":33763,"bă":263,"gh":11779,"gg":570,"cò":3292,"gu":10662,"gt":510,"gs":1016,"gr":2654,"go":8645,"dt":805,"du":4489,"dw":715,"dy":507,"g ":364697,"ea":15752,"eb":3649,"ec":8564,"ed":11541,"de":24490,"dd":556,"dg":298,"df":167,"di":32181,"dh":710,"dk":145,"dm":293,"dl":1043,"do":14691,"dn":264,"ds":1755,"dr":3533,"ew":3323,"ex":3581,"eu":6762,"ev":2827,"ey":3123,"ez":1216,"fa":5205,"h ":178273,"bà":2398,"bá":2351,"fe":2871,"eh":679,"eg":4460,"ef":1217,"ee":4546,"el":24442,"ek":799,"ej":311,"ei":13045,"ep":4148,"eo":14079,"en":49087,"em":10013,"et":12592,"es":40569,"er":51401,"eq":167,"ca":30360,"Xư":246,"e ":134279,"bw":303,"by":1138,"bs":628,"br":3660,"bu":6071,"bt":170,"bn":144,"bo":4357,"bl":1961,"bf":332,"bh":146,"bi":46026,"bb":920,"be":11159,"db":293,"da":38163,"f ":9188,"cy":1022,"cu":9388,"ct":7017,"cs":417,"cq":226,"cr":3098,"co":14606,"cm":248,"cn":200,"ck":4238,"cl":1934,"ci":14195,"ch":129179,"ce":13915,"cc":1731,"Vư":1101,"c ":282171,"az":1913,"ay":20261,"ba":27162,"d ":28581,"at":21123,"as":21308,"ar":46226,"aq":534,"ax":1123,"aw":1934,"av":3839,"au":17184,"ak":3208,"al":35399,"ai":30021,"aj":1274,"ao":21790,"ap":5292,"am":43172,"an":108096,"ac":18572,"ad":10374,"aa":985,"ab":4752,"ag":8446,"ah":2616,"ae":34413,"af":1148,"nu":5357,"nt":26825,"ns":10879,"nr":434,"nq":250,"np":246,"no":9747,"hĩ":3597,"nn":7993,"q 
":277,"nz":1727,"dư":2566,"ny":2595,"nx":314,"nw":217,"nv":655,"oe":1614,"of":8008,"oc":9998,"od":5406,"oa":17234,"ob":3593,"ké":466,"om":14342,"kê":1778,"on":100425,"ok":1733,"kè":139,"ol":16267,"oi":9431,"oj":178,"og":4447,"oh":1218,"ot":8950,"m²":659,"hō":191,"os":13276,"ov":4515,"ou":16281,"kì":215,"op":11878,"oo":2896,"or":34485,"oq":166,"gũ":395,"kí":1225,"r ":23753,"ox":666,"ow":4522,"oz":1526,"oy":925,"là":155485,"lá":810,"pe":13642,"pf":220,"lâ":707,"lã":1014,"pa":8707,"ký":795,"lè":430,"pl":2303,"lé":393,"lê":819,"po":6064,"ph":59995,"pi":7555,"lo":48870,"ln":332,"hê":913,"lm":1499,"hé":1262,"hè":446,"ll":28312,"ls":5922,"hí":22956,"lp":3661,"hì":4519,"hó":4714,"lw":160,"hò":5943,"lv":1420,"lu":8343,"lt":3135,"bư":9431,"lz":2455,"hö":169,"ly":7272,"hô":9522,"hú":4659,"hù":1344,"o ":85116,"iß":177,"ià":688,"hü":788,"ma":19960,"mb":6117,"mg":163,"hă":499,"me":15863,"iá":6621,"cơ":2439,"iè":809,"ml":141,"mi":19185,"mn":579,"iê":19338,"mm":3914,"ié":147,"mp":6181,"mo":8059,"mt":201,"ms":1171,"mu":3841,"iô":221,"ió":194,"cư":776,"my":849,"p ":57766,"iú":305,"na":36440,"nb":2672,"nc":9194,"nd":29108,"ne":33625,"nf":985,"ng":422545,"nh":179739,"ni":21672,"nj":602,"nk":2141,"nl":3093,"nm":685,"ju":396,"jo":698,"ki":21387,"kh":39807,"gã":188,"gâ":871,"ke":3413,"gá":300,"gà":10665,"ka":4069,"m ":167316,"gó":565,"gô":2554,"ky":750,"ks":1201,"kt":384,"ku":945,"ko":1602,"cũ":2937,"kr":861,"kk":293,"kl":1148,"km":4687,"kn":974,"li":29575,"lh":510,"lk":1201,"le":28117,"há":50648,"hà":29720,"ld":3791,"hã":1199,"lg":997,"hâ":38191,"lf":811,"la":39723,"lc":719,"lb":2760,"n ":440934,"hr":3391,"hs":2273,"dò":649,"hw":1075,"ht":2057,"hu":96505,"hk":147,"hh":235,"că":337,"hi":59002,"hn":1134,"ho":33760,"hl":2514,"hm":909,"dé":1520,"id":34385,"ic":24617,"ib":3478,"ia":42159,"ih":435,"ig":7165,"if":3529,"ie":18418,"hy":2981,"dù":2108,"k ":5941,"iq":1214,"ir":10741,"is":30398,"it":19216,"iu":2880,"iv":3060,"iw":152,"eó":2057,"ix":1054,"ii":6395,"ij":559,"ik":1883,"il":38718,"im":11936,"in":70002,"io":10270,"ip":6214,"je":426,"ji":826,"iz":1625,"iy":348,"l ":17786,"ja":1818,"nơ":1994,"să":488,"xi":2236,"tê":7401,"xo":824,"té":201,"tì":8961,"xp":151,"tí":18614,"tò":332,"xt":572,"xu":3759,"mư":601,"sô":2011,"ww":348,"só":466,"z ":4461,"xc":234,"xa":2141,"tâ":11580,"tá":3122,"xe":1418,"tà":3376,"wh":1250,"ră":636,"wi":4079,"sè":470,"wl":199,"sé":203,"wn":1808,"sê":269,"wo":1048,"ws":516,"wt":183,"rò":801,"rõ":266,"rô":669,"lư":3756,"rö":667,"rù":551,"y ":123996,"rú":838,"rü":313,"wa":4838,"sâ":1032,"sá":4087,"we":3258,"rè":166,"ré":3552,"vi":14749,"râ":232,"rã":310,"vu":13359,"vr":721,"rì":2865,"rí":1219,"rê":17007,"vo":1414,"uz":955,"uy":40068,"ux":1932,"uw":188,"uv":1489,"ve":8349,"rá":1135,"rà":746,"va":5946,"x ":5085,"ui":7198,"uj":271,"uk":987,"ul":11719,"ue":8283,"uf":658,"ug":2145,"uh":313,"mũ":152,"ur":22852,"us":22737,"ut":11022,"um":8525,"un":37448,"uo":379,"up":2448,"ty":3384,"tz":1632,"tu":10835,"tt":6267,"tw":506,"tv":273,"ub":3534,"ua":12317,"ud":3186,"uc":4195,"w ":3929,"to":19941,"tn":373,"tm":750,"tl":2439,"ts":3488,"tr":168490,"lũ":299,"tp":612,"tg":165,"tf":340,"te":34125,"ti":45992,"th":244395,"v ":848,"tb":456,"tc":539,"ta":22518,"su":6738,"sv":185,"ss":11481,"st":25304,"sy":900,"sw":1422,"sl":2460,"sk":1966,"sn":3570,"sm":2007,"sp":6570,"so":7204,"sr":482,"sq":267,"oà":40273,"sd":636,"oß":224,"sc":6190,"sf":434,"se":16078,"oá":2469,"nă":29159,"sh":6976,"sg":779,"oã":161,"si":19668,"hư":18279,"rz":821,"u 
":92839,"nú":1257,"sa":12158,"sb":1450,"rr":9518,"rs":7765,"rt":11056,"ru":33603,"rv":1083,"rw":648,"nó":3472,"nô":498,"ry":4815,"rq":206,"rp":2369,"lĩ":607,"ro":79309,"nê":766,"rn":10149,"né":3611,"rm":6127,"rl":2540,"nç":154,"rk":3126,"hơ":3447,"ri":45411,"rh":1015,"rg":11132,"nâ":296,"rf":2041,"re":31859,"rd":9664,"nà":32104,"า":167,"rc":4893,"rb":3024,"ra":48559,"mù":496,"t ":240212,"gư":23033,"mó":270,"mô":1896,"qu":33189,"ร":145,"mé":23563,"mì":570,"má":2261,"mã":976,"lý":2656,"mà":2647,"s ":83922,"lú":457,"lô":11642,"px":1137,"py":386,"pt":4331,"pu":3257,"lò":168,"pp":2831,"lí":1363,"pr":2803,"ps":2406,"hū":297,"vư":1017,"zè":171,"vũ":680,"tư":4765,"sơ":283,"yê":5321,"sư":822,"xá":866,"xâ":942,"xã":6617,"rư":7067,"xé":140,"vĩ":1312,"xí":212,"vă":2376,"vù":22603,"zz":263,"vâ":231,"zh":389,"zi":1581,"uý":467,"zb":261,"và":46943,"ze":2177,"za":3062,"yz":169,"vò":489,"võ":378,"vô":1031,"zu":1605,"zo":1512,"zn":191,"ví":191,"vì":1173,"zl":195,"yg":495,"yh":272,"tă":382,"ye":2354,"uá":1249,"uâ":5412,"yf":154,"yc":1911,"yd":753,"ya":3754,"yb":311,"tú":167,"tù":418,"nư":35853,"tô":1034,"xy":294,"yx":252,"uô":12078,"yu":633,"yt":872,"ys":3871,"yr":5371,"yp":2222,"sĩ":1785,"yo":1215,"yn":1357,"uê":505,"ym":1622,"ué":319,"yl":3067,"yi":713,"xư":530,"yū":142,"² ":668,"Á ":1673,"Áv":245,"Áo":1420,"Âu":2129,"Âm":150,"Îl":330,"àn":34510,"ào":9571,"àm":2558,"ài":40656,"ã ":15139,"ày":41288,"àu":3190,"ám":1101,"án":28484,"áo":4300,"áp":27542,"ái":6121,"ác":31049,"áy":2255,"áu":552,"át":6111,"âm":6839,"ân":71359,"âu":6947,"ât":496,"ây":27219,"ãn":2862,"ão":485,"ãi":672,"ãy":559,"à ":205928,"á ":8250,"ße":294,"Úc":1243,"Ý ":1285,"Ôn":1610,"アアア":304,"ôi":2574,"ôm":11781,"ôn":48478,"óa":2709,"õ ":900,"ói":993,"óc":731,"óp":293,"ón":6670,"óm":1531,"ô ":34032,"òa":5612,"òm":318,"òn":6337,"ó ":64834,"ña":207,"ò ":1069,"ín":13049,"ít":636,"ìn":20626,"ìm":8528,"íc":17864,"ía":12239,"í ":5056,"ên":48870,"êm":9168,"êu":3630,"éz":154,"ì ":3075,"él":169,"éo":764,"ép":2326,"ém":434,"én":3441,"és":341,"ét":23592,"ér":825,"év":213,"éb":181,"éd":152,"éc":499,"ée":3394,"ég":181,"èn":257,"èo":280,"èr":1433,"ès":603,"èv":444,"ê ":4050,"é ":1396,"Đưể":826,"è ":371,"är":168,"ăm":28997,"ăn":8563,"ăk":142,"đưể":28774,"ān":253,"ý ":5155,"ể":1508247,"ün":249,"ür":1564,"üt":177,"üc":207,"üd":322,"ùn":27807,"ùi":235,"ùa":1067,"úp":776,"ún":1686,"úy":219,"út":637,"úa":606,"ùy":248,"úi":1359,"úc":3127,"ù ":1286,"ú ":1439,"ôt":1632,"öt":180,"ör":366,"ös":634,"ön":312,"öl":155,"đĩ":597,"đó":3828,"đô":27703,"đú":182,"đă":220,"Đư":848," ể":80643,"đư":29695,"đơ":1908,"tưể":2647,"Đa":443,"Đo":196,"Đi":1420,"đe":478,"đa":1708,"đo":2470,"đi":17430,"Đ ":295,"Đì":322,"Đà":1638,"Đá":165,"Đâ":3555,"đí":392,"đì":460,"đê":8064,"đã":4348,"Đă":188,"đá":3730,"đâ":1604,"đà":1035,"Đô":12692,"đu":489,"Cể":8674,"Dể":759,"Bể":9876,"Hể":9168,"Gể":170,"Lể":3634,"Kể":5925,"ĩ ":4602,"ĩa":3526,"ĩn":1420,"ĩnh":1417,"dể":12219,"ĩa ":3523,"cể":67050,"bể":45726,"mể":131131,"ũ ":2155,"lể":23218,"kể":7585,"iể":166241,"hể":264262,"gể":17545,"Sể":1192,"Tể":7402,"Rể":525,"ō ":339,"Mể":5140,"vưể":475,"Nể":1896,"Xể":142,"Vể":1294,"rể":50434,"sể":54959,"tể":83275,"uể":92128,"nể":21381,"ū ":399,"oể":14719,"ũi":200,"ũn":2955,"vể":53705,"专专 ":169,"xể":2191,"yể":31245,"ơi":3100,"ơm":181,"ơn":18072,"ơ ":4182,"あ":270,"ア":496,"가가 ":224,"ươ":12314,"ư ":8494,"ưa":1622,"ưn":2592,"ưu":1733,"Để":19679,"để":91689,"Đan":304,"đa ":614,"ơm ":179,"ơn ":5934,"đai":310,"đan":664,"ơi ":3096,"đi ":743,"ơng":12128,"đen":294,"đo 
":419,"đoà":1003,"đua":287,"乙":857,"乘":179,"之":2791,"丹":642,"临":665,"中":142,"並":1114,"丙":309,"丘":935,"丛":252,"专":2269,"且":143,"丈":196,"三":4224,"丁":2034,"万":989,"亞":563,"亂":327,"侏":215,"ểy ":15070,"ểu ":43780,"ểt ":179200,"Đây":3553,"Đào":412,"Đài":765,"Đà ":277,"Đôn":3441,"圓":161,"Đô ":9162,"Đìn":322,"冲":171,"đêm":8016," 丘":335,"đíc":372," 专":413," 三":1027," 丁":795,"đìn":459,"đây":1563,"đá ":2316,"倉":569,"đán":1131,"đã ":4325,"đào":392,"đài":257,"đàn":318,"đún":140," 倉":244,"ưể":121112," 侏":158,"đón":976,"đôi":461,"đôn":8728," 並":342," 临":320," 丹":351," 之":799," 乙":411,"đó ":2805,"đô ":18495," 亂":152,"大":197,"ểa ":61642,"ểc ":245982,"ểch":9163,"ểi ":112695,"ểk ":174,"ển ":203101,"ểm ":59200,"ểp ":25010,"ểng":124564,"ểnh":41635,"ểo ":9528,"Điể":1233,"ああ":156,"điể":16552,"đoể":920,"đăn":218,"đĩa":596,"ς ":202,"đơn":1904," ểc":15788," ển":5390," ểm":264," ểy":799,"アア":391," ể ":57997,"đươ":308,"đưa":612,"ск":150," vư":1017," xư":529," Áo":1420," Áv":245," Á ":1673," Ga":3043," Câ":289," Cá":2724," Ge":2570," Cà":157," I ":489," Bì":1773," Bí":387," Fo":1209," Fu":749," Fr":2319," Fi":1014," Fl":1075," Bé":190," Ha":7534," He":2474," Dâ":1282," Cô":3477," Có":398," Gy":285," Cú":325," J ":162," Go":1392," Gr":4156," Gu":2454," Gh":322," Gi":6622," Gl":488," Id":176," Ic":219," Ib":265," Hy":1924," Dô":340," Hu":4380," Ho":13959," Hi":2699," Ji":227," Je":694," L ":288," Ja":1791," Ir":1002," Is":1688," It":2091," Im":186," In":3565," Il":425," M ":229," Ka":2587," Ke":1169," Ki":2975," Kh":4951," Jo":1090," Ju":1029," N ":147," La":11244," Hà":3868," Há":6905," Le":5519," Hã":339," Li":5628," Kl":231," Kn":141," Ko":946," Kr":777," Gò":247," Ku":656," Gö":233," Ky":391," Ma":14506," O ":139," Cơ":380," Mi":7555," Me":5610," Hì":157," Lo":11086," Hé":321," Bư":341," Ly":614," Hó":455," Hò":670," Lu":1590," Ne":5220,"а ":296," P ":285," Na":14703," Ng":8844," Nh":15231," Ni":3593," Mo":5991," Cư":440," My":978," Mu":2713," A ":801," B ":314," C ":460," Ap":624," Am":2023," An":10467," Ak":163," Al":6944," Ai":1106," Ag":749," Ah":189," Ae":262," Af":957," Ac":1189," Ad":608," Ab":573," Ba":13586," D ":281," Az":1190," Ay":139," Av":424," Au":2992," At":1064," As":1419," Ar":5374," Aq":1657," Be":3504," Bi":2420," Bh":176," Bl":874," Bo":4376," Br":4006," Bu":3207," By":153," E ":175," Ca":14032," Ce":2572," Ci":1003," Ch":19001," Cl":1229," Cr":1732," Co":10896," Cu":1575," Cy":959," F ":217," Da":2548," Di":2876," De":2206," Dr":1245," Do":2436," Dy":152," Du":1262," Ea":529," Ec":908," Ed":358," G ":173," El":867," Ei":335," Et":365," Es":889," Er":614," Ep":314," En":763," Em":454," Ex":456," Eu":2122," Bà":785," Bá":846," Fe":894," Fa":1822," H ":178," Xu":1090," Tò":163," Tí":259," Tê":651," Xi":413," Tà":542," Xe":204," Tá":302," Tâ":9623," Sü":308," Xa":216," Só":240," Sô":411," Mư":225," Wo":905," Sé":358," Sè":329," Wi":1632," Wh":294," Sá":350," We":2007," Sâ":533," Sà":247," Wa":2017,"й ":151," Y ":281," Lư":805," Võ":190," Tĩ":188," Và":232," Ze":737," Vâ":484," Zh":184," Zi":450," Za":1260," Yv":187," Yu":290," Yp":238," Yo":868," Sĩ":506," Ya":432," Ye":349," Nư":140," Tô":962," Xy":153," Tú":282," Tù":160," Xí":179," Vĩ":455," Xô":549," Wü":541," Xã":2079," Vù":202," Vă":1148," Vũ":587," Tư":1025," a ":4962," Yê":501," Sư":226," Sơ":2029," R ":199," Kô":237," Ou":268," Ov":236," Os":655," Ot":299," Or":2938," Op":391," Po":3911," Lê":922," Lé":142," Pl":1371," Pi":2271," Ph":31029," Lã":279," Pf":1720," Lâ":831," Pe":2592," Là":800," Pa":7774," Dư":2196," Ny":327," Nu":290," 
No":7248," Ol":594," Ok":173," On":479," Om":200," Oh":202," Oi":748," Od":181," Oc":483," Oe":269," Ob":773," Oa":270," Ra":1894," Mü":182," T ":156," Mù":252," Mô":838," Qu":14182," Ro":3827," Re":2068," Ri":2185," Rh":4439," Py":3297," S ":230," Pr":3910," Ps":554," Pt":637," Pu":1471," Mé":244," Lý":663," Lü":151," Má":214," Lă":214," Mã":1026," Sy":607," Sw":578," Su":1915," St":3635," Ta":3810," V ":169," Th":42680," Ti":3850," Te":2931," Tr":21258," To":2625," Nó":11014," Ry":154," Nô":330," Ru":1162," Sa":11420," Hư":907," U ":154," Nă":720," Sh":1334," Si":2727," Sc":3070," Se":3434," So":3300," Sp":1984," Sr":367," Sk":306," Sl":517," Sm":206," Uy":393," Va":2209," X ":200," Ve":2261," Vi":16180," Ré":207," Vo":1912," Vu":317," Tu":2947," Tw":154," Ty":200," Ug":456," Uk":408," Ul":140," Um":165," Un":913," Ur":403," Ut":638," ja":224," l ":250," im":322," in":6639," is":5408," it":773," ka":329," m ":439," kh":38847," ki":18267," gâ":429," ke":373," gá":282," ju":151," cô":5020," có":47410," cú":382," cù":1871," ha":11491," dã":502," dâ":28872," he":621," dà":2524," gi":28288," bă":261," gh":729," gl":236," gr":899," go":250," gu":378," cò":3288," hy":276," dù":2107," că":333," hi":6669," dé":1144," ho":9851," ht":218," hu":16965," dò":646," nh":45517," ni":1927," ng":51853," ne":507," na":15500," cư":775," mu":1888," mo":1972," ké":437," on":1221," kê":1776," oc":375," of":6994," ob":204," dư":2565," nu":602," no":1173," hã":1017," hà":8670," há":923," le":1986," li":4644," la":5104," gó":504," kn":913," km":4476," cũ":2936," ko":246," me":1022," mi":9905," cơ":2426," hù":342,"я ":139," hú":203," ma":3748," lu":1700," hó":2026," hò":4796," ly":382," hô":283," bư":9431," hè":160," lo":35801," hì":2834," ag":160," ab":359," ac":560," ad":220,"Hểu":669," am":614,"Hểp":191," an":7056," ap":254," ai":278,"Hển":822," al":2427," au":545,"Hểi":3718," ar":2712,"Hểc":428," aq":275," at":395," as":1145," d ":2111," ba":20628," 가가":227," bi":39922," be":1810," bo":1086," bl":299," by":542," bu":1037," br":705," ca":18336," Xư":246,"Hể ":3004," Vư":1101," er":165," et":2265," es":168," en":1522," em":442," el":437," bà":2390," fe":289," bá":2320," fa":2087," eu":165," ex":489," fu":610," fr":2001," bí":964," bì":10908," fo":1760," bê":1224," bé":146," fl":771," fi":679," bã":190," ge":1821," cá":22553," câ":2231," cà":270," ga":2783," bú":206," bó":2103," cl":550," cm":154," co":7147," cr":532," ce":1064," ch":72993," ci":299," da":4945," cu":4863," cy":157," do":4301," dr":248," de":8416," di":19653," ec":238," ed":190," ea":304," du":2623," 三三":209," vù":22601," vă":2376," ví":173," vì":1170," võ":378," vô":1027," vò":489," và":46932," vâ":230," sĩ":1785,"ка":178," tù":416," tú":155," tô":909," nư":35851," tă":379," xo":207," tê":7377," nơ":1991," să":488," tò":331," xu":3665," tí":18540," tì":8960," mư":601," ww":167," só":461," sô":2005," tâ":11572," tà":3363," tá":3073," xe":994," xa":907," tư":4765," vũ":680," 三之":172," sư":822,"ов":213," yê":400," sơ":282," rư":159," vĩ":1312," xã":6617," xâ":941," xá":864," ru":574," nô":442," nó":3467," hư":1543," sa":4809," nú":1256," se":1352," sc":583," si":4586," sh":865," nă":29145," sn":2777," sm":592," sl":244," sp":4116," so":2264," qu":28552," mó":260,"ви":141," mô":1890," mù":492," ra":5117," re":1311," nà":32102," nâ":291," ri":1195," hơ":1747," né":300," ro":791," lĩ":607," nê":751," lò":161," pu":610," pr":1132," lí":1307," lú":446," s ":1052," lô":926," px":1128," py":148," mã":975," má":2233," mà":2646," lý":2654," mì":566," 
mé":12707," ot":153," kì":215," op":485," kí":1225," or":1862," lá":722," pe":891," là":155391," lã":1013," lâ":684," pa":1485," ký":784," lè":332," pl":606," po":832," lê":796," pi":462," ph":51377," wa":721," sâ":1031," sá":4080," we":797," rõ":266," y ":288," lư":3756," wo":408," wi":1339," wh":1197," sê":264," va":1100," ve":759," uy":141," vo":357," vu":12917," rã":285," vi":8649," ty":993," tu":4206," us":140," mũ":152," up":179," un":373," ta":1679," v ":243," sy":262," st":916," su":3809," lũ":299," tr":156052," to":6595," th":230858," ti":21975," te":899," đu":484," đa":1700," đe":474," đi":17419," đo":2466," Đá":164," Đà":1637," Đâ":3555," Đì":322," Đo":196," Đi":1416," Đa":443," đơ":1908," đư":29691," Đư":847," đí":392," đê":8032," đì":460," đá":3730," đâ":1604," đã":4347," Đă":188," đà":1035," Đô":12690," đă":220," đú":182," đô":27698," đó":3827," đĩ":596,"Lể ":569,"Lểi":178,"Lển":215,"Lểp":1280,"Lểc":1143," Bể":9869," Kể":5918," Hể":9143,"Kể ":5821," Gể":165," Cể":8663," Dể":757," Âm":150," Âu":2129," Îl":330," Ôn":1608,"Nể ":198,"Nển":165,"Nểi":1235," Úc":1243," Ý ":1274," áp":355," án":814," âm":3643," ít":431," ô ":353," ôn":1711,"Mể ":2527," ý ":503,"Mểc":807,"Mểt":1153,"Mểu":174,"Mển":285,"Mểi":146," ăn":626," 가":331," ươ":433," ưu":158,"가":830," hể":63957," gể":12621," lể":23214," kể":7585," cể":67038," dể":12218," bể":45724," Vể":1292," Tể":7366,"Rểp":306," Xể":142," Nể":1895," Lể":3615," Mể":5122," Rể":522," Sể":1093," vể":53702," xể":2191," yể":935," mể":131122," nể":21375," rể":4289," sể":54954," tể":83225,"Tểt":172,"Tểp":225,"Tểc":152,"Tểa":1075,"Tển":2741,"Tểi":245,"Tể ":2617,"Sể ":900,"Vểt":162,"Vển":442,"Vểi":163,"Vể ":500,"ưểt":382,"ưểu":183,"ưển":18802,"ưểm":9501,"ưểi":24657,"ưểc":67509," Để":19647," để":91683," ưể":501,"Ávi":245,"ال":152,"Áo ":1417,"三三 ":318,"三万 ":202,"Âu ":2118,"三专 ":318,"Âm ":149,"ươn":12134,"ươi":146,"가가":499,"Bể ":2169,"AO ":231,"AN ":149,"Bểt":140,"Bểc":4036,"Bển":2788,"Bểo":450,"Cể ":946,"Cểu":671,"Cểp":339,"Cểm":232,"Cển":5510,"Cểc":787,"Dể ":256,"Dểc":190,"Bà ":315,"Bá ":150,"Bài":310,"Bàn":152,"Bác":305,"Fel":162,"Fen":155,"Fer":312,"Fis":282,"Ext":290,"Fas":526,"Fal":176,"Far":151,"Fab":275,"Eri":170,"Ess":158,"Est":392,"Eth":271,"Eup":437,"Eur":1040,"El ":188,"Ele":184,"Eng":198,"Epi":248,"Ent":180,"Các":1801,"Ger":929,"Cát":277,"Geo":862,"Gen":359,"Gla":140,"Gha":245,"Gia":2324,"Gil":159,"Gir":424,"oể ":155,"oểt":2492,"Cá ":259,"oểc":3719,"Gan":147,"Gal":295,"Gam":240,"Gau":147,"Gar":1513,"oển":3941,"oểi":4411,"Gab":195,"Fus":252,"Fro":378,"Flo":588,"Fla":166,"Fle":174,"ũi ":200,"Fra":932,"Fri":349,"ũng":2953,"Fre":553,"Bín":197,"Bìn":1748,"Fon":217,"For":497,"Fou":186,"Dân":1220,"nểu":216,"II ":1176,"Hil":320,"Him":177,"Hin":145,"Hip":145,"nể ":1462,"Hel":390,"nểi":2679,"Hei":256,"nểm":15373,"nển":1144,"Hem":163,"Hen":196,"Hes":405,"nểa":304,"Her":528,"Cúp":236,"Hal":332,"Hai":401,"Han":467,"Ham":344,"Has":179,"Har":748,"Haw":533,"Hau":3745,"Côt":1585,"Guy":153,"Cô ":190,"Gua":732,"Có ":275,"Gui":1036,"Côn":1681,"Gre":1012,"Gri":151,"Gra":2331,"Gro":483,"Glo":169,"Giá":926,"Gon":156,"Goo":205,"Gol":188,"Gom":158,"Úc ":1233,"Inn":161,"Int":561,"Ins":197,"Ill":204,"Ind":2106,"mểc":13680,"mểm":11638,"mểi":2779,"mểu":420,"mểt":99606,"mển":1814,"mể ":1110,"Ibe":205,"亞 ":440,"Hyp":280,"Hyd":396,"Dôm":338,"Hy ":979,"Hun":464,"Huy":2731,"Hue":217,"IV 
":160,"Hoà":1902,"Hor":294,"Hou":153,"Hom":153,"Hon":555,"Hok":148,"Hol":1270,"Hoa":8475,"Arg":562,"Are":201,"Arc":363,"Ard":1593,"Ara":960,"Arm":257,"Ari":480,"Aqu":1651,"Apo":203,"Ath":149,"Atl":649,"Ast":281,"Ass":392,"Asi":256,"Arr":175,"Art":235,"Ave":196,"Auv":549,"Aus":949,"Aur":157,"Aud":253,"Aug":204,"Aub":473,"lể ":2800,"Azu":1003,"lểp":5146,"lểt":155,"lểi":2389,"lển":5223,"lểa":430,"lểc":6489,"Ba ":889,"lểy":462,"Bai":215,"Bal":574,"Ban":6419,"Bac":146,"Bad":1148,"Bay":966,"Bar":929,"Bat":270,"Bas":1136,"Bau":155,"Abr":165,"Aca":233,"Acr":321,"Ach":200,"Ade":163,"Ai ":341,"Aga":200,"Afr":687,"Afg":186,"Air":488,"Ala":309,"Alb":681,"An ":1388,"Alg":185,"Ali":170,"Ale":299,"Alv":153,"Als":515,"Alt":493,"Alm":187,"All":366,"Alp":2942,"Ame":689,"Amb":210,"Ama":449,"Amp":158,"Anh":5927,"Ang":642,"Ana":331,"And":666,"Ant":595,"Ano":159,"Ann":219,"Bus":168,"Bul":625,"Bur":1284,"Buc":492,"Bru":412,"kể ":5372,"Cab":144,"kểt":1768,"Cae":179,"Cal":3283,"Cam":1757,"Cai":141,"Cas":3059,"Car":1359,"Cau":236,"Cat":533,"Cao":644,"Can":1671,"kểc":374,"Cap":594,"Bea":394,"Ber":1074,"Ben":559,"Bel":748,"Bin":143,"Bil":278,"Bis":150,"Bit":253,"Bir":234,"Blu":260,"CP ":196,"Biê":192,"CN ":900,"Bla":380,"Bre":721,"Bra":1413,"Bro":439,"Bri":863,"Bol":471,"Boi":232,"Bon":251,"Bor":635,"Bos":277,"Bot":225,"Bou":1517,"Cyp":409,"Cur":167,"Cub":268,"Cun":184,"Cup":164,"EE ":154,"Des":235,"Deu":424,"Del":231,"Dem":185,"Den":364,"Dam":166,"Dan":842,"Dar":270,"Dav":172,"Dal":154,"Cho":474,"Chr":351,"Che":539,"Chi":4142,"Cic":417,"Chu":975,"Cit":157,"Châ":2351,"Cle":204,"Cla":633,"iểt":19441,"iểu":22084,"iểy":197,"Cel":188,"iểm":11357,"iểp":3887,"iển":94986,"Cen":1295,"Cer":828,"iểc":3158,"iểi":7620,"iểa":1763,"Cha":5749,"Cri":167,"Cra":378,"Cre":395,"Chư":286,"Cro":546,"Chù":202,"Chú":672,"Chí":1230,"Coc":220,"Coe":184,"Cop":262,"Cos":1089,"Cor":1423,"Com":752,"Col":1893,"Con":3653,"Cou":867,"FA ":242,"Drô":335,"iể ":1731,"Edw":139,"Ông":1552,"Ect":200,"Ecu":469,"Eas":364,"Do ":273,"Diê":175,"Dic":144,"之三三":139,"Dit":152,"Dis":351,"hểo":739,"hểp":5189,"hểm":3873,"Dip":407,"hển":23819,"Dio":154,"hểy":11164,"hểu":6003,"Die":258,"hểt":14285,"hểa":1704,"hểi":18265,"hểc":23691,"Di ":179,"hể ":155404,"Dun":239,"Duy":294,"Du ":216,"Dri":386,"Dre":146,"Dra":192,"Dou":158,"Don":392,"Dom":257,"Dor":723,"Cươ":311,"Nev":160,"Neu":541,"Net":191,"Nep":895,"Neo":280,"Nas":316,"Nat":595,"Nav":156,"Nig":396,"Nie":1241,"Nic":337,"Nin":1068,"Nhi":250,"Nha":5926,"Nga":1678,"Ngh":1164,"Ngu":2389,"Ngo":273,"New":2499,"Myr":349,"xể ":903,"Mya":329,"Nak":165,"Nam":11941,"Nan":196,"Nag":208,"Na ":357,"xểp":906,"xểy":267,"Như":191,"Nym":236,"Nhó":149,"Nhà":676,"Nhâ":3285,"Ngô":642,"Ngà":567,"Ngâ":218,"Ngư":724,"Nhĩ":654,"Nov":205,"Ngũ":256,"Nor":4126,"Not":381,"Nob":171,"Noc":1779,"Oec":143,"Dươ":1846,"PG ":211,"Ois":715,"Ohi":151,"Oah":152,"Occ":238,"Obe":662,"Île":330,"Ott":171,"Ovu":166,"Kôn":235,"Oly":147,"Oli":201,"Giể":2035,"Ont":199,"Or ":550,"Opo":183,"Ora":158,"Ore":369,"Orc":304,"Ori":453,"Orn":566,"Ost":243,"Phú":926,"Phù":169,"Phò":192,"Phó":180,"Phá":21296,"Ple":538,"Phâ":188,"Pla":641,"Hiể":1343,"Lê ":891,"Pin":370,"Pit":142,"Phy":669,"Pie":506,"Pic":810,"Pho":672,"Phi":3036,"Pha":566,"Lãn":143,"vểy":569,"Lâm":707,"vển":2537,"vểt":17681,"Pfa":1704,"vểa":164,"vểc":13644,"vểi":7805,"Per":956,"Pet":409,"Pen":544,"Pel":140,"Lào":383,"vể ":11302,"Pay":1416,"Là 
":298,"Pat":164,"Pas":888,"Par":1717,"Pau":274,"Pac":219,"Pan":548,"Pap":721,"Pal":1091,"Pak":192,"Pyr":3193,"Huể":468,"Pte":513,"Pun":221,"Pup":166,"Pue":171,"Puy":394,"Pro":2087,"Pri":425,"Pre":247,"Phư":678,"Pse":388,"Hoể":173,"Pra":808,"Pol":756,"Pom":148,"Pon":298,"Poi":1472,"Pot":227,"Por":309,"uểc":63038,"uểi":2825,"Lăn":196,"uểt":6647,"uển":17332,"Mã ":868,"uể ":2223,"Lý ":659,"Mùa":206,"SA ":174,"Ram":194,"Ran":617,"Quá":225,"Quâ":725,"Môn":650,"Quý":277,"Qua":706,"Qui":159,"Que":829,"Quy":457,"Ita":500,"Isl":594,"Isr":263,"It ":1456,"Ira":481,"Ire":363,"Isè":447,"tểm":507,"tển":38505,"tểi":9573,"tểc":3283,"tểa":923,"tể ":24294,"Jac":218,"Jav":319,"Jan":148,"Jam":357,"tểo":2243,"tểp":1747,"tểt":2043,"Jer":195,"Jea":217,"Biể":504,"Jos":191,"Jor":166,"Joh":334,"Jul":480,"sể ":37621,"sển":14954,"sểm":218,"sểc":1290,"sểa":158,"Kai":155,"Kam":181,"Kal":268,"Kan":358,"Kau":180,"Kat":153,"Kas":184,"Kar":496,"Kaz":203,"sểt":255,"sểp":188,"Ken":722,"Kir":203,"Kit":143,"Kin":907,"Kim":609,"Kho":491,"Khu":1438,"Kha":464,"Khi":230,"Chể":1285,"Khê":158,"Khá":594,"Khô":380,"Kon":253,"Kor":151,"Kre":398,"Gòn":201,"Cuể":281,"Kus":145,"Kur":182,"Hàn":1241,"Leu":183,"Les":367,"Lep":631,"Leo":339,"Len":152,"Hán":6856,"Lei":182,"rểc":1530,"Lea":285,"rển":39207,"rểm":291,"rểi":1137,"Hà ":2463,"Lau":510,"rể ":6793,"Le ":619,"Lak":163,"Lai":390,"Las":248,"Lat":742,"Lar":215,"Lao":140,"Lam":368,"Lan":4696,"Lac":165,"Lab":180,"倉 ":180,"La ":2717,"Liê":1950,"Hér":279,"Diể":335,"Lib":574,"Lie":155,"Lig":183,"Lim":511,"Lin":692,"Lio":171,"Lis":148,"Lit":507,"Liv":154,"Leó":2050,"Hãn":333,"rểt":1287,"Hòa":572,"Lud":184,"Luc":212,"Loà":1669,"Hìn":160,"Loz":163,"Lou":367,"Los":193,"Lot":580,"MS ":483,"Loi":3201,"Lor":2131,"Lon":1378,"Lom":271,"Loa":517,"Ma ":267,"Hóa":437,"Luâ":253,"Lyc":258,"Mei":187,"Men":179,"Mel":544,"Mes":227,"Mer":503,"Meu":1122,"Met":367,"Mec":579,"Meg":242,"Med":321,"Mex":918,"Man":2248,"Mal":1668,"Mar":4888,"Mas":469,"Mag":297,"Mad":710,"Mah":444,"Mai":1077,"Mac":532,"NE ":2847,"May":519,"Mau":531,"Mat":259,"Miê":150,"Mol":400,"Mon":1606,"Mos":1419,"Mor":654,"Mou":470,"Mot":424,"Moz":414,"Mid":2002,"Mic":1239,"Cơ ":351,"Mit":747,"Mir":161,"Mis":522,"Mil":319,"Min":1719,"Mun":171,"Mur":1848,"Mus":259,"Xià":174,"Phể":1906,"Tây":8477,"Tân":1027,"Tào":168,"Tàu":164,"Süd":288,"Sôn":404,"Sóc":214,"Wor":462,"Wol":216,"Séc":256,"Sèv":318,"Whi":209,"èvr":358,"Wik":174,"Wil":393,"Win":429,"Wie":149,"Wit":233,"ère":1422,"Sài":216,"Web":172,"Wei":433,"Lươ":358,"Wes":961,"Sân":446,"Was":264,"War":327,"Wal":818,"ès ":590,"Lưu":290,"Dưể":277,"èn ":152,"èo ":278,"ém ":263,"之丁":166,"QĐ ":168,"之三":478,"之万":178,"ée ":472,"之专":315,"之之":217,"ées":2904,"Vos":497,"Vor":516,"Vol":589,"éc ":283,"Nhể":3766,"Ngể":811,"Viê":165,"évi":139,"Tĩn":188,"ép ":1113,"Zea":402,"Zar":282,"Zam":723,"én ":239,"éo ":609,"éra":396,"ét ":23407,"éri":203,"éné":2942,"Zim":303,"Zel":153,"épa":1146,"Vân":484,"ên ":47944,"êm ":9120,"Yps":167,"Quể":10633,"Yve":176,"並三":153,"Sĩ ":505,"Yor":442,"You":143,"Yon":155,"ênh":268,"êng":589,"专专":219,"专三":225,"êu ":3625,"Yel":146,"三万":220,"三丁":213,"三三":777,"三专":394,"Tô ":277,"Xuâ":498,"三之":347,"Tôn":652,"Túc":179,"丁专":144,"丁丁":142,"Tên":660,"丁三":258,"Tín":160,"丁之":215,"Xuy":470,"ãi ":669,"Syn":143,"Syr":151,"Swi":186,"Swa":238,"Sur":243,"Sum":294,"Sul":327,"Sun":218,"Sud":207,"Str":573,"Stu":169,"Sti":380,"Sto":266,"Sta":800,"Ste":1346,"Ten":237,"Tel":150,"ãnh":975,"ãng":1319,"ão ":484,"Tam":802,"Tan":929,"Tas":235,"Tar":601,"ãn ":565,"Tai":151,"Tak":148,"Ski":144,"ãy 
":559,"Khể":614,"Shi":359,"She":144,"Năm":661,"Sho":220,"Sha":353,"Sim":166,"Sil":271,"Sin":736,"Sie":417,"Sib":258,"Sic":157,"Ses":155,"Ser":590,"Sen":317,"Sel":193,"Hươ":208,"Sei":684,"Seg":435,"Sri":344,"TV ":172,"Spa":202,"Spi":140,"Sph":1036,"Spe":218,"Spr":184,"Sou":1403,"Sol":431,"Som":283,"Son":366,"Sor":294,"Kiể":703,"Slo":396,"Nôn":230,"Rus":514,"Nó ":10964,"Sai":3614,"Sam":307,"Sal":1166,"Saa":431,"Sac":786,"Sab":139,"Sco":422,"Sci":154,"Sch":2083,"Sca":289,"Sax":162,"Sav":348,"Sat":196,"Sau":596,"Sar":1013,"Sap":212,"San":1415,"Sao":173,"Hưn":444,"Sa ":225,"TA ":219,"Res":158,"Rhi":678,"Rhe":1850,"Riv":248,"Ris":546,"Rie":160,"Ric":612,"Red":234,"Rei":173,"Reg":216,"Ren":178,"Rep":253,"Rob":306,"Roc":356,"Rou":1296,"Rot":151,"Ros":295,"Rom":626,"SS ":368,"Rhô":1640,"Ven":831,"Vau":301,"Van":289,"Val":1014,"Var":300,"Vic":424,"Vie":766,"Vir":376,"Vil":850,"Vin":383,"Ver":694,"Vex":266,"Ukr":405,"Uni":646,"Miể":206,"Uy ":300,"Utt":447,"Tră":223,"Luể":194,"Trá":373,"Trà":189,"Trâ":155,"Trì":245,"Bưể":300,"Trư":1573,"Uga":441,"Tex":717,"Ter":1194,"Tha":1705,"The":9028,"Thi":2418,"Tho":351,"Thu":1030,"Til":178,"Tim":168,"Tin":432,"Thà":2636,"Thá":3215,"Liể":150,"Thü":723,"Thô":412,"Tiê":963,"Tor":743,"Tok":152,"Tol":222,"Tom":166,"Tou":295,"Thư":1102,"Tru":10246,"Tro":2116,"Tri":2061,"Tre":371,"Tra":816,"Toà":178,"Tuy":629,"Tur":1442,"Tun":141,"Mưể":195,"ày ":41264,"Tươ":169,"àu ":3188,"gểa":154,"gểc":1587,"gểi":5837,"gểp":336,"gểm":3968,"gển":2170,"gểy":212,"gểt":591,"ành":21823,"àng":7133,"gể ":2467,"ào ":9552,"àn ":5519,"àm ":2556,"ài ":40596,"dể ":1928,"bis":208,"bit":429,"biu":210,"bio":195,"biq":379,"bir":196,"bil":353,"bin":1156,"bii":441,"dểa":1072,"dểc":2450,"bo ":249,"dểi":171,"dểm":153,"dển":5451,"dểu":482,"dểy":237,"áy ":2251,"blo":143,"ble":788,"bli":464,"bla":343,"boa":544,"bol":192,"biê":756,"bon":625,"bom":252,"bop":200,"bor":571,"bot":181,"bos":169,"bou":707,"bbe":548,"be ":978,"áo ":4269,"ban":14532,"bal":489,"bai":195,"bac":1588,"bad":244,"bab":362,"án ":11049,"bay":2258,"bat":528,"bas":507,"bar":886,"bao":2943,"bea":250,"áp ":27496,"ánh":4002,"áng":13329,"bi ":208,"bei":269,"bee":755,"bed":546,"bec":391,"ber":5184,"ben":538,"bel":1263,"bek":171,"bes":289,"bet":259,"bfa":299,"áu ":550,"bia":2262,"bic":185,"bid":364,"át ":6081,"ách":6363,"áce":165,"ái ":6094,"ca ":5266,"car":2682,"cas":523,"cat":809,"cau":258,"can":2819,"cao":13635,"cap":271,"cac":552,"cae":343,"cad":152,"cam":352,"cal":1785,"cai":449,"ce ":3602,"ám ":1096,"bri":1032,"bro":469,"bra":914,"bre":597,"bru":440,"bsi":207,"bur":2940,"bul":333,"bun":198,"bum":722,"but":706,"bus":353,"by ":708,"bwe":288,"ác ":24475,"aka":675,"am ":27480,"ake":553,"aki":591,"akh":416,"aji":229,"ajo":231,"al ":6772,"aja":635,"aii":453,"ail":3696,"ain":10996,"air":827,"ais":1435,"ait":246,"ak ":354,"aig":201,"aid":285,"aic":180,"aia":148,"ây ":27201,"ahi":144,"ahu":249,"ahr":141,"aho":292,"aha":852,"agi":293,"agr":420,"agu":637,"agn":2270,"ago":1655,"aq ":141,"anu":911,"anz":946,"any":498,"ano":1266,"ann":1274,"anm":363,"ant":5267,"ans":1570,"ane":1647,"ang":23334,"anh":11504,"ani":4224,"anj":313,"ank":983,"ap ":200,"ana":4781,"anc":3852,"and":17225,"amu":239,"amm":581,"amo":823,"amp":3495,"ams":266,"ami":3273,"ame":2454,"amb":1921,"ama":1996,"ao ":21236,"alz":1868,"aly":679,"alv":524,"alu":519,"alt":1192,"als":583,"alp":280,"alo":1138,"alm":374,"all":4541,"alk":430,"alg":162,"ali":4610,"alc":275,"ald":946,"ale":3407,"alf":143,"ala":5532,"alb":1038,"an 
":27435,"aku":180,"ako":211,"aba":1197,"abe":439,"abi":699,"abl":307,"abo":447,"abr":553,"abu":216,"abw":286,"ae ":31054,"aca":592,"aal":305,"aar":275,"ad ":1523,"ac ":1152,"âng":210,"ab ":257,"aft":152,"aff":222,"ai ":10857,"aga":1564,"age":954,"aeo":180,"aen":693,"ael":524,"aes":144,"aer":162,"Vươ":859,"aei":212,"ah ":482,"âte":406,"ado":1640,"adr":334,"adi":656,"âu ":6939,"ade":2368,"aea":378,"aec":197,"ag ":159,"ady":140,"adt":318,"adu":484,"aco":486,"ack":866,"aci":2053,"ach":4974,"ace":5973,"acc":246,"ada":2077,"af ":167,"act":895,"acu":424,"acr":438,"azo":203,"azi":604,"aze":208,"aza":466,"axi":240,"axo":326,"az ":215,"ayo":158,"ays":2306,"aya":1069,"aye":1477,"ân ":71107,"ba ":1967,"âm ":6828,"aqu":368,"at ":2191,"arh":179,"arg":1494,"are":4513,"ard":3817,"arc":1390,"arb":768,"ara":4118,"arp":777,"aro":1967,"arn":2152,"arm":452,"arl":768,"ark":1469,"ari":8943,"aru":469,"arv":290,"arr":2293,"ars":863,"art":4067,"au ":6244,"asa":466,"ary":947,"arz":380,"asi":1358,"ash":1142,"asc":1250,"ase":497,"aso":213,"asp":226,"ask":259,"asm":354,"aon":161,"ar ":3587,"apa":481,"ape":779,"api":791,"aph":710,"apl":205,"apo":627,"app":349,"apt":205,"apu":632,"as ":6065,"ava":1308,"ax ":315,"aux":509,"auv":229,"aut":4199,"avo":316,"avi":854,"ave":987,"awe":151,"ay ":14622,"awa":1004,"awi":403,"ata":3801,"asu":225,"ast":6128,"ass":2666,"atr":716,"ato":1406,"ate":3193,"atc":165,"ati":4840,"ath":1382,"aua":167,"auc":575,"aub":164,"att":609,"ats":390,"atu":1696,"aty":238,"aul":975,"aum":254,"aun":342,"aur":1043,"aus":1014,"aud":522,"aue":262,"aug":153,"aui":145,"bể ":9115,"Wür":527,"bểy":259,"bểt":2371,"bểu":602,"bểi":3447,"bểo":909,"Xã ":2077,"bển":18106,"bểc":10801,"Thể":19271,"Tiể":1514,"Trể":2534,"Vĩn":400,"Xíc":159,"Võ ":184,"cểu":3163,"cểt":394,"cểp":4267,"cểc":593,"cểa":46999,"cển":3734,"cểm":741,"cểi":1406,"Hưể":237,"Vùn":202,"cể ":5697,"Văn":1172,"Sư ":192,"Viể":12501,"Vũ ":518,"Tư ":617,"Tuể":240,"Xô ":542,"Yên":481,"Sơn":1964,"ji ":324,"jar":408,"jan":213,"biể":38109,"jo ":152,"itr":907,"ito":2297,"itu":456,"itt":1509,"its":449,"itz":828,"ity":598,"ism":206,"isl":191,"iso":476,"isp":337,"iss":3448,"ist":3624,"ita":3373,"itc":146,"ite":2019,"ith":2136,"iti":2423,"ivo":162,"ius":1319,"ium":1206,"iva":362,"ix ":837,"ivi":819,"ive":1549,"ipo":143,"ipp":1792,"ipu":171,"ipt":450,"ipi":254,"iph":349,"ipl":287,"ilô":10662,"is ":16454,"ion":4473,"iop":856,"ior":202,"ios":885,"iot":545,"iou":177,"ioi":173,"iol":823,"ipa":1057,"ipe":657,"ir ":1809,"iru":298,"irs":241,"ück":162,"iro":773,"irk":203,"irl":235,"iri":455,"isi":750,"ish":1324,"ise":1480,"isc":1025,"isa":361,"iqu":1196,"ire":3687,"irg":448,"ira":1107,"irc":652,"it ":1527,"ja ":441,"iya":187,"iz ":244,"eón":2056,"가가가":272,"izo":396,"ize":388,"iza":272,"kim":1056,"kil":10715,"kia":494,"kin":3453,"kip":325,"kir":466,"kis":481,"km ":3828,"chể":21772,"ki ":1183,"khi":3505,"út ":627,"khe":165,"kha":834,"khu":15081,"kho":6571,"gày":9377,"gái":216,"kel":215,"ken":731,"kes":174,"ker":740,"ket":226,"key":304,"gân":416,"gây":413,"ke ":634,"úp ":776,"gàn":962,"úng":1614,"kra":492,"kre":255,"kt ":229,"cũn":2434,"ểa":61690,"ku ":366,"km²":641,"kot":155,"kor":174,"kom":159,"kok":195,"ks ":691,"ể ":376292,"úy ":216,"cũ ":497,"kno":896,"kka":183,"khô":4285,"khó":360,"khí":730,"khú":467,"ko ":151,"khá":4090,"kle":735,"kla":228,"buể":254,"ểo":9548,"ểp":25031,"ểk":175,"ểm":59218,"ển":369560,"ểi":112828,"ểc":255331,"ểy":15079,"ểt":179295,"ểu":43805,"kaz":157,"gà ":270,"kat":204,"kar":238,"kas":219,"kan":739,"kal":182,"kam":185,"kai":366,"ka 
":1184,"cùn":1840,"cúp":216,"ha ":8300,"ùng":27727,"ham":3795,"han":4769,"hao":550,"hap":378,"hai":4690,"hal":2163,"hau":2057,"hav":347,"har":4673,"has":721,"hat":786,"hae":501,"hag":376,"hab":337,"had":177,"hac":331,"hay":7652,"he ":18327,"dàn":837,"dài":1594,"hel":1664,"hei":3775,"hec":208,"hed":210,"hea":373,"hey":228,"hev":147,"het":420,"hes":679,"her":2719,"heo":10006,"hen":1600,"hem":538,"hi ":13142,"dây":222,"dân":28557,"dãy":398,"căn":327,"hig":332,"hie":278,"hid":1412,"hic":705,"hia":1555,"hip":346,"hio":999,"hin":3454,"him":2986,"ùy ":240,"hil":2244,"hik":232,"hii":300,"hiu":173,"his":1013,"hit":572,"hir":444,"hn ":286,"hla":242,"hle":1267,"hli":520,"hlo":240,"ho ":7256,"hma":350,"gma":174,"go ":1433,"giá":5603,"gme":307,"già":408,"glo":256,"gle":536,"gli":276,"gla":529,"gko":162,"gog":725,"gny":420,"ghĩ":2919,"gno":258,"gni":256,"gne":3863,"giú":291,"gna":708,"úa ":604,"giô":172,"gs ":297,"goz":262,"úc ":3114,"gom":139,"gol":384,"gon":1688,"gos":596,"gor":601,"gov":334,"gu ":148,"goà":839,"gro":663,"gra":1031,"gri":527,"gre":280,"gto":303,"gui":371,"gum":182,"gul":499,"có ":47334,"gua":512,"gue":1969,"gy ":222,"cô ":368,"guy":4577,"gur":185,"gus":453,"gun":159,"còn":3275,"úi ":1356,"côn":4644,"gyr":164,"iai":626,"iam":328,"ial":1014,"iao":1227,"ian":5609,"ias":397,"iar":265,"iat":696,"ic ":3037,"iac":1407,"iae":280,"ibl":173,"ibi":314,"ibo":164,"ibu":345,"id ":1469,"iba":306,"ibb":546,"ibe":1206,"ia ":29704,"iet":540,"ieu":473,"iel":1226,"ien":2075,"ier":2747,"ies":4767,"ied":1831,"ieg":187,"ig ":1226,"ifo":1413,"iff":148,"ife":614,"ifl":166,"ifi":693,"icr":1054,"ics":295,"ict":897,"icu":1741,"ico":2316,"ick":622,"ici":3591,"ich":3932,"ice":765,"ie ":3741,"ica":6048,"idu":239,"ids":175,"ido":567,"idi":2598,"ide":2455,"ida":26239,"iid":4750,"il ":3121,"ija":174,"iji":157,"im ":5476,"ika":371,"ige":770,"iga":742,"ii ":1370,"igm":416,"igh":923,"igi":710,"igu":318,"igs":187,"igr":220,"igo":300,"ign":1169,"iha":223,"ik ":169,"imo":742,"imm":158,"imp":567,"ime":1906,"imi":643,"ip ":810,"inc":1322,"ind":1704,"ina":4704,"inb":264,"imu":305,"inn":513,"ino":1469,"int":4526,"ins":1803,"inf":237,"ine":10837,"inh":13778,"ing":8141,"ini":3074,"inl":2046,"ink":351,"ioc":292,"iod":167,"inu":932,"inv":156,"inx":181,"iny":208,"iko":368,"iki":420,"ike":257,"ila":1297,"ilb":181,"in ":13262,"ilo":550,"ill":9682,"ilh":181,"ili":3385,"ild":355,"ile":3779,"ima":1310,"imb":551,"io ":1175,"ily":2239,"ils":2324,"ilu":323,"how":204,"hol":800,"hom":456,"hon":2401,"hoi":164,"hos":411,"hot":229,"hou":479,"hoo":159,"hop":241,"hor":2606,"hoa":5214,"hof":203,"hoe":206,"hod":441,"hoc":207,"hni":161,"hne":266,"dée":277,"hme":233,"hmi":203,"hiê":5040,"dép":1137,"hua":252,"htt":326,"htr":324,"hth":197,"hte":186,"hst":227,"hse":1600,"hoá":860,"hoà":1969,"hu ":18061,"hry":423,"hro":1154,"hre":321,"hri":577,"ùa ":1067,"ht ":687,"hra":468,"hya":190,"huê":158,"hyl":1269,"dòn":604,"hy ":171,"hwa":665,"hwe":313,"hum":443,"hun":1586,"hus":902,"hut":182,"hur":289,"huy":20578,"Vưể":237,"dùn":1738,"dù ":368,"hyt":176,"hys":223,"hyr":202,"huô":174,"ùi ":234,"ffe":313,"ffi":237,"fer":812,"báo":721,"bác":152,"fen":465,"bán":1218,"fel":1035,"fgh":188,"bà ":200,"fas":191,"far":156,"fam":2105,"fal":2041,"bày":158,"bàn":328,"bào":422,"bài":1285,"ff ":218,"eya":142,"ext":206,"eyr":325,"eyh":211,"eye":179,"exa":986,"ez ":316,"exi":1325,"exc":181,"ezu":350,"ezi":175,"eta":842,"ete":743,"eti":1014,"eth":666,"etn":165,"etl":616,"esp":684,"esn":283,"eso":340,"est":3808,"ess":1434,"esw":957,"ev 
":202,"euc":337,"eud":540,"eui":263,"eum":201,"eto":465,"etr":999,"ets":459,"ett":1071,"etu":203,"etw":156,"etz":312,"ew ":2560,"eve":622,"eva":566,"evi":1144,"euv":222,"eut":564,"eur":1318,"eus":1354,"ex ":606,"euz":190,"eux":671,"ey ":1664,"ewa":232,"erö":586,"epe":234,"epi":495,"eph":665,"er ":9592,"epa":418,"eot":213,"eor":527,"eom":471,"eol":262,"eop":530,"eon":709,"es ":26910,"ept":1404,"epu":235,"epo":154,"erk":202,"erl":752,"eri":6361,"erg":3169,"erh":199,"ere":2445,"erf":557,"erc":1171,"erd":573,"era":3401,"erb":858,"et ":4591,"equ":162,"esl":202,"esh":1269,"esi":2021,"esc":975,"ese":753,"eu ":303,"esa":259,"erz":260,"ery":556,"erv":528,"eru":1012,"erw":278,"err":2985,"ert":1566,"ers":4623,"ern":4630,"erm":2107,"erp":282,"ero":2376,"eki":196,"en ":12018,"elb":206,"ela":2654,"eld":1258,"elf":146,"ele":1267,"eli":1798,"elg":160,"elm":262,"elk":404,"ell":9746,"elo":933,"elu":208,"els":642,"elt":280,"ely":341,"eo ":10593,"eiß":164,"emb":1047,"ema":1158,"eme":2947,"emm":256,"emo":539,"emi":1585,"emp":238,"ems":155,"emy":139,"enf":291,"ene":1945,"enh":484,"eng":730,"enb":2082,"ena":1558,"end":2473,"enc":2127,"eno":956,"enn":3389,"enk":343,"enl":192,"eni":2073,"enu":1664,"ens":4503,"ent":10489,"enr":274,"enz":420,"eny":515,"eoc":157,"ego":1228,"ege":478,"egg":139,"egi":670,"egr":229,"egu":327,"ek ":217,"eic":752,"eis":833,"eir":787,"eim":1035,"eil":952,"ein":4903,"eie":545,"eid":730,"eig":203,"eif":191,"el ":3452,"eiz":186,"eit":258,"em ":1658,"öst":587,"giu":194,"gis":185,"gil":310,"gin":1363,"gio":553,"gid":1547,"gic":199,"gia":8820,"ght":794,"băn":187,"gho":161,"ghi":3850,"ghe":189,"gha":488,"ggi":146,"gge":165,"câu":470,"cây":1629,"gi ":387,"gen":5062,"cán":798,"cáo":198,"ger":1519,"ges":1085,"gh ":607,"các":18946,"geb":333,"cái":701,"gem":243,"gel":544,"cá ":1776,"ge ":1808,"gae":147,"gai":1612,"gas":1474,"gar":1812,"gau":202,"gat":452,"gay":317,"gam":376,"gal":707,"gan":1692,"gap":295,"ga ":2819,"bút":167,"Tưể":239,"fur":264,"fus":239,"bón":2056,"ful":164,"fun":375,"ft ":439,"fra":309,"fre":507,"fri":815,"bín":794,"fro":1604,"fou":879,"for":2385,"fon":152,"fol":504,"bìn":10852,"bên":1143,"bí ":160,"fle":146,"fla":250,"fli":289,"flo":441,"fly":418,"fic":547,"fie":487,"fil":195,"fin":314,"fis":489,"da ":3697,"de ":8611,"dac":762,"dad":140,"dal":972,"dai":189,"dag":446,"dae":24708,"dat":480,"dar":398,"dap":183,"dan":4922,"dam":377,"cun":718,"cul":2298,"cum":328,"cua":546,"cty":284,"ctu":2164,"ctr":238,"cto":873,"cti":1458,"cte":647,"cta":670,"cy ":251,"cus":673,"cur":399,"cut":255,"cyc":140,"cks":263,"cki":166,"ckl":786,"cla":486,"chá":201,"cle":346,"châ":14860,"cky":262,"chí":8076,"chò":225,"clu":420,"chó":180,"cli":182,"ché":314,"chì":193,"clo":464,"chù":312,"co ":1548,"chú":1406,"coi":717,"cod":256,"coa":287,"cob":146,"coc":214,"con":3655,"col":1035,"com":2135,"cor":1035,"cos":565,"cop":801,"cot":443,"cou":1134,"coz":183,"cs ":366,"ct ":454,"cre":241,"cra":438,"chơ":592,"cri":479,"cro":1725,"chư":1623,"ccu":244,"cci":838,"cco":265,"cca":156,"cea":4824,"ch ":37737,"cer":1071,"ces":531,"cet":142,"cen":1512,"cep":386,"Xươ":220,"cel":1002,"ced":395,"cha":3033,"chw":543,"chu":6009,"chy":360,"cia":1452,"ck ":1564,"cie":3416,"cid":2524,"che":4294,"chl":1776,"chi":13490,"cho":7644,"chm":217,"chn":259,"chs":1959,"cht":448,"chr":711,"cil":2248,"cif":277,"cis":311,"cit":448,"cin":1085,"cio":750,"cip":1009,"cm ":153,"cke":753,"ed ":4935,"eba":285,"ebe":884,"ôn ":3722,"ebi":183,"ebo":301,"ebr":934,"ebs":216,"ebu":294,"ec ":362,"eac":334,"ôm 
":271,"eag":170,"eaf":201,"eae":4646,"ead":460,"ean":1098,"eal":621,"ear":1037,"eas":698,"eat":882,"eau":1120,"eb ":303,"ea ":3947,"efo":223,"efe":249,"ei ":970,"ega":773,"een":1179,"eel":145,"eed":257,"ees":218,"eer":175,"eep":145,"eet":728,"edi":1118,"ede":1955,"ône":1757,"ông":42975,"eda":404,"eg ":341,"edt":284,"edo":1988,"edr":212,"eck":1046,"ech":639,"eci":3537,"ece":167,"eca":304,"ee ":1150,"ôme":730,"ecu":225,"ect":1210,"eco":767,"dwi":249,"dwe":183,"dwa":258,"dy ":278,"dur":468,"dus":275,"duy":1143,"ôi ":2569,"dor":2168,"dop":306,"don":2791,"dom":306,"dol":520,"dow":395,"dov":304,"dou":172,"dos":360,"ds ":1070,"doa":646,"doc":1372,"dog":488,"dun":458,"dul":398,"duc":230,"dri":731,"dra":523,"dt ":665,"dre":808,"du ":1084,"dro":986,"dha":326,"dge":229,"dic":469,"did":312,"dia":1872,"ôte":1594,"der":3477,"des":3199,"ômé":10659,"dea":701,"ded":323,"dec":231,"del":1465,"den":4157,"dem":911,"deo":220,"di ":3427,"dle":656,"dla":192,"do ":4210,"diu":260,"din":1244,"dio":353,"dis":2108,"dit":379,"die":2847,"dil":249,"rgy":150,"rgu":287,"rhe":221,"rha":208,"rho":196,"rga":532,"ri ":2301,"rgi":1202,"rgh":152,"rge":1773,"rgo":1290,"rgn":567,"ret":1055,"res":3724,"rev":398,"reu":912,"rex":297,"rey":288,"rfa":190,"rfl":255,"nân":179,"rdu":164,"rds":270,"rdr":159,"này":31504,"rg ":4949,"reb":939,"rea":2006,"ree":1006,"ref":247,"rec":523,"red":781,"rei":1635,"reg":1203,"rem":776,"ren":4076,"rel":1428,"rer":300,"reo":199,"rep":265,"rf ":1207,"rda":570,"rcu":715,"rct":554,"rdo":799,"nào":548,"rdi":1755,"rde":2543,"re ":9446,"rbu":335,"rco":417,"rci":274,"rch":1874,"rce":427,"rca":280,"ray":496,"raz":574,"rd ":2860,"rao":655,"rap":552,"raq":160,"rar":494,"ras":1804,"rat":1838,"rau":875,"rav":215,"rbi":1026,"rbo":389,"rba":580,"rbe":418,"rai":3535,"rah":190,"rag":1656,"ran":8266,"ram":1112,"ral":2485,"rak":217,"rab":584,"raf":217,"rae":749,"rad":1615,"rac":3183,"rpu":270,"rpo":754,"rs ":2330,"rpe":165,"rpa":404,"rpi":173,"rph":340,"ror":201,"ros":1675,"rot":1221,"rom":2423,"ron":57984,"lĩn":607,"roo":669,"rop":3656,"roy":149,"rou":1200,"rov":1781,"row":593,"rob":744,"roa":358,"rod":644,"roc":1935,"roi":481,"rol":951,"rof":216,"rog":648,"nên":713,"rno":358,"rns":159,"rna":1180,"rne":2695,"rni":1419,"riè":172,"ném":189,"riê":408,"rmo":497,"rms":184,"ro ":929,"rma":3617,"née":2905,"rme":709,"rmi":637,"rly":155,"rlo":157,"rli":457,"rld":399,"rle":351,"rla":627,"rn ":3576,"hơn":1695,"hơi":801,"rki":204,"rke":393,"rka":202,"né ":169,"rm ":302,"riz":418,"rix":394,"rl ":161,"rip":365,"rio":1072,"rit":3591,"ris":2983,"riv":318,"riu":456,"rig":1218,"rii":3008,"ril":1020,"rin":4090,"rim":450,"ria":5391,"rib":1175,"ric":5324,"rid":4461,"rie":2155,"rif":320,"rk ":1670,"hơ ":851,"rwe":155,"nói":685,"rz ":208,"hư ":5084,"nôn":377,"rya":216,"ryc":442,"rug":231,"rue":707,"ruc":377,"rup":171,"run":25178,"rum":705,"rul":242,"ruy":2675,"ruz":213,"rus":1414,"rva":295,"rvi":473,"rve":224,"rwa":347,"ry ":2417,"rsi":891,"rso":252,"roß":201,"rsc":361,"rsd":187,"rsa":1123,"rsb":257,"rsh":319,"rse":899,"rta":737,"óc ":726,"rst":546,"rto":415,"rtb":145,"rte":1749,"rth":2805,"rti":1215,"nó ":2643,"rub":265,"rts":371,"rtr":430,"rtu":180,"rtt":513,"rt ":1957,"óa ":2701,"rqu":185,"rro":1564,"rrh":197,"rri":1788,"rre":2216,"rra":3074,"ru ":477,"rry":300,"rru":147,"sab":140,"sac":1864,"sai":281,"sak":172,"sal":439,"sam":384,"sba":297,"sbe":425,"sao":776,"óng":4090,"san":1255,"sau":2887,"sat":228,"sas":328,"sar":660,"oà ":337,"óp ":288,"sa ":2015,"núi":1191,"óm ":1521,"ón 
":2564,"rze":166,"hưa":582,"rys":387,"ryo":172,"ryp":167,"ryl":162,"ói ":993,"ryn":172,"hưn":1523,"sha":619,"năm":27971,"năn":1182,"sho":293,"shr":233,"sht":381,"she":876,"shi":1264,"si ":634,"oãn":159,"sge":580,"sie":368,"sid":739,"sic":687,"sia":3700,"sk ":591,"shw":300,"shu":294,"sit":709,"sis":3021,"sip":265,"sin":5091,"sio":465,"sil":1895,"sim":553,"sii":258,"sif":151,"sig":220,"scr":307,"scu":281,"òng":2223,"oài":34230,"oàn":5706,"sdo":465,"sbu":460,"se ":4575,"oá ":380,"sca":1351,"sce":236,"sci":1194,"sch":1792,"sco":719,"sey":198,"ser":1028,"ses":272,"set":418,"oát":273,"seu":551,"sh ":1939,"sfe":321,"sea":715,"sei":161,"see":284,"sed":245,"sec":229,"sep":202,"sen":3196,"oán":1638,"sem":1371,"sel":2101,"hươ":4318,"spo":375,"shū":247,"spr":259,"sph":157,"spe":3907,"spi":1229,"spa":351,"sot":292,"sou":945,"sol":467,"som":376,"son":1789,"sop":308,"sor":191,"sof":194,"soi":556,"soc":217,"su ":286,"sra":294,"st ":2960,"squ":256,"ss ":872,"sli":218,"sky":167,"kiể":2684,"sla":1569,"sle":323,"ski":288,"sks":212,"khể":3237,"ska":369,"sna":2980,"sni":207,"sne":250,"smo":583,"siê":264,"so ":947,"sma":1028,"smi":179,"swi":1048,"syn":165,"syl":224,"sse":3231,"soá":206,"ssa":1676,"sso":1413,"ssi":3339,"ssu":544,"ste":5951,"stf":168,"sth":161,"sta":3335,"sto":2068,"stp":174,"sti":4685,"stl":165,"stu":445,"str":4349,"sts":262,"sty":144,"sub":863,"sul":490,"sum":304,"sup":191,"sun":266,"sus":998,"sur":1961,"suy":191,"òa ":5602,"sy ":172,"swa":243,"tai":3180,"tak":236,"tal":2442,"tae":263,"tag":338,"tah":173,"tab":252,"tac":777,"tad":502,"tay":436,"tax":151,"tau":185,"tat":1298,"tas":231,"tar":1441,"tap":149,"tan":2995,"tam":442,"tch":446,"òm ":317,"te ":8300,"tbu":170,"òn ":4112,"ta ":6742,"ký ":793,"ozè":153,"pa ":420,"làm":1846,"làn":610,"lá ":624,"pe ":1111,"par":2272,"pat":396,"pas":155,"là ":152991,"pac":448,"pag":1801,"pal":1430,"pan":1032,"phe":584,"pha":2087,"phu":325,"phr":290,"pho":3352,"phn":208,"phi":5842,"pi ":273,"lãn":895,"ph ":150,"lâu":392,"lâm":192,"pea":417,"pec":3297,"ped":468,"pen":678,"per":2665,"pet":646,"pes":3121,"pel":638,"pla":748,"hiể":23255,"pli":245,"phâ":2752,"phá":7538,"ple":773,"lès":368,"phí":12064,"phê":328,"plo":417,"phé":481,"phò":631,"phó":453,"phy":787,"pia":524,"pid":686,"pic":879,"pie":144,"pil":547,"pin":2627,"pio":231,"pir":455,"pis":429,"pit":358,"por":1000,"pop":359,"pot":426,"pos":699,"pom":618,"pon":447,"pol":658,"poc":162,"pod":1012,"ps ":851,"hū ":248,"ppi":1358,"ppo":224,"ppe":777,"phú":224,"po ":176,"lí ":1217,"lên":761,"pta":227,"pse":181,"psi":853,"pso":273,"ptu":208,"pua":385,"pub":253,"puc":309,"pte":1527,"pti":1324,"pto":542,"ptr":151,"pra":304,"hoể":8808,"pt ":251,"phư":1981,"pri":732,"pre":749,"pro":835,"huể":53256,"lôn":185,"lôm":10657,"pur":817,"pus":345,"pun":212,"pul":573,"lô ":685,"px ":1130,"pyr":166,"lý ":2650,"lúc":371,"mà ":1433,"màn":167,"máy":1955,"mã ":867,"màu":877,"mét":23053,"méo":221,"mìn":517,"qua":5837,"mô ":773,"quy":3085,"que":2033,"qui":2180,"món":229,"môn":561,"môi":520,"quâ":3680,"quá":966,"quê":311,"quý":155,"mùa":374,"ra ":16536,"ngo":2830,"ngi":1741,"ngl":604,"ngk":252,"ngu":5422,"ngr":255,"ngt":407,"ngs":604,"ni ":1381,"nge":3237,"ngh":7783,"nga":2179,"nho":251,"ndé":306,"nhu":514,"nha":2433,"nhi":5815,"nhe":263,"neg":239,"nei":512,"nel":1599,"nen":680,"nem":262,"neo":282,"ner":1201,"net":904,"nes":4373,"nev":164,"neu":463,"ng ":357107,"nea":1462,"neb":278,"nec":300,"ned":346,"nee":182,"nfo":163,"ney":270,"nez":428,"nh ":132657,"nfe":333,"nct":449,"nco":534,"nci":1276,"ncl":257,"nce":3207,"nch":2383,"nca":570,"ne 
":19277,"nbu":1513,"ndu":416,"ndr":1581,"nds":990,"ndo":2398,"ndl":619,"ndh":176,"ndi":4609,"nde":3318,"nda":1999,"ncy":170,"nal":1151,"nam":13111,"nan":913,"nar":774,"nac":1244,"nad":927,"nae":1024,"nag":697,"nai":3249,"nbo":145,"nbe":462,"nd ":12230,"nba":380,"nav":164,"nau":516,"nat":2078,"nas":580,"nay":2459,"na ":6835,"iúp":291,"가 ":328,"cư ":415,"iôn":176,"myc":163,"nya":523,"nyi":193,"nz ":267,"ny ":1333,"nvi":479,"nx ":222,"nul":298,"num":443,"nus":3075,"nut":506,"nty":550,"nto":2015,"ntu":367,"nts":334,"ntr":2166,"nti":3110,"nth":1365,"ntl":140,"nta":3026,"nte":4904,"nsu":730,"nsy":214,"nso":325,"nst":981,"nsf":180,"nse":669,"nsh":461,"nsi":2204,"nsl":567,"nsk":139,"nsc":152,"nsa":421,"nsb":373,"như":5336,"nt ":8221,"ngư":22262,"nqu":229,"ns ":3001,"noc":411,"nod":303,"hĩa":2885,"nob":365,"nol":370,"noi":432,"nop":786,"nom":551,"non":799,"not":862,"nos":776,"nor":939,"now":1063,"nov":236,"nou":281,"nne":5126,"nna":750,"nno":345,"nni":716,"nns":289,"nma":453,"niê":790,"nhâ":2593,"nn ":413,"nla":2159,"nhá":489,"nhà":6343,"nhó":1299,"nly":637,"no ":1110,"hĩ ":710,"nhì":354,"ngâ":223,"nke":271,"nki":377,"ngà":10009,"nka":598,"ngô":1896,"nkt":202,"nja":290,"nii":438,"nig":289,"nif":242,"nie":473,"nid":3745,"nic":2037,"nia":5524,"nk ":242,"nix":164,"niu":181,"niv":273,"nis":1435,"nit":1158,"nio":409,"nim":283,"nin":1178,"nik":149,"nil":357,"ogr":267,"ogu":214,"ogi":228,"ogl":349,"ogo":319,"ogn":1408,"oga":317,"oge":346,"oi ":1139,"ohn":303,"oha":229,"ohe":168,"ogy":195,"ois":1103,"oir":3370,"oit":1455,"oin":300,"oil":149,"oid":1389,"oie":174,"ok ":359,"ol ":657,"oce":820,"och":1882,"oci":330,"ock":741,"ocl":146,"oco":568,"ocr":154,"obu":172,"oe ":211,"oca":1110,"occ":493,"ode":1045,"odi":450,"odo":1089,"odr":210,"oct":1918,"ocy":317,"of ":6954,"oda":691,"dươ":848,"oen":367,"odu":285,"oed":204,"og ":551,"oft":282,"off":252,"ofe":161,"oa ":13629,"oc ":1250,"oan":1921,"oad":230,"oba":818,"od ":1168,"oar":504,"oas":324,"oat":240,"obo":175,"obr":145,"obl":325,"obi":968,"obe":655,"nym":211,"nza":905,"nze":176,"oya":255,"oxy":147,"oxi":202,"oz ":147,"guể":1293,"ows":383,"owl":142,"own":1769,"owi":183,"ozo":255,"oza":724,"otu":288,"oud":174,"oub":187,"ouc":419,"oua":154,"ow ":1038,"oti":1060,"oth":1496,"ote":697,"ott":720,"ots":282,"otr":764,"oto":966,"ost":2982,"osu":324,"ota":1031,"ov ":258,"osi":491,"osh":227,"ose":2073,"osg":493,"osp":425,"oss":826,"osm":547,"oso":413,"osn":140,"oy ":273,"owe":547,"ovi":954,"ovo":159,"ouv":263,"oux":242,"ova":705,"ove":2204,"oug":763,"oui":391,"oul":679,"oun":2537,"oup":387,"ous":2368,"our":3149,"out":2276,"opo":1519,"opp":188,"opi":1120,"opl":220,"ope":1422,"oph":3191,"opa":411,"os ":2470,"opu":314,"opt":1403,"ops":1255,"oon":440,"ool":189,"oom":247,"ook":283,"oog":197,"ood":513,"or ":3376,"oot":347,"oor":232,"ork":631,"orl":473,"orm":2975,"orn":1953,"oro":870,"orp":1047,"orr":2602,"orc":598,"ord":3003,"ore":2045,"orf":1239,"org":708,"ori":3584,"ou ":1707,"osa":1094,"osc":366,"ort":2762,"ors":1266,"oru":667,"ory":1225,"kín":427,"kíc":629,"ot ":1197,"goể":939,"m² ":639,"orb":599,"ora":2404,"oqu":150,"ola":2194,"old":517,"kê ":1566,"giể":12764,"on ":15999,"oli":2444,"oll":1993,"olf":295,"ole":1581,"ols":1043,"olt":149,"olm":148,"olo":2220,"oly":684,"olz":312,"olu":1314,"olv":159,"oka":251,"ghể":2336,"om ":2129,"okk":143,"oki":163,"oke":162,"oku":288,"ona":2358,"ond":2416,"onc":461,"onf":214,"one":2805,"ong":59905,"oni":4577,"onl":675,"onn":2036,"kên":176,"ono":1589,"ons":1629,"ont":3656,"onu":697,"onv":312,"ony":549,"gũ ":341,"kí ":158,"oma":3160,"oo 
":152,"ome":1667,"omb":1058,"omi":1212,"omm":2408,"omp":760,"omo":798,"kéo":312,"omu":461,"omy":255,"op ":502,"kì ":200,"la ":10761,"ính":11878,"ín ":1003,"há ":535,"le ":10516,"lca":204,"ít ":628,"lch":163,"lf ":224,"lde":761,"ldb":159,"lda":161,"hào":150,"ldo":198,"hàn":21780,"hàm":435,"hài":213,"ldi":250,"lab":417,"lac":1406,"lad":738,"lae":401,"lah":182,"lag":424,"laj":292,"lai":1546,"lal":167,"lan":10384,"lam":1066,"lap":209,"lao":259,"lar":4422,"lat":2293,"las":1229,"law":585,"lau":512,"lav":645,"lay":1413,"lba":423,"hà ":7105,"ld ":1873,"lbe":635,"lbi":189,"lbo":527,"lbu":797,"gô ":876,"góc":144,"ky ":446,"ích":17834,"cuể":3904,"gôi":662,"góp":269,"gôn":1009,"hìn":3185,"llé":141,"hìm":147,"lpe":2886,"lpi":216,"lph":225,"ls ":3004,"híc":534,"hía":11942,"lok":158,"lon":3179,"lom":1001,"lop":1213,"lor":1602,"lod":250,"loc":750,"loe":167,"log":693,"loi":242,"los":940,"lot":496,"lou":372,"lov":598,"low":793,"hêm":276,"lob":302,"hí ":2215,"liê":1520,"hép":885,"lmo":317,"lme":324,"lma":282,"hì ":1109,"lti":486,"lto":187,"hó ":498,"lud":253,"luc":198,"lue":351,"lso":351,"lst":1066,"lta":363,"lte":462,"lu ":183,"lse":196,"loà":31500,"lsa":619,"ía ":12221,"hín":8169,"lt ":1144,"lhe":150,"lha":193,"hãn":1016,"lge":195,"li ":845,"lga":417,"hât":447,"hâu":5143,"hân":32338,"hâm":245,"lfe":172,"ley":541,"lex":375,"leu":725,"lev":279,"les":4804,"hát":4309,"let":890,"ler":1633,"leo":683,"háo":303,"lep":286,"háp":24791,"hám":208,"lem":456,"len":2814,"hán":13040,"lel":163,"lei":443,"hái":3807,"leg":371,"lef":140,"led":786,"hác":3511,"lec":514,"leb":324,"lea":958,"lls":526,"llu":1871,"lly":728,"lo ":703,"lla":8252,"llb":208,"lle":6069,"lli":4980,"llo":3130,"lks":330,"hê ":529,"diể":17680,"lka":390,"lm ":305,"ll ":2030,"hè ":211,"lit":1006,"lis":1811,"lip":1960,"lio":1480,"lin":3515,"lim":803,"liz":196,"liv":616,"liu":661,"lic":1528,"lid":3931,"lia":3554,"lk ":142,"lik":204,"lii":571,"lig":457,"lie":1623,"lif":1017,"ma ":3578,"húa":466,"húc":1589,"hún":1289,"hút":209,"húy":143,"mb ":269,"ìm ":8521,"mac":644,"mai":885,"maj":203,"mad":498,"mae":190,"ìn ":392,"mag":301,"hür":727,"mar":2810,"mas":488,"mal":1566,"man":6200,"maz":173,"mat":1698,"mba":981,"mbl":196,"mbi":1974,"mbe":1329,"mbr":351,"mbo":504,"me ":3769,"iá ":976,"iße":139,"mbu":270,"ình":20204,"iàn":475,"med":457,"meg":343,"mea":185,"iác":256,"mec":144,"met":1158,"iáp":2001,"mes":1080,"mer":2891,"iám":415,"mem":171,"mel":1053,"iáo":2698,"ián":260,"men":4045,"mei":235,"luy":242,"hòa":4573,"lva":958,"hô ":347,"lve":227,"lvi":160,"lul":235,"lun":153,"lum":1803,"lut":702,"lus":2401,"ly ":5413,"hóa":2227,"hòm":296,"hòn":1026,"lz ":1932,"hôi":152,"hón":422,"hóm":1446,"hôn":8959,"luô":144,"lyp":567,"lym":264,"lyn":185,"hù ":377,"hú ":960,"hùn":400,"hùa":429,"mpi":548,"mph":909,"mpe":699,"mpr":166,"mpo":339,"mpl":212,"mpu":526,"mps":257,"ms ":540,"moc":217,"mod":141,"mon":2835,"mop":418,"mol":919,"mor":1191,"mos":484,"mot":349,"mou":592,"mpa":2137,"mu ":149,"mua":252,"mst":192,"my ":346,"mur":466,"mus":478,"mul":387,"mun":1488,"hăm":190,"hăn":308,"mi ":443,"min":2275,"mil":2845,"mir":352,"mis":870,"mit":1161,"cơ ":2355,"mic":1181,"mib":195,"mia":637,"mie":192,"mid":343,"mo ":242,"ièr":623,"mm ":173,"iêu":3120,"mni":139,"iêm":793,"iên":15316,"mno":232,"mmu":465,"mmi":508,"miê":868,"mmo":992,"mma":550,"mme":1127,"xâm":158,"xây":774,"xã ":6613,"xác":753,"thể":105642,"tiể":16334,"vĩ ":1285,"Để ":4107,"suể":581,"Đểt":393,"Đểu":259,"Đểa":425,"Đểc":8666,"Đểi":2655,"Đển":2345,"Đểo":612,"văn":2376,"soể":246,"vô ":966,"vòn":456,"zue":365,"zur":1019,"ruể":225,"võ 
":369,"hưể":6739,"vùn":22597,"vây":180,"vào":6478,"vàn":747,"zen":390,"zel":179,"zer":396,"ze ":541,"vài":326,"và ":39376,"zam":416,"zan":947,"zak":194,"zar":156,"ví ":164,"zon":909,"zo ":189,"vì ":1170,"zna":154,"riể":3937,"zia":217,"zie":194,"zin":147,"zil":497,"yré":2878,"yx ":207,"yth":348,"yst":455,"yso":284,"ysi":882,"yri":846,"yro":276,"yra":354,"yrg":171,"yre":315,"ys ":1775,"yph":446,"ypt":526,"ypr":406,"ypo":300,"ype":314,"yon":322,"uý ":440,"za ":604,"gưể":22834,"uôn":11800,"uôi":272,"quể":14720,"ye ":286,"uá ":683,"yca":261,"yce":225,"ych":303,"ycl":287,"yco":153,"yct":483,"ydr":465,"yer":937,"uán":332,"yen":509,"ya ":1624,"yat":304,"yan":957,"yal":151,"uê ":487,"yla":353,"yle":207,"yli":383,"yll":1078,"ylo":482,"ylv":243,"ylu":187,"yma":185,"sĩ ":1783,"yo ":184,"yme":163,"ymp":602,"ymn":164,"yna":220,"yne":184,"yno":257,"uân":5407,"yi ":308,"tăn":371,"yho":211,"yin":196,"tín":2825,"để ":24120,"tíc":15647,"đểy":569,"xtr":322,"đểu":7023,"đểt":4542,"đểa":6133,"đểi":9089,"đểc":6272,"đển":28910,"đểo":4093,"đểp":378,"đểm":552,"tên":7356,"tìn":624,"tìm":8334,"xon":351,"xoa":253,"tù ":245,"tô ":345,"tòa":324,"xuy":162,"xun":238,"tôn":573,"săn":488,"xi ":160,"tây":9977,"tâm":1507,"tán":178,"xem":772,"tác":2521,"tái":142,"tàn":309,"tàu":1978,"nơi":1970,"phể":19501,"xil":335,"xin":153,"xic":980,"xa ":336,"tài":1059,"xce":150,"xe ":304,"xas":712,"xan":855,"ww ":171,"www":171,"són":349,"sôn":1977,"wo ":144,"yểu":888,"yểt":2656,"yển":27686,"sên":250,"wn ":1674,"sèr":449,"ws ":411,"wor":456,"woo":295,"we ":324,"sân":742,"wes":568,"wer":566,"sáu":219,"sát":605,"wen":170,"sán":1258,"wel":193,"wei":504,"wed":169,"wee":268,"web":356,"sác":1906,"whe":719,"whi":453,"răn":535,"sâu":259,"wi ":419,"wit":1057,"wig":1125,"wid":265,"wic":172,"win":477,"wil":183,"rös":588,"lưu":884,"vuô":11318,"dưể":1608,"rùn":419,"wa ":340,"rúc":638,"wan":646,"wal":651,"way":264,"wat":547,"war":864,"was":494,"wai":552,"rüc":145,"ría":161,"vre":519,"rò ":634,"vua":1478,"vul":314,"rõ ":263,"ròn":162,"rôm":369,"via":1024,"vir":262,"vil":2132,"vin":903,"vig":163,"vic":424,"vid":453,"vie":338,"ngể":4112,"vit":212,"vis":359,"nhể":20937,"ré ":197,"niể":631,"rén":2896,"viê":2261,"rì ":216,"rên":16855,"rí ":838,"voi":343,"vol":291,"von":231,"vor":176,"rìn":2591,"vi ":814,"râu":162,"rãi":238,"ver":2511,"ves":443,"vet":169,"ràn":171,"rào":384,"rái":592,"rán":159,"ven":2308,"vel":746,"rác":292,"ve ":1535,"val":496,"vak":211,"van":1011,"var":683,"vat":444,"rà ":170,"vad":360,"vai":638,"va ":1421,"uyê":4853,"cưể":274,"uze":169,"uzn":158,"uya":181,"uxe":167,"uxo":172,"muể":253,"ux ":1397,"uvi":315,"uve":890,"uy ":4510,"usk":685,"ush":495,"usi":1086,"use":2256,"usc":486,"usa":422,"usu":188,"ust":2067,"uss":2125,"utm":305,"uth":2069,"uti":986,"ute":4039,"uta":488,"utt":513,"uts":241,"utu":168,"uto":356,"us ":12540,"ut ":1468,"urb":527,"ura":1745,"urc":370,"ure":2141,"urg":4458,"uri":3154,"urk":298,"urn":505,"uro":993,"urp":172,"urr":1188,"urs":436,"urt":1515,"uru":494,"ury":261,"ur ":3979,"uph":337,"upi":208,"upe":461,"upl":256,"umi":350,"umo":202,"uma":655,"umb":1047,"ume":427,"unt":1093,"uns":220,"uni":1404,"unn":257,"unc":850,"und":2117,"una":410,"ung":28650,"une":861,"up ":561,"uki":214,"um ":5338,"ulu":810,"ult":827,"ulo":594,"ulm":200,"ull":475,"uli":2981,"ulg":343,"ule":468,"ulc":170,"ula":2994,"ulb":224,"miể":7160,"un ":876,"uid":1911,"uil":817,"uin":1133,"uis":537,"uit":1805,"ul ":704,"ugh":635,"uge":202,"ugl":142,"ui 
":422,"uga":249,"ugu":343,"uco":341,"uct":140,"ucu":155,"uda":464,"ude":797,"udi":503,"ubs":175,"ubr":196,"uca":423,"ue ":2123,"uce":268,"ucc":527,"uci":266,"uch":1204,"ucl":244,"uck":465,"uet":161,"uev":189,"uer":585,"ues":940,"uff":186,"udo":607,"udw":163,"uee":563,"ued":1142,"ueb":187,"uen":891,"uel":938,"ub ":179,"ua ":4516,"uay":450,"uat":643,"uar":339,"ual":262,"uan":4521,"ubi":473,"ubl":385,"ube":682,"ubf":310,"uba":515,"ud ":256,"uai":209,"uad":968,"tze":378,"tyl":438,"typ":380,"bưể":9335,"trư":5334,"trù":418,"ty ":2056,"trú":686,"trò":743,"trí":887,"trì":2564,"trê":16809,"trá":625,"trà":444,"tvi":145,"tuy":1965,"tur":1038,"tus":1579,"tut":187,"tui":1819,"tul":991,"tun":226,"tum":525,"tud":242,"tuc":215,"luể":1046,"tz ":904,"two":235,"tră":326,"ts ":2029,"lũn":205,"tre":2316,"loể":3367,"tt ":262,"tra":13075,"thơ":816,"tri":6787,"tru":17464,"tro":55540,"thư":5419,"tu ":263,"try":204,"toá":1052,"tsc":237,"toà":1057,"tsu":330,"tsw":157,"tta":873,"tte":2605,"tti":475,"ttl":380,"tto":705,"ttp":325,"tts":284,"thă":146,"tme":444,"tma":160,"thú":506,"thù":145,"to ":4755,"thô":2398,"tiê":3084,"tp ":325,"tna":209,"toe":205,"tod":450,"toc":558,"toi":213,"tog":204,"tob":196,"tou":1449,"tos":429,"tot":153,"tow":612,"tom":1725,"ton":3013,"tol":487,"tor":2310,"top":602,"tr ":327,"tii":370,"til":3476,"tif":586,"tie":501,"tig":741,"tir":204,"tiq":562,"tit":511,"tis":1648,"tin":5106,"tim":1567,"tip":239,"tio":2405,"thy":410,"thu":53072,"thw":183,"tia":1585,"tic":3507,"tid":1768,"tiu":237,"tiv":602,"tli":260,"thê":286,"thé":143,"thí":558,"thì":784,"liể":1722,"tla":967,"thâ":11890,"thà":11154,"tle":1005,"thá":12170,"tem":2257,"ten":2299,"tep":155,"tei":2098,"tel":2040,"tee":338,"teg":403,"tea":605,"tec":172,"ted":1492,"tfa":159,"th ":4032,"tev":154,"teu":168,"tet":184,"tes":3544,"ter":9430,"ti ":720,"tho":1935,"thm":205,"thr":908,"the":21046,"thi":5961,"tha":4235,"之三 ":187,"之万 ":161,"rưể":6336,"之专 ":252,"ăk ":141,"ăm ":28988,"ăn ":5275,"ăng":3284,"xưa":183,"vươ":541,"nưể":35806,"丘 ":240,"mưể":240,"专 ":1368,"xuể":3053,"並 ":377,"tươ":996,"lưể":2704,"三 ":1277,"丁 ":542,"万 ":771,"uyể":30286,"乙 ":247,"sư ":673,"tư ":1117,"zèr":162,"vũ ":670,"ürt":532,"üri":758,"viể":4820,"rươ":293,"rưn":427,"tuể":1284,"trể":43603,"yêu":353,"之 ":589,"yên":4958,"sơ ":163},"n_words":[13809827,17315344,13396979],"name":"vi"}
\ No newline at end of file
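
The profile above (and each of the zh-cn / zh-tw files added below) is a single-line JSON object with three keys: "freq", mapping 1- to 3-character n-grams to occurrence counts; "n_words", a three-element list of aggregate counts (by langdetect convention, one total per n-gram length); and "name", the language code the profile identifies. The sketch below is a minimal, hypothetical illustration of reading one of these files under Python 3, where the text encoding must be made explicit (Python 2 code typically read the raw bytes); it is not the library's own loader from detector_factory.py.

    import json

    def load_profile(path):
        # Profiles are plain UTF-8 JSON on a single line.
        with open(path, encoding='utf-8') as f:
            profile = json.load(f)
        # "freq": n-gram -> count; "n_words": one total per n-gram
        # length (1-3); "name": the language code.
        return profile

    # Assumes the script is run from the repository root.
    profile = load_profile('nlp_resource_data/langdetect/profiles/vi')
    print(profile['name'])     # -> 'vi'
    print(profile['n_words'])  # -> [13809827, 17315344, 13396979]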
diff --git a/nlp_resource_data/langdetect/profiles/zh-cn b/nlp_resource_data/langdetect/profiles/zh-cn
new file mode 100755 (executable)
index 0000000..904c61e
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"·":11798,"é":695,"и":659,"о":642,"а":705," 《":2860," 。":4044," 、":2042,"あ":2229,"。":93258,"、":80590,"》":12787,"《":12801,"ア":2133,"乱":17692,"书":4419,"习":841,"乡":1808,"九":2519,"也":8266,"乘":28571,"乐":5294,"乌":1217,"义":4779,"之":17358,"久":747,"主":15497,"为":43069,"举":3076,"丽":1041,"丼":1241,"丰":817,"临":983,"个":19411,"中":47239,"两":4794,"严":592,"丛":1367,"业":8072,"东":12690,"丞":9778,"专":3303,"丕":15196,"世":9256,"丈":146015,"三":7927,"上":15305,"下":7962,"不":69712,"与":14330,"一":51232,"丁":117148,"七":1693,"万":2760,"价":1006,"份":14596,"任":5559,"以":21926,"令":1161,"代":10675,"们":2426,"他":7464,"仙":673,"仅":1129,"仍":888,"从":3822,"今":2840,"亲":1398,"人":38353,"亡":667,"产":5405,"亦":2742,"交":20817,"京":3333,"云":2725,"五":3431,"亚":46408,"些":2757,"了":8577,"争":2347,"予":591,"事":19248,"二":6378,"于":42781,"使":59599,"低":1173,"住":1250,"位":13424,"但":4207,"作":14815,"何":21331,"体":8040,"伯":2492,"传":4668,"伦":1894,"伊":2356,"企":1149,"会":16875,"优":845,"休":48239,"众":1421,"信":7840,"俄":2213,"保":3387,"侧":721,"供":2204," 分":744," 公":1819,"商":3626,"哥":1688,"品":4389,"响":1404,"和":23304,"周":2010,"呼":663,"命":2331,"员":7153,"启":817,"含":1431,"吴":771,"名":20347,"同":8476,"后":12165,"吉":2174,"合":7745,"各":3613,"向":3047," 号":1564," 台":812,"域":2609,"城":5315,"培":3476,"基":6619," 名":675,"址":719,"坦":1302,"坡":2693,"土":2149,"圆":692,"场":5846,"地":26817,"在":35727,"圣":2664,"回":1392,"四":5911,"团":4373,"因":6183,"园":2775,"围":1767,"图":2777,"国":50445,"器":3296,"况":701,"冰":646,"决":1729,"冲":743,"农":1200,"军":7136,"写":2568,"册":628,"再":1221,"内":8287,"击":1569," 丕":929," 世":1671," 丈":5741," 不":1283," 一":646," 丁":3946," 万":1431,"兰":5551,"共":7134,"关":4776,"兴":1795,"其":12214,"具":2485,"兹":634,"养":660,"入":4429,"全":7377,"八":1932,"六":2119,"公":17034,"党":2412,"元":4144,"克":7641,"光":2911,"先":2098,"免":623,"停":661,"原":6718,"压":796,"厂":831,"历":4017,"厅":672,"去":1176,"县":5347,"厦":614,"变":2770,"受":2511,"取":2479,"发":12789,"双":1530,"反":2487,"及":17913,"友":732,"又":5333,"参":2872,"司":7147,"号":5641,"台":13293,"可":7699,"只":2126,"口":4422,"化":7453,"包":4823," 何":732,"区":18524,"医":1928," 位":590,"南":15302,"单":3024," 使":1450,"博":1783,"升":971,"千":1226,"十":5342,"协":2640,"华":6868,"半":2037,"卷":760,"印":3243,"即":2844,"卡":3152,"卫":1639,"力":4944," 人":2100,"办":2818,"动":10155,"助":1407,"加":7203,"务":5246,"势":871,"劳":804,"勞":1639," 休":920,"分":15267," 个":2399," 中":1862,"刘":669,"则":3203,"创":4231,"初":2398," 之":647,"别":3560,"利":8681,"到":6365,"制":7035," 乘":758,"前":10878," 乱":770,"剧":3129,"副":860," 亚":733," 天":581," 大":1004,"工":9317,"已":3013,"巴":5253,"州":7529,"川":3028,"山":12651,"属":10137,"展":3788,"屋":1097,"届":1515,"局":2483,"层":1590,"居":1947,"尼":5208,"就":2894,"尚":3502,"少":2443,"小":25275,"将":4225," 多":650,"岸":1431,"岭":590,"岩":988,"岛":4996,"岁":688,"录":1700,"归":696,"当":5254,"形":4076,"役":939,"影":4410,"式":6888,"异":925,"开":7255,"引":4831,"张":2153,"弹":947,"强":1613," 小":894,"念":1709,"往":1560,"律":1732,"得":4936," 家":600,"德":8327,"广":7680,"并":7400,"年":47488,"干":1556,"平":5989,"帝":2672,"帕":667,"师":2671,"希":2317,"布":7549,"常":6171,"席":1225,"带":2534,"延":769,"建":8867,"库":1470,"底":1106,"应":3484,"店":1431,"庙":584,"庆":953,"康":1164,"度":6769,"座":2603,"大":34268,"央":1321,"天":7035,"夫":2315,"太":3274,"头":2350,"失":939,"备":1259,"处":4116,"复":1700,"外":5857,"多":11036,"奥":3388,"女":3642,"好":1250,"奖":2274,"如":3936,"始":3351,"委":2437," 和":850,"增":971,"境":2254,"声":4998," 在":1140,"子":9623,"存":2538,"学":28915,"安":5174,"它":4574,"宁":1619,"定":6314,"实":4032,"宝":999,"宗":2085,"宫":1054,"客":2272,"宣":1095,"宾":780,"家":33525,"富":1265,"密":1755,"察":855,"导":2989,"对":7380,"威":2129,"媒":887," 
国":713,"区,一":689,"更":2085,"曲":2749,"曾":3299,"是":61140,"映":608,"春":892,"显":972,"星":4368,"易":1898,"普":2707,"智":818,"晚":728,"架":926,"林":7036,"果":2196,"极":1459,"构":3349,"查":1580,"机":7949,"未":4221,"木":1876,"术":4302,"本":13564,"望":1007,"朝":3840,"期":7068,"朗":953,"月":15728,"有":24992,"服":2639,"最":10271,"松":1118,"杰":812,"杨":679,"来":9550,"条":3385,"村":1739,"李":1577,"杀":947,"杂":1265,"权":2775,"播":2492,"·丁":954,"·丈":771,"摄":716,"料":1677,"文":14966,"施":1293,"斯":13962,"断":823,"旁":959,"旅":1334,"族":3701," 或":968,"时":14785,"无":3416,"旧":1079,"日":21151,"早":2138,"放":2159,"改":3194,"收":1975,"支":2892,"教":9551,"故":1906,"数":7124,"整":1497,"括":3402,"拥":1168,"拉":7109,"拔":3550,"报":2620,"护":1665,"技":2723,"抗":915,"投":1684,"执":1062,"扩":636,"承":1002,"批":885,"所":11521,"手":3277," 年":38848," 平":1187,"打":1400,"户":1297,"房":1033,"戏":2673,"我":858,"成":16718,"或":10994,"战":6655,"提":4221,"推":2158,"控":1211,"接":3776,"排":1222,"换":868,"据":3024,"持":2131,"指":5889,"情":1949,"息":1214,"态":1462,"总":5550,"感":998,"游":3622,"温":1133,"港":10900,"清":2634,"湾":8336,"湖":3858,"源":3031,"满":827,"演":3628,"区,多":1252,"澳":2170,"气":1964,"民":10950,"水":5383,"江":5857,"汉":2630,"求":1033,"汇":1020,"沟":938,"沙":2368,"河":5488,"油":1041,"治":5001,"没":1471,"泽":750,"波":2738,"派":1913,"活":2831,"洲":5289,"测":1385,"济":2241,"流":4509,"消":1003,"深":1422,"横":631," 是":2568," 月":13702,"区,常":666,"武":2208,"此":5466,"止":1051,"正":4102,"死":6678,"歌":2416,"次":4906,"欧":2758,"款":923,"母":1743,"比":4805,"毕":833,"毒":762,"案":1633,"桥":1371,"树":1023,"标":3335,"样":1395,"栽":2997,"根":2240,"核":1380,"校":3605,"楼":1695,"植":5185," 日":10844,"概":864,"石":3358,"知":2149,"省":5247,"着":1639,"皇":2205,"的":145656,"盖":717,"监":811,"盘":598,"目":9318,"直":3003,"白":2529,"百":1846,"登":1073,"病":1344,"町":605,"画":2451,"甸":900,"电":10706,"田":1994,"由":17497,"用":13911,"生":19204,"甘":1358,"略":871,"留":781,"界":6597,"理":9553,"球":6996,"班":1750,"玛":789,"王":5331,"现":8430,"环":2438,"独":1820,"状":1446,"片":2527,"牌":1136,"物":13958,"特":10095,"爱":2167,"照":1002,"然":2677,"热":1483,"点":3443,"火":1816,"灵":903,"缅":631,"缘":1067,"编":2591,"缩":977,"罗":6436,"网":4079,"置":1765,"署":901,"美":10457,"群":2218,"纳":2534,"纽":906,"线":7551,"红":1720,"约":5717,"级":4065,"纪":4115,"结":3008,"统":6645,"绝":650,"络":1469,"给":1157,"细":1259,"织":2183,"组":5795,"经":8789,"终":1135,"维":3625,"综":661,"绿":800,"继":1038,"续":1408,"索":1233,"素":1524,"类":5149,"米":9738,"系":8292,"等":12322,"策":957,"笔":614,"第":11099,"篇":721,"简":4298,"算":1987,"究":2923,"空":3846," 的":2781,"程":4573,"竞":659,"站":6147,"立":9651,"童":907,"突":730,"票":886,"神":3463,"社":4420,"示":1536,"积":2267,"移":1075,"称":17001,"私":738,"科":10282,"离":1553,"福":3324,"确":1094,"础":692,"破":649,"码":1376,"要":8323,"视":4552,"规":2234,"观":2303,"见":2171,"览":667,"角":3015,"解":2300,"西":17559,"被":7905,"街":2230,"行":14959,"表":5479,"认":2753,"计":5438,"训":622,"讯":1488,"议":3045,"让":773,"记":2357,"设":6237,"许":1740,"论":3372,"该":4765,"说":3972,"诸":678,"诺":1644,"读":946,"证":1510,"识":1123,"评":1093,"词":2012,"译":3205,"诗":967,"试":1000,"话":1875,"警":737,"言":2970,"路":9754,"越":2100,"超":1907,"足":2749,"资":4130,"赛":5990,"起":4293,"负":1207,"贝":1346,"财":832,"责":1208,"败":623,"货":901,"质":2689,"购":664,"贵":1643,"费":1407,"象":1888,"调":1492,"谷":1826,"过":6354,"进":5216,"还":1606,"这":6952,"连":2790,"远":1319,"运":6417,"近":3105,"辽":711,"达":4214,"边":2785,"较":2013,"辖":1462,"输":1210,"辑":1374,"车":6585,"轨":655,"转":1925,"轮":727,"软":1659,"轻":756,"载":1404,"身":2869,"脑":1283,"脉":651,"腊":1076,"致":1313,"至":9855,"自":8792,"而":9496,"者":7418,"职":2157,"联":6161," 
米":6968,"肃":1126,"股":1262,"育":2953,"胜":1031,"能":5645,"药":977,"荣":690,"草":3994,"获":2004,"莱":1327,"菲":926,"般":2569,"航":2316,"舰":942,"艺":2269,"艾":805,"色":3575,"花":3115,"节":2402,"英":9686,"苏":3117,"著":2998,"营":2531,"萨":1866,"落":1487,"蓝":701,"蒂":676,"虽":721," 英":812,"频":899,"题":1997,"预":942,"领":2926,"食":1377,"飞":1436,"风":2500,"顿":1362,"顺":631,"项":2377,"顶":716,"页":780,"馆":2371,"香":9519,"验":1104,"高":8988," 阿":664,"马":7083,"鲜":1464,"鲁":1516,"鱼":955," 香":692,"黑":1900,"黄":2293,"龙":3881,"造":2929,"选":3181,"送":619,"适":644,"通":7604,"速":2166,"遗":895,"邻":1194,"郡":912,"部":14531,"都":5243,"配":1056,"金":5677,"野":1099,"量":3834,"里":8179,"重":5583,"释":634,"银":1422,"铁":4586,"针":594,"钟":734,"长":12428,"镜":635,"镇":2371,"锡":594,"锦":662,"销":827,"闻":1352,"间":8313,"问":1446,"门":4685,"际":4272,"陆":5534,"陈":1215,"降":626,"限":2379,"陕":1187,"院":5166,"除":1569,"险":710,"队":3990,"阶":1080,"阴":725,"防":1166,"阳":2041,"阿":4439,"随":1152,"难":627,"隶":660,"青":2398,"非":3368,"面":6531,"需":1195,"音":4816,"韩":1018,"가":589,")":53342,"(":53630,"-":1381,",":211761,":":15751,";":5849," (":2857," )":2917," ,":8290,"国的特":945,"植物。":2891,"植物,":1199,"基丁教":662,"等地,":3010,"民共和":1261,"。 ":4085,"、 ":2429,"》 ":659,"地区,":3536,"。这":1769,"、陕":800," 、 ":1160,"、福":680,"。现":602,"、甘":833,"。由":929,"、西":1056,"、贵":865,"。该":1506,"四川、":1228,"、广":1717,"、山":2033,"国大陆":3832,"、安":622,"。它":1480,"、四":1328,"。在":1915,"、台":689,"。分":2961,"、印":861,"。其":1173,"、湖":1562,"、河":1285,"、江":1104,"。此":760,"》是":802,"、日":634,"ああ":1707,"、《":1198,"》、":1065,"》中":595,"《丈":900,"、休":601,"。他":1369,"、云":1236,"。乘":742,"、人":909,"。丁":940,"、不":1336,"、丁":2633,"、丈":4209,"、乘":1367,"、中":959,"。丈":1034,"アア":1643,"地区的":687,"》(":2220,"》,":1819,"在中国":1180,"栽培。":2912,"、贵州":848,"是香港":1721,"立于 ":1061,"从 ":609,"人 ":623,"以 ":631,"于 ":10544,"了 ":641,"亚·":946,"亚 ":1478,"休 ":1064,"会 ":652,"使 ":1090,"不、":2097,"不。":742,"丈》":600,"丈、":2959,"丈。":2119,"丁、":2210,"丁。":2121,"一。":1805,"东、":1223,"中、":800,"人。":1182,"事。":826,"亚、":1600,"亚。":687,"交。":780,"丈山":758,"丈属":948,"不家":1629,"之后":1285,"丈小":1097,"丈家":1182,"丁属":1251,"中国":13581,"丈子":859,"丈学":961,"一家":830,"乱丈":1142,"为台":739,"丈大":637,"举办":651,"丁大":616,"何。":612,"乘丁":814,"乘一":737,"乘丈":920,"中华":3016,"乘丛":864,"丈地":713,"之一":4710,"丁国":1036,"丁地":630,"东南":852,"主义":1789,"为主":1114,"为中":1501,"为丈":1939,"为一":949,"为丁":953,"丈和":636,"为了":902,"不同":2049,"中使":2667,"不区":1235,"中一":647,"不军":620,"个人":1067,"丈克":1016,"丈其":691,"不公":924,"两个":961,"不使":1931,"休。":1293,"休、":1110,"丁克":614,"东不":1592,"丛中":711,"丁军":992,"丁兰":618,"不交":3736,"不京":1465,"不事":949,"丈休":1776,"丈使":1087,"不休":658,"丕丈":949,"丈不":1018,"丈丈":10643,"丈丕":925,"丈丞":767,"丈中":605,"丁休":2172,"丈为":629,"丈之":694,"上不":1488,"一位":1048,"丈乘":1009,"丈乱":1342,"丁作":779,"丈事":642,"丁使":3178,"丈交":632,"不丈":1417,"不丁":1361,"丈亚":1470,"不不":1140,"与丈":673,"一丈":695,"丁丈":5202,"丁丁":3673,"丁不":1113,"一个":9742,"丁乘":875,"一亚":4760,"一些":1051,"丁乱":789,"丁亚":2029,"份、":719,"丈一":1128,"丈丁":6021,"主教":1120,"任何":656,"于山":605,"交大":1013,"企业":965,"为是":626,"中文":1113,"人口":1818,"一次":948,"丈林":809,"他们":991,"产品":768,"事场":701,"以丈":590,"人使":892,"丁是":769,"丈是":785,"一条":1013,"不拔":3372,"二十":706,"人乘":690,"云南":1737,"人丁":945,"人丈":706,"主席":689,"丁教":806,"于台":1035,"元 
":583,"丁斯":907,"丁文":660,"一所":719,"亚使":601,"交休":767,"事务":628,"乘大":1199,"亚丈":1056,"不式":1110,"亚丁":910,"中学":1267,"亚亚":814,"交不":722,"使。":1718,"使、":1407,"丁店":632,"一座":888,"中央":1281,"于丁":1020,"于不":3550,"于丈":1169,"事交":1241,"于中":1731,"中的":2873,"休如":834,"丈科":1509,"亚栽":2911,"东省":644,"份年":663,"使休":784,"使代":599,"使中":607,"二次":594,"使丈":1433,"使上":903,"使丁":935,"下的":912,"世界":3851,"上的":1685,"何丁":623,"丈的":3617,"于日":763,"乘江":1087,"位于":6910,"丁的":3074,"不生":996,"作为":1517,"不的":1824,"也有":715,"不现":1191,"份围":818,"人工":3099,"丈球":620,"休丈":1574,"休丁":1348,"以及":5720,"不治":1654,"也是":2441,"休休":1177,"休使":948,"交的":754,"何成":650,"但是":741,"人的":1301,"事的":883,"人物":960,"丁西":582,"产生":952,"亚的":1008,"一般":2436,"人民":2356,"丈至":636,"丈草":591,"使团":1466,"前 ":886,"作家":983,"作品":1438,"亚洲":979,"使兰":933,"世纪":2136,"乘的":934,"到 ":1169,"丈 ":2639,"丈·":660,"丁 ":1901,"丁·":957,"为 ":5249,"地、":838,"国、":813,"又译":658,"发行":1130,"可能":964,"南部":1207,"名称":1365,"后的":584,"名的":1485,"同的":919,"商业":792,"使(":777,"使,":2980,"又称":2280,"台湾":6649,"位,":691,"发现":998,"体,":607,"何,":1182,"作,":878,"发生":1318,"后来":990,"只有":591,"南等":606,"休(":899,"休,":2609,"和国":2085,"会,":853,"会(":981,"同时":1360,"在 ":4342,"命名":845,"份,":752,"国 ":738,"区的":1567,"和丈":1029,"和丁":641,"前身":709,"员会":1240,"交,":1266,"制造":737,"反应":687,"人,":2362,"化的":604,"事,":1473,"亚,":1453,"亚(":751,"亚:":3961,"华民":1238,"发展":2345,"多生长":627,"国的":3079,"国王":741,"城事":1744,"因此":1473,"国民":1000,"基丁":1219,"分,":694,"四川":1896,"地区":6955,"国大":4205,"在台":652," 公里":1220,"国家":4191,"在不":766,"在丈":1143,"在丁":899,"地不":1517,"在中":1500,"内,":588,"国国":1280,"因为":1284,"团体":692,"国丁":732,"国丈":695,"国不":1345,"国人":990,"培。":2913,"公园":1063,"全国":1074,"共和":2100,"共同":711,"军事":699,"公司":6085,"和 ":1051,"克斯":708,"分丁":674,"分为":837," 丈丈":703,"及 ":757,"内信":885,"其他":1807,"内丁":823,"公共":779,"其中":1933,"俄罗":1205,"交通":1010,"代表":1920,"使用":3270,"使理":2624,"使的":2300,"何的":1007,"之间":1601,"作用":691,"何用":717,"传统":899,"作的":824,"何能":734,"九龙":820,"于香":923,"会议":766,"人闻":930,"体育":809,"使究":2862,"不部":1509,"代的":607,"人类":940,"使本":654,"丁近":677,"举行":1293,"主要":4270,"于美":614,"一部":1591,"丈车":720,"他的":941,"专辑":636,"信息":699,"保护":898,"亚言":1720,"休画":658,"休的":1686,"会的":752,"不面":1014,"及丈":729,"及中":2186,"下,":946,"上,":1461,"丈(":2586,"丈,":5165,"参与":744,"半岛":715,"不,":1460,"一,":2377,"丁)":1039,"丁,":4237,"丁(":1706,"包括":3325,"参加":688,"动画":653,"中,":3371,"及其":661,"业,":586,"原名":724,"动物":1359,"名为":1425,"乘,":704,"各亚":724,"又名":874,"印度":2180,"可以":2687,"台不":1436,"乱,":730,"名使":755,"合作":595,"包含":732,"单位":907,"员。":605,"华人":1471,"协会":1039,"南不":934,"公路":1194,"区域":737," 世纪":1447,"化学":839,"分类":693,"利用":693,"医学":647,"历使":2363,"创立":811,"公里":2086,"前尚":2725,"内的":698,"关系":1044,"名。":910,"创作":725,"分别":959,"利亚":1729,"共有":696,"制作":1175,"创办":644,"具有":1055,"区、":855,"区。":806," 中国":719,"南、":2847,"分子":582,"分布":3839,"全球":752,"加乘":908,"州事":599,"工业":864,"工作":1348,"实际":605,"巴乘":819,"属的":3366,"学院":1919,"度、":646,"帝国":1098,"工引":2908,"平不":1296,"山谷":602,"并丁":847,"年代":1820,"广东":1877,"广乘":652,"布在":1655,"山西":700,"小都":652,"布于":1877,"建、":593,"巴使":874,"小说":1496,"、陕西":793,"场,":659,"尚未":2839,"定的":781,"州、":1204,"地,":4149,"川、":1254,"它的":633,"山坡":1678,"学的":914,"国,":704,"学生":932,"小的":1375,"德·":778,"就是":947,"家的":1013,"应用":955," 年 ":13492,"形式":842,"形成":784,"德国":1532,"或 ":819,"影响":1080,"德亚":680,"希腊":1007,"年的":1358,"广西":1228,"建立":1106,"当时":1404,"年至":809,"拔 
":3155,"外,":1036,"广场":674,"广州":765,"建于":690,"建丁":1787,"年在":775,"处,":614,"开始":2055,"工程":1106,"已经":808,"常生":724,"引亚":2921,"广播":596,"开发":1428,"大学":4751,"名:":4034,"后,":2099,"名,":804,"大战":639,"号,":798,"司,":904,"司(":706,"太平":714,"员,":1310,"委员":1628,"多生":1267,"处理":904,"国立":626,"动,":770,"基本":664,"境内":659,"声任":875,"区,":4940,"基础":690,"国际":3114,"大利":1318,"大使":1012,"家 ":943,"天休":592,"天主":779,"大丈":869,"学使":616,"大陆":4400,"它们":581,"安丁":901,"学名":3799,"定义":596,"存在":833,"岛、":590,"家亚":645,"家丈":713,"家丁":849,"学家":1872,"对于":704,"年 ":13993,"小丈":990,"家国":1673,"尼亚":1342,"学校":1549,"它是":712,"家律":640,"属于":2022,"宗教":606,"山东":748,"大的":1819,"品,":659,"未由人":2713,"学、":1095,"媒体":593,"家、":1079,"家。":1319,"学中":593,"大部":622,"是由":1948,"最大":1527,"成,":1007,"最早":864,"是美":1276,"日至":621,"林中":937,"林下":866,"林丈":747,"是日":749,"最丈":683,"教育":1765,"斯特":879,"是指":1893,"最后":916,"服务":2010,"时的":619," 日 ":1067,"有一":1032,"有丈":634,"有关":606,"栽培":2968,"曾经":583," 月 ":10460,"期的":814,"未由":2714,"有的":742,"来的":817,"是香":1732," 年,":2603," 年)":2400,"机构":1217,"有植":939,"有时":593,"时间":1469,"标何":1254,"故事":840,"教丁":619,"教会":604,"教休":765,"文使":804,"、湖不":767,"斯丁":803,"文化":2111,"、江西":640,"式,":710,"是台":693,"是在":1454,"早期":592,"时期":1763,"日本":5547,"数学":761,"、湖南":720,"名:)":3257,"时代":1179,"斯坦":867,"文学":1074,"数据":868,"是位":626,"是以":583,"是中":2851,"日在":675,"是丁":1437,"是一":8458,"是不":724,"是丈":2026,"是 ":2352,"推不":777," 年的":914,"或称":642,"成立":2637,"技术":1644,"成的":1249,"日 ":1181,"斯·":1031,"所有":1061,"拉丁":1110,"委员会":1211,"投资":588,"月 ":10522,"有 ":2350,"年(":1135,"年)":2514,"年,":3020," 年至":781,"提不":749,"提供":1571,"控制":687,"或者":862,"拥有":1075,"家(":883,"家,":2992,"学(":696,"学,":1273,"子,":932,"成。":674,"年间":605," 年在":695," 年代":1447," 平不":1027,"所以":844,"成员":1194,"属(":1286,"成为":2403,"战争":1282,"成何":673,"车站,":720,"总统":750,"小,":1060,"流行":630," 日)":775," 日,":1367,"没有":1287,"活动":1238,"比赛":1111,"江苏":807,"江西":972,"湖不":1117,"湖南":1294,"源于":655,"游戏":1924,"江、":1010,"斯(":605,"文:":2387,"毕业":667,"时,":1279,"死关":792,"日,":1548,"日)":813," 日至":616,"、广西":861,"民国":1569,"民主":899,"欧洲":1589,"民共":1266,"正式":1275,"河南":853,"月,":734,"民族":1066,"期,":613,"河不":810,"称为 ":1204,"根据":1261,"来自":905,"朝鲜":1221,"最高":965,"期间":1013,"有限":1519,"植物":4924," 是一":701,"概念":628," 日在":655,"社会":2250,"的西":629,"的重":753,"第 ":1806,"的第":1619,"物,":2113,"目的":1041,"的是":1254,"的最":739,"的植":2910,"称 ":1447,"积 ":789,"生长":4500,"直接":612,"的特":1539,"的电":996,"的小":1525,"的家":638,"电脑":1011,"的大":1230,"的国":1201,"的基":581,"的地":4246,"电视":2674,"目前":4402,"的丁":3401,"的一":9462,"用的":1236,"生的":842,"球队":888,"生物":1143,"甘肃":1053,"的名":795,"的发":604,"的休":1722,"的使":1923,"的何":678,"的作":618,"的主":1363,"的中":1197,"的丈":5079,"的不":2462,"的乱":628,"的乘":866,"的人":1919,"的交":1075,"的亚":598,"的份":596,"用来":773,"电影":2044,"理论":1071,"生活":843,"生在":746,"电子":1067,"的。":902,"的《":764,"由人":3013,"由于":1683,"用于":1226,"生于":2477,"生产":971,"理学":1070,"王朝":657,"独立":1054,"现在":1080,"环境":892,"的 ":3036,"用。":732,"现代":899,"班丁":801,"物理":701,"特有":1087,"由 ":647,"特别":1034,"澳门":921,"、福建":587,"物。":3636,"而成":666,"网络":1319,"等,":840,"。由于":637,"结构":811,"称,":606,"统治":615,"美国":6262,"有限公":1342,"经济":1788,"线的":727,"统的":590,"群岛":661,"罗斯":1403,"组织":2012,"联休":1568,"联合":1420,"经营":801,"美洲":847,"站,":1417,"站(":605,"立,":948,"约丈":612,"系统":2897,"、甘肃":804,"结丁":584,"纪念":701,"缩写":612,"至 ":5691,"肃、":800,"自 ":728,"组成":1458,"米的":3087,"米至":2745,"空间":581,"简称":3399,"目,":621,"系使":1617,"等地":3350,"的,":1232,"算机":592,"约 
":1747,"天主教":743,"立的":1250,"立于":1279,"生,":616,"用,":772,"科技":622,"第三":1132,"第一":3441,"第二":2122,"科丈":1055,"等。":1256,"科学":1716,"福建":1133,"称为":4237,"站。":627,"角色":608,"认为":1515,"西部":624,"计份":1242,"译为":652,"西班":773,"要的":1104,"视台":583,"被称":1048,"设计":1764,"越南":953,"贵州":1074,"计算":1077,"设立":673,"赛事":631,"过 ":590,"达 ":635,"许多":1084,"这亚":795,"这些":921,"这个":1485,"运动":2493,"选举":733,"超过":649,"软交":1060,"路线":871,"负责":846,"资讯":628,"足球":1709,"行,":1162,"资料":726,"都是":704,"说,":693,"进行":2271,"华民国":1215,"过程":791,"部分":2341,"部份":779,"车站":2410,"通常":1655,"连接":661,"系,":584,"英亚":2018,"自治":726,"自然":791,"英国":2374,"艺术":1217,"自由":876,"般生":684,"航空":1141,"草地":757," 米的":3008,"罗马":1148,"自丁":786," 米至":2742,"米,":928,"联赛":872,"者,":759,"节目":923,"英文":2834,"苏联":584,"获得":1131,"线,":677,"著名":1735,"是台湾":602,"虽然":581,"行。":771,"规份":679,"西南":748,"行的":1255,"西份":1008,"西亚":1199,"西不":1211,"行为":642,"行不":2204,"西、":2735,"中,目":813,"马来":712,"香港":8632,"高丁":607,"间,":1121,"队,":590,"高速":865," 香港":632,"及中国":2044,"公里,":1108,"是一个":2236,"是一亚":2252,"是位于":587,"丈(学":778,"是中国":2142,"部的":1067,"通过":1270,"赛,":689,"长 ":666,"路,":847,"重要":1745,"的第一":601,"铁家":726,"重要的":693,"长于":3283,"长在":1116,"部,":1121,"除了":587,"限公":1343,"阿亚":611,"间的":1117,"阿拉":760,"铁路":1902,"银行":879,"陆的":3096,"里,":1518,"陕西":1134,"音乐":1761,"面积":1528,"问题":913,"非洲":689,"领域":706,"需要":625,"项目":737,"有植物":938,"的特有":970,"华人民":1156,"是美国":1183,"生长在":1097,"的植物":2905,"生长于":3254,"日至 ":606,"前尚未":2719,"最大的":950,"加乘大":716,"是日本":698,"目前尚":2723,"的地区":3123,"甘肃、":797,"的一部":774,"的主要":669,"年),":737,"的一亚":1477,"的一个":3092,"限公司":1342,"年( ":710,"长于不":2913," )是":697," ,是":1072,"南等地":586,"),":14139,"()":3758,"(,":1176,",)":1561,":,":733,":)":4083,"由人工":2914,",有":1486,",最":1122,",曾":1063,",是":13170,")是":10848,",此":660,",总":746,",当":886,",并":4017,",常":1051,",故":644,",指":584,",所":997,",或":1328,",成":1293,")的":2201,",目":3648,",第":777,",简":2029,",现":1341,",由":3911,",用":638,",生":3542,",西":833,",被":862,",该":1018,",经":598,",美":598,",而":3223,",英":974,"(英":2411,",香":721,",这":1581,",通":884,",《":690,")。":3713,")、":2880,"(丁":582,"(学":3658,",它":1417,",属":841,",小":789,",因":2416,",在":4524,",多":1573,",大":803,",如":965,",前":662,",分":865,",南":723,",即":1029,",包":1287,")和":952,",后":1255,",同":895,",台":750,",可":1101,",又":2947,",原":1270,",与":1258,",不":2776,",东":861,",中":1697,",主":1658,",为":3616,"(今":666,",乘":1464,",也":3256,",乱":593,"(丈":745,")为":2108,",丈":5138,",丁":4254,",一":2351,",休":1379,",但":3353,",位":2756,",使":1942,",交":917,",亦":1195,",于":2786,",人":883,",以":3498,",他":1341,",从":880,",份":844,",共":756,",其":3552,",全":1258,"( ":4029,") ":1194,", ":8207,"- ":821,": ":1177,"于台湾":746,"云南、":1087,"丁属(":793,"中国的":1325,"中国大":3854,"之一,":2267,"不拔 ":3152,"于不拔":2931,"于中国":1363,"丈属的":710,"人工引":2908," ),":840,"乘江、":593,"特有植":935,"位于香":588,"于日本":718,"交大利":915,"以及中":2006,"一般生":684,"英文:":1509,"亚栽培":2910,"陕西、":802,"人民共":1261,"广西、":800,":)为":1432,":)是":1905,"),又":980,"),是":2411,"()是":1113,",),":1007,"著名的":779,"(),":1527,"年至 ":713,"面积 ":773,"于香港":916,"之间的":629,"内丁使":615,"成立于":917,"英亚:":1340,"俄罗斯":1205,",香港":708,"物。分":2814,"公司(":690,"公司,":865,"行不区":1022,"共和国":1949,"分布于":1823,"分布在":1639,"》、《":946,")是丈":621,")是一":2670,",是一":2180,",是中":1286,"湖南、":689,"あああ":1321,"尚未由":2713,"地,生":2896,",并丁":652,",常生":669,",所以":699,",又称":1386,",台湾":584,",因此":1152,",多生":1256,"(学名":3651,"。分布":2922,"、印度":793,"、乘江":625,"、云南":1182,"被称为":866,"、山坡":669,"、广东":743,"、四川":1251,"大陆的":3080,"アアア":1275,"学名:":3527,"(英文":1335,"(英亚":1020,",目前":3438,",生长":2942,"西班丁":773,",简称":1929,"工引亚":2908,"平不公":791,"贵州、":836,"广东、":761,"引亚栽":2908,",包括":1127,"米的地":2908,",又名":585,",在 
":658,",其中":943,",以及":1003,",也是":1477,",位于":2555,",一般":1217,",主要":1207,",为中":658,",中国":911,"属的植":2824,",于 ":2118,"丁使、":641,"丈丈丁":631,"计算机":592,"江西、":618,"湖不、":763,"不同的":722,"米至 ":2743,"之一。":1753,"中华民":1237,"中华人":1156,"为中国":948,"不公里":767,"丈丈,":642},"n_words":[4792118,1709982,314544],"name":"zh-cn"}
\ No newline at end of file
diff --git a/nlp_resource_data/langdetect/profiles/zh-tw b/nlp_resource_data/langdetect/profiles/zh-tw
new file mode 100755 (executable)
index 0000000..674b28d
--- /dev/null
@@ -0,0 +1 @@
+{"freq":{"·":11773,"é":695,"區,常":664,"и":659,"о":642,"а":705," 《":2780," 。":1867," 、":1053,"あ":2229,"。":93215,"、":80530,"》":12775,"《":12789,"」":16776,"「":16978,"ア":2133,"九":2518,"也":8240,"乘":27365,"之":17356,"久":750,"主":15490,"丼":1230,"中":47219,"並":6623,"丞":85837,"丕":96789,"世":9230,"丈":143774,"三":7943,"上":15307,"下":7965,"不":69580,"一":51222,"丁":112603,"七":1691,"份":8990,"任":5556,"以":21889,"令":1163,"代":10588,"他":7458,"仙":669,"仍":884,"今":2840,"人":38053,"亡":666,"亦":2741,"交":18627,"京":3334,"五":3430,"些":2759,"亞":10406,"了":8549,"予":591,"事":19252,"二":6375,"使":59515,"低":1174,"住":1250,"位":13512,"但":4206,"作":14816,"何":16276,"佐":33991,"伯":2485,"伊":2316,"企":1149,"休":48236,"信":7776,"俄":2093,"係":1210,"保":3393,"來":9528,"供":2206," 分":709,"單":3071,"問":1448," 公":2117,"商":3624,"員":7098,"哥":1699,"品":4392,"和":23261,"周":1446,"呼":664,"命":2330,"含":1432,"名":20323,"同":8443,"吉":2089,"合":7724," 倫":1516,"各":3613,"向":3031,"域":2612,"城":5349,"執":1078,"培":3458,"基":6608," 名":633,"址":721,"坦":1100,"坡":2688,"團":4364,"土":2148,"園":2775,"圖":2763,"國":50470,"地":26714,"在":35708,"回":1107,"四":5909,"因":6124,"嚴":592," 勞":1513,"器":3287,"冰":646,"再":1221," 丞":2854," 丕":3070," 世":1592," 丈":4439," 不":917," 丁":3186,"優":845,"共":7054,"其":12215,"具":2485,"入":4421,"內":8167,"兩":4796,"全":7445,"八":1932,"六":2118,"公":17482,"兒":1569,"元":4231,"克":7487,"光":2846,"先":2097,"免":625,"傳":4645,"價":1006,"倫":51626,"個":19430,"們":2429,"停":661,"原":6791,"去":1179,"受":2509,"取":2465,"反":2477,"及":17959,"友":732,"又":5332,"參":2854,"司":7145,"台":11564,"可":7754,"只":1856," 個":2397,"口":4174,"化":7454,"包":4825," 佐":611," 位":594,"南":15370,"協":2646," 使":1198,"博":1769,"升":830,"千":1228,"十":5341,"區":18522,"半":2039,"卷":655,"印":3241,"即":2842,"卡":3151,"劃":2056,"劇":3028,"力":4955," 人":1795,"助":1406,"加":7145,"勞":44204,"務":5038,"動":10173," 休":691,"分":15261," 中":1418,"初":2396,"別":3548,"利":8617,"到":6362,"制":3819," 乘":601,"則":3204,"前":10854,"副":861,"創":4236," 大":752,"工":9304,"已":3034,"巴":5247,"州":7520,"川":3027,"山":12644,"屬":10111,"展":3787,"屋":1099,"局":2380,"居":1946,"尼":4905,"就":2895,"尚":3522,"少":2442,"對":12131,"小":25320,"導":2988,"專":3315,"將":4225,"島":4996," 多":609,"岸":1457,"岩":955,"彈":1036,"形":4081,"役":939,"影":4543,"式":7142,"引":4806,"張":2155,"強":1613," 小":712,"念":1686,"往":1561,"律":1728,"後":11603,"得":4825,"從":3819,"德":8443,"幹":610,"年":47473,"平":5943,"帝":2671,"帕":627,"希":2358,"布":6774,"常":6175,"帶":2450,"師":2700,"席":1237,"延":769,"建":8865,"廣":7680,"廠":829,"底":1070,"店":1431,"康":1164,"度":6845,"座":2605,"大":34205,"央":1335,"天":6785,"夫":2310,"太":3614,"失":939,"外":5866,"多":11013,"奧":3348,"女":3636,"好":1255,"如":3939,"始":3424,"委":2437,"場":5840,"報":2620," 和":619,"增":971,"境":2254," 在":1006,"子":9582,"存":2418,"學":28894,"安":5183,"它":4320,"定":6314,"宗":2121,"宮":11665,"客":2255,"宣":1095,"家":33259,"富":1263,"密":1751,"察":902,"寫":2568,"實":4042,"威":2163,"媒":887,"更":2088,"曲":2693,"曾":3297,"書":4421,"是":61135,"映":606,"春":891,"星":4383,"易":1898,"普":2726,"智":821,"時":14789,"晚":728,"架":926,"林":7015,"果":2193,"查":1544,"未":4199,"木":1871,"本":13516,"望":1009,"朝":3760,"期":7068,"朗":950,"月":15722,"有":25006,"服":2638,"最":10258,"會":16872,"松":993,"東":12692,"村":1739,"李":1578,"播":2488,"擊":1563,"據":2801,"·丁":953,"·丕":684,"·丈":742,"料":1908,"文":14926,"於":85512,"施":1293,"斯":13761,"旁":958,"旅":1333,"族":3698," 或":592,"日":21067,"早":2137,"放":2161,"改":3194,"收":1971,"支":2879,"教":9547,"故":1906,"數":7013,"整":1506,"括":3403,"拉":7094,"拔":3534,"技":2726,"抗":915,"投":1689,"承":1001,"批":886,"所":11497,"手":3280," 年":38814," 
平":1137,"打":1356,"戰":6668,"戲":2673,"房":1031,"我":858,"成":16686,"或":10999,"提":4217,"推":2160,"控":1213,"接":3506,"排":1256,"持":2131,"指":5892,"情":1951,"息":1210,"應":3488,"感":1000,"愛":2219,"游":601,"測":1417,"港":10903,"清":2633,"湖":3859,"源":2955,"滿":823,"漢":2629,"演":3758,"澳":2170,"濟":2246,"民":10942,"水":5379,"氣":1960,"江":5858,"求":1019,"決":1728,"沒":1469,"沙":2360,"河":5480,"油":1041,"治":5045,"波":2725,"派":1934,"活":2827,"洲":5278,"流":4541,"消":1003,"深":1422,"機":7988,"樓":1698,"標":3283,"樂":5292," 是":2270," 月":13683,"武":2208,"此":5462,"止":1051,"正":4102,"歷":3537,"歲":679,"死":6673,"歌":2412,"歐":2776,"次":4908,"款":922,"權":2775,"母":1810,"比":4792,"毒":761,"殺":3118,"案":1653,"栽":2979,"根":2237,"核":1380,"校":3603,"條":3385,"楊":679,"業":8348,"植":5169,"構":3354," 日":10789,"概":795,"石":3331,"知":2143,"省":5241,"眾":1402,"皇":2166,"的":145617,"目":9288,"直":3002,"發":12643,"白":2538,"百":1845,"登":1101,"病":1342,"町":605,"甸":897,"田":1992,"由":17477,"用":13939,"產":5389,"生":19162,"甘":1400,"當":5199,"畫":2821,"略":871,"留":781,"界":6587,"環":2430,"理":9565,"球":7020,"區,一":683,"現":8423,"班":1762,"王":5328,"獎":2259,"獲":1989,"片":2524,"牌":1136,"物":13950,"特":9797,"爭":2331,"爾":10487,"營":2531,"照":1001,"然":2724,"無":3430,"灣":8336,"火":1815,"置":1766,"署":979,"羅":6361,"美":10443,"群":2244,"義":5695,"習":841,"總":5592,"縣":5350,"繼":1038,"續":1407,"索":1368,"素":1525,"納":2465,"約":5697,"紀":4111,"級":4056,"統":6648,"組":5814,"結":3076,"綠":801,"維":3676,"網":4429,"經":8789,"綜":642,"編":2471,"線":7038,"簡":4320,"米":9228,"系":6773,"等":12258,"策":935,"第":11129,"篇":721,"節":2370,"算":1983,"積":2283,"究":2927,"空":4143," 的":1486,"程":4595,"稱":17006,"種":13270,"站":6147,"立":9659,"競":659,"童":907,"突":731,"票":889,"神":3459,"區,多":1245,"社":4421,"示":1588,"移":1020,"私":736,"科":10232,"福":3334,"破":649,"要":8322,"規":2224,"視":4488,"親":1395,"觀":2300,"角":3003,"解":2370,"西":17796,"被":7902,"製":3213," 萬":1397,"衛":1641,"街":2230,"術":4294,"行":14995,"表":5436,"變":2770,"譯":3201,"警":737,"議":3043,"護":1664,"證":1490,"調":1479,"說":3940,"語":12000,"認":2740,"論":3370," 號":1554,"設":6262,"記":2497,"計":5475,"訊":1564,"言":2973,"該":4761,"話":1875,"評":1093,"路":11367,"越":2102,"超":1905,"足":2739,"起":4291,"賓":786,"資":4396,"賽":6003,"質":2688,"費":1408,"貨":906,"責":1207,"象":1864,"谷":1752,"近":3107,"辦":2818,"農":1197,"轉":1925,"較":2015,"載":1405,"車":6562,"身":2869,"致":1208,"至":9837,"自":8751,"而":9484,"者":7426,"聞":13745,"聖":2663," 米":6617,"聯":14082,"聲":1069,"肅":1122,"股":1263,"育":2954,"能":5599,"草":3974,"菲":888,"華":6888,"般":2564,"航":2033,"興":1795,"舉":3075,"與":14328,"艦":940,"艾":758,"色":3577,"花":3113,"英":9681,"藝":2269,"藥":973,"葉":1953,"著":4637,"萬":2757,"落":1486,"蒂":673,"處":4121,"號":5640,"蘭":5557,"蘇":3088," 英":759,"風":2494,"食":1389,"飛":1667,"領":2927,"預":955,"項":2370,"類":5147,"馬":6983,"香":9518,"館":2367,"體":9444,"高":8889," 阿":586," 香":621,"點":3460,"黑":1831,"黃":2289,"黨":2390,"龍":3877,"連":2837,"造":2926,"進":5235,"送":620,"這":6954,"通":7591,"速":2179,"遺":895,"選":3181,"過":6349,"運":6466,"遊":3024,"達":4169,"還":1604,"邊":2782,"郡":912,"部":14529,"都":5256,"配":1052,"醫":1928,"金":5699,"野":1096,"量":3732,"里":6615,"重":5591,"銀":1423,"錄":1696,"鎮":2372,"鐵":4592,"間":8271,"開":7281,"門":4672,"降":626,"限":2369,"院":5168,"除":1568,"陳":1215,"陸":5523,"陽":2042,"防":1167,"阿":4360,"離":1505,"難":627,"電":10735,"雲":2700,"隊":3986,"際":4571,"青":2396,"非":3371,"面":6432,"需":1196,"響":1404,"音":4823,"가":589,")":53309,"(":53597,"-":1381,",":211682,":":15729,";":5850," (":1546," )":2103," ,":4859,"車站,":720,"國的特":944,"植物。":2875,"植物,":1199,"基丁教":662,"等地,":2994,"民共和":1261,"。 ":3480,"、 ":1427,"」 ":591,"》 
":624,"國大陸":3816,"地區,":3519,"。這":1771,"、雲":1235,"」的":1264,"、福":680,"。現":601,"、甘":835,"。由":929,"、西":1055,"。該":1503,"四川、":1228,"、廣":1717,"、山":2030,"、安":622,"。它":1379,"、四":1327,"。在":1916,"、台":622,"。分":2945,"、勞":792,"、印":867,"、倫":1945,"。其":1173,"、湖":1562,"、河":1284,"、江":1104,"。此":759,"》是":802,"、日":634,"ああ":1707,"、《":1196,"、「":974,"》、":1063,"」、":966,"」。":2150,"》中":594,"「丕":727,"「丞":592,"《丈":880,"、休":613,"。他":1366,"「丈":1131,"《丕":588,"。乘":718,"、人":863,"、丞":3044,"。丁":930,"、丕":3444,"、不":1334,"、丁":2563,"、丈":4244,"、乘":1329,"。丞":586,"。丕":1165,"、中":959,"。丈":1017,"アア":1643,"地區的":687,"」(":1271,"」,":3388,"」)":800,"》(":2216,"》,":1818,"在中國":1179,"立於 ":1061,"栽培。":2896,"是香港":1720,"休 ":665,"使 ":756,"不、":2094,"不。":739,"丈」":981,"丈》":585,"丈、":2906,"丈。":2027,"丁、":2136,"丁。":1921,"一。":1804,"丞、":1947,"丞。":1850,"丕」":693,"丕。":1789,"丕、":2245,"中、":800,"人。":1182,"事。":826,"交。":660,"亞、":852,"倫 ":745,"丈山":729,"丈屬":937,"不家":1629,"丈小":1088,"丈家":1181,"丁屬":1243,"中國":13565,"丈學":945,"丈子":859,"一家":831,"丕大":655,"乘倫":1335,"丈大":605,"丕地":870,"丁大":588,"乘丁":705,"乘一":736,"佐。":1221,"乘丈":663,"佐、":761,"丈地":748,"丞和":584,"之一":4708,"丁國":1032,"丞勞":850,"丕勞":1045,"丈和":615,"不同":2051,"不勞":723,"中使":2667,"不區":1235,"中一":646,"丞使":1157,"丞倫":1278,"丈勞":1514,"丞人":604,"丈克":1050,"丈其":690,"丕使":877,"一勞":758,"丞佐":1297,"不公":954,"丁勞":1850,"並丁":839,"丕倫":1815,"丞休":1067,"丈倫":1781,"不使":1931,"休。":1287,"休、":1097,"丁克":614,"丞丈":3784,"丞不":839,"丞丁":2627,"丞丕":3712,"丕休":1238,"丞丞":2873,"丕佐":865,"不交":3733,"不京":1465,"不事":953,"丈休":1744,"一個":9748,"丈使":1114,"丈佐":1124,"丁倫":2045,"丕丞":2819,"丕丕":4439,"不休":656,"丕不":779,"丕丈":4929,"丕丁":2910,"丕乘":689,"丈不":986,"丈丈":10533,"丈丕":5787,"丈丞":4591,"丁休":2124,"丈之":678,"上不":1486,"一位":1048,"丈乘":1046,"丁佐":630,"丈事":641,"丁使":3115,"不丈":1385,"不丁":1326,"不丕":1047,"不不":1140,"不丞":1080,"一丈":689,"丁丕":3008,"丁丞":3158,"丁丈":5062,"丁丁":3771,"丁不":1124,"丁乘":842,"一些":1051,"份、":614,"丈一":1125,"丈丁":5951,"丕樂":705,"主教":1120,"任何":653,"丞業":742,"中文":1114,"人口":1812,"一次":950,"丈林":808,"一條":1013,"之後":1287,"種栽培":2894,"丕於":989,"他們":991,"事場":701,"丞於":627,"丈於":1045,"人使":893,"丁是":707,"丈是":753,"不拔":3356,"二十":705,"人丁":915,"人丈":695,"主席":690,"丁教":802,"丁斯":911,"丁於":991,"丁文":610,"一所":718,"交休":759,"乘大":1193,"事務":628,"不式":1114,"中學":1267,"交不":718,"使。":1730,"使、":1391,"丁店":631,"一座":888,"中央":1281,"丕小":677,"丕屬":955,"事交":1241,"丞家":1096,"丞寫":642,"佐勞":651,"中的":2868,"休如":836,"丈科":1482,"一種":4747,"丞的":2226,"份年":660,"使休":786,"使代":599,"使中":614,"二次":594,"使丈":1261,"使上":902,"使丁":966,"使丞":904,"使丕":680,"下的":914,"世界":3842,"上的":1685,"丈的":3553,"乘江":1087,"佐丞":598,"佐丕":890,"丁的":2951,"佐不":741,"佐丈":1040,"佐丁":1316,"不生":995,"交於":631,"丕的":2516,"不的":1824,"倫。":904,"倫、":1101,"丈爾":927,"丕然":602,"丁爾":1156,"也有":714,"不現":1191,"人工":3084,"丈球":618,"休丈":1588,"休丁":1289,"休丕":1088,"休丞":1470,"以及":5705,"不治":1653,"休倫":752,"也是":2445,"乘斯":612,"休佐":739,"休休":1180,"休使":949,"丁語":647,"交的":662,"何成":648,"丕西":1266,"但是":741,"人的":1304,"丕語":843,"主義":1789,"丁蘭":617,"事的":883,"人物":959,"企業":965,"丁西":590,"中華":3016,"一般":2431,"不聯":718,"人民":2356,"丈至":635,"丈草":589,"使團":1465,"前 ":869,"作家":983,"作品":1437,"使倫":938,"世紀":2133,"亞洲":978,"丁聯":1080,"乘的":932,"到 ":1125,"丞 ":1327,"丕 ":1248,"稱於「":1031,"丈 ":1426,"丈·":665,"丁 ":1214,"丁·":957,"地、":843,"商業":792,"國、":913,"倫,":2201,"又譯":655,"問宮":915,"單位":908,"員會":1240,"可能":966,"南部":1208,"名稱":1366,"參與":743,"各種":710,"名的":1486,"同的":917,"又稱":2281,"使(":775,"使,":3019,"位,":692,"佐,":2446,"台灣":5826,"何,":895,"作,":878,"只有":589,"南等":606,"休(":898,"休,":2604,"同時":1361,"和國":2079,"名於":1540,"在 
":4228,"命名":845,"和丕":658,"和丞":617,"區的":1565,"創辦":644,"和丈":1045,"和丁":617,"前身":709,"交,":1096,"人,":2364,"反應":687,"分類":693,"化的":604,"事,":1473,"勞立":1122,"國王":742,"城事":1733,"因此":1472,"國民":1001,"基丁":1219,"基丞":778,"因於":1311,"多生聞":623,"分,":730,"國大":4188,"四川":1895,"地區":6934,"國國":1282,"在台":591,"國家":4180," 公里":1210,"國丁":723,"國丈":683,"國不":1346,"國人":979,"內,":588,"在丕":590,"在不":766,"在丈":1169,"在丁":857,"地不":1518,"在中":1499,"培。":2897,"保護":897,"公園":1063,"全國":1075,"共和":2094,"共同":713,"公司":6083,"和 ":817,"分丁":734,"勞。":848,"勞、":713,"使蘭":928,"佐責":846,"倫爾":803,"及 ":587,"來自":904,"人類":938,"其他":1807,"公共":721,"倫用":717,"內信":886,"其中":1932,"兩個":962,"倫的":1758,"俄羅":1193,"內丁":817,"交通":965,"代表":1917,"使用":3276,"使理":2623,"來的":817,"使的":2298,"何的":820,"佐的":1260,"作用":690,"之間":1600,"倫州":1179,"作的":823,"何能":735,"倫有":1307,"九龍":820,"倫斯":769,"使究":2861,"倫何":704,"不部":1510,"倫休":862,"代的":605,"倫倫":983,"倫克":676,"使於":674,"使本":654,"位於":6989,"個人":1069,"丁近":678,"作於":1594,"丈車":636,"主要":4266,"一部":1589,"他的":940,"倫丈":1719,"倫丁":1808,"倫丞":1219,"倫丕":1126,"倫中":896,"信息":690,"丈體":712,"倫多":1308,"倫勞":1159,"休畫":658,"人聞":943,"休的":1677,"不面":1013,"及丈":747,"及中":2170,"丕(":1556,"丕,":3869,"下,":946,"上,":1460,"丈(":2498,"丈,":5085,"半島":716,"不,":1457,"一,":2375,"丁)":1026,"丁,":3944,"丁(":1688,"包括":3326,"參加":686,"員。":603,"中,":3370,"及其":661,"創立":812,"原名":722,"丞,":4229,"丞(":1428,"乘,":704,"動物":1355,"又名":874,"勞灣":831,"印度":2180,"可以":2687,"台不":1162,"勞的":1695,"協會":1036,"名使":755,"動畫":650,"合作":595,"包含":733,"南不":932,"公路":1194,"化學":839,"於香港":1347,"利用":694," 世紀":1443,"區域":760,"公里":2070,"內的":698,"倫體":1121,"前尚":2709,"分於":856,"勞丞":1027,"勞休":691,"勞任":888,"勞不":751,"勞丈":1379,"勞丁":1206,"勞丕":1033,"勞倫":1202,"勞佐":918,"名。":909,"分別":960,"利亞":1460,"傳統":899,"共有":696,"具有":1054,"區。":805,"區、":855,"南、":2848,"創作":724,"分布":3537,"全球":780,"加乘":907,"場,":657,"州事":599,"工作":1351,"巴乘":819,"學院":1919,"屬的":3350,"度、":646,"帝國":1099,"工引":2892,"平不":1295,"山谷":592,"年代":1818,"布在":1588,"山西":699,"小都":651,"實際":604,"建、":593,"小說":1488,"巴使":882,"希丕":1033,"尚未":2823,"定的":781,"州、":1207,"地,":4121,"川、":1254,"、雲南":1181,"對於":1252,"它的":613,"學的":914,"山坡":1674,"國,":702,"學生":932,"小的":1375,"山東":747,"屬於":2028,"德·":729,"定義":598,"媒體":593,"就是":946,"家的":1003," 年 ":13455,"形式":845,"形成":781,"德國":1532,"後來":987,"廣播":596,"廣東":1877,"建於":727,"年的":1359,"建立":1105,"引種":2905,"年至":809,"拔 ":3138,"廣乘":652,"外,":1036,"工業":864,"布於":1775,"建佐":1764,"年在":773,"廣州":765,"工程":1106,"已經":809,"常生":722,"廣場":674,"團體":692,"大學":4750,"國際":3079,"名:":4017,"名,":805,"大戰":639,"司,":903,"司(":707,"太平":715,"委員":1628,"多生":1260,"動,":770,"勞,":1822,"國立":626,"國的":3067,"基本":664,"境內":659,"區,":4922,"大利":1318,"大使":1011,"家 ":671,"天休":592,"天主":779,"大丈":858,"大丕":898,"學中":593,"大陸":4384,"學使":615,"安丁":874,"存在":831,"學名":3781,"家丈":660,"家丁":836,"學家":1871,"島、":590,"年 ":13925,"小丕":726,"小丈":935,"家國":1674,"專倫":637,"對係":1024,"尼亞":1180,"學校":1548,"它是":687,"家律":639,"宗教":606,"山丞":856,"大的":1820,"品,":659,"學、":1095,"未由人":2697,"員,":1301,"太空":651,"家、":1074,"家。":1302,"大部":622,"東不":1591,"是由":1946,"有對":649,"於美":890,"最大":1529,"成,":1008,"最早":863,"是美":1276,"日至":621,"最後":917,"時的":620,"朝宮":1146,"林中":937,"林下":865,"林丈":744,"林丕":654,"東南":851,"是於":666,"是日":750,"最丈":682,"教育":1764,"斯特":828,"是指":1896,"服務":1801," 日 ":1059,"時期":1763,"有一":1032,"有丈":631,"栽培":2950,"時間":1470," 月 ":10453,"期的":813,"未由":2698,"有的":742,"東省":644,"是香":1731," 年,":2603," 
年)":2399,"有植":939,"有時":596,"會的":752,"於香":1366,"故事":841,"後,":2045,"教丁":618,"教休":765,"文使":808,"、湖不":767,"斯丁":773,"文化":2109,"、江西":640,"於丁":1931,"於一":1279,"於丈":3104,"於不":3971,"於丕":2015,"於丞":1568,"於中":3228,"於主":1138,"式,":713,"於「":3160,"是「":751,"是台":621,"時代":1179,"是在":1454,"東、":1223,"於是":782,"於日":1024,"早期":592,"日本":5548,"於倫":790,"、湖南":720,"名:)":3240,"於了":898,"於人":919,"斯坦":674,"文學":1074,"於台":1510,"數學":761,"於勞":777,"數據":647,"於山":668,"是位":626,"是丕":1220,"是丞":853,"是中":2852,"日在":676,"是丁":1400,"是一":8457,"是不":724,"是丈":2046,"教會":604,"是 ":1586,"推不":777," 年的":915,"或稱":642,"成立":2635,"戰爭":1266,"成的":1247,"日 ":1136,"於 ":12568,"斯·":1030,"所有":1063,"應用":957,"拉丁":1113,"成於":2501,"投資":589,"月 ":10505,"有 ":1922,"技術":1647,"年(":1135,"年)":2513,"年,":3020," 年至":781,"提不":748,"提供":1573,"控制":697,"或者":865,"宮,":689,"家(":883,"家,":2956,"學(":695,"學,":1273,"子,":930,"成。":673,"年間":605,"廣西":1228," 年在":693,"屬(":1280," 年代":1445," 平不":1026,"成員":1192,"所以":843,"成何":668,"影響":1080,"小,":1059,"委員會":1211,"流行":628,"業,":586," 日)":775," 日,":1367,"源於":682,"爾·":762,"沒有":1285,"活動":1238,"比賽":1111,"江蘇":807,"江西":972,"湖不":1117,"湖南":1294,"江、":1010,"斯(":607,"文:":2388,"機構":1217,"死對":1361,"、廣東":743,"日,":1544,"日)":813,"歷使":2356," 日至":616,"民國":1569,"歐洲":1588,"民主":899,"民共":1266,"正式":1274,"時,":1278,"河南":853,"月,":734,"、廣西":861,"會,":853,"會(":980,"民族":1066,"期,":613,"河不":810,"根據":1260,"會議":766,"最高":966,"期間":1013,"有限":1519,"標佐":1052,"植物":4908," 是一":681,"概念":627," 日在":656,"的電":999,"稱「":1043,"的西":633,"發行":1127,"的重":753,"第 ":1807,"的發":596,"的第":1621,"物,":2112,"目的":1040,"的是":1255,"的最":739,"的植":2894,"直接":612,"發現":996,"的特":1537,"積 ":785,"發生":1316,"的對":648,"的小":1527,"的家":632,"發展":2338,"的大":1229,"的國":1195,"的地":4227,"目前":4386,"的丁":3310,"的一":9461,"用的":1238,"生的":842,"產生":949,"生產":964,"球隊":888,"生物":1143,"生聞":4477,"甘肅":1049,"的名":796,"的勞":1368,"的倫":1653,"的休":1719,"的使":1916,"的佐":1046,"的作":651,"的主":1361,"的中":1197,"的丕":3244,"的丞":2994,"的丈":5094,"的不":2456,"的乘":747,"的人":1899,"的交":1021,"生於":2502,"用於":1261,"由於":1686,"理論":1080,"當時":1405,"生活":843,"生在":746,"產品":771,"的。":900,"的《":764,"的「":1273,"環境":892,"由人":2997,"用來":777,"理學":1070,"現在":1079,"王朝":657,"現代":897,"獲得":1128,"的 ":1662,"用。":731,"班丁":801,"物理":701,"特有":1087,"特別":1033,"澳門":921,"、福建":587,"物。":3616,"聯合":1423,"而成":666,"聞在":1114,"等,":840,"聞於":3447,"肅、":797,"網路":1657,"總統":750,"稱,":608,"種,":743,"美國":6254,"有限公":1342,"義大":928,"群島":661,"羅斯":1391,"聯事":701,"美洲":846,"站,":1417,"站(":605,"聯休":1628,"立,":947,"經濟":1788,"統的":590,"經營":801,"、甘肅":803,"線的":712,"至 ":5663,"自 ":699,"紀念":700,"簡稱":3399,"組成":1456,"。由於":638,"聞 ":661,"米的":3024,"結構":812,"米至":2731,"系統":2899,"統治":615,"目,":621,"結丁":586,"組佐":2037,"約丈":598,"系使":1485,"節目":923,"立於":1349,"等地":3334,"的,":1232,"天主教":743,"立的":1251,"生,":616,"用,":772,"程式":650,"科技":622,"約 ":1720,"第三":1132,"第一":3441,"第二":2120,"稱於":4215,"種栽":2894,"科丈":1047,"等。":1259,"科學":1716,"福建":1133,"社會":2251,"站。":627,"角色":606,"計算":1063,"設立":673,"西部":622,"製造":737,"認於":1497,"西班":773,"要的":1104,"被稱":1048,"計劃":875,"越南":953,"資料":957,"賽事":631,"語言":1720,"設計":1777,"號,":798,"處,":614,"譯於":650,"超過":651,"路線":849,"足球":1703,"資訊":664,"行,":1162,"醫學":647,"進行":2272,"都是":704,"過程":791,"這種":792,"部分":2342,"部份":776,"遊戲":1924,"連接":662,"通常":1653,"運動":2496,"語:":3929,"說,":690,"車站":2410,"這個":1488,"這些":923,"於日本":917,"自治":728,"自然":792,"英國":2374,"自由":877,"般生":678,"線,":642,"航空":1143,"草地":752," 米的":2958,"自丁":786,"羅馬":1149,"與丈":666," 米至":2728,"米,":753,"聯賽":916,"華民":1238,"英語":2016,"者,":759,"舉行":1293,"英文":2836,"舉辦":651,"華人":1471,"著名":1732,"蘇聯":599,"處理":911,"藝術":1217,"行。":771,"聞,":628,"行於":853,"製作":1162,"西南":747,"行的":1255,"西份":1005,"西亞":1235,"西不":1211,"行不":2204,"語 
":675,"西、":2735,"中,目":813,"是位於":586,"間,":1121,"是一種":2255,"香港":8631,"高丁":607,"體的":647,"隊,":588,"高速":865,"體育":810,"及中國":2028,"公里,":1100,"體,":795,"是一個":2235,"丈(學":765,"是中國":2143,"於台灣":1145,"選舉":733,"通過":1270,"部的":1066,"賽,":689,"路,":930,"重要":1746,"銀行":879,"鐵家":726,"的第一":601,"重要的":694,"開始":2056,"部,":1123,"除了":587,"限公":1343,"鐵路":1902,"電勞":1154,"雲南":1736,"阿拉":770,"間的":1117,"開發":1451,"陸的":3080,"里,":1236,"電影":2042,"阿爾":603,"電子":1071,"面積":1525,"非洲":681,"領域":708,"項目":726,"電視":2609,"音樂":1761,"需要":625,"馬來":712,"有植物":938,"的特有":970,"是美國":1183,"的植物":2889,"日至 ":606,"前尚未":2703,"最大的":952,"於美國":799,"加乘大":716,"是日本":699,"目前尚":2707,"的地區":3107,"生聞在":1091,"生聞於":3237,"甘肅、":794,"的一部":774,"的主要":668,"的一種":1474,"年),":737,"的一個":3094,"限公司":1342,"年( ":710,"於不拔":2919,"於中國":2310," ,是":653,"南等地":586,"),":14128,"()":3756,"(,":1173,",)":1558,":,":731,":)":4068,"由人工":2898,",東":861,",有":1487,",最":1123,",曾":1063,",是":13163,")是":10839,",於":6392,",此":660,",後":1253,",從":882,",常":1047,",故":644,")於":2446,",指":584,",所":993,",或":1331,",成":1293,")的":2203,",目":3631,",簡":2031,",第":778,",現":1338,",由":3905,",用":642,",生":3524,",當":880,",西":834,",被":860,",該":1017,",經":598,",總":744,",美":597,",而":3222,",英":973,",與":1255,"(英":2412,",香":721,",通":885,",這":1582,",「":615,",《":690,")。":3711,")、":2876,"(學":3641,",它":1379,",屬":841,",小":787,",對":650,",因":2415,",在":4525,",多":1566,",大":803,",如":966,",前":662,",分":861,",南":727,",即":1028,",勞":1219,",包":1288,")和":952,",同":897,",台":652,",可":1102,",又":2944,",原":1271,",不":2771,",丕":3583,",丞":2297,",並":3993,",中":1698,",主":1659,"(今":666,",乘":1385,",也":3255,"(丈":751,",丈":5090,",丁":3996,",一":2348,",休":1371,",但":3353,",位":2751,",佐":784,",使":1949,",交":834,",亦":1195,",人":871,",以":3497,",他":1340,",份":756,",倫":1535,",共":754,",其":3556,",全":1257,"( ":3933,") ":996,", ":7569,"- ":815,"雲南、":1086,"丁屬(":787,"華民國":1215,"廣西、":800,"中國的":1325,"中國大":3838,"之一,":2265,"英語:":1339,"不拔 ":3137,"引種栽":2892,"丈屬的":703,"人工引":2892," ),":596,"乘江、":593,"特有植":935,"以及中":1990,"一般生":678,"中華人":1156,"英文:":1510,"丕西、":804,"中華民":1237,"廣東、":761,"華人民":1156,"人民共":1261,":)於":1432,":)是":1898,"),又":978,"),是":2411,"()是":1113,",),":1004,"著名的":780,"(),":1524,"位於香":588,"年至 ":713,"面積 ":770,"倫州、":844,"之間的":628,"成立於":922,"內丁使":611,"俄羅斯":1193,",香港":708,"物。分":2798,"公司(":691,"公司,":864,"行不區":1022,"共和國":1943,"分布於":1717,"分布在":1578,"》、《":944,"」、「":864,",於中":674,")是丈":645,")是一":2670,",是一":2178,",是中":1286,"湖南、":689,"あああ":1321,"尚未由":2697,"地,生":2880,",常生":667,",於 ":2197,",所以":698,",又稱":1386,",因此":1151,",多生":1249,"聞於不":2897,"(學名":3634,"。分布":2903,"、印度":793,"、乘江":625,"、丕西":798,"、倫州":850,"、山坡":668,"被稱於":856,"、四川":1250,"大陸的":3064,"アアア":1275,"學名:":3509,"(英語":1020,"(英文":1336,"義大利":914,",目前":3420,"西班丁":773,",生聞":2926,",簡稱":1929,"平不公":818,"工引種":2892,",包括":1128,"米的地":2889,",又名":585,",在 ":642,",其中":943,",以及":1003,",也是":1481,",一般":1212,",主要":1207,",位於":2550,",並丁":649,",中國":911,"屬的植":2808,"丁使、":633,"丈丈丁":628,"江西、":618,"湖不、":763,"不同的":723,"米至 ":2729,"之一。":1752,"不公里":761,"丈丈,":616},"n_words":[4924775,1867501,309785],"name":"zh-tw"}
\ No newline at end of file
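Each langdetect profile added by this commit is a single-line JSON object: "freq" maps 1- to 3-character n-grams to corpus counts, "n_words" holds the total counts per n-gram length (unigrams, bigrams, trigrams), and "name" is the language code the detector reports. A minimal sketch of reading one profile directly (the path assumes the repository layout above; detector_factory.py normalizes each count by the matching "n_words" entry in much the same way):

import json

with open("nlp_resource_data/langdetect/profiles/zh-tw", encoding="utf-8") as f:
    profile = json.load(f)

print(profile["name"])     # 'zh-tw'
print(profile["n_words"])  # totals for 1-, 2- and 3-character n-grams

# Relative frequency of one trigram, normalized by the total for its length:
ngram = "中國大"
p = profile["freq"].get(ngram, 0) / profile["n_words"][len(ngram) - 1]
print(ngram, p)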
old mode 100755 (executable)
new mode 100644 (file)
diff --git a/nlp_resource_data/langdetect/utils/__init__.pyc b/nlp_resource_data/langdetect/utils/__init__.pyc
deleted file mode 100755 (executable)
index 565c461..0000000
Binary files a/nlp_resource_data/langdetect/utils/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/langdetect/utils/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/langdetect/utils/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0e5818c
Binary files /dev/null and b/nlp_resource_data/langdetect/utils/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/langdetect/utils/__pycache__/lang_profile.cpython-37.pyc b/nlp_resource_data/langdetect/utils/__pycache__/lang_profile.cpython-37.pyc
new file mode 100644 (file)
index 0000000..733e25a
Binary files /dev/null and b/nlp_resource_data/langdetect/utils/__pycache__/lang_profile.cpython-37.pyc differ
diff --git a/nlp_resource_data/langdetect/utils/__pycache__/messages.cpython-37.pyc b/nlp_resource_data/langdetect/utils/__pycache__/messages.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8830b2d
Binary files /dev/null and b/nlp_resource_data/langdetect/utils/__pycache__/messages.cpython-37.pyc differ
diff --git a/nlp_resource_data/langdetect/utils/__pycache__/ngram.cpython-37.pyc b/nlp_resource_data/langdetect/utils/__pycache__/ngram.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e65f3e4
Binary files /dev/null and b/nlp_resource_data/langdetect/utils/__pycache__/ngram.cpython-37.pyc differ
diff --git a/nlp_resource_data/langdetect/utils/__pycache__/unicode_block.cpython-37.pyc b/nlp_resource_data/langdetect/utils/__pycache__/unicode_block.cpython-37.pyc
new file mode 100644 (file)
index 0000000..51dee4b
Binary files /dev/null and b/nlp_resource_data/langdetect/utils/__pycache__/unicode_block.cpython-37.pyc differ
diff --git a/nlp_resource_data/langdetect/utils/lang_profile.pyc b/nlp_resource_data/langdetect/utils/lang_profile.pyc
deleted file mode 100755 (executable)
index 1344115..0000000
Binary files a/nlp_resource_data/langdetect/utils/lang_profile.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
diff --git a/nlp_resource_data/langdetect/utils/messages.pyc b/nlp_resource_data/langdetect/utils/messages.pyc
deleted file mode 100755 (executable)
index 1601f17..0000000
Binary files a/nlp_resource_data/langdetect/utils/messages.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
diff --git a/nlp_resource_data/langdetect/utils/ngram.pyc b/nlp_resource_data/langdetect/utils/ngram.pyc
deleted file mode 100755 (executable)
index ac43ed1..0000000
Binary files a/nlp_resource_data/langdetect/utils/ngram.pyc and /dev/null differ
diff --git a/nlp_resource_data/langdetect/utils/unicode_block.pyc b/nlp_resource_data/langdetect/utils/unicode_block.pyc
deleted file mode 100755 (executable)
index fca22dc..0000000
Binary files a/nlp_resource_data/langdetect/utils/unicode_block.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 5ae69bd..5a95802
@@ -1 +1 @@
-3.2.5
+3.5
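This one-line file (presumably nlp_resource_data/nltk/VERSION) jumps from 3.2.5 to 3.5, matching the NLTK release that dropped Python 2 support. The nltk/__init__.py hunks below read this file at import time to populate __version__; a minimal sketch of that lookup, with the package path as an assumption:

import os

def read_version(package_dir):
    # Mirrors the try/except in nltk/__init__.py: fall back to a
    # marker string when the file is missing or unreadable.
    try:
        with open(os.path.join(package_dir, "VERSION"), "r") as infile:
            return infile.read().strip()
    except IOError as ex:
        return "unknown (%s)" % ex

print(read_version("nlp_resource_data/nltk"))  # expected: '3.5'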
old mode 100755 (executable)
new mode 100644 (file)
index c47f6be..32833cb
@@ -1,6 +1,6 @@
 # Natural Language Toolkit (NLTK)
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors: Steven Bird <stevenbird1@gmail.com>
 #          Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -15,7 +15,6 @@ Steven Bird, Ewan Klein, and Edward Loper (2009).
 Natural Language Processing with Python.  O'Reilly Media Inc.
 http://nltk.org/book
 """
-from __future__ import print_function, absolute_import
 
 import os
 
@@ -27,21 +26,21 @@ import os
 # in the file VERSION.
 try:
     # If a VERSION file exists, use it!
-    version_file = os.path.join(os.path.dirname(__file__), 'VERSION')
-    with open(version_file, 'r') as infile:
+    version_file = os.path.join(os.path.dirname(__file__), "VERSION")
+    with open(version_file, "r") as infile:
         __version__ = infile.read().strip()
 except NameError:
-    __version__ = 'unknown (running code interactively?)'
+    __version__ = "unknown (running code interactively?)"
 except IOError as ex:
     __version__ = "unknown (%s)" % ex
 
 if __doc__ is not None:  # fix for the ``python -OO``
-    __doc__ += '\n@version: ' + __version__
+    __doc__ += "\n@version: " + __version__
 
 
 # Copyright notice
 __copyright__ = """\
-Copyright (C) 2001-2017 NLTK Project.
+Copyright (C) 2001-2020 NLTK Project.
 
 Distributed and Licensed under the Apache License, Version 2.0,
 which is included by reference.
@@ -52,10 +51,20 @@ __license__ = "Apache License, Version 2.0"
 __longdescr__ = """\
 The Natural Language Toolkit (NLTK) is a Python package for
 natural language processing.  NLTK requires Python 2.6 or higher."""
-__keywords__ = ['NLP', 'CL', 'natural language processing',
-                'computational linguistics', 'parsing', 'tagging',
-                'tokenizing', 'syntax', 'linguistics', 'language',
-                'natural language', 'text analytics']
+__keywords__ = [
+    "NLP",
+    "CL",
+    "natural language processing",
+    "computational linguistics",
+    "parsing",
+    "tagging",
+    "tokenizing",
+    "syntax",
+    "linguistics",
+    "language",
+    "natural language",
+    "text analytics",
+]
 __url__ = "http://nltk.org/"
 
 # Maintainer, contributors, etc.
@@ -66,24 +75,24 @@ __author_email__ = __maintainer_email__
 
 # "Trove" classifiers for Python Package Index.
 __classifiers__ = [
-    'Development Status :: 5 - Production/Stable',
-    'Intended Audience :: Developers',
-    'Intended Audience :: Education',
-    'Intended Audience :: Information Technology',
-    'Intended Audience :: Science/Research',
-    'License :: OSI Approved :: Apache Software License',
-    'Operating System :: OS Independent',
-    'Programming Language :: Python :: 2.6',
-    'Programming Language :: Python :: 2.7',
-    'Topic :: Scientific/Engineering',
-    'Topic :: Scientific/Engineering :: Artificial Intelligence',
-    'Topic :: Scientific/Engineering :: Human Machine Interfaces',
-    'Topic :: Scientific/Engineering :: Information Analysis',
-    'Topic :: Text Processing',
-    'Topic :: Text Processing :: Filters',
-    'Topic :: Text Processing :: General',
-    'Topic :: Text Processing :: Indexing',
-    'Topic :: Text Processing :: Linguistic',
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Education",
+    "Intended Audience :: Information Technology",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 2.6",
+    "Programming Language :: Python :: 2.7",
+    "Topic :: Scientific/Engineering",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Scientific/Engineering :: Human Machine Interfaces",
+    "Topic :: Scientific/Engineering :: Information Analysis",
+    "Topic :: Text Processing",
+    "Topic :: Text Processing :: Filters",
+    "Topic :: Text Processing :: General",
+    "Topic :: Text Processing :: Indexing",
+    "Topic :: Text Processing :: Linguistic",
 ]
 
 from nltk.internals import config_java
@@ -96,13 +105,18 @@ except ImportError:
 
 # Override missing methods on environments where it cannot be used like GAE.
 import subprocess
-if not hasattr(subprocess, 'PIPE'):
+
+if not hasattr(subprocess, "PIPE"):
+
     def _fake_PIPE(*args, **kwargs):
-        raise NotImplementedError('subprocess.PIPE is not supported.')
+        raise NotImplementedError("subprocess.PIPE is not supported.")
+
     subprocess.PIPE = _fake_PIPE
-if not hasattr(subprocess, 'Popen'):
+if not hasattr(subprocess, "Popen"):
+
     def _fake_Popen(*args, **kwargs):
-        raise NotImplementedError('subprocess.Popen is not supported.')
+        raise NotImplementedError("subprocess.Popen is not supported.")
+
     subprocess.Popen = _fake_Popen
 
 ###########################################################
@@ -142,11 +156,12 @@ from nltk.stem import *
 #     that can safely fail at run time
 
 from nltk import lazyimport
-app = lazyimport.LazyModule('nltk.app', locals(), globals())
-chat = lazyimport.LazyModule('nltk.chat', locals(), globals())
-corpus = lazyimport.LazyModule('nltk.corpus', locals(), globals())
-draw = lazyimport.LazyModule('nltk.draw', locals(), globals())
-toolbox = lazyimport.LazyModule('nltk.toolbox', locals(), globals())
+
+app = lazyimport.LazyModule("nltk.app", locals(), globals())
+chat = lazyimport.LazyModule("nltk.chat", locals(), globals())
+corpus = lazyimport.LazyModule("nltk.corpus", locals(), globals())
+draw = lazyimport.LazyModule("nltk.draw", locals(), globals())
+toolbox = lazyimport.LazyModule("nltk.toolbox", locals(), globals())
 
 # Optional loading
 
@@ -158,8 +173,9 @@ else:
     from nltk import cluster
 
 from nltk.downloader import download, download_shell
+
 try:
-    from six.moves import tkinter
+    import tkinter
 except ImportError:
     pass
 else:
@@ -167,8 +183,11 @@ else:
         from nltk.downloader import download_gui
     except RuntimeError as e:
         import warnings
-        warnings.warn("Corpus downloader GUI not loaded "
-                      "(RuntimeError during import: %s)" % str(e))
+
+        warnings.warn(
+            "Corpus downloader GUI not loaded "
+            "(RuntimeError during import: %s)" % str(e)
+        )
 
 # explicitly import all top-level modules (ensuring
 # they override the same names inadvertently imported
@@ -180,6 +199,6 @@ from nltk import misc, parse, probability, sem, stem, wsd
 from nltk import tag, tbl, text, tokenize, translate, tree, treetransforms, util
 
 
-# override any accidentally imported demo
+# FIXME:  override any accidentally imported demo, see https://github.com/nltk/nltk/issues/2116
 def demo():
     print("To run the demo code for a module, type nltk.module.demo()")
diff --git a/nlp_resource_data/nltk/__init__.pyc b/nlp_resource_data/nltk/__init__.pyc
deleted file mode 100755 (executable)
index f2f789e..0000000
Binary files a/nlp_resource_data/nltk/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..625f133
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/book.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/book.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7eb915b
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/book.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/cli.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/cli.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0848537
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/cli.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/collections.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/collections.cpython-37.pyc
new file mode 100644 (file)
index 0000000..54cc532
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/collections.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/collocations.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/collocations.cpython-37.pyc
new file mode 100644 (file)
index 0000000..85a13f8
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/collocations.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/compat.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/compat.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a8163a3
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/compat.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/data.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/data.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a35d1c7
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/data.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/decorators.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/decorators.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0592dc3
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/decorators.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/downloader.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/downloader.cpython-37.pyc
new file mode 100644 (file)
index 0000000..5646828
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/downloader.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/featstruct.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/featstruct.cpython-37.pyc
new file mode 100644 (file)
index 0000000..9f2b636
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/featstruct.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/grammar.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/grammar.cpython-37.pyc
new file mode 100644 (file)
index 0000000..22d6037
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/grammar.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/help.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/help.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7f307fd
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/help.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/internals.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/internals.cpython-37.pyc
new file mode 100644 (file)
index 0000000..af20c90
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/internals.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/jsontags.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/jsontags.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7cecde8
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/jsontags.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/lazyimport.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/lazyimport.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8d4bfbd
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/lazyimport.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/probability.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/probability.cpython-37.pyc
new file mode 100644 (file)
index 0000000..6897e4f
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/probability.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/text.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/text.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8740f67
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/text.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/tgrep.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/tgrep.cpython-37.pyc
new file mode 100644 (file)
index 0000000..84f29c7
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/tgrep.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/toolbox.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/toolbox.cpython-37.pyc
new file mode 100644 (file)
index 0000000..d7552ab
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/toolbox.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/tree.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/tree.cpython-37.pyc
new file mode 100644 (file)
index 0000000..c737e0a
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/tree.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/treeprettyprinter.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/treeprettyprinter.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e4a9364
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/treeprettyprinter.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
similarity index 57%
rename from nlp_resource_data/nltk/treetransforms.pyc
rename to nlp_resource_data/nltk/__pycache__/treetransforms.cpython-37.pyc
index cf931cd..9e4dd09
Binary files a/nlp_resource_data/nltk/treetransforms.pyc and b/nlp_resource_data/nltk/__pycache__/treetransforms.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..3bd6059
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/util.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/wsd.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/wsd.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8b85d4d
Binary files /dev/null and b/nlp_resource_data/nltk/__pycache__/wsd.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index b843d88..458ac4c
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Applications package
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
@@ -22,11 +22,11 @@ wordnet:      WordNet Browser
 
 # Import Tkinter-based modules if Tkinter is installed
 try:
-    from six.moves import tkinter
+    import tkinter
 except ImportError:
     import warnings
-    warnings.warn("nltk.app package not loaded "
-                  "(please install Tkinter library).")
+
+    warnings.warn("nltk.app package not loaded " "(please install Tkinter library).")
 else:
     from nltk.app.chartparser_app import app as chartparser
     from nltk.app.chunkparser_app import app as chunkparser
@@ -41,12 +41,15 @@ else:
         from matplotlib import pylab
     except ImportError:
         import warnings
-        warnings.warn("nltk.app.wordfreq not loaded "
-                      "(requires the matplotlib library).")
+
+        warnings.warn(
+            "nltk.app.wordfreq not loaded " "(requires the matplotlib library)."
+        )
     else:
         from nltk.app.wordfreq_app import app as wordfreq
 
 # skip doctests from this package
 def setup_module(module):
     from nose import SkipTest
+
     raise SkipTest("nltk.app examples are not doctests")
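One detail worth noting in the reformatted warnings above: black left pairs of adjacent string literals, such as "nltk.app package not loaded " "(please install Tkinter library)." Adjacent literals are concatenated at compile time, so each call still emits a single message. A quick illustration (not from the commit):

import warnings

msg = "nltk.app package not loaded " "(please install Tkinter library)."
assert msg == "nltk.app package not loaded (please install Tkinter library)."
warnings.warn(msg)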
diff --git a/nlp_resource_data/nltk/app/__init__.pyc b/nlp_resource_data/nltk/app/__init__.pyc
deleted file mode 100755 (executable)
index 9fa49cd..0000000
Binary files a/nlp_resource_data/nltk/app/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/app/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..cc90f65
Binary files /dev/null and b/nlp_resource_data/nltk/app/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/app/__pycache__/chartparser_app.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/chartparser_app.cpython-37.pyc
new file mode 100644 (file)
index 0000000..10a3a53
Binary files /dev/null and b/nlp_resource_data/nltk/app/__pycache__/chartparser_app.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/app/__pycache__/chunkparser_app.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/chunkparser_app.cpython-37.pyc
new file mode 100644 (file)
index 0000000..c0a1d94
Binary files /dev/null and b/nlp_resource_data/nltk/app/__pycache__/chunkparser_app.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/app/__pycache__/collocations_app.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/collocations_app.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ad048ca
Binary files /dev/null and b/nlp_resource_data/nltk/app/__pycache__/collocations_app.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/app/__pycache__/concordance_app.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/concordance_app.cpython-37.pyc
new file mode 100644 (file)
index 0000000..bfb4ca5
Binary files /dev/null and b/nlp_resource_data/nltk/app/__pycache__/concordance_app.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/app/__pycache__/nemo_app.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/nemo_app.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2fc5157
Binary files /dev/null and b/nlp_resource_data/nltk/app/__pycache__/nemo_app.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/app/__pycache__/rdparser_app.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/rdparser_app.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7051cd5
Binary files /dev/null and b/nlp_resource_data/nltk/app/__pycache__/rdparser_app.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/app/__pycache__/srparser_app.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/srparser_app.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f3c02a8
Binary files /dev/null and b/nlp_resource_data/nltk/app/__pycache__/srparser_app.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/app/__pycache__/wordfreq_app.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/wordfreq_app.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0532e32
Binary files /dev/null and b/nlp_resource_data/nltk/app/__pycache__/wordfreq_app.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/app/__pycache__/wordnet_app.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/wordnet_app.cpython-37.pyc
new file mode 100644 (file)
index 0000000..da15b5f
Binary files /dev/null and b/nlp_resource_data/nltk/app/__pycache__/wordnet_app.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index bc68d88..92fff32
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chart Parser Application
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Jean Mark Gawron <gawron@mail.sdsu.edu>
 #         Steven Bird <stevenbird1@gmail.com>
@@ -37,26 +37,48 @@ edge you wish to apply a rule to.
 # widget system.
 
 
-from __future__ import division
 import pickle
 import os.path
 
-from six.moves.tkinter import (Button, Canvas, Checkbutton, Frame, IntVar,
-                               Label, Menu, Scrollbar, Tk, Toplevel)
-from six.moves.tkinter_font import Font
-from six.moves.tkinter_messagebox import showerror, showinfo
-from six.moves.tkinter_tkfiledialog import asksaveasfilename, askopenfilename
-
-from nltk.parse.chart import (BottomUpPredictCombineRule, BottomUpPredictRule,
-                              Chart, LeafEdge, LeafInitRule, SingleEdgeFundamentalRule,
-                              SteppingChartParser, TopDownInitRule, TopDownPredictRule,
-                              TreeEdge)
+from tkinter import (
+    Button,
+    Canvas,
+    Checkbutton,
+    Frame,
+    IntVar,
+    Label,
+    Menu,
+    Scrollbar,
+    Tk,
+    Toplevel,
+)
+from tkinter.font import Font
+from tkinter.messagebox import showerror, showinfo
+from tkinter.filedialog import asksaveasfilename, askopenfilename
+
+from nltk.parse.chart import (
+    BottomUpPredictCombineRule,
+    BottomUpPredictRule,
+    Chart,
+    LeafEdge,
+    LeafInitRule,
+    SingleEdgeFundamentalRule,
+    SteppingChartParser,
+    TopDownInitRule,
+    TopDownPredictRule,
+    TreeEdge,
+)
 from nltk.tree import Tree
 from nltk.grammar import Nonterminal, CFG
 from nltk.util import in_idle
-from nltk.draw.util import (CanvasFrame, ColorizedList,
-                            EntryDialog, MutableOptionMenu,
-                            ShowText, SymbolWidget)
+from nltk.draw.util import (
+    CanvasFrame,
+    ColorizedList,
+    EntryDialog,
+    MutableOptionMenu,
+    ShowText,
+    SymbolWidget,
+)
 from nltk.draw import CFGEditor, tree_to_treesegment, TreeSegmentWidget
 
 # Known bug: ChartView doesn't handle edges generated by epsilon
@@ -66,41 +88,47 @@ from nltk.draw import CFGEditor, tree_to_treesegment, TreeSegmentWidget
 # Edge List
 #######################################################################
 
+
 class EdgeList(ColorizedList):
-    ARROW = SymbolWidget.SYMBOLS['rightarrow']
+    ARROW = SymbolWidget.SYMBOLS["rightarrow"]
 
     def _init_colortags(self, textwidget, options):
-        textwidget.tag_config('terminal', foreground='#006000')
-        textwidget.tag_config('arrow', font='symbol', underline='0')
-        textwidget.tag_config('dot', foreground = '#000000')
-        textwidget.tag_config('nonterminal', foreground='blue',
-                              font=('helvetica', -12, 'bold'))
+        textwidget.tag_config("terminal", foreground="#006000")
+        textwidget.tag_config("arrow", font="symbol", underline="0")
+        textwidget.tag_config("dot", foreground="#000000")
+        textwidget.tag_config(
+            "nonterminal", foreground="blue", font=("helvetica", -12, "bold")
+        )
 
     def _item_repr(self, item):
         contents = []
-        contents.append(('%s\t' % item.lhs(), 'nonterminal'))
-        contents.append((self.ARROW, 'arrow'))
+        contents.append(("%s\t" % item.lhs(), "nonterminal"))
+        contents.append((self.ARROW, "arrow"))
         for i, elt in enumerate(item.rhs()):
             if i == item.dot():
-                contents.append((' *', 'dot'))
+                contents.append((" *", "dot"))
             if isinstance(elt, Nonterminal):
-                contents.append((' %s' % elt.symbol(), 'nonterminal'))
+                contents.append((" %s" % elt.symbol(), "nonterminal"))
             else:
-                contents.append((' %r' % elt, 'terminal'))
+                contents.append((" %r" % elt, "terminal"))
         if item.is_complete():
-            contents.append((' *', 'dot'))
+            contents.append((" *", "dot"))
         return contents
 
+
 #######################################################################
 # Chart Matrix View
 #######################################################################
 
+
 class ChartMatrixView(object):
     """
     A view of a chart that displays the contents of the corresponding matrix.
     """
-    def __init__(self, parent, chart, toplevel=True, title='Chart Matrix',
-                 show_numedges=False):
+
+    def __init__(
+        self, parent, chart, toplevel=True, title="Chart Matrix", show_numedges=False
+    ):
         self._chart = chart
         self._cells = []
         self._marks = []
@@ -110,7 +138,7 @@ class ChartMatrixView(object):
         if toplevel:
             self._root = Toplevel(parent)
             self._root.title(title)
-            self._root.bind('<Control-q>', self.destroy)
+            self._root.bind("<Control-q>", self.destroy)
             self._init_quit(self._root)
         else:
             self._root = Frame(parent)
@@ -129,31 +157,36 @@ class ChartMatrixView(object):
         self.draw()
 
     def _init_quit(self, root):
-        quit = Button(root, text='Quit', command=self.destroy)
-        quit.pack(side='bottom', expand=0, fill='none')
+        quit = Button(root, text="Quit", command=self.destroy)
+        quit.pack(side="bottom", expand=0, fill="none")
 
     def _init_matrix(self, root):
-        cframe = Frame(root, border=2, relief='sunken')
-        cframe.pack(expand=0, fill='none', padx=1, pady=3, side='top')
-        self._canvas = Canvas(cframe, width=200, height=200,
-                                      background='white')
-        self._canvas.pack(expand=0, fill='none')
+        cframe = Frame(root, border=2, relief="sunken")
+        cframe.pack(expand=0, fill="none", padx=1, pady=3, side="top")
+        self._canvas = Canvas(cframe, width=200, height=200, background="white")
+        self._canvas.pack(expand=0, fill="none")
 
     def _init_numedges(self, root):
-        self._numedges_label = Label(root, text='0 edges')
-        self._numedges_label.pack(expand=0, fill='none', side='top')
+        self._numedges_label = Label(root, text="0 edges")
+        self._numedges_label.pack(expand=0, fill="none", side="top")
 
     def _init_list(self, root):
         self._list = EdgeList(root, [], width=20, height=5)
-        self._list.pack(side='top', expand=1, fill='both', pady=3)
-        def cb(edge, self=self): self._fire_callbacks('select', edge)
-        self._list.add_callback('select', cb)
+        self._list.pack(side="top", expand=1, fill="both", pady=3)
+
+        def cb(edge, self=self):
+            self._fire_callbacks("select", edge)
+
+        self._list.add_callback("select", cb)
         self._list.focus()
 
     def destroy(self, *e):
-        if self._root is None: return
-        try: self._root.destroy()
-        except: pass
+        if self._root is None:
+            return
+        try:
+            self._root.destroy()
+        except:
+            pass
         self._root = None
 
     def set_chart(self, chart):
@@ -163,7 +196,8 @@ class ChartMatrixView(object):
             self.draw()
 
     def update(self):
-        if self._root is None: return
+        if self._root is None:
+            return
 
         # Count the edges in each cell
         N = len(self._cells)
@@ -175,20 +209,19 @@ class ChartMatrixView(object):
         for i in range(N):
             for j in range(i, N):
                 if cell_edges[i][j] == 0:
-                    color = 'gray20'
+                    color = "gray20"
                 else:
-                    color = ('#00%02x%02x' %
-                             (min(255, 50+128*cell_edges[i][j]/10),
-                              max(0, 128-128*cell_edges[i][j]/10)))
+                    color = "#00%02x%02x" % (
+                        min(255, 50 + 128 * cell_edges[i][j] / 10),
+                        max(0, 128 - 128 * cell_edges[i][j] / 10),
+                    )
                 cell_tag = self._cells[i][j]
                 self._canvas.itemconfig(cell_tag, fill=color)
-                if (i,j) == self._selected_cell:
-                    self._canvas.itemconfig(cell_tag, outline='#00ffff',
-                                            width=3)
+                if (i, j) == self._selected_cell:
+                    self._canvas.itemconfig(cell_tag, outline="#00ffff", width=3)
                     self._canvas.tag_raise(cell_tag)
                 else:
-                    self._canvas.itemconfig(cell_tag, outline='black',
-                                            width=1)
+                    self._canvas.itemconfig(cell_tag, outline="black", width=1)
 
         # Update the edge list.
         edges = list(self._chart.select(span=self._selected_cell))
@@ -197,51 +230,58 @@ class ChartMatrixView(object):
         # Update our edge count.
         self._num_edges = self._chart.num_edges()
         if self._numedges_label is not None:
-            self._numedges_label['text'] = '%d edges' % self._num_edges
+            self._numedges_label["text"] = "%d edges" % self._num_edges
 
     def activate(self):
-        self._canvas.itemconfig('inactivebox', state='hidden')
+        self._canvas.itemconfig("inactivebox", state="hidden")
         self.update()
 
     def inactivate(self):
-        self._canvas.itemconfig('inactivebox', state='normal')
+        self._canvas.itemconfig("inactivebox", state="normal")
         self.update()
 
     def add_callback(self, event, func):
-        self._callbacks.setdefault(event,{})[func] = 1
+        self._callbacks.setdefault(event, {})[func] = 1
 
     def remove_callback(self, event, func=None):
-        if func is None: del self._callbacks[event]
+        if func is None:
+            del self._callbacks[event]
         else:
-            try: del self._callbacks[event][func]
-            except: pass
+            try:
+                del self._callbacks[event][func]
+            except:
+                pass
 
     def _fire_callbacks(self, event, *args):
-        if event not in self._callbacks: return
-        for cb_func in list(self._callbacks[event].keys()): cb_func(*args)
+        if event not in self._callbacks:
+            return
+        for cb_func in list(self._callbacks[event].keys()):
+            cb_func(*args)
 
     def select_cell(self, i, j):
-        if self._root is None: return
+        if self._root is None:
+            return
 
         # If the cell is already selected (and the chart contents
         # haven't changed), then do nothing.
-        if ((i,j) == self._selected_cell and
-            self._chart.num_edges() == self._num_edges): return
+        if (i, j) == self._selected_cell and self._chart.num_edges() == self._num_edges:
+            return
 
-        self._selected_cell = (i,j)
+        self._selected_cell = (i, j)
         self.update()
 
         # Fire the callback.
-        self._fire_callbacks('select_cell', i, j)
+        self._fire_callbacks("select_cell", i, j)
 
     def deselect_cell(self):
-        if self._root is None: return
+        if self._root is None:
+            return
         self._selected_cell = None
         self._list.set([])
         self.update()
 
     def _click_cell(self, i, j):
-        if self._selected_cell == (i,j):
+        if self._selected_cell == (i, j):
             self.deselect_cell()
         else:
             self.select_cell(i, j)
@@ -251,64 +291,95 @@ class ChartMatrixView(object):
         self._list.view(edge)
 
     def mark_edge(self, edge):
-        if self._root is None: return
+        if self._root is None:
+            return
         self.select_cell(*edge.span())
         self._list.mark(edge)
 
     def unmark_edge(self, edge=None):
-        if self._root is None: return
+        if self._root is None:
+            return
         self._list.unmark(edge)
 
     def markonly_edge(self, edge):
-        if self._root is None: return
+        if self._root is None:
+            return
         self.select_cell(*edge.span())
         self._list.markonly(edge)
 
     def draw(self):
-        if self._root is None: return
+        if self._root is None:
+            return
         LEFT_MARGIN = BOT_MARGIN = 15
         TOP_MARGIN = 5
         c = self._canvas
-        c.delete('all')
-        N = self._chart.num_leaves()+1
-        dx = (int(c['width'])-LEFT_MARGIN)/N
-        dy = (int(c['height'])-TOP_MARGIN-BOT_MARGIN)/N
+        c.delete("all")
+        N = self._chart.num_leaves() + 1
+        dx = (int(c["width"]) - LEFT_MARGIN) / N
+        dy = (int(c["height"]) - TOP_MARGIN - BOT_MARGIN) / N
 
-        c.delete('all')
+        c.delete("all")
 
         # Labels and dotted lines
         for i in range(N):
-            c.create_text(LEFT_MARGIN-2, i*dy+dy/2+TOP_MARGIN,
-                          text=repr(i), anchor='e')
-            c.create_text(i*dx+dx/2+LEFT_MARGIN, N*dy+TOP_MARGIN+1,
-                          text=repr(i), anchor='n')
-            c.create_line(LEFT_MARGIN, dy*(i+1)+TOP_MARGIN,
-                          dx*N+LEFT_MARGIN, dy*(i+1)+TOP_MARGIN, dash='.')
-            c.create_line(dx*i+LEFT_MARGIN, TOP_MARGIN,
-                          dx*i+LEFT_MARGIN, dy*N+TOP_MARGIN, dash='.')
+            c.create_text(
+                LEFT_MARGIN - 2, i * dy + dy / 2 + TOP_MARGIN, text=repr(i), anchor="e"
+            )
+            c.create_text(
+                i * dx + dx / 2 + LEFT_MARGIN,
+                N * dy + TOP_MARGIN + 1,
+                text=repr(i),
+                anchor="n",
+            )
+            c.create_line(
+                LEFT_MARGIN,
+                dy * (i + 1) + TOP_MARGIN,
+                dx * N + LEFT_MARGIN,
+                dy * (i + 1) + TOP_MARGIN,
+                dash=".",
+            )
+            c.create_line(
+                dx * i + LEFT_MARGIN,
+                TOP_MARGIN,
+                dx * i + LEFT_MARGIN,
+                dy * N + TOP_MARGIN,
+                dash=".",
+            )
 
         # A box around the whole thing
-        c.create_rectangle(LEFT_MARGIN, TOP_MARGIN,
-                           LEFT_MARGIN+dx*N, dy*N+TOP_MARGIN,
-                           width=2)
+        c.create_rectangle(
+            LEFT_MARGIN, TOP_MARGIN, LEFT_MARGIN + dx * N, dy * N + TOP_MARGIN, width=2
+        )
 
         # Cells
         self._cells = [[None for i in range(N)] for j in range(N)]
         for i in range(N):
             for j in range(i, N):
-                t = c.create_rectangle(j*dx+LEFT_MARGIN, i*dy+TOP_MARGIN,
-                                       (j+1)*dx+LEFT_MARGIN,
-                                       (i+1)*dy+TOP_MARGIN,
-                                       fill='gray20')
+                t = c.create_rectangle(
+                    j * dx + LEFT_MARGIN,
+                    i * dy + TOP_MARGIN,
+                    (j + 1) * dx + LEFT_MARGIN,
+                    (i + 1) * dy + TOP_MARGIN,
+                    fill="gray20",
+                )
                 self._cells[i][j] = t
-                def cb(event, self=self, i=i, j=j): self._click_cell(i,j)
-                c.tag_bind(t, '<Button-1>', cb)
+
+                def cb(event, self=self, i=i, j=j):
+                    self._click_cell(i, j)
+
+                c.tag_bind(t, "<Button-1>", cb)
 
         # Inactive box
-        xmax, ymax = int(c['width']), int(c['height'])
-        t = c.create_rectangle(-100, -100, xmax+100, ymax+100,
-                               fill='gray50', state='hidden',
-                               tag='inactivebox')
+        xmax, ymax = int(c["width"]), int(c["height"])
+        t = c.create_rectangle(
+            -100,
+            -100,
+            xmax + 100,
+            ymax + 100,
+            fill="gray50",
+            state="hidden",
+            tag="inactivebox",
+        )
         c.tag_lower(t)
 
         # Update the cells.
@@ -317,10 +388,12 @@ class ChartMatrixView(object):
     def pack(self, *args, **kwargs):
         self._root.pack(*args, **kwargs)
 
+
 #######################################################################
 # Chart Results View
 #######################################################################
 
+
 class ChartResultsView(object):
     def __init__(self, parent, chart, grammar, toplevel=True):
         self._chart = chart
@@ -333,35 +406,37 @@ class ChartResultsView(object):
 
         if toplevel:
             self._root = Toplevel(parent)
-            self._root.title('Chart Parser Application: Results')
-            self._root.bind('<Control-q>', self.destroy)
+            self._root.title("Chart Parser Application: Results")
+            self._root.bind("<Control-q>", self.destroy)
         else:
             self._root = Frame(parent)
 
         # Buttons
         if toplevel:
             buttons = Frame(self._root)
-            buttons.pack(side='bottom', expand=0, fill='x')
-            Button(buttons, text='Quit',
-                           command=self.destroy).pack(side='right')
-            Button(buttons, text='Print All',
-                           command=self.print_all).pack(side='left')
-            Button(buttons, text='Print Selection',
-                           command=self.print_selection).pack(side='left')
+            buttons.pack(side="bottom", expand=0, fill="x")
+            Button(buttons, text="Quit", command=self.destroy).pack(side="right")
+            Button(buttons, text="Print All", command=self.print_all).pack(side="left")
+            Button(buttons, text="Print Selection", command=self.print_selection).pack(
+                side="left"
+            )
 
         # Canvas frame.
         self._cframe = CanvasFrame(self._root, closeenough=20)
-        self._cframe.pack(side='top', expand=1, fill='both')
+        self._cframe.pack(side="top", expand=1, fill="both")
 
         # Initial update
         self.update()
 
     def update(self, edge=None):
-        if self._root is None: return
+        if self._root is None:
+            return
         # If the edge isn't a parse edge, do nothing.
         if edge is not None:
-            if edge.lhs() != self._grammar.start(): return
-            if edge.span() != (0, self._chart.num_leaves()): return
+            if edge.lhs() != self._grammar.start():
+                return
+            if edge.span() != (0, self._chart.num_leaves()):
+                return
 
         for parse in self._chart.parses(self._grammar.start()):
             if parse not in self._trees:
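
The guard above encodes what counts as a complete parse: the edge's left-hand side must be the grammar's start symbol, and its span must cover every leaf. The same test over plain tuples (hypothetical stand-ins for edges):

    def is_full_parse(lhs, span, start_symbol, num_leaves):
        # A parse edge must derive the start symbol over the whole input.
        return lhs == start_symbol and span == (0, num_leaves)

    assert is_full_parse("S", (0, 5), "S", 5)
    assert not is_full_parse("NP", (0, 5), "S", 5)  # wrong LHS
    assert not is_full_parse("S", (0, 3), "S", 5)   # covers only part of the input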
@@ -391,34 +466,35 @@ class ChartResultsView(object):
             c.delete(self._selectbox)
         self._selection = widget
         (x1, y1, x2, y2) = widget.bbox()
-        self._selectbox = c.create_rectangle(x1, y1, x2, y2,
-                                             width=2, outline='#088')
+        self._selectbox = c.create_rectangle(x1, y1, x2, y2, width=2, outline="#088")
 
     def _color(self, treewidget, color):
-        treewidget.label()['color'] = color
+        treewidget.label()["color"] = color
         for child in treewidget.subtrees():
             if isinstance(child, TreeSegmentWidget):
                 self._color(child, color)
             else:
-                child['color'] = color
+                child["color"] = color
 
     def print_all(self, *e):
-        if self._root is None: return
+        if self._root is None:
+            return
         self._cframe.print_to_file()
 
     def print_selection(self, *e):
-        if self._root is None: return
+        if self._root is None:
+            return
         if self._selection is None:
-            showerror('Print Error', 'No tree selected')
+            showerror("Print Error", "No tree selected")
         else:
             c = self._cframe.canvas()
             for widget in self._treewidgets:
                 if widget is not self._selection:
                     self._cframe.destroy_widget(widget)
             c.delete(self._selectbox)
-            (x1,y1,x2,y2) = self._selection.bbox()
-            self._selection.move(10-x1,10-y1)
-            c['scrollregion'] = '0 0 %s %s' % (x2-x1+20, y2-y1+20)
+            (x1, y1, x2, y2) = self._selection.bbox()
+            self._selection.move(10 - x1, 10 - y1)
+            c["scrollregion"] = "0 0 %s %s" % (x2 - x1 + 20, y2 - y1 + 20)
             self._cframe.print_to_file()
 
             # Restore our state.
@@ -427,7 +503,8 @@ class ChartResultsView(object):
             self.update()
 
     def clear(self):
-        if self._root is None: return
+        if self._root is None:
+            return
         for treewidget in self._treewidgets:
             self._cframe.destroy_widget(treewidget)
         self._trees = []
@@ -448,18 +525,23 @@ class ChartResultsView(object):
         self.update()
 
     def destroy(self, *e):
-        if self._root is None: return
-        try: self._root.destroy()
-        except: pass
+        if self._root is None:
+            return
+        try:
+            self._root.destroy()
+        except Exception:
+            # The window may already be gone; ignore errors on teardown.
+            pass
         self._root = None
 
     def pack(self, *args, **kwargs):
         self._root.pack(*args, **kwargs)
 
+
 #######################################################################
 # Chart Comparer
 #######################################################################
 
+
 class ChartComparer(object):
     """
 
@@ -488,24 +570,26 @@ class ChartComparer(object):
     :ivar _op_label: A Label containing the most recent operation.
     """
 
-    _OPSYMBOL = {'-': '-',
-                 'and': SymbolWidget.SYMBOLS['intersection'],
-                 'or': SymbolWidget.SYMBOLS['union']}
+    _OPSYMBOL = {
+        "-": "-",
+        "and": SymbolWidget.SYMBOLS["intersection"],
+        "or": SymbolWidget.SYMBOLS["union"],
+    }
 
     def __init__(self, *chart_filenames):
         # This chart is displayed when we don't have a value (eg
         # before any chart is loaded).
-        faketok = [''] * 8
+        faketok = [""] * 8
         self._emptychart = Chart(faketok)
 
         # The left & right charts start out empty.
-        self._left_name = 'None'
-        self._right_name = 'None'
+        self._left_name = "None"
+        self._right_name = "None"
         self._left_chart = self._emptychart
         self._right_chart = self._emptychart
 
         # The charts that have been loaded.
-        self._charts = {'None': self._emptychart}
+        self._charts = {"None": self._emptychart}
 
         # The output chart.
         self._out_chart = self._emptychart
@@ -515,9 +599,9 @@ class ChartComparer(object):
 
         # Set up the root window.
         self._root = Tk()
-        self._root.title('Chart Comparison')
-        self._root.bind('<Control-q>', self.destroy)
-        self._root.bind('<Control-x>', self.destroy)
+        self._root.title("Chart Comparison")
+        self._root.bind("<Control-q>", self.destroy)
+        self._root.bind("<Control-x>", self.destroy)
 
         # Initialize all widgets, etc.
         self._init_menubar(self._root)
@@ -531,195 +615,214 @@ class ChartComparer(object):
             self.load_chart(filename)
 
     def destroy(self, *e):
-        if self._root is None: return
-        try: self._root.destroy()
-        except: pass
+        if self._root is None:
+            return
+        try:
+            self._root.destroy()
+        except Exception:
+            # The window may already be gone; ignore errors on teardown.
+            pass
         self._root = None
 
     def mainloop(self, *args, **kwargs):
+        # NOTE: the early return below makes the mainloop call unreachable,
+        # so this comparer never enters the Tk event loop as written.
         return
         self._root.mainloop(*args, **kwargs)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Initialization
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def _init_menubar(self, root):
         menubar = Menu(root)
 
         # File menu
         filemenu = Menu(menubar, tearoff=0)
-        filemenu.add_command(label='Load Chart', accelerator='Ctrl-o',
-                             underline=0, command=self.load_chart_dialog)
-        filemenu.add_command(label='Save Output', accelerator='Ctrl-s',
-                             underline=0, command=self.save_chart_dialog)
+        filemenu.add_command(
+            label="Load Chart",
+            accelerator="Ctrl-o",
+            underline=0,
+            command=self.load_chart_dialog,
+        )
+        filemenu.add_command(
+            label="Save Output",
+            accelerator="Ctrl-s",
+            underline=0,
+            command=self.save_chart_dialog,
+        )
         filemenu.add_separator()
-        filemenu.add_command(label='Exit', underline=1,
-                             command=self.destroy, accelerator='Ctrl-x')
-        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+        filemenu.add_command(
+            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"
+        )
+        menubar.add_cascade(label="File", underline=0, menu=filemenu)
 
         # Compare menu
         opmenu = Menu(menubar, tearoff=0)
-        opmenu.add_command(label='Intersection',
-                           command=self._intersection,
-                           accelerator='+')
-        opmenu.add_command(label='Union',
-                           command=self._union,
-                           accelerator='*')
-        opmenu.add_command(label='Difference',
-                           command=self._difference,
-                           accelerator='-')
+        opmenu.add_command(
+            label="Intersection", command=self._intersection, accelerator="+"
+        )
+        opmenu.add_command(label="Union", command=self._union, accelerator="*")
+        opmenu.add_command(
+            label="Difference", command=self._difference, accelerator="-"
+        )
         opmenu.add_separator()
-        opmenu.add_command(label='Swap Charts',
-                           command=self._swapcharts)
-        menubar.add_cascade(label='Compare', underline=0, menu=opmenu)
+        opmenu.add_command(label="Swap Charts", command=self._swapcharts)
+        menubar.add_cascade(label="Compare", underline=0, menu=opmenu)
 
         # Add the menu
         self._root.config(menu=menubar)
 
     def _init_divider(self, root):
-        divider = Frame(root, border=2, relief='sunken')
-        divider.pack(side='top', fill='x', ipady=2)
+        divider = Frame(root, border=2, relief="sunken")
+        divider.pack(side="top", fill="x", ipady=2)
 
     def _init_chartviews(self, root):
-        opfont=('symbol', -36) # Font for operator.
-        eqfont=('helvetica', -36) # Font for equals sign.
+        opfont = ("symbol", -36)  # Font for operator.
+        eqfont = ("helvetica", -36)  # Font for equals sign.
 
-        frame = Frame(root, background='#c0c0c0')
-        frame.pack(side='top', expand=1, fill='both')
+        frame = Frame(root, background="#c0c0c0")
+        frame.pack(side="top", expand=1, fill="both")
 
         # The left matrix.
-        cv1_frame = Frame(frame, border=3, relief='groove')
-        cv1_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both')
+        cv1_frame = Frame(frame, border=3, relief="groove")
+        cv1_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both")
         self._left_selector = MutableOptionMenu(
-            cv1_frame, list(self._charts.keys()), command=self._select_left)
-        self._left_selector.pack(side='top', pady=5, fill='x')
-        self._left_matrix = ChartMatrixView(cv1_frame, self._emptychart,
-                                            toplevel=False,
-                                            show_numedges=True)
-        self._left_matrix.pack(side='bottom', padx=5, pady=5,
-                               expand=1, fill='both')
-        self._left_matrix.add_callback('select', self.select_edge)
-        self._left_matrix.add_callback('select_cell', self.select_cell)
+            cv1_frame, list(self._charts.keys()), command=self._select_left
+        )
+        self._left_selector.pack(side="top", pady=5, fill="x")
+        self._left_matrix = ChartMatrixView(
+            cv1_frame, self._emptychart, toplevel=False, show_numedges=True
+        )
+        self._left_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both")
+        self._left_matrix.add_callback("select", self.select_edge)
+        self._left_matrix.add_callback("select_cell", self.select_cell)
         self._left_matrix.inactivate()
 
         # The operator.
-        self._op_label = Label(frame, text=' ', width=3,
-                                       background='#c0c0c0', font=opfont)
-        self._op_label.pack(side='left', padx=5, pady=5)
+        self._op_label = Label(
+            frame, text=" ", width=3, background="#c0c0c0", font=opfont
+        )
+        self._op_label.pack(side="left", padx=5, pady=5)
 
         # The right matrix.
-        cv2_frame = Frame(frame, border=3, relief='groove')
-        cv2_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both')
+        cv2_frame = Frame(frame, border=3, relief="groove")
+        cv2_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both")
         self._right_selector = MutableOptionMenu(
-            cv2_frame, list(self._charts.keys()), command=self._select_right)
-        self._right_selector.pack(side='top', pady=5, fill='x')
-        self._right_matrix = ChartMatrixView(cv2_frame, self._emptychart,
-                                            toplevel=False,
-                                            show_numedges=True)
-        self._right_matrix.pack(side='bottom', padx=5, pady=5,
-                               expand=1, fill='both')
-        self._right_matrix.add_callback('select', self.select_edge)
-        self._right_matrix.add_callback('select_cell', self.select_cell)
+            cv2_frame, list(self._charts.keys()), command=self._select_right
+        )
+        self._right_selector.pack(side="top", pady=5, fill="x")
+        self._right_matrix = ChartMatrixView(
+            cv2_frame, self._emptychart, toplevel=False, show_numedges=True
+        )
+        self._right_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both")
+        self._right_matrix.add_callback("select", self.select_edge)
+        self._right_matrix.add_callback("select_cell", self.select_cell)
         self._right_matrix.inactivate()
 
         # The equals sign
-        Label(frame, text='=', width=3, background='#c0c0c0',
-                      font=eqfont).pack(side='left', padx=5, pady=5)
+        Label(frame, text="=", width=3, background="#c0c0c0", font=eqfont).pack(
+            side="left", padx=5, pady=5
+        )
 
         # The output matrix.
-        out_frame = Frame(frame, border=3, relief='groove')
-        out_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both')
-        self._out_label = Label(out_frame, text='Output')
-        self._out_label.pack(side='top', pady=9)
-        self._out_matrix = ChartMatrixView(out_frame, self._emptychart,
-                                            toplevel=False,
-                                            show_numedges=True)
-        self._out_matrix.pack(side='bottom', padx=5, pady=5,
-                                 expand=1, fill='both')
-        self._out_matrix.add_callback('select', self.select_edge)
-        self._out_matrix.add_callback('select_cell', self.select_cell)
+        out_frame = Frame(frame, border=3, relief="groove")
+        out_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both")
+        self._out_label = Label(out_frame, text="Output")
+        self._out_label.pack(side="top", pady=9)
+        self._out_matrix = ChartMatrixView(
+            out_frame, self._emptychart, toplevel=False, show_numedges=True
+        )
+        self._out_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both")
+        self._out_matrix.add_callback("select", self.select_edge)
+        self._out_matrix.add_callback("select_cell", self.select_cell)
         self._out_matrix.inactivate()
 
     def _init_buttons(self, root):
         buttons = Frame(root)
-        buttons.pack(side='bottom', pady=5, fill='x', expand=0)
-        Button(buttons, text='Intersection',
-                       command=self._intersection).pack(side='left')
-        Button(buttons, text='Union',
-                       command=self._union).pack(side='left')
-        Button(buttons, text='Difference',
-                       command=self._difference).pack(side='left')
-        Frame(buttons, width=20).pack(side='left')
-        Button(buttons, text='Swap Charts',
-                       command=self._swapcharts).pack(side='left')
-
-        Button(buttons, text='Detatch Output',
-                       command=self._detatch_out).pack(side='right')
+        buttons.pack(side="bottom", pady=5, fill="x", expand=0)
+        Button(buttons, text="Intersection", command=self._intersection).pack(
+            side="left"
+        )
+        Button(buttons, text="Union", command=self._union).pack(side="left")
+        Button(buttons, text="Difference", command=self._difference).pack(side="left")
+        Frame(buttons, width=20).pack(side="left")
+        Button(buttons, text="Swap Charts", command=self._swapcharts).pack(side="left")
+
+        Button(buttons, text="Detatch Output", command=self._detatch_out).pack(
+            side="right"
+        )
 
     def _init_bindings(self, root):
-        #root.bind('<Control-s>', self.save_chart)
-        root.bind('<Control-o>', self.load_chart_dialog)
-        #root.bind('<Control-r>', self.reset)
+        # root.bind('<Control-s>', self.save_chart)
+        root.bind("<Control-o>", self.load_chart_dialog)
+        # root.bind('<Control-r>', self.reset)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Input Handling
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def _select_left(self, name):
         self._left_name = name
         self._left_chart = self._charts[name]
         self._left_matrix.set_chart(self._left_chart)
-        if name == 'None': self._left_matrix.inactivate()
+        if name == "None":
+            self._left_matrix.inactivate()
         self._apply_op()
 
     def _select_right(self, name):
         self._right_name = name
         self._right_chart = self._charts[name]
         self._right_matrix.set_chart(self._right_chart)
-        if name == 'None': self._right_matrix.inactivate()
+        if name == "None":
+            self._right_matrix.inactivate()
         self._apply_op()
 
     def _apply_op(self):
-        if self._operator == '-': self._difference()
-        elif self._operator == 'or': self._union()
-        elif self._operator == 'and': self._intersection()
-
-
-    #////////////////////////////////////////////////////////////
+        if self._operator == "-":
+            self._difference()
+        elif self._operator == "or":
+            self._union()
+        elif self._operator == "and":
+            self._intersection()
+
+    # ////////////////////////////////////////////////////////////
     # File
-    #////////////////////////////////////////////////////////////
-    CHART_FILE_TYPES = [('Pickle file', '.pickle'),
-                        ('All files', '*')]
+    # ////////////////////////////////////////////////////////////
+    CHART_FILE_TYPES = [("Pickle file", ".pickle"), ("All files", "*")]
 
     def save_chart_dialog(self, *args):
-        filename = asksaveasfilename(filetypes=self.CHART_FILE_TYPES,
-                                     defaultextension='.pickle')
-        if not filename: return
+        filename = asksaveasfilename(
+            filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
+        )
+        if not filename:
+            return
         try:
-            with open(filename, 'wb') as outfile:
+            with open(filename, "wb") as outfile:
                 pickle.dump(self._out_chart, outfile)
         except Exception as e:
-            showerror('Error Saving Chart',
-                                   'Unable to open file: %r\n%s' %
-                                   (filename, e))
+            showerror(
+                "Error Saving Chart", "Unable to open file: %r\n%s" % (filename, e)
+            )
 
     def load_chart_dialog(self, *args):
-        filename = askopenfilename(filetypes=self.CHART_FILE_TYPES,
-                                   defaultextension='.pickle')
-        if not filename: return
-        try: self.load_chart(filename)
+        filename = askopenfilename(
+            filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
+        )
+        if not filename:
+            return
+        try:
+            self.load_chart(filename)
         except Exception as e:
-            showerror('Error Loading Chart',
-                                   'Unable to open file: %r\n%s' %
-                                   (filename, e))
+            showerror(
+                "Error Loading Chart", "Unable to open file: %r\n%s" % (filename, e)
+            )
 
     def load_chart(self, filename):
-        with open(filename, 'rb') as infile:
+        with open(filename, "rb") as infile:
             chart = pickle.load(infile)
         name = os.path.basename(filename)
-        if name.endswith('.pickle'): name = name[:-7]
-        if name.endswith('.chart'): name = name[:-6]
+        if name.endswith(".pickle"):
+            name = name[:-7]
+        if name.endswith(".chart"):
+            name = name[:-6]
         self._charts[name] = chart
         self._left_selector.add(name)
         self._right_selector.add(name)
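
`save_chart_dialog` and `load_chart` persist charts with stdlib pickle. A minimal round-trip sketch under the same pattern (the dict stands in for a Chart; the filename is illustrative):

    import os
    import pickle
    import tempfile

    chart_stand_in = {"edges": [("NP", (0, 2)), ("VP", (2, 5))]}
    path = os.path.join(tempfile.mkdtemp(), "demo.pickle")
    with open(path, "wb") as outfile:
        pickle.dump(chart_stand_in, outfile)          # mirrors save_chart_dialog
    with open(path, "rb") as infile:
        assert pickle.load(infile) == chart_stand_in  # mirrors load_chart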
@@ -736,9 +839,9 @@ class ChartComparer(object):
         self._right_matrix.update()
         self._out_matrix.update()
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Selection
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def select_edge(self, edge):
         if edge in self._left_chart:
@@ -759,32 +862,35 @@ class ChartComparer(object):
         self._right_matrix.select_cell(i, j)
         self._out_matrix.select_cell(i, j)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Operations
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def _difference(self):
-        if not self._checkcompat(): return
+        if not self._checkcompat():
+            return
 
         out_chart = Chart(self._left_chart.tokens())
         for edge in self._left_chart:
             if edge not in self._right_chart:
                 out_chart.insert(edge, [])
 
-        self._update('-', out_chart)
+        self._update("-", out_chart)
 
     def _intersection(self):
-        if not self._checkcompat(): return
+        if not self._checkcompat():
+            return
 
         out_chart = Chart(self._left_chart.tokens())
         for edge in self._left_chart:
             if edge in self._right_chart:
                 out_chart.insert(edge, [])
 
-        self._update('and', out_chart)
+        self._update("and", out_chart)
 
     def _union(self):
-        if not self._checkcompat(): return
+        if not self._checkcompat():
+            return
 
         out_chart = Chart(self._left_chart.tokens())
         for edge in self._left_chart:
@@ -792,7 +898,7 @@ class ChartComparer(object):
         for edge in self._right_chart:
             out_chart.insert(edge, [])
 
-        self._update('or', out_chart)
+        self._update("or", out_chart)
 
     def _swapcharts(self):
         left, right = self._left_name, self._right_name
@@ -800,16 +906,17 @@ class ChartComparer(object):
         self._right_selector.set(left)
 
     def _checkcompat(self):
-        if (self._left_chart.tokens() != self._right_chart.tokens() or
-            self._left_chart.property_names() !=
-            self._right_chart.property_names() or
-            self._left_chart == self._emptychart or
-            self._right_chart == self._emptychart):
+        if (
+            self._left_chart.tokens() != self._right_chart.tokens()
+            or self._left_chart.property_names() != self._right_chart.property_names()
+            or self._left_chart == self._emptychart
+            or self._right_chart == self._emptychart
+        ):
             # Clear & inactivate the output chart.
             self._out_chart = self._emptychart
             self._out_matrix.set_chart(self._out_chart)
             self._out_matrix.inactivate()
-            self._out_label['text'] = 'Output'
+            self._out_label["text"] = "Output"
             # Issue some other warning?
             return False
         else:
@@ -817,34 +924,30 @@ class ChartComparer(object):
 
     def _update(self, operator, out_chart):
         self._operator = operator
-        self._op_label['text'] = self._OPSYMBOL[operator]
+        self._op_label["text"] = self._OPSYMBOL[operator]
         self._out_chart = out_chart
         self._out_matrix.set_chart(out_chart)
-        self._out_label['text'] = '%s %s %s' % (self._left_name,
-                                                self._operator,
-                                                self._right_name)
+        self._out_label["text"] = "%s %s %s" % (
+            self._left_name,
+            self._operator,
+            self._right_name,
+        )
 
     def _clear_out_chart(self):
         self._out_chart = self._emptychart
         self._out_matrix.set_chart(self._out_chart)
-        self._op_label['text'] = ' '
+        self._op_label["text"] = " "
         self._out_matrix.inactivate()
 
     def _detatch_out(self):
-        ChartMatrixView(self._root, self._out_chart,
-                        title=self._out_label['text'])
-
-
-
-
-
-
+        ChartMatrixView(self._root, self._out_chart, title=self._out_label["text"])
 
 
 #######################################################################
 # Chart View
 #######################################################################
 
+
 class ChartView(object):
     """
     A component for viewing charts.  This is used by ``ChartParserApp`` to
@@ -902,9 +1005,9 @@ class ChartView(object):
         Construct a new ``Chart`` display.
         """
         # Process keyword args.
-        draw_tree = kw.get('draw_tree', 0)
-        draw_sentence = kw.get('draw_sentence', 1)
-        self._fontsize = kw.get('fontsize', -12)
+        draw_tree = kw.get("draw_tree", 0)
+        draw_sentence = kw.get("draw_sentence", 1)
+        self._fontsize = kw.get("fontsize", -12)
 
         # The chart!
         self._chart = chart
@@ -934,12 +1037,17 @@ class ChartView(object):
         # If they didn't provide a main window, then set one up.
         if root is None:
             top = Tk()
-            top.title('Chart View')
-            def destroy1(e, top=top): top.destroy()
-            def destroy2(top=top): top.destroy()
-            top.bind('q', destroy1)
-            b = Button(top, text='Done', command=destroy2)
-            b.pack(side='bottom')
+            top.title("Chart View")
+
+            def destroy1(e, top=top):
+                top.destroy()
+
+            def destroy2(top=top):
+                top.destroy()
+
+            top.bind("q", destroy1)
+            b = Button(top, text="Done", command=destroy2)
+            b.pack(side="bottom")
             self._root = top
         else:
             self._root = root
@@ -949,25 +1057,25 @@ class ChartView(object):
 
         # Create the chart canvas.
         (self._chart_sb, self._chart_canvas) = self._sb_canvas(self._root)
-        self._chart_canvas['height'] = 300
-        self._chart_canvas['closeenough'] = 15
+        self._chart_canvas["height"] = 300
+        self._chart_canvas["closeenough"] = 15
 
         # Create the sentence canvas.
         if draw_sentence:
-            cframe = Frame(self._root, relief='sunk', border=2)
-            cframe.pack(fill='both', side='bottom')
+            cframe = Frame(self._root, relief="sunk", border=2)
+            cframe.pack(fill="both", side="bottom")
             self._sentence_canvas = Canvas(cframe, height=50)
-            self._sentence_canvas['background'] = '#e0e0e0'
-            self._sentence_canvas.pack(fill='both')
-            #self._sentence_canvas['height'] = self._sentence_height
+            self._sentence_canvas["background"] = "#e0e0e0"
+            self._sentence_canvas.pack(fill="both")
+            # self._sentence_canvas['height'] = self._sentence_height
         else:
             self._sentence_canvas = None
 
         # Create the tree canvas.
         if draw_tree:
-            (sb, canvas) = self._sb_canvas(self._root, 'n', 'x')
+            (sb, canvas) = self._sb_canvas(self._root, "n", "x")
             (self._tree_sb, self._tree_canvas) = (sb, canvas)
-            self._tree_canvas['height'] = 200
+            self._tree_canvas["height"] = 200
         else:
             self._tree_canvas = None
 
@@ -979,48 +1087,45 @@ class ChartView(object):
 
         # Set up the configure callback, which will be called whenever
         # the window is resized.
-        self._chart_canvas.bind('<Configure>', self._configure)
+        self._chart_canvas.bind("<Configure>", self._configure)
 
     def _init_fonts(self, root):
-        self._boldfont = Font(family='helvetica', weight='bold',
-                                    size=self._fontsize)
-        self._font = Font(family='helvetica',
-                                    size=self._fontsize)
+        self._boldfont = Font(family="helvetica", weight="bold", size=self._fontsize)
+        self._font = Font(family="helvetica", size=self._fontsize)
         # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
         self._sysfont = Font(font=Button()["font"])
         root.option_add("*Font", self._sysfont)
 
-    def _sb_canvas(self, root, expand='y',
-                   fill='both', side='bottom'):
+    def _sb_canvas(self, root, expand="y", fill="both", side="bottom"):
         """
         Helper for __init__: construct a canvas with a scrollbar.
         """
-        cframe = Frame(root, relief='sunk', border=2)
+        cframe = Frame(root, relief="sunk", border=2)
         cframe.pack(fill=fill, expand=expand, side=side)
-        canvas = Canvas(cframe, background='#e0e0e0')
+        canvas = Canvas(cframe, background="#e0e0e0")
 
         # Give the canvas a scrollbar.
-        sb = Scrollbar(cframe, orient='vertical')
-        sb.pack(side='right', fill='y')
-        canvas.pack(side='left', fill=fill, expand='yes')
+        sb = Scrollbar(cframe, orient="vertical")
+        sb.pack(side="right", fill="y")
+        canvas.pack(side="left", fill=fill, expand="yes")
 
         # Connect the scrollbars to the canvas.
-        sb['command']= canvas.yview
-        canvas['yscrollcommand'] = sb.set
+        sb["command"] = canvas.yview
+        canvas["yscrollcommand"] = sb.set
 
         return (sb, canvas)
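
The helper wires the scrollbar and canvas together in the standard two-way Tk pattern: the scrollbar drives `canvas.yview`, and the canvas reports its visible range back through `yscrollcommand`. A minimal standalone sketch (stdlib tkinter only; running it requires a display, so the mainloop is left commented):

    from tkinter import Canvas, Frame, Scrollbar, Tk

    root = Tk()
    cframe = Frame(root, relief="sunken", border=2)
    cframe.pack(fill="both", expand=True)
    canvas = Canvas(cframe, scrollregion=(0, 0, 200, 1000))
    sb = Scrollbar(cframe, orient="vertical")
    sb.pack(side="right", fill="y")
    canvas.pack(side="left", fill="both", expand=True)
    sb["command"] = canvas.yview        # the scrollbar moves the canvas ...
    canvas["yscrollcommand"] = sb.set   # ... and the canvas updates the bar
    # root.mainloop()  # uncomment to interact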
 
     def scroll_up(self, *e):
-        self._chart_canvas.yview('scroll', -1, 'units')
+        self._chart_canvas.yview("scroll", -1, "units")
 
     def scroll_down(self, *e):
-        self._chart_canvas.yview('scroll', 1, 'units')
+        self._chart_canvas.yview("scroll", 1, "units")
 
     def page_up(self, *e):
-        self._chart_canvas.yview('scroll', -1, 'pages')
+        self._chart_canvas.yview("scroll", -1, "pages")
 
     def page_down(self, *e):
-        self._chart_canvas.yview('scroll', 1, 'pages')
+        self._chart_canvas.yview("scroll", 1, "pages")
 
     def _grow(self):
         """
@@ -1028,19 +1133,20 @@ class ChartView(object):
         """
         # Grow, if need be
         N = self._chart.num_leaves()
-        width = max(int(self._chart_canvas['width']),
-                    N * self._unitsize + ChartView._MARGIN * 2 )
+        width = max(
+            int(self._chart_canvas["width"]), N * self._unitsize + ChartView._MARGIN * 2
+        )
 
         # It won't resize without the second (height) line, but I
         # don't understand why not.
         self._chart_canvas.configure(width=width)
-        self._chart_canvas.configure(height=self._chart_canvas['height'])
+        self._chart_canvas.configure(height=self._chart_canvas["height"])
 
-        self._unitsize = (width - 2*ChartView._MARGIN) / N
+        self._unitsize = (width - 2 * ChartView._MARGIN) / N
 
         # Reset the height for the sentence window.
         if self._sentence_canvas is not None:
-            self._sentence_canvas['height'] = self._sentence_height
+            self._sentence_canvas["height"] = self._sentence_height
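
One py2-to-py3 subtlety in `_grow` (and in `_configure` below): `/` on integers is now true division, so `_unitsize` becomes a float instead of being silently floored. Tk accepts float coordinates, so the drawing code is unaffected. A quick check of the difference:

    MARGIN = 15
    width, n_leaves = 801, 7
    unitsize_py3 = (width - 2 * MARGIN) / n_leaves   # true division: a float
    unitsize_py2 = (width - 2 * MARGIN) // n_leaves  # the old floored result
    assert unitsize_py2 == 110
    assert 110 < unitsize_py3 < 111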
 
     def set_font_size(self, size):
         self._font.configure(size=-abs(size))
@@ -1061,7 +1167,7 @@ class ChartView(object):
         canvas.
         """
         N = self._chart.num_leaves()
-        self._unitsize = (e.width - 2*ChartView._MARGIN) / N
+        self._unitsize = (e.width - 2 * ChartView._MARGIN) / N
         self.draw()
 
     def update(self, chart=None):
@@ -1089,7 +1195,6 @@ class ChartView(object):
                     self._add_edge(edge)
             self._resize()
 
-
     def _edge_conflict(self, edge, lvl):
         """
         Return True if the given edge overlaps with any edge on the given
@@ -1099,7 +1204,7 @@ class ChartView(object):
         (s1, e1) = edge.span()
         for otheredge in self._edgelevels[lvl]:
             (s2, e2) = otheredge.span()
-            if (s1 <= s2 < e1) or (s2 <= s1 < e2) or (s1==s2==e1==e2):
+            if (s1 <= s2 < e1) or (s2 <= s1 < e2) or (s1 == s2 == e1 == e2):
                 return True
         return False
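
The overlap test treats each span as a half-open interval [start, end), with one extra clause so that identical zero-width edges also conflict. A standalone sketch of the same predicate:

    def spans_conflict(a, b):
        # Half-open interval overlap, plus identical empty spans.
        (s1, e1), (s2, e2) = a, b
        return (s1 <= s2 < e1) or (s2 <= s1 < e2) or (s1 == s2 == e1 == e2)

    assert spans_conflict((0, 3), (2, 5))      # overlapping
    assert not spans_conflict((0, 2), (2, 4))  # merely adjacent
    assert spans_conflict((1, 1), (1, 1))      # same empty span
    assert not spans_conflict((1, 1), (2, 2))  # distinct empty spans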
 
@@ -1124,17 +1229,17 @@ class ChartView(object):
             rhs = " ".join(rhselts)
         else:
             lhs = edge.lhs()
-            rhs = ''
+            rhs = ""
 
         for s in (lhs, rhs):
-            tag = c.create_text(0,0, text=s,
-                                font=self._boldfont,
-                                anchor='nw', justify='left')
+            tag = c.create_text(
+                0, 0, text=s, font=self._boldfont, anchor="nw", justify="left"
+            )
             bbox = c.bbox(tag)
             c.delete(tag)
-            width = bbox[2] #+ ChartView._LEAF_SPACING
+            width = bbox[2]  # + ChartView._LEAF_SPACING
             edgelen = max(edge.length(), 1)
-            self._unitsize = max(self._unitsize, width/edgelen)
+            self._unitsize = max(self._unitsize, width / edgelen)
             self._text_height = max(self._text_height, bbox[3] - bbox[1])
 
     def _add_edge(self, edge, minlvl=0):
@@ -1146,15 +1251,17 @@ class ChartView(object):
             - Call _draw_edge
         """
         # Do NOT show leaf edges in the chart.
-        if isinstance(edge, LeafEdge): return
+        if isinstance(edge, LeafEdge):
+            return
 
-        if edge in self._edgetags: return
+        if edge in self._edgetags:
+            return
         self._analyze_edge(edge)
         self._grow()
 
         if not self._compact:
             self._edgelevels.append([edge])
-            lvl = len(self._edgelevels)-1
+            lvl = len(self._edgelevels) - 1
             self._draw_edge(edge, lvl)
             self._resize()
             return
@@ -1168,7 +1275,7 @@ class ChartView(object):
                 self._resize()
 
             # Check if we can fit the edge in this level.
-            if lvl>=minlvl and not self._edge_conflict(edge, lvl):
+            if lvl >= minlvl and not self._edge_conflict(edge, lvl):
                 # Go ahead and draw it.
                 self._edgelevels[lvl].append(edge)
                 break
@@ -1184,14 +1291,14 @@ class ChartView(object):
             if edge in self._edgelevels[i]:
                 level = i
                 break
-        if level is None: return
+        if level is None:
+            return
         # Try to scroll the new edge into view.
-        y = (level+1) * self._chart_level_size
+        y = (level + 1) * self._chart_level_size
         dy = self._text_height + 10
-        self._chart_canvas.yview('moveto', 1.0)
+        self._chart_canvas.yview("moveto", 1.0)
         if self._chart_height != 0:
-            self._chart_canvas.yview('moveto',
-                                     (y-dy)/self._chart_height)
+            self._chart_canvas.yview("moveto", (y - dy) / self._chart_height)
 
     def _draw_edge(self, edge, lvl):
         """
@@ -1200,11 +1307,12 @@ class ChartView(object):
         c = self._chart_canvas
 
         # Draw the arrow.
-        x1 = (edge.start() * self._unitsize + ChartView._MARGIN)
-        x2 = (edge.end() * self._unitsize + ChartView._MARGIN)
-        if x2 == x1: x2 += max(4, self._unitsize/5)
-        y = (lvl+1) * self._chart_level_size
-        linetag = c.create_line(x1, y, x2, y, arrow='last', width=3)
+        x1 = edge.start() * self._unitsize + ChartView._MARGIN
+        x2 = edge.end() * self._unitsize + ChartView._MARGIN
+        if x2 == x1:
+            x2 += max(4, self._unitsize / 5)
+        y = (lvl + 1) * self._chart_level_size
+        linetag = c.create_line(x1, y, x2, y, arrow="last", width=3)
 
         # Draw a label for the edge.
         if isinstance(edge, TreeEdge):
@@ -1221,31 +1329,27 @@ class ChartView(object):
 
         rhs1 = " ".join(rhs[:pos])
         rhs2 = " ".join(rhs[pos:])
-        rhstag1 = c.create_text(x1+3, y, text=rhs1,
-                                font=self._font,
-                                anchor='nw')
+        rhstag1 = c.create_text(x1 + 3, y, text=rhs1, font=self._font, anchor="nw")
         dotx = c.bbox(rhstag1)[2] + 6
-        doty = (c.bbox(rhstag1)[1]+c.bbox(rhstag1)[3])/2
-        dottag = c.create_oval(dotx-2, doty-2, dotx+2, doty+2)
-        rhstag2 = c.create_text(dotx+6, y, text=rhs2,
-                                font=self._font,
-                                anchor='nw')
-        lhstag =  c.create_text((x1+x2)/2, y, text=str(edge.lhs()),
-                                anchor='s',
-                                font=self._boldfont)
+        doty = (c.bbox(rhstag1)[1] + c.bbox(rhstag1)[3]) / 2
+        dottag = c.create_oval(dotx - 2, doty - 2, dotx + 2, doty + 2)
+        rhstag2 = c.create_text(dotx + 6, y, text=rhs2, font=self._font, anchor="nw")
+        lhstag = c.create_text(
+            (x1 + x2) / 2, y, text=str(edge.lhs()), anchor="s", font=self._boldfont
+        )
 
         # Keep track of the edge's tags.
-        self._edgetags[edge] = (linetag, rhstag1,
-                                dottag, rhstag2, lhstag)
+        self._edgetags[edge] = (linetag, rhstag1, dottag, rhstag2, lhstag)
 
         # Register a callback for clicking on the edge.
         def cb(event, self=self, edge=edge):
-            self._fire_callbacks('select', edge)
-        c.tag_bind(rhstag1, '<Button-1>', cb)
-        c.tag_bind(rhstag2, '<Button-1>', cb)
-        c.tag_bind(linetag, '<Button-1>', cb)
-        c.tag_bind(dottag, '<Button-1>', cb)
-        c.tag_bind(lhstag, '<Button-1>', cb)
+            self._fire_callbacks("select", edge)
+
+        c.tag_bind(rhstag1, "<Button-1>", cb)
+        c.tag_bind(rhstag2, "<Button-1>", cb)
+        c.tag_bind(linetag, "<Button-1>", cb)
+        c.tag_bind(dottag, "<Button-1>", cb)
+        c.tag_bind(lhstag, "<Button-1>", cb)
 
         self._color_edge(edge)
 
@@ -1255,7 +1359,8 @@ class ChartView(object):
         If no colors are specified, use intelligent defaults
         (dependent on selection, etc.)
         """
-        if edge not in self._edgetags: return
+        if edge not in self._edgetags:
+            return
         c = self._chart_canvas
 
         if linecolor is not None and textcolor is not None:
@@ -1264,8 +1369,7 @@ class ChartView(object):
             tags = self._edgetags[edge]
             c.itemconfig(tags[0], fill=linecolor)
             c.itemconfig(tags[1], fill=textcolor)
-            c.itemconfig(tags[2], fill=textcolor,
-                         outline=textcolor)
+            c.itemconfig(tags[2], fill=textcolor, outline=textcolor)
             c.itemconfig(tags[3], fill=textcolor)
             c.itemconfig(tags[4], fill=textcolor)
             return
@@ -1273,14 +1377,14 @@ class ChartView(object):
             N = self._chart.num_leaves()
             if edge in self._marks:
                 self._color_edge(self._marks[edge])
-            if (edge.is_complete() and edge.span() == (0, N)):
-                self._color_edge(edge, '#084', '#042')
+            if edge.is_complete() and edge.span() == (0, N):
+                self._color_edge(edge, "#084", "#042")
             elif isinstance(edge, LeafEdge):
-                self._color_edge(edge, '#48c', '#246')
+                self._color_edge(edge, "#48c", "#246")
             else:
-                self._color_edge(edge, '#00f', '#008')
+                self._color_edge(edge, "#00f", "#008")
 
-    def mark_edge(self, edge, mark='#0df'):
+    def mark_edge(self, edge, mark="#0df"):
         """
         Mark an edge
         """
@@ -1300,7 +1404,7 @@ class ChartView(object):
             del self._marks[edge]
             self._color_edge(edge)
 
-    def markonly_edge(self, edge, mark='#0df'):
+    def markonly_edge(self, edge, mark="#0df"):
         self.unmark_edge()
         self.mark_edge(edge, mark)
 
@@ -1310,15 +1414,15 @@ class ChartView(object):
         to be, How big the tree should be, etc.
         """
         # Figure out the text height and the unit size.
-        unitsize = 70 # min unitsize
+        unitsize = 70  # min unitsize
         text_height = 0
         c = self._chart_canvas
 
         # Check against all tokens
         for leaf in self._chart.leaves():
-            tag = c.create_text(0,0, text=repr(leaf),
-                                font=self._font,
-                                anchor='nw', justify='left')
+            tag = c.create_text(
+                0, 0, text=repr(leaf), font=self._font, anchor="nw", justify="left"
+            )
             bbox = c.bbox(tag)
             c.delete(tag)
             width = bbox[2] + ChartView._LEAF_SPACING
@@ -1327,8 +1431,7 @@ class ChartView(object):
 
         self._unitsize = unitsize
         self._text_height = text_height
-        self._sentence_height = (self._text_height +
-                               2*ChartView._MARGIN)
+        self._sentence_height = self._text_height + 2 * ChartView._MARGIN
 
         # Check against edges.
         for edge in self._chart.edges():
@@ -1338,8 +1441,7 @@ class ChartView(object):
         self._chart_level_size = self._text_height * 2
 
         # Default tree size..
-        self._tree_height = (3 * (ChartView._TREE_LEVEL_SIZE +
-                                  self._text_height))
+        self._tree_height = 3 * (ChartView._TREE_LEVEL_SIZE + self._text_height)
 
         # Resize the scrollregions.
         self._resize()
@@ -1354,17 +1456,15 @@ class ChartView(object):
         c = self._chart_canvas
 
         # Reset the chart scroll region
-        width = ( self._chart.num_leaves() * self._unitsize +
-                  ChartView._MARGIN * 2 )
+        width = self._chart.num_leaves() * self._unitsize + ChartView._MARGIN * 2
 
         levels = len(self._edgelevels)
-        self._chart_height = (levels+2)*self._chart_level_size
-        c['scrollregion']=(0,0,width,self._chart_height)
+        self._chart_height = (levels + 2) * self._chart_level_size
+        c["scrollregion"] = (0, 0, width, self._chart_height)
 
         # Reset the tree scroll region
         if self._tree_canvas:
-            self._tree_canvas['scrollregion'] = (0, 0, width,
-                                                 self._tree_height)
+            self._tree_canvas["scrollregion"] = (0, 0, width, self._tree_height)
 
     def _draw_loclines(self):
         """
@@ -1377,36 +1477,40 @@ class ChartView(object):
         c3 = self._chart_canvas
         margin = ChartView._MARGIN
         self._loclines = []
-        for i in range(0, self._chart.num_leaves()+1):
-            x = i*self._unitsize + margin
+        for i in range(0, self._chart.num_leaves() + 1):
+            x = i * self._unitsize + margin
 
             if c1:
-                t1=c1.create_line(x, 0, x, BOTTOM)
+                t1 = c1.create_line(x, 0, x, BOTTOM)
                 c1.tag_lower(t1)
             if c2:
-                t2=c2.create_line(x, 0, x, self._sentence_height)
+                t2 = c2.create_line(x, 0, x, self._sentence_height)
                 c2.tag_lower(t2)
-            t3=c3.create_line(x, 0, x, BOTTOM)
+            t3 = c3.create_line(x, 0, x, BOTTOM)
             c3.tag_lower(t3)
-            t4=c3.create_text(x+2, 0, text=repr(i), anchor='nw',
-                              font=self._font)
+            t4 = c3.create_text(x + 2, 0, text=repr(i), anchor="nw", font=self._font)
             c3.tag_lower(t4)
-            #if i % 4 == 0:
+            # if i % 4 == 0:
             #    if c1: c1.itemconfig(t1, width=2, fill='gray60')
             #    if c2: c2.itemconfig(t2, width=2, fill='gray60')
             #    c3.itemconfig(t3, width=2, fill='gray60')
             if i % 2 == 0:
-                if c1: c1.itemconfig(t1, fill='gray60')
-                if c2: c2.itemconfig(t2, fill='gray60')
-                c3.itemconfig(t3, fill='gray60')
+                if c1:
+                    c1.itemconfig(t1, fill="gray60")
+                if c2:
+                    c2.itemconfig(t2, fill="gray60")
+                c3.itemconfig(t3, fill="gray60")
             else:
-                if c1: c1.itemconfig(t1, fill='gray80')
-                if c2: c2.itemconfig(t2, fill='gray80')
-                c3.itemconfig(t3, fill='gray80')
+                if c1:
+                    c1.itemconfig(t1, fill="gray80")
+                if c2:
+                    c2.itemconfig(t2, fill="gray80")
+                c3.itemconfig(t3, fill="gray80")
 
     def _draw_sentence(self):
         """Draw the sentence string."""
-        if self._chart.num_leaves() == 0: return
+        if self._chart.num_leaves() == 0:
+            return
         c = self._sentence_canvas
         margin = ChartView._MARGIN
         y = ChartView._MARGIN
@@ -1414,38 +1518,47 @@ class ChartView(object):
         for i, leaf in enumerate(self._chart.leaves()):
             x1 = i * self._unitsize + margin
             x2 = x1 + self._unitsize
-            x = (x1+x2)/2
-            tag = c.create_text(x, y, text=repr(leaf),
-                                font=self._font,
-                                anchor='n', justify='left')
+            x = (x1 + x2) / 2
+            tag = c.create_text(
+                x, y, text=repr(leaf), font=self._font, anchor="n", justify="left"
+            )
             bbox = c.bbox(tag)
-            rt=c.create_rectangle(x1+2, bbox[1]-(ChartView._LEAF_SPACING/2),
-                                  x2-2, bbox[3]+(ChartView._LEAF_SPACING/2),
-                                  fill='#f0f0f0', outline='#f0f0f0')
+            rt = c.create_rectangle(
+                x1 + 2,
+                bbox[1] - (ChartView._LEAF_SPACING / 2),
+                x2 - 2,
+                bbox[3] + (ChartView._LEAF_SPACING / 2),
+                fill="#f0f0f0",
+                outline="#f0f0f0",
+            )
             c.tag_lower(rt)
 
     def erase_tree(self):
-        for tag in self._tree_tags: self._tree_canvas.delete(tag)
+        for tag in self._tree_tags:
+            self._tree_canvas.delete(tag)
         self._treetoks = []
         self._treetoks_edge = None
         self._treetoks_index = 0
 
     def draw_tree(self, edge=None):
-        if edge is None and self._treetoks_edge is None: return
-        if edge is None: edge = self._treetoks_edge
+        if edge is None and self._treetoks_edge is None:
+            return
+        if edge is None:
+            edge = self._treetoks_edge
 
         # If it's a new edge, then get a new list of treetoks.
         if self._treetoks_edge != edge:
-            self._treetoks = [t for t in self._chart.trees(edge)
-                              if isinstance(t, Tree)]
+            self._treetoks = [t for t in self._chart.trees(edge) if isinstance(t, Tree)]
             self._treetoks_edge = edge
             self._treetoks_index = 0
 
         # Make sure there's something to draw.
-        if len(self._treetoks) == 0: return
+        if len(self._treetoks) == 0:
+            return
 
         # Erase the old tree.
-        for tag in self._tree_tags: self._tree_canvas.delete(tag)
+        for tag in self._tree_tags:
+            self._tree_canvas.delete(tag)
 
         # Draw the new tree.
         tree = self._treetoks[self._treetoks_index]
@@ -1455,34 +1568,37 @@ class ChartView(object):
         self._draw_treecycle()
 
         # Update the scroll region.
-        w = self._chart.num_leaves()*self._unitsize+2*ChartView._MARGIN
-        h = tree.height() * (ChartView._TREE_LEVEL_SIZE+self._text_height)
-        self._tree_canvas['scrollregion'] = (0, 0, w, h)
+        w = self._chart.num_leaves() * self._unitsize + 2 * ChartView._MARGIN
+        h = tree.height() * (ChartView._TREE_LEVEL_SIZE + self._text_height)
+        self._tree_canvas["scrollregion"] = (0, 0, w, h)
 
     def cycle_tree(self):
-        self._treetoks_index = (self._treetoks_index+1)%len(self._treetoks)
+        self._treetoks_index = (self._treetoks_index + 1) % len(self._treetoks)
         self.draw_tree(self._treetoks_edge)
 
     def _draw_treecycle(self):
-        if len(self._treetoks) <= 1: return
+        if len(self._treetoks) <= 1:
+            return
 
         # Draw the label.
-        label = '%d Trees' % len(self._treetoks)
+        label = "%d Trees" % len(self._treetoks)
         c = self._tree_canvas
         margin = ChartView._MARGIN
-        right = self._chart.num_leaves()*self._unitsize+margin-2
-        tag = c.create_text(right, 2, anchor='ne', text=label,
-                            font=self._boldfont)
+        right = self._chart.num_leaves() * self._unitsize + margin - 2
+        tag = c.create_text(right, 2, anchor="ne", text=label, font=self._boldfont)
         self._tree_tags.append(tag)
         _, _, _, y = c.bbox(tag)
 
         # Draw the triangles.
         for i in range(len(self._treetoks)):
-            x = right - 20*(len(self._treetoks)-i-1)
-            if i == self._treetoks_index: fill = '#084'
-            else: fill = '#fff'
-            tag = c.create_polygon(x, y+10, x-5, y, x-10, y+10,
-                             fill=fill, outline='black')
+            x = right - 20 * (len(self._treetoks) - i - 1)
+            if i == self._treetoks_index:
+                fill = "#084"
+            else:
+                fill = "#fff"
+            tag = c.create_polygon(
+                x, y + 10, x - 5, y, x - 10, y + 10, fill=fill, outline="black"
+            )
             self._tree_tags.append(tag)
 
             # Set up a callback: show the tree if they click on its
@@ -1490,7 +1606,8 @@ class ChartView(object):
             def cb(event, self=self, i=i):
                 self._treetoks_index = i
                 self.draw_tree()
-            c.tag_bind(tag, '<Button-1>', cb)
+
+            c.tag_bind(tag, "<Button-1>", cb)
 
     def _draw_treetok(self, treetok, index, depth=0):
         """
@@ -1504,26 +1621,32 @@ class ChartView(object):
         child_xs = []
         for child in treetok:
             if isinstance(child, Tree):
-                child_x, index = self._draw_treetok(child, index, depth+1)
+                child_x, index = self._draw_treetok(child, index, depth + 1)
                 child_xs.append(child_x)
             else:
-                child_xs.append((2*index+1)*self._unitsize/2 + margin)
+                child_xs.append((2 * index + 1) * self._unitsize / 2 + margin)
                 index += 1
 
         # If we have children, then get the node's x by averaging their
         # node x's.  Otherwise, make room for ourselves.
         if child_xs:
-            nodex = sum(child_xs)/len(child_xs)
+            nodex = sum(child_xs) / len(child_xs)
         else:
             # [XX] breaks for null productions.
-            nodex = (2*index+1)*self._unitsize/2 + margin
+            nodex = (2 * index + 1) * self._unitsize / 2 + margin
             index += 1
 
         # Draw the node
         nodey = depth * (ChartView._TREE_LEVEL_SIZE + self._text_height)
-        tag = c.create_text(nodex, nodey, anchor='n', justify='center',
-                            text=str(treetok.label()), fill='#042',
-                            font=self._boldfont)
+        tag = c.create_text(
+            nodex,
+            nodey,
+            anchor="n",
+            justify="center",
+            text=str(treetok.label()),
+            fill="#042",
+            font=self._boldfont,
+        )
         self._tree_tags.append(tag)
 
         # Draw lines to the children.
@@ -1531,19 +1654,37 @@ class ChartView(object):
         for childx, child in zip(child_xs, treetok):
             if isinstance(child, Tree) and child:
                 # A "real" tree token:
-                tag = c.create_line(nodex, nodey + self._text_height,
-                                    childx, childy, width=2, fill='#084')
+                tag = c.create_line(
+                    nodex,
+                    nodey + self._text_height,
+                    childx,
+                    childy,
+                    width=2,
+                    fill="#084",
+                )
                 self._tree_tags.append(tag)
             if isinstance(child, Tree) and not child:
                 # An unexpanded tree token:
-                tag = c.create_line(nodex, nodey + self._text_height,
-                                    childx, childy, width=2,
-                                    fill='#048', dash='2 3')
+                tag = c.create_line(
+                    nodex,
+                    nodey + self._text_height,
+                    childx,
+                    childy,
+                    width=2,
+                    fill="#048",
+                    dash="2 3",
+                )
                 self._tree_tags.append(tag)
             if not isinstance(child, Tree):
                 # A leaf:
-                tag = c.create_line(nodex, nodey + self._text_height,
-                                    childx, 10000, width=2, fill='#084')
+                tag = c.create_line(
+                    nodex,
+                    nodey + self._text_height,
+                    childx,
+                    10000,
+                    width=2,
+                    fill="#084",
+                )
                 self._tree_tags.append(tag)
 
         return nodex, index
@@ -1553,14 +1694,14 @@ class ChartView(object):
         Draw everything (from scratch).
         """
         if self._tree_canvas:
-            self._tree_canvas.delete('all')
+            self._tree_canvas.delete("all")
             self.draw_tree()
 
         if self._sentence_canvas:
-            self._sentence_canvas.delete('all')
+            self._sentence_canvas.delete("all")
             self._draw_sentence()
 
-        self._chart_canvas.delete('all')
+        self._chart_canvas.delete("all")
         self._edgetags = {}
 
         # Redraw any edges we erased.
@@ -1574,17 +1715,23 @@ class ChartView(object):
         self._draw_loclines()
 
     def add_callback(self, event, func):
-        self._callbacks.setdefault(event,{})[func] = 1
+        self._callbacks.setdefault(event, {})[func] = 1
 
     def remove_callback(self, event, func=None):
-        if func is None: del self._callbacks[event]
+        if func is None:
+            del self._callbacks[event]
         else:
-            try: del self._callbacks[event][func]
-            except: pass
+            try:
+                del self._callbacks[event][func]
+            except KeyError:
+                pass
 
     def _fire_callbacks(self, event, *args):
-        if event not in self._callbacks: return
-        for cb_func in list(self._callbacks[event].keys()): cb_func(*args)
+        if event not in self._callbacks:
+            return
+        for cb_func in list(self._callbacks[event].keys()):
+            cb_func(*args)
+
 
 #######################################################################
 # Edge Rules
@@ -1592,39 +1739,53 @@ class ChartView(object):
 # These version of the chart rules only apply to a specific edge.
 # This lets the user select an edge, and then apply a rule.
 
+
 class EdgeRule(object):
     """
     To create an edge rule, make an empty class that uses
     EdgeRule as the first base class, and the basic rule as the
     second base class.  (Order matters!)
     """
+
     def __init__(self, edge):
         super = self.__class__.__bases__[1]
         self._edge = edge
-        self.NUM_EDGES = super.NUM_EDGES-1
+        self.NUM_EDGES = super.NUM_EDGES - 1
+
     def apply(self, chart, grammar, *edges):
         super = self.__class__.__bases__[1]
         edges += (self._edge,)
-        for e in super.apply(self, chart, grammar, *edges): yield e
+        for e in super.apply(self, chart, grammar, *edges):
+            yield e
+
     def __str__(self):
         super = self.__class__.__bases__[1]
         return super.__str__(self)
 
+
 class TopDownPredictEdgeRule(EdgeRule, TopDownPredictRule):
     pass
+
+
 class BottomUpEdgeRule(EdgeRule, BottomUpPredictRule):
     pass
+
+
 class BottomUpLeftCornerEdgeRule(EdgeRule, BottomUpPredictCombineRule):
     pass
+
+
 class FundamentalEdgeRule(EdgeRule, SingleEdgeFundamentalRule):
     pass
 
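+# A minimal usage sketch for the classes above (a commented illustration of
+# the pattern, not code the application itself runs): given a chart edge
+# selected in the GUI, an EdgeRule subclass restricts its basic rule to that
+# edge.
+#
+#     rule = TopDownPredictEdgeRule(edge)
+#     for new_edge in rule.apply(chart, grammar):
+#         ...  # edges predicted top-down from the selected edge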
+
 #######################################################################
 # Chart Parser Application
 #######################################################################
 
+
 class ChartParserApp(object):
-    def __init__(self, grammar, tokens, title='Chart Parser Application'):
+    def __init__(self, grammar, tokens, title="Chart Parser Application"):
         # Initialize the parser
         self._init_parser(grammar, tokens)
 
@@ -1633,15 +1794,15 @@ class ChartParserApp(object):
             # Create the root window.
             self._root = Tk()
             self._root.title(title)
-            self._root.bind('<Control-q>', self.destroy)
+            self._root.bind("<Control-q>", self.destroy)
 
             # Set up some frames.
             frame3 = Frame(self._root)
             frame2 = Frame(self._root)
             frame1 = Frame(self._root)
-            frame3.pack(side='bottom', fill='none')
-            frame2.pack(side='bottom', fill='x')
-            frame1.pack(side='bottom', fill='both', expand=1)
+            frame3.pack(side="bottom", fill="none")
+            frame2.pack(side="bottom", fill="x")
+            frame1.pack(side="bottom", fill="both", expand=1)
 
             self._init_fonts(self._root)
             self._init_animation()
@@ -1657,12 +1818,13 @@ class ChartParserApp(object):
             self._init_bindings()
 
         except:
-            print('Error creating Tree View')
+            print("Error creating Tree View")
             self.destroy()
             raise
 
     def destroy(self, *args):
-        if self._root is None: return
+        if self._root is None:
+            return
         self._root.destroy()
         self._root = None
 
@@ -1673,12 +1835,13 @@ class ChartParserApp(object):
         from a script); otherwise, the demo will close as soon as
         the script completes.
         """
-        if in_idle(): return
+        if in_idle():
+            return
         self._root.mainloop(*args, **kwargs)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Initialization Helpers
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def _init_parser(self, grammar, tokens):
         self._grammar = grammar
@@ -1707,12 +1870,10 @@ class ChartParserApp(object):
 
         # What's our font size (default=same as sysfont)
         self._size = IntVar(root)
-        self._size.set(self._sysfont.cget('size'))
+        self._size.set(self._sysfont.cget("size"))
 
-        self._boldfont = Font(family='helvetica', weight='bold',
-                                    size=self._size.get())
-        self._font = Font(family='helvetica',
-                                    size=self._size.get())
+        self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get())
+        self._font = Font(family="helvetica", size=self._size.get())
 
     def _init_animation(self):
         # Are we stepping? (default=yes)
@@ -1721,206 +1882,299 @@ class ChartParserApp(object):
 
         # What's our animation speed (default=fast)
         self._animate = IntVar(self._root)
-        self._animate.set(3) # Default speed = fast
+        self._animate.set(3)  # Default speed = fast
 
         # Are we currently animating?
         self._animating = 0
 
     def _init_chartview(self, parent):
-        self._cv = ChartView(self._chart, parent,
-                             draw_tree=1, draw_sentence=1)
-        self._cv.add_callback('select', self._click_cv_edge)
+        self._cv = ChartView(self._chart, parent, draw_tree=1, draw_sentence=1)
+        self._cv.add_callback("select", self._click_cv_edge)
 
     def _init_rulelabel(self, parent):
-        ruletxt = 'Last edge generated by:'
-
-        self._rulelabel1 = Label(parent,text=ruletxt,
-                                         font=self._boldfont)
-        self._rulelabel2 = Label(parent, width=40,
-                                         relief='groove', anchor='w',
-                                         font=self._boldfont)
-        self._rulelabel1.pack(side='left')
-        self._rulelabel2.pack(side='left')
-        step = Checkbutton(parent, variable=self._step,
-                                   text='Step')
-        step.pack(side='right')
+        ruletxt = "Last edge generated by:"
+
+        self._rulelabel1 = Label(parent, text=ruletxt, font=self._boldfont)
+        self._rulelabel2 = Label(
+            parent, width=40, relief="groove", anchor="w", font=self._boldfont
+        )
+        self._rulelabel1.pack(side="left")
+        self._rulelabel2.pack(side="left")
+        step = Checkbutton(parent, variable=self._step, text="Step")
+        step.pack(side="right")
 
     def _init_buttons(self, parent):
         frame1 = Frame(parent)
         frame2 = Frame(parent)
-        frame1.pack(side='bottom', fill='x')
-        frame2.pack(side='top', fill='none')
-
-        Button(frame1, text='Reset\nParser',
-                       background='#90c0d0', foreground='black',
-                       command=self.reset).pack(side='right')
+        frame1.pack(side="bottom", fill="x")
+        frame2.pack(side="top", fill="none")
+
+        Button(
+            frame1,
+            text="Reset\nParser",
+            background="#90c0d0",
+            foreground="black",
+            command=self.reset,
+        ).pack(side="right")
         # Button(frame1, text='Pause',
         #               background='#90c0d0', foreground='black',
         #               command=self.pause).pack(side='left')
 
-        Button(frame1, text='Top Down\nStrategy',
-                       background='#90c0d0', foreground='black',
-                       command=self.top_down_strategy).pack(side='left')
-        Button(frame1, text='Bottom Up\nStrategy',
-                       background='#90c0d0', foreground='black',
-                       command=self.bottom_up_strategy).pack(side='left')
-        Button(frame1, text='Bottom Up\nLeft-Corner Strategy',
-                       background='#90c0d0', foreground='black',
-                       command=self.bottom_up_leftcorner_strategy).pack(side='left')
-
-        Button(frame2, text='Top Down Init\nRule',
-                       background='#90f090', foreground='black',
-                       command=self.top_down_init).pack(side='left')
-        Button(frame2, text='Top Down Predict\nRule',
-                       background='#90f090', foreground='black',
-                       command=self.top_down_predict).pack(side='left')
-        Frame(frame2, width=20).pack(side='left')
-
-        Button(frame2, text='Bottom Up Predict\nRule',
-                       background='#90f090', foreground='black',
-                       command=self.bottom_up).pack(side='left')
-        Frame(frame2, width=20).pack(side='left')
-
-        Button(frame2, text='Bottom Up Left-Corner\nPredict Rule',
-                       background='#90f090', foreground='black',
-                       command=self.bottom_up_leftcorner).pack(side='left')
-        Frame(frame2, width=20).pack(side='left')
-
-        Button(frame2, text='Fundamental\nRule',
-                       background='#90f090', foreground='black',
-                       command=self.fundamental).pack(side='left')
+        Button(
+            frame1,
+            text="Top Down\nStrategy",
+            background="#90c0d0",
+            foreground="black",
+            command=self.top_down_strategy,
+        ).pack(side="left")
+        Button(
+            frame1,
+            text="Bottom Up\nStrategy",
+            background="#90c0d0",
+            foreground="black",
+            command=self.bottom_up_strategy,
+        ).pack(side="left")
+        Button(
+            frame1,
+            text="Bottom Up\nLeft-Corner Strategy",
+            background="#90c0d0",
+            foreground="black",
+            command=self.bottom_up_leftcorner_strategy,
+        ).pack(side="left")
+
+        Button(
+            frame2,
+            text="Top Down Init\nRule",
+            background="#90f090",
+            foreground="black",
+            command=self.top_down_init,
+        ).pack(side="left")
+        Button(
+            frame2,
+            text="Top Down Predict\nRule",
+            background="#90f090",
+            foreground="black",
+            command=self.top_down_predict,
+        ).pack(side="left")
+        Frame(frame2, width=20).pack(side="left")
+
+        Button(
+            frame2,
+            text="Bottom Up Predict\nRule",
+            background="#90f090",
+            foreground="black",
+            command=self.bottom_up,
+        ).pack(side="left")
+        Frame(frame2, width=20).pack(side="left")
+
+        Button(
+            frame2,
+            text="Bottom Up Left-Corner\nPredict Rule",
+            background="#90f090",
+            foreground="black",
+            command=self.bottom_up_leftcorner,
+        ).pack(side="left")
+        Frame(frame2, width=20).pack(side="left")
+
+        Button(
+            frame2,
+            text="Fundamental\nRule",
+            background="#90f090",
+            foreground="black",
+            command=self.fundamental,
+        ).pack(side="left")
 
     def _init_bindings(self):
-        self._root.bind('<Up>', self._cv.scroll_up)
-        self._root.bind('<Down>', self._cv.scroll_down)
-        self._root.bind('<Prior>', self._cv.page_up)
-        self._root.bind('<Next>', self._cv.page_down)
-        self._root.bind('<Control-q>', self.destroy)
-        self._root.bind('<Control-x>', self.destroy)
-        self._root.bind('<F1>', self.help)
-
-        self._root.bind('<Control-s>', self.save_chart)
-        self._root.bind('<Control-o>', self.load_chart)
-        self._root.bind('<Control-r>', self.reset)
-
-        self._root.bind('t', self.top_down_strategy)
-        self._root.bind('b', self.bottom_up_strategy)
-        self._root.bind('c', self.bottom_up_leftcorner_strategy)
-        self._root.bind('<space>', self._stop_animation)
-
-        self._root.bind('<Control-g>', self.edit_grammar)
-        self._root.bind('<Control-t>', self.edit_sentence)
+        self._root.bind("<Up>", self._cv.scroll_up)
+        self._root.bind("<Down>", self._cv.scroll_down)
+        self._root.bind("<Prior>", self._cv.page_up)
+        self._root.bind("<Next>", self._cv.page_down)
+        self._root.bind("<Control-q>", self.destroy)
+        self._root.bind("<Control-x>", self.destroy)
+        self._root.bind("<F1>", self.help)
+
+        self._root.bind("<Control-s>", self.save_chart)
+        self._root.bind("<Control-o>", self.load_chart)
+        self._root.bind("<Control-r>", self.reset)
+
+        self._root.bind("t", self.top_down_strategy)
+        self._root.bind("b", self.bottom_up_strategy)
+        self._root.bind("c", self.bottom_up_leftcorner_strategy)
+        self._root.bind("<space>", self._stop_animation)
+
+        self._root.bind("<Control-g>", self.edit_grammar)
+        self._root.bind("<Control-t>", self.edit_sentence)
 
         # Animation speed control
-        self._root.bind('-', lambda e,a=self._animate:a.set(1))
-        self._root.bind('=', lambda e,a=self._animate:a.set(2))
-        self._root.bind('+', lambda e,a=self._animate:a.set(3))
+        self._root.bind("-", lambda e, a=self._animate: a.set(1))
+        self._root.bind("=", lambda e, a=self._animate: a.set(2))
+        self._root.bind("+", lambda e, a=self._animate: a.set(3))
 
         # Step control
-        self._root.bind('s', lambda e,s=self._step:s.set(not s.get()))
+        self._root.bind("s", lambda e, s=self._step: s.set(not s.get()))
 
     def _init_menubar(self):
         menubar = Menu(self._root)
 
         filemenu = Menu(menubar, tearoff=0)
-        filemenu.add_command(label='Save Chart', underline=0,
-                             command=self.save_chart, accelerator='Ctrl-s')
-        filemenu.add_command(label='Load Chart', underline=0,
-                             command=self.load_chart, accelerator='Ctrl-o')
-        filemenu.add_command(label='Reset Chart', underline=0,
-                             command=self.reset, accelerator='Ctrl-r')
+        filemenu.add_command(
+            label="Save Chart",
+            underline=0,
+            command=self.save_chart,
+            accelerator="Ctrl-s",
+        )
+        filemenu.add_command(
+            label="Load Chart",
+            underline=0,
+            command=self.load_chart,
+            accelerator="Ctrl-o",
+        )
+        filemenu.add_command(
+            label="Reset Chart", underline=0, command=self.reset, accelerator="Ctrl-r"
+        )
         filemenu.add_separator()
-        filemenu.add_command(label='Save Grammar',
-                             command=self.save_grammar)
-        filemenu.add_command(label='Load Grammar',
-                             command=self.load_grammar)
+        filemenu.add_command(label="Save Grammar", command=self.save_grammar)
+        filemenu.add_command(label="Load Grammar", command=self.load_grammar)
         filemenu.add_separator()
-        filemenu.add_command(label='Exit', underline=1,
-                             command=self.destroy, accelerator='Ctrl-x')
-        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+        filemenu.add_command(
+            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"
+        )
+        menubar.add_cascade(label="File", underline=0, menu=filemenu)
 
         editmenu = Menu(menubar, tearoff=0)
-        editmenu.add_command(label='Edit Grammar', underline=5,
-                             command=self.edit_grammar,
-                             accelerator='Ctrl-g')
-        editmenu.add_command(label='Edit Text', underline=5,
-                             command=self.edit_sentence,
-                             accelerator='Ctrl-t')
-        menubar.add_cascade(label='Edit', underline=0, menu=editmenu)
+        editmenu.add_command(
+            label="Edit Grammar",
+            underline=5,
+            command=self.edit_grammar,
+            accelerator="Ctrl-g",
+        )
+        editmenu.add_command(
+            label="Edit Text",
+            underline=5,
+            command=self.edit_sentence,
+            accelerator="Ctrl-t",
+        )
+        menubar.add_cascade(label="Edit", underline=0, menu=editmenu)
 
         viewmenu = Menu(menubar, tearoff=0)
-        viewmenu.add_command(label='Chart Matrix', underline=6,
-                             command=self.view_matrix)
-        viewmenu.add_command(label='Results', underline=0,
-                             command=self.view_results)
-        menubar.add_cascade(label='View', underline=0, menu=viewmenu)
+        viewmenu.add_command(
+            label="Chart Matrix", underline=6, command=self.view_matrix
+        )
+        viewmenu.add_command(label="Results", underline=0, command=self.view_results)
+        menubar.add_cascade(label="View", underline=0, menu=viewmenu)
 
         rulemenu = Menu(menubar, tearoff=0)
-        rulemenu.add_command(label='Top Down Strategy', underline=0,
-                             command=self.top_down_strategy,
-                             accelerator='t')
-        rulemenu.add_command(label='Bottom Up Strategy', underline=0,
-                             command=self.bottom_up_strategy,
-                             accelerator='b')
-        rulemenu.add_command(label='Bottom Up Left-Corner Strategy', underline=0,
-                             command=self.bottom_up_leftcorner_strategy,
-                             accelerator='c')
+        rulemenu.add_command(
+            label="Top Down Strategy",
+            underline=0,
+            command=self.top_down_strategy,
+            accelerator="t",
+        )
+        rulemenu.add_command(
+            label="Bottom Up Strategy",
+            underline=0,
+            command=self.bottom_up_strategy,
+            accelerator="b",
+        )
+        rulemenu.add_command(
+            label="Bottom Up Left-Corner Strategy",
+            underline=0,
+            command=self.bottom_up_leftcorner_strategy,
+            accelerator="c",
+        )
         rulemenu.add_separator()
-        rulemenu.add_command(label='Bottom Up Rule',
-                             command=self.bottom_up)
-        rulemenu.add_command(label='Bottom Up Left-Corner Rule',
-                             command=self.bottom_up_leftcorner)
-        rulemenu.add_command(label='Top Down Init Rule',
-                             command=self.top_down_init)
-        rulemenu.add_command(label='Top Down Predict Rule',
-                             command=self.top_down_predict)
-        rulemenu.add_command(label='Fundamental Rule',
-                             command=self.fundamental)
-        menubar.add_cascade(label='Apply', underline=0, menu=rulemenu)
+        rulemenu.add_command(label="Bottom Up Rule", command=self.bottom_up)
+        rulemenu.add_command(
+            label="Bottom Up Left-Corner Rule", command=self.bottom_up_leftcorner
+        )
+        rulemenu.add_command(label="Top Down Init Rule", command=self.top_down_init)
+        rulemenu.add_command(
+            label="Top Down Predict Rule", command=self.top_down_predict
+        )
+        rulemenu.add_command(label="Fundamental Rule", command=self.fundamental)
+        menubar.add_cascade(label="Apply", underline=0, menu=rulemenu)
 
         animatemenu = Menu(menubar, tearoff=0)
-        animatemenu.add_checkbutton(label="Step", underline=0,
-                                    variable=self._step,
-                                    accelerator='s')
+        animatemenu.add_checkbutton(
+            label="Step", underline=0, variable=self._step, accelerator="s"
+        )
         animatemenu.add_separator()
-        animatemenu.add_radiobutton(label="No Animation", underline=0,
-                                    variable=self._animate, value=0)
-        animatemenu.add_radiobutton(label="Slow Animation", underline=0,
-                                    variable=self._animate, value=1,
-                                    accelerator='-')
-        animatemenu.add_radiobutton(label="Normal Animation", underline=0,
-                                    variable=self._animate, value=2,
-                                    accelerator='=')
-        animatemenu.add_radiobutton(label="Fast Animation", underline=0,
-                                    variable=self._animate, value=3,
-                                    accelerator='+')
+        animatemenu.add_radiobutton(
+            label="No Animation", underline=0, variable=self._animate, value=0
+        )
+        animatemenu.add_radiobutton(
+            label="Slow Animation",
+            underline=0,
+            variable=self._animate,
+            value=1,
+            accelerator="-",
+        )
+        animatemenu.add_radiobutton(
+            label="Normal Animation",
+            underline=0,
+            variable=self._animate,
+            value=2,
+            accelerator="=",
+        )
+        animatemenu.add_radiobutton(
+            label="Fast Animation",
+            underline=0,
+            variable=self._animate,
+            value=3,
+            accelerator="+",
+        )
         menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)
 
         zoommenu = Menu(menubar, tearoff=0)
-        zoommenu.add_radiobutton(label='Tiny', variable=self._size,
-                                 underline=0, value=10, command=self.resize)
-        zoommenu.add_radiobutton(label='Small', variable=self._size,
-                                 underline=0, value=12, command=self.resize)
-        zoommenu.add_radiobutton(label='Medium', variable=self._size,
-                                 underline=0, value=14, command=self.resize)
-        zoommenu.add_radiobutton(label='Large', variable=self._size,
-                                 underline=0, value=18, command=self.resize)
-        zoommenu.add_radiobutton(label='Huge', variable=self._size,
-                                 underline=0, value=24, command=self.resize)
-        menubar.add_cascade(label='Zoom', underline=0, menu=zoommenu)
+        zoommenu.add_radiobutton(
+            label="Tiny",
+            variable=self._size,
+            underline=0,
+            value=10,
+            command=self.resize,
+        )
+        zoommenu.add_radiobutton(
+            label="Small",
+            variable=self._size,
+            underline=0,
+            value=12,
+            command=self.resize,
+        )
+        zoommenu.add_radiobutton(
+            label="Medium",
+            variable=self._size,
+            underline=0,
+            value=14,
+            command=self.resize,
+        )
+        zoommenu.add_radiobutton(
+            label="Large",
+            variable=self._size,
+            underline=0,
+            value=18,
+            command=self.resize,
+        )
+        zoommenu.add_radiobutton(
+            label="Huge",
+            variable=self._size,
+            underline=0,
+            value=24,
+            command=self.resize,
+        )
+        menubar.add_cascade(label="Zoom", underline=0, menu=zoommenu)
 
         helpmenu = Menu(menubar, tearoff=0)
-        helpmenu.add_command(label='About', underline=0,
-                             command=self.about)
-        helpmenu.add_command(label='Instructions', underline=0,
-                             command=self.help, accelerator='F1')
-        menubar.add_cascade(label='Help', underline=0, menu=helpmenu)
+        helpmenu.add_command(label="About", underline=0, command=self.about)
+        helpmenu.add_command(
+            label="Instructions", underline=0, command=self.help, accelerator="F1"
+        )
+        menubar.add_cascade(label="Help", underline=0, menu=helpmenu)
 
         self._root.config(menu=menubar)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Selection Handling
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def _click_cv_edge(self, edge):
         if edge != self._selection:
@@ -1939,11 +2193,13 @@ class ChartParserApp(object):
     def _select_edge(self, edge):
         self._selection = edge
         # Update the chart view.
-        self._cv.markonly_edge(edge, '#f00')
+        self._cv.markonly_edge(edge, "#f00")
         self._cv.draw_tree(edge)
         # Update the matrix view.
-        if self._matrix: self._matrix.markonly_edge(edge)
-        if self._matrix: self._matrix.view_edge(edge)
+        if self._matrix:
+            self._matrix.markonly_edge(edge)
+        if self._matrix:
+            self._matrix.view_edge(edge)
 
     def _deselect_edge(self):
         self._selection = None
@@ -1951,130 +2207,156 @@ class ChartParserApp(object):
         self._cv.unmark_edge()
         self._cv.erase_tree()
         # Update the matrix view
-        if self._matrix: self._matrix.unmark_edge()
+        if self._matrix:
+            self._matrix.unmark_edge()
 
     def _show_new_edge(self, edge):
         self._display_rule(self._cp.current_chartrule())
         # Update the chart view.
         self._cv.update()
         self._cv.draw_tree(edge)
-        self._cv.markonly_edge(edge, '#0df')
+        self._cv.markonly_edge(edge, "#0df")
         self._cv.view_edge(edge)
         # Update the matrix view.
-        if self._matrix: self._matrix.update()
-        if self._matrix: self._matrix.markonly_edge(edge)
-        if self._matrix: self._matrix.view_edge(edge)
+        if self._matrix:
+            self._matrix.update()
+        if self._matrix:
+            self._matrix.markonly_edge(edge)
+        if self._matrix:
+            self._matrix.view_edge(edge)
         # Update the results view.
-        if self._results: self._results.update(edge)
+        if self._results:
+            self._results.update(edge)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Help/usage
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def help(self, *e):
         self._animating = 0
         # The default font's not very legible; try using 'fixed' instead.
         try:
-            ShowText(self._root, 'Help: Chart Parser Application',
-                     (__doc__ or '').strip(), width=75, font='fixed')
+            ShowText(
+                self._root,
+                "Help: Chart Parser Application",
+                (__doc__ or "").strip(),
+                width=75,
+                font="fixed",
+            )
         except:
-            ShowText(self._root, 'Help: Chart Parser Application',
-                     (__doc__ or '').strip(), width=75)
+            ShowText(
+                self._root,
+                "Help: Chart Parser Application",
+                (__doc__ or "").strip(),
+                width=75,
+            )
 
     def about(self, *e):
-        ABOUT = ("NLTK Chart Parser Application\n"+
-                 "Written by Edward Loper")
-        showinfo('About: Chart Parser Application', ABOUT)
+        ABOUT = "NLTK Chart Parser Application\n" + "Written by Edward Loper"
+        showinfo("About: Chart Parser Application", ABOUT)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # File Menu
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
-    CHART_FILE_TYPES = [('Pickle file', '.pickle'),
-                        ('All files', '*')]
-    GRAMMAR_FILE_TYPES = [('Plaintext grammar file', '.cfg'),
-                          ('Pickle file', '.pickle'),
-                          ('All files', '*')]
+    CHART_FILE_TYPES = [("Pickle file", ".pickle"), ("All files", "*")]
+    GRAMMAR_FILE_TYPES = [
+        ("Plaintext grammar file", ".cfg"),
+        ("Pickle file", ".pickle"),
+        ("All files", "*"),
+    ]
 
     def load_chart(self, *args):
         "Load a chart from a pickle file"
-        filename = askopenfilename(filetypes=self.CHART_FILE_TYPES,
-                                   defaultextension='.pickle')
-        if not filename: return
+        filename = askopenfilename(
+            filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
+        )
+        if not filename:
+            return
         try:
-            with open(filename, 'rb') as infile:
+            with open(filename, "rb") as infile:
                 chart = pickle.load(infile)
             self._chart = chart
             self._cv.update(chart)
-            if self._matrix: self._matrix.set_chart(chart)
-            if self._matrix: self._matrix.deselect_cell()
-            if self._results: self._results.set_chart(chart)
+            if self._matrix:
+                self._matrix.set_chart(chart)
+            if self._matrix:
+                self._matrix.deselect_cell()
+            if self._results:
+                self._results.set_chart(chart)
             self._cp.set_chart(chart)
         except Exception as e:
-            raise
-            showerror('Error Loading Chart',
-                                   'Unable to open file: %r' % filename)
+            showerror("Error Loading Chart", "Unable to open file: %r" % filename)
 
     def save_chart(self, *args):
         "Save a chart to a pickle file"
-        filename = asksaveasfilename(filetypes=self.CHART_FILE_TYPES,
-                                     defaultextension='.pickle')
-        if not filename: return
+        filename = asksaveasfilename(
+            filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
+        )
+        if not filename:
+            return
         try:
-            with open(filename, 'wb') as outfile:
+            with open(filename, "wb") as outfile:
                 pickle.dump(self._chart, outfile)
         except Exception as e:
-            raise
-            showerror('Error Saving Chart',
-                                   'Unable to open file: %r' % filename)
+            showerror("Error Saving Chart", "Unable to open file: %r" % filename)
 
     def load_grammar(self, *args):
         "Load a grammar from a pickle file"
-        filename = askopenfilename(filetypes=self.GRAMMAR_FILE_TYPES,
-                                   defaultextension='.cfg')
-        if not filename: return
+        filename = askopenfilename(
+            filetypes=self.GRAMMAR_FILE_TYPES, defaultextension=".cfg"
+        )
+        if not filename:
+            return
         try:
-            if filename.endswith('.pickle'):
-                with open(filename, 'rb') as infile:
+            if filename.endswith(".pickle"):
+                with open(filename, "rb") as infile:
                     grammar = pickle.load(infile)
             else:
-                with open(filename, 'r') as infile:
+                with open(filename, "r") as infile:
                     grammar = CFG.fromstring(infile.read())
             self.set_grammar(grammar)
         except Exception as e:
-            showerror('Error Loading Grammar',
-                                   'Unable to open file: %r' % filename)
+            showerror("Error Loading Grammar", "Unable to open file: %r" % filename)
 
     def save_grammar(self, *args):
-        filename = asksaveasfilename(filetypes=self.GRAMMAR_FILE_TYPES,
-                                     defaultextension='.cfg')
-        if not filename: return
+        filename = asksaveasfilename(
+            filetypes=self.GRAMMAR_FILE_TYPES, defaultextension=".cfg"
+        )
+        if not filename:
+            return
         try:
-            if filename.endswith('.pickle'):
-                with open(filename, 'wb') as outfile:
+            if filename.endswith(".pickle"):
+                with open(filename, "wb") as outfile:
                     pickle.dump((self._chart, self._tokens), outfile)
             else:
-                with open(filename, 'w') as outfile:
+                with open(filename, "w") as outfile:
                     prods = self._grammar.productions()
                     start = [p for p in prods if p.lhs() == self._grammar.start()]
                     rest = [p for p in prods if p.lhs() != self._grammar.start()]
-                    for prod in start: outfile.write('%s\n' % prod)
-                    for prod in rest: outfile.write('%s\n' % prod)
+                    for prod in start:
+                        outfile.write("%s\n" % prod)
+                    for prod in rest:
+                        outfile.write("%s\n" % prod)
         except Exception as e:
-            showerror('Error Saving Grammar',
-                                   'Unable to open file: %r' % filename)
+            showerror("Error Saving Grammar", "Unable to open file: %r" % filename)
 
     def reset(self, *args):
         self._animating = 0
         self._reset_parser()
         self._cv.update(self._chart)
-        if self._matrix: self._matrix.set_chart(self._chart)
-        if self._matrix: self._matrix.deselect_cell()
-        if self._results: self._results.set_chart(self._chart)
-
-    #////////////////////////////////////////////////////////////
+        if self._matrix:
+            self._matrix.set_chart(self._chart)
+        if self._matrix:
+            self._matrix.deselect_cell()
+        if self._results:
+            self._results.set_chart(self._chart)
+
+    # ////////////////////////////////////////////////////////////
     # Edit
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def edit_grammar(self, *e):
         CFGEditor(self._root, self._grammar, self.set_grammar)
@@ -2082,35 +2364,37 @@ class ChartParserApp(object):
     def set_grammar(self, grammar):
         self._grammar = grammar
         self._cp.set_grammar(grammar)
-        if self._results: self._results.set_grammar(grammar)
+        if self._results:
+            self._results.set_grammar(grammar)
 
     def edit_sentence(self, *e):
         sentence = " ".join(self._tokens)
-        title = 'Edit Text'
-        instr = 'Enter a new sentence to parse.'
+        title = "Edit Text"
+        instr = "Enter a new sentence to parse."
         EntryDialog(self._root, sentence, instr, self.set_sentence, title)
 
     def set_sentence(self, sentence):
         self._tokens = list(sentence.split())
         self.reset()
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # View Menu
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def view_matrix(self, *e):
-        if self._matrix is not None: self._matrix.destroy()
+        if self._matrix is not None:
+            self._matrix.destroy()
         self._matrix = ChartMatrixView(self._root, self._chart)
-        self._matrix.add_callback('select', self._select_matrix_edge)
+        self._matrix.add_callback("select", self._select_matrix_edge)
 
     def view_results(self, *e):
-        if self._results is not None: self._results.destroy()
-        self._results = ChartResultsView(self._root, self._chart,
-                                         self._grammar)
+        if self._results is not None:
+            self._results.destroy()
+        self._results = ChartResultsView(self._root, self._chart, self._grammar)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Zoom Menu
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def resize(self):
         self._animating = 0
@@ -2125,9 +2409,9 @@ class ChartParserApp(object):
     def get_font_size(self):
         return abs(self._size.get())
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Parsing
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def apply_strategy(self, strategy, edge_strategy=None):
         # If we're animating, then stop.
@@ -2137,7 +2421,7 @@ class ChartParserApp(object):
 
         # Clear the rule display & mark.
         self._display_rule(None)
-        #self._cv.unmark_edge()
+        # self._cv.unmark_edge()
 
         if self._step.get():
             selection = self._selection
@@ -2161,16 +2445,20 @@ class ChartParserApp(object):
                 self._animate_strategy()
             else:
                 for edge in self._cpstep:
-                    if edge is None: break
+                    if edge is None:
+                        break
                 self._cv.update()
-                if self._matrix: self._matrix.update()
-                if self._results: self._results.update()
+                if self._matrix:
+                    self._matrix.update()
+                if self._results:
+                    self._results.update()
 
     def _stop_animation(self, *e):
         self._animating = 0
 
     def _animate_strategy(self, speed=1):
-        if self._animating == 0: return
+        if self._animating == 0:
+            return
         if self._apply_strategy() is not None:
             if self._animate.get() == 0 or self._step.get() == 1:
                 return
@@ -2190,48 +2478,57 @@ class ChartParserApp(object):
 
     def _display_rule(self, rule):
         if rule is None:
-            self._rulelabel2['text'] = ''
+            self._rulelabel2["text"] = ""
         else:
             name = str(rule)
-            self._rulelabel2['text'] = name
+            self._rulelabel2["text"] = name
             size = self._cv.get_font_size()
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Parsing Strategies
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     # Basic rules:
-    _TD_INIT     = [TopDownInitRule()]
-    _TD_PREDICT  = [TopDownPredictRule()]
-    _BU_RULE     = [BottomUpPredictRule()]
-    _BU_LC_RULE  = [BottomUpPredictCombineRule()]
+    _TD_INIT = [TopDownInitRule()]
+    _TD_PREDICT = [TopDownPredictRule()]
+    _BU_RULE = [BottomUpPredictRule()]
+    _BU_LC_RULE = [BottomUpPredictCombineRule()]
     _FUNDAMENTAL = [SingleEdgeFundamentalRule()]
 
     # Complete strategies:
-    _TD_STRATEGY =  _TD_INIT + _TD_PREDICT + _FUNDAMENTAL
+    _TD_STRATEGY = _TD_INIT + _TD_PREDICT + _FUNDAMENTAL
     _BU_STRATEGY = _BU_RULE + _FUNDAMENTAL
     _BU_LC_STRATEGY = _BU_LC_RULE + _FUNDAMENTAL
 
     # Button callback functions:
     def top_down_init(self, *e):
         self.apply_strategy(self._TD_INIT, None)
+
     def top_down_predict(self, *e):
         self.apply_strategy(self._TD_PREDICT, TopDownPredictEdgeRule)
+
     def bottom_up(self, *e):
         self.apply_strategy(self._BU_RULE, BottomUpEdgeRule)
+
     def bottom_up_leftcorner(self, *e):
         self.apply_strategy(self._BU_LC_RULE, BottomUpLeftCornerEdgeRule)
+
     def fundamental(self, *e):
         self.apply_strategy(self._FUNDAMENTAL, FundamentalEdgeRule)
+
     def bottom_up_strategy(self, *e):
         self.apply_strategy(self._BU_STRATEGY, BottomUpEdgeRule)
+
     def bottom_up_leftcorner_strategy(self, *e):
         self.apply_strategy(self._BU_LC_STRATEGY, BottomUpLeftCornerEdgeRule)
+
     def top_down_strategy(self, *e):
         self.apply_strategy(self._TD_STRATEGY, TopDownPredictEdgeRule)
 
+
 def app():
-    grammar = CFG.fromstring("""
+    grammar = CFG.fromstring(
+        """
     # Grammatical productions.
         S -> NP VP
         VP -> VP PP | V NP | V
@@ -2243,34 +2540,36 @@ def app():
         N -> 'dog' | 'cookie' | 'table' | 'cake' | 'fork'
         V -> 'ate' | 'saw'
         P -> 'on' | 'under' | 'with'
-    """)
+    """
+    )
 
-    sent = 'John ate the cake on the table with a fork'
-    sent = 'John ate the cake on the table'
+    sent = "John ate the cake on the table with a fork"
+    sent = "John ate the cake on the table"
     tokens = list(sent.split())
 
-    print('grammar= (')
+    print("grammar= (")
     for rule in grammar.productions():
-        print(('    ', repr(rule)+','))
-    print(')')
-    print(('tokens = %r' % tokens))
+        print("    ", repr(rule) + ",")
+    print(")")
+    print("tokens = %r" % tokens)
     print('Calling "ChartParserApp(grammar, tokens)"...')
     ChartParserApp(grammar, tokens).mainloop()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     app()
 
     # Chart comparer:
-    #charts = ['/tmp/earley.pickle',
+    # charts = ['/tmp/earley.pickle',
     #          '/tmp/topdown.pickle',
     #          '/tmp/bottomup.pickle']
-    #ChartComparer(*charts).mainloop()
+    # ChartComparer(*charts).mainloop()
 
-    #import profile
-    #profile.run('demo2()', '/tmp/profile.out')
-    #import pstats
-    #p = pstats.Stats('/tmp/profile.out')
-    #p.strip_dirs().sort_stats('time', 'cum').print_stats(60)
-    #p.strip_dirs().sort_stats('cum', 'time').print_stats(60)
+    # import profile
+    # profile.run('demo2()', '/tmp/profile.out')
+    # import pstats
+    # p = pstats.Stats('/tmp/profile.out')
+    # p.strip_dirs().sort_stats('time', 'cum').print_stats(60)
+    # p.strip_dirs().sort_stats('cum', 'time').print_stats(60)
 
-__all__ = ['app']
+__all__ = ["app"]
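+
+# A minimal non-GUI sketch of the same strategies (a commented illustration;
+# it assumes nltk.parse.chart exposes ChartParser and BU_STRATEGY, as recent
+# NLTK releases do):
+#
+#     from nltk import CFG
+#     from nltk.parse.chart import ChartParser, BU_STRATEGY
+#
+#     grammar = CFG.fromstring("S -> 'John' V NP\nV -> 'ate'\nNP -> 'the' 'cake'")
+#     parser = ChartParser(grammar, BU_STRATEGY)
+#     for tree in parser.parse("John ate the cake".split()):
+#         print(tree)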
diff --git a/nlp_resource_data/nltk/app/chartparser_app.pyc b/nlp_resource_data/nltk/app/chartparser_app.pyc
deleted file mode 100755 (executable)
index f494ece..0000000
Binary files a/nlp_resource_data/nltk/app/chartparser_app.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/app/chunkparser_app.py b/nlp_resource_data/nltk/app/chunkparser_app.py
old mode 100755 (executable)
new mode 100644 (file)
index 5e08421..699b7d7
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Regexp Chunk Parser Application
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -15,16 +15,25 @@ parser ``nltk.chunk.RegexpChunkParser``.
 # configuration parameters to select what's being chunked (eg VP vs NP)
 # and what part of the data is being used as the development set.
 
-from __future__ import division
 import time
 import textwrap
 import re
 import random
 
-from six.moves.tkinter import (Button, Canvas, Checkbutton, Frame, IntVar,
-                               Label, Menu, Scrollbar, Text, Tk)
-from six.moves.tkinter_tkfiledialog import askopenfilename, asksaveasfilename
-from six.moves.tkinter_font import Font
+from tkinter import (
+    Button,
+    Canvas,
+    Checkbutton,
+    Frame,
+    IntVar,
+    Label,
+    Menu,
+    Scrollbar,
+    Text,
+    Tk,
+)
+from tkinter.filedialog import askopenfilename, asksaveasfilename
+from tkinter.font import Font
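+
+# Under Python 3 the former six.moves aliases map directly onto the stdlib
+# tkinter package (six.moves.tkinter -> tkinter,
+# six.moves.tkinter_tkfiledialog -> tkinter.filedialog,
+# six.moves.tkinter_font -> tkinter.font), so the six shim can be dropped.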
 
 from nltk.tree import Tree
 from nltk.util import in_idle
@@ -33,6 +42,7 @@ from nltk.corpus import conll2000, treebank_chunk
 from nltk.chunk import ChunkScore, RegexpChunkParser
 from nltk.chunk.regexp import RegexpChunkRule
 
+
 class RegexpChunkApp(object):
     """
     A graphical tool for exploring the regular expression based chunk
@@ -49,33 +59,52 @@ class RegexpChunkApp(object):
     #: which is used in the help text.  (This should probably live with
     #: the conll and/or treebank corpus instead.)
     TAGSET = {
-        'CC':   'Coordinating conjunction',   'PRP$': 'Possessive pronoun',
-        'CD':   'Cardinal number',            'RB':   'Adverb',
-        'DT':   'Determiner',                 'RBR':  'Adverb, comparative',
-        'EX':   'Existential there',          'RBS':  'Adverb, superlative',
-        'FW':   'Foreign word',               'RP':   'Particle',
-        'JJ':   'Adjective',                  'TO':   'to',
-        'JJR':  'Adjective, comparative',     'UH':   'Interjection',
-        'JJS':  'Adjective, superlative',     'VB':   'Verb, base form',
-        'LS':   'List item marker',           'VBD':  'Verb, past tense',
-        'MD':   'Modal',                      'NNS':  'Noun, plural',
-        'NN':   'Noun, singular or masps',    'VBN':  'Verb, past participle',
-        'VBZ':  'Verb,3rd ps. sing. present', 'NNP':  'Proper noun, singular',
-        'NNPS': 'Proper noun plural',         'WDT':  'wh-determiner',
-        'PDT':  'Predeterminer',              'WP':   'wh-pronoun',
-        'POS':  'Possessive ending',          'WP$':  'Possessive wh-pronoun',
-        'PRP':  'Personal pronoun',           'WRB':  'wh-adverb',
-        '(':    'open parenthesis',           ')':    'close parenthesis',
-        '``':   'open quote',                 ',':    'comma',
-        "''":   'close quote',                '.':    'period',
-        '#':    'pound sign (currency marker)',
-        '$':    'dollar sign (currency marker)',
-        'IN':   'Preposition/subord. conjunction',
-        'SYM':  'Symbol (mathematical or scientific)',
-        'VBG':  'Verb, gerund/present participle',
-        'VBP':  'Verb, non-3rd ps. sing. present',
-        ':':    'colon',
-        }
+        "CC": "Coordinating conjunction",
+        "PRP$": "Possessive pronoun",
+        "CD": "Cardinal number",
+        "RB": "Adverb",
+        "DT": "Determiner",
+        "RBR": "Adverb, comparative",
+        "EX": "Existential there",
+        "RBS": "Adverb, superlative",
+        "FW": "Foreign word",
+        "RP": "Particle",
+        "JJ": "Adjective",
+        "TO": "to",
+        "JJR": "Adjective, comparative",
+        "UH": "Interjection",
+        "JJS": "Adjective, superlative",
+        "VB": "Verb, base form",
+        "LS": "List item marker",
+        "VBD": "Verb, past tense",
+        "MD": "Modal",
+        "NNS": "Noun, plural",
+        "NN": "Noun, singular or masps",
+        "VBN": "Verb, past participle",
+        "VBZ": "Verb,3rd ps. sing. present",
+        "NNP": "Proper noun, singular",
+        "NNPS": "Proper noun plural",
+        "WDT": "wh-determiner",
+        "PDT": "Predeterminer",
+        "WP": "wh-pronoun",
+        "POS": "Possessive ending",
+        "WP$": "Possessive wh-pronoun",
+        "PRP": "Personal pronoun",
+        "WRB": "wh-adverb",
+        "(": "open parenthesis",
+        ")": "close parenthesis",
+        "``": "open quote",
+        ",": "comma",
+        "''": "close quote",
+        ".": "period",
+        "#": "pound sign (currency marker)",
+        "$": "dollar sign (currency marker)",
+        "IN": "Preposition/subord. conjunction",
+        "SYM": "Symbol (mathematical or scientific)",
+        "VBG": "Verb, gerund/present participle",
+        "VBP": "Verb, non-3rd ps. sing. present",
+        ":": "colon",
+    }
 
     #: Contents for the help box.  This is a list of tuples, one for
     #: each help page, where each tuple has four elements:
@@ -85,111 +114,120 @@ class RegexpChunkApp(object):
     #:     like <red>...</red> to colorize the text; see ``HELP_AUTOTAG``
     #:     for a list of tags you can use for colorizing.
     HELP = [
-        ('Help', '20',
-         "Welcome to the regular expression chunk-parser grammar editor.  "
-         "You can use this editor to develop and test chunk parser grammars "
-         "based on NLTK's RegexpChunkParser class.\n\n"
-         # Help box.
-         "Use this box ('Help') to learn more about the editor; click on the "
-         "tabs for help on specific topics:"
-         "<indent>\n"
-         "Rules: grammar rule types\n"
-         "Regexps: regular expression syntax\n"
-         "Tags: part of speech tags\n</indent>\n"
-         # Grammar.
-         "Use the upper-left box ('Grammar') to edit your grammar.  "
-         "Each line of your grammar specifies a single 'rule', "
-         "which performs an action such as creating a chunk or merging "
-         "two chunks.\n\n"
-         # Dev set.
-         "The lower-left box ('Development Set') runs your grammar on the "
-         "development set, and displays the results.  "
-         "Your grammar's chunks are <highlight>highlighted</highlight>, and "
-         "the correct (gold standard) chunks are "
-         "<underline>underlined</underline>.  If they "
-         "match, they are displayed in <green>green</green>; otherwise, "
-         "they are displayed in <red>red</red>.  The box displays a single "
-         "sentence from the development set at a time; use the scrollbar or "
-         "the next/previous buttons view additional sentences.\n\n"
-         # Performance
-         "The lower-right box ('Evaluation') tracks the performance of "
-         "your grammar on the development set.  The 'precision' axis "
-         "indicates how many of your grammar's chunks are correct; and "
-         "the 'recall' axis indicates how many of the gold standard "
-         "chunks your system generated.  Typically, you should try to "
-         "design a grammar that scores high on both metrics.  The "
-         "exact precision and recall of the current grammar, as well "
-         "as their harmonic mean (the 'f-score'), are displayed in "
-         "the status bar at the bottom of the window."
-         ),
-        ('Rules', '10',
-         "<h1>{...regexp...}</h1>"
-         "<indent>\nChunk rule: creates new chunks from words matching "
-         "regexp.</indent>\n\n"
-         "<h1>}...regexp...{</h1>"
-         "<indent>\nChink rule: removes words matching regexp from existing "
-         "chunks.</indent>\n\n"
-         "<h1>...regexp1...}{...regexp2...</h1>"
-         "<indent>\nSplit rule: splits chunks that match regexp1 followed by "
-         "regexp2 in two.</indent>\n\n"
-         "<h1>...regexp...{}...regexp...</h1>"
-         "<indent>\nMerge rule: joins consecutive chunks that match regexp1 "
-         "and regexp2</indent>\n"
-         ),
-        ('Regexps', '10 60',
-         #"Regular Expression Syntax Summary:\n\n"
-         "<h1>Pattern\t\tMatches...</h1>\n"
-         "<hangindent>"
-         "\t<<var>T</var>>\ta word with tag <var>T</var> "
-         "(where <var>T</var> may be a regexp).\n"
-         "\t<var>x</var>?\tan optional <var>x</var>\n"
-         "\t<var>x</var>+\ta sequence of 1 or more <var>x</var>'s\n"
-         "\t<var>x</var>*\ta sequence of 0 or more <var>x</var>'s\n"
-         "\t<var>x</var>|<var>y</var>\t<var>x</var> or <var>y</var>\n"
-         "\t.\tmatches any character\n"
-         "\t(<var>x</var>)\tTreats <var>x</var> as a group\n"
-         "\t# <var>x...</var>\tTreats <var>x...</var> "
-         "(to the end of the line) as a comment\n"
-         "\t\\<var>C</var>\tmatches character <var>C</var> "
-         "(useful when <var>C</var> is a special character "
-         "like + or #)\n"
-         "</hangindent>"
-         "\n<h1>Examples:</h1>\n"
-         "<hangindent>"
-         '\t<regexp><NN></regexp>\n'
-         '\t\tMatches <match>"cow/NN"</match>\n'
-         '\t\tMatches <match>"green/NN"</match>\n'
-         '\t<regexp><VB.*></regexp>\n'
-         '\t\tMatches <match>"eating/VBG"</match>\n'
-         '\t\tMatches <match>"ate/VBD"</match>\n'
-         '\t<regexp><IN><DT><NN></regexp>\n'
-         '\t\tMatches <match>"on/IN the/DT car/NN"</match>\n'
-         '\t<regexp><RB>?<VBD></regexp>\n'
-         '\t\tMatches <match>"ran/VBD"</match>\n'
-         '\t\tMatches <match>"slowly/RB ate/VBD"</match>\n'
-         '\t<regexp><\#><CD> # This is a comment...</regexp>\n'
-         '\t\tMatches <match>"#/# 100/CD"</match>\n'
-         "</hangindent>"
-         ),
-        ('Tags', '10 60',
-         "<h1>Part of Speech Tags:</h1>\n" +
-         '<hangindent>' +
-         '<<TAGSET>>' + # this gets auto-substituted w/ self.TAGSET
-         '</hangindent>\n')
-        ]
+        (
+            "Help",
+            "20",
+            "Welcome to the regular expression chunk-parser grammar editor.  "
+            "You can use this editor to develop and test chunk parser grammars "
+            "based on NLTK's RegexpChunkParser class.\n\n"
+            # Help box.
+            "Use this box ('Help') to learn more about the editor; click on the "
+            "tabs for help on specific topics:"
+            "<indent>\n"
+            "Rules: grammar rule types\n"
+            "Regexps: regular expression syntax\n"
+            "Tags: part of speech tags\n</indent>\n"
+            # Grammar.
+            "Use the upper-left box ('Grammar') to edit your grammar.  "
+            "Each line of your grammar specifies a single 'rule', "
+            "which performs an action such as creating a chunk or merging "
+            "two chunks.\n\n"
+            # Dev set.
+            "The lower-left box ('Development Set') runs your grammar on the "
+            "development set, and displays the results.  "
+            "Your grammar's chunks are <highlight>highlighted</highlight>, and "
+            "the correct (gold standard) chunks are "
+            "<underline>underlined</underline>.  If they "
+            "match, they are displayed in <green>green</green>; otherwise, "
+            "they are displayed in <red>red</red>.  The box displays a single "
+            "sentence from the development set at a time; use the scrollbar or "
+            "the next/previous buttons view additional sentences.\n\n"
+            # Performance
+            "The lower-right box ('Evaluation') tracks the performance of "
+            "your grammar on the development set.  The 'precision' axis "
+            "indicates how many of your grammar's chunks are correct; and "
+            "the 'recall' axis indicates how many of the gold standard "
+            "chunks your system generated.  Typically, you should try to "
+            "design a grammar that scores high on both metrics.  The "
+            "exact precision and recall of the current grammar, as well "
+            "as their harmonic mean (the 'f-score'), are displayed in "
+            "the status bar at the bottom of the window.",
+        ),
+        (
+            "Rules",
+            "10",
+            "<h1>{...regexp...}</h1>"
+            "<indent>\nChunk rule: creates new chunks from words matching "
+            "regexp.</indent>\n\n"
+            "<h1>}...regexp...{</h1>"
+            "<indent>\nChink rule: removes words matching regexp from existing "
+            "chunks.</indent>\n\n"
+            "<h1>...regexp1...}{...regexp2...</h1>"
+            "<indent>\nSplit rule: splits chunks that match regexp1 followed by "
+            "regexp2 in two.</indent>\n\n"
+            "<h1>...regexp...{}...regexp...</h1>"
+            "<indent>\nMerge rule: joins consecutive chunks that match regexp1 "
+            "and regexp2</indent>\n",
+        ),
+        (
+            "Regexps",
+            "10 60",
+            # "Regular Expression Syntax Summary:\n\n"
+            "<h1>Pattern\t\tMatches...</h1>\n"
+            "<hangindent>"
+            "\t<<var>T</var>>\ta word with tag <var>T</var> "
+            "(where <var>T</var> may be a regexp).\n"
+            "\t<var>x</var>?\tan optional <var>x</var>\n"
+            "\t<var>x</var>+\ta sequence of 1 or more <var>x</var>'s\n"
+            "\t<var>x</var>*\ta sequence of 0 or more <var>x</var>'s\n"
+            "\t<var>x</var>|<var>y</var>\t<var>x</var> or <var>y</var>\n"
+            "\t.\tmatches any character\n"
+            "\t(<var>x</var>)\tTreats <var>x</var> as a group\n"
+            "\t# <var>x...</var>\tTreats <var>x...</var> "
+            "(to the end of the line) as a comment\n"
+            "\t\\<var>C</var>\tmatches character <var>C</var> "
+            "(useful when <var>C</var> is a special character "
+            "like + or #)\n"
+            "</hangindent>"
+            "\n<h1>Examples:</h1>\n"
+            "<hangindent>"
+            "\t<regexp><NN></regexp>\n"
+            '\t\tMatches <match>"cow/NN"</match>\n'
+            '\t\tMatches <match>"green/NN"</match>\n'
+            "\t<regexp><VB.*></regexp>\n"
+            '\t\tMatches <match>"eating/VBG"</match>\n'
+            '\t\tMatches <match>"ate/VBD"</match>\n'
+            "\t<regexp><IN><DT><NN></regexp>\n"
+            '\t\tMatches <match>"on/IN the/DT car/NN"</match>\n'
+            "\t<regexp><RB>?<VBD></regexp>\n"
+            '\t\tMatches <match>"ran/VBD"</match>\n'
+            '\t\tMatches <match>"slowly/RB ate/VBD"</match>\n'
+            "\t<regexp><\#><CD> # This is a comment...</regexp>\n"
+            '\t\tMatches <match>"#/# 100/CD"</match>\n'
+            "</hangindent>",
+        ),
+        (
+            "Tags",
+            "10 60",
+            "<h1>Part of Speech Tags:</h1>\n"
+            + "<hangindent>"
+            + "<<TAGSET>>"
+            + "</hangindent>\n",  # this gets auto-substituted w/ self.TAGSET
+        ),
+    ]
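
A minimal sketch of the rule types the 'Rules' help text above describes, using nltk.RegexpParser (a thin wrapper around RegexpChunkParser); the grammar and tagged sentence here are illustrative, not part of this module:

    import nltk

    # One {...} chunk rule, then one }...{ chink rule, as described
    # under 'Rules': chunk everything, then remove verbs/prepositions.
    grammar = r"""
    NP:
        {<.*>+}          # chunk rule: chunk everything
        }<VBD|IN>+{      # chink rule: remove verbs and prepositions
    """
    parser = nltk.RegexpParser(grammar)
    sent = [("the", "DT"), ("little", "JJ"), ("yellow", "JJ"), ("dog", "NN"),
            ("barked", "VBD"), ("at", "IN"), ("the", "DT"), ("cat", "NN")]
    print(parser.parse(sent))
    # (S (NP the/DT little/JJ yellow/JJ dog/NN) barked/VBD at/IN (NP the/DT cat/NN))

The f-score reported in the status bar is the harmonic mean of precision p and recall r, i.e. f = 2*p*r / (p + r).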
 
     HELP_AUTOTAG = [
-        ('red', dict(foreground='#a00')),
-        ('green', dict(foreground='#080')),
-        ('highlight', dict(background='#ddd')),
-        ('underline', dict(underline=True)),
-        ('h1', dict(underline=True)),
-        ('indent', dict(lmargin1=20, lmargin2=20)),
-        ('hangindent', dict(lmargin1=0, lmargin2=60)),
-        ('var', dict(foreground='#88f')),
-        ('regexp', dict(foreground='#ba7')),
-        ('match', dict(foreground='#6a6')),
-        ]
+        ("red", dict(foreground="#a00")),
+        ("green", dict(foreground="#080")),
+        ("highlight", dict(background="#ddd")),
+        ("underline", dict(underline=True)),
+        ("h1", dict(underline=True)),
+        ("indent", dict(lmargin1=20, lmargin2=20)),
+        ("hangindent", dict(lmargin1=0, lmargin2=60)),
+        ("var", dict(foreground="#88f")),
+        ("regexp", dict(foreground="#ba7")),
+        ("match", dict(foreground="#6a6")),
+    ]
 
     ##/////////////////////////////////////////////////////////////////
     ##  Config Parameters
@@ -205,59 +243,88 @@ class RegexpChunkApp(object):
        demon each time it runs."""
     _EVAL_FREQ = 0.2
     """The frequency (in seconds) at which the eval demon is run"""
-    _EVAL_DEMON_MIN = .02
+    _EVAL_DEMON_MIN = 0.02
     """The minimum amount of time that the eval demon should take each time
        it runs -- if it takes less than this time, _EVAL_CHUNK will be
        modified upwards."""
-    _EVAL_DEMON_MAX = .04
+    _EVAL_DEMON_MAX = 0.04
     """The maximum amount of time that the eval demon should take each time
        it runs -- if it takes more than this time, _EVAL_CHUNK will be
        modified downwards."""
 
     _GRAMMARBOX_PARAMS = dict(
-        width=40, height=12, background='#efe', highlightbackground='#efe',
-        highlightthickness=1, relief='groove', border=2, wrap='word')
+        width=40,
+        height=12,
+        background="#efe",
+        highlightbackground="#efe",
+        highlightthickness=1,
+        relief="groove",
+        border=2,
+        wrap="word",
+    )
     _HELPBOX_PARAMS = dict(
-        width=15, height=15, background='#efe', highlightbackground='#efe',
-        foreground='#555',
-        highlightthickness=1, relief='groove', border=2, wrap='word')
+        width=15,
+        height=15,
+        background="#efe",
+        highlightbackground="#efe",
+        foreground="#555",
+        highlightthickness=1,
+        relief="groove",
+        border=2,
+        wrap="word",
+    )
     _DEVSETBOX_PARAMS = dict(
-        width=70, height=10, background='#eef', highlightbackground='#eef',
-        highlightthickness=1, relief='groove', border=2, wrap='word',
-        tabs=(30,))
-    _STATUS_PARAMS = dict(
-        background='#9bb', relief='groove', border=2)
-    _FONT_PARAMS = dict(
-        family='helvetica', size=-20)
-    _FRAME_PARAMS = dict(
-        background='#777', padx=2, pady=2, border=3)
+        width=70,
+        height=10,
+        background="#eef",
+        highlightbackground="#eef",
+        highlightthickness=1,
+        relief="groove",
+        border=2,
+        wrap="word",
+        tabs=(30,),
+    )
+    _STATUS_PARAMS = dict(background="#9bb", relief="groove", border=2)
+    _FONT_PARAMS = dict(family="helvetica", size=-20)
+    _FRAME_PARAMS = dict(background="#777", padx=2, pady=2, border=3)
     _EVALBOX_PARAMS = dict(
-        background='#eef', highlightbackground='#eef',
-        highlightthickness=1, relief='groove', border=2,
-        width=300, height=280)
+        background="#eef",
+        highlightbackground="#eef",
+        highlightthickness=1,
+        relief="groove",
+        border=2,
+        width=300,
+        height=280,
+    )
     _BUTTON_PARAMS = dict(
-        background='#777', activebackground='#777',
-        highlightbackground='#777')
-    _HELPTAB_BG_COLOR = '#aba'
-    _HELPTAB_FG_COLOR = '#efe'
+        background="#777", activebackground="#777", highlightbackground="#777"
+    )
+    _HELPTAB_BG_COLOR = "#aba"
+    _HELPTAB_FG_COLOR = "#efe"
 
-    _HELPTAB_FG_PARAMS = dict(background='#efe')
-    _HELPTAB_BG_PARAMS = dict(background='#aba')
+    _HELPTAB_FG_PARAMS = dict(background="#efe")
+    _HELPTAB_BG_PARAMS = dict(background="#aba")
     _HELPTAB_SPACER = 6
 
     def normalize_grammar(self, grammar):
         # Strip comments
-        grammar = re.sub(r'((\\.|[^#])*)(#.*)?', r'\1', grammar)
+        grammar = re.sub(r"((\\.|[^#])*)(#.*)?", r"\1", grammar)
         # Normalize whitespace
-        grammar = re.sub(' +', ' ', grammar)
-        grammar = re.sub('\n\s+', '\n', grammar)
+        grammar = re.sub(" +", " ", grammar)
+        grammar = re.sub("\n\s+", "\n", grammar)
         grammar = grammar.strip()
         # [xx] Hack: automatically backslash $!
-        grammar = re.sub(r'([^\\])\$', r'\1\\$', grammar)
+        grammar = re.sub(r"([^\\])\$", r"\1\\$", grammar)
         return grammar
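
A standalone restatement of the normalization steps above with a sample input (the grammar string is invented for illustration):

    import re

    def normalize(grammar):
        grammar = re.sub(r"((\\.|[^#])*)(#.*)?", r"\1", grammar)  # strip comments
        grammar = re.sub(" +", " ", grammar)                      # collapse spaces
        grammar = re.sub(r"\n\s+", "\n", grammar)                 # trim line starts
        grammar = grammar.strip()
        return re.sub(r"([^\\])\$", r"\1\\$", grammar)            # backslash bare $

    print(normalize("NP:  {<DT>  <JJ>*  <NN>}   # det, adjectives, noun"))
    # -> NP: {<DT> <JJ>* <NN>}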
 
-    def __init__(self, devset_name='conll2000', devset=None,
-                 grammar = '', chunk_label='NP', tagset=None):
+    def __init__(
+        self,
+        devset_name="conll2000",
+        devset=None,
+        grammar="",
+        chunk_label="NP",
+        tagset=None,
+    ):
         """
         :param devset_name: The name of the development set; used for
             display & for save files.  If either the name 'treebank'
@@ -270,17 +337,18 @@ class RegexpChunkApp(object):
         """
         self._chunk_label = chunk_label
 
-        if tagset is None: tagset = self.TAGSET
+        if tagset is None:
+            tagset = self.TAGSET
         self.tagset = tagset
 
         # Named development sets:
         if devset is None:
-            if devset_name == 'conll2000':
-                devset = conll2000.chunked_sents('train.txt')#[:100]
-            elif devset == 'treebank':
-                devset = treebank_chunk.chunked_sents()#[:100]
+            if devset_name == "conll2000":
+                devset = conll2000.chunked_sents("train.txt")  # [:100]
+            elif devset_name == "treebank":
+                devset = treebank_chunk.chunked_sents()  # [:100]
             else:
-                raise ValueError('Unknown development set %s' % devset_name)
+                raise ValueError("Unknown development set %s" % devset_name)
 
         self.chunker = None
         """The chunker built from the grammar string"""
@@ -332,9 +400,9 @@ class RegexpChunkApp(object):
 
         # Set up the main window.
         top = self.top = Tk()
-        top.geometry('+50+50')
-        top.title('Regexp Chunk Parser App')
-        top.bind('<Control-q>', self.destroy)
+        top.geometry("+50+50")
+        top.title("Regexp Chunk Parser App")
+        top.bind("<Control-q>", self.destroy)
 
         # Variable that restricts how much of the devset we look at.
         self._devset_size = IntVar(top)
@@ -347,92 +415,133 @@ class RegexpChunkApp(object):
         self._init_menubar(top)
         self.grammarbox.focus()
 
-
         # If a grammar was given, then display it.
         if grammar:
-            self.grammarbox.insert('end', grammar+'\n')
-            self.grammarbox.mark_set('insert', '1.0')
+            self.grammarbox.insert("end", grammar + "\n")
+            self.grammarbox.mark_set("insert", "1.0")
 
         # Display the first item in the development set
         self.show_devset(0)
         self.update()
 
     def _init_bindings(self, top):
-        top.bind('<Control-n>', self._devset_next)
-        top.bind('<Control-p>', self._devset_prev)
-        top.bind('<Control-t>', self.toggle_show_trace)
-        top.bind('<KeyPress>', self.update)
-        top.bind('<Control-s>', lambda e: self.save_grammar())
-        top.bind('<Control-o>', lambda e: self.load_grammar())
-        self.grammarbox.bind('<Control-t>', self.toggle_show_trace)
-        self.grammarbox.bind('<Control-n>', self._devset_next)
-        self.grammarbox.bind('<Control-p>', self._devset_prev)
+        top.bind("<Control-n>", self._devset_next)
+        top.bind("<Control-p>", self._devset_prev)
+        top.bind("<Control-t>", self.toggle_show_trace)
+        top.bind("<KeyPress>", self.update)
+        top.bind("<Control-s>", lambda e: self.save_grammar())
+        top.bind("<Control-o>", lambda e: self.load_grammar())
+        self.grammarbox.bind("<Control-t>", self.toggle_show_trace)
+        self.grammarbox.bind("<Control-n>", self._devset_next)
+        self.grammarbox.bind("<Control-p>", self._devset_prev)
 
         # Redraw the eval graph when the window size changes
-        self.evalbox.bind('<Configure>', self._eval_plot)
+        self.evalbox.bind("<Configure>", self._eval_plot)
 
     def _init_fonts(self, top):
         # What's our font size (default=same as sysfont)
         self._size = IntVar(top)
         self._size.set(20)
-        self._font = Font(family='helvetica',
-                                 size=-self._size.get())
-        self._smallfont = Font(family='helvetica',
-                                      size=-(int(self._size.get()*14//20)))
+        self._font = Font(family="helvetica", size=-self._size.get())
+        self._smallfont = Font(
+            family="helvetica", size=-(int(self._size.get() * 14 // 20))
+        )
 
     def _init_menubar(self, parent):
         menubar = Menu(parent)
 
         filemenu = Menu(menubar, tearoff=0)
-        filemenu.add_command(label='Reset Application', underline=0,
-                             command=self.reset)
-        filemenu.add_command(label='Save Current Grammar', underline=0,
-                             accelerator='Ctrl-s',
-                             command=self.save_grammar)
-        filemenu.add_command(label='Load Grammar', underline=0,
-                             accelerator='Ctrl-o',
-                             command=self.load_grammar)
-
-        filemenu.add_command(label='Save Grammar History', underline=13,
-                             command=self.save_history)
-
-        filemenu.add_command(label='Exit', underline=1,
-                             command=self.destroy, accelerator='Ctrl-q')
-        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+        filemenu.add_command(label="Reset Application", underline=0, command=self.reset)
+        filemenu.add_command(
+            label="Save Current Grammar",
+            underline=0,
+            accelerator="Ctrl-s",
+            command=self.save_grammar,
+        )
+        filemenu.add_command(
+            label="Load Grammar",
+            underline=0,
+            accelerator="Ctrl-o",
+            command=self.load_grammar,
+        )
+
+        filemenu.add_command(
+            label="Save Grammar History", underline=13, command=self.save_history
+        )
+
+        filemenu.add_command(
+            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
+        )
+        menubar.add_cascade(label="File", underline=0, menu=filemenu)
 
         viewmenu = Menu(menubar, tearoff=0)
-        viewmenu.add_radiobutton(label='Tiny', variable=self._size,
-                                 underline=0, value=10, command=self.resize)
-        viewmenu.add_radiobutton(label='Small', variable=self._size,
-                                 underline=0, value=16, command=self.resize)
-        viewmenu.add_radiobutton(label='Medium', variable=self._size,
-                                 underline=0, value=20, command=self.resize)
-        viewmenu.add_radiobutton(label='Large', variable=self._size,
-                                 underline=0, value=24, command=self.resize)
-        viewmenu.add_radiobutton(label='Huge', variable=self._size,
-                                 underline=0, value=34, command=self.resize)
-        menubar.add_cascade(label='View', underline=0, menu=viewmenu)
+        viewmenu.add_radiobutton(
+            label="Tiny",
+            variable=self._size,
+            underline=0,
+            value=10,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Small",
+            variable=self._size,
+            underline=0,
+            value=16,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Medium",
+            variable=self._size,
+            underline=0,
+            value=20,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Large",
+            variable=self._size,
+            underline=0,
+            value=24,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Huge",
+            variable=self._size,
+            underline=0,
+            value=34,
+            command=self.resize,
+        )
+        menubar.add_cascade(label="View", underline=0, menu=viewmenu)
 
         devsetmenu = Menu(menubar, tearoff=0)
-        devsetmenu.add_radiobutton(label='50 sentences',
-                                   variable=self._devset_size,
-                                   value=50, command=self.set_devset_size)
-        devsetmenu.add_radiobutton(label='100 sentences',
-                                   variable=self._devset_size,
-                                   value=100, command=self.set_devset_size)
-        devsetmenu.add_radiobutton(label='200 sentences',
-                                   variable=self._devset_size,
-                                   value=200, command=self.set_devset_size)
-        devsetmenu.add_radiobutton(label='500 sentences',
-                                   variable=self._devset_size,
-                                   value=500, command=self.set_devset_size)
-        menubar.add_cascade(label='Development-Set', underline=0,
-                            menu=devsetmenu)
+        devsetmenu.add_radiobutton(
+            label="50 sentences",
+            variable=self._devset_size,
+            value=50,
+            command=self.set_devset_size,
+        )
+        devsetmenu.add_radiobutton(
+            label="100 sentences",
+            variable=self._devset_size,
+            value=100,
+            command=self.set_devset_size,
+        )
+        devsetmenu.add_radiobutton(
+            label="200 sentences",
+            variable=self._devset_size,
+            value=200,
+            command=self.set_devset_size,
+        )
+        devsetmenu.add_radiobutton(
+            label="500 sentences",
+            variable=self._devset_size,
+            value=500,
+            command=self.set_devset_size,
+        )
+        menubar.add_cascade(label="Development-Set", underline=0, menu=devsetmenu)
 
         helpmenu = Menu(menubar, tearoff=0)
-        helpmenu.add_command(label='About', underline=0,
-                             command=self.about)
-        menubar.add_cascade(label='Help', underline=0, menu=helpmenu)
+        helpmenu.add_command(label="About", underline=0, command=self.about)
+        menubar.add_cascade(label="Help", underline=0, menu=helpmenu)
 
         parent.config(menu=menubar)
 
@@ -441,117 +550,151 @@ class RegexpChunkApp(object):
             self.show_devset()
         else:
             self.show_trace()
-        return 'break'
-
+        return "break"
 
-    _SCALE_N = 5 # center on the last 5 examples.
+    _SCALE_N = 5  # center on the last 5 examples.
     _DRAW_LINES = False
+
     def _eval_plot(self, *e, **config):
-        width = config.get('width', self.evalbox.winfo_width())
-        height = config.get('height', self.evalbox.winfo_height())
+        width = config.get("width", self.evalbox.winfo_width())
+        height = config.get("height", self.evalbox.winfo_height())
 
         # Clear the canvas
-        self.evalbox.delete('all')
+        self.evalbox.delete("all")
 
         # Draw the precision & recall labels.
-        tag = self.evalbox.create_text(10, height//2-10, justify='left',
-                                 anchor='w', text='Precision')
-        left, right = self.evalbox.bbox(tag)[2] + 5, width-10
-        tag = self.evalbox.create_text(left + (width-left)//2, height-10,
-                                anchor='s', text='Recall', justify='center')
-        top, bot = 10, self.evalbox.bbox(tag)[1]-10
+        tag = self.evalbox.create_text(
+            10, height // 2 - 10, justify="left", anchor="w", text="Precision"
+        )
+        left, right = self.evalbox.bbox(tag)[2] + 5, width - 10
+        tag = self.evalbox.create_text(
+            left + (width - left) // 2,
+            height - 10,
+            anchor="s",
+            text="Recall",
+            justify="center",
+        )
+        top, bot = 10, self.evalbox.bbox(tag)[1] - 10
 
         # Draw masks for clipping the plot.
-        bg = self._EVALBOX_PARAMS['background']
-        self.evalbox.lower(self.evalbox.create_rectangle(0, 0, left-1, 5000,
-                                                         fill=bg, outline=bg))
-        self.evalbox.lower(self.evalbox.create_rectangle(0, bot+1, 5000, 5000,
-                                                         fill=bg, outline=bg))
+        bg = self._EVALBOX_PARAMS["background"]
+        self.evalbox.lower(
+            self.evalbox.create_rectangle(0, 0, left - 1, 5000, fill=bg, outline=bg)
+        )
+        self.evalbox.lower(
+            self.evalbox.create_rectangle(0, bot + 1, 5000, 5000, fill=bg, outline=bg)
+        )
 
         # Calculate the plot's scale.
         if self._autoscale.get() and len(self._history) > 1:
             max_precision = max_recall = 0
             min_precision = min_recall = 1
-            for i in range(1, min(len(self._history), self._SCALE_N+1)):
+            for i in range(1, min(len(self._history), self._SCALE_N + 1)):
                 grammar, precision, recall, fmeasure = self._history[-i]
                 min_precision = min(precision, min_precision)
                 min_recall = min(recall, min_recall)
                 max_precision = max(precision, max_precision)
                 max_recall = max(recall, max_recall)
-#             if max_precision-min_precision > max_recall-min_recall:
-#                 min_recall -= (max_precision-min_precision)/2
-#                 max_recall += (max_precision-min_precision)/2
-#             else:
-#                 min_precision -= (max_recall-min_recall)/2
-#                 max_precision += (max_recall-min_recall)/2
-#             if min_recall < 0:
-#                 max_recall -= min_recall
-#                 min_recall = 0
-#             if min_precision < 0:
-#                 max_precision -= min_precision
-#                 min_precision = 0
-            min_precision = max(min_precision-.01, 0)
-            min_recall = max(min_recall-.01, 0)
-            max_precision = min(max_precision+.01, 1)
-            max_recall = min(max_recall+.01, 1)
+            #             if max_precision-min_precision > max_recall-min_recall:
+            #                 min_recall -= (max_precision-min_precision)/2
+            #                 max_recall += (max_precision-min_precision)/2
+            #             else:
+            #                 min_precision -= (max_recall-min_recall)/2
+            #                 max_precision += (max_recall-min_recall)/2
+            #             if min_recall < 0:
+            #                 max_recall -= min_recall
+            #                 min_recall = 0
+            #             if min_precision < 0:
+            #                 max_precision -= min_precision
+            #                 min_precision = 0
+            min_precision = max(min_precision - 0.01, 0)
+            min_recall = max(min_recall - 0.01, 0)
+            max_precision = min(max_precision + 0.01, 1)
+            max_recall = min(max_recall + 0.01, 1)
         else:
             min_precision = min_recall = 0
             max_precision = max_recall = 1
 
         # Draw the axis lines & grid lines
         for i in range(11):
-            x = left + (right-left)*((i/10.-min_recall)/
-                                     (max_recall-min_recall))
-            y = bot - (bot-top)*((i/10.-min_precision)/
-                                 (max_precision-min_precision))
+            x = left + (right - left) * (
+                (i / 10.0 - min_recall) / (max_recall - min_recall)
+            )
+            y = bot - (bot - top) * (
+                (i / 10.0 - min_precision) / (max_precision - min_precision)
+            )
             if left < x < right:
-                self.evalbox.create_line(x, top, x, bot, fill='#888')
+                self.evalbox.create_line(x, top, x, bot, fill="#888")
             if top < y < bot:
-                self.evalbox.create_line(left, y, right, y, fill='#888')
+                self.evalbox.create_line(left, y, right, y, fill="#888")
         self.evalbox.create_line(left, top, left, bot)
         self.evalbox.create_line(left, bot, right, bot)
 
         # Display the plot's scale
         self.evalbox.create_text(
-            left-3, bot, justify='right', anchor='se',
-            text='%d%%' % (100*min_precision))
+            left - 3,
+            bot,
+            justify="right",
+            anchor="se",
+            text="%d%%" % (100 * min_precision),
+        )
         self.evalbox.create_text(
-            left-3, top, justify='right', anchor='ne',
-            text='%d%%' % (100*max_precision))
+            left - 3,
+            top,
+            justify="right",
+            anchor="ne",
+            text="%d%%" % (100 * max_precision),
+        )
         self.evalbox.create_text(
-            left, bot+3, justify='center', anchor='nw',
-            text='%d%%' % (100*min_recall))
+            left,
+            bot + 3,
+            justify="center",
+            anchor="nw",
+            text="%d%%" % (100 * min_recall),
+        )
         self.evalbox.create_text(
-            right, bot+3, justify='center', anchor='ne',
-            text='%d%%' % (100*max_recall))
+            right,
+            bot + 3,
+            justify="center",
+            anchor="ne",
+            text="%d%%" % (100 * max_recall),
+        )
 
         # Display the scores.
         prev_x = prev_y = None
         for i, (_, precision, recall, fscore) in enumerate(self._history):
-            x = left + (right-left) * ((recall-min_recall) /
-                                (max_recall-min_recall))
-            y = bot - (bot-top) * ((precision-min_precision) /
-                                (max_precision-min_precision))
+            x = left + (right - left) * (
+                (recall - min_recall) / (max_recall - min_recall)
+            )
+            y = bot - (bot - top) * (
+                (precision - min_precision) / (max_precision - min_precision)
+            )
             if i == self._history_index:
-                self.evalbox.create_oval(x-2,y-2,x+2,y+2,
-                                         fill='#0f0', outline='#000')
-                self.status['text'] = (
-                    'Precision: %.2f%%\t' % (precision*100)+
-                    'Recall: %.2f%%\t' % (recall*100)+
-                    'F-score: %.2f%%' % (fscore*100))
+                self.evalbox.create_oval(
+                    x - 2, y - 2, x + 2, y + 2, fill="#0f0", outline="#000"
+                )
+                self.status["text"] = (
+                    "Precision: %.2f%%\t" % (precision * 100)
+                    + "Recall: %.2f%%\t" % (recall * 100)
+                    + "F-score: %.2f%%" % (fscore * 100)
+                )
             else:
                 self.evalbox.lower(
-                    self.evalbox.create_oval(x-2,y-2,x+2,y+2,
-                                             fill='#afa', outline='#8c8'))
+                    self.evalbox.create_oval(
+                        x - 2, y - 2, x + 2, y + 2, fill="#afa", outline="#8c8"
+                    )
+                )
             if prev_x is not None and self._eval_lines.get():
                 self.evalbox.lower(
-                    self.evalbox.create_line(prev_x, prev_y, x, y,
-                                             fill='#8c8'))
+                    self.evalbox.create_line(prev_x, prev_y, x, y, fill="#8c8")
+                )
             prev_x, prev_y = x, y
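
The score markers above are placed by a plain linear interpolation between the axis bounds; the same mapping as a self-contained sketch (all names illustrative):

    def to_canvas(recall, precision, left, right, top, bot,
                  min_r, max_r, min_p, max_p):
        # Recall grows rightward; precision grows upward, so its term is
        # subtracted from the bottom edge (canvas y increases downward).
        x = left + (right - left) * ((recall - min_r) / (max_r - min_r))
        y = bot - (bot - top) * ((precision - min_p) / (max_p - min_p))
        return x, y

    # Unscaled axes (0..1 on both), 80% recall at 90% precision:
    print(to_canvas(0.8, 0.9, 50, 290, 10, 250, 0, 1, 0, 1))  # (242.0, 34.0)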
 
     _eval_demon_running = False
+
     def _eval_demon(self):
-        if self.top is None: return
+        if self.top is None:
+            return
         if self.chunker is None:
             self._eval_demon_running = False
             return
@@ -560,10 +703,12 @@ class RegexpChunkApp(object):
         t0 = time.time()
 
         # If they are still typing, then wait for them to finish.
-        if (time.time()-self._last_keypress < self._EVAL_DELAY and
-            self.normalized_grammar != self._eval_normalized_grammar):
+        if (
+            time.time() - self._last_keypress < self._EVAL_DELAY
+            and self.normalized_grammar != self._eval_normalized_grammar
+        ):
             self._eval_demon_running = True
-            return self.top.after(int(self._EVAL_FREQ*1000), self._eval_demon)
+            return self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon)
 
         # If the grammar changed, restart the evaluation.
         if self.normalized_grammar != self._eval_normalized_grammar:
@@ -571,7 +716,7 @@ class RegexpChunkApp(object):
             # just use the old evaluation values.
             for (g, p, r, f) in self._history:
                 if self.normalized_grammar == self.normalize_grammar(g):
-                    self._history.append( (g, p, r, f) )
+                    self._history.append((g, p, r, f))
                     self._history_index = len(self._history) - 1
                     self._eval_plot()
                     self._eval_demon_running = False
@@ -584,15 +729,17 @@ class RegexpChunkApp(object):
 
         # If the grammar is empty, then don't bother evaluating it, or
         # recording it in history -- the score will just be 0.
-        if self.normalized_grammar.strip() == '':
-            #self._eval_index = self._devset_size.get()
+        if self.normalized_grammar.strip() == "":
+            # self._eval_index = self._devset_size.get()
             self._eval_demon_running = False
             return
 
         # Score the next set of examples
-        for gold in self.devset[self._eval_index:
-                                min(self._eval_index+self._EVAL_CHUNK,
-                                    self._devset_size.get())]:
+        for gold in self.devset[
+            self._eval_index : min(
+                self._eval_index + self._EVAL_CHUNK, self._devset_size.get()
+            )
+        ]:
             guess = self._chunkparse(gold.leaves())
             self._eval_score.score(gold, guess)
 
@@ -601,21 +748,24 @@ class RegexpChunkApp(object):
 
         # Check if we're done
         if self._eval_index >= self._devset_size.get():
-            self._history.append( (self._eval_grammar,
-                                   self._eval_score.precision(),
-                                   self._eval_score.recall(),
-                                   self._eval_score.f_measure()) )
-            self._history_index = len(self._history)-1
+            self._history.append(
+                (
+                    self._eval_grammar,
+                    self._eval_score.precision(),
+                    self._eval_score.recall(),
+                    self._eval_score.f_measure(),
+                )
+            )
+            self._history_index = len(self._history) - 1
             self._eval_plot()
             self._eval_demon_running = False
             self._eval_normalized_grammar = None
         else:
-            progress = 100*self._eval_index/self._devset_size.get()
-            self.status['text'] = ('Evaluating on Development Set (%d%%)' %
-                                   progress)
+            progress = 100 * self._eval_index / self._devset_size.get()
+            self.status["text"] = "Evaluating on Development Set (%d%%)" % progress
             self._eval_demon_running = True
             self._adaptively_modify_eval_chunk(time.time() - t0)
-            self.top.after(int(self._EVAL_FREQ*1000), self._eval_demon)
+            self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon)
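
The demon stays responsive by scoring a small slice per pass and rescheduling itself with Tk's after(); a minimal self-contained sketch of that pattern (the work list and timings are made up):

    import tkinter as tk

    root = tk.Tk()
    work = list(range(1000))
    state = {"i": 0}

    def demon():
        # Handle one small chunk, then hand control back to the event loop.
        state["i"] = min(state["i"] + 50, len(work))
        if state["i"] < len(work):
            root.after(200, demon)   # 0.2 s, like _EVAL_FREQ
        else:
            root.destroy()

    root.after(200, demon)
    root.mainloop()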
 
     def _adaptively_modify_eval_chunk(self, t):
         """
@@ -625,13 +775,21 @@ class RegexpChunkApp(object):
         :param t: The amount of time that the eval demon took.
         """
         if t > self._EVAL_DEMON_MAX and self._EVAL_CHUNK > 5:
-            self._EVAL_CHUNK = min(self._EVAL_CHUNK-1,
-                         max(int(self._EVAL_CHUNK*(self._EVAL_DEMON_MAX/t)),
-                             self._EVAL_CHUNK-10))
+            self._EVAL_CHUNK = min(
+                self._EVAL_CHUNK - 1,
+                max(
+                    int(self._EVAL_CHUNK * (self._EVAL_DEMON_MAX / t)),
+                    self._EVAL_CHUNK - 10,
+                ),
+            )
         elif t < self._EVAL_DEMON_MIN:
-            self._EVAL_CHUNK = max(self._EVAL_CHUNK+1,
-                         min(int(self._EVAL_CHUNK*(self._EVAL_DEMON_MIN/t)),
-                             self._EVAL_CHUNK+10))
+            self._EVAL_CHUNK = max(
+                self._EVAL_CHUNK + 1,
+                min(
+                    int(self._EVAL_CHUNK * (self._EVAL_DEMON_MIN / t)),
+                    self._EVAL_CHUNK + 10,
+                ),
+            )
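
Read as a standalone policy: shrink the chunk when a pass overran _EVAL_DEMON_MAX, grow it when the pass beat _EVAL_DEMON_MIN, never stepping by more than 10 sentences. A sketch restating the logic above:

    EVAL_DEMON_MIN, EVAL_DEMON_MAX = 0.02, 0.04

    def adapt_chunk(chunk, t):
        # t: seconds the last pass took.
        if t > EVAL_DEMON_MAX and chunk > 5:
            return min(chunk - 1, max(int(chunk * (EVAL_DEMON_MAX / t)), chunk - 10))
        if t < EVAL_DEMON_MIN:
            return max(chunk + 1, min(int(chunk * (EVAL_DEMON_MIN / t)), chunk + 10))
        return chunk

    print(adapt_chunk(100, 0.08))  # took twice the max -> 90
    print(adapt_chunk(100, 0.01))  # took half the min  -> 110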
 
     def _init_widgets(self, top):
         frame0 = Frame(top, **self._FRAME_PARAMS)
@@ -641,194 +799,232 @@ class RegexpChunkApp(object):
         frame0.grid_rowconfigure(5, weight=1)
 
         # The grammar
-        self.grammarbox = Text(frame0, font=self._font,
-                               **self._GRAMMARBOX_PARAMS)
-        self.grammarlabel = Label(frame0, font=self._font, text='Grammar:',
-                      highlightcolor='black',
-                      background=self._GRAMMARBOX_PARAMS['background'])
-        self.grammarlabel.grid(column=0, row=0, sticky='SW')
-        self.grammarbox.grid(column=0, row=1, sticky='NEWS')
+        self.grammarbox = Text(frame0, font=self._font, **self._GRAMMARBOX_PARAMS)
+        self.grammarlabel = Label(
+            frame0,
+            font=self._font,
+            text="Grammar:",
+            highlightcolor="black",
+            background=self._GRAMMARBOX_PARAMS["background"],
+        )
+        self.grammarlabel.grid(column=0, row=0, sticky="SW")
+        self.grammarbox.grid(column=0, row=1, sticky="NEWS")
 
         # Scroll bar for grammar
         grammar_scrollbar = Scrollbar(frame0, command=self.grammarbox.yview)
-        grammar_scrollbar.grid(column=1, row=1, sticky='NWS')
+        grammar_scrollbar.grid(column=1, row=1, sticky="NWS")
         self.grammarbox.config(yscrollcommand=grammar_scrollbar.set)
 
         # grammar buttons
-        bg = self._FRAME_PARAMS['background']
+        bg = self._FRAME_PARAMS["background"]
         frame3 = Frame(frame0, background=bg)
-        frame3.grid(column=0, row=2, sticky='EW')
-        Button(frame3, text='Prev Grammar', command=self._history_prev,
-               **self._BUTTON_PARAMS).pack(side='left')
-        Button(frame3, text='Next Grammar', command=self._history_next,
-               **self._BUTTON_PARAMS).pack(side='left')
+        frame3.grid(column=0, row=2, sticky="EW")
+        Button(
+            frame3,
+            text="Prev Grammar",
+            command=self._history_prev,
+            **self._BUTTON_PARAMS
+        ).pack(side="left")
+        Button(
+            frame3,
+            text="Next Grammar",
+            command=self._history_next,
+            **self._BUTTON_PARAMS
+        ).pack(side="left")
 
         # Help box
-        self.helpbox = Text(frame0, font=self._smallfont,
-                            **self._HELPBOX_PARAMS)
-        self.helpbox.grid(column=3, row=1, sticky='NEWS')
+        self.helpbox = Text(frame0, font=self._smallfont, **self._HELPBOX_PARAMS)
+        self.helpbox.grid(column=3, row=1, sticky="NEWS")
         self.helptabs = {}
-        bg = self._FRAME_PARAMS['background']
+        bg = self._FRAME_PARAMS["background"]
         helptab_frame = Frame(frame0, background=bg)
-        helptab_frame.grid(column=3, row=0, sticky='SW')
+        helptab_frame.grid(column=3, row=0, sticky="SW")
         for i, (tab, tabstops, text) in enumerate(self.HELP):
             label = Label(helptab_frame, text=tab, font=self._smallfont)
-            label.grid(column=i*2, row=0, sticky='S')
-            #help_frame.grid_columnconfigure(i, weight=1)
-            #label.pack(side='left')
-            label.bind('<ButtonPress>', lambda e, tab=tab: self.show_help(tab))
+            label.grid(column=i * 2, row=0, sticky="S")
+            # help_frame.grid_columnconfigure(i, weight=1)
+            # label.pack(side='left')
+            label.bind("<ButtonPress>", lambda e, tab=tab: self.show_help(tab))
             self.helptabs[tab] = label
-            Frame(helptab_frame, height=1, width=self._HELPTAB_SPACER,
-                  background=bg).grid(column=i*2+1, row=0)
+            Frame(
+                helptab_frame, height=1, width=self._HELPTAB_SPACER, background=bg
+            ).grid(column=i * 2 + 1, row=0)
         self.helptabs[self.HELP[0][0]].configure(font=self._font)
-        self.helpbox.tag_config('elide', elide=True)
+        self.helpbox.tag_config("elide", elide=True)
         for (tag, params) in self.HELP_AUTOTAG:
-            self.helpbox.tag_config('tag-%s' % tag, **params)
+            self.helpbox.tag_config("tag-%s" % tag, **params)
         self.show_help(self.HELP[0][0])
 
         # Scroll bar for helpbox
         help_scrollbar = Scrollbar(frame0, command=self.helpbox.yview)
         self.helpbox.config(yscrollcommand=help_scrollbar.set)
-        help_scrollbar.grid(column=4, row=1, sticky='NWS')
+        help_scrollbar.grid(column=4, row=1, sticky="NWS")
 
         # The dev set
-        frame4 = Frame(frame0, background=self._FRAME_PARAMS['background'])
-        self.devsetbox = Text(frame4, font=self._font,
-                              **self._DEVSETBOX_PARAMS)
-        self.devsetbox.pack(expand=True, fill='both')
-        self.devsetlabel = Label(frame0, font=self._font,
-                      text='Development Set:', justify='right',
-                      background=self._DEVSETBOX_PARAMS['background'])
-        self.devsetlabel.grid(column=0, row=4, sticky='SW')
-        frame4.grid(column=0, row=5, sticky='NEWS')
+        frame4 = Frame(frame0, background=self._FRAME_PARAMS["background"])
+        self.devsetbox = Text(frame4, font=self._font, **self._DEVSETBOX_PARAMS)
+        self.devsetbox.pack(expand=True, fill="both")
+        self.devsetlabel = Label(
+            frame0,
+            font=self._font,
+            text="Development Set:",
+            justify="right",
+            background=self._DEVSETBOX_PARAMS["background"],
+        )
+        self.devsetlabel.grid(column=0, row=4, sticky="SW")
+        frame4.grid(column=0, row=5, sticky="NEWS")
 
         # dev set scrollbars
         self.devset_scroll = Scrollbar(frame0, command=self._devset_scroll)
-        self.devset_scroll.grid(column=1, row=5, sticky='NWS')
-        self.devset_xscroll = Scrollbar(frame4, command=self.devsetbox.xview,
-                                        orient='horiz')
-        self.devsetbox['xscrollcommand'] = self.devset_xscroll.set
-        self.devset_xscroll.pack(side='bottom', fill='x')
+        self.devset_scroll.grid(column=1, row=5, sticky="NWS")
+        self.devset_xscroll = Scrollbar(
+            frame4, command=self.devsetbox.xview, orient="horiz"
+        )
+        self.devsetbox["xscrollcommand"] = self.devset_xscroll.set
+        self.devset_xscroll.pack(side="bottom", fill="x")
 
         # dev set buttons
-        bg = self._FRAME_PARAMS['background']
+        bg = self._FRAME_PARAMS["background"]
         frame1 = Frame(frame0, background=bg)
-        frame1.grid(column=0, row=7, sticky='EW')
-        Button(frame1, text='Prev Example (Ctrl-p)',
-               command=self._devset_prev,
-               **self._BUTTON_PARAMS).pack(side='left')
-        Button(frame1, text='Next Example (Ctrl-n)',
-               command=self._devset_next,
-               **self._BUTTON_PARAMS).pack(side='left')
-        self.devset_button = Button(frame1, text='Show example',
-                                   command=self.show_devset,
-                                    state='disabled',
-                                   **self._BUTTON_PARAMS)
-        self.devset_button.pack(side='right')
-        self.trace_button = Button(frame1, text='Show trace',
-                                   command=self.show_trace,
-                                   **self._BUTTON_PARAMS)
-        self.trace_button.pack(side='right')
-
+        frame1.grid(column=0, row=7, sticky="EW")
+        Button(
+            frame1,
+            text="Prev Example (Ctrl-p)",
+            command=self._devset_prev,
+            **self._BUTTON_PARAMS
+        ).pack(side="left")
+        Button(
+            frame1,
+            text="Next Example (Ctrl-n)",
+            command=self._devset_next,
+            **self._BUTTON_PARAMS
+        ).pack(side="left")
+        self.devset_button = Button(
+            frame1,
+            text="Show example",
+            command=self.show_devset,
+            state="disabled",
+            **self._BUTTON_PARAMS
+        )
+        self.devset_button.pack(side="right")
+        self.trace_button = Button(
+            frame1, text="Show trace", command=self.show_trace, **self._BUTTON_PARAMS
+        )
+        self.trace_button.pack(side="right")
 
         # evaluation box
         self.evalbox = Canvas(frame0, **self._EVALBOX_PARAMS)
-        label = Label(frame0, font=self._font, text='Evaluation:',
-              justify='right', background=self._EVALBOX_PARAMS['background'])
-        label.grid(column=3, row=4, sticky='SW')
-        self.evalbox.grid(column=3, row=5, sticky='NEWS', columnspan=2)
+        label = Label(
+            frame0,
+            font=self._font,
+            text="Evaluation:",
+            justify="right",
+            background=self._EVALBOX_PARAMS["background"],
+        )
+        label.grid(column=3, row=4, sticky="SW")
+        self.evalbox.grid(column=3, row=5, sticky="NEWS", columnspan=2)
 
         # evaluation box buttons
-        bg = self._FRAME_PARAMS['background']
+        bg = self._FRAME_PARAMS["background"]
         frame2 = Frame(frame0, background=bg)
-        frame2.grid(column=3, row=7, sticky='EW')
+        frame2.grid(column=3, row=7, sticky="EW")
         self._autoscale = IntVar(self.top)
         self._autoscale.set(False)
-        Checkbutton(frame2, variable=self._autoscale, command=self._eval_plot,
-                    text='Zoom', **self._BUTTON_PARAMS).pack(side='left')
+        Checkbutton(
+            frame2,
+            variable=self._autoscale,
+            command=self._eval_plot,
+            text="Zoom",
+            **self._BUTTON_PARAMS
+        ).pack(side="left")
         self._eval_lines = IntVar(self.top)
         self._eval_lines.set(False)
-        Checkbutton(frame2, variable=self._eval_lines, command=self._eval_plot,
-                    text='Lines', **self._BUTTON_PARAMS).pack(side='left')
-        Button(frame2, text='History',
-               **self._BUTTON_PARAMS).pack(side='right')
+        Checkbutton(
+            frame2,
+            variable=self._eval_lines,
+            command=self._eval_plot,
+            text="Lines",
+            **self._BUTTON_PARAMS
+        ).pack(side="left")
+        Button(frame2, text="History", **self._BUTTON_PARAMS).pack(side="right")
 
         # The status label
         self.status = Label(frame0, font=self._font, **self._STATUS_PARAMS)
-        self.status.grid(column=0, row=9, sticky='NEW', padx=3, pady=2,
-                         columnspan=5)
+        self.status.grid(column=0, row=9, sticky="NEW", padx=3, pady=2, columnspan=5)
 
         # Help box & devset box can't be edited.
-        self.helpbox['state'] = 'disabled'
-        self.devsetbox['state'] = 'disabled'
+        self.helpbox["state"] = "disabled"
+        self.devsetbox["state"] = "disabled"
 
         # Spacers
-        bg = self._FRAME_PARAMS['background']
+        bg = self._FRAME_PARAMS["background"]
         Frame(frame0, height=10, width=0, background=bg).grid(column=0, row=3)
         Frame(frame0, height=0, width=10, background=bg).grid(column=2, row=0)
         Frame(frame0, height=6, width=0, background=bg).grid(column=0, row=8)
 
         # pack the frame.
-        frame0.pack(fill='both', expand=True)
+        frame0.pack(fill="both", expand=True)
 
         # Set up colors for the devset box
-        self.devsetbox.tag_config('true-pos', background='#afa',
-                                  underline='True')
-        self.devsetbox.tag_config('false-neg', underline='True',
-                                foreground='#800')
-        self.devsetbox.tag_config('false-pos', background='#faa')
-        self.devsetbox.tag_config('trace', foreground='#666', wrap='none')
-        self.devsetbox.tag_config('wrapindent', lmargin2=30, wrap='none')
-        self.devsetbox.tag_config('error', foreground='#800')
+        self.devsetbox.tag_config("true-pos", background="#afa", underline="True")
+        self.devsetbox.tag_config("false-neg", underline="True", foreground="#800")
+        self.devsetbox.tag_config("false-pos", background="#faa")
+        self.devsetbox.tag_config("trace", foreground="#666", wrap="none")
+        self.devsetbox.tag_config("wrapindent", lmargin2=30, wrap="none")
+        self.devsetbox.tag_config("error", foreground="#800")
 
         # And for the grammarbox
-        self.grammarbox.tag_config('error', background='#fec')
-        self.grammarbox.tag_config('comment', foreground='#840')
-        self.grammarbox.tag_config('angle', foreground='#00f')
-        self.grammarbox.tag_config('brace', foreground='#0a0')
-        self.grammarbox.tag_config('hangindent', lmargin1=0, lmargin2=40)
+        self.grammarbox.tag_config("error", background="#fec")
+        self.grammarbox.tag_config("comment", foreground="#840")
+        self.grammarbox.tag_config("angle", foreground="#00f")
+        self.grammarbox.tag_config("brace", foreground="#0a0")
+        self.grammarbox.tag_config("hangindent", lmargin1=0, lmargin2=40)
 
     _showing_trace = False
+
     def show_trace(self, *e):
         self._showing_trace = True
-        self.trace_button['state'] = 'disabled'
-        self.devset_button['state'] = 'normal'
+        self.trace_button["state"] = "disabled"
+        self.devset_button["state"] = "normal"
 
-        self.devsetbox['state'] = 'normal'
-        #self.devsetbox['wrap'] = 'none'
-        self.devsetbox.delete('1.0', 'end')
-        self.devsetlabel['text']='Development Set (%d/%d)' % (
-            (self.devset_index+1, self._devset_size.get()))
+        self.devsetbox["state"] = "normal"
+        # self.devsetbox['wrap'] = 'none'
+        self.devsetbox.delete("1.0", "end")
+        self.devsetlabel["text"] = "Development Set (%d/%d)" % (
+            (self.devset_index + 1, self._devset_size.get())
+        )
 
         if self.chunker is None:
-            self.devsetbox.insert('1.0', 'Trace: waiting for a valid grammar.')
-            self.devsetbox.tag_add('error', '1.0', 'end')
-            return # can't do anything more
+            self.devsetbox.insert("1.0", "Trace: waiting for a valid grammar.")
+            self.devsetbox.tag_add("error", "1.0", "end")
+            return  # can't do anything more
 
         gold_tree = self.devset[self.devset_index]
         rules = self.chunker.rules()
 
         # Calculate the tag sequence
-        tagseq = '\t'
+        tagseq = "\t"
         charnum = [1]
         for wordnum, (word, pos) in enumerate(gold_tree.leaves()):
-            tagseq += '%s ' % pos
+            tagseq += "%s " % pos
             charnum.append(len(tagseq))
-        self.charnum = dict(((i, j), charnum[j])
-                            for i in range(len(rules)+1)
-                            for j in range(len(charnum)))
-        self.linenum = dict((i,i*2+2) for i in range(len(rules)+1))
-
-        for i in range(len(rules)+1):
+        self.charnum = dict(
+            ((i, j), charnum[j])
+            for i in range(len(rules) + 1)
+            for j in range(len(charnum))
+        )
+        self.linenum = dict((i, i * 2 + 2) for i in range(len(rules) + 1))
+
+        for i in range(len(rules) + 1):
             if i == 0:
-                self.devsetbox.insert('end', 'Start:\n')
-                self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c')
+                self.devsetbox.insert("end", "Start:\n")
+                self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c")
             else:
-                self.devsetbox.insert('end', 'Apply %s:\n' % rules[i-1])
-                self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c')
+                self.devsetbox.insert("end", "Apply %s:\n" % rules[i - 1])
+                self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c")
             # Display the tag sequence.
-            self.devsetbox.insert('end', tagseq+'\n')
-            self.devsetbox.tag_add('wrapindent','end -2c linestart','end -2c')
+            self.devsetbox.insert("end", tagseq + "\n")
+            self.devsetbox.tag_add("wrapindent", "end -2c linestart", "end -2c")
             # Run a partial parser, and extract gold & test chunks
             chunker = RegexpChunkParser(rules[:i])
             test_tree = self._chunkparse(gold_tree.leaves())
@@ -836,74 +1032,86 @@ class RegexpChunkApp(object):
             test_chunks = self._chunks(test_tree)
             # Compare them.
             for chunk in gold_chunks.intersection(test_chunks):
-                self._color_chunk(i, chunk, 'true-pos')
+                self._color_chunk(i, chunk, "true-pos")
             for chunk in gold_chunks - test_chunks:
-                self._color_chunk(i, chunk, 'false-neg')
+                self._color_chunk(i, chunk, "false-neg")
             for chunk in test_chunks - gold_chunks:
-                self._color_chunk(i, chunk, 'false-pos')
-        self.devsetbox.insert('end', 'Finished.\n')
-        self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c')
+                self._color_chunk(i, chunk, "false-pos")
+        self.devsetbox.insert("end", "Finished.\n")
+        self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c")
 
         # This is a hack, because the x-scrollbar isn't updating its
         # position right -- I'm not sure what the underlying cause is
         # though.  (This is on OS X w/ python 2.5)
-        self.top.after(100, self.devset_xscroll.set, 0, .3)
+        self.top.after(100, self.devset_xscroll.set, 0, 0.3)
 
     def show_help(self, tab):
-        self.helpbox['state'] = 'normal'
-        self.helpbox.delete('1.0', 'end')
+        self.helpbox["state"] = "normal"
+        self.helpbox.delete("1.0", "end")
         for (name, tabstops, text) in self.HELP:
             if name == tab:
-                text = text.replace('<<TAGSET>>', '\n'.join(
-                    ('\t%s\t%s' % item for item in sorted(list(self.tagset.items()),
-                    key=lambda t_w:re.match('\w+',t_w[0]) and (0,t_w[0]) or (1,t_w[0])))))
+                text = text.replace(
+                    "<<TAGSET>>",
+                    "\n".join(
+                        (
+                            "\t%s\t%s" % item
+                            for item in sorted(
+                                list(self.tagset.items()),
+                                key=lambda t_w: re.match(r"\w+", t_w[0])
+                                and (0, t_w[0])
+                                or (1, t_w[0]),
+                            )
+                        )
+                    ),
+                )
 
                 self.helptabs[name].config(**self._HELPTAB_FG_PARAMS)
                 self.helpbox.config(tabs=tabstops)
-                self.helpbox.insert('1.0', text+'\n'*20)
-                C = '1.0 + %d chars'
+                self.helpbox.insert("1.0", text + "\n" * 20)
+                C = "1.0 + %d chars"
                 for (tag, params) in self.HELP_AUTOTAG:
-                    pattern = '(?s)(<%s>)(.*?)(</%s>)' % (tag, tag)
+                    pattern = "(?s)(<%s>)(.*?)(</%s>)" % (tag, tag)
                     for m in re.finditer(pattern, text):
-                        self.helpbox.tag_add('elide',
-                                             C % m.start(1), C % m.end(1))
-                        self.helpbox.tag_add('tag-%s' % tag,
-                                             C % m.start(2), C % m.end(2))
-                        self.helpbox.tag_add('elide',
-                                             C % m.start(3), C % m.end(3))
+                        self.helpbox.tag_add("elide", C % m.start(1), C % m.end(1))
+                        self.helpbox.tag_add(
+                            "tag-%s" % tag, C % m.start(2), C % m.end(2)
+                        )
+                        self.helpbox.tag_add("elide", C % m.start(3), C % m.end(3))
             else:
                 self.helptabs[name].config(**self._HELPTAB_BG_PARAMS)
-        self.helpbox['state'] = 'disabled'
+        self.helpbox["state"] = "disabled"
 
     def _history_prev(self, *e):
-        self._view_history(self._history_index-1)
-        return 'break'
+        self._view_history(self._history_index - 1)
+        return "break"
 
     def _history_next(self, *e):
-        self._view_history(self._history_index+1)
-        return 'break'
+        self._view_history(self._history_index + 1)
+        return "break"
 
     def _view_history(self, index):
         # Bounds & sanity checking:
-        index = max(0, min(len(self._history)-1, index))
-        if not self._history: return
+        index = max(0, min(len(self._history) - 1, index))
+        if not self._history:
+            return
         # Already viewing the requested history item?
         if index == self._history_index:
             return
         # Show the requested grammar.  It will get added to _history
         # only if they edit it (causing self.update() to get run).
-        self.grammarbox['state'] = 'normal'
-        self.grammarbox.delete('1.0', 'end')
-        self.grammarbox.insert('end', self._history[index][0])
-        self.grammarbox.mark_set('insert', '1.0')
+        self.grammarbox["state"] = "normal"
+        self.grammarbox.delete("1.0", "end")
+        self.grammarbox.insert("end", self._history[index][0])
+        self.grammarbox.mark_set("insert", "1.0")
         self._history_index = index
         self._syntax_highlight_grammar(self._history[index][0])
         # Record the normalized grammar & regenerate the chunker.
-        self.normalized_grammar = self.normalize_grammar(
-            self._history[index][0])
+        self.normalized_grammar = self.normalize_grammar(self._history[index][0])
         if self.normalized_grammar:
-            rules = [RegexpChunkRule.fromstring(line)
-                     for line in self.normalized_grammar.split('\n')]
+            rules = [
+                RegexpChunkRule.fromstring(line)
+                for line in self.normalized_grammar.split("\n")
+            ]
         else:
             rules = []
         self.chunker = RegexpChunkParser(rules)
@@ -911,80 +1119,87 @@ class RegexpChunkApp(object):
         self._eval_plot()
         # Update the devset box
         self._highlight_devset()
-        if self._showing_trace: self.show_trace()
+        if self._showing_trace:
+            self.show_trace()
         # Update the grammar label
-        if self._history_index < len(self._history)-1:
-            self.grammarlabel['text'] = 'Grammar %s/%s:' % (
-                self._history_index+1, len(self._history))
+        if self._history_index < len(self._history) - 1:
+            self.grammarlabel["text"] = "Grammar %s/%s:" % (
+                self._history_index + 1,
+                len(self._history),
+            )
         else:
-            self.grammarlabel['text'] = 'Grammar:'
+            self.grammarlabel["text"] = "Grammar:"
 
     def _devset_next(self, *e):
-        self._devset_scroll('scroll', 1, 'page')
-        return 'break'
+        self._devset_scroll("scroll", 1, "page")
+        return "break"
 
     def _devset_prev(self, *e):
-        self._devset_scroll('scroll', -1, 'page')
-        return 'break'
+        self._devset_scroll("scroll", -1, "page")
+        return "break"
 
     def destroy(self, *e):
-        if self.top is None: return
+        if self.top is None:
+            return
         self.top.destroy()
         self.top = None
 
     def _devset_scroll(self, command, *args):
-        N = 1 # size of a page -- one sentence.
+        N = 1  # size of a page -- one sentence.
         showing_trace = self._showing_trace
-        if command == 'scroll' and args[1].startswith('unit'):
-            self.show_devset(self.devset_index+int(args[0]))
-        elif command == 'scroll' and args[1].startswith('page'):
-            self.show_devset(self.devset_index+N*int(args[0]))
-        elif command == 'moveto':
-            self.show_devset(int(float(args[0])*self._devset_size.get()))
+        if command == "scroll" and args[1].startswith("unit"):
+            self.show_devset(self.devset_index + int(args[0]))
+        elif command == "scroll" and args[1].startswith("page"):
+            self.show_devset(self.devset_index + N * int(args[0]))
+        elif command == "moveto":
+            self.show_devset(int(float(args[0]) * self._devset_size.get()))
         else:
-            assert 0, 'bad scroll command %s %s' % (command, args)
+            assert 0, "bad scroll command %s %s" % (command, args)
         if showing_trace:
             self.show_trace()
 
     def show_devset(self, index=None):
-        if index is None: index = self.devset_index
+        if index is None:
+            index = self.devset_index
 
         # Bounds checking
-        index = min(max(0, index), self._devset_size.get()-1)
+        index = min(max(0, index), self._devset_size.get() - 1)
 
-        if index == self.devset_index and not self._showing_trace: return
+        if index == self.devset_index and not self._showing_trace:
+            return
         self.devset_index = index
 
         self._showing_trace = False
-        self.trace_button['state'] = 'normal'
-        self.devset_button['state'] = 'disabled'
+        self.trace_button["state"] = "normal"
+        self.devset_button["state"] = "disabled"
 
         # Clear the text box.
-        self.devsetbox['state'] = 'normal'
-        self.devsetbox['wrap'] = 'word'
-        self.devsetbox.delete('1.0', 'end')
-        self.devsetlabel['text']='Development Set (%d/%d)' % (
-            (self.devset_index+1, self._devset_size.get()))
+        self.devsetbox["state"] = "normal"
+        self.devsetbox["wrap"] = "word"
+        self.devsetbox.delete("1.0", "end")
+        self.devsetlabel["text"] = "Development Set (%d/%d)" % (
+            (self.devset_index + 1, self._devset_size.get())
+        )
 
         # Add the sentences
-        sample = self.devset[self.devset_index:self.devset_index+1]
+        sample = self.devset[self.devset_index : self.devset_index + 1]
         self.charnum = {}
-        self.linenum = {0:1}
+        self.linenum = {0: 1}
         for sentnum, sent in enumerate(sample):
-            linestr = ''
+            linestr = ""
             for wordnum, (word, pos) in enumerate(sent.leaves()):
                 self.charnum[sentnum, wordnum] = len(linestr)
-                linestr += '%s/%s ' % (word, pos)
-                self.charnum[sentnum, wordnum+1] = len(linestr)
-            self.devsetbox.insert('end', linestr[:-1]+'\n\n')
+                linestr += "%s/%s " % (word, pos)
+                self.charnum[sentnum, wordnum + 1] = len(linestr)
+            self.devsetbox.insert("end", linestr[:-1] + "\n\n")
 
         # Highlight chunks in the dev set
         if self.chunker is not None:
             self._highlight_devset()
-        self.devsetbox['state'] = 'disabled'
+        self.devsetbox["state"] = "disabled"
 
         # Update the scrollbar
-        first = self.devset_index/self._devset_size.get()
+        first = self.devset_index / self._devset_size.get()
         last = (self.devset_index + 2) / self._devset_size.get()
         self.devset_scroll.set(first, last)
 
@@ -994,52 +1209,55 @@ class RegexpChunkApp(object):
         for child in tree:
             if isinstance(child, Tree):
                 if child.label() == self._chunk_label:
-                    chunks.add( (wordnum, wordnum+len(child)) )
+                    chunks.add((wordnum, wordnum + len(child)))
                 wordnum += len(child)
             else:
                 wordnum += 1
         return chunks
 
     def _syntax_highlight_grammar(self, grammar):
-        if self.top is None: return
-        self.grammarbox.tag_remove('comment', '1.0', 'end')
-        self.grammarbox.tag_remove('angle', '1.0', 'end')
-        self.grammarbox.tag_remove('brace', '1.0', 'end')
-        self.grammarbox.tag_add('hangindent', '1.0', 'end')
-        for lineno, line in enumerate(grammar.split('\n')):
-            if not line.strip(): continue
-            m = re.match(r'(\\.|[^#])*(#.*)?', line)
+        if self.top is None:
+            return
+        self.grammarbox.tag_remove("comment", "1.0", "end")
+        self.grammarbox.tag_remove("angle", "1.0", "end")
+        self.grammarbox.tag_remove("brace", "1.0", "end")
+        self.grammarbox.tag_add("hangindent", "1.0", "end")
+        for lineno, line in enumerate(grammar.split("\n")):
+            if not line.strip():
+                continue
+            m = re.match(r"(\\.|[^#])*(#.*)?", line)
             comment_start = None
             if m.group(2):
                 comment_start = m.start(2)
-                s = '%d.%d' % (lineno+1, m.start(2))
-                e = '%d.%d' % (lineno+1, m.end(2))
-                self.grammarbox.tag_add('comment', s, e)
-            for m in re.finditer('[<>{}]', line):
+                s = "%d.%d" % (lineno + 1, m.start(2))
+                e = "%d.%d" % (lineno + 1, m.end(2))
+                self.grammarbox.tag_add("comment", s, e)
+            for m in re.finditer("[<>{}]", line):
                 if comment_start is not None and m.start() >= comment_start:
                     break
-                s = '%d.%d' % (lineno+1, m.start())
-                e = '%d.%d' % (lineno+1, m.end())
-                if m.group() in '<>':
-                    self.grammarbox.tag_add('angle', s, e)
+                s = "%d.%d" % (lineno + 1, m.start())
+                e = "%d.%d" % (lineno + 1, m.end())
+                if m.group() in "<>":
+                    self.grammarbox.tag_add("angle", s, e)
                 else:
-                    self.grammarbox.tag_add('brace', s, e)
-
+                    self.grammarbox.tag_add("brace", s, e)
 
     def _grammarcheck(self, grammar):
-        if self.top is None: return
-        self.grammarbox.tag_remove('error', '1.0', 'end')
+        if self.top is None:
+            return
+        self.grammarbox.tag_remove("error", "1.0", "end")
         self._grammarcheck_errs = []
-        for lineno, line in enumerate(grammar.split('\n')):
-            line = re.sub(r'((\\.|[^#])*)(#.*)?', r'\1', line)
+        for lineno, line in enumerate(grammar.split("\n")):
+            line = re.sub(r"((\\.|[^#])*)(#.*)?", r"\1", line)
             line = line.strip()
             if line:
                 try:
                     RegexpChunkRule.fromstring(line)
                 except ValueError as e:
-                    self.grammarbox.tag_add('error', '%s.0' % (lineno+1),
-                                            '%s.0 lineend' % (lineno+1))
-        self.status['text'] = ''
+                    self.grammarbox.tag_add(
+                        "error", "%s.0" % (lineno + 1), "%s.0 lineend" % (lineno + 1)
+                    )
+        self.status["text"] = ""
 
     def update(self, *event):
         # Record when update was called (for grammarcheck)
@@ -1047,7 +1265,7 @@ class RegexpChunkApp(object):
             self._last_keypress = time.time()
 
         # Read the grammar from the Text box.
-        self.grammar = grammar = self.grammarbox.get('1.0', 'end')
+        self.grammar = grammar = self.grammarbox.get("1.0", "end")
 
         # If the grammar hasn't changed, do nothing:
         normalized_grammar = self.normalize_grammar(grammar)
@@ -1058,8 +1276,8 @@ class RegexpChunkApp(object):
 
         # If the grammar has changed, and we're looking at history,
         # then stop looking at history.
-        if self._history_index < len(self._history)-1:
-            self.grammarlabel['text'] = 'Grammar:'
+        if self._history_index < len(self._history) - 1:
+            self.grammarlabel["text"] = "Grammar:"
 
         self._syntax_highlight_grammar(grammar)
 
@@ -1068,8 +1286,10 @@ class RegexpChunkApp(object):
         try:
             # Note: the normalized grammar has no blank lines.
             if normalized_grammar:
-                rules = [RegexpChunkRule.fromstring(line)
-                         for line in normalized_grammar.split('\n')]
+                rules = [
+                    RegexpChunkRule.fromstring(line)
+                    for line in normalized_grammar.split("\n")
+                ]
             else:
                 rules = []
         except ValueError as e:
@@ -1079,7 +1299,7 @@ class RegexpChunkApp(object):
             return
 
         self.chunker = RegexpChunkParser(rules)
-        self.grammarbox.tag_remove('error', '1.0', 'end')
+        self.grammarbox.tag_remove("error", "1.0", "end")
         self.grammar_changed = time.time()
         # Display the results
         if self._showing_trace:
@@ -1092,11 +1312,11 @@ class RegexpChunkApp(object):
 
     def _highlight_devset(self, sample=None):
         if sample is None:
-            sample = self.devset[self.devset_index:self.devset_index+1]
+            sample = self.devset[self.devset_index : self.devset_index + 1]
 
-        self.devsetbox.tag_remove('true-pos', '1.0', 'end')
-        self.devsetbox.tag_remove('false-neg', '1.0', 'end')
-        self.devsetbox.tag_remove('false-pos', '1.0', 'end')
+        self.devsetbox.tag_remove("true-pos", "1.0", "end")
+        self.devsetbox.tag_remove("false-neg", "1.0", "end")
+        self.devsetbox.tag_remove("false-pos", "1.0", "end")
 
         # Run the grammar on the test cases.
         for sentnum, gold_tree in enumerate(sample):
@@ -1107,11 +1327,11 @@ class RegexpChunkApp(object):
             test_chunks = self._chunks(test_tree)
             # Compare them.
             for chunk in gold_chunks.intersection(test_chunks):
-                self._color_chunk(sentnum, chunk, 'true-pos')
+                self._color_chunk(sentnum, chunk, "true-pos")
             for chunk in gold_chunks - test_chunks:
-                self._color_chunk(sentnum, chunk, 'false-neg')
+                self._color_chunk(sentnum, chunk, "false-neg")
             for chunk in test_chunks - gold_chunks:
-                self._color_chunk(sentnum, chunk, 'false-pos')
+                self._color_chunk(sentnum, chunk, "false-pos")
 
     def _chunkparse(self, words):
         try:
@@ -1120,15 +1340,17 @@ class RegexpChunkApp(object):
             # There's an error somewhere in the grammar, but we're not sure
             # exactly where, so just mark the whole grammar as bad.
             # E.g., this is caused by: "({<NN>})"
-            self.grammarbox.tag_add('error', '1.0', 'end')
+            self.grammarbox.tag_add("error", "1.0", "end")
             # Treat it as tagging nothing:
             return words
 
     def _color_chunk(self, sentnum, chunk, tag):
         start, end = chunk
-        self.devsetbox.tag_add(tag,
-            '%s.%s' % (self.linenum[sentnum], self.charnum[sentnum, start]),
-            '%s.%s' % (self.linenum[sentnum], self.charnum[sentnum, end]-1))
+        self.devsetbox.tag_add(
+            tag,
+            "%s.%s" % (self.linenum[sentnum], self.charnum[sentnum, start]),
+            "%s.%s" % (self.linenum[sentnum], self.charnum[sentnum, end] - 1),
+        )
 
     def reset(self):
         # Clear various variables
@@ -1139,109 +1361,125 @@ class RegexpChunkApp(object):
         self._history = []
         self._history_index = 0
         # Update the on-screen display.
-        self.grammarbox.delete('1.0', 'end')
+        self.grammarbox.delete("1.0", "end")
         self.show_devset(0)
         self.update()
-        #self._eval_plot()
+        # self._eval_plot()
 
     SAVE_GRAMMAR_TEMPLATE = (
-        '# Regexp Chunk Parsing Grammar\n'
-        '# Saved %(date)s\n'
-        '#\n'
-        '# Development set: %(devset)s\n'
-        '#   Precision: %(precision)s\n'
-        '#   Recall:    %(recall)s\n'
-        '#   F-score:   %(fscore)s\n\n'
-        '%(grammar)s\n')
+        "# Regexp Chunk Parsing Grammar\n"
+        "# Saved %(date)s\n"
+        "#\n"
+        "# Development set: %(devset)s\n"
+        "#   Precision: %(precision)s\n"
+        "#   Recall:    %(recall)s\n"
+        "#   F-score:   %(fscore)s\n\n"
+        "%(grammar)s\n"
+    )
 
     def save_grammar(self, filename=None):
         if not filename:
-            ftypes = [('Chunk Gramamr', '.chunk'),
-                      ('All files', '*')]
-            filename = asksaveasfilename(filetypes=ftypes,
-                                                      defaultextension='.chunk')
-            if not filename: return
-        if (self._history and self.normalized_grammar ==
-            self.normalize_grammar(self._history[-1][0])):
-            precision, recall, fscore = ['%.2f%%' % (100*v) for v in
-                                         self._history[-1][1:]]
+            ftypes = [("Chunk Gramamr", ".chunk"), ("All files", "*")]
+            filename = asksaveasfilename(filetypes=ftypes, defaultextension=".chunk")
+            if not filename:
+                return
+        if self._history and self.normalized_grammar == self.normalize_grammar(
+            self._history[-1][0]
+        ):
+            precision, recall, fscore = [
+                "%.2f%%" % (100 * v) for v in self._history[-1][1:]
+            ]
         elif self.chunker is None:
-            precision = recall = fscore = 'Grammar not well formed'
+            precision = recall = fscore = "Grammar not well formed"
         else:
-            precision = recall = fscore = 'Not finished evaluation yet'
-
-        with open(filename, 'w') as outfile:
-            outfile.write(self.SAVE_GRAMMAR_TEMPLATE % dict(
-                date=time.ctime(), devset=self.devset_name,
-                precision=precision, recall=recall, fscore=fscore,
-                grammar=self.grammar.strip()))
+            precision = recall = fscore = "Not finished evaluation yet"
+
+        with open(filename, "w") as outfile:
+            outfile.write(
+                self.SAVE_GRAMMAR_TEMPLATE
+                % dict(
+                    date=time.ctime(),
+                    devset=self.devset_name,
+                    precision=precision,
+                    recall=recall,
+                    fscore=fscore,
+                    grammar=self.grammar.strip(),
+                )
+            )
 
     def load_grammar(self, filename=None):
         if not filename:
-            ftypes = [('Chunk Gramamr', '.chunk'),
-                      ('All files', '*')]
-            filename = askopenfilename(filetypes=ftypes,
-                                                    defaultextension='.chunk')
-            if not filename: return
-        self.grammarbox.delete('1.0', 'end')
+            ftypes = [("Chunk Gramamr", ".chunk"), ("All files", "*")]
+            filename = askopenfilename(filetypes=ftypes, defaultextension=".chunk")
+            if not filename:
+                return
+        self.grammarbox.delete("1.0", "end")
         self.update()
-        with open(filename, 'r') as infile:
+        with open(filename, "r") as infile:
             grammar = infile.read()
-        grammar = re.sub('^\# Regexp Chunk Parsing Grammar[\s\S]*'
-                         'F-score:.*\n', '', grammar).lstrip()
-        self.grammarbox.insert('1.0', grammar)
+        grammar = re.sub(
+            r"^\# Regexp Chunk Parsing Grammar[\s\S]*" r"F-score:.*\n", "", grammar
+        ).lstrip()
+        self.grammarbox.insert("1.0", grammar)
         self.update()
 
     def save_history(self, filename=None):
         if not filename:
-            ftypes = [('Chunk Gramamr History', '.txt'),
-                      ('All files', '*')]
-            filename = asksaveasfilename(filetypes=ftypes,
-                                                      defaultextension='.txt')
-            if not filename: return
-
-        with open(filename, 'w') as outfile:
-            outfile.write('# Regexp Chunk Parsing Grammar History\n')
-            outfile.write('# Saved %s\n' % time.ctime())
-            outfile.write('# Development set: %s\n' % self.devset_name)
+            ftypes = [("Chunk Gramamr History", ".txt"), ("All files", "*")]
+            filename = asksaveasfilename(filetypes=ftypes, defaultextension=".txt")
+            if not filename:
+                return
+
+        with open(filename, "w") as outfile:
+            outfile.write("# Regexp Chunk Parsing Grammar History\n")
+            outfile.write("# Saved %s\n" % time.ctime())
+            outfile.write("# Development set: %s\n" % self.devset_name)
             for i, (g, p, r, f) in enumerate(self._history):
-                hdr = ('Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, '
-                       'fscore=%.2f%%)' % (i+1, len(self._history),
-                                           p*100, r*100, f*100))
-                outfile.write('\n%s\n' % hdr)
-                outfile.write(''.join('  %s\n' % line for line in g.strip().split()))
-
-            if not (self._history and self.normalized_grammar ==
-                    self.normalize_grammar(self._history[-1][0])):
+                hdr = (
+                    "Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, "
+                    "fscore=%.2f%%)"
+                    % (i + 1, len(self._history), p * 100, r * 100, f * 100)
+                )
+                outfile.write("\n%s\n" % hdr)
+                outfile.write("".join("  %s\n" % line for line in g.strip().split()))
+
+            if not (
+                self._history
+                and self.normalized_grammar
+                == self.normalize_grammar(self._history[-1][0])
+            ):
                 if self.chunker is None:
-                    outfile.write('\nCurrent Grammar (not well-formed)\n')
+                    outfile.write("\nCurrent Grammar (not well-formed)\n")
                 else:
-                    outfile.write('\nCurrent Grammar (not evaluated)\n')
-                outfile.write(''.join('  %s\n' % line for line
-                                  in self.grammar.strip().split()))
+                    outfile.write("\nCurrent Grammar (not evaluated)\n")
+                outfile.write(
+                    "".join("  %s\n" % line for line in self.grammar.strip().split())
+                )
 
     def about(self, *e):
-        ABOUT = ("NLTK RegExp Chunk Parser Application\n"+
-                 "Written by Edward Loper")
-        TITLE = 'About: Regular Expression Chunk Parser Application'
+        ABOUT = "NLTK RegExp Chunk Parser Application\n" + "Written by Edward Loper"
+        TITLE = "About: Regular Expression Chunk Parser Application"
         try:
-            from six.moves.tkinter_messagebox import Message
+            from tkinter.messagebox import Message
+
             Message(message=ABOUT, title=TITLE).show()
         except:
             ShowText(self.top, TITLE, ABOUT)
 
     def set_devset_size(self, size=None):
-        if size is not None: self._devset_size.set(size)
+        if size is not None:
+            self._devset_size.set(size)
         self._devset_size.set(min(len(self.devset), self._devset_size.get()))
         self.show_devset(1)
         self.show_devset(0)
         # what about history?  Evaluated at diff dev set sizes!
 
     def resize(self, size=None):
-        if size is not None: self._size.set(size)
+        if size is not None:
+            self._size.set(size)
         size = self._size.get()
         self._font.configure(size=-(abs(size)))
-        self._smallfont.configure(size=min(-10, -(abs(size))*14//20))
+        self._smallfont.configure(size=min(-10, -(abs(size)) * 14 // 20))
 
     def mainloop(self, *args, **kwargs):
         """
@@ -1250,13 +1488,16 @@ class RegexpChunkApp(object):
         from a script); otherwise, the demo will close as soon as
         the script completes.
         """
-        if in_idle(): return
+        if in_idle():
+            return
         self.top.mainloop(*args, **kwargs)
 
+
 def app():
     RegexpChunkApp().mainloop()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     app()
 
-__all__ = ['app']
+__all__ = ["app"]
diff --git a/nlp_resource_data/nltk/app/chunkparser_app.pyc b/nlp_resource_data/nltk/app/chunkparser_app.pyc
deleted file mode 100755 (executable)
index 4777811..0000000
Binary files a/nlp_resource_data/nltk/app/chunkparser_app.pyc and /dev/null differ
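
The bulk of this commit is mechanical: every six.moves compatibility import is
replaced by its Python 3 standard-library equivalent, and the autoformatter
normalizes string literals to double quotes. A sketch of the import mapping
applied throughout these files (module names are taken from the hunks
themselves; the snippet is illustrative, not part of the patch):

    import queue as q                        # was: from six.moves import queue as q
    from tkinter import Tk, Text, Button     # was: from six.moves.tkinter import ...
    from tkinter.font import Font            # was: from six.moves.tkinter_font import Font
    from tkinter.messagebox import Message   # was: from six.moves.tkinter_messagebox import Message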
old mode 100755 (executable)
new mode 100644 (file)
index 49cbb8b..36362a8
@@ -1,72 +1,81 @@
 # Natural Language Toolkit: Collocations Application
 # Much of the GUI code is imported from concordance.py; we intend to merge these tools together
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 #
 
 
-from __future__ import division
-
 import threading
 
-from six.moves import queue as q
-from six.moves.tkinter_font import Font
-from six.moves.tkinter import (Button, END, Frame, IntVar, LEFT, Label, Menu,
-                               OptionMenu, SUNKEN, Scrollbar, StringVar,
-                               Text, Tk)
-
-from nltk.corpus import (cess_cat, brown, nps_chat, treebank, sinica_treebank, alpino,
-                         indian, floresta, mac_morpho, machado, cess_esp)
+import queue as q
+from tkinter.font import Font
+from tkinter import (
+    Button,
+    END,
+    Frame,
+    IntVar,
+    LEFT,
+    Label,
+    Menu,
+    OptionMenu,
+    SUNKEN,
+    Scrollbar,
+    StringVar,
+    Text,
+    Tk,
+)
+
+from nltk.corpus import (
+    cess_cat,
+    brown,
+    nps_chat,
+    treebank,
+    sinica_treebank,
+    alpino,
+    indian,
+    floresta,
+    mac_morpho,
+    machado,
+    cess_esp,
+)
 from nltk.util import in_idle
 from nltk.probability import FreqDist
 
 
-CORPUS_LOADED_EVENT = '<<CL_EVENT>>'
-ERROR_LOADING_CORPUS_EVENT = '<<ELC_EVENT>>'
+CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
+ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"
 POLL_INTERVAL = 100
 
-_DEFAULT = 'English: Brown Corpus (Humor)'
+_DEFAULT = "English: Brown Corpus (Humor)"
 _CORPORA = {
-            'Catalan: CESS-CAT Corpus':
-                lambda: cess_cat.words(),
-            'English: Brown Corpus':
-                lambda: brown.words(),
-            'English: Brown Corpus (Press)':
-                lambda: brown.words(categories=['news', 'editorial', 'reviews']),
-            'English: Brown Corpus (Religion)':
-                lambda: brown.words(categories='religion'),
-            'English: Brown Corpus (Learned)':
-                lambda: brown.words(categories='learned'),
-            'English: Brown Corpus (Science Fiction)':
-                lambda: brown.words(categories='science_fiction'),
-            'English: Brown Corpus (Romance)':
-                lambda: brown.words(categories='romance'),
-            'English: Brown Corpus (Humor)':
-                lambda: brown.words(categories='humor'),
-            'English: NPS Chat Corpus':
-                lambda: nps_chat.words(),
-            'English: Wall Street Journal Corpus':
-                lambda: treebank.words(),
-            'Chinese: Sinica Corpus':
-                lambda: sinica_treebank.words(),
-            'Dutch: Alpino Corpus':
-                lambda: alpino.words(),
-            'Hindi: Indian Languages Corpus':
-                lambda: indian.words(files='hindi.pos'),
-            'Portuguese: Floresta Corpus (Portugal)':
-                lambda: floresta.words(),
-            'Portuguese: MAC-MORPHO Corpus (Brazil)':
-                lambda: mac_morpho.words(),
-            'Portuguese: Machado Corpus (Brazil)':
-                lambda: machado.words(),
-            'Spanish: CESS-ESP Corpus':
-                lambda: cess_esp.words()
-           }
+    "Catalan: CESS-CAT Corpus": lambda: cess_cat.words(),
+    "English: Brown Corpus": lambda: brown.words(),
+    "English: Brown Corpus (Press)": lambda: brown.words(
+        categories=["news", "editorial", "reviews"]
+    ),
+    "English: Brown Corpus (Religion)": lambda: brown.words(categories="religion"),
+    "English: Brown Corpus (Learned)": lambda: brown.words(categories="learned"),
+    "English: Brown Corpus (Science Fiction)": lambda: brown.words(
+        categories="science_fiction"
+    ),
+    "English: Brown Corpus (Romance)": lambda: brown.words(categories="romance"),
+    "English: Brown Corpus (Humor)": lambda: brown.words(categories="humor"),
+    "English: NPS Chat Corpus": lambda: nps_chat.words(),
+    "English: Wall Street Journal Corpus": lambda: treebank.words(),
+    "Chinese: Sinica Corpus": lambda: sinica_treebank.words(),
+    "Dutch: Alpino Corpus": lambda: alpino.words(),
+    "Hindi: Indian Languages Corpus": lambda: indian.words(files="hindi.pos"),
+    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.words(),
+    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.words(),
+    "Portuguese: Machado Corpus (Brazil)": lambda: machado.words(),
+    "Spanish: CESS-ESP Corpus": lambda: cess_esp.words(),
+}
+
 
 class CollocationsView:
-    _BACKGROUND_COLOUR='#FFF' #white
+    _BACKGROUND_COLOUR = "#FFF"  # white
 
     def __init__(self):
         self.queue = q.Queue()
@@ -79,58 +88,100 @@ class CollocationsView:
         self.after = self.top.after(POLL_INTERVAL, self._poll)
 
     def _init_top(self, top):
-        top.geometry('550x650+50+50')
-        top.title('NLTK Collocations List')
-        top.bind('<Control-q>', self.destroy)
-        top.protocol('WM_DELETE_WINDOW', self.destroy)
-        top.minsize(550,650)
+        top.geometry("550x650+50+50")
+        top.title("NLTK Collocations List")
+        top.bind("<Control-q>", self.destroy)
+        top.protocol("WM_DELETE_WINDOW", self.destroy)
+        top.minsize(550, 650)
 
     def _init_widgets(self, parent):
-        self.main_frame = Frame(parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1))
+        self.main_frame = Frame(
+            parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1)
+        )
         self._init_corpus_select(self.main_frame)
         self._init_results_box(self.main_frame)
         self._init_paging(self.main_frame)
         self._init_status(self.main_frame)
-        self.main_frame.pack(fill='both', expand=True)
+        self.main_frame.pack(fill="both", expand=True)
 
     def _init_corpus_select(self, parent):
         innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
         self.var = StringVar(innerframe)
         self.var.set(self.model.DEFAULT_CORPUS)
-        Label(innerframe, justify=LEFT, text=' Corpus: ', background=self._BACKGROUND_COLOUR, padx = 2, pady = 1, border = 0).pack(side='left')
-
-        other_corpora = list(self.model.CORPORA.keys()).remove(self.model.DEFAULT_CORPUS)
-        om = OptionMenu(innerframe, self.var, self.model.DEFAULT_CORPUS, command=self.corpus_selected, *self.model.non_default_corpora())
-        om['borderwidth'] = 0
-        om['highlightthickness'] = 1
-        om.pack(side='left')
-        innerframe.pack(side='top', fill='x', anchor='n')
+        Label(
+            innerframe,
+            justify=LEFT,
+            text=" Corpus: ",
+            background=self._BACKGROUND_COLOUR,
+            padx=2,
+            pady=1,
+            border=0,
+        ).pack(side="left")
+
+        other_corpora = list(self.model.CORPORA.keys()).remove(
+            self.model.DEFAULT_CORPUS
+        )
+        om = OptionMenu(
+            innerframe,
+            self.var,
+            self.model.DEFAULT_CORPUS,
+            command=self.corpus_selected,
+            *self.model.non_default_corpora()
+        )
+        om["borderwidth"] = 0
+        om["highlightthickness"] = 1
+        om.pack(side="left")
+        innerframe.pack(side="top", fill="x", anchor="n")
 
     def _init_status(self, parent):
-        self.status = Label(parent, justify=LEFT, relief=SUNKEN, background=self._BACKGROUND_COLOUR, border=0, padx = 1, pady = 0)
-        self.status.pack(side='top', anchor='sw')
+        self.status = Label(
+            parent,
+            justify=LEFT,
+            relief=SUNKEN,
+            background=self._BACKGROUND_COLOUR,
+            border=0,
+            padx=1,
+            pady=0,
+        )
+        self.status.pack(side="top", anchor="sw")
 
     def _init_menubar(self):
         self._result_size = IntVar(self.top)
         menubar = Menu(self.top)
 
         filemenu = Menu(menubar, tearoff=0, borderwidth=0)
-        filemenu.add_command(label='Exit', underline=1,
-                   command=self.destroy, accelerator='Ctrl-q')
-        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+        filemenu.add_command(
+            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
+        )
+        menubar.add_cascade(label="File", underline=0, menu=filemenu)
 
         editmenu = Menu(menubar, tearoff=0)
         rescntmenu = Menu(editmenu, tearoff=0)
-        rescntmenu.add_radiobutton(label='20', variable=self._result_size,
-                     underline=0, value=20, command=self.set_result_size)
-        rescntmenu.add_radiobutton(label='50', variable=self._result_size,
-                     underline=0, value=50, command=self.set_result_size)
-        rescntmenu.add_radiobutton(label='100', variable=self._result_size,
-                     underline=0, value=100, command=self.set_result_size)
+        rescntmenu.add_radiobutton(
+            label="20",
+            variable=self._result_size,
+            underline=0,
+            value=20,
+            command=self.set_result_size,
+        )
+        rescntmenu.add_radiobutton(
+            label="50",
+            variable=self._result_size,
+            underline=0,
+            value=50,
+            command=self.set_result_size,
+        )
+        rescntmenu.add_radiobutton(
+            label="100",
+            variable=self._result_size,
+            underline=0,
+            value=100,
+            command=self.set_result_size,
+        )
         rescntmenu.invoke(1)
-        editmenu.add_cascade(label='Result Count', underline=0, menu=rescntmenu)
+        editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)
 
-        menubar.add_cascade(label='Edit', underline=0, menu=editmenu)
+        menubar.add_cascade(label="Edit", underline=0, menu=editmenu)
         self.top.config(menu=menubar)
 
     def set_result_size(self, **kwargs):
@@ -141,30 +192,55 @@ class CollocationsView:
         i1 = Frame(innerframe)
         i2 = Frame(innerframe)
         vscrollbar = Scrollbar(i1, borderwidth=1)
-        hscrollbar = Scrollbar(i2, borderwidth=1, orient='horiz')
-        self.results_box = Text(i1,
-                    font=Font(family='courier', size='16'),
-                    state='disabled', borderwidth=1,
-                    yscrollcommand=vscrollbar.set,
-                    xscrollcommand=hscrollbar.set, wrap='none', width='40', height = '20', exportselection=1)
-        self.results_box.pack(side='left', fill='both', expand=True)
-        vscrollbar.pack(side='left', fill='y', anchor='e')
+        hscrollbar = Scrollbar(i2, borderwidth=1, orient="horiz")
+        self.results_box = Text(
+            i1,
+            font=Font(family="courier", size="16"),
+            state="disabled",
+            borderwidth=1,
+            yscrollcommand=vscrollbar.set,
+            xscrollcommand=hscrollbar.set,
+            wrap="none",
+            width="40",
+            height="20",
+            exportselection=1,
+        )
+        self.results_box.pack(side="left", fill="both", expand=True)
+        vscrollbar.pack(side="left", fill="y", anchor="e")
         vscrollbar.config(command=self.results_box.yview)
-        hscrollbar.pack(side='left', fill='x', expand=True, anchor='w')
+        hscrollbar.pack(side="left", fill="x", expand=True, anchor="w")
         hscrollbar.config(command=self.results_box.xview)
-        #there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!!
-        Label(i2, text='   ', background=self._BACKGROUND_COLOUR).pack(side='left', anchor='e')
-        i1.pack(side='top', fill='both', expand=True, anchor='n')
-        i2.pack(side='bottom', fill='x', anchor='s')
-        innerframe.pack(side='top', fill='both', expand=True)
+        # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!!
+        Label(i2, text="   ", background=self._BACKGROUND_COLOUR).pack(
+            side="left", anchor="e"
+        )
+        i1.pack(side="top", fill="both", expand=True, anchor="n")
+        i2.pack(side="bottom", fill="x", anchor="s")
+        innerframe.pack(side="top", fill="both", expand=True)
 
     def _init_paging(self, parent):
         innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
-        self.prev = prev = Button(innerframe, text='Previous', command=self.previous, width='10', borderwidth=1, highlightthickness=1, state='disabled')
-        prev.pack(side='left', anchor='center')
-        self.next = next = Button(innerframe, text='Next', command=self.__next__, width='10', borderwidth=1, highlightthickness=1, state='disabled')
-        next.pack(side='right', anchor='center')
-        innerframe.pack(side='top', fill='y')
+        self.prev = prev = Button(
+            innerframe,
+            text="Previous",
+            command=self.previous,
+            width="10",
+            borderwidth=1,
+            highlightthickness=1,
+            state="disabled",
+        )
+        prev.pack(side="left", anchor="center")
+        self.next = next = Button(
+            innerframe,
+            text="Next",
+            command=self.__next__,
+            width="10",
+            borderwidth=1,
+            highlightthickness=1,
+            state="disabled",
+        )
+        next.pack(side="right", anchor="center")
+        innerframe.pack(side="top", fill="y")
         self.reset_current_page()
 
     def reset_current_page(self):
@@ -183,18 +259,18 @@ class CollocationsView:
         self.after = self.top.after(POLL_INTERVAL, self._poll)
 
     def handle_error_loading_corpus(self, event):
-        self.status['text'] = 'Error in loading ' + self.var.get()
+        self.status["text"] = "Error in loading " + self.var.get()
         self.unfreeze_editable()
         self.clear_results_box()
         self.freeze_editable()
         self.reset_current_page()
 
     def handle_corpus_loaded(self, event):
-        self.status['text'] = self.var.get() + ' is loaded'
+        self.status["text"] = self.var.get() + " is loaded"
         self.unfreeze_editable()
         self.clear_results_box()
         self.reset_current_page()
-        #self.next()
+        # self.next()
         collocations = self.model.next(self.current_page + 1)
         self.write_results(collocations)
         self.current_page += 1
@@ -206,7 +282,7 @@ class CollocationsView:
     def previous(self):
         self.freeze_editable()
         collocations = self.model.prev(self.current_page - 1)
-        self.current_page= self.current_page - 1
+        self.current_page = self.current_page - 1
         self.clear_results_box()
         self.write_results(collocations)
         self.unfreeze_editable()
@@ -221,31 +297,33 @@ class CollocationsView:
 
     def load_corpus(self, selection):
         if self.model.selected_corpus != selection:
-            self.status['text'] = 'Loading ' + selection + '...'
+            self.status["text"] = "Loading " + selection + "..."
             self.freeze_editable()
             self.model.load_corpus(selection)
 
     def freeze_editable(self):
-        self.prev['state'] = 'disabled'
-        self.next['state'] = 'disabled'
+        self.prev["state"] = "disabled"
+        self.next["state"] = "disabled"
 
     def clear_results_box(self):
-        self.results_box['state'] = 'normal'
+        self.results_box["state"] = "normal"
         self.results_box.delete("1.0", END)
-        self.results_box['state'] = 'disabled'
+        self.results_box["state"] = "disabled"
 
     def fire_event(self, event):
-        #Firing an event so that rendering of widgets happen in the mainloop thread
-        self.top.event_generate(event, when='tail')
+        # Firing an event so that rendering of widgets happen in the mainloop thread
+        self.top.event_generate(event, when="tail")
 
     def destroy(self, *e):
-        if self.top is None: return
+        if self.top is None:
+            return
         self.top.after_cancel(self.after)
         self.top.destroy()
         self.top = None
 
     def mainloop(self, *args, **kwargs):
-        if in_idle(): return
+        if in_idle():
+            return
         self.top.mainloop(*args, **kwargs)
 
     def unfreeze_editable(self):
@@ -253,21 +331,22 @@ class CollocationsView:
 
     def set_paging_button_states(self):
         if self.current_page == -1 or self.current_page == 0:
-            self.prev['state'] = 'disabled'
+            self.prev["state"] = "disabled"
         else:
-            self.prev['state'] = 'normal'
+            self.prev["state"] = "normal"
         if self.model.is_last_page(self.current_page):
-            self.next['state'] = 'disabled'
+            self.next["state"] = "disabled"
         else:
-            self.next['state'] = 'normal'
+            self.next["state"] = "normal"
 
     def write_results(self, results):
-        self.results_box['state'] = 'normal'
+        self.results_box["state"] = "normal"
         row = 1
         for each in results:
-            self.results_box.insert(str(row) + '.0', each[0] + " " + each[1] + "\n")
+            self.results_box.insert(str(row) + ".0", each[0] + " " + each[1] + "\n")
             row += 1
-        self.results_box['state'] = 'disabled'
+        self.results_box["state"] = "disabled"
+
 
 class CollocationsModel:
     def __init__(self, queue):
@@ -300,12 +379,19 @@ class CollocationsModel:
     def is_last_page(self, number):
         if number < len(self.result_pages):
             return False
-        return self.results_returned + (number - len(self.result_pages)) * self.result_count >= len(self.collocations)
+        return self.results_returned + (
+            number - len(self.result_pages)
+        ) * self.result_count >= len(self.collocations)
 
     def next(self, page):
         if (len(self.result_pages) - 1) < page:
             for i in range(page - (len(self.result_pages) - 1)):
-                self.result_pages.append(self.collocations[self.results_returned:self.results_returned+self.result_count])
+                self.result_pages.append(
+                    self.collocations[
+                        self.results_returned : self.results_returned
+                        + self.result_count
+                    ]
+                )
                 self.results_returned += self.result_count
         return self.result_pages[page]
 
@@ -323,10 +409,14 @@ class CollocationsModel:
             try:
                 words = self.model.CORPORA[self.name]()
                 from operator import itemgetter
+
                 text = [w for w in words if len(w) > 2]
-                fd = FreqDist(tuple(text[i:i+2]) for i in range(len(text)-1))
+                fd = FreqDist(tuple(text[i : i + 2]) for i in range(len(text) - 1))
                 vocab = FreqDist(text)
-                scored = [((w1,w2), fd[(w1,w2)] ** 3 / (vocab[w1] * vocab[w2])) for w1, w2 in fd]
+                scored = [
+                    ((w1, w2), fd[(w1, w2)] ** 3 / (vocab[w1] * vocab[w2]))
+                    for w1, w2 in fd
+                ]
                 scored.sort(key=itemgetter(1), reverse=True)
                 self.model.collocations = list(map(itemgetter(0), scored))
                 self.model.queue.put(CORPUS_LOADED_EVENT)
@@ -334,14 +424,17 @@ class CollocationsModel:
                 print(e)
                 self.model.queue.put(ERROR_LOADING_CORPUS_EVENT)
 
-#def collocations():
+
+# def collocations():
 #    colloc_strings = [w1 + ' ' + w2 for w1, w2 in self._collocations[:num]]
 
+
 def app():
     c = CollocationsView()
     c.mainloop()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     app()
 
-__all__ = ['app']
+__all__ = ["app"]
diff --git a/nlp_resource_data/nltk/app/collocations_app.pyc b/nlp_resource_data/nltk/app/collocations_app.pyc
deleted file mode 100755 (executable)
index b21f97c..0000000
Binary files a/nlp_resource_data/nltk/app/collocations_app.pyc and /dev/null differ
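
The scoring in CollocationsModel's loader thread ranks adjacent word pairs by
fd[(w1, w2)] ** 3 / (vocab[w1] * vocab[w2]); under Python 3, / is true
division, which is why the from __future__ import division line disappears at
the top of this file. A standalone sketch of the same computation (the helper
name score_bigrams is hypothetical):

    from operator import itemgetter
    from nltk.probability import FreqDist

    def score_bigrams(words):
        # Drop short tokens, as the loader thread does above.
        text = [w for w in words if len(w) > 2]
        # Frequency of each adjacent pair, and of each individual token.
        fd = FreqDist(tuple(text[i:i + 2]) for i in range(len(text) - 1))
        vocab = FreqDist(text)
        # Pair frequency cubed over the product of the word frequencies.
        scored = [
            ((w1, w2), fd[(w1, w2)] ** 3 / (vocab[w1] * vocab[w2]))
            for w1, w2 in fd
        ]
        scored.sort(key=itemgetter(1), reverse=True)
        return [pair for pair, score in scored]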
old mode 100755 (executable)
new mode 100644 (file)
index 53c7167..afdef61
 # Natural Language Toolkit: Concordance Application
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-
-import nltk.compat
 import re
 import threading
 
-from six.moves import queue as q
-from six.moves.tkinter_font import Font
-from six.moves.tkinter import (Tk, Button, END, Entry, Frame, IntVar, LEFT,
-                               Label, Menu, OptionMenu, SUNKEN, Scrollbar,
-                               StringVar, Text)
-
-from nltk.corpus import (cess_cat, brown, nps_chat, treebank, sinica_treebank,
-                         alpino, indian, floresta, mac_morpho, cess_esp)
+import queue as q
+from tkinter.font import Font
+from tkinter import (
+    Tk,
+    Button,
+    END,
+    Entry,
+    Frame,
+    IntVar,
+    LEFT,
+    Label,
+    Menu,
+    OptionMenu,
+    SUNKEN,
+    Scrollbar,
+    StringVar,
+    Text,
+)
+
+from nltk.corpus import (
+    cess_cat,
+    brown,
+    nps_chat,
+    treebank,
+    sinica_treebank,
+    alpino,
+    indian,
+    floresta,
+    mac_morpho,
+    cess_esp,
+)
 from nltk.util import in_idle
 from nltk.draw.util import ShowText
 
-WORD_OR_TAG = '[^/ ]+'
-BOUNDARY = r'\b'
+WORD_OR_TAG = "[^/ ]+"
+BOUNDARY = r"\b"
 
-CORPUS_LOADED_EVENT = '<<CL_EVENT>>'
-SEARCH_TERMINATED_EVENT = '<<ST_EVENT>>'
-SEARCH_ERROR_EVENT = '<<SE_EVENT>>'
-ERROR_LOADING_CORPUS_EVENT = '<<ELC_EVENT>>'
+CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
+SEARCH_TERMINATED_EVENT = "<<ST_EVENT>>"
+SEARCH_ERROR_EVENT = "<<SE_EVENT>>"
+ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"
 
 POLL_INTERVAL = 50
 
 # NB All corpora must be specified in a lambda expression so as not to be
 # loaded when the module is imported.
 
-_DEFAULT = 'English: Brown Corpus (Humor, simplified)'
+_DEFAULT = "English: Brown Corpus (Humor, simplified)"
 _CORPORA = {
-            'Catalan: CESS-CAT Corpus (simplified)':
-                lambda: cess_cat.tagged_sents(tagset='universal'),
-            'English: Brown Corpus':
-                lambda: brown.tagged_sents(),
-            'English: Brown Corpus (simplified)':
-                lambda: brown.tagged_sents(tagset='universal'),
-            'English: Brown Corpus (Press, simplified)':
-                lambda: brown.tagged_sents(categories=['news', 'editorial', 'reviews'], tagset='universal'),
-            'English: Brown Corpus (Religion, simplified)':
-                lambda: brown.tagged_sents(categories='religion', tagset='universal'),
-            'English: Brown Corpus (Learned, simplified)':
-                lambda: brown.tagged_sents(categories='learned', tagset='universal'),
-            'English: Brown Corpus (Science Fiction, simplified)':
-                lambda: brown.tagged_sents(categories='science_fiction', tagset='universal'),
-            'English: Brown Corpus (Romance, simplified)':
-                lambda: brown.tagged_sents(categories='romance', tagset='universal'),
-            'English: Brown Corpus (Humor, simplified)':
-                lambda: brown.tagged_sents(categories='humor', tagset='universal'),
-            'English: NPS Chat Corpus':
-                lambda: nps_chat.tagged_posts(),
-            'English: NPS Chat Corpus (simplified)':
-                lambda: nps_chat.tagged_posts(tagset='universal'),
-            'English: Wall Street Journal Corpus':
-                lambda: treebank.tagged_sents(),
-            'English: Wall Street Journal Corpus (simplified)':
-                lambda: treebank.tagged_sents(tagset='universal'),
-            'Chinese: Sinica Corpus':
-                lambda: sinica_treebank.tagged_sents(),
-            'Chinese: Sinica Corpus (simplified)':
-                lambda: sinica_treebank.tagged_sents(tagset='universal'),
-            'Dutch: Alpino Corpus':
-                lambda: alpino.tagged_sents(),
-            'Dutch: Alpino Corpus (simplified)':
-                lambda: alpino.tagged_sents(tagset='universal'),
-            'Hindi: Indian Languages Corpus':
-                lambda: indian.tagged_sents(files='hindi.pos'),
-            'Hindi: Indian Languages Corpus (simplified)':
-                lambda: indian.tagged_sents(files='hindi.pos', tagset='universal'),
-            'Portuguese: Floresta Corpus (Portugal)':
-                lambda: floresta.tagged_sents(),
-            'Portuguese: Floresta Corpus (Portugal, simplified)':
-                lambda: floresta.tagged_sents(tagset='universal'),
-            'Portuguese: MAC-MORPHO Corpus (Brazil)':
-                lambda: mac_morpho.tagged_sents(),
-            'Portuguese: MAC-MORPHO Corpus (Brazil, simplified)':
-                lambda: mac_morpho.tagged_sents(tagset='universal'),
-            'Spanish: CESS-ESP Corpus (simplified)':
-                lambda: cess_esp.tagged_sents(tagset='universal'),
-           }
+    "Catalan: CESS-CAT Corpus (simplified)": lambda: cess_cat.tagged_sents(
+        tagset="universal"
+    ),
+    "English: Brown Corpus": lambda: brown.tagged_sents(),
+    "English: Brown Corpus (simplified)": lambda: brown.tagged_sents(
+        tagset="universal"
+    ),
+    "English: Brown Corpus (Press, simplified)": lambda: brown.tagged_sents(
+        categories=["news", "editorial", "reviews"], tagset="universal"
+    ),
+    "English: Brown Corpus (Religion, simplified)": lambda: brown.tagged_sents(
+        categories="religion", tagset="universal"
+    ),
+    "English: Brown Corpus (Learned, simplified)": lambda: brown.tagged_sents(
+        categories="learned", tagset="universal"
+    ),
+    "English: Brown Corpus (Science Fiction, simplified)": lambda: brown.tagged_sents(
+        categories="science_fiction", tagset="universal"
+    ),
+    "English: Brown Corpus (Romance, simplified)": lambda: brown.tagged_sents(
+        categories="romance", tagset="universal"
+    ),
+    "English: Brown Corpus (Humor, simplified)": lambda: brown.tagged_sents(
+        categories="humor", tagset="universal"
+    ),
+    "English: NPS Chat Corpus": lambda: nps_chat.tagged_posts(),
+    "English: NPS Chat Corpus (simplified)": lambda: nps_chat.tagged_posts(
+        tagset="universal"
+    ),
+    "English: Wall Street Journal Corpus": lambda: treebank.tagged_sents(),
+    "English: Wall Street Journal Corpus (simplified)": lambda: treebank.tagged_sents(
+        tagset="universal"
+    ),
+    "Chinese: Sinica Corpus": lambda: sinica_treebank.tagged_sents(),
+    "Chinese: Sinica Corpus (simplified)": lambda: sinica_treebank.tagged_sents(
+        tagset="universal"
+    ),
+    "Dutch: Alpino Corpus": lambda: alpino.tagged_sents(),
+    "Dutch: Alpino Corpus (simplified)": lambda: alpino.tagged_sents(
+        tagset="universal"
+    ),
+    "Hindi: Indian Languages Corpus": lambda: indian.tagged_sents(files="hindi.pos"),
+    "Hindi: Indian Languages Corpus (simplified)": lambda: indian.tagged_sents(
+        files="hindi.pos", tagset="universal"
+    ),
+    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.tagged_sents(),
+    "Portuguese: Floresta Corpus (Portugal, simplified)": lambda: floresta.tagged_sents(
+        tagset="universal"
+    ),
+    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.tagged_sents(),
+    "Portuguese: MAC-MORPHO Corpus (Brazil, simplified)": lambda: mac_morpho.tagged_sents(
+        tagset="universal"
+    ),
+    "Spanish: CESS-ESP Corpus (simplified)": lambda: cess_esp.tagged_sents(
+        tagset="universal"
+    ),
+}
 
-class ConcordanceSearchView(object):
-    _BACKGROUND_COLOUR='#FFF' #white
 
-    #Colour of highlighted results
-    _HIGHLIGHT_WORD_COLOUR='#F00' #red
-    _HIGHLIGHT_WORD_TAG='HL_WRD_TAG'
+class ConcordanceSearchView(object):
+    _BACKGROUND_COLOUR = "#FFF"  # white
 
-    _HIGHLIGHT_LABEL_COLOUR='#C0C0C0' # dark grey
-    _HIGHLIGHT_LABEL_TAG='HL_LBL_TAG'
+    # Colour of highlighted results
+    _HIGHLIGHT_WORD_COLOUR = "#F00"  # red
+    _HIGHLIGHT_WORD_TAG = "HL_WRD_TAG"
 
+    _HIGHLIGHT_LABEL_COLOUR = "#C0C0C0"  # dark grey
+    _HIGHLIGHT_LABEL_TAG = "HL_LBL_TAG"
 
-    #Percentage of text left of the scrollbar position
-    _FRACTION_LEFT_TEXT=0.30
+    # Percentage of text left of the scrollbar position
+    _FRACTION_LEFT_TEXT = 0.30
 
     def __init__(self):
         self.queue = q.Queue()
@@ -111,20 +140,22 @@ class ConcordanceSearchView(object):
         self.after = self.top.after(POLL_INTERVAL, self._poll)
 
     def _init_top(self, top):
-        top.geometry('950x680+50+50')
-        top.title('NLTK Concordance Search')
-        top.bind('<Control-q>', self.destroy)
-        top.protocol('WM_DELETE_WINDOW', self.destroy)
-        top.minsize(950,680)
+        top.geometry("950x680+50+50")
+        top.title("NLTK Concordance Search")
+        top.bind("<Control-q>", self.destroy)
+        top.protocol("WM_DELETE_WINDOW", self.destroy)
+        top.minsize(950, 680)
 
     def _init_widgets(self, parent):
-        self.main_frame = Frame(parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1))
+        self.main_frame = Frame(
+            parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1)
+        )
         self._init_corpus_select(self.main_frame)
         self._init_query_box(self.main_frame)
         self._init_results_box(self.main_frame)
         self._init_paging(self.main_frame)
         self._init_status(self.main_frame)
-        self.main_frame.pack(fill='both', expand=True)
+        self.main_frame.pack(fill="both", expand=True)
 
     def _init_menubar(self):
         self._result_size = IntVar(self.top)
@@ -133,60 +164,91 @@ class ConcordanceSearchView(object):
         menubar = Menu(self.top)
 
         filemenu = Menu(menubar, tearoff=0, borderwidth=0)
-        filemenu.add_command(label='Exit', underline=1,
-                             command=self.destroy, accelerator='Ctrl-q')
-        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+        filemenu.add_command(
+            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
+        )
+        menubar.add_cascade(label="File", underline=0, menu=filemenu)
 
         editmenu = Menu(menubar, tearoff=0)
         rescntmenu = Menu(editmenu, tearoff=0)
-        rescntmenu.add_radiobutton(label='20', variable=self._result_size,
-                                   underline=0, value=20,
-                                   command=self.set_result_size)
-        rescntmenu.add_radiobutton(label='50', variable=self._result_size,
-                                   underline=0, value=50,
-                                   command=self.set_result_size)
-        rescntmenu.add_radiobutton(label='100', variable=self._result_size,
-                                   underline=0, value=100,
-                                   command=self.set_result_size)
+        rescntmenu.add_radiobutton(
+            label="20",
+            variable=self._result_size,
+            underline=0,
+            value=20,
+            command=self.set_result_size,
+        )
+        rescntmenu.add_radiobutton(
+            label="50",
+            variable=self._result_size,
+            underline=0,
+            value=50,
+            command=self.set_result_size,
+        )
+        rescntmenu.add_radiobutton(
+            label="100",
+            variable=self._result_size,
+            underline=0,
+            value=100,
+            command=self.set_result_size,
+        )
         rescntmenu.invoke(1)
-        editmenu.add_cascade(label='Result Count', underline=0, menu=rescntmenu)
+        editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)
 
         cntxmenu = Menu(editmenu, tearoff=0)
         cntxbfmenu = Menu(cntxmenu, tearoff=0)
-        cntxbfmenu.add_radiobutton(label='60 characters',
-                                   variable=self._cntx_bf_len,
-                                   underline=0, value=60,
-                                   command=self.set_cntx_bf_len)
-        cntxbfmenu.add_radiobutton(label='80 characters',
-                                   variable=self._cntx_bf_len,
-                                   underline=0, value=80,
-                                   command=self.set_cntx_bf_len)
-        cntxbfmenu.add_radiobutton(label='100 characters',
-                                   variable=self._cntx_bf_len,
-                                   underline=0, value=100,
-                                   command=self.set_cntx_bf_len)
+        cntxbfmenu.add_radiobutton(
+            label="60 characters",
+            variable=self._cntx_bf_len,
+            underline=0,
+            value=60,
+            command=self.set_cntx_bf_len,
+        )
+        cntxbfmenu.add_radiobutton(
+            label="80 characters",
+            variable=self._cntx_bf_len,
+            underline=0,
+            value=80,
+            command=self.set_cntx_bf_len,
+        )
+        cntxbfmenu.add_radiobutton(
+            label="100 characters",
+            variable=self._cntx_bf_len,
+            underline=0,
+            value=100,
+            command=self.set_cntx_bf_len,
+        )
         cntxbfmenu.invoke(1)
-        cntxmenu.add_cascade(label='Before', underline=0, menu=cntxbfmenu)
+        cntxmenu.add_cascade(label="Before", underline=0, menu=cntxbfmenu)
 
         cntxafmenu = Menu(cntxmenu, tearoff=0)
-        cntxafmenu.add_radiobutton(label='70 characters',
-                                   variable=self._cntx_af_len,
-                                   underline=0, value=70,
-                                   command=self.set_cntx_af_len)
-        cntxafmenu.add_radiobutton(label='90 characters',
-                                   variable=self._cntx_af_len,
-                                   underline=0, value=90,
-                                   command=self.set_cntx_af_len)
-        cntxafmenu.add_radiobutton(label='110 characters',
-                                   variable=self._cntx_af_len,
-                                   underline=0, value=110,
-                                   command=self.set_cntx_af_len)
+        cntxafmenu.add_radiobutton(
+            label="70 characters",
+            variable=self._cntx_af_len,
+            underline=0,
+            value=70,
+            command=self.set_cntx_af_len,
+        )
+        cntxafmenu.add_radiobutton(
+            label="90 characters",
+            variable=self._cntx_af_len,
+            underline=0,
+            value=90,
+            command=self.set_cntx_af_len,
+        )
+        cntxafmenu.add_radiobutton(
+            label="110 characters",
+            variable=self._cntx_af_len,
+            underline=0,
+            value=110,
+            command=self.set_cntx_af_len,
+        )
         cntxafmenu.invoke(1)
-        cntxmenu.add_cascade(label='After', underline=0, menu=cntxafmenu)
+        cntxmenu.add_cascade(label="After", underline=0, menu=cntxafmenu)
 
-        editmenu.add_cascade(label='Context', underline=0, menu=cntxmenu)
+        editmenu.add_cascade(label="Context", underline=0, menu=cntxmenu)
 
-        menubar.add_cascade(label='Edit', underline=0, menu=editmenu)
+        menubar.add_cascade(label="Edit", underline=0, menu=editmenu)
 
         self.top.config(menu=menubar)
 
@@ -203,30 +265,59 @@ class ConcordanceSearchView(object):
         innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
         self.var = StringVar(innerframe)
         self.var.set(self.model.DEFAULT_CORPUS)
-        Label(innerframe, justify=LEFT, text=' Corpus: ',
-              background=self._BACKGROUND_COLOUR, padx = 2, pady = 1, border = 0).pack(side='left')
-
-        other_corpora = list(self.model.CORPORA.keys()).remove(self.model.DEFAULT_CORPUS)
-        om = OptionMenu(innerframe, self.var, self.model.DEFAULT_CORPUS, command=self.corpus_selected, *self.model.non_default_corpora())
-        om['borderwidth'] = 0
-        om['highlightthickness'] = 1
-        om.pack(side='left')
-        innerframe.pack(side='top', fill='x', anchor='n')
+        Label(
+            innerframe,
+            justify=LEFT,
+            text=" Corpus: ",
+            background=self._BACKGROUND_COLOUR,
+            padx=2,
+            pady=1,
+            border=0,
+        ).pack(side="left")
+
+        other_corpora = list(self.model.CORPORA.keys()).remove(
+            self.model.DEFAULT_CORPUS
+        )
+        om = OptionMenu(
+            innerframe,
+            self.var,
+            self.model.DEFAULT_CORPUS,
+            command=self.corpus_selected,
+            *self.model.non_default_corpora()
+        )
+        om["borderwidth"] = 0
+        om["highlightthickness"] = 1
+        om.pack(side="left")
+        innerframe.pack(side="top", fill="x", anchor="n")
 
     def _init_status(self, parent):
-        self.status = Label(parent, justify=LEFT, relief=SUNKEN, background=self._BACKGROUND_COLOUR, border=0, padx = 1, pady = 0)
-        self.status.pack(side='top', anchor='sw')
+        self.status = Label(
+            parent,
+            justify=LEFT,
+            relief=SUNKEN,
+            background=self._BACKGROUND_COLOUR,
+            border=0,
+            padx=1,
+            pady=0,
+        )
+        self.status.pack(side="top", anchor="sw")
 
     def _init_query_box(self, parent):
         innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
         another = Frame(innerframe, background=self._BACKGROUND_COLOUR)
         self.query_box = Entry(another, width=60)
-        self.query_box.pack(side='left', fill='x', pady=25, anchor='center')
-        self.search_button = Button(another, text='Search', command=self.search, borderwidth=1, highlightthickness=1)
-        self.search_button.pack(side='left', fill='x', pady=25, anchor='center')
-        self.query_box.bind('<KeyPress-Return>', self.search_enter_keypress_handler)
+        self.query_box.pack(side="left", fill="x", pady=25, anchor="center")
+        self.search_button = Button(
+            another,
+            text="Search",
+            command=self.search,
+            borderwidth=1,
+            highlightthickness=1,
+        )
+        self.search_button.pack(side="left", fill="x", pady=25, anchor="center")
+        self.query_box.bind("<KeyPress-Return>", self.search_enter_keypress_handler)
         another.pack()
-        innerframe.pack(side='top', fill='x', anchor='n')
+        innerframe.pack(side="top", fill="x", anchor="n")
 
     def search_enter_keypress_handler(self, *event):
         self.search()
@@ -236,32 +327,61 @@ class ConcordanceSearchView(object):
         i1 = Frame(innerframe)
         i2 = Frame(innerframe)
         vscrollbar = Scrollbar(i1, borderwidth=1)
-        hscrollbar = Scrollbar(i2, borderwidth=1, orient='horiz')
-        self.results_box = Text(i1,
-                                font=Font(family='courier', size='16'),
-                                state='disabled', borderwidth=1,
-                                                            yscrollcommand=vscrollbar.set,
-                                xscrollcommand=hscrollbar.set, wrap='none', width='40', height = '20', exportselection=1)
-        self.results_box.pack(side='left', fill='both', expand=True)
-        self.results_box.tag_config(self._HIGHLIGHT_WORD_TAG, foreground=self._HIGHLIGHT_WORD_COLOUR)
-        self.results_box.tag_config(self._HIGHLIGHT_LABEL_TAG, foreground=self._HIGHLIGHT_LABEL_COLOUR)
-        vscrollbar.pack(side='left', fill='y', anchor='e')
+        hscrollbar = Scrollbar(i2, borderwidth=1, orient="horiz")
+        self.results_box = Text(
+            i1,
+            font=Font(family="courier", size="16"),
+            state="disabled",
+            borderwidth=1,
+            yscrollcommand=vscrollbar.set,
+            xscrollcommand=hscrollbar.set,
+            wrap="none",
+            width="40",
+            height="20",
+            exportselection=1,
+        )
+        self.results_box.pack(side="left", fill="both", expand=True)
+        self.results_box.tag_config(
+            self._HIGHLIGHT_WORD_TAG, foreground=self._HIGHLIGHT_WORD_COLOUR
+        )
+        self.results_box.tag_config(
+            self._HIGHLIGHT_LABEL_TAG, foreground=self._HIGHLIGHT_LABEL_COLOUR
+        )
+        vscrollbar.pack(side="left", fill="y", anchor="e")
         vscrollbar.config(command=self.results_box.yview)
-        hscrollbar.pack(side='left', fill='x', expand=True, anchor='w')
+        hscrollbar.pack(side="left", fill="x", expand=True, anchor="w")
         hscrollbar.config(command=self.results_box.xview)
-        #there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!!
-        Label(i2, text='   ', background=self._BACKGROUND_COLOUR).pack(side='left', anchor='e')
-        i1.pack(side='top', fill='both', expand=True, anchor='n')
-        i2.pack(side='bottom', fill='x', anchor='s')
-        innerframe.pack(side='top', fill='both', expand=True)
+        # there is no other way of avoiding the overlap of scrollbars while using the pack layout manager
+        Label(i2, text="   ", background=self._BACKGROUND_COLOUR).pack(
+            side="left", anchor="e"
+        )
+        i1.pack(side="top", fill="both", expand=True, anchor="n")
+        i2.pack(side="bottom", fill="x", anchor="s")
+        innerframe.pack(side="top", fill="both", expand=True)
 
     def _init_paging(self, parent):
         innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
-        self.prev = prev = Button(innerframe, text='Previous', command=self.previous, width='10', borderwidth=1, highlightthickness=1, state='disabled')
-        prev.pack(side='left', anchor='center')
-        self.next = next = Button(innerframe, text='Next', command=self.__next__, width='10', borderwidth=1, highlightthickness=1, state='disabled')
-        next.pack(side='right', anchor='center')
-        innerframe.pack(side='top', fill='y')
+        self.prev = prev = Button(
+            innerframe,
+            text="Previous",
+            command=self.previous,
+            width="10",
+            borderwidth=1,
+            highlightthickness=1,
+            state="disabled",
+        )
+        prev.pack(side="left", anchor="center")
+        self.next = next = Button(
+            innerframe,
+            text="Next",
+            command=self.__next__,
+            width="10",
+            borderwidth=1,
+            highlightthickness=1,
+            state="disabled",
+        )
+        next.pack(side="right", anchor="center")
+        innerframe.pack(side="top", fill="y")
         self.current_page = 0
 
     def previous(self):
@@ -275,10 +395,11 @@ class ConcordanceSearchView(object):
         self.model.next(self.current_page + 1)
 
     def about(self, *e):
-        ABOUT = ("NLTK Concordance Search Demo\n")
-        TITLE = 'About: NLTK Concordance Search Demo'
+        ABOUT = "NLTK Concordance Search Demo\n"
+        TITLE = "About: NLTK Concordance Search Demo"
         try:
-            from six.moves.tkinter_messagebox import Message
+            from tkinter.messagebox import Message
+
             Message(message=ABOUT, title=TITLE, parent=self.main_frame).show()
         except:
             ShowText(self.top, TITLE, ABOUT)
@@ -306,31 +427,31 @@ class ConcordanceSearchView(object):
         self.after = self.top.after(POLL_INTERVAL, self._poll)
 
     def handle_error_loading_corpus(self, event):
-        self.status['text'] = 'Error in loading ' + self.var.get()
+        self.status["text"] = "Error in loading " + self.var.get()
         self.unfreeze_editable()
         self.clear_all()
         self.freeze_editable()
 
     def handle_corpus_loaded(self, event):
-        self.status['text'] = self.var.get() + ' is loaded'
+        self.status["text"] = self.var.get() + " is loaded"
         self.unfreeze_editable()
         self.clear_all()
         self.query_box.focus_set()
 
     def handle_search_terminated(self, event):
-        #todo: refactor the model such that it is less state sensitive
+        # TODO: refactor the model so that it is less state-sensitive
         results = self.model.get_results()
         self.write_results(results)
-        self.status['text'] = ''
+        self.status["text"] = ""
         if len(results) == 0:
-            self.status['text'] = 'No results found for ' + self.model.query
+            self.status["text"] = "No results found for " + self.model.query
         else:
-                self.current_page = self.model.last_requested_page
+            self.current_page = self.model.last_requested_page
         self.unfreeze_editable()
         self.results_box.xview_moveto(self._FRACTION_LEFT_TEXT)
 
     def handle_search_error(self, event):
-        self.status['text'] = 'Error in query ' + self.model.query
+        self.status["text"] = "Error in query " + self.model.query
         self.unfreeze_editable()
 
     def corpus_selected(self, *args):
@@ -339,7 +460,7 @@ class ConcordanceSearchView(object):
 
     def load_corpus(self, selection):
         if self.model.selected_corpus != selection:
-            self.status['text'] = 'Loading ' + selection + '...'
+            self.status["text"] = "Loading " + selection + "..."
             self.freeze_editable()
             self.model.load_corpus(selection)
 
@@ -348,43 +469,57 @@ class ConcordanceSearchView(object):
         self.clear_results_box()
         self.model.reset_results()
         query = self.query_box.get()
-        if (len(query.strip()) == 0): return
-        self.status['text']  = 'Searching for ' + query
+        if len(query.strip()) == 0:
+            return
+        self.status["text"] = "Searching for " + query
         self.freeze_editable()
-        self.model.search(query, self.current_page + 1, )
-
+        self.model.search(query, self.current_page + 1)
 
     def write_results(self, results):
-        self.results_box['state'] = 'normal'
+        self.results_box["state"] = "normal"
         row = 1
         for each in results:
             sent, pos1, pos2 = each[0].strip(), each[1], each[2]
             if len(sent) != 0:
-                if (pos1 < self._char_before):
+                if pos1 < self._char_before:
                     sent, pos1, pos2 = self.pad(sent, pos1, pos2)
-                sentence = sent[pos1-self._char_before:pos1+self._char_after]
+                sentence = sent[pos1 - self._char_before : pos1 + self._char_after]
                 if not row == len(results):
-                    sentence += '\n'
-                self.results_box.insert(str(row) + '.0', sentence)
+                    sentence += "\n"
+                self.results_box.insert(str(row) + ".0", sentence)
                 word_markers, label_markers = self.words_and_labels(sent, pos1, pos2)
-                for marker in word_markers: self.results_box.tag_add(self._HIGHLIGHT_WORD_TAG, str(row) + '.' + str(marker[0]), str(row) + '.' + str(marker[1]))
-                for marker in label_markers: self.results_box.tag_add(self._HIGHLIGHT_LABEL_TAG, str(row) + '.' + str(marker[0]), str(row) + '.' + str(marker[1]))
+                for marker in word_markers:
+                    self.results_box.tag_add(
+                        self._HIGHLIGHT_WORD_TAG,
+                        str(row) + "." + str(marker[0]),
+                        str(row) + "." + str(marker[1]),
+                    )
+                for marker in label_markers:
+                    self.results_box.tag_add(
+                        self._HIGHLIGHT_LABEL_TAG,
+                        str(row) + "." + str(marker[0]),
+                        str(row) + "." + str(marker[1]),
+                    )
                 row += 1
-        self.results_box['state'] = 'disabled'
+        self.results_box["state"] = "disabled"
 
     def words_and_labels(self, sentence, pos1, pos2):
         search_exp = sentence[pos1:pos2]
         words, labels = [], []
-        labeled_words = search_exp.split(' ')
+        labeled_words = search_exp.split(" ")
         index = 0
         for each in labeled_words:
-            if each == '':
+            if each == "":
                 index += 1
             else:
-                word, label = each.split('/')
-                words.append((self._char_before + index, self._char_before + index + len(word)))
+                word, label = each.split("/")
+                words.append(
+                    (self._char_before + index, self._char_before + index + len(word))
+                )
                 index += len(word) + 1
-                labels.append((self._char_before + index, self._char_before + index + len(label)))
+                labels.append(
+                    (self._char_before + index, self._char_before + index + len(label))
+                )
                 index += len(label)
             index += 1
         return words, labels
@@ -393,11 +528,12 @@ class ConcordanceSearchView(object):
         if hstart >= self._char_before:
             return sent, hstart, hend
         d = self._char_before - hstart
-        sent = ''.join([' '] * d) + sent
+        sent = "".join([" "] * d) + sent
         return sent, hstart + d, hend + d
 
     def destroy(self, *e):
-        if self.top is None: return
+        if self.top is None:
+            return
         self.top.after_cancel(self.after)
         self.top.destroy()
         self.top = None
@@ -408,39 +544,41 @@ class ConcordanceSearchView(object):
         self.clear_results_box()
 
     def clear_results_box(self):
-        self.results_box['state'] = 'normal'
+        self.results_box["state"] = "normal"
         self.results_box.delete("1.0", END)
-        self.results_box['state'] = 'disabled'
+        self.results_box["state"] = "disabled"
 
     def freeze_editable(self):
-        self.query_box['state'] = 'disabled'
-        self.search_button['state'] = 'disabled'
-        self.prev['state'] = 'disabled'
-        self.next['state'] = 'disabled'
+        self.query_box["state"] = "disabled"
+        self.search_button["state"] = "disabled"
+        self.prev["state"] = "disabled"
+        self.next["state"] = "disabled"
 
     def unfreeze_editable(self):
-        self.query_box['state'] = 'normal'
-        self.search_button['state'] = 'normal'
+        self.query_box["state"] = "normal"
+        self.search_button["state"] = "normal"
         self.set_paging_button_states()
 
     def set_paging_button_states(self):
         if self.current_page == 0 or self.current_page == 1:
-            self.prev['state'] = 'disabled'
+            self.prev["state"] = "disabled"
         else:
-            self.prev['state'] = 'normal'
+            self.prev["state"] = "normal"
         if self.model.has_more_pages(self.current_page):
-            self.next['state'] = 'normal'
+            self.next["state"] = "normal"
         else:
-            self.next['state'] = 'disabled'
+            self.next["state"] = "disabled"
 
     def fire_event(self, event):
-        #Firing an event so that rendering of widgets happen in the mainloop thread
-        self.top.event_generate(event, when='tail')
+        # Fire an event so that widget rendering happens in the mainloop thread
+        self.top.event_generate(event, when="tail")
 
     def mainloop(self, *args, **kwargs):
-        if in_idle(): return
+        if in_idle():
+            return
         self.top.mainloop(*args, **kwargs)
 
+
 class ConcordanceSearchModel(object):
     def __init__(self, queue):
         self.queue = queue
@@ -510,7 +648,9 @@ class ConcordanceSearchModel(object):
         def run(self):
             try:
                 ts = self.model.CORPORA[self.name]()
-                self.model.tagged_sents = [' '.join(w+'/'+t for (w,t) in sent) for sent in ts]
+                self.model.tagged_sents = [
+                    " ".join(w + "/" + t for (w, t) in sent) for sent in ts
+                ]
                 self.model.queue.put(CORPUS_LOADED_EVENT)
             except Exception as e:
                 print(e)
@@ -524,7 +664,7 @@ class ConcordanceSearchModel(object):
         def run(self):
             q = self.processed_query()
             sent_pos, i, sent_count = [], 0, 0
-            for sent in self.model.tagged_sents[self.model.last_sent_searched:]:
+            for sent in self.model.tagged_sents[self.model.last_sent_searched :]:
                 try:
                     m = re.search(q, sent)
                 except re.error:
@@ -538,7 +678,7 @@ class ConcordanceSearchModel(object):
                         self.model.last_sent_searched += sent_count - 1
                         break
                 sent_count += 1
-            if (self.count >= len(sent_pos)):
+            if self.count >= len(sent_pos):
                 self.model.last_sent_searched += sent_count - 1
                 self.model.last_page = self.page
                 self.model.set_results(self.page, sent_pos)
@@ -549,20 +689,22 @@ class ConcordanceSearchModel(object):
         def processed_query(self):
             new = []
             for term in self.model.query.split():
-                term = re.sub(r'\.', r'[^/ ]', term)
-                if re.match('[A-Z]+$', term):
-                    new.append(BOUNDARY + WORD_OR_TAG + '/' + term + BOUNDARY)
-                elif '/' in term:
+                term = re.sub(r"\.", r"[^/ ]", term)
+                if re.match("[A-Z]+$", term):
+                    new.append(BOUNDARY + WORD_OR_TAG + "/" + term + BOUNDARY)
+                elif "/" in term:
                     new.append(BOUNDARY + term + BOUNDARY)
                 else:
-                    new.append(BOUNDARY + term + '/' + WORD_OR_TAG + BOUNDARY)
-            return ' '.join(new)
+                    new.append(BOUNDARY + term + "/" + WORD_OR_TAG + BOUNDARY)
+            return " ".join(new)
+
 
 def app():
     d = ConcordanceSearchView()
     d.mainloop()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     app()
 
-__all__ = ['app']
+__all__ = ["app"]
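
The query rewriting in processed_query above is easier to follow standalone. Below is a minimal sketch; the BOUNDARY and WORD_OR_TAG values are assumptions mirroring the module constants defined earlier in concordance_app.py, outside this hunk:

    import re

    BOUNDARY = r"\b"        # assumed value of the module constant
    WORD_OR_TAG = "[^/ ]+"  # assumed value of the module constant

    def processed_query(query):
        # Mirror of the method above: rewrite each query term into a regex
        # over the "word/TAG" tokens built when the corpus is loaded.
        new = []
        for term in query.split():
            term = re.sub(r"\.", r"[^/ ]", term)  # '.' = any non-separator char
            if re.match("[A-Z]+$", term):         # bare tag, e.g. "NN"
                new.append(BOUNDARY + WORD_OR_TAG + "/" + term + BOUNDARY)
            elif "/" in term:                     # explicit word/tag pair
                new.append(BOUNDARY + term + BOUNDARY)
            else:                                 # bare word, any tag
                new.append(BOUNDARY + term + "/" + WORD_OR_TAG + BOUNDARY)
        return " ".join(new)

    print(processed_query("dog NN"))  # \bdog/[^/ ]+\b \b[^/ ]+/NN\b

An all-caps term matches any word carrying that tag, a term containing "/" matches a literal word/tag pair, and any other term matches that word under any tag.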
diff --git a/nlp_resource_data/nltk/app/concordance_app.pyc b/nlp_resource_data/nltk/app/concordance_app.pyc
deleted file mode 100755 (executable)
index bca3c83..0000000
Binary files a/nlp_resource_data/nltk/app/concordance_app.pyc and /dev/null differ
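
The concordance module above and the two app modules below all make the same import migration, which is the heart of this commit's py2-to-py3 conversion: the six.moves shims give way to the Python 3 standard-library tkinter names. A minimal before/after sketch, for illustration only:

    # Before (Python 2 via six, removed throughout this commit):
    #   from six.moves.tkinter import Frame, Label, Tk
    #   from six.moves.tkinter_font import Font
    #   from six.moves.tkinter_messagebox import Message

    # After (Python 3 standard library, added throughout this commit):
    from tkinter import Frame, Label, Tk
    from tkinter.font import Font
    from tkinter.messagebox import Message

On Python 2 the six shim resolved these names to Tkinter, tkFont, and tkMessageBox; importing tkinter directly removes the third-party six dependency at the cost of requiring Python 3.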
diff --git a/nlp_resource_data/nltk/app/nemo_app.py b/nlp_resource_data/nltk/app/nemo_app.py
old mode 100755 (executable)
new mode 100644 (file)
index 4b142fc..e653ea1
@@ -7,12 +7,21 @@ Finding (and Replacing) Nemo
 Instant Regular Expressions
 Created by Aristide Grange
 """
-
-from six.moves.tkinter import (Frame, Label, PhotoImage, Scrollbar, Text, Tk,
-                               SEL_FIRST, SEL_LAST)
 import re
 import itertools
 
+from tkinter import (
+    Frame,
+    Label,
+    PhotoImage,
+    Scrollbar,
+    Text,
+    Tk,
+    SEL_FIRST,
+    SEL_LAST,
+)
+
+
 windowTitle = "Finding (and Replacing) Nemo"
 initialFind = r"n(.*?)e(.*?)m(.*?)o"
 initialRepl = r"M\1A\2K\3I"
@@ -23,29 +32,29 @@ Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
 Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
 """
 images = {
-    "FIND":"R0lGODlhMAAiAPcAMf/////37//35//n1v97Off///f/9/f37/fexvfOvfeEQvd7QvdrQvdrKfdaKfdSMfdSIe/v9+/v7+/v5+/n3u/e1u/Wxu/Gre+1lO+tnO+thO+Ua+97Y+97Oe97Me9rOe9rMe9jOe9jMe9jIe9aMefe5+fe3ufezuece+eEWudzQudaIedSIedKMedKIedCKedCId7e1t7Wzt7Oxt7Gvd69vd69rd61pd6ljN6UjN6Ue96EY95zY95rUt5rQt5jMd5SId5KIdbn59be3tbGztbGvda1rdaEa9Z7a9Z7WtZzQtZzOdZzMdZjMdZaQtZSOdZSMdZKMdZCKdZCGNY5Ic7W1s7Oxs7Gtc69xs69tc69rc6tpc6llM6clM6cjM6Ue86EY85zWs5rSs5SKc5KKc5KGMa1tcatrcalvcalnMaUpcZ7c8ZzMcZrUsZrOcZrMcZaQsZSOcZSMcZKMcZCKcZCGMYxIcYxGL3Gxr21tb21rb2lpb2crb2cjL2UnL2UlL2UhL2Ec717Wr17Ur1zWr1rMb1jUr1KMb1KIb1CIb0xGLWlrbWlpbWcnLWEe7V7c7VzY7VzUrVSKbVKMbVCMbVCIbU5KbUxIbUxEK2lta2lpa2clK2UjK2MnK2MlK2Ea617e61za61rY61rMa1jSq1aUq1aSq1SQq1KKa0xEKWlnKWcnKWUnKWUhKWMjKWEa6Vza6VrWqVjMaVaUqVaKaVSMaVCMaU5KaUxIaUxGJyclJyMe5yElJyEhJx7e5x7c5xrOZxaQpxSOZxKQpw5IZSMhJSEjJR7c5Rre5RrY5RrUpRSQpRSKZRCOZRCKZQxKZQxIYyEhIx7hIxza4xzY4xrc4xjUoxaa4xaUoxSSoxKQoxCMYw5GIR7c4Rzc4Rre4RjY4RjWoRaa4RSWoRSUoRSMYRKQoRCOYQ5KYQxIXtra3taY3taSntKOXtCMXtCKXNCMXM5MXMxIWtSUmtKSmtKQmtCOWs5MWs5KWs5IWNCKWMxIVIxKUIQCDkhGAAAACH+AS4ALAAAAAAwACIAAAj/AAEIHEiwoMGDCBMqXMiwoUOHMqxIeEiRoZVp7cpZ29WrF4WKIAd208dGAQEVbiTVChUjZMU9+pYQmPmBZpxgvVw+nDdKwQICNVcIXQEkTgKdDdUJ+/nggVAXK1xI3TEA6UIr2uJ8iBqka1cXXTlkqGoVYRZ7iLyqBSs0iiEtZQVKiDGxBI1u3NR6lUpGDKg8MSgEQCphU7Z22vhg0dILXRCpYLuSCcYJT4wqXASBQaBzU7klHxC127OHD7ZDJFpERqRt0x5OnwQpmZmCLEhrbgg4WIHO1RY+nbQ9WRGEDJlmnXwJ+9FBgXMCIzYMVijBBgYMFxIMqJBMSc0Ht7qh/+Gjpte2rnYsYeNlasWIBgQ6yCewIoPCCp/cyP/wgUGbXVu0QcADZNBDnh98gHMLGXYQUw02w61QU3wdbNWDbQVVIIhMMwFF1DaZiPLBAy7E04kafrjSizaK3LFNNc0AAYRQDsAHHQlJ2IDQJ2zE1+EKDjiAijShkECCC8Qgw4cr7ZgyzC2WaHPNLWWoNeNWPiRAw0QFWQFMhz8C+QQ20yAiVSrY+MGOJCsccsst2GCzoHFxxEGGC+8hgs0MB2kyCpgzrUDCbs1Es41UdtATHFFkWELMOtsoQsYcgvRRQw5RSDgGOjZMR1AvPQIq6KCo9AKOJWDd48owQlHR4DXEKP9iyRrK+DNNBTu4RwIPFeTAGUG7hAomkA84gEg1m6ADljy9PBKGGJY4ig0xlsTBRSn98FOFDUC8pwQOPkgHbCGAzhTkA850s0c7j6Hjix9+gBIrMXLeAccWXUCyiRBcBEECdEJ98KtAqtBCYQc/OvDENnl4gYpUxISCIjjzylkGGV9okYUVNogRhAOBuuAEhjG08wOgDYzAgA5bCjIoCe5uwUk80RKTTSppPREGGGCIISOQ9AXBg6cC6WIywvCpoMHAocRBwhP4bHLFLujYkV42xNxBRhAyGrc113EgYtRBerDDDHMoDCyQEL5sE083EkgwQyBhxGFHMM206DUixGxmE0wssbQjCQ4JCaFKFwgQTVAVVhQUwAVPIFJKrHfYYRwi6OCDzzuIJIFhXAD0EccPsYRiSyqKSDpFcWSMIcZRoBMkQyA2BGZDIKSYcggih8TRRg4VxM5QABVYYLxgwiev/PLMCxQQADs=",
-    "find":"R0lGODlhMAAiAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OSkpKRgYGAAAAAAAAAAAAAAAAAAAACH+AS4ALAAAAAAwACIAAAX/ICCOZGmeaKquY2AGLiuvMCAUBuHWc48Kh0iFInEYCb4kSQCxPBiMxkMigRQEgJiSFVBYHNGG0RiZOHjblWAiiY4fkDhEYoBp06dAWfyAQyKAgAwDaHgnB0RwgYASgQ0IhDuGJDAIFhMRVFSLEX8QCJJ4AQM5AgQHTZqqjBAOCQQEkWkCDRMUFQsICQ4Vm5maEwwHOAsPDTpKMAsUDlO4CssTcb+2DAp8YGCyNFoCEsZwFQ3QDRTTVBRS0g1QbgsCd5QAAwgIBwYFAwStzQ8UEdCKVchky0yVBw7YuXkAKt4IAg74vXHVagqFBRgXSCAyYWAVCH0SNhDTitCJfSL5/4RbAPKPhQYYjVCYYAvCP0BxEDaD8CheAAHNwqh8MMGPSwgLeJWhwHSjqkYI+xg4MMCEgQjtRvZ7UAYCpghMF7CxONOWJkYR+rCpY4JlVpVxKDwYWEactKW9mhYRtqCTgwgWEMArERSK1j5q//6T8KXonFsShpiJkAECgQYVjykooCVA0JGHEWNiYCHThTFeb3UkoiCCBgwGEKQ1kuAJlhFwhA71h5SukwUM5qqeCSGBgicEWkfNiWSERtBad4JNIBaQBaQah1ToyGZBAnsIuIJs1qnqiAIVjIE2gnAB1T5x0icgzXT79ipgMOOEH6HBbREBMJCeGEY08IoLAkzB1YYFwjxwSUGSNULQJnNUwRYlCcyEkALIxECAP9cNMMABYpRhy3ZsSLDaR70oUAiABGCkAxowCGCAAfDYIQACXoElGRsdXWDBdg2Y90IWktDYGYAB9PWHP0PMdFZaF07SQgAFNDAMAQg0QA1UC8xoZQl22JGFPgWkOUCOL1pZQyhjxinnnCWEAAA7",
-    "REPL":"R0lGODlhMAAjAPcAMf/////3//+lOf+UKf+MEPf///f39/f35/fv7/ecQvecOfecKfeUIfeUGPeUEPeUCPeMAO/37+/v9+/v3u/n3u/n1u+9jO+9c++1hO+ta++tY++tWu+tUu+tSu+lUu+lQu+lMe+UMe+UKe+UGO+UEO+UAO+MCOfv5+fvxufn7+fn5+fnzue9lOe9c+e1jOe1e+e1c+e1a+etWuetUuelQuecOeeUUueUCN7e597e3t7e1t7ezt7evd7Wzt7Oxt7Ovd7Otd7Opd7OnN7Gtd7Gpd69lN61hN6ta96lStbextberdbW3tbWztbWxtbOvdbOrda1hNalUtaECM7W1s7Ozs7Oxs7Otc7Gxs7Gvc69tc69rc69pc61jM6lc8bWlMbOvcbGxsbGpca9tca9pca1nMaMAL3OhL3Gtb21vb21tb2tpb2tnL2tlLW9tbW9pbW9e7W1pbWtjLWcKa21nK2tra2tnK2tlK2lpa2llK2ljK2le6WlnKWljKWUe6WUc6WUY5y1QpyclJycjJychJyUc5yMY5StY5SUe5SMhJSMe5SMc5SMWpSEa5SESoyUe4yMhIyEY4SlKYScWoSMe4SEe4SEa4R7c4R7Y3uMY3uEe3t7e3t7c3tza3tzY3trKXtjIXOcAHOUMXOEY3Nzc3NzWnNrSmulCGuUMWuMGGtzWmtrY2taMWtaGGOUOWOMAGNzUmNjWmNjSmNaUmNaQmNaOWNaIWNSCFqcAFpjUlpSMVpSIVpSEFpKKVKMAFJSUlJSSlJSMVJKMVJKGFJKAFI5CEqUAEqEAEpzQkpKIUpCQkpCGEpCAEo5EEoxAEJjOUJCOUJCAEI5IUIxADl7ADlaITlCOTkxMTkxKTkxEDkhADFzADFrGDE5OTExADEpEClrCCkxKSkpKSkpISkpACkhCCkhACkYACFzACFrACEhCCEYGBhjEBhjABghABgYCBgYABgQEBgQABAQABAIAAhjAAhSAAhKAAgIEAgICABaAABCAAAhAAAQAAAIAAAAAAAAACH+AS4ALAAAAAAwACMAAAj/AAEIHEiwoMGDCBMqXMiwocOHAA4cgEixIIIJO3JMmAjADIqKFU/8MHIkg5EgYXx4iaTkI0iHE6wE2TCggYILQayEAgXIy8uGCKz8sDCAQAMRG3iEcXULlJkJPwli3OFjh9UdYYLE6NBhA04UXHoVA2XoTZgfPKBWlOBDphAWOdfMcfMDLloeO3hIMjbWVCQ5Fn6E2UFxgpsgFjYIEBADrZU6luqEEfqjTqpt54z1uuWqTIcgWAk7PECGzIUQDRosDmxlUrVJkwQJkqVuX71v06YZcyUlROAdbnLAJKPFyAYFAhoMwFlnEh0rWkpz8raPHm7dqKKc/KFFkBUrVn1M/ziBcEIeLUEQI8/AYk0i9Be4sqjsrN66c9/OnbobhpR3HkIUoZ0WVnBE0AGLFKKFD0HAFUQe77HQgQI1hRBDEHMcY0899bBzihZuCPILJD8EccEGGzwAQhFaUHHQH82sUkgeNHISDBk8WCCCcsqFUEQWmOyzjz3sUGNNOO5Y48YOEgowAAQhnBScQV00k82V47jzjy9CXZBcjziFoco//4CDiSOyhPMPLkJZkEBqJmRQxA9uZGEQD8Ncmc044/zzDF2IZQBCCDYE8QMZz/iiCSx0neHGI7BIhhhNn+1gxRpokEcQAp7seWU7/PwTyxqG/iCEEVzQmUombnDRxRExzP9nBR2PCKLFD3UJwcMPa/SRqUGNWJmNOVn+M44ukMRB4KGcWDNLVhuUMEIJAlzwA3DJBHMJIXm4sQYhqyxCRQQGLSIsn1qac2UzysQSyzX/hLMGD0F0IMCODYAQBA9W/PKPOcRiw0wzwxTiokF9dLMnuv/Mo+fCZF7jBr0xbDDCACWEYKgb1vzjDp/jZNOMLX0IZxAKq2TZTjtaOjwOsXyG+s8sZJTIQsUdIGHoJPf8w487QI/TDSt5mGwQFZxc406o8HiDJchk/ltLHpSlJwSvz5DpTjvmuGNOM57koelBOaAhiCaaPBLL0wwbm003peRBnBZqJMJL1ECz/HXYYx/NdAIOOVCxQyLorswymU93o0wuwfAiTDNR/xz0MLXU0XdCE+UwSTRZAq2lsSATu+4wkGvt+TjNzPLrQyegAUku2Hij5cd8LhxyM8QIg4w18HgcdC6BTBFSDmfQqsovttveDcG7lFLHI75cE841sARCxeWsnxC4G9HADPK6ywzDCRqBo0EHHWhMgT1IJzziNci1N7PMKnSYfML96/90AiJKey/0KtbLX1QK0rrNnQ541xugQ7SHhkXBghN0SKACWRc4KlAhBwKcIOYymJCAAAA7",
-    "repl":"R0lGODlhMAAjAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OTExMSkpKSEhIRgYGBAQEAgICAAAACH+AS4ALAAAAAAwACMAAAX/ICCOZGmeaKqubOu+gCDANBkIQ1EMQhAghFptYEAkEgjEwXBo7ISvweGgWCwUysPjwTgEoCafTySYIhYMxgLBjEQgCULvCw0QdAZdoVhUIJUFChISEAxYeQM1N1OMTAp+UwZ5eA4TEhFbDWYFdC4ECVMJjwl5BwsQa0umEhUVlhESDgqlBp0rAn5nVpBMDxeZDRQbHBgWFBSWDgtLBnFjKwRYCI9VqQsPs0YKEcMXFq0UEalFDWx4BAO2IwPjppAKDkrTWKYUGd7fEJJFEZpM00cOzCgh4EE8SaoWxKNixQooBRMyZMBwAYIRBhUgLDGS4MoBJeoANMhAgQsaCRZm/5lqaCUJhA4cNHjDoKEDBlJUHqkBlYBTiQUZNGjYMMxDhY3VWk6R4MEDBoMUak5AqoYBqANIBo4wcGGDUKIeLlzVZmWJggsVIkwAZaQSA3kdZzlKkIiEAAlDvW5oOkEBs488JTw44oeUIwdvVTFTUK7uiAAPgubt8GFDhQepqETAQCFU1UMGzlqAgFhUsAcCS0AO6lUDhw8xNRSbENGDhgWSHjWUe6ACbKITizmopZoBa6KvOwj9uuHDhwxyj3xekgDDhw5EvWKo0IB4iQLCOCC/njc7ZQ8UeGvza+ABZZgcxJNc4FO1gc0cOsCUrHevc8tdIMTIAhc4F198G2Qwwd8CBIQUAwEINABBBJUwR9R5wElgVRLwWODBBx4cGB8GEzDQIAo33CGJA8gh+JoH/clUgQU0YvDhdfmJdwEFC6Sjgg8yEPAABsPkh2F22cl2AQbn6QdTghTQ5eAJAQyQAAQV0MSBB9gRVZ4GE1mw5JZOAmiAVi1UWcAZDrDyZXYTeaOhA/bIVuIBPtKQ4h7ViYekUPdcEAEbzTzCRp5CADmAAwj+ORGPBcgwAAHo9ABGCYtm0ChwFHShlRiXhmHlkAcCiOeUodqQw5W0oXLAiamy4MOkjOyAaqxUymApDCEAADs=",
+    "FIND": "R0lGODlhMAAiAPcAMf/////37//35//n1v97Off///f/9/f37/fexvfOvfeEQvd7QvdrQvdrKfdaKfdSMfdSIe/v9+/v7+/v5+/n3u/e1u/Wxu/Gre+1lO+tnO+thO+Ua+97Y+97Oe97Me9rOe9rMe9jOe9jMe9jIe9aMefe5+fe3ufezuece+eEWudzQudaIedSIedKMedKIedCKedCId7e1t7Wzt7Oxt7Gvd69vd69rd61pd6ljN6UjN6Ue96EY95zY95rUt5rQt5jMd5SId5KIdbn59be3tbGztbGvda1rdaEa9Z7a9Z7WtZzQtZzOdZzMdZjMdZaQtZSOdZSMdZKMdZCKdZCGNY5Ic7W1s7Oxs7Gtc69xs69tc69rc6tpc6llM6clM6cjM6Ue86EY85zWs5rSs5SKc5KKc5KGMa1tcatrcalvcalnMaUpcZ7c8ZzMcZrUsZrOcZrMcZaQsZSOcZSMcZKMcZCKcZCGMYxIcYxGL3Gxr21tb21rb2lpb2crb2cjL2UnL2UlL2UhL2Ec717Wr17Ur1zWr1rMb1jUr1KMb1KIb1CIb0xGLWlrbWlpbWcnLWEe7V7c7VzY7VzUrVSKbVKMbVCMbVCIbU5KbUxIbUxEK2lta2lpa2clK2UjK2MnK2MlK2Ea617e61za61rY61rMa1jSq1aUq1aSq1SQq1KKa0xEKWlnKWcnKWUnKWUhKWMjKWEa6Vza6VrWqVjMaVaUqVaKaVSMaVCMaU5KaUxIaUxGJyclJyMe5yElJyEhJx7e5x7c5xrOZxaQpxSOZxKQpw5IZSMhJSEjJR7c5Rre5RrY5RrUpRSQpRSKZRCOZRCKZQxKZQxIYyEhIx7hIxza4xzY4xrc4xjUoxaa4xaUoxSSoxKQoxCMYw5GIR7c4Rzc4Rre4RjY4RjWoRaa4RSWoRSUoRSMYRKQoRCOYQ5KYQxIXtra3taY3taSntKOXtCMXtCKXNCMXM5MXMxIWtSUmtKSmtKQmtCOWs5MWs5KWs5IWNCKWMxIVIxKUIQCDkhGAAAACH+AS4ALAAAAAAwACIAAAj/AAEIHEiwoMGDCBMqXMiwoUOHMqxIeEiRoZVp7cpZ29WrF4WKIAd208dGAQEVbiTVChUjZMU9+pYQmPmBZpxgvVw+nDdKwQICNVcIXQEkTgKdDdUJ+/nggVAXK1xI3TEA6UIr2uJ8iBqka1cXXTlkqGoVYRZ7iLyqBSs0iiEtZQVKiDGxBI1u3NR6lUpGDKg8MSgEQCphU7Z22vhg0dILXRCpYLuSCcYJT4wqXASBQaBzU7klHxC127OHD7ZDJFpERqRt0x5OnwQpmZmCLEhrbgg4WIHO1RY+nbQ9WRGEDJlmnXwJ+9FBgXMCIzYMVijBBgYMFxIMqJBMSc0Ht7qh/+Gjpte2rnYsYeNlasWIBgQ6yCewIoPCCp/cyP/wgUGbXVu0QcADZNBDnh98gHMLGXYQUw02w61QU3wdbNWDbQVVIIhMMwFF1DaZiPLBAy7E04kafrjSizaK3LFNNc0AAYRQDsAHHQlJ2IDQJ2zE1+EKDjiAijShkECCC8Qgw4cr7ZgyzC2WaHPNLWWoNeNWPiRAw0QFWQFMhz8C+QQ20yAiVSrY+MGOJCsccsst2GCzoHFxxEGGC+8hgs0MB2kyCpgzrUDCbs1Es41UdtATHFFkWELMOtsoQsYcgvRRQw5RSDgGOjZMR1AvPQIq6KCo9AKOJWDd48owQlHR4DXEKP9iyRrK+DNNBTu4RwIPFeTAGUG7hAomkA84gEg1m6ADljy9PBKGGJY4ig0xlsTBRSn98FOFDUC8pwQOPkgHbCGAzhTkA850s0c7j6Hjix9+gBIrMXLeAccWXUCyiRBcBEECdEJ98KtAqtBCYQc/OvDENnl4gYpUxISCIjjzylkGGV9okYUVNogRhAOBuuAEhjG08wOgDYzAgA5bCjIoCe5uwUk80RKTTSppPREGGGCIISOQ9AXBg6cC6WIywvCpoMHAocRBwhP4bHLFLujYkV42xNxBRhAyGrc113EgYtRBerDDDHMoDCyQEL5sE083EkgwQyBhxGFHMM206DUixGxmE0wssbQjCQ4JCaFKFwgQTVAVVhQUwAVPIFJKrHfYYRwi6OCDzzuIJIFhXAD0EccPsYRiSyqKSDpFcWSMIcZRoBMkQyA2BGZDIKSYcggih8TRRg4VxM5QABVYYLxgwiev/PLMCxQQADs=",
+    "find": "R0lGODlhMAAiAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OSkpKRgYGAAAAAAAAAAAAAAAAAAAACH+AS4ALAAAAAAwACIAAAX/ICCOZGmeaKquY2AGLiuvMCAUBuHWc48Kh0iFInEYCb4kSQCxPBiMxkMigRQEgJiSFVBYHNGG0RiZOHjblWAiiY4fkDhEYoBp06dAWfyAQyKAgAwDaHgnB0RwgYASgQ0IhDuGJDAIFhMRVFSLEX8QCJJ4AQM5AgQHTZqqjBAOCQQEkWkCDRMUFQsICQ4Vm5maEwwHOAsPDTpKMAsUDlO4CssTcb+2DAp8YGCyNFoCEsZwFQ3QDRTTVBRS0g1QbgsCd5QAAwgIBwYFAwStzQ8UEdCKVchky0yVBw7YuXkAKt4IAg74vXHVagqFBRgXSCAyYWAVCH0SNhDTitCJfSL5/4RbAPKPhQYYjVCYYAvCP0BxEDaD8CheAAHNwqh8MMGPSwgLeJWhwHSjqkYI+xg4MMCEgQjtRvZ7UAYCpghMF7CxONOWJkYR+rCpY4JlVpVxKDwYWEactKW9mhYRtqCTgwgWEMArERSK1j5q//6T8KXonFsShpiJkAECgQYVjykooCVA0JGHEWNiYCHThTFeb3UkoiCCBgwGEKQ1kuAJlhFwhA71h5SukwUM5qqeCSGBgicEWkfNiWSERtBad4JNIBaQBaQah1ToyGZBAnsIuIJs1qnqiAIVjIE2gnAB1T5x0icgzXT79ipgMOOEH6HBbREBMJCeGEY08IoLAkzB1YYFwjxwSUGSNULQJnNUwRYlCcyEkALIxECAP9cNMMABYpRhy3ZsSLDaR70oUAiABGCkAxowCGCAAfDYIQACXoElGRsdXWDBdg2Y90IWktDYGYAB9PWHP0PMdFZaF07SQgAFNDAMAQg0QA1UC8xoZQl22JGFPgWkOUCOL1pZQyhjxinnnCWEAAA7",
+    "REPL": "R0lGODlhMAAjAPcAMf/////3//+lOf+UKf+MEPf///f39/f35/fv7/ecQvecOfecKfeUIfeUGPeUEPeUCPeMAO/37+/v9+/v3u/n3u/n1u+9jO+9c++1hO+ta++tY++tWu+tUu+tSu+lUu+lQu+lMe+UMe+UKe+UGO+UEO+UAO+MCOfv5+fvxufn7+fn5+fnzue9lOe9c+e1jOe1e+e1c+e1a+etWuetUuelQuecOeeUUueUCN7e597e3t7e1t7ezt7evd7Wzt7Oxt7Ovd7Otd7Opd7OnN7Gtd7Gpd69lN61hN6ta96lStbextberdbW3tbWztbWxtbOvdbOrda1hNalUtaECM7W1s7Ozs7Oxs7Otc7Gxs7Gvc69tc69rc69pc61jM6lc8bWlMbOvcbGxsbGpca9tca9pca1nMaMAL3OhL3Gtb21vb21tb2tpb2tnL2tlLW9tbW9pbW9e7W1pbWtjLWcKa21nK2tra2tnK2tlK2lpa2llK2ljK2le6WlnKWljKWUe6WUc6WUY5y1QpyclJycjJychJyUc5yMY5StY5SUe5SMhJSMe5SMc5SMWpSEa5SESoyUe4yMhIyEY4SlKYScWoSMe4SEe4SEa4R7c4R7Y3uMY3uEe3t7e3t7c3tza3tzY3trKXtjIXOcAHOUMXOEY3Nzc3NzWnNrSmulCGuUMWuMGGtzWmtrY2taMWtaGGOUOWOMAGNzUmNjWmNjSmNaUmNaQmNaOWNaIWNSCFqcAFpjUlpSMVpSIVpSEFpKKVKMAFJSUlJSSlJSMVJKMVJKGFJKAFI5CEqUAEqEAEpzQkpKIUpCQkpCGEpCAEo5EEoxAEJjOUJCOUJCAEI5IUIxADl7ADlaITlCOTkxMTkxKTkxEDkhADFzADFrGDE5OTExADEpEClrCCkxKSkpKSkpISkpACkhCCkhACkYACFzACFrACEhCCEYGBhjEBhjABghABgYCBgYABgQEBgQABAQABAIAAhjAAhSAAhKAAgIEAgICABaAABCAAAhAAAQAAAIAAAAAAAAACH+AS4ALAAAAAAwACMAAAj/AAEIHEiwoMGDCBMqXMiwocOHAA4cgEixIIIJO3JMmAjADIqKFU/8MHIkg5EgYXx4iaTkI0iHE6wE2TCggYILQayEAgXIy8uGCKz8sDCAQAMRG3iEcXULlJkJPwli3OFjh9UdYYLE6NBhA04UXHoVA2XoTZgfPKBWlOBDphAWOdfMcfMDLloeO3hIMjbWVCQ5Fn6E2UFxgpsgFjYIEBADrZU6luqEEfqjTqpt54z1uuWqTIcgWAk7PECGzIUQDRosDmxlUrVJkwQJkqVuX71v06YZcyUlROAdbnLAJKPFyAYFAhoMwFlnEh0rWkpz8raPHm7dqKKc/KFFkBUrVn1M/ziBcEIeLUEQI8/AYk0i9Be4sqjsrN66c9/OnbobhpR3HkIUoZ0WVnBE0AGLFKKFD0HAFUQe77HQgQI1hRBDEHMcY0899bBzihZuCPILJD8EccEGGzwAQhFaUHHQH82sUkgeNHISDBk8WCCCcsqFUEQWmOyzjz3sUGNNOO5Y48YOEgowAAQhnBScQV00k82V47jzjy9CXZBcjziFoco//4CDiSOyhPMPLkJZkEBqJmRQxA9uZGEQD8Ncmc044/zzDF2IZQBCCDYE8QMZz/iiCSx0neHGI7BIhhhNn+1gxRpokEcQAp7seWU7/PwTyxqG/iCEEVzQmUombnDRxRExzP9nBR2PCKLFD3UJwcMPa/SRqUGNWJmNOVn+M44ukMRB4KGcWDNLVhuUMEIJAlzwA3DJBHMJIXm4sQYhqyxCRQQGLSIsn1qac2UzysQSyzX/hLMGD0F0IMCODYAQBA9W/PKPOcRiw0wzwxTiokF9dLMnuv/Mo+fCZF7jBr0xbDDCACWEYKgb1vzjDp/jZNOMLX0IZxAKq2TZTjtaOjwOsXyG+s8sZJTIQsUdIGHoJPf8w487QI/TDSt5mGwQFZxc406o8HiDJchk/ltLHpSlJwSvz5DpTjvmuGNOM57koelBOaAhiCaaPBLL0wwbm003peRBnBZqJMJL1ECz/HXYYx/NdAIOOVCxQyLorswymU93o0wuwfAiTDNR/xz0MLXU0XdCE+UwSTRZAq2lsSATu+4wkGvt+TjNzPLrQyegAUku2Hij5cd8LhxyM8QIg4w18HgcdC6BTBFSDmfQqsovttveDcG7lFLHI75cE841sARCxeWsnxC4G9HADPK6ywzDCRqBo0EHHWhMgT1IJzziNci1N7PMKnSYfML96/90AiJKey/0KtbLX1QK0rrNnQ541xugQ7SHhkXBghN0SKACWRc4KlAhBwKcIOYymJCAAAA7",
+    "repl": "R0lGODlhMAAjAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OTExMSkpKSEhIRgYGBAQEAgICAAAACH+AS4ALAAAAAAwACMAAAX/ICCOZGmeaKqubOu+gCDANBkIQ1EMQhAghFptYEAkEgjEwXBo7ISvweGgWCwUysPjwTgEoCafTySYIhYMxgLBjEQgCULvCw0QdAZdoVhUIJUFChISEAxYeQM1N1OMTAp+UwZ5eA4TEhFbDWYFdC4ECVMJjwl5BwsQa0umEhUVlhESDgqlBp0rAn5nVpBMDxeZDRQbHBgWFBSWDgtLBnFjKwRYCI9VqQsPs0YKEcMXFq0UEalFDWx4BAO2IwPjppAKDkrTWKYUGd7fEJJFEZpM00cOzCgh4EE8SaoWxKNixQooBRMyZMBwAYIRBhUgLDGS4MoBJeoANMhAgQsaCRZm/5lqaCUJhA4cNHjDoKEDBlJUHqkBlYBTiQUZNGjYMMxDhY3VWk6R4MEDBoMUak5AqoYBqANIBo4wcGGDUKIeLlzVZmWJggsVIkwAZaQSA3kdZzlKkIiEAAlDvW5oOkEBs488JTw44oeUIwdvVTFTUK7uiAAPgubt8GFDhQepqETAQCFU1UMGzlqAgFhUsAcCS0AO6lUDhw8xNRSbENGDhgWSHjWUe6ACbKITizmopZoBa6KvOwj9uuHDhwxyj3xekgDDhw5EvWKo0IB4iQLCOCC/njc7ZQ8UeGvza+ABZZgcxJNc4FO1gc0cOsCUrHevc8tdIMTIAhc4F198G2Qwwd8CBIQUAwEINABBBJUwR9R5wElgVRLwWODBBx4cGB8GEzDQIAo33CGJA8gh+JoH/clUgQU0YvDhdfmJdwEFC6Sjgg8yEPAABsPkh2F22cl2AQbn6QdTghTQ5eAJAQyQAAQV0MSBB9gRVZ4GE1mw5JZOAmiAVi1UWcAZDrDyZXYTeaOhA/bIVuIBPtKQ4h7ViYekUPdcEAEbzTzCRp5CADmAAwj+ORGPBcgwAAHo9ABGCYtm0ChwFHShlRiXhmHlkAcCiOeUodqQw5W0oXLAiamy4MOkjOyAaqxUymApDCEAADs=",
 }
-colors = ["#FF7B39","#80F121"]
-emphColors = ["#DAFC33","#F42548"]
+colors = ["#FF7B39", "#80F121"]
+emphColors = ["#DAFC33", "#F42548"]
 fieldParams = {
-    "height":3,
-    "width":70,
-    "font":("monaco",14),
-    "highlightthickness":0,
-    "borderwidth":0,
-    "background":"white",
+    "height": 3,
+    "width": 70,
+    "font": ("monaco", 14),
+    "highlightthickness": 0,
+    "borderwidth": 0,
+    "background": "white",
 }
 textParams = {
-    "bg":"#F7E0D4",
-    "fg":"#2321F1",
-    "highlightthickness":0,
-    "width":1,
-    "height":10,
-    "font":("verdana",16),
-    "wrap":"word",
+    "bg": "#F7E0D4",
+    "fg": "#2321F1",
+    "highlightthickness": 0,
+    "width": 1,
+    "height": 10,
+    "font": ("verdana", 16),
+    "wrap": "word",
 }
 
 
@@ -53,79 +62,87 @@ class Zone:
     def __init__(self, image, initialField, initialText):
         frm = Frame(root)
         frm.config(background="white")
-        self.image = PhotoImage(format='gif',data=images[image.upper()])
-        self.imageDimmed = PhotoImage(format='gif',data=images[image])
+        self.image = PhotoImage(format="gif", data=images[image.upper()])
+        self.imageDimmed = PhotoImage(format="gif", data=images[image])
         self.img = Label(frm)
         self.img.config(borderwidth=0)
-        self.img.pack(side = "left")
+        self.img.pack(side="left")
         self.fld = Text(frm, **fieldParams)
-        self.initScrollText(frm,self.fld,initialField)
+        self.initScrollText(frm, self.fld, initialField)
         frm = Frame(root)
         self.txt = Text(frm, **textParams)
-        self.initScrollText(frm,self.txt,initialText)
+        self.initScrollText(frm, self.txt, initialText)
         for i in range(2):
-            self.txt.tag_config(colors[i], background = colors[i])
-            self.txt.tag_config("emph"+colors[i], foreground = emphColors[i])
-    def initScrollText(self,frm,txt,contents):
+            self.txt.tag_config(colors[i], background=colors[i])
+            self.txt.tag_config("emph" + colors[i], foreground=emphColors[i])
+
+    def initScrollText(self, frm, txt, contents):
         scl = Scrollbar(frm)
-        scl.config(command = txt.yview)
-        scl.pack(side="right",fill="y")
-        txt.pack(side = "left", expand=True, fill="x")
-        txt.config(yscrollcommand = scl.set)
-        txt.insert("1.0",contents)
-        frm.pack(fill = "x")
+        scl.config(command=txt.yview)
+        scl.pack(side="right", fill="y")
+        txt.pack(side="left", expand=True, fill="x")
+        txt.config(yscrollcommand=scl.set)
+        txt.insert("1.0", contents)
+        frm.pack(fill="x")
         Frame(height=2, bd=1, relief="ridge").pack(fill="x")
+
     def refresh(self):
         self.colorCycle = itertools.cycle(colors)
         try:
             self.substitute()
-            self.img.config(image = self.image)
+            self.img.config(image=self.image)
         except re.error:
-            self.img.config(image = self.imageDimmed)
+            self.img.config(image=self.imageDimmed)
 
 
 class FindZone(Zone):
-    def addTags(self,m):
+    def addTags(self, m):
         color = next(self.colorCycle)
-        self.txt.tag_add(color,"1.0+%sc"%m.start(),"1.0+%sc"%m.end())
+        self.txt.tag_add(color, "1.0+%sc" % m.start(), "1.0+%sc" % m.end())
         try:
-            self.txt.tag_add("emph"+color,"1.0+%sc"%m.start("emph"),
-                             "1.0+%sc"%m.end("emph"))
+            self.txt.tag_add(
+                "emph" + color, "1.0+%sc" % m.start("emph"), "1.0+%sc" % m.end("emph")
+            )
         except:
             pass
-    def substitute(self,*args):
+
+    def substitute(self, *args):
         for color in colors:
-            self.txt.tag_remove(color,"1.0","end")
-            self.txt.tag_remove("emph"+color,"1.0","end")
-        self.rex = re.compile("") # default value in case of misformed regexp
-        self.rex = re.compile(self.fld.get("1.0","end")[:-1],re.MULTILINE)
+            self.txt.tag_remove(color, "1.0", "end")
+            self.txt.tag_remove("emph" + color, "1.0", "end")
+        self.rex = re.compile("")  # fallback value in case of a malformed regexp
+        self.rex = re.compile(self.fld.get("1.0", "end")[:-1], re.MULTILINE)
         try:
-            re.compile("(?P<emph>%s)" % self.fld.get(SEL_FIRST,
-                                                      SEL_LAST))
-            self.rexSel = re.compile("%s(?P<emph>%s)%s" % (
-                self.fld.get("1.0",SEL_FIRST),
-                self.fld.get(SEL_FIRST,SEL_LAST),
-                self.fld.get(SEL_LAST,"end")[:-1],
-            ),re.MULTILINE)
+            re.compile("(?P<emph>%s)" % self.fld.get(SEL_FIRST, SEL_LAST))
+            self.rexSel = re.compile(
+                "%s(?P<emph>%s)%s"
+                % (
+                    self.fld.get("1.0", SEL_FIRST),
+                    self.fld.get(SEL_FIRST, SEL_LAST),
+                    self.fld.get(SEL_LAST, "end")[:-1],
+                ),
+                re.MULTILINE,
+            )
         except:
             self.rexSel = self.rex
-        self.rexSel.sub(self.addTags,self.txt.get("1.0","end"))
+        self.rexSel.sub(self.addTags, self.txt.get("1.0", "end"))
 
 
 class ReplaceZone(Zone):
-    def addTags(self,m):
-        s = sz.rex.sub(self.repl,m.group())
-        self.txt.delete("1.0+%sc"%(m.start()+self.diff),
-                        "1.0+%sc"%(m.end()+self.diff))
-        self.txt.insert("1.0+%sc"%(m.start()+self.diff),s,
-                        next(self.colorCycle))
+    def addTags(self, m):
+        s = sz.rex.sub(self.repl, m.group())
+        self.txt.delete(
+            "1.0+%sc" % (m.start() + self.diff), "1.0+%sc" % (m.end() + self.diff)
+        )
+        self.txt.insert("1.0+%sc" % (m.start() + self.diff), s, next(self.colorCycle))
         self.diff += len(s) - (m.end() - m.start())
+
     def substitute(self):
-        self.txt.delete("1.0","end")
-        self.txt.insert("1.0",sz.txt.get("1.0","end")[:-1])
+        self.txt.delete("1.0", "end")
+        self.txt.insert("1.0", sz.txt.get("1.0", "end")[:-1])
         self.diff = 0
-        self.repl = rex0.sub(r"\\g<\1>",self.fld.get("1.0","end")[:-1])
-        sz.rex.sub(self.addTags,sz.txt.get("1.0","end")[:-1])
+        self.repl = rex0.sub(r"\\g<\1>", self.fld.get("1.0", "end")[:-1])
+        sz.rex.sub(self.addTags, sz.txt.get("1.0", "end")[:-1])
 
 
 def launchRefresh(_):
@@ -136,21 +153,22 @@ def launchRefresh(_):
 def app():
     global root, sz, rz, rex0
     root = Tk()
-    root.resizable(height=False,width=True)
+    root.resizable(height=False, width=True)
     root.title(windowTitle)
-    root.minsize(width=250,height=0)
-    sz = FindZone("find",initialFind,initialText)
-    sz.fld.bind("<Button-1>",launchRefresh)
-    sz.fld.bind("<ButtonRelease-1>",launchRefresh)
-    sz.fld.bind("<B1-Motion>",launchRefresh)
+    root.minsize(width=250, height=0)
+    sz = FindZone("find", initialFind, initialText)
+    sz.fld.bind("<Button-1>", launchRefresh)
+    sz.fld.bind("<ButtonRelease-1>", launchRefresh)
+    sz.fld.bind("<B1-Motion>", launchRefresh)
     sz.rexSel = re.compile("")
-    rz = ReplaceZone("repl",initialRepl,"")
+    rz = ReplaceZone("repl", initialRepl, "")
     rex0 = re.compile(r"(?<!\\)\\([0-9]+)")
-    root.bind_all("<Key>",launchRefresh)
+    root.bind_all("<Key>", launchRefresh)
     launchRefresh(None)
     root.mainloop()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     app()
 
-__all__ = ['app']
+__all__ = ["app"]
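
FindZone.substitute above wraps the selected portion of the pattern field in a named group so that addTags can recover the sub-match's offsets. A minimal sketch of that mechanism; the sample string is illustrative only:

    import re

    # Wrapping part of a pattern in (?P<emph>...) exposes the sub-match's
    # offsets via m.start("emph") / m.end("emph"); FindZone feeds such offsets
    # into Text.tag_add using the "1.0+%sc" index form to highlight matches.
    rex = re.compile(r"n(?P<emph>.*?e.*?m)o", re.MULTILINE)
    m = rex.search("finding nemo")
    print(m.start(), m.end())              # 2 12 -> the whole match
    print(m.start("emph"), m.end("emph"))  # 3 11 -> the selected sub-pattern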
diff --git a/nlp_resource_data/nltk/app/nemo_app.pyc b/nlp_resource_data/nltk/app/nemo_app.pyc
deleted file mode 100755 (executable)
index c6c06a2..0000000
Binary files a/nlp_resource_data/nltk/app/nemo_app.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/app/rdparser_app.py b/nlp_resource_data/nltk/app/rdparser_app.py
old mode 100755 (executable)
new mode 100644 (file)
index b791767..9437bff
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Recursive Descent Parser Application
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -63,11 +63,9 @@ Keyboard Shortcuts::
       [Ctrl-p]\t Print
       [q]\t Quit
 """
-from __future__ import division
 
-from six.moves.tkinter_font import Font
-from six.moves.tkinter import (Listbox, IntVar, Button, Frame, Label, Menu,
-                               Scrollbar, Tk)
+from tkinter.font import Font
+from tkinter import Listbox, IntVar, Button, Frame, Label, Menu, Scrollbar, Tk
 
 from nltk.tree import Tree
 from nltk.util import in_idle
@@ -75,6 +73,7 @@ from nltk.parse import SteppingRecursiveDescentParser
 from nltk.draw.util import TextWidget, ShowText, CanvasFrame, EntryDialog
 from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment
 
+
 class RecursiveDescentApp(object):
     """
     A graphical tool for exploring the recursive descent parser.  The tool
@@ -85,13 +84,14 @@ class RecursiveDescentApp(object):
     through the parsing process, performing the operations that
     ``RecursiveDescentParser`` would use.
     """
+
     def __init__(self, grammar, sent, trace=0):
         self._sent = sent
         self._parser = SteppingRecursiveDescentParser(grammar, trace)
 
         # Set up the main window.
         self._top = Tk()
-        self._top.title('Recursive Descent Parser Application')
+        self._top.title("Recursive Descent Parser Application")
 
         # Set up key bindings.
         self._init_bindings()
@@ -121,7 +121,7 @@ class RecursiveDescentApp(object):
         self._parser.initialize(self._sent)
 
         # Resize callback
-        self._canvas.bind('<Configure>', self._configure)
+        self._canvas.bind("<Configure>", self._configure)
 
     #########################################
     ##  Initialization Helpers
@@ -134,134 +134,173 @@ class RecursiveDescentApp(object):
 
         # What's our font size (default=same as sysfont)
         self._size = IntVar(root)
-        self._size.set(self._sysfont.cget('size'))
+        self._size.set(self._sysfont.cget("size"))
 
-        self._boldfont = Font(family='helvetica', weight='bold',
-                                    size=self._size.get())
-        self._font = Font(family='helvetica',
-                                    size=self._size.get())
-        if self._size.get() < 0: big = self._size.get()-2
-        else: big = self._size.get()+2
-        self._bigfont = Font(family='helvetica', weight='bold',
-                                    size=big)
+        self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get())
+        self._font = Font(family="helvetica", size=self._size.get())
+        if self._size.get() < 0:
+            big = self._size.get() - 2
+        else:
+            big = self._size.get() + 2
+        self._bigfont = Font(family="helvetica", weight="bold", size=big)
 
     def _init_grammar(self, parent):
         # Grammar view.
         self._prodframe = listframe = Frame(parent)
-        self._prodframe.pack(fill='both', side='left', padx=2)
-        self._prodlist_label = Label(self._prodframe, font=self._boldfont,
-                                     text='Available Expansions')
+        self._prodframe.pack(fill="both", side="left", padx=2)
+        self._prodlist_label = Label(
+            self._prodframe, font=self._boldfont, text="Available Expansions"
+        )
         self._prodlist_label.pack()
-        self._prodlist = Listbox(self._prodframe, selectmode='single',
-                                 relief='groove', background='white',
-                                 foreground='#909090', font=self._font,
-                                 selectforeground='#004040',
-                                 selectbackground='#c0f0c0')
-
-        self._prodlist.pack(side='right', fill='both', expand=1)
+        self._prodlist = Listbox(
+            self._prodframe,
+            selectmode="single",
+            relief="groove",
+            background="white",
+            foreground="#909090",
+            font=self._font,
+            selectforeground="#004040",
+            selectbackground="#c0f0c0",
+        )
+
+        self._prodlist.pack(side="right", fill="both", expand=1)
 
         self._productions = list(self._parser.grammar().productions())
         for production in self._productions:
-            self._prodlist.insert('end', ('  %s' % production))
+            self._prodlist.insert("end", ("  %s" % production))
         self._prodlist.config(height=min(len(self._productions), 25))
 
         # Add a scrollbar if there are more than 25 productions.
         if len(self._productions) > 25:
-            listscroll = Scrollbar(self._prodframe,
-                                   orient='vertical')
-            self._prodlist.config(yscrollcommand = listscroll.set)
+            listscroll = Scrollbar(self._prodframe, orient="vertical")
+            self._prodlist.config(yscrollcommand=listscroll.set)
             listscroll.config(command=self._prodlist.yview)
-            listscroll.pack(side='left', fill='y')
+            listscroll.pack(side="left", fill="y")
 
         # If they select a production, apply it.
-        self._prodlist.bind('<<ListboxSelect>>', self._prodlist_select)
+        self._prodlist.bind("<<ListboxSelect>>", self._prodlist_select)
 
     def _init_bindings(self):
         # Key bindings are a good thing.
-        self._top.bind('<Control-q>', self.destroy)
-        self._top.bind('<Control-x>', self.destroy)
-        self._top.bind('<Escape>', self.destroy)
-        self._top.bind('e', self.expand)
-        #self._top.bind('<Alt-e>', self.expand)
-        #self._top.bind('<Control-e>', self.expand)
-        self._top.bind('m', self.match)
-        self._top.bind('<Alt-m>', self.match)
-        self._top.bind('<Control-m>', self.match)
-        self._top.bind('b', self.backtrack)
-        self._top.bind('<Alt-b>', self.backtrack)
-        self._top.bind('<Control-b>', self.backtrack)
-        self._top.bind('<Control-z>', self.backtrack)
-        self._top.bind('<BackSpace>', self.backtrack)
-        self._top.bind('a', self.autostep)
-        #self._top.bind('<Control-a>', self.autostep)
-        self._top.bind('<Control-space>', self.autostep)
-        self._top.bind('<Control-c>', self.cancel_autostep)
-        self._top.bind('<space>', self.step)
-        self._top.bind('<Delete>', self.reset)
-        self._top.bind('<Control-p>', self.postscript)
-        #self._top.bind('<h>', self.help)
-        #self._top.bind('<Alt-h>', self.help)
-        self._top.bind('<Control-h>', self.help)
-        self._top.bind('<F1>', self.help)
-        #self._top.bind('<g>', self.toggle_grammar)
-        #self._top.bind('<Alt-g>', self.toggle_grammar)
-        #self._top.bind('<Control-g>', self.toggle_grammar)
-        self._top.bind('<Control-g>', self.edit_grammar)
-        self._top.bind('<Control-t>', self.edit_sentence)
+        self._top.bind("<Control-q>", self.destroy)
+        self._top.bind("<Control-x>", self.destroy)
+        self._top.bind("<Escape>", self.destroy)
+        self._top.bind("e", self.expand)
+        # self._top.bind('<Alt-e>', self.expand)
+        # self._top.bind('<Control-e>', self.expand)
+        self._top.bind("m", self.match)
+        self._top.bind("<Alt-m>", self.match)
+        self._top.bind("<Control-m>", self.match)
+        self._top.bind("b", self.backtrack)
+        self._top.bind("<Alt-b>", self.backtrack)
+        self._top.bind("<Control-b>", self.backtrack)
+        self._top.bind("<Control-z>", self.backtrack)
+        self._top.bind("<BackSpace>", self.backtrack)
+        self._top.bind("a", self.autostep)
+        # self._top.bind('<Control-a>', self.autostep)
+        self._top.bind("<Control-space>", self.autostep)
+        self._top.bind("<Control-c>", self.cancel_autostep)
+        self._top.bind("<space>", self.step)
+        self._top.bind("<Delete>", self.reset)
+        self._top.bind("<Control-p>", self.postscript)
+        # self._top.bind('<h>', self.help)
+        # self._top.bind('<Alt-h>', self.help)
+        self._top.bind("<Control-h>", self.help)
+        self._top.bind("<F1>", self.help)
+        # self._top.bind('<g>', self.toggle_grammar)
+        # self._top.bind('<Alt-g>', self.toggle_grammar)
+        # self._top.bind('<Control-g>', self.toggle_grammar)
+        self._top.bind("<Control-g>", self.edit_grammar)
+        self._top.bind("<Control-t>", self.edit_sentence)
 
     def _init_buttons(self, parent):
         # Set up the frames.
         self._buttonframe = buttonframe = Frame(parent)
-        buttonframe.pack(fill='none', side='bottom', padx=3, pady=2)
-        Button(buttonframe, text='Step',
-               background='#90c0d0', foreground='black',
-               command=self.step,).pack(side='left')
-        Button(buttonframe, text='Autostep',
-               background='#90c0d0', foreground='black',
-               command=self.autostep,).pack(side='left')
-        Button(buttonframe, text='Expand', underline=0,
-               background='#90f090', foreground='black',
-               command=self.expand).pack(side='left')
-        Button(buttonframe, text='Match', underline=0,
-               background='#90f090', foreground='black',
-               command=self.match).pack(side='left')
-        Button(buttonframe, text='Backtrack', underline=0,
-               background='#f0a0a0', foreground='black',
-               command=self.backtrack).pack(side='left')
+        buttonframe.pack(fill="none", side="bottom", padx=3, pady=2)
+        Button(
+            buttonframe,
+            text="Step",
+            background="#90c0d0",
+            foreground="black",
+            command=self.step,
+        ).pack(side="left")
+        Button(
+            buttonframe,
+            text="Autostep",
+            background="#90c0d0",
+            foreground="black",
+            command=self.autostep,
+        ).pack(side="left")
+        Button(
+            buttonframe,
+            text="Expand",
+            underline=0,
+            background="#90f090",
+            foreground="black",
+            command=self.expand,
+        ).pack(side="left")
+        Button(
+            buttonframe,
+            text="Match",
+            underline=0,
+            background="#90f090",
+            foreground="black",
+            command=self.match,
+        ).pack(side="left")
+        Button(
+            buttonframe,
+            text="Backtrack",
+            underline=0,
+            background="#f0a0a0",
+            foreground="black",
+            command=self.backtrack,
+        ).pack(side="left")
         # Replace autostep...
-#         self._autostep_button = Button(buttonframe, text='Autostep',
-#                                        underline=0, command=self.autostep)
-#         self._autostep_button.pack(side='left')
+
+    #         self._autostep_button = Button(buttonframe, text='Autostep',
+    #                                        underline=0, command=self.autostep)
+    #         self._autostep_button.pack(side='left')
 
     def _configure(self, event):
         self._autostep = 0
         (x1, y1, x2, y2) = self._cframe.scrollregion()
         y2 = event.height - 6
-        self._canvas['scrollregion'] = '%d %d %d %d' % (x1,y1,x2,y2)
+        self._canvas["scrollregion"] = "%d %d %d %d" % (x1, y1, x2, y2)
         self._redraw()
 
     def _init_feedback(self, parent):
         self._feedbackframe = feedbackframe = Frame(parent)
-        feedbackframe.pack(fill='x', side='bottom', padx=3, pady=3)
-        self._lastoper_label = Label(feedbackframe, text='Last Operation:',
-                                     font=self._font)
-        self._lastoper_label.pack(side='left')
-        lastoperframe = Frame(feedbackframe, relief='sunken', border=1)
-        lastoperframe.pack(fill='x', side='right', expand=1, padx=5)
-        self._lastoper1 = Label(lastoperframe, foreground='#007070',
-                                background='#f0f0f0', font=self._font)
-        self._lastoper2 = Label(lastoperframe, anchor='w', width=30,
-                                foreground='#004040', background='#f0f0f0',
-                                font=self._font)
-        self._lastoper1.pack(side='left')
-        self._lastoper2.pack(side='left', fill='x', expand=1)
+        feedbackframe.pack(fill="x", side="bottom", padx=3, pady=3)
+        self._lastoper_label = Label(
+            feedbackframe, text="Last Operation:", font=self._font
+        )
+        self._lastoper_label.pack(side="left")
+        lastoperframe = Frame(feedbackframe, relief="sunken", border=1)
+        lastoperframe.pack(fill="x", side="right", expand=1, padx=5)
+        self._lastoper1 = Label(
+            lastoperframe, foreground="#007070", background="#f0f0f0", font=self._font
+        )
+        self._lastoper2 = Label(
+            lastoperframe,
+            anchor="w",
+            width=30,
+            foreground="#004040",
+            background="#f0f0f0",
+            font=self._font,
+        )
+        self._lastoper1.pack(side="left")
+        self._lastoper2.pack(side="left", fill="x", expand=1)
 
     def _init_canvas(self, parent):
-        self._cframe = CanvasFrame(parent, background='white',
-                                   #width=525, height=250,
-                                   closeenough=10,
-                                   border=2, relief='sunken')
-        self._cframe.pack(expand=1, fill='both', side='top', pady=2)
+        self._cframe = CanvasFrame(
+            parent,
+            background="white",
+            # width=525, height=250,
+            closeenough=10,
+            border=2,
+            relief="sunken",
+        )
+        self._cframe.pack(expand=1, fill="both", side="top", pady=2)
         canvas = self._canvas = self._cframe.canvas()
 
         # Initially, there's no tree or text
@@ -273,75 +312,130 @@ class RecursiveDescentApp(object):
         menubar = Menu(parent)
 
         filemenu = Menu(menubar, tearoff=0)
-        filemenu.add_command(label='Reset Parser', underline=0,
-                             command=self.reset, accelerator='Del')
-        filemenu.add_command(label='Print to Postscript', underline=0,
-                             command=self.postscript, accelerator='Ctrl-p')
-        filemenu.add_command(label='Exit', underline=1,
-                             command=self.destroy, accelerator='Ctrl-x')
-        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+        filemenu.add_command(
+            label="Reset Parser", underline=0, command=self.reset, accelerator="Del"
+        )
+        filemenu.add_command(
+            label="Print to Postscript",
+            underline=0,
+            command=self.postscript,
+            accelerator="Ctrl-p",
+        )
+        filemenu.add_command(
+            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"
+        )
+        menubar.add_cascade(label="File", underline=0, menu=filemenu)
 
         editmenu = Menu(menubar, tearoff=0)
-        editmenu.add_command(label='Edit Grammar', underline=5,
-                             command=self.edit_grammar,
-                             accelerator='Ctrl-g')
-        editmenu.add_command(label='Edit Text', underline=5,
-                             command=self.edit_sentence,
-                             accelerator='Ctrl-t')
-        menubar.add_cascade(label='Edit', underline=0, menu=editmenu)
+        editmenu.add_command(
+            label="Edit Grammar",
+            underline=5,
+            command=self.edit_grammar,
+            accelerator="Ctrl-g",
+        )
+        editmenu.add_command(
+            label="Edit Text",
+            underline=5,
+            command=self.edit_sentence,
+            accelerator="Ctrl-t",
+        )
+        menubar.add_cascade(label="Edit", underline=0, menu=editmenu)
 
         rulemenu = Menu(menubar, tearoff=0)
-        rulemenu.add_command(label='Step', underline=1,
-                             command=self.step, accelerator='Space')
+        rulemenu.add_command(
+            label="Step", underline=1, command=self.step, accelerator="Space"
+        )
         rulemenu.add_separator()
-        rulemenu.add_command(label='Match', underline=0,
-                             command=self.match, accelerator='Ctrl-m')
-        rulemenu.add_command(label='Expand', underline=0,
-                             command=self.expand, accelerator='Ctrl-e')
+        rulemenu.add_command(
+            label="Match", underline=0, command=self.match, accelerator="Ctrl-m"
+        )
+        rulemenu.add_command(
+            label="Expand", underline=0, command=self.expand, accelerator="Ctrl-e"
+        )
         rulemenu.add_separator()
-        rulemenu.add_command(label='Backtrack', underline=0,
-                             command=self.backtrack, accelerator='Ctrl-b')
-        menubar.add_cascade(label='Apply', underline=0, menu=rulemenu)
+        rulemenu.add_command(
+            label="Backtrack", underline=0, command=self.backtrack, accelerator="Ctrl-b"
+        )
+        menubar.add_cascade(label="Apply", underline=0, menu=rulemenu)
 
         viewmenu = Menu(menubar, tearoff=0)
-        viewmenu.add_checkbutton(label="Show Grammar", underline=0,
-                                 variable=self._show_grammar,
-                                 command=self._toggle_grammar)
+        viewmenu.add_checkbutton(
+            label="Show Grammar",
+            underline=0,
+            variable=self._show_grammar,
+            command=self._toggle_grammar,
+        )
         viewmenu.add_separator()
-        viewmenu.add_radiobutton(label='Tiny', variable=self._size,
-                                 underline=0, value=10, command=self.resize)
-        viewmenu.add_radiobutton(label='Small', variable=self._size,
-                                 underline=0, value=12, command=self.resize)
-        viewmenu.add_radiobutton(label='Medium', variable=self._size,
-                                 underline=0, value=14, command=self.resize)
-        viewmenu.add_radiobutton(label='Large', variable=self._size,
-                                 underline=0, value=18, command=self.resize)
-        viewmenu.add_radiobutton(label='Huge', variable=self._size,
-                                 underline=0, value=24, command=self.resize)
-        menubar.add_cascade(label='View', underline=0, menu=viewmenu)
+        viewmenu.add_radiobutton(
+            label="Tiny",
+            variable=self._size,
+            underline=0,
+            value=10,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Small",
+            variable=self._size,
+            underline=0,
+            value=12,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Medium",
+            variable=self._size,
+            underline=0,
+            value=14,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Large",
+            variable=self._size,
+            underline=0,
+            value=18,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Huge",
+            variable=self._size,
+            underline=0,
+            value=24,
+            command=self.resize,
+        )
+        menubar.add_cascade(label="View", underline=0, menu=viewmenu)
 
         animatemenu = Menu(menubar, tearoff=0)
-        animatemenu.add_radiobutton(label="No Animation", underline=0,
-                                    variable=self._animation_frames,
-                                    value=0)
-        animatemenu.add_radiobutton(label="Slow Animation", underline=0,
-                                    variable=self._animation_frames,
-                                    value=10, accelerator='-')
-        animatemenu.add_radiobutton(label="Normal Animation", underline=0,
-                                    variable=self._animation_frames,
-                                    value=5, accelerator='=')
-        animatemenu.add_radiobutton(label="Fast Animation", underline=0,
-                                    variable=self._animation_frames,
-                                    value=2, accelerator='+')
+        animatemenu.add_radiobutton(
+            label="No Animation", underline=0, variable=self._animation_frames, value=0
+        )
+        animatemenu.add_radiobutton(
+            label="Slow Animation",
+            underline=0,
+            variable=self._animation_frames,
+            value=10,
+            accelerator="-",
+        )
+        animatemenu.add_radiobutton(
+            label="Normal Animation",
+            underline=0,
+            variable=self._animation_frames,
+            value=5,
+            accelerator="=",
+        )
+        animatemenu.add_radiobutton(
+            label="Fast Animation",
+            underline=0,
+            variable=self._animation_frames,
+            value=2,
+            accelerator="+",
+        )
         menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)
 
-
         helpmenu = Menu(menubar, tearoff=0)
-        helpmenu.add_command(label='About', underline=0,
-                             command=self.about)
-        helpmenu.add_command(label='Instructions', underline=0,
-                             command=self.help, accelerator='F1')
-        menubar.add_cascade(label='Help', underline=0, menu=helpmenu)
+        helpmenu.add_command(label="About", underline=0, command=self.about)
+        helpmenu.add_command(
+            label="Instructions", underline=0, command=self.help, accelerator="F1"
+        )
+        menubar.add_cascade(label="Help", underline=0, menu=helpmenu)
 
         parent.config(menu=menubar)
 
@@ -350,7 +444,8 @@ class RecursiveDescentApp(object):
     #########################################
 
     def _get(self, widget, treeloc):
-        for i in treeloc: widget = widget.subtrees()[i]
+        for i in treeloc:
+            widget = widget.subtrees()[i]
         if isinstance(widget, TreeSegmentWidget):
             widget = widget.label()
         return widget
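
(A "treeloc" above is a tuple of child indices. A minimal sketch of the same
addressing on a plain nltk.Tree; stock NLTK API, not part of this commit:)

    from nltk import Tree

    t = Tree.fromstring("(S (NP (Det the) (N dog)) (VP (V saw)))")
    treeloc = (0, 1)           # second child of the first child
    node = t
    for i in treeloc:          # same walk _get() performs over canvas widgets
        node = node[i]
    print(node)                # (N dog)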
@@ -371,26 +466,31 @@ class RecursiveDescentApp(object):
             self._canvas.delete(self._textline)
 
         # Draw the tree.
-        helv = ('helvetica', -self._size.get())
-        bold = ('helvetica', -self._size.get(), 'bold')
-        attribs = {'tree_color': '#000000', 'tree_width': 2,
-                   'node_font': bold, 'leaf_font': helv,}
+        helv = ("helvetica", -self._size.get())
+        bold = ("helvetica", -self._size.get(), "bold")
+        attribs = {
+            "tree_color": "#000000",
+            "tree_width": 2,
+            "node_font": bold,
+            "leaf_font": helv,
+        }
         tree = self._parser.tree()
         self._tree = tree_to_treesegment(canvas, tree, **attribs)
         self._cframe.add_widget(self._tree, 30, 5)
 
         # Draw the text.
-        helv = ('helvetica', -self._size.get())
+        helv = ("helvetica", -self._size.get())
         bottom = y = self._cframe.scrollregion()[3]
-        self._textwidgets = [TextWidget(canvas, word, font=self._font)
-                             for word in self._sent]
+        self._textwidgets = [
+            TextWidget(canvas, word, font=self._font) for word in self._sent
+        ]
         for twidget in self._textwidgets:
             self._cframe.add_widget(twidget, 0, 0)
-            twidget.move(0, bottom-twidget.bbox()[3]-5)
+            twidget.move(0, bottom - twidget.bbox()[3] - 5)
             y = min(y, twidget.bbox()[1])
 
         # Draw a line over the text, to separate it from the tree.
-        self._textline = canvas.create_line(-5000, y-5, 5000, y-5, dash='.')
+        self._textline = canvas.create_line(-5000, y - 5, 5000, y - 5, dash=".")
 
         # Highlight appropriate nodes.
         self._highlight_nodes()
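
(Sketch of the drawing helpers used above, assuming stock NLTK's nltk.draw
package; names and offsets mirror the app code, not part of this commit:)

    from tkinter import Tk
    from nltk import Tree
    from nltk.draw.util import CanvasFrame
    from nltk.draw.tree import tree_to_treesegment

    top = Tk()
    cf = CanvasFrame(top, background="white")
    cf.pack(expand=1, fill="both")
    # Render a Tree as canvas widgets, as _redraw() does for the parse tree.
    seg = tree_to_treesegment(cf.canvas(), Tree.fromstring("(NP (Det the) (N dog))"))
    cf.add_widget(seg, 30, 5)
    top.mainloop()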
@@ -399,7 +499,6 @@ class RecursiveDescentApp(object):
         # Make sure the text lines up.
         self._position_text()
 
-
     def _redraw_quick(self):
         # This should be more-or-less sufficient after an animation.
         self._highlight_nodes()
@@ -408,31 +507,30 @@ class RecursiveDescentApp(object):
 
     def _highlight_nodes(self):
         # Highlight the list of nodes to be checked.
-        bold = ('helvetica', -self._size.get(), 'bold')
+        bold = ("helvetica", -self._size.get(), "bold")
         for treeloc in self._parser.frontier()[:1]:
-            self._get(self._tree, treeloc)['color'] = '#20a050'
-            self._get(self._tree, treeloc)['font'] = bold
+            self._get(self._tree, treeloc)["color"] = "#20a050"
+            self._get(self._tree, treeloc)["font"] = bold
         for treeloc in self._parser.frontier()[1:]:
-            self._get(self._tree, treeloc)['color'] = '#008080'
+            self._get(self._tree, treeloc)["color"] = "#008080"
 
     def _highlight_prodlist(self):
         # Highlight the productions that can be expanded.
         # Boy, too bad tkinter doesn't implement Listbox.itemconfig;
         # that would be pretty useful here.
-        self._prodlist.delete(0, 'end')
+        self._prodlist.delete(0, "end")
         expandable = self._parser.expandable_productions()
         untried = self._parser.untried_expandable_productions()
         productions = self._productions
         for index in range(len(productions)):
             if productions[index] in expandable:
                 if productions[index] in untried:
-                    self._prodlist.insert(index, ' %s' % productions[index])
+                    self._prodlist.insert(index, " %s" % productions[index])
                 else:
-                    self._prodlist.insert(index, ' %s (TRIED)' %
-                                          productions[index])
+                    self._prodlist.insert(index, " %s (TRIED)" % productions[index])
                 self._prodlist.selection_set(index)
             else:
-                self._prodlist.insert(index, ' %s' % productions[index])
+                self._prodlist.insert(index, " %s" % productions[index])
 
     def _position_text(self):
         # Line up the text widgets that are matched against the tree
@@ -443,22 +541,22 @@ class RecursiveDescentApp(object):
         for i in range(0, len(leaves)):
             widget = self._textwidgets[i]
             leaf = leaves[i]
-            widget['color'] = '#006040'
-            leaf['color'] = '#006040'
+            widget["color"] = "#006040"
+            leaf["color"] = "#006040"
             widget.move(leaf.bbox()[0] - widget.bbox()[0], 0)
             xmax = widget.bbox()[2] + 10
 
         # Line up the text widgets that are not matched against the tree.
         for i in range(len(leaves), numwords):
             widget = self._textwidgets[i]
-            widget['color'] = '#a0a0a0'
+            widget["color"] = "#a0a0a0"
             widget.move(xmax - widget.bbox()[0], 0)
             xmax = widget.bbox()[2] + 10
 
         # If we have a complete parse, make everything green :)
         if self._parser.currently_complete():
             for twidget in self._textwidgets:
-                twidget['color'] = '#00a000'
+                twidget["color"] = "#00a000"
 
         # Move the matched leaves down to the text.
         for i in range(0, len(leaves)):
@@ -469,10 +567,12 @@ class RecursiveDescentApp(object):
             leaf.move(0, dy)
 
     def _tree_leaves(self, tree=None):
-        if tree is None: tree = self._tree
+        if tree is None:
+            tree = self._tree
         if isinstance(tree, TreeSegmentWidget):
             leaves = []
-            for child in tree.subtrees(): leaves += self._tree_leaves(child)
+            for child in tree.subtrees():
+                leaves += self._tree_leaves(child)
             return leaves
         else:
             return [tree]
@@ -483,15 +583,16 @@ class RecursiveDescentApp(object):
 
     def destroy(self, *e):
         self._autostep = 0
-        if self._top is None: return
+        if self._top is None:
+            return
         self._top.destroy()
         self._top = None
 
     def reset(self, *e):
         self._autostep = 0
         self._parser.initialize(self._sent)
-        self._lastoper1['text'] = 'Reset Application'
-        self._lastoper2['text'] = ''
+        self._lastoper1["text"] = "Reset Application"
+        self._lastoper2["text"] = ""
         self._redraw()
 
     def autostep(self, *e):
@@ -504,71 +605,89 @@ class RecursiveDescentApp(object):
             self._step()
 
     def cancel_autostep(self, *e):
-        #self._autostep_button['text'] = 'Autostep'
+        # self._autostep_button['text'] = 'Autostep'
         self._autostep = 0
 
     # Make sure to stop auto-stepping if we get any user input.
-    def step(self, *e): self._autostep = 0; self._step()
-    def match(self, *e): self._autostep = 0; self._match()
-    def expand(self, *e): self._autostep = 0; self._expand()
-    def backtrack(self, *e): self._autostep = 0; self._backtrack()
+    def step(self, *e):
+        self._autostep = 0
+        self._step()
+
+    def match(self, *e):
+        self._autostep = 0
+        self._match()
+
+    def expand(self, *e):
+        self._autostep = 0
+        self._expand()
+
+    def backtrack(self, *e):
+        self._autostep = 0
+        self._backtrack()
 
     def _step(self):
-        if self._animating_lock: return
+        if self._animating_lock:
+            return
 
         # Try expanding, matching, and backtracking (in that order)
-        if self._expand(): pass
-        elif self._parser.untried_match() and self._match(): pass
-        elif self._backtrack(): pass
+        if self._expand():
+            pass
+        elif self._parser.untried_match() and self._match():
+            pass
+        elif self._backtrack():
+            pass
         else:
-            self._lastoper1['text'] = 'Finished'
-            self._lastoper2['text'] = ''
+            self._lastoper1["text"] = "Finished"
+            self._lastoper2["text"] = ""
             self._autostep = 0
 
         # Check if we just completed a parse.
         if self._parser.currently_complete():
             self._autostep = 0
-            self._lastoper2['text'] += '    [COMPLETE PARSE]'
+            self._lastoper2["text"] += "    [COMPLETE PARSE]"
 
     def _expand(self, *e):
-        if self._animating_lock: return
+        if self._animating_lock:
+            return
         old_frontier = self._parser.frontier()
         rv = self._parser.expand()
         if rv is not None:
-            self._lastoper1['text'] = 'Expand:'
-            self._lastoper2['text'] = rv
-            self._prodlist.selection_clear(0, 'end')
+            self._lastoper1["text"] = "Expand:"
+            self._lastoper2["text"] = rv
+            self._prodlist.selection_clear(0, "end")
             index = self._productions.index(rv)
             self._prodlist.selection_set(index)
             self._animate_expand(old_frontier[0])
             return True
         else:
-            self._lastoper1['text'] = 'Expand:'
-            self._lastoper2['text'] = '(all expansions tried)'
+            self._lastoper1["text"] = "Expand:"
+            self._lastoper2["text"] = "(all expansions tried)"
             return False
 
     def _match(self, *e):
-        if self._animating_lock: return
+        if self._animating_lock:
+            return
         old_frontier = self._parser.frontier()
         rv = self._parser.match()
         if rv is not None:
-            self._lastoper1['text'] = 'Match:'
-            self._lastoper2['text'] = rv
+            self._lastoper1["text"] = "Match:"
+            self._lastoper2["text"] = rv
             self._animate_match(old_frontier[0])
             return True
         else:
-            self._lastoper1['text'] = 'Match:'
-            self._lastoper2['text'] = '(failed)'
+            self._lastoper1["text"] = "Match:"
+            self._lastoper2["text"] = "(failed)"
             return False
 
     def _backtrack(self, *e):
-        if self._animating_lock: return
+        if self._animating_lock:
+            return
         if self._parser.backtrack():
             elt = self._parser.tree()
             for i in self._parser.frontier()[0]:
                 elt = elt[i]
-            self._lastoper1['text'] = 'Backtrack'
-            self._lastoper2['text'] = ''
+            self._lastoper1["text"] = "Backtrack"
+            self._lastoper2["text"] = ""
             if isinstance(elt, Tree):
                 self._animate_backtrack(self._parser.frontier()[0])
             else:
@@ -576,16 +695,18 @@ class RecursiveDescentApp(object):
             return True
         else:
             self._autostep = 0
-            self._lastoper1['text'] = 'Finished'
-            self._lastoper2['text'] = ''
+            self._lastoper1["text"] = "Finished"
+            self._lastoper2["text"] = ""
             return False
 
     def about(self, *e):
-        ABOUT = ("NLTK Recursive Descent Parser Application\n"+
-                 "Written by Edward Loper")
-        TITLE = 'About: Recursive Descent Parser Application'
+        ABOUT = (
+            "NLTK Recursive Descent Parser Application\n" + "Written by Edward Loper"
+        )
+        TITLE = "About: Recursive Descent Parser Application"
         try:
-            from six.moves.tkinter_messagebox import Message
+            from tkinter.messagebox import Message
+
             Message(message=ABOUT, title=TITLE).show()
         except:
             ShowText(self._top, TITLE, ABOUT)
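
(The same six.moves replacement is applied mechanically across these files;
the mapping, with all three forms taken from hunks in this commit:)

    # Python 2 via six.moves                        Python 3 stdlib
    # from six.moves.tkinter import Tk, Menu    ->  from tkinter import Tk, Menu
    # from six.moves.tkinter_font import Font   ->  from tkinter.font import Font
    # from six.moves.tkinter_messagebox import Message
    #                                           ->  from tkinter.messagebox import Message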
@@ -594,11 +715,20 @@ class RecursiveDescentApp(object):
         self._autostep = 0
         # The default font's not very legible; try using 'fixed' instead.
         try:
-            ShowText(self._top, 'Help: Recursive Descent Parser Application',
-                     (__doc__ or '').strip(), width=75, font='fixed')
+            ShowText(
+                self._top,
+                "Help: Recursive Descent Parser Application",
+                (__doc__ or "").strip(),
+                width=75,
+                font="fixed",
+            )
         except:
-            ShowText(self._top, 'Help: Recursive Descent Parser Application',
-                     (__doc__ or '').strip(), width=75)
+            ShowText(
+                self._top,
+                "Help: Recursive Descent Parser Application",
+                (__doc__ or "").strip(),
+                width=75,
+            )
 
     def postscript(self, *e):
         self._autostep = 0
@@ -611,16 +741,18 @@ class RecursiveDescentApp(object):
         from a script); otherwise, the demo will close as soon as
         the script completes.
         """
-        if in_idle(): return
+        if in_idle():
+            return
         self._top.mainloop(*args, **kwargs)
 
     def resize(self, size=None):
-        if size is not None: self._size.set(size)
+        if size is not None:
+            self._size.set(size)
         size = self._size.get()
         self._font.configure(size=-(abs(size)))
         self._boldfont.configure(size=-(abs(size)))
         self._sysfont.configure(size=-(abs(size)))
-        self._bigfont.configure(size=-(abs(size+2)))
+        self._bigfont.configure(size=-(abs(size + 2)))
         self._redraw()
 
     #########################################
@@ -629,41 +761,43 @@ class RecursiveDescentApp(object):
 
     def _toggle_grammar(self, *e):
         if self._show_grammar.get():
-            self._prodframe.pack(fill='both', side='left', padx=2,
-                                 after=self._feedbackframe)
-            self._lastoper1['text'] = 'Show Grammar'
+            self._prodframe.pack(
+                fill="both", side="left", padx=2, after=self._feedbackframe
+            )
+            self._lastoper1["text"] = "Show Grammar"
         else:
             self._prodframe.pack_forget()
-            self._lastoper1['text'] = 'Hide Grammar'
-        self._lastoper2['text'] = ''
-
-#     def toggle_grammar(self, *e):
-#         self._show_grammar = not self._show_grammar
-#         if self._show_grammar:
-#             self._prodframe.pack(fill='both', expand='y', side='left',
-#                                  after=self._feedbackframe)
-#             self._lastoper1['text'] = 'Show Grammar'
-#         else:
-#             self._prodframe.pack_forget()
-#             self._lastoper1['text'] = 'Hide Grammar'
-#         self._lastoper2['text'] = ''
+            self._lastoper1["text"] = "Hide Grammar"
+        self._lastoper2["text"] = ""
+
+    #     def toggle_grammar(self, *e):
+    #         self._show_grammar = not self._show_grammar
+    #         if self._show_grammar:
+    #             self._prodframe.pack(fill='both', expand='y', side='left',
+    #                                  after=self._feedbackframe)
+    #             self._lastoper1['text'] = 'Show Grammar'
+    #         else:
+    #             self._prodframe.pack_forget()
+    #             self._lastoper1['text'] = 'Hide Grammar'
+    #         self._lastoper2['text'] = ''
 
     def _prodlist_select(self, event):
         selection = self._prodlist.curselection()
-        if len(selection) != 1: return
+        if len(selection) != 1:
+            return
         index = int(selection[0])
         old_frontier = self._parser.frontier()
         production = self._parser.expand(self._productions[index])
 
         if production:
-            self._lastoper1['text'] = 'Expand:'
-            self._lastoper2['text'] = production
-            self._prodlist.selection_clear(0, 'end')
+            self._lastoper1["text"] = "Expand:"
+            self._lastoper2["text"] = production
+            self._prodlist.selection_clear(0, "end")
             self._prodlist.selection_set(index)
             self._animate_expand(old_frontier[0])
         else:
             # Reset the production selections.
-            self._prodlist.selection_clear(0, 'end')
+            self._prodlist.selection_clear(0, "end")
             for prod in self._parser.expandable_productions():
                 index = self._productions.index(prod)
                 self._prodlist.selection_set(index)
@@ -681,21 +815,25 @@ class RecursiveDescentApp(object):
         for i in treeloc:
             tree = tree[i]
 
-        widget = tree_to_treesegment(self._canvas, tree,
-                                     node_font=self._boldfont,
-                                     leaf_color='white',
-                                     tree_width=2, tree_color='white',
-                                     node_color='white',
-                                     leaf_font=self._font)
-        widget.label()['color'] = '#20a050'
+        widget = tree_to_treesegment(
+            self._canvas,
+            tree,
+            node_font=self._boldfont,
+            leaf_color="white",
+            tree_width=2,
+            tree_color="white",
+            node_color="white",
+            leaf_font=self._font,
+        )
+        widget.label()["color"] = "#20a050"
 
         (oldx, oldy) = oldtree.label().bbox()[:2]
         (newx, newy) = widget.label().bbox()[:2]
-        widget.move(oldx-newx, oldy-newy)
+        widget.move(oldx - newx, oldy - newy)
 
         if top:
             self._cframe.add_widget(widget, 0, 5)
-            widget.move(30-widget.label().bbox()[0], 0)
+            widget.move(30 - widget.label().bbox()[0], 0)
             self._tree = widget
         else:
             oldtree.parent().replace_child(oldtree, widget)
@@ -703,10 +841,14 @@ class RecursiveDescentApp(object):
         # Move the children over so they don't overlap.
         # Line the children up in a strange way.
         if widget.subtrees():
-            dx = (oldx + widget.label().width()/2 -
-                  widget.subtrees()[0].bbox()[0]/2 -
-                  widget.subtrees()[0].bbox()[2]/2)
-            for subtree in widget.subtrees(): subtree.move(dx, 0)
+            dx = (
+                oldx
+                + widget.label().width() / 2
+                - widget.subtrees()[0].bbox()[0] / 2
+                - widget.subtrees()[0].bbox()[2] / 2
+            )
+            for subtree in widget.subtrees():
+                subtree.move(dx, 0)
 
         self._makeroom(widget)
 
@@ -715,13 +857,16 @@ class RecursiveDescentApp(object):
         else:
             oldtree.destroy()
 
-        colors = ['gray%d' % (10*int(10*x/self._animation_frames.get()))
-                  for x in range(self._animation_frames.get(),0,-1)]
+        colors = [
+            "gray%d" % (10 * int(10 * x / self._animation_frames.get()))
+            for x in range(self._animation_frames.get(), 0, -1)
+        ]
 
         # Move the text string down, if necessary.
         dy = widget.bbox()[3] + 30 - self._canvas.coords(self._textline)[1]
         if dy > 0:
-            for twidget in self._textwidgets: twidget.move(0, dy)
+            for twidget in self._textwidgets:
+                twidget.move(0, dy)
             self._canvas.move(self._textline, 0, dy)
 
         self._animate_expand_frame(widget, colors)
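
(Worked out for the "Normal Animation" setting of 5 frames, the gray ramp
above evaluates to a white-to-dark fade; int() preserves the Python 2
integer-division result under Python 3's true division:)

    frames = 5
    colors = ["gray%d" % (10 * int(10 * x / frames)) for x in range(frames, 0, -1)]
    print(colors)  # ['gray100', 'gray80', 'gray60', 'gray40', 'gray20']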
@@ -731,19 +876,21 @@ class RecursiveDescentApp(object):
         Make sure that no sibling tree bbox's overlap.
         """
         parent = treeseg.parent()
-        if not isinstance(parent, TreeSegmentWidget): return
+        if not isinstance(parent, TreeSegmentWidget):
+            return
 
         index = parent.subtrees().index(treeseg)
 
         # Handle siblings to the right
-        rsiblings = parent.subtrees()[index+1:]
+        rsiblings = parent.subtrees()[index + 1 :]
         if rsiblings:
             dx = treeseg.bbox()[2] - rsiblings[0].bbox()[0] + 10
-            for sibling in rsiblings: sibling.move(dx, 0)
+            for sibling in rsiblings:
+                sibling.move(dx, 0)
 
         # Handle siblings to the left
         if index > 0:
-            lsibling = parent.subtrees()[index-1]
+            lsibling = parent.subtrees()[index - 1]
             dx = max(0, lsibling.bbox()[2] - treeseg.bbox()[0] + 10)
             treeseg.move(dx, 0)
 
@@ -753,32 +900,36 @@ class RecursiveDescentApp(object):
     def _animate_expand_frame(self, widget, colors):
         if len(colors) > 0:
             self._animating_lock = 1
-            widget['color'] = colors[0]
+            widget["color"] = colors[0]
             for subtree in widget.subtrees():
                 if isinstance(subtree, TreeSegmentWidget):
-                    subtree.label()['color'] = colors[0]
+                    subtree.label()["color"] = colors[0]
                 else:
-                    subtree['color'] = colors[0]
-            self._top.after(50, self._animate_expand_frame,
-                            widget, colors[1:])
+                    subtree["color"] = colors[0]
+            self._top.after(50, self._animate_expand_frame, widget, colors[1:])
         else:
-            widget['color'] = 'black'
+            widget["color"] = "black"
             for subtree in widget.subtrees():
                 if isinstance(subtree, TreeSegmentWidget):
-                    subtree.label()['color'] = 'black'
+                    subtree.label()["color"] = "black"
                 else:
-                    subtree['color'] = 'black'
+                    subtree["color"] = "black"
             self._redraw_quick()
-            widget.label()['color'] = 'black'
+            widget.label()["color"] = "black"
             self._animating_lock = 0
-            if self._autostep: self._step()
+            if self._autostep:
+                self._step()
 
     def _animate_backtrack(self, treeloc):
         # Flash red first, if we're animating.
-        if self._animation_frames.get() == 0: colors = []
-        else: colors = ['#a00000', '#000000', '#a00000']
-        colors += ['gray%d' % (10*int(10*x/(self._animation_frames.get())))
-                   for x in range(1, self._animation_frames.get()+1)]
+        if self._animation_frames.get() == 0:
+            colors = []
+        else:
+            colors = ["#a00000", "#000000", "#a00000"]
+        colors += [
+            "gray%d" % (10 * int(10 * x / (self._animation_frames.get())))
+            for x in range(1, self._animation_frames.get() + 1)
+        ]
 
         widgets = [self._get(self._tree, treeloc).parent()]
         for subtree in widgets[0].subtrees():
@@ -792,55 +943,59 @@ class RecursiveDescentApp(object):
     def _animate_backtrack_frame(self, widgets, colors):
         if len(colors) > 0:
             self._animating_lock = 1
-            for widget in widgets: widget['color'] = colors[0]
-            self._top.after(50, self._animate_backtrack_frame,
-                            widgets, colors[1:])
+            for widget in widgets:
+                widget["color"] = colors[0]
+            self._top.after(50, self._animate_backtrack_frame, widgets, colors[1:])
         else:
             for widget in widgets[0].subtrees():
                 widgets[0].remove_child(widget)
                 widget.destroy()
             self._redraw_quick()
             self._animating_lock = 0
-            if self._autostep: self._step()
+            if self._autostep:
+                self._step()
 
     def _animate_match_backtrack(self, treeloc):
         widget = self._get(self._tree, treeloc)
         node = widget.parent().label()
-        dy = ((node.bbox()[3] - widget.bbox()[1] + 14) /
-              max(1, self._animation_frames.get()))
-        self._animate_match_backtrack_frame(self._animation_frames.get(),
-                                            widget, dy)
+        dy = (node.bbox()[3] - widget.bbox()[1] + 14) / max(
+            1, self._animation_frames.get()
+        )
+        self._animate_match_backtrack_frame(self._animation_frames.get(), widget, dy)
 
     def _animate_match(self, treeloc):
         widget = self._get(self._tree, treeloc)
 
-        dy = ((self._textwidgets[0].bbox()[1] - widget.bbox()[3] - 10.0) /
-              max(1, self._animation_frames.get()))
+        dy = (self._textwidgets[0].bbox()[1] - widget.bbox()[3] - 10.0) / max(
+            1, self._animation_frames.get()
+        )
         self._animate_match_frame(self._animation_frames.get(), widget, dy)
 
     def _animate_match_frame(self, frame, widget, dy):
         if frame > 0:
             self._animating_lock = 1
             widget.move(0, dy)
-            self._top.after(10, self._animate_match_frame,
-                            frame-1, widget, dy)
+            self._top.after(10, self._animate_match_frame, frame - 1, widget, dy)
         else:
-            widget['color'] = '#006040'
+            widget["color"] = "#006040"
             self._redraw_quick()
             self._animating_lock = 0
-            if self._autostep: self._step()
+            if self._autostep:
+                self._step()
 
     def _animate_match_backtrack_frame(self, frame, widget, dy):
         if frame > 0:
             self._animating_lock = 1
             widget.move(0, dy)
-            self._top.after(10, self._animate_match_backtrack_frame,
-                            frame-1, widget, dy)
+            self._top.after(
+                10, self._animate_match_backtrack_frame, frame - 1, widget, dy
+            )
         else:
             widget.parent().remove_child(widget)
             widget.destroy()
             self._animating_lock = 0
-            if self._autostep: self._step()
+            if self._autostep:
+                self._step()
 
     def edit_grammar(self, *e):
         CFGEditor(self._top, self._parser.grammar(), self.set_grammar)
@@ -848,27 +1003,30 @@ class RecursiveDescentApp(object):
     def set_grammar(self, grammar):
         self._parser.set_grammar(grammar)
         self._productions = list(grammar.productions())
-        self._prodlist.delete(0, 'end')
+        self._prodlist.delete(0, "end")
         for production in self._productions:
-            self._prodlist.insert('end', (' %s' % production))
+            self._prodlist.insert("end", (" %s" % production))
 
     def edit_sentence(self, *e):
         sentence = " ".join(self._sent)
-        title = 'Edit Text'
-        instr = 'Enter a new sentence to parse.'
+        title = "Edit Text"
+        instr = "Enter a new sentence to parse."
         EntryDialog(self._top, sentence, instr, self.set_sentence, title)
 
     def set_sentence(self, sentence):
-        self._sent = sentence.split() #[XX] use tagged?
+        self._sent = sentence.split()  # [XX] use tagged?
         self.reset()
 
+
 def app():
     """
     Create a recursive descent parser demo, using a simple grammar and
     text.
     """
     from nltk.grammar import CFG
-    grammar = CFG.fromstring("""
+
+    grammar = CFG.fromstring(
+        """
     # Grammatical productions.
         S -> NP VP
         NP -> Det N PP | Det N
@@ -880,13 +1038,15 @@ def app():
         N -> 'man' | 'park' | 'dog' | 'telescope'
         V -> 'ate' | 'saw'
         P -> 'in' | 'under' | 'with'
-    """)
+    """
+    )
 
-    sent = 'the dog saw a man in the park'.split()
+    sent = "the dog saw a man in the park".split()
 
     RecursiveDescentApp(grammar, sent).mainloop()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     app()
 
-__all__ = ['app']
+__all__ = ["app"]
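
(After the port, the demo can be launched on Python 3 with a display and
tkinter available; a minimal sketch:)

    from nltk.app.rdparser_app import app
    app()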
diff --git a/nlp_resource_data/nltk/app/rdparser_app.pyc b/nlp_resource_data/nltk/app/rdparser_app.pyc
deleted file mode 100755 (executable)
index 916c96a..0000000
Binary files a/nlp_resource_data/nltk/app/rdparser_app.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/app/srparser_app.py b/nlp_resource_data/nltk/app/srparser_app.py
old mode 100755 (executable)
new mode 100644 (file)
index 0dd5786..1db15ab
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Shift-Reduce Parser Application
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -62,9 +62,8 @@ Keyboard Shortcuts::
 
 """
 
-from six.moves.tkinter_font import Font
-from six.moves.tkinter import (IntVar, Listbox, Button, Frame, Label, Menu,
-                               Scrollbar, Tk)
+from tkinter.font import Font
+from tkinter import IntVar, Listbox, Button, Frame, Label, Menu, Scrollbar, Tk
 
 from nltk.tree import Tree
 from nltk.parse import SteppingShiftReduceParser
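
(The stepping parser imported here can also be driven headlessly; a sketch
using only methods this module already relies on, with a hypothetical toy
grammar:)

    from nltk import CFG
    from nltk.parse import SteppingShiftReduceParser

    grammar = CFG.fromstring("""
        S -> NP VP
        NP -> Det N
        VP -> V NP
        Det -> 'the' | 'a'
        N -> 'dog' | 'man'
        V -> 'saw'
    """)
    parser = SteppingShiftReduceParser(grammar)
    parser.initialize("the dog saw a man".split())
    while parser.step():  # reduce if possible, otherwise shift, until neither applies
        pass
    print(list(parser.parses()))  # one complete S tree for the sentence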
@@ -84,6 +83,7 @@ Possible future improvements:
     responsible for that.
 """
 
+
 class ShiftReduceApp(object):
     """
     A graphical tool for exploring the shift-reduce parser.  The tool
@@ -94,19 +94,20 @@ class ShiftReduceApp(object):
     the parsing process, performing the operations that
     ``nltk.parse.ShiftReduceParser`` would use.
     """
+
     def __init__(self, grammar, sent, trace=0):
         self._sent = sent
         self._parser = SteppingShiftReduceParser(grammar, trace)
 
         # Set up the main window.
         self._top = Tk()
-        self._top.title('Shift Reduce Parser Application')
+        self._top.title("Shift Reduce Parser Application")
 
         # Animations.  animating_lock is a lock to prevent the demo
         # from performing new operations while it's animating.
         self._animating_lock = 0
         self._animate = IntVar(self._top)
-        self._animate.set(10) # = medium
+        self._animate.set(10)  # = medium
 
         # The user can hide the grammar.
         self._show_grammar = IntVar(self._top)
@@ -130,7 +131,7 @@ class ShiftReduceApp(object):
 
         # Reset the demo, and set the feedback frame to empty.
         self.reset()
-        self._lastoper1['text'] = ''
+        self._lastoper1["text"] = ""
 
     #########################################
     ##  Initialization Helpers
@@ -143,211 +144,302 @@ class ShiftReduceApp(object):
 
         # What's our font size (default=same as sysfont)
         self._size = IntVar(root)
-        self._size.set(self._sysfont.cget('size'))
+        self._size.set(self._sysfont.cget("size"))
 
-        self._boldfont = Font(family='helvetica', weight='bold',
-                                    size=self._size.get())
-        self._font = Font(family='helvetica',
-                                    size=self._size.get())
+        self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get())
+        self._font = Font(family="helvetica", size=self._size.get())
 
     def _init_grammar(self, parent):
         # Grammar view.
         self._prodframe = listframe = Frame(parent)
-        self._prodframe.pack(fill='both', side='left', padx=2)
-        self._prodlist_label = Label(self._prodframe,
-                                     font=self._boldfont,
-                                     text='Available Reductions')
+        self._prodframe.pack(fill="both", side="left", padx=2)
+        self._prodlist_label = Label(
+            self._prodframe, font=self._boldfont, text="Available Reductions"
+        )
         self._prodlist_label.pack()
-        self._prodlist = Listbox(self._prodframe, selectmode='single',
-                                 relief='groove', background='white',
-                                 foreground='#909090',
-                                 font=self._font,
-                                 selectforeground='#004040',
-                                 selectbackground='#c0f0c0')
+        self._prodlist = Listbox(
+            self._prodframe,
+            selectmode="single",
+            relief="groove",
+            background="white",
+            foreground="#909090",
+            font=self._font,
+            selectforeground="#004040",
+            selectbackground="#c0f0c0",
+        )
 
-        self._prodlist.pack(side='right', fill='both', expand=1)
+        self._prodlist.pack(side="right", fill="both", expand=1)
 
         self._productions = list(self._parser.grammar().productions())
         for production in self._productions:
-            self._prodlist.insert('end', (' %s' % production))
+            self._prodlist.insert("end", (" %s" % production))
         self._prodlist.config(height=min(len(self._productions), 25))
 
         # Add a scrollbar if there are more than 25 productions.
-        if 1:#len(self._productions) > 25:
-            listscroll = Scrollbar(self._prodframe,
-                                   orient='vertical')
-            self._prodlist.config(yscrollcommand = listscroll.set)
+        if 1:  # len(self._productions) > 25:
+            listscroll = Scrollbar(self._prodframe, orient="vertical")
+            self._prodlist.config(yscrollcommand=listscroll.set)
             listscroll.config(command=self._prodlist.yview)
-            listscroll.pack(side='left', fill='y')
+            listscroll.pack(side="left", fill="y")
 
         # If they select a production, apply it.
-        self._prodlist.bind('<<ListboxSelect>>', self._prodlist_select)
+        self._prodlist.bind("<<ListboxSelect>>", self._prodlist_select)
 
         # When they hover over a production, highlight it.
         self._hover = -1
-        self._prodlist.bind('<Motion>', self._highlight_hover)
-        self._prodlist.bind('<Leave>', self._clear_hover)
+        self._prodlist.bind("<Motion>", self._highlight_hover)
+        self._prodlist.bind("<Leave>", self._clear_hover)
 
     def _init_bindings(self):
         # Quit
-        self._top.bind('<Control-q>', self.destroy)
-        self._top.bind('<Control-x>', self.destroy)
-        self._top.bind('<Alt-q>', self.destroy)
-        self._top.bind('<Alt-x>', self.destroy)
+        self._top.bind("<Control-q>", self.destroy)
+        self._top.bind("<Control-x>", self.destroy)
+        self._top.bind("<Alt-q>", self.destroy)
+        self._top.bind("<Alt-x>", self.destroy)
 
         # Ops (step, shift, reduce, undo)
-        self._top.bind('<space>', self.step)
-        self._top.bind('<s>', self.shift)
-        self._top.bind('<Alt-s>', self.shift)
-        self._top.bind('<Control-s>', self.shift)
-        self._top.bind('<r>', self.reduce)
-        self._top.bind('<Alt-r>', self.reduce)
-        self._top.bind('<Control-r>', self.reduce)
-        self._top.bind('<Delete>', self.reset)
-        self._top.bind('<u>', self.undo)
-        self._top.bind('<Alt-u>', self.undo)
-        self._top.bind('<Control-u>', self.undo)
-        self._top.bind('<Control-z>', self.undo)
-        self._top.bind('<BackSpace>', self.undo)
+        self._top.bind("<space>", self.step)
+        self._top.bind("<s>", self.shift)
+        self._top.bind("<Alt-s>", self.shift)
+        self._top.bind("<Control-s>", self.shift)
+        self._top.bind("<r>", self.reduce)
+        self._top.bind("<Alt-r>", self.reduce)
+        self._top.bind("<Control-r>", self.reduce)
+        self._top.bind("<Delete>", self.reset)
+        self._top.bind("<u>", self.undo)
+        self._top.bind("<Alt-u>", self.undo)
+        self._top.bind("<Control-u>", self.undo)
+        self._top.bind("<Control-z>", self.undo)
+        self._top.bind("<BackSpace>", self.undo)
 
         # Misc
-        self._top.bind('<Control-p>', self.postscript)
-        self._top.bind('<Control-h>', self.help)
-        self._top.bind('<F1>', self.help)
-        self._top.bind('<Control-g>', self.edit_grammar)
-        self._top.bind('<Control-t>', self.edit_sentence)
+        self._top.bind("<Control-p>", self.postscript)
+        self._top.bind("<Control-h>", self.help)
+        self._top.bind("<F1>", self.help)
+        self._top.bind("<Control-g>", self.edit_grammar)
+        self._top.bind("<Control-t>", self.edit_sentence)
 
         # Animation speed control
-        self._top.bind('-', lambda e,a=self._animate:a.set(20))
-        self._top.bind('=', lambda e,a=self._animate:a.set(10))
-        self._top.bind('+', lambda e,a=self._animate:a.set(4))
+        self._top.bind("-", lambda e, a=self._animate: a.set(20))
+        self._top.bind("=", lambda e, a=self._animate: a.set(10))
+        self._top.bind("+", lambda e, a=self._animate: a.set(4))
 
     def _init_buttons(self, parent):
         # Set up the frames.
         self._buttonframe = buttonframe = Frame(parent)
-        buttonframe.pack(fill='none', side='bottom')
-        Button(buttonframe, text='Step',
-               background='#90c0d0', foreground='black',
-               command=self.step,).pack(side='left')
-        Button(buttonframe, text='Shift', underline=0,
-               background='#90f090', foreground='black',
-               command=self.shift).pack(side='left')
-        Button(buttonframe, text='Reduce', underline=0,
-               background='#90f090', foreground='black',
-               command=self.reduce).pack(side='left')
-        Button(buttonframe, text='Undo', underline=0,
-               background='#f0a0a0', foreground='black',
-               command=self.undo).pack(side='left')
+        buttonframe.pack(fill="none", side="bottom")
+        Button(
+            buttonframe,
+            text="Step",
+            background="#90c0d0",
+            foreground="black",
+            command=self.step,
+        ).pack(side="left")
+        Button(
+            buttonframe,
+            text="Shift",
+            underline=0,
+            background="#90f090",
+            foreground="black",
+            command=self.shift,
+        ).pack(side="left")
+        Button(
+            buttonframe,
+            text="Reduce",
+            underline=0,
+            background="#90f090",
+            foreground="black",
+            command=self.reduce,
+        ).pack(side="left")
+        Button(
+            buttonframe,
+            text="Undo",
+            underline=0,
+            background="#f0a0a0",
+            foreground="black",
+            command=self.undo,
+        ).pack(side="left")
 
     def _init_menubar(self, parent):
         menubar = Menu(parent)
 
         filemenu = Menu(menubar, tearoff=0)
-        filemenu.add_command(label='Reset Parser', underline=0,
-                             command=self.reset, accelerator='Del')
-        filemenu.add_command(label='Print to Postscript', underline=0,
-                             command=self.postscript, accelerator='Ctrl-p')
-        filemenu.add_command(label='Exit', underline=1,
-                             command=self.destroy, accelerator='Ctrl-x')
-        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+        filemenu.add_command(
+            label="Reset Parser", underline=0, command=self.reset, accelerator="Del"
+        )
+        filemenu.add_command(
+            label="Print to Postscript",
+            underline=0,
+            command=self.postscript,
+            accelerator="Ctrl-p",
+        )
+        filemenu.add_command(
+            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"
+        )
+        menubar.add_cascade(label="File", underline=0, menu=filemenu)
 
         editmenu = Menu(menubar, tearoff=0)
-        editmenu.add_command(label='Edit Grammar', underline=5,
-                             command=self.edit_grammar,
-                             accelerator='Ctrl-g')
-        editmenu.add_command(label='Edit Text', underline=5,
-                             command=self.edit_sentence,
-                             accelerator='Ctrl-t')
-        menubar.add_cascade(label='Edit', underline=0, menu=editmenu)
+        editmenu.add_command(
+            label="Edit Grammar",
+            underline=5,
+            command=self.edit_grammar,
+            accelerator="Ctrl-g",
+        )
+        editmenu.add_command(
+            label="Edit Text",
+            underline=5,
+            command=self.edit_sentence,
+            accelerator="Ctrl-t",
+        )
+        menubar.add_cascade(label="Edit", underline=0, menu=editmenu)
 
         rulemenu = Menu(menubar, tearoff=0)
-        rulemenu.add_command(label='Step', underline=1,
-                             command=self.step, accelerator='Space')
+        rulemenu.add_command(
+            label="Step", underline=1, command=self.step, accelerator="Space"
+        )
         rulemenu.add_separator()
-        rulemenu.add_command(label='Shift', underline=0,
-                             command=self.shift, accelerator='Ctrl-s')
-        rulemenu.add_command(label='Reduce', underline=0,
-                             command=self.reduce, accelerator='Ctrl-r')
+        rulemenu.add_command(
+            label="Shift", underline=0, command=self.shift, accelerator="Ctrl-s"
+        )
+        rulemenu.add_command(
+            label="Reduce", underline=0, command=self.reduce, accelerator="Ctrl-r"
+        )
         rulemenu.add_separator()
-        rulemenu.add_command(label='Undo', underline=0,
-                             command=self.undo, accelerator='Ctrl-u')
-        menubar.add_cascade(label='Apply', underline=0, menu=rulemenu)
+        rulemenu.add_command(
+            label="Undo", underline=0, command=self.undo, accelerator="Ctrl-u"
+        )
+        menubar.add_cascade(label="Apply", underline=0, menu=rulemenu)
 
         viewmenu = Menu(menubar, tearoff=0)
-        viewmenu.add_checkbutton(label="Show Grammar", underline=0,
-                                 variable=self._show_grammar,
-                                 command=self._toggle_grammar)
+        viewmenu.add_checkbutton(
+            label="Show Grammar",
+            underline=0,
+            variable=self._show_grammar,
+            command=self._toggle_grammar,
+        )
         viewmenu.add_separator()
-        viewmenu.add_radiobutton(label='Tiny', variable=self._size,
-                                 underline=0, value=10, command=self.resize)
-        viewmenu.add_radiobutton(label='Small', variable=self._size,
-                                 underline=0, value=12, command=self.resize)
-        viewmenu.add_radiobutton(label='Medium', variable=self._size,
-                                 underline=0, value=14, command=self.resize)
-        viewmenu.add_radiobutton(label='Large', variable=self._size,
-                                 underline=0, value=18, command=self.resize)
-        viewmenu.add_radiobutton(label='Huge', variable=self._size,
-                                 underline=0, value=24, command=self.resize)
-        menubar.add_cascade(label='View', underline=0, menu=viewmenu)
+        viewmenu.add_radiobutton(
+            label="Tiny",
+            variable=self._size,
+            underline=0,
+            value=10,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Small",
+            variable=self._size,
+            underline=0,
+            value=12,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Medium",
+            variable=self._size,
+            underline=0,
+            value=14,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Large",
+            variable=self._size,
+            underline=0,
+            value=18,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Huge",
+            variable=self._size,
+            underline=0,
+            value=24,
+            command=self.resize,
+        )
+        menubar.add_cascade(label="View", underline=0, menu=viewmenu)
 
         animatemenu = Menu(menubar, tearoff=0)
-        animatemenu.add_radiobutton(label="No Animation", underline=0,
-                                    variable=self._animate, value=0)
-        animatemenu.add_radiobutton(label="Slow Animation", underline=0,
-                                    variable=self._animate, value=20,
-                                    accelerator='-')
-        animatemenu.add_radiobutton(label="Normal Animation", underline=0,
-                                    variable=self._animate, value=10,
-                                    accelerator='=')
-        animatemenu.add_radiobutton(label="Fast Animation", underline=0,
-                                    variable=self._animate, value=4,
-                                    accelerator='+')
+        animatemenu.add_radiobutton(
+            label="No Animation", underline=0, variable=self._animate, value=0
+        )
+        animatemenu.add_radiobutton(
+            label="Slow Animation",
+            underline=0,
+            variable=self._animate,
+            value=20,
+            accelerator="-",
+        )
+        animatemenu.add_radiobutton(
+            label="Normal Animation",
+            underline=0,
+            variable=self._animate,
+            value=10,
+            accelerator="=",
+        )
+        animatemenu.add_radiobutton(
+            label="Fast Animation",
+            underline=0,
+            variable=self._animate,
+            value=4,
+            accelerator="+",
+        )
         menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)
 
-
         helpmenu = Menu(menubar, tearoff=0)
-        helpmenu.add_command(label='About', underline=0,
-                             command=self.about)
-        helpmenu.add_command(label='Instructions', underline=0,
-                             command=self.help, accelerator='F1')
-        menubar.add_cascade(label='Help', underline=0, menu=helpmenu)
+        helpmenu.add_command(label="About", underline=0, command=self.about)
+        helpmenu.add_command(
+            label="Instructions", underline=0, command=self.help, accelerator="F1"
+        )
+        menubar.add_cascade(label="Help", underline=0, menu=helpmenu)
 
         parent.config(menu=menubar)
 
     def _init_feedback(self, parent):
         self._feedbackframe = feedbackframe = Frame(parent)
-        feedbackframe.pack(fill='x', side='bottom', padx=3, pady=3)
-        self._lastoper_label = Label(feedbackframe, text='Last Operation:',
-                                     font=self._font)
-        self._lastoper_label.pack(side='left')
-        lastoperframe = Frame(feedbackframe, relief='sunken', border=1)
-        lastoperframe.pack(fill='x', side='right', expand=1, padx=5)
-        self._lastoper1 = Label(lastoperframe, foreground='#007070',
-                                background='#f0f0f0', font=self._font)
-        self._lastoper2 = Label(lastoperframe, anchor='w', width=30,
-                                foreground='#004040', background='#f0f0f0',
-                                font=self._font)
-        self._lastoper1.pack(side='left')
-        self._lastoper2.pack(side='left', fill='x', expand=1)
+        feedbackframe.pack(fill="x", side="bottom", padx=3, pady=3)
+        self._lastoper_label = Label(
+            feedbackframe, text="Last Operation:", font=self._font
+        )
+        self._lastoper_label.pack(side="left")
+        lastoperframe = Frame(feedbackframe, relief="sunken", border=1)
+        lastoperframe.pack(fill="x", side="right", expand=1, padx=5)
+        self._lastoper1 = Label(
+            lastoperframe, foreground="#007070", background="#f0f0f0", font=self._font
+        )
+        self._lastoper2 = Label(
+            lastoperframe,
+            anchor="w",
+            width=30,
+            foreground="#004040",
+            background="#f0f0f0",
+            font=self._font,
+        )
+        self._lastoper1.pack(side="left")
+        self._lastoper2.pack(side="left", fill="x", expand=1)
 
     def _init_canvas(self, parent):
-        self._cframe = CanvasFrame(parent, background='white',
-                                   width=525, closeenough=10,
-                                   border=2, relief='sunken')
-        self._cframe.pack(expand=1, fill='both', side='top', pady=2)
+        self._cframe = CanvasFrame(
+            parent,
+            background="white",
+            width=525,
+            closeenough=10,
+            border=2,
+            relief="sunken",
+        )
+        self._cframe.pack(expand=1, fill="both", side="top", pady=2)
         canvas = self._canvas = self._cframe.canvas()
 
         self._stackwidgets = []
         self._rtextwidgets = []
-        self._titlebar = canvas.create_rectangle(0,0,0,0, fill='#c0f0f0',
-                                                 outline='black')
-        self._exprline = canvas.create_line(0,0,0,0, dash='.')
-        self._stacktop = canvas.create_line(0,0,0,0, fill='#408080')
-        size = self._size.get()+4
-        self._stacklabel = TextWidget(canvas, 'Stack', color='#004040',
-                                      font=self._boldfont)
-        self._rtextlabel = TextWidget(canvas, 'Remaining Text',
-                                      color='#004040', font=self._boldfont)
+        self._titlebar = canvas.create_rectangle(
+            0, 0, 0, 0, fill="#c0f0f0", outline="black"
+        )
+        self._exprline = canvas.create_line(0, 0, 0, 0, dash=".")
+        self._stacktop = canvas.create_line(0, 0, 0, 0, fill="#408080")
+        size = self._size.get() + 4
+        self._stacklabel = TextWidget(
+            canvas, "Stack", color="#004040", font=self._boldfont
+        )
+        self._rtextlabel = TextWidget(
+            canvas, "Remaining Text", color="#004040", font=self._boldfont
+        )
         self._cframe.add_widget(self._stacklabel)
         self._cframe.add_widget(self._rtextlabel)
 
@@ -356,7 +448,7 @@ class ShiftReduceApp(object):
     #########################################
 
     def _redraw(self):
-        scrollregion = self._canvas['scrollregion'].split()
+        scrollregion = self._canvas["scrollregion"].split()
         (cx1, cy1, cx2, cy2) = [int(c) for c in scrollregion]
 
         # Delete the old stack & rtext widgets.
@@ -369,30 +461,32 @@ class ShiftReduceApp(object):
 
         # Position the titlebar & exprline
         (x1, y1, x2, y2) = self._stacklabel.bbox()
-        y = y2-y1+10
-        self._canvas.coords(self._titlebar, -5000, 0, 5000, y-4)
-        self._canvas.coords(self._exprline, 0, y*2-10, 5000, y*2-10)
+        y = y2 - y1 + 10
+        self._canvas.coords(self._titlebar, -5000, 0, 5000, y - 4)
+        self._canvas.coords(self._exprline, 0, y * 2 - 10, 5000, y * 2 - 10)
 
         # Position the titlebar labels..
         (x1, y1, x2, y2) = self._stacklabel.bbox()
-        self._stacklabel.move(5-x1, 3-y1)
+        self._stacklabel.move(5 - x1, 3 - y1)
         (x1, y1, x2, y2) = self._rtextlabel.bbox()
-        self._rtextlabel.move(cx2-x2-5, 3-y1)
+        self._rtextlabel.move(cx2 - x2 - 5, 3 - y1)
 
         # Draw the stack.
         stackx = 5
         for tok in self._parser.stack():
             if isinstance(tok, Tree):
-                attribs = {'tree_color': '#4080a0', 'tree_width': 2,
-                           'node_font': self._boldfont,
-                           'node_color': '#006060',
-                           'leaf_color': '#006060', 'leaf_font':self._font}
-                widget = tree_to_treesegment(self._canvas, tok,
-                                             **attribs)
-                widget.label()['color'] = '#000000'
+                attribs = {
+                    "tree_color": "#4080a0",
+                    "tree_width": 2,
+                    "node_font": self._boldfont,
+                    "node_color": "#006060",
+                    "leaf_color": "#006060",
+                    "leaf_font": self._font,
+                }
+                widget = tree_to_treesegment(self._canvas, tok, **attribs)
+                widget.label()["color"] = "#000000"
             else:
-                widget = TextWidget(self._canvas, tok,
-                                    color='#000000', font=self._font)
+                widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
             widget.bind_click(self._popup_reduce)
             self._stackwidgets.append(widget)
             self._cframe.add_widget(widget, stackx, y)
@@ -401,8 +495,7 @@ class ShiftReduceApp(object):
         # Draw the remaining text.
         rtextwidth = 0
         for tok in self._parser.remaining_text():
-            widget = TextWidget(self._canvas, tok,
-                                color='#000000', font=self._font)
+            widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
             self._rtextwidgets.append(widget)
             self._cframe.add_widget(widget, rtextwidth, y)
             rtextwidth = widget.bbox()[2] + 4
@@ -414,23 +507,27 @@ class ShiftReduceApp(object):
         # Move the remaining text to the correct location (keep it
         # right-justified, when possible); and move the remaining text
         # label, if necessary.
-        stackx = max(stackx, self._stacklabel.width()+25)
-        rlabelwidth = self._rtextlabel.width()+10
-        if stackx >= cx2-max(rtextwidth, rlabelwidth):
+        stackx = max(stackx, self._stacklabel.width() + 25)
+        rlabelwidth = self._rtextlabel.width() + 10
+        if stackx >= cx2 - max(rtextwidth, rlabelwidth):
             cx2 = stackx + max(rtextwidth, rlabelwidth)
         for rtextwidget in self._rtextwidgets:
-            rtextwidget.move(4+cx2-rtextwidth, 0)
-        self._rtextlabel.move(cx2-self._rtextlabel.bbox()[2]-5, 0)
+            rtextwidget.move(4 + cx2 - rtextwidth, 0)
+        self._rtextlabel.move(cx2 - self._rtextlabel.bbox()[2] - 5, 0)
 
-        midx = (stackx + cx2-max(rtextwidth, rlabelwidth))/2
+        midx = (stackx + cx2 - max(rtextwidth, rlabelwidth)) / 2
         self._canvas.coords(self._stacktop, midx, 0, midx, 5000)
         (x1, y1, x2, y2) = self._stacklabel.bbox()
 
         # Set up a binding that lets the user shift a token by dragging it.
         if len(self._rtextwidgets) > 0:
+
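+            # Default arguments bind the current midx and self so Tk can call this later.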
             def drag_shift(widget, midx=midx, self=self):
-                if widget.bbox()[0] < midx: self.shift()
-                else: self._redraw()
+                if widget.bbox()[0] < midx:
+                    self.shift()
+                else:
+                    self._redraw()
+
             self._rtextwidgets[0].bind_drag(drag_shift)
             self._rtextwidgets[0].bind_click(self.shift)
 
@@ -439,12 +536,12 @@ class ShiftReduceApp(object):
 
     def _draw_stack_top(self, widget):
         # hack..
-        midx = widget.bbox()[2]+50
+        midx = widget.bbox()[2] + 50
         self._canvas.coords(self._stacktop, midx, 0, midx, 5000)
 
     def _highlight_productions(self):
         # Highlight the productions that can be reduced.
-        self._prodlist.selection_clear(0, 'end')
+        self._prodlist.selection_clear(0, "end")
         for prod in self._parser.reducible_productions():
             index = self._productions.index(prod)
             self._prodlist.selection_set(index)
@@ -454,33 +551,37 @@ class ShiftReduceApp(object):
     #########################################
 
     def destroy(self, *e):
-        if self._top is None: return
+        if self._top is None:
+            return
         self._top.destroy()
         self._top = None
 
     def reset(self, *e):
         self._parser.initialize(self._sent)
-        self._lastoper1['text'] = 'Reset App'
-        self._lastoper2['text'] = ''
+        self._lastoper1["text"] = "Reset App"
+        self._lastoper2["text"] = ""
         self._redraw()
 
     def step(self, *e):
-        if self.reduce(): return True
-        elif self.shift(): return True
+        if self.reduce():
+            return True
+        elif self.shift():
+            return True
         else:
             if list(self._parser.parses()):
-                self._lastoper1['text'] = 'Finished:'
-                self._lastoper2['text'] = 'Success'
+                self._lastoper1["text"] = "Finished:"
+                self._lastoper2["text"] = "Success"
             else:
-                self._lastoper1['text'] = 'Finished:'
-                self._lastoper2['text'] = 'Failure'
+                self._lastoper1["text"] = "Finished:"
+                self._lastoper2["text"] = "Failure"
 
     def shift(self, *e):
-        if self._animating_lock: return
+        if self._animating_lock:
+            return
         if self._parser.shift():
             tok = self._parser.stack()[-1]
-            self._lastoper1['text'] = 'Shift:'
-            self._lastoper2['text'] = '%r' % tok
+            self._lastoper1["text"] = "Shift:"
+            self._lastoper2["text"] = "%r" % tok
             if self._animate.get():
                 self._animate_shift()
             else:
@@ -489,11 +590,12 @@ class ShiftReduceApp(object):
         return False
 
     def reduce(self, *e):
-        if self._animating_lock: return
+        if self._animating_lock:
+            return
         production = self._parser.reduce()
         if production:
-            self._lastoper1['text'] = 'Reduce:'
-            self._lastoper2['text'] = '%s' % production
+            self._lastoper1["text"] = "Reduce:"
+            self._lastoper2["text"] = "%s" % production
             if self._animate.get():
                 self._animate_reduce()
             else:
@@ -501,7 +603,8 @@ class ShiftReduceApp(object):
         return production
 
     def undo(self, *e):
-        if self._animating_lock: return
+        if self._animating_lock:
+            return
         if self._parser.undo():
             self._redraw()
 
@@ -515,7 +618,8 @@ class ShiftReduceApp(object):
         from a script); otherwise, the demo will close as soon as
         the script completes.
         """
-        if in_idle(): return
+        if in_idle():
+            return
         self._top.mainloop(*args, **kwargs)
 
     #########################################
@@ -523,36 +627,46 @@ class ShiftReduceApp(object):
     #########################################
 
     def resize(self, size=None):
-        if size is not None: self._size.set(size)
+        if size is not None:
+            self._size.set(size)
         size = self._size.get()
         self._font.configure(size=-(abs(size)))
         self._boldfont.configure(size=-(abs(size)))
         self._sysfont.configure(size=-(abs(size)))
 
-        #self._stacklabel['font'] = ('helvetica', -size-4, 'bold')
-        #self._rtextlabel['font'] = ('helvetica', -size-4, 'bold')
-        #self._lastoper_label['font'] = ('helvetica', -size)
-        #self._lastoper1['font'] = ('helvetica', -size)
-        #self._lastoper2['font'] = ('helvetica', -size)
-        #self._prodlist['font'] = ('helvetica', -size)
-        #self._prodlist_label['font'] = ('helvetica', -size-2, 'bold')
+        # self._stacklabel['font'] = ('helvetica', -size-4, 'bold')
+        # self._rtextlabel['font'] = ('helvetica', -size-4, 'bold')
+        # self._lastoper_label['font'] = ('helvetica', -size)
+        # self._lastoper1['font'] = ('helvetica', -size)
+        # self._lastoper2['font'] = ('helvetica', -size)
+        # self._prodlist['font'] = ('helvetica', -size)
+        # self._prodlist_label['font'] = ('helvetica', -size-2, 'bold')
         self._redraw()
 
     def help(self, *e):
         # The default font's not very legible; try using 'fixed' instead.
         try:
-            ShowText(self._top, 'Help: Shift-Reduce Parser Application',
-                     (__doc__ or '').strip(), width=75, font='fixed')
+            ShowText(
+                self._top,
+                "Help: Shift-Reduce Parser Application",
+                (__doc__ or "").strip(),
+                width=75,
+                font="fixed",
+            )
         except:
-            ShowText(self._top, 'Help: Shift-Reduce Parser Application',
-                     (__doc__ or '').strip(), width=75)
+            ShowText(
+                self._top,
+                "Help: Shift-Reduce Parser Application",
+                (__doc__ or "").strip(),
+                width=75,
+            )
 
     def about(self, *e):
-        ABOUT = ("NLTK Shift-Reduce Parser Application\n"+
-                 "Written by Edward Loper")
-        TITLE = 'About: Shift-Reduce Parser Application'
+        ABOUT = "NLTK Shift-Reduce Parser Application\n" + "Written by Edward Loper"
+        TITLE = "About: Shift-Reduce Parser Application"
         try:
-            from six.moves.tkinter_messagebox import Message
+            from tkinter.messagebox import Message
+
             Message(message=ABOUT, title=TITLE).show()
         except:
             ShowText(self._top, TITLE, ABOUT)
@@ -563,18 +677,18 @@ class ShiftReduceApp(object):
     def set_grammar(self, grammar):
         self._parser.set_grammar(grammar)
         self._productions = list(grammar.productions())
-        self._prodlist.delete(0, 'end')
+        self._prodlist.delete(0, "end")
         for production in self._productions:
-            self._prodlist.insert('end', (' %s' % production))
+            self._prodlist.insert("end", (" %s" % production))
 
     def edit_sentence(self, *e):
         sentence = " ".join(self._sent)
-        title = 'Edit Text'
-        instr = 'Enter a new sentence to parse.'
+        title = "Edit Text"
+        instr = "Enter a new sentence to parse."
         EntryDialog(self._top, sentence, instr, self.set_sentence, title)
 
     def set_sentence(self, sent):
-        self._sent = sent.split() #[XX] use tagged?
+        self._sent = sent.split()  # [XX] use tagged?
         self.reset()
 
     #########################################
@@ -583,29 +697,31 @@ class ShiftReduceApp(object):
 
     def _toggle_grammar(self, *e):
         if self._show_grammar.get():
-            self._prodframe.pack(fill='both', side='left', padx=2,
-                                 after=self._feedbackframe)
-            self._lastoper1['text'] = 'Show Grammar'
+            self._prodframe.pack(
+                fill="both", side="left", padx=2, after=self._feedbackframe
+            )
+            self._lastoper1["text"] = "Show Grammar"
         else:
             self._prodframe.pack_forget()
-            self._lastoper1['text'] = 'Hide Grammar'
-        self._lastoper2['text'] = ''
+            self._lastoper1["text"] = "Hide Grammar"
+        self._lastoper2["text"] = ""
 
     def _prodlist_select(self, event):
         selection = self._prodlist.curselection()
-        if len(selection) != 1: return
+        if len(selection) != 1:
+            return
         index = int(selection[0])
         production = self._parser.reduce(self._productions[index])
         if production:
-            self._lastoper1['text'] = 'Reduce:'
-            self._lastoper2['text'] = '%s' % production
+            self._lastoper1["text"] = "Reduce:"
+            self._lastoper2["text"] = "%s" % production
             if self._animate.get():
                 self._animate_reduce()
             else:
                 self._redraw()
         else:
             # Reset the production selections.
-            self._prodlist.selection_clear(0, 'end')
+            self._prodlist.selection_clear(0, "end")
             for prod in self._parser.reducible_productions():
                 index = self._productions.index(prod)
                 self._prodlist.selection_set(index)
@@ -613,14 +729,15 @@ class ShiftReduceApp(object):
     def _popup_reduce(self, widget):
         # Remove old commands.
         productions = self._parser.reducible_productions()
-        if len(productions) == 0: return
+        if len(productions) == 0:
+            return
 
-        self._reduce_menu.delete(0, 'end')
+        self._reduce_menu.delete(0, "end")
         for production in productions:
-            self._reduce_menu.add_command(label=str(production),
-                                          command=self.reduce)
-        self._reduce_menu.post(self._canvas.winfo_pointerx(),
-                               self._canvas.winfo_pointery())
+            self._reduce_menu.add_command(label=str(production), command=self.reduce)
+        self._reduce_menu.post(
+            self._canvas.winfo_pointerx(), self._canvas.winfo_pointery()
+        )
 
     #########################################
     ##  Animations
@@ -632,20 +749,21 @@ class ShiftReduceApp(object):
 
         # Where are we shifting from & to?
         right = widget.bbox()[0]
-        if len(self._stackwidgets) == 0: left = 5
-        else: left = self._stackwidgets[-1].bbox()[2]+10
+        if len(self._stackwidgets) == 0:
+            left = 5
+        else:
+            left = self._stackwidgets[-1].bbox()[2] + 10
 
         # Start animating.
         dt = self._animate.get()
-        dx = (left-right)*1.0/dt
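+        # dx is the per-frame step: after dt frames of 10 ms the token reaches the stack.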
+        dx = (left - right) * 1.0 / dt
         self._animate_shift_frame(dt, widget, dx)
 
     def _animate_shift_frame(self, frame, widget, dx):
         if frame > 0:
             self._animating_lock = 1
             widget.move(dx, 0)
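+            # Schedule the next frame in 10 ms; the recursion ends when frame reaches 0.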
-            self._top.after(10, self._animate_shift_frame,
-                            frame-1, widget, dx)
+            self._top.after(10, self._animate_shift_frame, frame - 1, widget, dx)
         else:
             # but: stacktop??
 
@@ -660,7 +778,7 @@ class ShiftReduceApp(object):
 
     def _animate_reduce(self):
         # What widgets are we shifting?
-        numwidgets = len(self._parser.stack()[-1]) # number of children
+        numwidgets = len(self._parser.stack()[-1])  # number of children
         widgets = self._stackwidgets[-numwidgets:]
 
         # How far are we moving?
@@ -671,29 +789,32 @@ class ShiftReduceApp(object):
 
         # Start animating.
         dt = self._animate.get()
-        dy = ydist*2.0/dt
-        self._animate_reduce_frame(dt/2, widgets, dy)
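+        # dy * (dt / 2) == ydist, so the widgets cover the whole distance in dt / 2 frames.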
+        dy = ydist * 2.0 / dt
+        self._animate_reduce_frame(dt / 2, widgets, dy)
 
     def _animate_reduce_frame(self, frame, widgets, dy):
         if frame > 0:
             self._animating_lock = 1
-            for widget in widgets: widget.move(0, dy)
-            self._top.after(10, self._animate_reduce_frame,
-                            frame-1, widgets, dy)
+            for widget in widgets:
+                widget.move(0, dy)
+            self._top.after(10, self._animate_reduce_frame, frame - 1, widgets, dy)
         else:
-            del self._stackwidgets[-len(widgets):]
+            del self._stackwidgets[-len(widgets) :]
             for widget in widgets:
                 self._cframe.remove_widget(widget)
             tok = self._parser.stack()[-1]
-            if not isinstance(tok, Tree): raise ValueError()
-            label = TextWidget(self._canvas, str(tok.label()), color='#006060',
-                               font=self._boldfont)
-            widget = TreeSegmentWidget(self._canvas, label, widgets,
-                                       width=2)
+            if not isinstance(tok, Tree):
+                raise ValueError()
+            label = TextWidget(
+                self._canvas, str(tok.label()), color="#006060", font=self._boldfont
+            )
+            widget = TreeSegmentWidget(self._canvas, label, widgets, width=2)
             (x1, y1, x2, y2) = self._stacklabel.bbox()
-            y = y2-y1+10
-            if not self._stackwidgets: x = 5
-            else: x = self._stackwidgets[-1].bbox()[2] + 10
+            y = y2 - y1 + 10
+            if not self._stackwidgets:
+                x = 5
+            else:
+                x = self._stackwidgets[-1].bbox()[2] + 10
             self._cframe.add_widget(widget, x, y)
             self._stackwidgets.append(widget)
 
@@ -701,32 +822,32 @@ class ShiftReduceApp(object):
             self._draw_stack_top(widget)
             self._highlight_productions()
 
-#             # Delete the old widgets..
-#             del self._stackwidgets[-len(widgets):]
-#             for widget in widgets:
-#                 self._cframe.destroy_widget(widget)
-#
-#             # Make a new one.
-#             tok = self._parser.stack()[-1]
-#             if isinstance(tok, Tree):
-#                 attribs = {'tree_color': '#4080a0', 'tree_width': 2,
-#                            'node_font': bold, 'node_color': '#006060',
-#                            'leaf_color': '#006060', 'leaf_font':self._font}
-#                 widget = tree_to_treesegment(self._canvas, tok.type(),
-#                                              **attribs)
-#                 widget.node()['color'] = '#000000'
-#             else:
-#                 widget = TextWidget(self._canvas, tok.type(),
-#                                     color='#000000', font=self._font)
-#             widget.bind_click(self._popup_reduce)
-#             (x1, y1, x2, y2) = self._stacklabel.bbox()
-#             y = y2-y1+10
-#             if not self._stackwidgets: x = 5
-#             else: x = self._stackwidgets[-1].bbox()[2] + 10
-#             self._cframe.add_widget(widget, x, y)
-#             self._stackwidgets.append(widget)
-
-            #self._redraw()
+            #             # Delete the old widgets..
+            #             del self._stackwidgets[-len(widgets):]
+            #             for widget in widgets:
+            #                 self._cframe.destroy_widget(widget)
+            #
+            #             # Make a new one.
+            #             tok = self._parser.stack()[-1]
+            #             if isinstance(tok, Tree):
+            #                 attribs = {'tree_color': '#4080a0', 'tree_width': 2,
+            #                            'node_font': bold, 'node_color': '#006060',
+            #                            'leaf_color': '#006060', 'leaf_font':self._font}
+            #                 widget = tree_to_treesegment(self._canvas, tok.type(),
+            #                                              **attribs)
+            #                 widget.node()['color'] = '#000000'
+            #             else:
+            #                 widget = TextWidget(self._canvas, tok.type(),
+            #                                     color='#000000', font=self._font)
+            #             widget.bind_click(self._popup_reduce)
+            #             (x1, y1, x2, y2) = self._stacklabel.bbox()
+            #             y = y2-y1+10
+            #             if not self._stackwidgets: x = 5
+            #             else: x = self._stackwidgets[-1].bbox()[2] + 10
+            #             self._cframe.add_widget(widget, x, y)
+            #             self._stackwidgets.append(widget)
+
+            # self._redraw()
             self._animating_lock = 0
 
     #########################################
@@ -736,7 +857,8 @@ class ShiftReduceApp(object):
     def _highlight_hover(self, event):
         # What production are we hovering over?
         index = self._prodlist.nearest(event.y)
-        if self._hover == index: return
+        if self._hover == index:
+            return
 
         # Clear any previous hover highlighting.
         self._clear_hover()
@@ -748,22 +870,23 @@ class ShiftReduceApp(object):
             rhslen = len(self._productions[index].rhs())
             for stackwidget in self._stackwidgets[-rhslen:]:
                 if isinstance(stackwidget, TreeSegmentWidget):
-                    stackwidget.label()['color'] = '#00a000'
+                    stackwidget.label()["color"] = "#00a000"
                 else:
-                    stackwidget['color'] = '#00a000'
+                    stackwidget["color"] = "#00a000"
 
         # Remember what production we're hovering over.
         self._hover = index
 
     def _clear_hover(self, *event):
         # Clear any previous hover highlighting.
-        if self._hover == -1: return
+        if self._hover == -1:
+            return
         self._hover = -1
         for stackwidget in self._stackwidgets:
             if isinstance(stackwidget, TreeSegmentWidget):
-                stackwidget.label()['color'] = 'black'
+                stackwidget.label()["color"] = "black"
             else:
-                stackwidget['color'] = 'black'
+                stackwidget["color"] = "black"
 
 
 def app():
@@ -773,9 +896,9 @@ def app():
     """
 
     from nltk.grammar import Nonterminal, Production, CFG
-    nonterminals = 'S VP NP PP P N Name V Det'
-    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s)
-                                           for s in nonterminals.split()]
+
+    nonterminals = "S VP NP PP P N Name V Det"
+    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()]
 
     productions = (
         # Syntactic Productions
@@ -786,24 +909,29 @@ def app():
         Production(VP, [V, NP, PP]),
         Production(VP, [V, NP]),
         Production(PP, [P, NP]),
-
         # Lexical Productions
-        Production(NP, ['I']),   Production(Det, ['the']),
-        Production(Det, ['a']),  Production(N, ['man']),
-        Production(V, ['saw']),  Production(P, ['in']),
-        Production(P, ['with']), Production(N, ['park']),
-        Production(N, ['dog']),  Production(N, ['statue']),
-        Production(Det, ['my']),
-        )
+        Production(NP, ["I"]),
+        Production(Det, ["the"]),
+        Production(Det, ["a"]),
+        Production(N, ["man"]),
+        Production(V, ["saw"]),
+        Production(P, ["in"]),
+        Production(P, ["with"]),
+        Production(N, ["park"]),
+        Production(N, ["dog"]),
+        Production(N, ["statue"]),
+        Production(Det, ["my"]),
+    )
 
     grammar = CFG(S, productions)
 
     # tokenize the sentence
-    sent = 'my dog saw a man in the park with a statue'.split()
+    sent = "my dog saw a man in the park with a statue".split()
 
     ShiftReduceApp(grammar, sent).mainloop()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     app()
 
-__all__ = ['app']
+__all__ = ["app"]
diff --git a/nlp_resource_data/nltk/app/srparser_app.pyc b/nlp_resource_data/nltk/app/srparser_app.pyc
deleted file mode 100755 (executable)
index 584ef70..0000000
Binary files a/nlp_resource_data/nltk/app/srparser_app.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/app/wordfreq_app.py b/nlp_resource_data/nltk/app/wordfreq_app.py
old mode 100755 (executable)
new mode 100644 (file)
index 23bc796..522139c
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Wordfreq Application
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -9,12 +9,13 @@ from matplotlib import pylab
 from nltk.text import Text
 from nltk.corpus import gutenberg
 
+
 def plot_word_freq_dist(text):
     fd = text.vocab()
 
     samples = [item for item, _ in fd.most_common(50)]
     values = [fd[sample] for sample in samples]
-    values = [sum(values[:i+1]) * 100.0/fd.N() for i in range(len(values))]
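+    # Convert the top-50 counts into a cumulative percentage of all tokens.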
+    values = [sum(values[: i + 1]) * 100.0 / fd.N() for i in range(len(values))]
     pylab.title(text.name)
     pylab.xlabel("Samples")
     pylab.ylabel("Cumulative Percentage")
@@ -22,11 +23,13 @@ def plot_word_freq_dist(text):
     pylab.xticks(range(len(samples)), [str(s) for s in samples], rotation=90)
     pylab.show()
 
+
 def app():
-    t1 = Text(gutenberg.words('melville-moby_dick.txt'))
+    t1 = Text(gutenberg.words("melville-moby_dick.txt"))
     plot_word_freq_dist(t1)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     app()
 
-__all__ = ['app']
+__all__ = ["app"]
diff --git a/nlp_resource_data/nltk/app/wordfreq_app.pyc b/nlp_resource_data/nltk/app/wordfreq_app.pyc
deleted file mode 100755 (executable)
index fde9608..0000000
Binary files a/nlp_resource_data/nltk/app/wordfreq_app.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/app/wordnet_app.py b/nlp_resource_data/nltk/app/wordnet_app.py
old mode 100755 (executable)
new mode 100644 (file)
index 13807bc..f31000a
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: WordNet Browser Application
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Jussi Salmela <jtsalmela@users.sourceforge.net>
 #         Paul Bone <pbone@students.csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
@@ -44,7 +44,6 @@ Options::
 # modifying to be compliant with NLTK's coding standards.  Tests also
 # need to be developed to ensure this continues to work in the face of
 # changes to other NLTK packages.
-from __future__ import print_function
 
 # Allow this program to run inside the NLTK source tree.
 from sys import path
@@ -62,18 +61,12 @@ import getopt
 import base64
 import pickle
 import copy
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from urllib.parse import unquote_plus
 
-from six.moves.urllib.parse import unquote_plus
-
-from nltk import compat
 from nltk.corpus import wordnet as wn
 from nltk.corpus.reader.wordnet import Synset, Lemma
 
-if compat.PY3:
-    from http.server import HTTPServer, BaseHTTPRequestHandler
-else:
-    from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
-
 # now included in local file
 # from util import html_header, html_trailer, \
 #    get_static_index_page, get_static_page_by_path, \
@@ -90,78 +83,83 @@ logfile = None
 
 
 class MyServerHandler(BaseHTTPRequestHandler):
-
     def do_HEAD(self):
         self.send_head()
 
     def do_GET(self):
         global firstClient
         sp = self.path[1:]
-        if unquote_plus(sp) == 'SHUTDOWN THE SERVER':
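+        # The request path (minus the leading "/") doubles as the command string.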
+        if unquote_plus(sp) == "SHUTDOWN THE SERVER":
             if server_mode:
                 page = "Server must be killed with SIGTERM."
                 type = "text/plain"
             else:
-                print('Server shutting down!')
+                print("Server shutting down!")
                 os._exit(0)
 
-        elif sp == '': # First request.
-            type = 'text/html'
+        elif sp == "":  # First request.
+            type = "text/html"
             if not server_mode and firstClient:
                 firstClient = False
                 page = get_static_index_page(True)
             else:
                 page = get_static_index_page(False)
-            word = 'green'
+            word = "green"
 
-        elif sp.endswith('.html'): # Trying to fetch a HTML file TODO:
-            type = 'text/html'
+        elif sp.endswith(".html"):  # Trying to fetch a HTML file TODO:
+            type = "text/html"
             usp = unquote_plus(sp)
-            if usp == 'NLTK Wordnet Browser Database Info.html':
-                word = '* Database Info *'
+            if usp == "NLTK Wordnet Browser Database Info.html":
+                word = "* Database Info *"
                 if os.path.isfile(usp):
-                    with open(usp, 'r') as infile:
+                    with open(usp, "r") as infile:
                         page = infile.read()
                 else:
-                    page = (html_header % word) + \
-                        '<p>The database info file:'\
-                        '<p><b>' + usp + '</b>' + \
-                        '<p>was not found. Run this:' + \
-                        '<p><b>python dbinfo_html.py</b>' + \
-                        '<p>to produce it.' + html_trailer
+                    page = (
+                        (html_header % word) + "<p>The database info file:"
+                        "<p><b>"
+                        + usp
+                        + "</b>"
+                        + "<p>was not found. Run this:"
+                        + "<p><b>python dbinfo_html.py</b>"
+                        + "<p>to produce it."
+                        + html_trailer
+                    )
             else:
                 # Handle files here.
                 word = sp
                 page = get_static_page_by_path(usp)
         elif sp.startswith("search"):
             # This doesn't seem to work with MWEs.
-            type = 'text/html'
+            type = "text/html"
             parts = (sp.split("?")[1]).split("&")
-            word = [p.split("=")[1].replace("+", " ")
-                    for p in parts if p.startswith("nextWord")][0]
+            word = [
+                p.split("=")[1].replace("+", " ")
+                for p in parts
+                if p.startswith("nextWord")
+            ][0]
             page, word = page_from_word(word)
         elif sp.startswith("lookup_"):
             # TODO add a variation of this that takes a non-encoded word or MWE.
-            type = 'text/html'
-            sp = sp[len("lookup_"):]
+            type = "text/html"
+            sp = sp[len("lookup_") :]
             page, word = page_from_href(sp)
         elif sp == "start_page":
             # if this is the first request we should display help
             # information, and possibly set a default word.
-            type = 'text/html'
+            type = "text/html"
             page, word = page_from_word("wordnet")
         else:
-            type = 'text/plain'
+            type = "text/plain"
             page = "Could not parse request: '%s'" % sp
 
         # Send result.
         self.send_head(type)
-        self.wfile.write(page.encode('utf8'))
-
+        self.wfile.write(page.encode("utf8"))
 
     def send_head(self, type=None):
         self.send_response(200)
-        self.send_header('Content-type', type)
+        self.send_header("Content-type", type)
         self.end_headers()
 
     def log_message(self, format, *args):
@@ -169,10 +167,9 @@ class MyServerHandler(BaseHTTPRequestHandler):
 
         if logfile:
             logfile.write(
-                "%s - - [%s] %s\n" %
-                (self.address_string(),
-                 self.log_date_time_string(),
-                 format%args))
+                "%s - - [%s] %s\n"
+                % (self.address_string(), self.log_date_time_string(), format % args)
+            )
 
 
 def get_unique_counter_from_url(sp):
@@ -180,9 +177,9 @@ def get_unique_counter_from_url(sp):
     Extract the unique counter from the URL if it has one.  Otherwise return
     null.
     """
-    pos = sp.rfind('%23')
+    pos = sp.rfind("%23")
     if pos != -1:
-        return int(sp[(pos + 3):])
+        return int(sp[(pos + 3) :])
     else:
         return None
 
@@ -222,16 +219,15 @@ def wnb(port=8000, runBrowser=True, logfilename=None):
     # Setup logging.
     if logfilename:
         try:
-            logfile = open(logfilename, "a", 1) # 1 means 'line buffering'
+            logfile = open(logfilename, "a", 1)  # 1 means 'line buffering'
         except IOError as e:
-            sys.stderr.write("Couldn't open %s for writing: %s",
-                             logfilename, e)
+            sys.stderr.write("Couldn't open %s for writing: %s", logfilename, e)
             sys.exit(1)
     else:
         logfile = None
 
     # Compute URL and start web browser
-    url = 'http://localhost:' + str(port)
+    url = "http://localhost:" + str(port)
 
     server_ready = None
     browser_thread = None
@@ -241,10 +237,9 @@ def wnb(port=8000, runBrowser=True, logfilename=None):
         browser_thread = startBrowser(url, server_ready)
 
     # Start the server.
-    server = HTTPServer(('', port), MyServerHandler)
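+    # An empty host string binds the server to all local interfaces.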
+    server = HTTPServer(("", port), MyServerHandler)
     if logfile:
-        logfile.write(
-            'NLTK Wordnet browser server running serving: %s\n' % url)
+        logfile.write("NLTK Wordnet browser server running serving: %s\n" % url)
     if runBrowser:
         server_ready.set()
 
@@ -263,13 +258,15 @@ def wnb(port=8000, runBrowser=True, logfilename=None):
 def startBrowser(url, server_ready):
     def run():
         server_ready.wait()
-        time.sleep(1) # Wait a little bit more, there's still the chance of
-                      # a race condition.
-        webbrowser.open(url, new = 2, autoraise = 1)
+        time.sleep(1)  # Wait a little bit more, there's still the chance of
+        # a race condition.
+        webbrowser.open(url, new=2, autoraise=1)
+
     t = threading.Thread(target=run)
     t.start()
     return t
 
+
 #####################################################################
 # Utilities
 #####################################################################
@@ -281,7 +278,6 @@ WordNet Browser Utilities.
 This provides a backend to both wxbrowse and browserver.py.
 """
 
-\f
 ################################################################################
 #
 # Main logic for wordnet browser.
@@ -291,10 +287,12 @@ This provides a backend to both wxbrowse and browserver.py.
 # WordNet corpus is installed.
 def _pos_tuples():
     return [
-        (wn.NOUN,'N','noun'),
-        (wn.VERB,'V','verb'),
-        (wn.ADJ,'J','adj'),
-        (wn.ADV,'R','adv')]
+        (wn.NOUN, "N", "noun"),
+        (wn.VERB, "V", "verb"),
+        (wn.ADJ, "J", "adj"),
+        (wn.ADV, "R", "adv"),
+    ]
+
 
 def _pos_match(pos_tuple):
     """
@@ -302,13 +300,14 @@ def _pos_match(pos_tuple):
     tuple given to it.  It attempts to match it against the first
     non-null component of the given pos tuple.
     """
-    if pos_tuple[0] == 's':
-        pos_tuple = ('a', pos_tuple[1], pos_tuple[2])
-    for n,x in enumerate(pos_tuple):
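+    # WordNet satellite adjectives ("s") are folded into plain adjectives ("a").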
+    if pos_tuple[0] == "s":
+        pos_tuple = ("a", pos_tuple[1], pos_tuple[2])
+    for n, x in enumerate(pos_tuple):
         if x is not None:
             break
     for pt in _pos_tuples():
-        if pt[n] == pos_tuple[n]: return pt
+        if pt[n] == pos_tuple[n]:
+            return pt
     return None
 
 
@@ -345,7 +344,6 @@ INDIRECT_HYPERNYMS = 26
 
 
 def lemma_property(word, synset, func):
-
     def flattern(l):
         if l == []:
             return []
@@ -367,78 +365,81 @@ def get_relations_data(word, synset):
     yet support things such as full hyponym vs direct hyponym.
     """
     if synset.pos() == wn.NOUN:
-        return ((HYPONYM, 'Hyponyms',
-                   synset.hyponyms()),
-                (INSTANCE_HYPONYM , 'Instance hyponyms',
-                   synset.instance_hyponyms()),
-                (HYPERNYM, 'Direct hypernyms',
-                   synset.hypernyms()),
-                (INDIRECT_HYPERNYMS, 'Indirect hypernyms',
-                   rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1]),
-#  hypernyms', 'Sister terms',
-                (INSTANCE_HYPERNYM , 'Instance hypernyms',
-                   synset.instance_hypernyms()),
-#            (CLASS_REGIONAL, ['domain term region'], ),
-                (PART_HOLONYM, 'Part holonyms',
-                   synset.part_holonyms()),
-                (PART_MERONYM, 'Part meronyms',
-                   synset.part_meronyms()),
-                (SUBSTANCE_HOLONYM, 'Substance holonyms',
-                   synset.substance_holonyms()),
-                (SUBSTANCE_MERONYM, 'Substance meronyms',
-                   synset.substance_meronyms()),
-                (MEMBER_HOLONYM, 'Member holonyms',
-                   synset.member_holonyms()),
-                (MEMBER_MERONYM, 'Member meronyms',
-                   synset.member_meronyms()),
-                (ATTRIBUTE, 'Attributes',
-                   synset.attributes()),
-                (ANTONYM, "Antonyms",
-                   lemma_property(word, synset, lambda l: l.antonyms())),
-                (DERIVATIONALLY_RELATED_FORM, "Derivationally related form",
-                   lemma_property(word, synset, lambda l: l.derivationally_related_forms())))
+        return (
+            (HYPONYM, "Hyponyms", synset.hyponyms()),
+            (INSTANCE_HYPONYM, "Instance hyponyms", synset.instance_hyponyms()),
+            (HYPERNYM, "Direct hypernyms", synset.hypernyms()),
+            (
+                INDIRECT_HYPERNYMS,
+                "Indirect hypernyms",
+                rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1],
+            ),
+            #  hypernyms', 'Sister terms',
+            (INSTANCE_HYPERNYM, "Instance hypernyms", synset.instance_hypernyms()),
+            #            (CLASS_REGIONAL, ['domain term region'], ),
+            (PART_HOLONYM, "Part holonyms", synset.part_holonyms()),
+            (PART_MERONYM, "Part meronyms", synset.part_meronyms()),
+            (SUBSTANCE_HOLONYM, "Substance holonyms", synset.substance_holonyms()),
+            (SUBSTANCE_MERONYM, "Substance meronyms", synset.substance_meronyms()),
+            (MEMBER_HOLONYM, "Member holonyms", synset.member_holonyms()),
+            (MEMBER_MERONYM, "Member meronyms", synset.member_meronyms()),
+            (ATTRIBUTE, "Attributes", synset.attributes()),
+            (ANTONYM, "Antonyms", lemma_property(word, synset, lambda l: l.antonyms())),
+            (
+                DERIVATIONALLY_RELATED_FORM,
+                "Derivationally related form",
+                lemma_property(
+                    word, synset, lambda l: l.derivationally_related_forms()
+                ),
+            ),
+        )
     elif synset.pos() == wn.VERB:
-        return ((ANTONYM, 'Antonym',
-                   lemma_property(word, synset, lambda l: l.antonyms())),
-                (HYPONYM, 'Hyponym',
-                   synset.hyponyms()),
-                (HYPERNYM, 'Direct hypernyms',
-                   synset.hypernyms()),
-                (INDIRECT_HYPERNYMS, 'Indirect hypernyms',
-                   rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1]),
-                (ENTAILMENT, 'Entailments',
-                   synset.entailments()),
-                (CAUSE, 'Causes',
-                   synset.causes()),
-                (ALSO_SEE, 'Also see',
-                   synset.also_sees()),
-                (VERB_GROUP, 'Verb Groups',
-                   synset.verb_groups()),
-                (DERIVATIONALLY_RELATED_FORM, "Derivationally related form",
-                   lemma_property(word, synset, lambda l: l.derivationally_related_forms())))
+        return (
+            (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())),
+            (HYPONYM, "Hyponym", synset.hyponyms()),
+            (HYPERNYM, "Direct hypernyms", synset.hypernyms()),
+            (
+                INDIRECT_HYPERNYMS,
+                "Indirect hypernyms",
+                rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1],
+            ),
+            (ENTAILMENT, "Entailments", synset.entailments()),
+            (CAUSE, "Causes", synset.causes()),
+            (ALSO_SEE, "Also see", synset.also_sees()),
+            (VERB_GROUP, "Verb Groups", synset.verb_groups()),
+            (
+                DERIVATIONALLY_RELATED_FORM,
+                "Derivationally related form",
+                lemma_property(
+                    word, synset, lambda l: l.derivationally_related_forms()
+                ),
+            ),
+        )
     elif synset.pos() == wn.ADJ or synset.pos() == wn.ADJ_SAT:
-        return ((ANTONYM, 'Antonym',
-                   lemma_property(word, synset, lambda l: l.antonyms())),
-                (SIMILAR, 'Similar to',
-                   synset.similar_tos()),
-                # Participle of verb - not supported by corpus
-                (PERTAINYM, 'Pertainyms',
-                   lemma_property(word, synset, lambda l: l.pertainyms())),
-                (ATTRIBUTE, 'Attributes',
-                   synset.attributes()),
-                (ALSO_SEE, 'Also see',
-                   synset.also_sees()))
+        return (
+            (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())),
+            (SIMILAR, "Similar to", synset.similar_tos()),
+            # Participle of verb - not supported by corpus
+            (
+                PERTAINYM,
+                "Pertainyms",
+                lemma_property(word, synset, lambda l: l.pertainyms()),
+            ),
+            (ATTRIBUTE, "Attributes", synset.attributes()),
+            (ALSO_SEE, "Also see", synset.also_sees()),
+        )
     elif synset.pos() == wn.ADV:
         # This is weird. Adverbs such as 'quick' and 'fast' don't seem
         # to have antonyms returned by the corpus.
-        return ((ANTONYM, 'Antonym',
-                   lemma_property(word, synset, lambda l: l.antonyms())),)
-                # Derived from adjective - not supported by corpus
+        return (
+            (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())),
+        )
+        # Derived from adjective - not supported by corpus
     else:
         raise TypeError("Unhandles synset POS type: " + str(synset.pos()))
 
 
-html_header = '''
+html_header = """
 <!DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
 'http://www.w3.org/TR/html4/strict.dtd'>
 <html>
@@ -449,13 +450,13 @@ html_header = '''
 'text/html; charset=us-ascii'>
 <title>NLTK Wordnet Browser display of: %s</title></head>
 <body bgcolor='#F5F5F5' text='#000000'>
-'''
-html_trailer = '''
+"""
+html_trailer = """
 </body>
 </html>
-'''
+"""
 
-explanation  = '''
+explanation = """
 <h3>Search Help</h3>
 <ul><li>The display below the line is an example of the output the browser
 shows you when you enter a search word. The search word was <b>green</b>.</li>
@@ -474,22 +475,33 @@ synsets.</li>
 <b>Enter/Return</b> key or click the <b>Search</b> button.</li>
 </ul>
 <hr width='100%'>
-'''
+"""
 
 # HTML oriented functions
 
-def _bold(txt): return '<b>%s</b>' % txt
 
-def _center(txt): return '<center>%s</center>' % txt
+def _bold(txt):
+    return "<b>%s</b>" % txt
+
+
+def _center(txt):
+    return "<center>%s</center>" % txt
 
-def _hlev(n,txt): return '<h%d>%s</h%d>' % (n,txt,n)
 
-def _italic(txt): return '<i>%s</i>' % txt
+def _hlev(n, txt):
+    return "<h%d>%s</h%d>" % (n, txt, n)
+
+
+def _italic(txt):
+    return "<i>%s</i>" % txt
+
+
+def _li(txt):
+    return "<li>%s</li>" % txt
 
-def _li(txt): return '<li>%s</li>' % txt
 
 def pg(word, body):
-    '''
+    """
     Return a HTML page of NLTK Browser format constructed from the
     word and body
 
@@ -499,19 +511,22 @@ def pg(word, body):
     :type body: str
     :return: a HTML page for the word-body combination
     :rtype: str
-    '''
+    """
     return (html_header % word) + body + html_trailer
 
-def _ul(txt): return '<ul>' + txt + '</ul>'
+
+def _ul(txt):
+    return "<ul>" + txt + "</ul>"
+
 
 def _abbc(txt):
     """
     abbc = asterisks, breaks, bold, center
     """
-    return _center(_bold('<br>'*10 + '*'*10 + ' ' + txt + ' ' + '*'*10))
+    return _center(_bold("<br>" * 10 + "*" * 10 + " " + txt + " " + "*" * 10))
+
 
-full_hyponym_cont_text = \
-    _ul(_li(_italic('(has full hyponym continuation)'))) + '\n'
+full_hyponym_cont_text = _ul(_li(_italic("(has full hyponym continuation)"))) + "\n"
 
 
 def _get_synset(synset_key):
@@ -521,8 +536,9 @@ def _get_synset(synset_key):
     """
     return wn.synset(synset_key)
 
+
 def _collect_one_synset(word, synset, synset_relations):
-    '''
+    """
     Returns the HTML string for one synset or word
 
     :param word: the current word
@@ -534,11 +550,11 @@ def _collect_one_synset(word, synset, synset_relations):
     :type synset_relations: dict(synset_key, set(relation_id))
     :return: The HTML string built for this synset
     :rtype: str
-    '''
-    if isinstance(synset, tuple): # It's a word
+    """
+    if isinstance(synset, tuple):  # It's a word
         raise NotImplementedError("word not supported by _collect_one_synset")
 
-    typ = 'S'
+    typ = "S"
     pos_tuple = _pos_match((synset.pos(), None, None))
     assert pos_tuple is not None, "pos_tuple is null: synset.pos(): %s" % synset.pos()
     descr = pos_tuple[2]
@@ -547,34 +563,40 @@ def _collect_one_synset(word, synset, synset_relations):
     synset_label = typ + ";"
     if synset.name() in synset_relations:
         synset_label = _bold(synset_label)
-    s = '<li>%s (%s) ' % (make_lookup_link(ref, synset_label), descr)
+    s = "<li>%s (%s) " % (make_lookup_link(ref, synset_label), descr)
+
     def format_lemma(w):
-        w = w.replace('_', ' ')
+        w = w.replace("_", " ")
         if w.lower() == word:
             return _bold(w)
         else:
             ref = Reference(w)
             return make_lookup_link(ref, w)
 
-    s += ', '.join(format_lemma(l.name()) for l in synset.lemmas())
+    s += ", ".join(format_lemma(l.name()) for l in synset.lemmas())
+
+    gl = " (%s) <i>%s</i> " % (
+        synset.definition(),
+        "; ".join('"%s"' % e for e in synset.examples()),
+    )
+    return s + gl + _synset_relations(word, synset, synset_relations) + "</li>\n"
 
-    gl = " (%s) <i>%s</i> " % \
-        (synset.definition(),
-         "; ".join("\"%s\"" % e for e in synset.examples()))
-    return s + gl + _synset_relations(word, synset, synset_relations) + '</li>\n'
 
 def _collect_all_synsets(word, pos, synset_relations=dict()):
     """
     Return a HTML unordered list of synsets for the given word and
     part of speech.
     """
-    return '<ul>%s\n</ul>\n' % \
-        ''.join((_collect_one_synset(word, synset, synset_relations)
-                 for synset
-                 in wn.synsets(word, pos)))
+    return "<ul>%s\n</ul>\n" % "".join(
+        (
+            _collect_one_synset(word, synset, synset_relations)
+            for synset in wn.synsets(word, pos)
+        )
+    )
+
 
 def _synset_relations(word, synset, synset_relations):
-    '''
+    """
     Builds the HTML string for the relations of a synset
 
     :param word: The current word
@@ -585,7 +607,7 @@ def _synset_relations(word, synset, synset_relations):
     :type synset_relations: dict(synset_key, set(relation_type))
     :return: The HTML for a synset's relations
     :rtype: str
-    '''
+    """
 
     if not synset.name() in synset_relations:
         return ""
@@ -599,29 +621,40 @@ def _synset_relations(word, synset, synset_relations):
         elif isinstance(r, tuple):
             # It's probably a tuple containing a Synset and a list of
             # similar tuples.  This forms a tree of synsets.
-            return "%s\n<ul>%s</ul>\n" % \
-                (relation_html(r[0]),
-                 ''.join('<li>%s</li>\n' % relation_html(sr) for sr in r[1]))
+            return "%s\n<ul>%s</ul>\n" % (
+                relation_html(r[0]),
+                "".join("<li>%s</li>\n" % relation_html(sr) for sr in r[1]),
+            )
         else:
-            raise TypeError("r must be a synset, lemma or list, it was: type(r) = %s, r = %s" % (type(r), r))
+            raise TypeError(
+                "r must be a synset, lemma or list, it was: type(r) = %s, r = %s"
+                % (type(r), r)
+            )
 
     def make_synset_html(db_name, disp_name, rels):
-        synset_html = '<i>%s</i>\n' % \
-            make_lookup_link(
-                copy.deepcopy(ref).toggle_synset_relation(synset, db_name).encode(),
-                disp_name)
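+        # Deep-copy the reference so toggling this relation does not mutate the current page.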
+        synset_html = "<i>%s</i>\n" % make_lookup_link(
+            copy.deepcopy(ref).toggle_synset_relation(synset, db_name).encode(),
+            disp_name,
+        )
 
         if db_name in ref.synset_relations[synset.name()]:
-             synset_html += '<ul>%s</ul>\n' % \
-                ''.join("<li>%s</li>\n" % relation_html(r) for r in rels)
+            synset_html += "<ul>%s</ul>\n" % "".join(
+                "<li>%s</li>\n" % relation_html(r) for r in rels
+            )
 
         return synset_html
 
-    html = '<ul>' + \
-        '\n'.join(("<li>%s</li>" % make_synset_html(*rel_data) for rel_data
-                   in get_relations_data(word, synset)
-                   if rel_data[2] != [])) + \
-        '</ul>'
+    html = (
+        "<ul>"
+        + "\n".join(
+            (
+                "<li>%s</li>" % make_synset_html(*rel_data)
+                for rel_data in get_relations_data(word, synset)
+                if rel_data[2] != []
+            )
+        )
+        + "</ul>"
+    )
 
     return html
 
@@ -699,17 +732,18 @@ def page_from_word(word):
     """
     Return a HTML page for the given word.
 
-    :param word: The currently active word
     :type word: str
+    :param word: The currently active word
     :return: A tuple (page,word), where page is the new current HTML page
-             to be sent to the browser and
-             word is the new current word
+        to be sent to the browser and
+        word is the new current word
     :rtype: A tuple (str,str)
     """
     return page_from_reference(Reference(word))
 
+
 def page_from_href(href):
-    '''
+    """
     Returns a tuple of the HTML page built and the new current word
 
     :param href: The hypertext reference to be solved
@@ -718,11 +752,12 @@ def page_from_href(href):
              to be sent to the browser and
              word is the new current word
     :rtype: A tuple (str,str)
-    '''
+    """
     return page_from_reference(Reference.decode(href))
 
+
 def page_from_reference(href):
-    '''
+    """
     Returns a tuple of the HTML page built and the new current word
 
     :param href: The hypertext reference to be solved
@@ -731,13 +766,11 @@ def page_from_reference(href):
              to be sent to the browser and
              word is the new current word
     :rtype: A tuple (str,str)
-    '''
+    """
     word = href.word
     pos_forms = defaultdict(list)
-    words = word.split(',')
-    words = [w for w in [w.strip().lower().replace(' ', '_')
-                         for w in words]
-             if w != ""]
+    words = word.split(",")
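+    # Normalize each term: strip, lowercase, spaces to underscores; drop empty entries.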
+    words = [w for w in [w.strip().lower().replace(" ", "_") for w in words] if w != ""]
     if len(words) == 0:
         # No words were found.
         return "", "Please specify a word to search for."
@@ -749,10 +782,10 @@ def page_from_reference(href):
             form = wn.morphy(w, pos)
             if form and form not in pos_forms[pos]:
                 pos_forms[pos].append(form)
-    body = ''
-    for pos,pos_str,name in _pos_tuples():
+    body = ""
+    for pos, pos_str, name in _pos_tuples():
         if pos in pos_forms:
-            body += _hlev(3, name) + '\n'
+            body += _hlev(3, name) + "\n"
             for w in pos_forms[pos]:
                 # Not all words of exc files are in the database, skip
                 # to the next word if a KeyError is raised.
@@ -765,11 +798,11 @@ def page_from_reference(href):
     return body, word
 
 
-\f
 #####################################################################
 # Static pages
 #####################################################################
 
+
 def get_static_page_by_path(path):
     """
     Return a static HTML page from the path given.
@@ -796,12 +829,11 @@ def get_static_web_help_page():
     """
     Return the static web help page.
     """
-    return \
-"""
+    return """
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 <html>
      <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
-            Copyright (C) 2001-2017 NLTK Project
+            Copyright (C) 2001-2020 NLTK Project
             Author: Jussi Salmela <jtsalmela@users.sourceforge.net>
             URL: <http://nltk.org/>
             For license information, see LICENSE.TXT -->
@@ -843,8 +875,7 @@ def get_static_welcome_message():
     """
     Get the static welcome page.
     """
-    return \
-"""
+    return """
 <h3>Search Help</h3>
 <ul><li>The display below the line is an example of the output the browser
 shows you when you enter a search word. The search word was <b>green</b>.</li>
@@ -863,16 +894,16 @@ synsets.</li>
 </ul>
 """
 
+
 def get_static_index_page(with_shutdown):
     """
     Get the static index page.
     """
-    template = \
-"""
+    template = """
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"  "http://www.w3.org/TR/html4/frameset.dtd">
 <HTML>
      <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
-            Copyright (C) 2001-2017 NLTK Project
+            Copyright (C) 2001-2020 NLTK Project
             Author: Jussi Salmela <jtsalmela@users.sourceforge.net>
             URL: <http://nltk.org/>
             For license information, see LICENSE.TXT -->
@@ -901,12 +932,11 @@ def get_static_upper_page(with_shutdown):
     If with_shutdown is True then a 'shutdown' button is also provided
     to shutdown the server.
     """
-    template = \
-"""
+    template = """
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 <html>
     <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
-        Copyright (C) 2001-2017 NLTK Project
+        Copyright (C) 2001-2020 NLTK Project
         Author: Jussi Salmela <jtsalmela@users.sourceforge.net>
         URL: <http://nltk.org/>
         For license information, see LICENSE.TXT -->
@@ -927,24 +957,25 @@ def get_static_upper_page(with_shutdown):
 </html>
 """
     if with_shutdown:
-        shutdown_link = "<a href=\"SHUTDOWN THE SERVER\">Shutdown</a>"
+        shutdown_link = '<a href="SHUTDOWN THE SERVER">Shutdown</a>'
     else:
         shutdown_link = ""
 
     return template % shutdown_link
 
 
-
 def usage():
     """
     Display the command line help message.
     """
     print(__doc__)
 
+
 def app():
     # Parse and interpret options.
-    (opts, _) = getopt.getopt(argv[1:], "l:p:sh",
-                              ["logfile=", "port=", "server-mode", "help"])
+    (opts, _) = getopt.getopt(
+        argv[1:], "l:p:sh", ["logfile=", "port=", "server-mode", "help"]
+    )
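+    # In "l:p:sh", "l:" and "p:" take arguments; "s" and "h" are plain flags.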
     port = 8000
     server_mode = False
     help_mode = False
@@ -964,7 +995,8 @@ def app():
     else:
         wnb(port, not server_mode, logfilename)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     app()
 
-__all__ = ['app']
+__all__ = ["app"]
diff --git a/nlp_resource_data/nltk/app/wordnet_app.pyc b/nlp_resource_data/nltk/app/wordnet_app.pyc
deleted file mode 100755 (executable)
index c800f95..0000000
Binary files a/nlp_resource_data/nltk/app/wordnet_app.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/book.py b/nlp_resource_data/nltk/book.py
old mode 100755 (executable)
new mode 100644 (file)
index 5394736..0098bed
@@ -1,14 +1,20 @@
 # Natural Language Toolkit: Some texts for exploration in chapter 1 of the book
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function
 
-from nltk.corpus import (gutenberg, genesis, inaugural,
-                         nps_chat, webtext, treebank, wordnet)
+from nltk.corpus import (
+    gutenberg,
+    genesis,
+    inaugural,
+    nps_chat,
+    webtext,
+    treebank,
+    wordnet,
+)
 from nltk.text import Text
 from nltk.probability import FreqDist
 from nltk.util import bigrams
@@ -18,13 +24,13 @@ print("Loading text1, ..., text9 and sent1, ..., sent9")
 print("Type the name of the text or sentence to view it.")
 print("Type: 'texts()' or 'sents()' to list the materials.")
 
-text1 = Text(gutenberg.words('melville-moby_dick.txt'))
+text1 = Text(gutenberg.words("melville-moby_dick.txt"))
 print("text1:", text1.name)
 
-text2 = Text(gutenberg.words('austen-sense.txt'))
+text2 = Text(gutenberg.words("austen-sense.txt"))
 print("text2:", text2.name)
 
-text3 = Text(genesis.words('english-kjv.txt'), name="The Book of Genesis")
+text3 = Text(genesis.words("english-kjv.txt"), name="The Book of Genesis")
 print("text3:", text3.name)
 
 text4 = Text(inaugural.words(), name="Inaugural Address Corpus")
@@ -33,17 +39,16 @@ print("text4:", text4.name)
 text5 = Text(nps_chat.words(), name="Chat Corpus")
 print("text5:", text5.name)
 
-text6 = Text(webtext.words('grail.txt'),
-             name="Monty Python and the Holy Grail")
+text6 = Text(webtext.words("grail.txt"), name="Monty Python and the Holy Grail")
 print("text6:", text6.name)
 
 text7 = Text(treebank.words(), name="Wall Street Journal")
 print("text7:", text7.name)
 
-text8 = Text(webtext.words('singles.txt'), name="Personals Corpus")
+text8 = Text(webtext.words("singles.txt"), name="Personals Corpus")
 print("text8:", text8.name)
 
-text9 = Text(gutenberg.words('chesterton-thursday.txt'))
+text9 = Text(gutenberg.words("chesterton-thursday.txt"))
 print("text9:", text9.name)
 
 
@@ -58,25 +63,142 @@ def texts():
     print("text8:", text8.name)
     print("text9:", text9.name)
 
+
 sent1 = ["Call", "me", "Ishmael", "."]
-sent2 = ["The", "family", "of", "Dashwood", "had", "long",
-         "been", "settled", "in", "Sussex", "."]
-sent3 = ["In", "the", "beginning", "God", "created", "the",
-         "heaven", "and", "the", "earth", "."]
-sent4 = ["Fellow", "-", "Citizens", "of", "the", "Senate",
-         "and", "of", "the", "House", "of", "Representatives", ":"]
-sent5 = ["I", "have", "a", "problem", "with", "people",
-         "PMing", "me", "to", "lol", "JOIN"]
-sent6 = ['SCENE', '1', ':', '[', 'wind', ']', '[', 'clop', 'clop',
-         'clop', ']', 'KING', 'ARTHUR', ':', 'Whoa', 'there', '!']
-sent7 = ["Pierre", "Vinken", ",", "61", "years", "old", ",",
-         "will", "join", "the", "board", "as", "a", "nonexecutive",
-         "director", "Nov.", "29", "."]
-sent8 = ['25', 'SEXY', 'MALE', ',', 'seeks', 'attrac', 'older',
-         'single', 'lady', ',', 'for', 'discreet', 'encounters', '.']
-sent9 = ["THE", "suburb", "of", "Saffron", "Park", "lay", "on", "the",
-         "sunset", "side", "of", "London", ",", "as", "red", "and",
-         "ragged", "as", "a", "cloud", "of", "sunset", "."]
+sent2 = [
+    "The",
+    "family",
+    "of",
+    "Dashwood",
+    "had",
+    "long",
+    "been",
+    "settled",
+    "in",
+    "Sussex",
+    ".",
+]
+sent3 = [
+    "In",
+    "the",
+    "beginning",
+    "God",
+    "created",
+    "the",
+    "heaven",
+    "and",
+    "the",
+    "earth",
+    ".",
+]
+sent4 = [
+    "Fellow",
+    "-",
+    "Citizens",
+    "of",
+    "the",
+    "Senate",
+    "and",
+    "of",
+    "the",
+    "House",
+    "of",
+    "Representatives",
+    ":",
+]
+sent5 = [
+    "I",
+    "have",
+    "a",
+    "problem",
+    "with",
+    "people",
+    "PMing",
+    "me",
+    "to",
+    "lol",
+    "JOIN",
+]
+sent6 = [
+    "SCENE",
+    "1",
+    ":",
+    "[",
+    "wind",
+    "]",
+    "[",
+    "clop",
+    "clop",
+    "clop",
+    "]",
+    "KING",
+    "ARTHUR",
+    ":",
+    "Whoa",
+    "there",
+    "!",
+]
+sent7 = [
+    "Pierre",
+    "Vinken",
+    ",",
+    "61",
+    "years",
+    "old",
+    ",",
+    "will",
+    "join",
+    "the",
+    "board",
+    "as",
+    "a",
+    "nonexecutive",
+    "director",
+    "Nov.",
+    "29",
+    ".",
+]
+sent8 = [
+    "25",
+    "SEXY",
+    "MALE",
+    ",",
+    "seeks",
+    "attrac",
+    "older",
+    "single",
+    "lady",
+    ",",
+    "for",
+    "discreet",
+    "encounters",
+    ".",
+]
+sent9 = [
+    "THE",
+    "suburb",
+    "of",
+    "Saffron",
+    "Park",
+    "lay",
+    "on",
+    "the",
+    "sunset",
+    "side",
+    "of",
+    "London",
+    ",",
+    "as",
+    "red",
+    "and",
+    "ragged",
+    "as",
+    "a",
+    "cloud",
+    "of",
+    "sunset",
+    ".",
+]
 
 
 def sents():
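
The converted nltk.book module is exercised simply by importing it. A minimal usage sketch, not part of this change, assuming the gutenberg corpus data has already been fetched (e.g. via nltk.download("gutenberg")):

    # Sketch only: load one of the texts wired up above and query it.
    from nltk.corpus import gutenberg
    from nltk.text import Text

    moby = Text(gutenberg.words("melville-moby_dick.txt"))
    print("loaded:", moby.name)
    moby.concordance("whale", lines=5)  # keyword-in-context search
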
diff --git a/nlp_resource_data/nltk/book.pyc b/nlp_resource_data/nltk/book.pyc
deleted file mode 100755 (executable)
index 87310ef..0000000
Binary files a/nlp_resource_data/nltk/book.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/ccg/__init__.py b/nlp_resource_data/nltk/ccg/__init__.py
old mode 100755 (executable)
new mode 100644 (file)
index 630c182..7d54311
--- a/nlp_resource_data/nltk/ccg/__init__.py
+++ b/nlp_resource_data/nltk/ccg/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Graeme Gange <ggange@csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -11,12 +11,24 @@ Combinatory Categorial Grammar.
 For more information see nltk/doc/contrib/ccg/ccg.pdf
 """
 
-from nltk.ccg.combinator import (UndirectedBinaryCombinator, DirectedBinaryCombinator,
-                                 ForwardCombinator, BackwardCombinator,
-                                 UndirectedFunctionApplication, ForwardApplication,
-                                 BackwardApplication, UndirectedComposition,
-                                 ForwardComposition, BackwardComposition,
-                                 BackwardBx, UndirectedSubstitution, ForwardSubstitution,
-                                 BackwardSx, UndirectedTypeRaise, ForwardT, BackwardT)
+from nltk.ccg.combinator import (
+    UndirectedBinaryCombinator,
+    DirectedBinaryCombinator,
+    ForwardCombinator,
+    BackwardCombinator,
+    UndirectedFunctionApplication,
+    ForwardApplication,
+    BackwardApplication,
+    UndirectedComposition,
+    ForwardComposition,
+    BackwardComposition,
+    BackwardBx,
+    UndirectedSubstitution,
+    ForwardSubstitution,
+    BackwardSx,
+    UndirectedTypeRaise,
+    ForwardT,
+    BackwardT,
+)
 from nltk.ccg.chart import CCGEdge, CCGLeafEdge, CCGChartParser, CCGChart
 from nltk.ccg.lexicon import CCGLexicon
diff --git a/nlp_resource_data/nltk/ccg/__init__.pyc b/nlp_resource_data/nltk/ccg/__init__.pyc
deleted file mode 100755 (executable)
index 70c36ec..0000000
Binary files a/nlp_resource_data/nltk/ccg/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/ccg/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..10662e1
Binary files /dev/null and b/nlp_resource_data/nltk/ccg/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/ccg/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..25cb42d
Binary files /dev/null and b/nlp_resource_data/nltk/ccg/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/ccg/__pycache__/chart.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/chart.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e610180
Binary files /dev/null and b/nlp_resource_data/nltk/ccg/__pycache__/chart.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/ccg/__pycache__/combinator.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/combinator.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0a30b63
Binary files /dev/null and b/nlp_resource_data/nltk/ccg/__pycache__/combinator.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/ccg/__pycache__/lexicon.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/lexicon.cpython-37.pyc
new file mode 100644 (file)
index 0000000..bd6b1bd
Binary files /dev/null and b/nlp_resource_data/nltk/ccg/__pycache__/lexicon.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/ccg/__pycache__/logic.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/logic.cpython-37.pyc
new file mode 100644 (file)
index 0000000..df6e87b
Binary files /dev/null and b/nlp_resource_data/nltk/ccg/__pycache__/logic.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/ccg/api.py b/nlp_resource_data/nltk/ccg/api.py
old mode 100755 (executable)
new mode 100644 (file)
index 79c6b77..6278452
--- a/nlp_resource_data/nltk/ccg/api.py
+++ b/nlp_resource_data/nltk/ccg/api.py
@@ -1,25 +1,21 @@
 # Natural Language Toolkit: CCG Categories
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Graeme Gange <ggange@csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import unicode_literals
+
 from functools import total_ordering
 
 from abc import ABCMeta, abstractmethod
-from six import add_metaclass
 
 from nltk.internals import raise_unorderable_types
-from nltk.compat import (python_2_unicode_compatible, unicode_repr)
-
 
-@add_metaclass(ABCMeta)
 @total_ordering
-class AbstractCCGCategory(object):
-    '''
+class AbstractCCGCategory(metaclass=ABCMeta):
+    """
     Interface for categories in combinatory grammars.
-    '''
+    """
 
     @abstractmethod
     def is_primitive(self):
@@ -60,8 +56,10 @@ class AbstractCCGCategory(object):
         pass
 
     def __eq__(self, other):
-        return (self.__class__ is other.__class__ and
-                self._comparison_key == other._comparison_key)
+        return (
+            self.__class__ is other.__class__
+            and self._comparison_key == other._comparison_key
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -82,13 +80,13 @@ class AbstractCCGCategory(object):
             return self._hash
 
 
-@python_2_unicode_compatible
 class CCGVar(AbstractCCGCategory):
-    '''
+    """
     Class representing a variable CCG category.
     Used for conjunctions (and possibly type-raising, if implemented as a
     unary rule).
-    '''
+    """
+
     _maxID = 0
 
     def __init__(self, prim_only=False):
@@ -148,13 +146,13 @@ class CCGVar(AbstractCCGCategory):
 
 
 @total_ordering
-@python_2_unicode_compatible
 class Direction(object):
-    '''
+    """
     Class representing the direction of a function application.
     Also maintains information as to which combinators
     may be used with the category.
-    '''
+    """
+
     def __init__(self, dir, restrictions):
         self._dir = dir
         self._restrs = restrictions
@@ -162,10 +160,10 @@ class Direction(object):
 
     # Testing the application direction
     def is_forward(self):
-        return self._dir == '/'
+        return self._dir == "/"
 
     def is_backward(self):
-        return self._dir == '\\'
+        return self._dir == "\\"
 
     def dir(self):
         return self._dir
@@ -180,16 +178,16 @@ class Direction(object):
         return self._restrs
 
     def is_variable(self):
-        return self._restrs == '_'
+        return self._restrs == "_"
 
     # Unification and substitution of variable directions.
     # Used only if type-raising is implemented as a unary rule, as it
     # must inherit restrictions from the argument category.
     def can_unify(self, other):
         if other.is_variable():
-            return [('_', self.restrs())]
+            return [("_", self.restrs())]
         elif self.is_variable():
-            return [('_', other.restrs())]
+            return [("_", other.restrs())]
         else:
             if self.restrs() == other.restrs():
                 return []
@@ -200,20 +198,22 @@ class Direction(object):
             return self
 
         for (var, restrs) in subs:
-            if var == '_':
+            if var == "_":
                 return Direction(self._dir, restrs)
         return self
 
     # Testing permitted combinators
     def can_compose(self):
-        return (',' not in self._restrs)
+        return "," not in self._restrs
 
     def can_cross(self):
-        return ('.' not in self._restrs)
+        return "." not in self._restrs
 
     def __eq__(self, other):
-        return (self.__class__ is other.__class__ and
-                self._comparison_key == other._comparison_key)
+        return (
+            self.__class__ is other.__class__
+            and self._comparison_key == other._comparison_key
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -241,19 +241,19 @@ class Direction(object):
 
     # The negation operator reverses the direction of the application
     def __neg__(self):
-        if self._dir == '/':
-            return Direction('\\', self._restrs)
+        if self._dir == "/":
+            return Direction("\\", self._restrs)
         else:
-            return Direction('/', self._restrs)
+            return Direction("/", self._restrs)
 
 
-@python_2_unicode_compatible
 class PrimitiveCategory(AbstractCCGCategory):
-    '''
+    """
     Class representing primitive categories.
     Takes a string representation of the category, and a
     list of strings specifying the morphological subcategories.
-    '''
+    """
+
     def __init__(self, categ, restrictions=[]):
         self._categ = categ
         self._restrs = restrictions
@@ -296,17 +296,17 @@ class PrimitiveCategory(AbstractCCGCategory):
     def __str__(self):
         if self._restrs == []:
             return "%s" % self._categ
-        restrictions = "[%s]" % ",".join(unicode_repr(r) for r in self._restrs)
+        restrictions = "[%s]" % ",".join(repr(r) for r in self._restrs)
         return "%s%s" % (self._categ, restrictions)
 
 
-@python_2_unicode_compatible
 class FunctionalCategory(AbstractCCGCategory):
-    '''
+    """
     Class that represents a function application category.
     Consists of argument and result categories, together with
     an application direction.
-    '''
+    """
+
     def __init__(self, res, arg, dir):
         self._res = res
         self._arg = arg
@@ -339,8 +339,7 @@ class FunctionalCategory(AbstractCCGCategory):
             sa = self._res.can_unify(other.res())
             sd = self._dir.can_unify(other.dir())
             if sa is not None and sd is not None:
-                sb = self._arg.substitute(sa).can_unify(
-                    other.arg().substitute(sa))
+                sb = self._arg.substitute(sa).can_unify(other.arg().substitute(sa))
                 if sb is not None:
                     return sa + sb
         return None
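
A small sketch exercising the converted category classes; every method used here (is_forward, is_function, the direction-flipping __neg__) appears in the hunks above:

    from nltk.ccg.api import PrimitiveCategory, Direction, FunctionalCategory

    S = PrimitiveCategory("S")
    NP = PrimitiveCategory("NP")
    fwd = Direction("/", [])             # forward slash, no restrictions
    bwd = -fwd                           # __neg__ flips '/' to '\'
    iv = FunctionalCategory(S, NP, bwd)  # an intransitive-verb category, S\NP
    print(fwd.is_forward(), bwd.is_backward())  # True True
    print(iv.is_function(), iv.is_primitive())  # True False
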
diff --git a/nlp_resource_data/nltk/ccg/api.pyc b/nlp_resource_data/nltk/ccg/api.pyc
deleted file mode 100755 (executable)
index 9972117..0000000
Binary files a/nlp_resource_data/nltk/ccg/api.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/ccg/chart.py b/nlp_resource_data/nltk/ccg/chart.py
old mode 100755 (executable)
new mode 100644 (file)
index e2f04b1..ab4807c
--- a/nlp_resource_data/nltk/ccg/chart.py
+++ b/nlp_resource_data/nltk/ccg/chart.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Graeme Gange <ggange@csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -29,22 +29,26 @@ which should print a nice representation of the derivation.
 This entire process is shown far more clearly in the demonstration:
 python chart.py
 """
-from __future__ import print_function, division, unicode_literals
 
 import itertools
 
-from six import string_types
-
 from nltk.parse import ParserI
 from nltk.parse.chart import AbstractChartRule, EdgeI, Chart
 from nltk.tree import Tree
 
 from nltk.ccg.lexicon import fromstring, Token
-from nltk.ccg.combinator import (ForwardT, BackwardT, ForwardApplication,
-                                 BackwardApplication, ForwardComposition,
-                                 BackwardComposition, ForwardSubstitution,
-                                 BackwardBx, BackwardSx)
-from nltk.compat import python_2_unicode_compatible
+from nltk.ccg.combinator import (
+    ForwardT,
+    BackwardT,
+    ForwardApplication,
+    BackwardApplication,
+    ForwardComposition,
+    BackwardComposition,
+    ForwardSubstitution,
+    BackwardBx,
+    BackwardSx,
+)
+
 from nltk.ccg.combinator import *
 from nltk.ccg.logic import *
 from nltk.sem.logic import *
@@ -60,24 +64,48 @@ class CCGEdge(EdgeI):
         self._comparison_key = (span, categ, rule)
 
     # Accessors
-    def lhs(self): return self._categ
-    def span(self): return self._span
-    def start(self): return self._span[0]
-    def end(self): return self._span[1]
-    def length(self): return self._span[1] - self.span[0]
-    def rhs(self): return ()
-    def dot(self): return 0
-    def is_complete(self): return True
-    def is_incomplete(self): return False
-    def nextsym(self): return None
-
-    def categ(self): return self._categ
-    def rule(self): return self._rule
+    def lhs(self):
+        return self._categ
+
+    def span(self):
+        return self._span
+
+    def start(self):
+        return self._span[0]
+
+    def end(self):
+        return self._span[1]
+
+    def length(self):
+        return self._span[1] - self._span[0]
+
+    def rhs(self):
+        return ()
+
+    def dot(self):
+        return 0
+
+    def is_complete(self):
+        return True
+
+    def is_incomplete(self):
+        return False
+
+    def nextsym(self):
+        return None
+
+    def categ(self):
+        return self._categ
+
+    def rule(self):
+        return self._rule
+
 
 class CCGLeafEdge(EdgeI):
-    '''
+    """
     Class representing leaf edges in a CCG derivation.
-    '''
+    """
+
     def __init__(self, pos, token, leaf):
         self._pos = pos
         self._token = token
@@ -85,29 +113,55 @@ class CCGLeafEdge(EdgeI):
         self._comparison_key = (pos, token.categ(), leaf)
 
     # Accessors
-    def lhs(self): return self._token.categ()
-    def span(self): return (self._pos, self._pos+1)
-    def start(self): return self._pos
-    def end(self): return self._pos + 1
-    def length(self): return 1
-    def rhs(self): return self._leaf
-    def dot(self): return 0
-    def is_complete(self): return True
-    def is_incomplete(self): return False
-    def nextsym(self): return None
-
-    def token(self): return self._token
-    def categ(self): return self._token.categ()
-    def leaf(self): return self._leaf
-
-@python_2_unicode_compatible
+    def lhs(self):
+        return self._token.categ()
+
+    def span(self):
+        return (self._pos, self._pos + 1)
+
+    def start(self):
+        return self._pos
+
+    def end(self):
+        return self._pos + 1
+
+    def length(self):
+        return 1
+
+    def rhs(self):
+        return self._leaf
+
+    def dot(self):
+        return 0
+
+    def is_complete(self):
+        return True
+
+    def is_incomplete(self):
+        return False
+
+    def nextsym(self):
+        return None
+
+    def token(self):
+        return self._token
+
+    def categ(self):
+        return self._token.categ()
+
+    def leaf(self):
+        return self._leaf
+
+
 class BinaryCombinatorRule(AbstractChartRule):
-    '''
+    """
     Class implementing application of a binary combinator to a chart.
     Takes the directed combinator to apply.
-    '''
+    """
+
     NUMEDGES = 2
-    def __init__(self,combinator):
+
+    def __init__(self, combinator):
         self._combinator = combinator
 
     # Apply a combinator
@@ -118,55 +172,65 @@ class BinaryCombinatorRule(AbstractChartRule):
 
         # Check if the two edges are permitted to combine.
         # If so, generate the corresponding edge.
-        if self._combinator.can_combine(left_edge.categ(),right_edge.categ()):
+        if self._combinator.can_combine(left_edge.categ(), right_edge.categ()):
             for res in self._combinator.combine(left_edge.categ(), right_edge.categ()):
-                new_edge = CCGEdge(span=(left_edge.start(), right_edge.end()),categ=res,rule=self._combinator)
-                if chart.insert(new_edge,(left_edge,right_edge)):
+                new_edge = CCGEdge(
+                    span=(left_edge.start(), right_edge.end()),
+                    categ=res,
+                    rule=self._combinator,
+                )
+                if chart.insert(new_edge, (left_edge, right_edge)):
                     yield new_edge
 
     # The representation of the combinator (for printing derivations)
     def __str__(self):
         return "%s" % self._combinator
 
+
 # Type-raising must be handled slightly differently to the other rules, as the
 # resulting rules only span a single edge, rather than both edges.
-@python_2_unicode_compatible
+
+
 class ForwardTypeRaiseRule(AbstractChartRule):
-    '''
+    """
     Class for applying forward type raising
-    '''
+    """
+
     NUMEDGES = 2
 
     def __init__(self):
-       self._combinator = ForwardT
+        self._combinator = ForwardT
+
     def apply(self, chart, grammar, left_edge, right_edge):
         if not (left_edge.end() == right_edge.start()):
             return
 
         for res in self._combinator.combine(left_edge.categ(), right_edge.categ()):
-            new_edge = CCGEdge(span=left_edge.span(),categ=res,rule=self._combinator)
-            if chart.insert(new_edge,(left_edge,)):
+            new_edge = CCGEdge(span=left_edge.span(), categ=res, rule=self._combinator)
+            if chart.insert(new_edge, (left_edge,)):
                 yield new_edge
 
     def __str__(self):
         return "%s" % self._combinator
 
-@python_2_unicode_compatible
+
 class BackwardTypeRaiseRule(AbstractChartRule):
-    '''
+    """
     Class for applying backward type raising.
-    '''
+    """
+
     NUMEDGES = 2
 
     def __init__(self):
-       self._combinator = BackwardT
+        self._combinator = BackwardT
+
     def apply(self, chart, grammar, left_edge, right_edge):
         if not (left_edge.end() == right_edge.start()):
             return
 
         for res in self._combinator.combine(left_edge.categ(), right_edge.categ()):
-            new_edge = CCGEdge(span=right_edge.span(),categ=res,rule=self._combinator)
-            if chart.insert(new_edge,(right_edge,)):
+            new_edge = CCGEdge(span=right_edge.span(), categ=res, rule=self._combinator)
+            if chart.insert(new_edge, (right_edge,)):
                 yield new_edge
 
     def __str__(self):
@@ -174,24 +238,33 @@ class BackwardTypeRaiseRule(AbstractChartRule):
 
 
 # Common sets of combinators used for English derivations.
-ApplicationRuleSet = [BinaryCombinatorRule(ForwardApplication),
-                        BinaryCombinatorRule(BackwardApplication)]
-CompositionRuleSet = [BinaryCombinatorRule(ForwardComposition),
-                        BinaryCombinatorRule(BackwardComposition),
-                        BinaryCombinatorRule(BackwardBx)]
-SubstitutionRuleSet = [BinaryCombinatorRule(ForwardSubstitution),
-                        BinaryCombinatorRule(BackwardSx)]
+ApplicationRuleSet = [
+    BinaryCombinatorRule(ForwardApplication),
+    BinaryCombinatorRule(BackwardApplication),
+]
+CompositionRuleSet = [
+    BinaryCombinatorRule(ForwardComposition),
+    BinaryCombinatorRule(BackwardComposition),
+    BinaryCombinatorRule(BackwardBx),
+]
+SubstitutionRuleSet = [
+    BinaryCombinatorRule(ForwardSubstitution),
+    BinaryCombinatorRule(BackwardSx),
+]
 TypeRaiseRuleSet = [ForwardTypeRaiseRule(), BackwardTypeRaiseRule()]
 
 # The standard English rule set.
-DefaultRuleSet = ApplicationRuleSet + CompositionRuleSet + \
-                    SubstitutionRuleSet + TypeRaiseRuleSet
+DefaultRuleSet = (
+    ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet + TypeRaiseRuleSet
+)
+
 
 class CCGChartParser(ParserI):
-    '''
+    """
     Chart parser for CCGs.
     Based largely on the ChartParser class from NLTK.
-    '''
+    """
+
     def __init__(self, lexicon, rules, trace=0):
         self._lexicon = lexicon
         self._rules = rules
@@ -200,7 +273,7 @@ class CCGChartParser(ParserI):
     def lexicon(self):
         return self._lexicon
 
-   # Implements the CYK algorithm
+    # Implements the CYK algorithm
     def parse(self, tokens):
         tokens = list(tokens)
         chart = CCGChart(list(tokens))
@@ -212,28 +285,28 @@ class CCGChartParser(ParserI):
                 new_edge = CCGLeafEdge(index, token, chart.leaf(index))
                 chart.insert(new_edge, ())
 
-
         # Select a span for the new edges
-        for span in range(2,chart.num_leaves()+1):
-            for start in range(0,chart.num_leaves()-span+1):
+        for span in range(2, chart.num_leaves() + 1):
+            for start in range(0, chart.num_leaves() - span + 1):
                 # Try all possible pairs of edges that could generate
                 # an edge for that span
-                for part in range(1,span):
+                for part in range(1, span):
                     lstart = start
                     mid = start + part
                     rend = start + span
 
-                    for left in chart.select(span=(lstart,mid)):
-                        for right in chart.select(span=(mid,rend)):
+                    for left in chart.select(span=(lstart, mid)):
+                        for right in chart.select(span=(mid, rend)):
                             # Generate all possible combinations of the two edges
                             for rule in self._rules:
                                 edges_added_by_rule = 0
-                                for newedge in rule.apply(chart,lex,left,right):
+                                for newedge in rule.apply(chart, lex, left, right):
                                     edges_added_by_rule += 1
 
         # Output the resulting parses
         return chart.parses(lex.start())
 
+
 class CCGChart(Chart):
     def __init__(self, tokens):
         Chart.__init__(self, tokens)
@@ -247,7 +320,7 @@ class CCGChart(Chart):
         if edge in memo:
             return memo[edge]
 
-        if isinstance(edge,CCGLeafEdge):
+        if isinstance(edge, CCGLeafEdge):
             word = tree_class(edge.token(), [self._tokens[edge.start()]])
             leaf = tree_class((edge.token(), "Leaf"), [word])
             memo[edge] = [leaf]
@@ -257,10 +330,16 @@ class CCGChart(Chart):
         trees = []
 
         for cpl in self.child_pointer_lists(edge):
-            child_choices = [self._trees(cp, complete, memo, tree_class)
-                             for cp in cpl]
+            child_choices = [self._trees(cp, complete, memo, tree_class) for cp in cpl]
             for children in itertools.product(*child_choices):
-                lhs = (Token(self._tokens[edge.start():edge.end()], edge.lhs(), compute_semantics(children, edge)), str(edge.rule()))
+                lhs = (
+                    Token(
+                        self._tokens[edge.start() : edge.end()],
+                        edge.lhs(),
+                        compute_semantics(children, edge),
+                    ),
+                    str(edge.rule()),
+                )
                 trees.append(tree_class(lhs, children))
 
         memo[edge] = trees
@@ -271,9 +350,9 @@ def compute_semantics(children, edge):
     if children[0].label()[0].semantics() is None:
         return None
 
-    if len(children) is 2:
+    if len(children) == 2:
         if isinstance(edge.rule(), BackwardCombinator):
-            children = [children[1],children[0]]
+            children = [children[1], children[0]]
 
         combinator = edge.rule()._combinator
         function = children[0].label()[0].semantics()
@@ -286,18 +365,19 @@ def compute_semantics(children, edge):
         elif isinstance(combinator, UndirectedSubstitution):
             return compute_substitution_semantics(function, argument)
         else:
-            raise AssertionError('Unsupported combinator \'' + combinator + '\'')
+            raise AssertionError("Unsupported combinator '" + str(combinator) + "'")
     else:
         return compute_type_raised_semantics(children[0].label()[0].semantics())
 
-#--------
+
+# --------
 # Displaying derivations
-#--------
+# --------
 def printCCGDerivation(tree):
     # Get the leaves and initial categories
     leafcats = tree.pos()
-    leafstr = ''
-    catstr = ''
+    leafstr = ""
+    catstr = ""
 
     # Construct a string with both the leaf word and corresponding
     # category aligned.
@@ -306,18 +386,19 @@ def printCCGDerivation(tree):
         nextlen = 2 + max(len(leaf), len(str_cat))
         lcatlen = (nextlen - len(str_cat)) // 2
         rcatlen = lcatlen + (nextlen - len(str_cat)) % 2
-        catstr += ' '*lcatlen + str_cat + ' '*rcatlen
+        catstr += " " * lcatlen + str_cat + " " * rcatlen
         lleaflen = (nextlen - len(leaf)) // 2
         rleaflen = lleaflen + (nextlen - len(leaf)) % 2
-        leafstr += ' '*lleaflen + leaf + ' '*rleaflen
+        leafstr += " " * lleaflen + leaf + " " * rleaflen
     print(leafstr.rstrip())
     print(catstr.rstrip())
 
     # Display the derivation steps
-    printCCGTree(0,tree)
+    printCCGTree(0, tree)
+
 
 # Prints the sequence of derivation steps.
-def printCCGTree(lwidth,tree):
+def printCCGTree(lwidth, tree):
     rwidth = lwidth
 
     # Is a leaf (word).
@@ -327,34 +408,37 @@ def printCCGTree(lwidth,tree):
 
     # Find the width of the current derivation step
     for child in tree:
-        rwidth = max(rwidth, printCCGTree(rwidth,child))
+        rwidth = max(rwidth, printCCGTree(rwidth, child))
 
     # Is a leaf node.
     # Don't print anything, but account for the space occupied.
     if not isinstance(tree.label(), tuple):
-        return max(rwidth,2 + lwidth + len("%s" % tree.label()),
-                  2 + lwidth + len(tree[0]))
+        return max(
+            rwidth, 2 + lwidth + len("%s" % tree.label()), 2 + lwidth + len(tree[0])
+        )
 
     (token, op) = tree.label()
 
-    if op == 'Leaf':
+    if op == "Leaf":
         return rwidth
 
     # Pad to the left with spaces, followed by a sequence of '-'
     # and the derivation rule.
-    print(lwidth*' ' + (rwidth-lwidth)*'-' + "%s" % op)
+    print(lwidth * " " + (rwidth - lwidth) * "-" + "%s" % op)
     # Print the resulting category on a new line.
     str_res = "%s" % (token.categ())
     if token.semantics() is not None:
         str_res += " {" + str(token.semantics()) + "}"
     respadlen = (rwidth - lwidth - len(str_res)) // 2 + lwidth
-    print(respadlen*' ' + str_res)
+    print(respadlen * " " + str_res)
     return rwidth
 
+
 ### Demonstration code
 
 # Construct the lexicon
-lex = fromstring('''
+lex = fromstring(
+    """
     :- S, NP, N, VP    # Primitive categories, S is the target primitive
 
     Det :: NP/N         # Family of words
@@ -383,12 +467,15 @@ lex = fromstring('''
     mushrooms => N
     parsnips => N
     bacon => N
-    ''')
+    """
+)
+
 
 def demo():
     parser = CCGChartParser(lex, DefaultRuleSet)
     for parse in parser.parse("I might cook and eat the bacon".split()):
         printCCGDerivation(parse)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
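
demo() above is the natural smoke test for the converted parser. The same run driven by hand, importing the module-level demonstration lexicon lex:

    from nltk.ccg.chart import CCGChartParser, DefaultRuleSet, printCCGDerivation, lex

    parser = CCGChartParser(lex, DefaultRuleSet)
    for parse in parser.parse("I might cook and eat the bacon".split()):
        printCCGDerivation(parse)
        break  # one derivation is enough to confirm the CYK loop runs
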
diff --git a/nlp_resource_data/nltk/ccg/chart.pyc b/nlp_resource_data/nltk/ccg/chart.pyc
deleted file mode 100755 (executable)
index e1df2b6..0000000
Binary files a/nlp_resource_data/nltk/ccg/chart.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/ccg/combinator.py b/nlp_resource_data/nltk/ccg/combinator.py
old mode 100755 (executable)
new mode 100644 (file)
index 1fecd5c..60bb149
--- a/nlp_resource_data/nltk/ccg/combinator.py
+++ b/nlp_resource_data/nltk/ccg/combinator.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Graeme Gange <ggange@csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -8,16 +8,12 @@
 CCG Combinators
 """
 
-from __future__ import unicode_literals
 from abc import ABCMeta, abstractmethod
-from six import add_metaclass
 
-from nltk.compat import python_2_unicode_compatible
 from nltk.ccg.api import FunctionalCategory
 
 
-@add_metaclass(ABCMeta)
-class UndirectedBinaryCombinator(object):
+class UndirectedBinaryCombinator(metaclass=ABCMeta):
     """
     Abstract class for representing a binary combinator.
     Merely defines functions for checking if the function and argument
@@ -28,6 +24,7 @@ class UndirectedBinaryCombinator(object):
     of the combinators; these restrictions must be added in the rule
     class.
     """
+
     @abstractmethod
     def can_combine(self, function, argument):
         pass
@@ -37,14 +34,14 @@ class UndirectedBinaryCombinator(object):
         pass
 
 
-@add_metaclass(ABCMeta)
-class DirectedBinaryCombinator(object):
+class DirectedBinaryCombinator(metaclass=ABCMeta):
     """
     Wrapper for the undirected binary combinator.
     It takes left and right categories, and decides which is to be
     the function, and which the argument.
     It then decides whether or not they can be combined.
     """
+
     @abstractmethod
     def can_combine(self, left, right):
         pass
@@ -54,7 +51,6 @@ class DirectedBinaryCombinator(object):
         pass
 
 
-@python_2_unicode_compatible
 class ForwardCombinator(DirectedBinaryCombinator):
     """
     Class representing combinators where the primary functor is on the left.
@@ -62,14 +58,16 @@ class ForwardCombinator(DirectedBinaryCombinator):
     Takes an undirected combinator, and a predicate which adds constraints
     restricting the cases in which it may apply.
     """
-    def __init__(self, combinator, predicate, suffix=''):
+
+    def __init__(self, combinator, predicate, suffix=""):
         self._combinator = combinator
         self._predicate = predicate
         self._suffix = suffix
 
     def can_combine(self, left, right):
-        return (self._combinator.can_combine(left, right) and
-                self._predicate(left, right))
+        return self._combinator.can_combine(left, right) and self._predicate(
+            left, right
+        )
 
     def combine(self, left, right):
         for cat in self._combinator.combine(left, right):
@@ -79,19 +77,20 @@ class ForwardCombinator(DirectedBinaryCombinator):
         return ">%s%s" % (self._combinator, self._suffix)
 
 
-@python_2_unicode_compatible
 class BackwardCombinator(DirectedBinaryCombinator):
     """
     The backward equivalent of the ForwardCombinator class.
     """
-    def __init__(self, combinator, predicate, suffix=''):
+
+    def __init__(self, combinator, predicate, suffix=""):
         self._combinator = combinator
         self._predicate = predicate
         self._suffix = suffix
 
     def can_combine(self, left, right):
-        return (self._combinator.can_combine(right, left) and
-                self._predicate(left, right))
+        return self._combinator.can_combine(right, left) and self._predicate(
+            left, right
+        )
 
     def combine(self, left, right):
         for cat in self._combinator.combine(right, left):
@@ -101,7 +100,6 @@ class BackwardCombinator(DirectedBinaryCombinator):
         return "<%s%s" % (self._combinator, self._suffix)
 
 
-@python_2_unicode_compatible
 class UndirectedFunctionApplication(UndirectedBinaryCombinator):
     """
     Class representing function application.
@@ -127,7 +125,7 @@ class UndirectedFunctionApplication(UndirectedBinaryCombinator):
         yield function.res().substitute(subs)
 
     def __str__(self):
-        return ''
+        return ""
 
 
 # Predicates for function application.
@@ -143,13 +141,10 @@ def backwardOnly(left, right):
 
 
 # Application combinator instances
-ForwardApplication = ForwardCombinator(UndirectedFunctionApplication(),
-                                       forwardOnly)
-BackwardApplication = BackwardCombinator(UndirectedFunctionApplication(),
-                                         backwardOnly)
+ForwardApplication = ForwardCombinator(UndirectedFunctionApplication(), forwardOnly)
+BackwardApplication = BackwardCombinator(UndirectedFunctionApplication(), backwardOnly)
 
 
-@python_2_unicode_compatible
 class UndirectedComposition(UndirectedBinaryCombinator):
     """
     Functional composition (harmonic) combinator.
@@ -157,6 +152,7 @@ class UndirectedComposition(UndirectedBinaryCombinator):
     X/Y Y/Z -> X/Z (B>)
     And the corresponding backwards and crossed variations.
     """
+
     def can_combine(self, function, argument):
         # Can only combine two functions, and both functions must
         # allow composition.
@@ -172,12 +168,14 @@ class UndirectedComposition(UndirectedBinaryCombinator):
         if function.dir().can_compose() and argument.dir().can_compose():
             subs = function.arg().can_unify(argument.res())
             if subs is not None:
-                yield FunctionalCategory(function.res().substitute(subs),
-                                         argument.arg().substitute(subs),
-                                         argument.dir())
+                yield FunctionalCategory(
+                    function.res().substitute(subs),
+                    argument.arg().substitute(subs),
+                    argument.dir(),
+                )
 
     def __str__(self):
-        return 'B'
+        return "B"
 
 
 # Predicates for restricting application of straight composition.
@@ -206,17 +204,15 @@ def backwardBxConstraint(left, right):
 
 
 # Straight composition combinators
-ForwardComposition = ForwardCombinator(UndirectedComposition(),
-                                       forwardOnly)
-BackwardComposition = BackwardCombinator(UndirectedComposition(),
-                                         backwardOnly)
+ForwardComposition = ForwardCombinator(UndirectedComposition(), forwardOnly)
+BackwardComposition = BackwardCombinator(UndirectedComposition(), backwardOnly)
 
 # Backward crossed composition
-BackwardBx = BackwardCombinator(UndirectedComposition(), backwardBxConstraint,
-                                suffix='x')
+BackwardBx = BackwardCombinator(
+    UndirectedComposition(), backwardBxConstraint, suffix="x"
+)
 
 
-@python_2_unicode_compatible
 class UndirectedSubstitution(UndirectedBinaryCombinator):
     """
     Substitution (permutation) combinator.
@@ -224,6 +220,7 @@ class UndirectedSubstitution(UndirectedBinaryCombinator):
     Y/Z (X\Y)/Z -> X/Z (<Sx)
     And other variations.
     """
+
     def can_combine(self, function, argument):
         if function.is_primitive() or argument.is_primitive():
             return False
@@ -237,15 +234,18 @@ class UndirectedSubstitution(UndirectedBinaryCombinator):
 
         if not (function.dir().can_compose() and argument.dir().can_compose()):
             return False
-        return (function.res().arg() == argument.res()) and (function.arg() == argument.arg())
+        return (function.res().arg() == argument.res()) and (
+            function.arg() == argument.arg()
+        )
 
     def combine(self, function, argument):
         if self.can_combine(function, argument):
-            yield FunctionalCategory(function.res().res(), argument.arg(),
-                                     argument.dir())
+            yield FunctionalCategory(
+                function.res().res(), argument.arg(), argument.dir()
+            )
 
     def __str__(self):
-        return 'S'
+        return "S"
 
 
 # Predicate for forward substitution
@@ -265,10 +265,8 @@ def backwardSxConstraint(left, right):
 
 
 # Instances of substitution combinators
-ForwardSubstitution = ForwardCombinator(UndirectedSubstitution(),
-                                        forwardSConstraint)
-BackwardSx = BackwardCombinator(UndirectedSubstitution(),
-                                backwardSxConstraint, 'x')
+ForwardSubstitution = ForwardCombinator(UndirectedSubstitution(), forwardSConstraint)
+BackwardSx = BackwardCombinator(UndirectedSubstitution(), backwardSxConstraint, "x")
 
 
 # Retrieves the left-most functional category.
@@ -279,11 +277,11 @@ def innermostFunction(categ):
     return categ
 
 
-@python_2_unicode_compatible
 class UndirectedTypeRaise(UndirectedBinaryCombinator):
     """
     Undirected combinator for type raising.
     """
+
     def can_combine(self, function, arg):
         # The argument must be a function.
         # The restriction that arg.res() must be a function
@@ -304,8 +302,9 @@ class UndirectedTypeRaise(UndirectedBinaryCombinator):
         return False
 
     def combine(self, function, arg):
-        if not (function.is_primitive() and
-                arg.is_function() and arg.res().is_function()):
+        if not (
+            function.is_primitive() and arg.is_function() and arg.res().is_function()
+        ):
             return
 
         # Type-raising matches only the innermost application.
@@ -314,13 +313,12 @@ class UndirectedTypeRaise(UndirectedBinaryCombinator):
         subs = function.can_unify(arg.arg())
         if subs is not None:
             xcat = arg.res().substitute(subs)
-            yield FunctionalCategory(xcat,
-                                     FunctionalCategory(xcat, function,
-                                                        arg.dir()),
-                                     -(arg.dir()))
+            yield FunctionalCategory(
+                xcat, FunctionalCategory(xcat, function, arg.dir()), -(arg.dir())
+            )
 
     def __str__(self):
-        return 'T'
+        return "T"
 
 
 # Predicates for type-raising
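
Because the combinators are module-level instances, harmonic composition (X/Y Y/Z -> X/Z, the B rule documented above) can be checked in isolation. A sketch built on the api.py classes:

    from nltk.ccg.api import PrimitiveCategory, Direction, FunctionalCategory
    from nltk.ccg.combinator import ForwardComposition

    fwd = Direction("/", [])
    S, NP, N = PrimitiveCategory("S"), PrimitiveCategory("NP"), PrimitiveCategory("N")
    f = FunctionalCategory(S, NP, fwd)   # X/Y = S/NP
    g = FunctionalCategory(NP, N, fwd)   # Y/Z = NP/N
    if ForwardComposition.can_combine(f, g):
        for result in ForwardComposition.combine(f, g):
            print(result)                # X/Z = (S/N)
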
diff --git a/nlp_resource_data/nltk/ccg/combinator.pyc b/nlp_resource_data/nltk/ccg/combinator.pyc
deleted file mode 100755 (executable)
index 0f5e6d5..0000000
Binary files a/nlp_resource_data/nltk/ccg/combinator.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/ccg/lexicon.py b/nlp_resource_data/nltk/ccg/lexicon.py
old mode 100755 (executable)
new mode 100644 (file)
index 699dd87..628eb8a
--- a/nlp_resource_data/nltk/ccg/lexicon.py
+++ b/nlp_resource_data/nltk/ccg/lexicon.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Graeme Gange <ggange@csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -8,42 +8,40 @@
 CCG Lexicons
 """
 
-from __future__ import unicode_literals
-
 import re
 from collections import defaultdict
 
 from nltk.ccg.api import PrimitiveCategory, Direction, CCGVar, FunctionalCategory
-from nltk.compat import python_2_unicode_compatible
 from nltk.internals import deprecated
 
-from nltk.sem.logic import *
+from nltk.sem.logic import Expression
 
-#------------
+# ------------
 # Regular expressions used for parsing components of the lexicon
-#------------
+# ------------
 
 # Parses a primitive category and subscripts
-PRIM_RE = re.compile(r'''([A-Za-z]+)(\[[A-Za-z,]+\])?''')
+PRIM_RE = re.compile(r"""([A-Za-z]+)(\[[A-Za-z,]+\])?""")
 
 # Separates the next primitive category from the remainder of the
 # string
-NEXTPRIM_RE = re.compile(r'''([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)''')
+NEXTPRIM_RE = re.compile(r"""([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)""")
 
 # Separates the next application operator from the remainder
-APP_RE = re.compile(r'''([\\/])([.,]?)([.,]?)(.*)''')
+APP_RE = re.compile(r"""([\\/])([.,]?)([.,]?)(.*)""")
 
 # Parses the definition of the right-hand side (rhs) of either a word or a family
-LEX_RE = re.compile(r'''([\S_]+)\s*(::|[-=]+>)\s*(.+)''', re.UNICODE)
+LEX_RE = re.compile(r"""([\S_]+)\s*(::|[-=]+>)\s*(.+)""", re.UNICODE)
 
 # Parses the right hand side that contains category and maybe semantic predicate
-RHS_RE = re.compile(r'''([^{}]*[^ {}])\s*(\{[^}]+\})?''', re.UNICODE)
+RHS_RE = re.compile(r"""([^{}]*[^ {}])\s*(\{[^}]+\})?""", re.UNICODE)
 
 # Parses the semantic predicate
-SEMANTICS_RE = re.compile(r'''\{([^}]+)\}''', re.UNICODE)
+SEMANTICS_RE = re.compile(r"""\{([^}]+)\}""", re.UNICODE)
 
 # Strips comments from a line
-COMMENTS_RE = re.compile('''([^#]*)(?:#.*)?''')
+COMMENTS_RE = re.compile("""([^#]*)(?:#.*)?""")
+
 
 class Token(object):
     """
@@ -56,29 +54,30 @@ class Token(object):
     * `categ` (string)
     * `semantics` (Expression)
     """
+
     def __init__(self, token, categ, semantics=None):
         self._token = token
         self._categ = categ
         self._semantics = semantics
-        
+
     def categ(self):
         return self._categ
-    
+
     def semantics(self):
         return self._semantics
-        
+
     def __str__(self):
         semantics_str = ""
         if self._semantics is not None:
             semantics_str = " {" + str(self._semantics) + "}"
         return "" + str(self._categ) + semantics_str
-    
+
     def __cmp__(self, other):
-        if not isinstance(other, Token): return -1
-        return cmp((self._categ,self._semantics),
-                    other.categ(),other.semantics())
+        if not isinstance(other, Token):
+            return -1
+        return cmp((self._categ, self._semantics), (other.categ(), other.semantics()))
+
 
-@python_2_unicode_compatible
 class CCGLexicon(object):
     """
     Class representing a lexicon for CCG grammars.
@@ -87,20 +86,19 @@ class CCGLexicon(object):
     * `families`: Families of categories
     * `entries`: A mapping of words to possible categories
     """
+
     def __init__(self, start, primitives, families, entries):
         self._start = PrimitiveCategory(start)
         self._primitives = primitives
         self._families = families
         self._entries = entries
 
-
     def categories(self, word):
         """
         Returns all the possible categories for a word
         """
         return self._entries[word]
 
-
     def start(self):
         """
         Return the target category for the parser
@@ -128,9 +126,9 @@ class CCGLexicon(object):
         return string
 
 
-#-----------
+# -----------
 # Parsing lexicons
-#-----------
+# -----------
 
 
 def matchBrackets(string):
@@ -141,16 +139,16 @@ def matchBrackets(string):
     rest = string[1:]
     inside = "("
 
-    while rest != "" and not rest.startswith(')'):
-        if rest.startswith('('):
+    while rest != "" and not rest.startswith(")"):
+        if rest.startswith("("):
             (part, rest) = matchBrackets(rest)
             inside = inside + part
         else:
             inside = inside + rest[0]
             rest = rest[1:]
-    if rest.startswith(')'):
-        return (inside + ')', rest[1:])
-    raise AssertionError('Unmatched bracket in string \'' + string + '\'')
+    if rest.startswith(")"):
+        return (inside + ")", rest[1:])
+    raise AssertionError("Unmatched bracket in string '" + string + "'")
 
 
 def nextCategory(string):
@@ -158,10 +156,11 @@ def nextCategory(string):
     Separate the string for the next portion of the category from the rest
     of the string
     """
-    if string.startswith('('):
+    if string.startswith("("):
         return matchBrackets(string)
     return NEXTPRIM_RE.match(string).groups()
 
+
 def parseApplication(app):
     """
     Parse an application operator
@@ -174,7 +173,7 @@ def parseSubscripts(subscr):
     Parse the subscripts for a primitive category
     """
     if subscr:
-        return subscr[1:-1].split(',')
+        return subscr[1:-1].split(",")
     return []
 
 
@@ -203,7 +202,9 @@ def parsePrimitiveCategory(chunks, primitives, families, var):
     if catstr in primitives:
         subscrs = parseSubscripts(chunks[1])
         return (PrimitiveCategory(catstr, subscrs), var)
-    raise AssertionError('String \'' + catstr + '\' is neither a family nor primitive category.')
+    raise AssertionError(
+        "String '" + catstr + "' is neither a family nor primitive category."
+    )
 
 
 def augParseCategory(line, primitives, families, var=None):
@@ -213,14 +214,13 @@ def augParseCategory(line, primitives, families, var=None):
     """
     (cat_string, rest) = nextCategory(line)
 
-    if cat_string.startswith('('):
+    if cat_string.startswith("("):
         (res, var) = augParseCategory(cat_string[1:-1], primitives, families, var)
 
     else:
-#        print rePrim.match(str).groups()
-        (res, var) =\
-            parsePrimitiveCategory(PRIM_RE.match(cat_string).groups(), primitives,
-                                   families, var)
+        (res, var) = parsePrimitiveCategory(
+            PRIM_RE.match(cat_string).groups(), primitives, families, var
+        )
 
     while rest != "":
         app = APP_RE.match(rest).groups()
@@ -228,16 +228,17 @@ def augParseCategory(line, primitives, families, var=None):
         rest = app[3]
 
         (cat_string, rest) = nextCategory(rest)
-        if cat_string.startswith('('):
+        if cat_string.startswith("("):
             (arg, var) = augParseCategory(cat_string[1:-1], primitives, families, var)
         else:
-            (arg, var) =\
-                parsePrimitiveCategory(PRIM_RE.match(cat_string).groups(),
-                                       primitives, families, var)
+            (arg, var) = parsePrimitiveCategory(
+                PRIM_RE.match(cat_string).groups(), primitives, families, var
+            )
         res = FunctionalCategory(res, arg, direction)
 
     return (res, var)
 
+
 def fromstring(lex_str, include_semantics=False):
     """
     Convert string representation into a lexicon for CCGs.
@@ -252,18 +253,20 @@ def fromstring(lex_str, include_semantics=False):
         if line == "":
             continue
 
-        if line.startswith(':-'):
+        if line.startswith(":-"):
             # A line of primitive categories.
             # The first one is the target category
             # ie, :- S, N, NP, VP
-            primitives = primitives + [prim.strip() for prim in line[2:].strip().split(',')]
+            primitives = primitives + [
+                prim.strip() for prim in line[2:].strip().split(",")
+            ]
         else:
             # Either a family definition, or a word definition
             (ident, sep, rhs) = LEX_RE.match(line).groups()
             (catstr, semantics_str) = RHS_RE.match(rhs).groups()
             (cat, var) = augParseCategory(catstr, primitives, families)
 
-            if sep == '::':
+            if sep == "::":
                 # Family definition
                 # ie, Det :: NP/N
                 families[ident] = (cat, var)
@@ -271,20 +274,27 @@ def fromstring(lex_str, include_semantics=False):
                 semantics = None
                 if include_semantics is True:
                     if semantics_str is None:
-                        raise AssertionError(line + " must contain semantics because include_semantics is set to True")
+                        raise AssertionError(
+                            line
+                            + " must contain semantics because include_semantics is set to True"
+                        )
                     else:
-                        semantics = Expression.fromstring(SEMANTICS_RE.match(semantics_str).groups()[0])
+                        semantics = Expression.fromstring(
+                            SEMANTICS_RE.match(semantics_str).groups()[0]
+                        )
                 # Word definition
                 # ie, which => (N\N)/(S/NP)
                 entries[ident].append(Token(ident, cat, semantics))
     return CCGLexicon(primitives[0], primitives, families, entries)
 
 
-@deprecated('Use fromstring() instead.')
+@deprecated("Use fromstring() instead.")
 def parseLexicon(lex_str):
     return fromstring(lex_str)
 
-openccg_tinytiny = fromstring("""
+
+openccg_tinytiny = fromstring(
+    """
     # Rather minimal lexicon based on the openccg `tinytiny' grammar.
     # Only incorporates a subset of the morphological subcategories, however.
     :- S,NP,N                    # Primitive categories
@@ -325,4 +335,5 @@ openccg_tinytiny = fromstring("""
 
     see => TransVpl
     sees => TransVsg
-    """)
+    """
+)
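
fromstring() is the entry point for the lexicon format matched by the regular expressions above. A sketch with a two-entry lexicon (a raw string keeps the backslash category intact):

    from nltk.ccg.lexicon import fromstring

    lex = fromstring(r"""
        :- S, NP          # primitive categories, S is the target
        I => NP
        sleep => S\NP
        """)
    print(lex.start())                 # S
    print(lex.categories("sleep")[0])  # (S\NP)
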
diff --git a/nlp_resource_data/nltk/ccg/lexicon.pyc b/nlp_resource_data/nltk/ccg/lexicon.pyc
deleted file mode 100755 (executable)
index fd53711..0000000
Binary files a/nlp_resource_data/nltk/ccg/lexicon.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/ccg/logic.py b/nlp_resource_data/nltk/ccg/logic.py
old mode 100755 (executable)
new mode 100644 (file)
index 39d2ba2..37b87f3
--- a/nlp_resource_data/nltk/ccg/logic.py
+++ b/nlp_resource_data/nltk/ccg/logic.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Combinatory Categorial Grammar
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Tanin Na Nakorn (@tanin)
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -10,37 +10,51 @@ Helper functions for CCG semantics computation
 
 from nltk.sem.logic import *
 
+
 def compute_type_raised_semantics(semantics):
     core = semantics
     parent = None
     while isinstance(core, LambdaExpression):
         parent = core
         core = core.term
-        
+
     var = Variable("F")
     while var in core.free():
         var = unique_variable(pattern=var)
     core = ApplicationExpression(FunctionVariableExpression(var), core)
-    
+
     if parent is not None:
         parent.term = core
     else:
         semantics = core
-    
+
     return LambdaExpression(var, semantics)
 
+
 def compute_function_semantics(function, argument):
     return ApplicationExpression(function, argument).simplify()
 
+
 def compute_composition_semantics(function, argument):
-    assert isinstance(argument, LambdaExpression), "`" + str(argument) + "` must be a lambda expression"
-    return LambdaExpression(argument.variable, ApplicationExpression(function, argument.term).simplify())
+    assert isinstance(argument, LambdaExpression), (
+        "`" + str(argument) + "` must be a lambda expression"
+    )
+    return LambdaExpression(
+        argument.variable, ApplicationExpression(function, argument.term).simplify()
+    )
+
 
 def compute_substitution_semantics(function, argument):
-    assert isinstance(function, LambdaExpression) and isinstance(function.term, LambdaExpression), "`" + str(function) + "` must be a lambda expression with 2 arguments"
-    assert isinstance(argument, LambdaExpression), "`" + str(argument) + "` must be a lambda expression"
+    assert isinstance(function, LambdaExpression) and isinstance(
+        function.term, LambdaExpression
+    ), ("`" + str(function) + "` must be a lambda expression with 2 arguments")
+    assert isinstance(argument, LambdaExpression), (
+        "`" + str(argument) + "` must be a lambda expression"
+    )
 
-    new_argument = ApplicationExpression(argument, VariableExpression(function.variable)).simplify()
-    new_term = ApplicationExpression(function.term, new_argument).simplify() 
+    new_argument = ApplicationExpression(
+        argument, VariableExpression(function.variable)
+    ).simplify()
+    new_term = ApplicationExpression(function.term, new_argument).simplify()
 
     return LambdaExpression(function.variable, new_term)
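
The helpers above operate on plain nltk.sem.logic expressions, so they can be checked without a parser. An illustrative sketch:

    from nltk.sem.logic import Expression
    from nltk.ccg.logic import (
        compute_function_semantics,
        compute_composition_semantics,
    )

    f = Expression.fromstring(r"\x.eat(x)")
    g = Expression.fromstring(r"\y.cook(y)")
    bacon = Expression.fromstring("bacon")

    print(compute_function_semantics(f, bacon))  # eat(bacon)
    print(compute_composition_semantics(f, g))   # \y.eat(cook(y))
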
diff --git a/nlp_resource_data/nltk/ccg/logic.pyc b/nlp_resource_data/nltk/ccg/logic.pyc
deleted file mode 100755 (executable)
index ad8e9d7..0000000
Binary files a/nlp_resource_data/nltk/ccg/logic.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/chat/__init__.py b/nlp_resource_data/nltk/chat/__init__.py
old mode 100755 (executable)
new mode 100644 (file)
index 574d770..d34def9
--- a/nlp_resource_data/nltk/chat/__init__.py
+++ b/nlp_resource_data/nltk/chat/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chatbots
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -15,7 +15,6 @@ typed by users, and respond with automatically generated sentences.
 These chatbots may not work using the windows command line or the
 windows IDLE GUI.
 """
-from __future__ import print_function
 
 from nltk.chat.util import Chat
 from nltk.chat.eliza import eliza_chat
@@ -25,25 +24,28 @@ from nltk.chat.suntsu import suntsu_chat
 from nltk.chat.zen import zen_chat
 
 bots = [
-    (eliza_chat,  'Eliza (psycho-babble)'),
-    (iesha_chat,  'Iesha (teen anime junky)'),
-    (rude_chat,   'Rude (abusive bot)'),
-    (suntsu_chat, 'Suntsu (Chinese sayings)'),
-    (zen_chat,    'Zen (gems of wisdom)')]
+    (eliza_chat, "Eliza (psycho-babble)"),
+    (iesha_chat, "Iesha (teen anime junky)"),
+    (rude_chat, "Rude (abusive bot)"),
+    (suntsu_chat, "Suntsu (Chinese sayings)"),
+    (zen_chat, "Zen (gems of wisdom)"),
+]
+
 
 def chatbots():
     import sys
-    print('Which chatbot would you like to talk to?')
+
+    print("Which chatbot would you like to talk to?")
     botcount = len(bots)
     for i in range(botcount):
-        print('  %d: %s' % (i+1, bots[i][1]))
+        print("  %d: %s" % (i + 1, bots[i][1]))
     while True:
-        print('\nEnter a number in the range 1-%d: ' % botcount, end=' ')
+        print("\nEnter a number in the range 1-%d: " % botcount, end=" ")
         choice = sys.stdin.readline().strip()
         if choice.isdigit() and (int(choice) - 1) in range(botcount):
             break
         else:
-            print('   Error: bad chatbot number')
+            print("   Error: bad chatbot number")
 
-    chatbot = bots[int(choice)-1][0]
+    chatbot = bots[int(choice) - 1][0]
     chatbot()
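
chatbots() reads its menu choice from stdin. For a non-interactive check, each bot module also exposes its Chat object; the sketch below assumes eliza_chatbot is the module-level Chat instance defined in nltk.chat.eliza:

    from nltk.chat.eliza import eliza_chatbot

    # respond() matches the input against the regex pairs and fills %1 slots.
    print(eliza_chatbot.respond("I need a holiday"))
    # e.g. "Why do you need a holiday?"
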
diff --git a/nlp_resource_data/nltk/chat/__init__.pyc b/nlp_resource_data/nltk/chat/__init__.pyc
deleted file mode 100755 (executable)
index bb344a0..0000000
Binary files a/nlp_resource_data/nltk/chat/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/chat/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2af6a37
Binary files /dev/null and b/nlp_resource_data/nltk/chat/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/chat/__pycache__/eliza.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/eliza.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2847528
Binary files /dev/null and b/nlp_resource_data/nltk/chat/__pycache__/eliza.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/chat/__pycache__/iesha.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/iesha.cpython-37.pyc
new file mode 100644 (file)
index 0000000..9abe7ff
Binary files /dev/null and b/nlp_resource_data/nltk/chat/__pycache__/iesha.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/chat/__pycache__/rude.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/rude.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ee564b2
Binary files /dev/null and b/nlp_resource_data/nltk/chat/__pycache__/rude.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/chat/__pycache__/suntsu.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/suntsu.cpython-37.pyc
new file mode 100644 (file)
index 0000000..1487e3f
Binary files /dev/null and b/nlp_resource_data/nltk/chat/__pycache__/suntsu.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/chat/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..541a5eb
Binary files /dev/null and b/nlp_resource_data/nltk/chat/__pycache__/util.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/chat/__pycache__/zen.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/zen.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2003d48
Binary files /dev/null and b/nlp_resource_data/nltk/chat/__pycache__/zen.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/chat/eliza.py b/nlp_resource_data/nltk/chat/eliza.py
old mode 100755 (executable)
new mode 100644 (file)
index c550306..4731c8f
--- a/nlp_resource_data/nltk/chat/eliza.py
+++ b/nlp_resource_data/nltk/chat/eliza.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Eliza
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors: Steven Bird <stevenbird1@gmail.com>
 #          Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -12,7 +12,6 @@
 # a translation table used to convert things you say into things the
 # computer says back, e.g. "I am" --> "you are"
 
-from __future__ import print_function
 from nltk.chat.util import Chat, reflections
 
 # a table of response pairs, where each pair consists of a
@@ -20,225 +19,319 @@ from nltk.chat.util import Chat, reflections
 # with group-macros labelled as %1, %2.
 
 pairs = (
-  (r'I need (.*)',
-  ( "Why do you need %1?",
-    "Would it really help you to get %1?",
-    "Are you sure you need %1?")),
-
-  (r'Why don\'t you (.*)',
-  ( "Do you really think I don't %1?",
-    "Perhaps eventually I will %1.",
-    "Do you really want me to %1?")),
-
-  (r'Why can\'t I (.*)',
-  ( "Do you think you should be able to %1?",
-    "If you could %1, what would you do?",
-    "I don't know -- why can't you %1?",
-    "Have you really tried?")),
-
-  (r'I can\'t (.*)',
-  ( "How do you know you can't %1?",
-    "Perhaps you could %1 if you tried.",
-    "What would it take for you to %1?")),
-
-  (r'I am (.*)',
-  ( "Did you come to me because you are %1?",
-    "How long have you been %1?",
-    "How do you feel about being %1?")),
-
-  (r'I\'m (.*)',
-  ( "How does being %1 make you feel?",
-    "Do you enjoy being %1?",
-    "Why do you tell me you're %1?",
-    "Why do you think you're %1?")),
-
-  (r'Are you (.*)',
-  ( "Why does it matter whether I am %1?",
-    "Would you prefer it if I were not %1?",
-    "Perhaps you believe I am %1.",
-    "I may be %1 -- what do you think?")),
-
-  (r'What (.*)',
-  ( "Why do you ask?",
-    "How would an answer to that help you?",
-    "What do you think?")),
-
-  (r'How (.*)',
-  ( "How do you suppose?",
-    "Perhaps you can answer your own question.",
-    "What is it you're really asking?")),
-
-  (r'Because (.*)',
-  ( "Is that the real reason?",
-    "What other reasons come to mind?",
-    "Does that reason apply to anything else?",
-    "If %1, what else must be true?")),
-
-  (r'(.*) sorry (.*)',
-  ( "There are many times when no apology is needed.",
-    "What feelings do you have when you apologize?")),
-
-  (r'Hello(.*)',
-  ( "Hello... I'm glad you could drop by today.",
-    "Hi there... how are you today?",
-    "Hello, how are you feeling today?")),
-
-  (r'I think (.*)',
-  ( "Do you doubt %1?",
-    "Do you really think so?",
-    "But you're not sure %1?")),
-
-  (r'(.*) friend (.*)',
-  ( "Tell me more about your friends.",
-    "When you think of a friend, what comes to mind?",
-    "Why don't you tell me about a childhood friend?")),
-
-  (r'Yes',
-  ( "You seem quite sure.",
-    "OK, but can you elaborate a bit?")),
-
-  (r'(.*) computer(.*)',
-  ( "Are you really talking about me?",
-    "Does it seem strange to talk to a computer?",
-    "How do computers make you feel?",
-    "Do you feel threatened by computers?")),
-
-  (r'Is it (.*)',
-  ( "Do you think it is %1?",
-    "Perhaps it's %1 -- what do you think?",
-    "If it were %1, what would you do?",
-    "It could well be that %1.")),
-
-  (r'It is (.*)',
-  ( "You seem very certain.",
-    "If I told you that it probably isn't %1, what would you feel?")),
-
-  (r'Can you (.*)',
-  ( "What makes you think I can't %1?",
-    "If I could %1, then what?",
-    "Why do you ask if I can %1?")),
-
-  (r'Can I (.*)',
-  ( "Perhaps you don't want to %1.",
-    "Do you want to be able to %1?",
-    "If you could %1, would you?")),
-
-  (r'You are (.*)',
-  ( "Why do you think I am %1?",
-    "Does it please you to think that I'm %1?",
-    "Perhaps you would like me to be %1.",
-    "Perhaps you're really talking about yourself?")),
-
-  (r'You\'re (.*)',
-  ( "Why do you say I am %1?",
-    "Why do you think I am %1?",
-    "Are we talking about you, or me?")),
-
-  (r'I don\'t (.*)',
-  ( "Don't you really %1?",
-    "Why don't you %1?",
-    "Do you want to %1?")),
-
-  (r'I feel (.*)',
-  ( "Good, tell me more about these feelings.",
-    "Do you often feel %1?",
-    "When do you usually feel %1?",
-    "When you feel %1, what do you do?")),
-
-  (r'I have (.*)',
-  ( "Why do you tell me that you've %1?",
-    "Have you really %1?",
-    "Now that you have %1, what will you do next?")),
-
-  (r'I would (.*)',
-  ( "Could you explain why you would %1?",
-    "Why would you %1?",
-    "Who else knows that you would %1?")),
-
-  (r'Is there (.*)',
-  ( "Do you think there is %1?",
-    "It's likely that there is %1.",
-    "Would you like there to be %1?")),
-
-  (r'My (.*)',
-  ( "I see, your %1.",
-    "Why do you say that your %1?",
-    "When your %1, how do you feel?")),
-
-  (r'You (.*)',
-  ( "We should be discussing you, not me.",
-    "Why do you say that about me?",
-    "Why do you care whether I %1?")),
-
-  (r'Why (.*)',
-  ( "Why don't you tell me the reason why %1?",
-    "Why do you think %1?" )),
-
-  (r'I want (.*)',
-  ( "What would it mean to you if you got %1?",
-    "Why do you want %1?",
-    "What would you do if you got %1?",
-    "If you got %1, then what would you do?")),
-
-  (r'(.*) mother(.*)',
-  ( "Tell me more about your mother.",
-    "What was your relationship with your mother like?",
-    "How do you feel about your mother?",
-    "How does this relate to your feelings today?",
-    "Good family relations are important.")),
-
-  (r'(.*) father(.*)',
-  ( "Tell me more about your father.",
-    "How did your father make you feel?",
-    "How do you feel about your father?",
-    "Does your relationship with your father relate to your feelings today?",
-    "Do you have trouble showing affection with your family?")),
-
-  (r'(.*) child(.*)',
-  ( "Did you have close friends as a child?",
-    "What is your favorite childhood memory?",
-    "Do you remember any dreams or nightmares from childhood?",
-    "Did the other children sometimes tease you?",
-    "How do you think your childhood experiences relate to your feelings today?")),
-
-  (r'(.*)\?',
-  ( "Why do you ask that?",
-    "Please consider whether you can answer your own question.",
-    "Perhaps the answer lies within yourself?",
-    "Why don't you tell me?")),
-
-  (r'quit',
-  ( "Thank you for talking with me.",
-    "Good-bye.",
-    "Thank you, that will be $150.  Have a good day!")),
-
-  (r'(.*)',
-  ( "Please tell me more.",
-    "Let's change focus a bit... Tell me about your family.",
-    "Can you elaborate on that?",
-    "Why do you say that %1?",
-    "I see.",
-    "Very interesting.",
-    "%1.",
-    "I see.  And what does that tell you?",
-    "How does that make you feel?",
-    "How do you feel when you say that?"))
+    (
+        r"I need (.*)",
+        (
+            "Why do you need %1?",
+            "Would it really help you to get %1?",
+            "Are you sure you need %1?",
+        ),
+    ),
+    (
+        r"Why don\'t you (.*)",
+        (
+            "Do you really think I don't %1?",
+            "Perhaps eventually I will %1.",
+            "Do you really want me to %1?",
+        ),
+    ),
+    (
+        r"Why can\'t I (.*)",
+        (
+            "Do you think you should be able to %1?",
+            "If you could %1, what would you do?",
+            "I don't know -- why can't you %1?",
+            "Have you really tried?",
+        ),
+    ),
+    (
+        r"I can\'t (.*)",
+        (
+            "How do you know you can't %1?",
+            "Perhaps you could %1 if you tried.",
+            "What would it take for you to %1?",
+        ),
+    ),
+    (
+        r"I am (.*)",
+        (
+            "Did you come to me because you are %1?",
+            "How long have you been %1?",
+            "How do you feel about being %1?",
+        ),
+    ),
+    (
+        r"I\'m (.*)",
+        (
+            "How does being %1 make you feel?",
+            "Do you enjoy being %1?",
+            "Why do you tell me you're %1?",
+            "Why do you think you're %1?",
+        ),
+    ),
+    (
+        r"Are you (.*)",
+        (
+            "Why does it matter whether I am %1?",
+            "Would you prefer it if I were not %1?",
+            "Perhaps you believe I am %1.",
+            "I may be %1 -- what do you think?",
+        ),
+    ),
+    (
+        r"What (.*)",
+        (
+            "Why do you ask?",
+            "How would an answer to that help you?",
+            "What do you think?",
+        ),
+    ),
+    (
+        r"How (.*)",
+        (
+            "How do you suppose?",
+            "Perhaps you can answer your own question.",
+            "What is it you're really asking?",
+        ),
+    ),
+    (
+        r"Because (.*)",
+        (
+            "Is that the real reason?",
+            "What other reasons come to mind?",
+            "Does that reason apply to anything else?",
+            "If %1, what else must be true?",
+        ),
+    ),
+    (
+        r"(.*) sorry (.*)",
+        (
+            "There are many times when no apology is needed.",
+            "What feelings do you have when you apologize?",
+        ),
+    ),
+    (
+        r"Hello(.*)",
+        (
+            "Hello... I'm glad you could drop by today.",
+            "Hi there... how are you today?",
+            "Hello, how are you feeling today?",
+        ),
+    ),
+    (
+        r"I think (.*)",
+        ("Do you doubt %1?", "Do you really think so?", "But you're not sure %1?"),
+    ),
+    (
+        r"(.*) friend (.*)",
+        (
+            "Tell me more about your friends.",
+            "When you think of a friend, what comes to mind?",
+            "Why don't you tell me about a childhood friend?",
+        ),
+    ),
+    (r"Yes", ("You seem quite sure.", "OK, but can you elaborate a bit?")),
+    (
+        r"(.*) computer(.*)",
+        (
+            "Are you really talking about me?",
+            "Does it seem strange to talk to a computer?",
+            "How do computers make you feel?",
+            "Do you feel threatened by computers?",
+        ),
+    ),
+    (
+        r"Is it (.*)",
+        (
+            "Do you think it is %1?",
+            "Perhaps it's %1 -- what do you think?",
+            "If it were %1, what would you do?",
+            "It could well be that %1.",
+        ),
+    ),
+    (
+        r"It is (.*)",
+        (
+            "You seem very certain.",
+            "If I told you that it probably isn't %1, what would you feel?",
+        ),
+    ),
+    (
+        r"Can you (.*)",
+        (
+            "What makes you think I can't %1?",
+            "If I could %1, then what?",
+            "Why do you ask if I can %1?",
+        ),
+    ),
+    (
+        r"Can I (.*)",
+        (
+            "Perhaps you don't want to %1.",
+            "Do you want to be able to %1?",
+            "If you could %1, would you?",
+        ),
+    ),
+    (
+        r"You are (.*)",
+        (
+            "Why do you think I am %1?",
+            "Does it please you to think that I'm %1?",
+            "Perhaps you would like me to be %1.",
+            "Perhaps you're really talking about yourself?",
+        ),
+    ),
+    (
+        r"You\'re (.*)",
+        (
+            "Why do you say I am %1?",
+            "Why do you think I am %1?",
+            "Are we talking about you, or me?",
+        ),
+    ),
+    (
+        r"I don\'t (.*)",
+        ("Don't you really %1?", "Why don't you %1?", "Do you want to %1?"),
+    ),
+    (
+        r"I feel (.*)",
+        (
+            "Good, tell me more about these feelings.",
+            "Do you often feel %1?",
+            "When do you usually feel %1?",
+            "When you feel %1, what do you do?",
+        ),
+    ),
+    (
+        r"I have (.*)",
+        (
+            "Why do you tell me that you've %1?",
+            "Have you really %1?",
+            "Now that you have %1, what will you do next?",
+        ),
+    ),
+    (
+        r"I would (.*)",
+        (
+            "Could you explain why you would %1?",
+            "Why would you %1?",
+            "Who else knows that you would %1?",
+        ),
+    ),
+    (
+        r"Is there (.*)",
+        (
+            "Do you think there is %1?",
+            "It's likely that there is %1.",
+            "Would you like there to be %1?",
+        ),
+    ),
+    (
+        r"My (.*)",
+        (
+            "I see, your %1.",
+            "Why do you say that your %1?",
+            "When your %1, how do you feel?",
+        ),
+    ),
+    (
+        r"You (.*)",
+        (
+            "We should be discussing you, not me.",
+            "Why do you say that about me?",
+            "Why do you care whether I %1?",
+        ),
+    ),
+    (r"Why (.*)", ("Why don't you tell me the reason why %1?", "Why do you think %1?")),
+    (
+        r"I want (.*)",
+        (
+            "What would it mean to you if you got %1?",
+            "Why do you want %1?",
+            "What would you do if you got %1?",
+            "If you got %1, then what would you do?",
+        ),
+    ),
+    (
+        r"(.*) mother(.*)",
+        (
+            "Tell me more about your mother.",
+            "What was your relationship with your mother like?",
+            "How do you feel about your mother?",
+            "How does this relate to your feelings today?",
+            "Good family relations are important.",
+        ),
+    ),
+    (
+        r"(.*) father(.*)",
+        (
+            "Tell me more about your father.",
+            "How did your father make you feel?",
+            "How do you feel about your father?",
+            "Does your relationship with your father relate to your feelings today?",
+            "Do you have trouble showing affection with your family?",
+        ),
+    ),
+    (
+        r"(.*) child(.*)",
+        (
+            "Did you have close friends as a child?",
+            "What is your favorite childhood memory?",
+            "Do you remember any dreams or nightmares from childhood?",
+            "Did the other children sometimes tease you?",
+            "How do you think your childhood experiences relate to your feelings today?",
+        ),
+    ),
+    (
+        r"(.*)\?",
+        (
+            "Why do you ask that?",
+            "Please consider whether you can answer your own question.",
+            "Perhaps the answer lies within yourself?",
+            "Why don't you tell me?",
+        ),
+    ),
+    (
+        r"quit",
+        (
+            "Thank you for talking with me.",
+            "Good-bye.",
+            "Thank you, that will be $150.  Have a good day!",
+        ),
+    ),
+    (
+        r"(.*)",
+        (
+            "Please tell me more.",
+            "Let's change focus a bit... Tell me about your family.",
+            "Can you elaborate on that?",
+            "Why do you say that %1?",
+            "I see.",
+            "Very interesting.",
+            "%1.",
+            "I see.  And what does that tell you?",
+            "How does that make you feel?",
+            "How do you feel when you say that?",
+        ),
+    ),
 )
 
 eliza_chatbot = Chat(pairs, reflections)
 
+
 def eliza_chat():
     print("Therapist\n---------")
     print("Talk to the program by typing in plain English, using normal upper-")
     print('and lower-case letters and punctuation.  Enter "quit" when done.')
-    print('='*72)
+    print("=" * 72)
     print("Hello.  How are you feeling today?")
 
     eliza_chatbot.converse()
 
+
 def demo():
     eliza_chat()
 
+
 if __name__ == "__main__":
     demo()
-
diff --git a/nlp_resource_data/nltk/chat/eliza.pyc b/nlp_resource_data/nltk/chat/eliza.pyc
deleted file mode 100755 (executable)
index 68f33c8..0000000
Binary files a/nlp_resource_data/nltk/chat/eliza.pyc and /dev/null differ
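Each entry in the reformatted pairs table above is a (regex, responses) tuple consumed by nltk.chat.util.Chat, whose diff appears further below: the first pattern that matches the input picks one response template at random and fills %1, %2, ... with the reflected match groups. A minimal usage sketch; the RNG is seeded only to make the illustration repeatable:

import random

from nltk.chat.eliza import eliza_chatbot

random.seed(0)  # pin random.choice so the demo reply is stable
print(eliza_chatbot.respond("I need a holiday"))
# r"I need (.*)" matches, so the reply is one of:
#   "Why do you need a holiday?"
#   "Would it really help you to get a holiday?"
#   "Are you sure you need a holiday?"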
diff --git a/nlp_resource_data/nltk/chat/iesha.py b/nlp_resource_data/nltk/chat/iesha.py
old mode 100755 (executable)
new mode 100644 (file)
index 68d52be..55318af
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Teen Chatbot
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Selina Dennis <sjmd@csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -10,131 +10,151 @@ This chatbot is a tongue-in-cheek take on the average teen
 anime junky that frequents YahooMessenger or MSNM.
 All spelling mistakes and flawed grammar are intentional.
 """
-from __future__ import print_function
 
 from nltk.chat.util import Chat
 
 reflections = {
-    "am"     : "r",
-    "was"    : "were",
-    "i"      : "u",
-    "i'd"    : "u'd",
-    "i've"   : "u'v",
-    "ive"    : "u'v",
-    "i'll"   : "u'll",
-    "my"     : "ur",
-    "are"    : "am",
-    "you're" : "im",
-    "you've" : "ive",
-    "you'll" : "i'll",
-    "your"   : "my",
-    "yours"  : "mine",
-    "you"    : "me",
-    "u"      : "me",
-    "ur"     : "my",
-    "urs"    : "mine",
-    "me"     : "u"
+    "am": "r",
+    "was": "were",
+    "i": "u",
+    "i'd": "u'd",
+    "i've": "u'v",
+    "ive": "u'v",
+    "i'll": "u'll",
+    "my": "ur",
+    "are": "am",
+    "you're": "im",
+    "you've": "ive",
+    "you'll": "i'll",
+    "your": "my",
+    "yours": "mine",
+    "you": "me",
+    "u": "me",
+    "ur": "my",
+    "urs": "mine",
+    "me": "u",
 }
 
 # Note: %1/2/etc are used without spaces prior as the chat bot seems
 # to add a superfluous space when matching.
 
 pairs = (
-    (r'I\'m (.*)',
-    ( "ur%1?? that's so cool! kekekekeke ^_^ tell me more!",
-      "ur%1? neat!! kekeke >_<")),
-
-    (r'(.*) don\'t you (.*)',
-    ( "u think I can%2??! really?? kekeke \<_\<",
-      "what do u mean%2??!",
-      "i could if i wanted, don't you think!! kekeke")),
-
-    (r'ye[as] [iI] (.*)',
-    ( "u%1? cool!! how?",
-      "how come u%1??",
-      "u%1? so do i!!")),
-
-    (r'do (you|u) (.*)\??',
-    ( "do i%2? only on tuesdays! kekeke *_*",
-      "i dunno! do u%2??")),
-
-    (r'(.*)\?',
-    ( "man u ask lots of questions!",
-      "booooring! how old r u??",
-      "boooooring!! ur not very fun")),
-
-    (r'(cos|because) (.*)',
-    ( "hee! i don't believe u! >_<",
-      "nuh-uh! >_<",
-      "ooooh i agree!")),
-
-    (r'why can\'t [iI] (.*)',
-    ( "i dunno! y u askin me for!",
-      "try harder, silly! hee! ^_^",
-      "i dunno! but when i can't%1 i jump up and down!")),
-
-    (r'I can\'t (.*)',
-    ( "u can't what??! >_<",
-      "that's ok! i can't%1 either! kekekekeke ^_^",
-      "try harder, silly! hee! ^&^")),
-
-    (r'(.*) (like|love|watch) anime',
-    ( "omg i love anime!! do u like sailor moon??! ^&^",
-      "anime yay! anime rocks sooooo much!",
-      "oooh anime! i love anime more than anything!",
-      "anime is the bestest evar! evangelion is the best!",
-      "hee anime is the best! do you have ur fav??")),
-
-    (r'I (like|love|watch|play) (.*)',
-    ( "yay! %2 rocks!",
-      "yay! %2 is neat!",
-      "cool! do u like other stuff?? ^_^")),
-
-    (r'anime sucks|(.*) (hate|detest) anime',
-    ( "ur a liar! i'm not gonna talk to u nemore if u h8 anime *;*",
-      "no way! anime is the best ever!",
-      "nuh-uh, anime is the best!")),
-
-    (r'(are|r) (you|u) (.*)',
-    ( "am i%1??! how come u ask that!",
-      "maybe!  y shud i tell u?? kekeke >_>")),
-
-    (r'what (.*)',
-    ( "hee u think im gonna tell u? .v.",
-      "booooooooring! ask me somethin else!")),
-
-    (r'how (.*)',
-    ( "not tellin!! kekekekekeke ^_^",)),
-
-    (r'(hi|hello|hey) (.*)',
-    ( "hi!!! how r u!!",)),
-
-    (r'quit',
-    ( "mom says i have to go eat dinner now :,( bye!!",
-      "awww u have to go?? see u next time!!",
-      "how to see u again soon! ^_^")),
-
-    (r'(.*)',
-    ( "ur funny! kekeke",
-      "boooooring! talk about something else! tell me wat u like!",
-      "do u like anime??",
-      "do u watch anime? i like sailor moon! ^_^",
-      "i wish i was a kitty!! kekekeke ^_^"))
-    )
+    (
+        r"I\'m (.*)",
+        (
+            "ur%1?? that's so cool! kekekekeke ^_^ tell me more!",
+            "ur%1? neat!! kekeke >_<",
+        ),
+    ),
+    (
+        r"(.*) don\'t you (.*)",
+        (
+            "u think I can%2??! really?? kekeke \<_\<",
+            "what do u mean%2??!",
+            "i could if i wanted, don't you think!! kekeke",
+        ),
+    ),
+    (r"ye[as] [iI] (.*)", ("u%1? cool!! how?", "how come u%1??", "u%1? so do i!!")),
+    (
+        r"do (you|u) (.*)\??",
+        ("do i%2? only on tuesdays! kekeke *_*", "i dunno! do u%2??"),
+    ),
+    (
+        r"(.*)\?",
+        (
+            "man u ask lots of questions!",
+            "booooring! how old r u??",
+            "boooooring!! ur not very fun",
+        ),
+    ),
+    (
+        r"(cos|because) (.*)",
+        ("hee! i don't believe u! >_<", "nuh-uh! >_<", "ooooh i agree!"),
+    ),
+    (
+        r"why can\'t [iI] (.*)",
+        (
+            "i dunno! y u askin me for!",
+            "try harder, silly! hee! ^_^",
+            "i dunno! but when i can't%1 i jump up and down!",
+        ),
+    ),
+    (
+        r"I can\'t (.*)",
+        (
+            "u can't what??! >_<",
+            "that's ok! i can't%1 either! kekekekeke ^_^",
+            "try harder, silly! hee! ^&^",
+        ),
+    ),
+    (
+        r"(.*) (like|love|watch) anime",
+        (
+            "omg i love anime!! do u like sailor moon??! ^&^",
+            "anime yay! anime rocks sooooo much!",
+            "oooh anime! i love anime more than anything!",
+            "anime is the bestest evar! evangelion is the best!",
+            "hee anime is the best! do you have ur fav??",
+        ),
+    ),
+    (
+        r"I (like|love|watch|play) (.*)",
+        ("yay! %2 rocks!", "yay! %2 is neat!", "cool! do u like other stuff?? ^_^"),
+    ),
+    (
+        r"anime sucks|(.*) (hate|detest) anime",
+        (
+            "ur a liar! i'm not gonna talk to u nemore if u h8 anime *;*",
+            "no way! anime is the best ever!",
+            "nuh-uh, anime is the best!",
+        ),
+    ),
+    (
+        r"(are|r) (you|u) (.*)",
+        ("am i%1??! how come u ask that!", "maybe!  y shud i tell u?? kekeke >_>"),
+    ),
+    (
+        r"what (.*)",
+        ("hee u think im gonna tell u? .v.", "booooooooring! ask me somethin else!"),
+    ),
+    (r"how (.*)", ("not tellin!! kekekekekeke ^_^",)),
+    (r"(hi|hello|hey) (.*)", ("hi!!! how r u!!",)),
+    (
+        r"quit",
+        (
+            "mom says i have to go eat dinner now :,( bye!!",
+            "awww u have to go?? see u next time!!",
+            "how to see u again soon! ^_^",
+        ),
+    ),
+    (
+        r"(.*)",
+        (
+            "ur funny! kekeke",
+            "boooooring! talk about something else! tell me wat u like!",
+            "do u like anime??",
+            "do u watch anime? i like sailor moon! ^_^",
+            "i wish i was a kitty!! kekekeke ^_^",
+        ),
+    ),
+)
 
 iesha_chatbot = Chat(pairs, reflections)
 
+
 def iesha_chat():
     print("Iesha the TeenBoT\n---------")
     print("Talk to the program by typing in plain English, using normal upper-")
     print('and lower-case letters and punctuation.  Enter "quit" when done.')
-    print('='*72)
+    print("=" * 72)
     print("hi!! i'm iesha! who r u??!")
 
     iesha_chatbot.converse()
 
+
 def demo():
     iesha_chat()
 
+
 if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/chat/iesha.pyc b/nlp_resource_data/nltk/chat/iesha.pyc
deleted file mode 100755 (executable)
index ec19f91..0000000
Binary files a/nlp_resource_data/nltk/chat/iesha.pyc and /dev/null differ
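iesha hands Chat its own reflections table, so captured groups are rewritten into netspeak ("my" -> "ur", "i" -> "u", and so on) before being spliced into a reply. A small sketch of that mechanism; only the reflection effect is asserted, since the reply template is chosen at random:

import random

from nltk.chat.iesha import iesha_chatbot

random.seed(0)
print(iesha_chatbot.respond("yes i love my pet"))
# r"ye[as] [iI] (.*)" captures "love my pet"; the custom table above
# reflects "my" to "ur", so the reply quotes back "love ur pet"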
diff --git a/nlp_resource_data/nltk/chat/rude.py b/nlp_resource_data/nltk/chat/rude.py
old mode 100755 (executable)
new mode 100644 (file)
index 0e571d7..c7b1b1b
 # Natural Language Toolkit: Rude Chatbot
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Peter Spiller <pspiller@csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function
 
 from nltk.chat.util import Chat, reflections
 
 pairs = (
-    (r'We (.*)',
-        ("What do you mean, 'we'?",
-        "Don't include me in that!",
-        "I wouldn't be so sure about that.")),
-
-    (r'You should (.*)',
-        ("Don't tell me what to do, buddy.",
-        "Really? I should, should I?")),
-
-    (r'You\'re(.*)',
-        ("More like YOU'RE %1!",
-        "Hah! Look who's talking.",
-        "Come over here and tell me I'm %1.")),
-
-    (r'You are(.*)',
-        ("More like YOU'RE %1!",
-        "Hah! Look who's talking.",
-        "Come over here and tell me I'm %1.")),
-
-    (r'I can\'t(.*)',
-        ("You do sound like the type who can't %1.",
-        "Hear that splashing sound? That's my heart bleeding for you.",
-        "Tell somebody who might actually care.")),
-
-    (r'I think (.*)',
-        ("I wouldn't think too hard if I were you.",
-        "You actually think? I'd never have guessed...")),
-
-    (r'I (.*)',
-        ("I'm getting a bit tired of hearing about you.",
-        "How about we talk about me instead?",
-        "Me, me, me... Frankly, I don't care.")),
-
-    (r'How (.*)',
-        ("How do you think?",
-        "Take a wild guess.",
-        "I'm not even going to dignify that with an answer.")),
-
-    (r'What (.*)',
-        ("Do I look like an encyclopedia?",
-        "Figure it out yourself.")),
-
-    (r'Why (.*)',
-        ("Why not?",
-        "That's so obvious I thought even you'd have already figured it out.")),
-
-    (r'(.*)shut up(.*)',
-        ("Make me.",
-        "Getting angry at a feeble NLP assignment? Somebody's losing it.",
-        "Say that again, I dare you.")),
-
-    (r'Shut up(.*)',
-        ("Make me.",
-        "Getting angry at a feeble NLP assignment? Somebody's losing it.",
-        "Say that again, I dare you.")),
-
-    (r'Hello(.*)',
-        ("Oh good, somebody else to talk to. Joy.",
-        "'Hello'? How original...")),
-
-    (r'(.*)',
-        ("I'm getting bored here. Become more interesting.",
-        "Either become more thrilling or get lost, buddy.",
-        "Change the subject before I die of fatal boredom."))
+    (
+        r"We (.*)",
+        (
+            "What do you mean, 'we'?",
+            "Don't include me in that!",
+            "I wouldn't be so sure about that.",
+        ),
+    ),
+    (
+        r"You should (.*)",
+        ("Don't tell me what to do, buddy.", "Really? I should, should I?"),
+    ),
+    (
+        r"You\'re(.*)",
+        (
+            "More like YOU'RE %1!",
+            "Hah! Look who's talking.",
+            "Come over here and tell me I'm %1.",
+        ),
+    ),
+    (
+        r"You are(.*)",
+        (
+            "More like YOU'RE %1!",
+            "Hah! Look who's talking.",
+            "Come over here and tell me I'm %1.",
+        ),
+    ),
+    (
+        r"I can\'t(.*)",
+        (
+            "You do sound like the type who can't %1.",
+            "Hear that splashing sound? That's my heart bleeding for you.",
+            "Tell somebody who might actually care.",
+        ),
+    ),
+    (
+        r"I think (.*)",
+        (
+            "I wouldn't think too hard if I were you.",
+            "You actually think? I'd never have guessed...",
+        ),
+    ),
+    (
+        r"I (.*)",
+        (
+            "I'm getting a bit tired of hearing about you.",
+            "How about we talk about me instead?",
+            "Me, me, me... Frankly, I don't care.",
+        ),
+    ),
+    (
+        r"How (.*)",
+        (
+            "How do you think?",
+            "Take a wild guess.",
+            "I'm not even going to dignify that with an answer.",
+        ),
+    ),
+    (r"What (.*)", ("Do I look like an encyclopedia?", "Figure it out yourself.")),
+    (
+        r"Why (.*)",
+        (
+            "Why not?",
+            "That's so obvious I thought even you'd have already figured it out.",
+        ),
+    ),
+    (
+        r"(.*)shut up(.*)",
+        (
+            "Make me.",
+            "Getting angry at a feeble NLP assignment? Somebody's losing it.",
+            "Say that again, I dare you.",
+        ),
+    ),
+    (
+        r"Shut up(.*)",
+        (
+            "Make me.",
+            "Getting angry at a feeble NLP assignment? Somebody's losing it.",
+            "Say that again, I dare you.",
+        ),
+    ),
+    (
+        r"Hello(.*)",
+        ("Oh good, somebody else to talk to. Joy.", "'Hello'? How original..."),
+    ),
+    (
+        r"(.*)",
+        (
+            "I'm getting bored here. Become more interesting.",
+            "Either become more thrilling or get lost, buddy.",
+            "Change the subject before I die of fatal boredom.",
+        ),
+    ),
 )
 
 rude_chatbot = Chat(pairs, reflections)
 
+
 def rude_chat():
     print("Talk to the program by typing in plain English, using normal upper-")
     print('and lower-case letters and punctuation.  Enter "quit" when done.')
-    print('='*72)
+    print("=" * 72)
     print("I suppose I should say hello.")
 
     rude_chatbot.converse()
 
+
 def demo():
     rude_chat()
 
+
 if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/chat/rude.pyc b/nlp_resource_data/nltk/chat/rude.pyc
deleted file mode 100755 (executable)
index 2398cf5..0000000
Binary files a/nlp_resource_data/nltk/chat/rude.pyc and /dev/null differ
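As with the other bots, rude.py's pairs are tried in order, so the broad r"(.*)" entry acts as a fallback that only fires when nothing more specific matched; placed any earlier, it would shadow every later pair. A quick sketch of that precedence:

import random

from nltk.chat.rude import rude_chatbot

random.seed(0)
print(rude_chatbot.respond("Hello there"))   # caught by r"Hello(.*)"
print(rude_chatbot.respond("nice weather"))  # nothing earlier matches, so it falls through to r"(.*)"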
diff --git a/nlp_resource_data/nltk/chat/suntsu.py b/nlp_resource_data/nltk/chat/suntsu.py
old mode 100755 (executable)
new mode 100644 (file)
index f2f1e1b..4c68a77
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Sun Tsu-Bot
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Sam Huston 2007
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -13,105 +13,128 @@ Translated by LIONEL GILES, M.A. 1910
 Hosted by the Gutenberg Project
 http://www.gutenberg.org/
 """
-from __future__ import print_function
 
 from nltk.chat.util import Chat, reflections
 
 pairs = (
-
-  (r'quit',
-  ( "Good-bye.",
-    "Plan well",
-    "May victory be your future")),
-
-  (r'[^\?]*\?',
-  ("Please consider whether you can answer your own question.",
-   "Ask me no questions!")),
-
-  (r'[0-9]+(.*)',
-  ("It is the rule in war, if our forces are ten to the enemy's one, to surround him; if five to one, to attack him; if twice as numerous, to divide our army into two.",
-   "There are five essentials for victory")),
-
-
-  (r'[A-Ca-c](.*)',
-  ("The art of war is of vital importance to the State.",
-   "All warfare is based on deception.",
-   "If your opponent is secure at all points, be prepared for him. If he is in superior strength, evade him.",
-   "If the campaign is protracted, the resources of the State will not be equal to the strain.",
-   "Attack him where he is unprepared, appear where you are not expected.",
-   "There is no instance of a country having benefited from prolonged warfare.")),
-
-  (r'[D-Fd-f](.*)',
-  ("The skillful soldier does not raise a second levy, neither are his supply-wagons loaded more than twice.",
-   "Bring war material with you from home, but forage on the enemy.",
-   "In war, then, let your great object be victory, not lengthy campaigns.",
-   "To fight and conquer in all your battles is not supreme excellence; supreme excellence consists in breaking the enemy's resistance without fighting.")),
-
-  (r'[G-Ig-i](.*)',
-  ("Heaven signifies night and day, cold and heat, times and seasons.",
-   "It is the rule in war, if our forces are ten to the enemy's one, to surround him; if five to one, to attack him; if twice as numerous, to divide our army into two.",
-   "The good fighters of old first put themselves beyond the possibility of defeat, and then waited for an opportunity of defeating the enemy.",
-   "One may know how to conquer without being able to do it.")),
-
-  (r'[J-Lj-l](.*)',
-  ("There are three ways in which a ruler can bring misfortune upon his army.",
-   "By commanding the army to advance or to retreat, being ignorant of the fact that it cannot obey. This is called hobbling the army.",
-   "By attempting to govern an army in the same way as he administers a kingdom, being ignorant of the conditions which obtain in an army. This causes restlessness in the soldier's minds.",
-   "By employing the officers of his army without discrimination, through ignorance of the military principle of adaptation to circumstances. This shakes the confidence of the soldiers.",
-   "There are five essentials for victory",
-   "He will win who knows when to fight and when not to fight.",
-   "He will win who knows how to handle both superior and inferior forces.",
-   "He will win whose army is animated by the same spirit throughout all its ranks.",
-   "He will win who, prepared himself, waits to take the enemy unprepared.",
-   "He will win who has military capacity and is not interfered with by the sovereign.")),
-
-  (r'[M-Om-o](.*)',
-  ("If you know the enemy and know yourself, you need not fear the result of a hundred battles.",
-   "If you know yourself but not the enemy, for every victory gained you will also suffer a defeat.",
-   "If you know neither the enemy nor yourself, you will succumb in every battle.",
-   "The control of a large force is the same principle as the control of a few men: it is merely a question of dividing up their numbers.")),
-
-  (r'[P-Rp-r](.*)',
-  ("Security against defeat implies defensive tactics; ability to defeat the enemy means taking the offensive.",
-   "Standing on the defensive indicates insufficient strength; attacking, a superabundance of strength.",
-   "He wins his battles by making no mistakes. Making no mistakes is what establishes the certainty of victory, for it means conquering an enemy that is already defeated.",
-   "A victorious army opposed to a routed one, is as a pound's weight placed in the scale against a single grain.",
-   "The onrush of a conquering force is like the bursting of pent-up waters into a chasm a thousand fathoms deep.")),
-
-  (r'[S-Us-u](.*)',
-  ("What the ancients called a clever fighter is one who not only wins, but excels in winning with ease.",
-   "Hence his victories bring him neither reputation for wisdom nor credit for courage.",
-   "Hence the skillful fighter puts himself into a position which makes defeat impossible, and does not miss the moment for defeating the enemy.",
-   "In war the victorious strategist only seeks battle after the victory has been won, whereas he who is destined to defeat first fights and afterwards looks for victory.",
-   "There are not more than five musical notes, yet the combinations of these five give rise to more melodies than can ever be heard.",
-   "Appear at points which the enemy must hasten to defend; march swiftly to places where you are not expected.")),
-
-  (r'[V-Zv-z](.*)',
-  ("It is a matter of life and death, a road either to safety or to ruin.",
-  "Hold out baits to entice the enemy. Feign disorder, and crush him.",
-  "All men can see the tactics whereby I conquer, but what none can see is the strategy out of which victory is evolved.",
-  "Do not repeat the tactics which have gained you one victory, but let your methods be regulated by the infinite variety of circumstances.",
-  "So in war, the way is to avoid what is strong and to strike at what is weak.",
-  "Just as water retains no constant shape, so in warfare there are no constant conditions.")),
-
-  (r'(.*)',
-  ( "Your statement insults me.",
-    ""))
+    (r"quit", ("Good-bye.", "Plan well", "May victory be your future")),
+    (
+        r"[^\?]*\?",
+        (
+            "Please consider whether you can answer your own question.",
+            "Ask me no questions!",
+        ),
+    ),
+    (
+        r"[0-9]+(.*)",
+        (
+            "It is the rule in war, if our forces are ten to the enemy's one, to surround him; if five to one, to attack him; if twice as numerous, to divide our army into two.",
+            "There are five essentials for victory",
+        ),
+    ),
+    (
+        r"[A-Ca-c](.*)",
+        (
+            "The art of war is of vital importance to the State.",
+            "All warfare is based on deception.",
+            "If your opponent is secure at all points, be prepared for him. If he is in superior strength, evade him.",
+            "If the campaign is protracted, the resources of the State will not be equal to the strain.",
+            "Attack him where he is unprepared, appear where you are not expected.",
+            "There is no instance of a country having benefited from prolonged warfare.",
+        ),
+    ),
+    (
+        r"[D-Fd-f](.*)",
+        (
+            "The skillful soldier does not raise a second levy, neither are his supply-wagons loaded more than twice.",
+            "Bring war material with you from home, but forage on the enemy.",
+            "In war, then, let your great object be victory, not lengthy campaigns.",
+            "To fight and conquer in all your battles is not supreme excellence; supreme excellence consists in breaking the enemy's resistance without fighting.",
+        ),
+    ),
+    (
+        r"[G-Ig-i](.*)",
+        (
+            "Heaven signifies night and day, cold and heat, times and seasons.",
+            "It is the rule in war, if our forces are ten to the enemy's one, to surround him; if five to one, to attack him; if twice as numerous, to divide our army into two.",
+            "The good fighters of old first put themselves beyond the possibility of defeat, and then waited for an opportunity of defeating the enemy.",
+            "One may know how to conquer without being able to do it.",
+        ),
+    ),
+    (
+        r"[J-Lj-l](.*)",
+        (
+            "There are three ways in which a ruler can bring misfortune upon his army.",
+            "By commanding the army to advance or to retreat, being ignorant of the fact that it cannot obey. This is called hobbling the army.",
+            "By attempting to govern an army in the same way as he administers a kingdom, being ignorant of the conditions which obtain in an army. This causes restlessness in the soldier's minds.",
+            "By employing the officers of his army without discrimination, through ignorance of the military principle of adaptation to circumstances. This shakes the confidence of the soldiers.",
+            "There are five essentials for victory",
+            "He will win who knows when to fight and when not to fight.",
+            "He will win who knows how to handle both superior and inferior forces.",
+            "He will win whose army is animated by the same spirit throughout all its ranks.",
+            "He will win who, prepared himself, waits to take the enemy unprepared.",
+            "He will win who has military capacity and is not interfered with by the sovereign.",
+        ),
+    ),
+    (
+        r"[M-Om-o](.*)",
+        (
+            "If you know the enemy and know yourself, you need not fear the result of a hundred battles.",
+            "If you know yourself but not the enemy, for every victory gained you will also suffer a defeat.",
+            "If you know neither the enemy nor yourself, you will succumb in every battle.",
+            "The control of a large force is the same principle as the control of a few men: it is merely a question of dividing up their numbers.",
+        ),
+    ),
+    (
+        r"[P-Rp-r](.*)",
+        (
+            "Security against defeat implies defensive tactics; ability to defeat the enemy means taking the offensive.",
+            "Standing on the defensive indicates insufficient strength; attacking, a superabundance of strength.",
+            "He wins his battles by making no mistakes. Making no mistakes is what establishes the certainty of victory, for it means conquering an enemy that is already defeated.",
+            "A victorious army opposed to a routed one, is as a pound's weight placed in the scale against a single grain.",
+            "The onrush of a conquering force is like the bursting of pent-up waters into a chasm a thousand fathoms deep.",
+        ),
+    ),
+    (
+        r"[S-Us-u](.*)",
+        (
+            "What the ancients called a clever fighter is one who not only wins, but excels in winning with ease.",
+            "Hence his victories bring him neither reputation for wisdom nor credit for courage.",
+            "Hence the skillful fighter puts himself into a position which makes defeat impossible, and does not miss the moment for defeating the enemy.",
+            "In war the victorious strategist only seeks battle after the victory has been won, whereas he who is destined to defeat first fights and afterwards looks for victory.",
+            "There are not more than five musical notes, yet the combinations of these five give rise to more melodies than can ever be heard.",
+            "Appear at points which the enemy must hasten to defend; march swiftly to places where you are not expected.",
+        ),
+    ),
+    (
+        r"[V-Zv-z](.*)",
+        (
+            "It is a matter of life and death, a road either to safety or to ruin.",
+            "Hold out baits to entice the enemy. Feign disorder, and crush him.",
+            "All men can see the tactics whereby I conquer, but what none can see is the strategy out of which victory is evolved.",
+            "Do not repeat the tactics which have gained you one victory, but let your methods be regulated by the infinite variety of circumstances.",
+            "So in war, the way is to avoid what is strong and to strike at what is weak.",
+            "Just as water retains no constant shape, so in warfare there are no constant conditions.",
+        ),
+    ),
+    (r"(.*)", ("Your statement insults me.", "")),
 )
 
 suntsu_chatbot = Chat(pairs, reflections)
 
+
 def suntsu_chat():
     print("Talk to the program by typing in plain English, using normal upper-")
     print('and lower-case letters and punctuation.  Enter "quit" when done.')
-    print('='*72)
+    print("=" * 72)
     print("You seek enlightenment?")
 
     suntsu_chatbot.converse()
 
+
 def demo():
     suntsu_chat()
 
+
 if __name__ == "__main__":
     demo()
-
diff --git a/nlp_resource_data/nltk/chat/suntsu.pyc b/nlp_resource_data/nltk/chat/suntsu.pyc
deleted file mode 100755 (executable)
index 2110fe4..0000000
Binary files a/nlp_resource_data/nltk/chat/suntsu.pyc and /dev/null differ
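suntsu.py takes a different approach from the phrase-keyed bots: apart from a few special cases, it buckets inputs by their first character (r"[A-Ca-c](.*)", r"[D-Fd-f](.*)", and so on) and serves an Art of War quotation from that bucket, which fakes responsiveness without any real parsing. A quick sketch:

import random

from nltk.chat.suntsu import suntsu_chatbot

random.seed(0)
print(suntsu_chatbot.respond("Battles frighten me"))
# "B" falls in the r"[A-Ca-c](.*)" bucket, so the reply is one of the
# six quotations listed for that pattern above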
diff --git a/nlp_resource_data/nltk/chat/util.py b/nlp_resource_data/nltk/chat/util.py
old mode 100755 (executable)
new mode 100644 (file)
index c38b90a..8f4ec5d
@@ -1,39 +1,37 @@
 # Natural Language Toolkit: Chatbot Utilities
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 # Based on an Eliza implementation by Joe Strout <joe@strout.net>,
 # Jeff Epler <jepler@inetnebr.com> and Jez Higgins <jez@jezuk.co.uk>.
-from __future__ import print_function
 
 import re
 import random
 
-from six.moves import input
-
 
 reflections = {
-  "i am"       : "you are",
-  "i was"      : "you were",
-  "i"          : "you",
-  "i'm"        : "you are",
-  "i'd"        : "you would",
-  "i've"       : "you have",
-  "i'll"       : "you will",
-  "my"         : "your",
-  "you are"    : "I am",
-  "you were"   : "I was",
-  "you've"     : "I have",
-  "you'll"     : "I will",
-  "your"       : "my",
-  "yours"      : "mine",
-  "you"        : "me",
-  "me"         : "you"
+    "i am": "you are",
+    "i was": "you were",
+    "i": "you",
+    "i'm": "you are",
+    "i'd": "you would",
+    "i've": "you have",
+    "i'll": "you will",
+    "my": "your",
+    "you are": "I am",
+    "you were": "I was",
+    "you've": "I have",
+    "you'll": "I will",
+    "your": "my",
+    "yours": "mine",
+    "you": "me",
+    "me": "you",
 }
 
+
 class Chat(object):
     def __init__(self, pairs, reflections={}):
         """
@@ -51,16 +49,15 @@ class Chat(object):
         :rtype: None
         """
 
-        self._pairs = [(re.compile(x, re.IGNORECASE),y) for (x,y) in pairs]
+        self._pairs = [(re.compile(x, re.IGNORECASE), y) for (x, y) in pairs]
         self._reflections = reflections
         self._regex = self._compile_reflections()
 
-
     def _compile_reflections(self):
-        sorted_refl = sorted(self._reflections.keys(), key=len,
-                reverse=True)
-        return  re.compile(r"\b({0})\b".format("|".join(map(re.escape,
-            sorted_refl))), re.IGNORECASE)
+        sorted_refl = sorted(self._reflections, key=len, reverse=True)
+        return re.compile(
+            r"\b({0})\b".format("|".join(map(re.escape, sorted_refl))), re.IGNORECASE
+        )
 
     def _substitute(self, str):
         """
@@ -72,18 +69,20 @@ class Chat(object):
         :rtype: str
         """
 
-        return self._regex.sub(lambda mo:
-                self._reflections[mo.string[mo.start():mo.end()]],
-                    str.lower())
+        return self._regex.sub(
+            lambda mo: self._reflections[mo.string[mo.start() : mo.end()]], str.lower()
+        )
 
     def _wildcards(self, response, match):
-        pos = response.find('%')
+        pos = response.find("%")
         while pos >= 0:
-            num = int(response[pos+1:pos+2])
-            response = response[:pos] + \
-                self._substitute(match.group(num)) + \
-                response[pos+2:]
-            pos = response.find('%')
+            num = int(response[pos + 1 : pos + 2])
+            response = (
+                response[:pos]
+                + self._substitute(match.group(num))
+                + response[pos + 2 :]
+            )
+            pos = response.find("%")
         return response
 
     def respond(self, str):
@@ -101,12 +100,14 @@ class Chat(object):
 
             # did the pattern match?
             if match:
-                resp = random.choice(response)    # pick a random response
-                resp = self._wildcards(resp, match) # process wildcards
+                resp = random.choice(response)  # pick a random response
+                resp = self._wildcards(resp, match)  # process wildcards
 
                 # fix munged punctuation at the end
-                if resp[-2:] == '?.': resp = resp[:-2] + '.'
-                if resp[-2:] == '??': resp = resp[:-2] + '?'
+                if resp[-2:] == "?.":
+                    resp = resp[:-2] + "."
+                if resp[-2:] == "??":
+                    resp = resp[:-2] + "?"
                 return resp
 
     # Hold a conversation with a chatbot
@@ -114,9 +115,11 @@ class Chat(object):
         user_input = ""
         while user_input != quit:
             user_input = quit
-            try: user_input = input(">")
+            try:
+                user_input = input(">")
             except EOFError:
                 print(user_input)
             if user_input:
-                while user_input[-1] in "!.": user_input = user_input[:-1]
+                while user_input[-1] in "!.":
+                    user_input = user_input[:-1]
                 print(self.respond(user_input))
diff --git a/nlp_resource_data/nltk/chat/util.pyc b/nlp_resource_data/nltk/chat/util.pyc
deleted file mode 100755 (executable)
index bef18fc..0000000
Binary files a/nlp_resource_data/nltk/chat/util.pyc and /dev/null differ
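Two methods in the util.py diff above do the real work: _compile_reflections builds a single alternation regex from the reflection keys sorted longest-first, so "i am" is tried before "i", and _substitute lowercases the input and maps every whole-word hit through the table. A standalone restatement of exactly that logic, with a small three-entry table:

import re

# Same machinery as Chat._compile_reflections / Chat._substitute above.
reflections = {"i am": "you are", "i": "you", "my": "your"}

sorted_refl = sorted(reflections, key=len, reverse=True)  # "i am" before "i"
regex = re.compile(
    r"\b({0})\b".format("|".join(map(re.escape, sorted_refl))), re.IGNORECASE
)

def substitute(text):
    # Lowercase, then swap each whole-word match through the table.
    return regex.sub(
        lambda mo: reflections[mo.string[mo.start() : mo.end()]], text.lower()
    )

print(substitute("I am proud of my regex"))  # -> "you are proud of your regex"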
diff --git a/nlp_resource_data/nltk/chat/zen.py b/nlp_resource_data/nltk/chat/zen.py
old mode 100755 (executable)
new mode 100644 (file)
index c06a122..2ae944b
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Zen Chatbot
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Amy Holland <amyrh@csse.unimelb.edu.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -35,248 +35,295 @@ responses are very limited. Zen Chatbot will usually answer very vaguely, or
 respond to a question by asking a different question, in much the same way
 as Eliza.
 """
-from __future__ import print_function
 
 from nltk.chat.util import Chat, reflections
 
 # responses are matched top to bottom, so non-specific matches occur later
 # for each match, a list of possible responses is provided
 responses = (
-
-# Zen Chatbot opens with the line "Welcome, my child." The usual
-# response will be a greeting problem: 'good' matches "good morning",
-# "good day" etc, but also "good grief!"  and other sentences starting
-# with the word 'good' that may not be a greeting
-
-    (r'(hello(.*))|(good [a-zA-Z]+)',
-    ( "The path to enlightenment is often difficult to see.",
-      "Greetings. I sense your mind is troubled. Tell me of your troubles.",
-      "Ask the question you have come to ask.",
-      "Hello. Do you seek englightenment?")),
-
-
-# "I need" and "I want" can be followed by a thing (eg 'help')
-# or an action (eg 'to see you')
-#
-# This is a problem with this style of response -
-# person:    "I need you"
-# chatbot:    "me can be achieved by hard work and dedication of the mind"
-# i.e. 'you' is not really a thing that can be mapped this way, so this
-# interpretation only makes sense for some inputs
-#
-    (r'i need (.*)',
-    ( "%1 can be achieved by hard work and dedication of the mind.",
-      "%1 is not a need, but a desire of the mind. Clear your mind of such concerns.",
-      "Focus your mind on%1, and you will find what you need.")),
-
-    (r'i want (.*)',
-    ( "Desires of the heart will distract you from the path to enlightenment.",
-      "Will%1 help you attain enlightenment?",
-      "Is%1 a desire of the mind, or of the heart?")),
-
-
-# why questions are separated into three types:
-# "why..I"     e.g. "why am I here?" "Why do I like cake?"
-# "why..you"    e.g. "why are you here?" "Why won't you tell me?"
-# "why..."    e.g. "Why is the sky blue?"
-# problems:
-#     person:  "Why can't you tell me?"
-#     chatbot: "Are you sure I tell you?"
-# - this style works for positives (e.g. "why do you like cake?")
-#   but does not work for negatives (e.g. "why don't you like cake?")
-    (r'why (.*) i (.*)\?',
-    ( "You%1%2?",
-      "Perhaps you only think you%1%2")),
-
-    (r'why (.*) you(.*)\?',
-    ( "Why%1 you%2?",
-      "%2 I%1",
-      "Are you sure I%2?")),
-
-    (r'why (.*)\?',
-    ( "I cannot tell you why%1.",
-      "Why do you think %1?" )),
-
-# e.g. "are you listening?", "are you a duck"
-    (r'are you (.*)\?',
-    ( "Maybe%1, maybe not%1.",
-      "Whether I am%1 or not is God's business.")),
-
-# e.g. "am I a duck?", "am I going to die?"
-    (r'am i (.*)\?',
-    ( "Perhaps%1, perhaps not%1.",
-      "Whether you are%1 or not is not for me to say.")),
-
-# what questions, e.g. "what time is it?"
-# problems:
-#     person:  "What do you want?"
-#    chatbot: "Seek truth, not what do me want."
-    (r'what (.*)\?',
-    ( "Seek truth, not what%1.",
-      "What%1 should not concern you.")),
-
-# how questions, e.g. "how do you do?"
-    (r'how (.*)\?',
-    ( "How do you suppose?",
-      "Will an answer to that really help in your search for enlightenment?",
-      "Ask yourself not how, but why.")),
-
-# can questions, e.g. "can you run?", "can you come over here please?"
-    (r'can you (.*)\?',
-    ( "I probably can, but I may not.",
-      "Maybe I can%1, and maybe I cannot.",
-      "I can do all, and I can do nothing.")),
-
-# can questions, e.g. "can I have some cake?", "can I know truth?"
-    (r'can i (.*)\?',
-    ( "You can%1 if you believe you can%1, and have a pure spirit.",
-      "Seek truth and you will know if you can%1.")),
-
-# e.g. "It is raining" - implies the speaker is certain of a fact
-    (r'it is (.*)',
-    ( "How can you be certain that%1, when you do not even know yourself?",
-      "Whether it is%1 or not does not change the way the world is.")),
-
-# e.g. "is there a doctor in the house?"
-    (r'is there (.*)\?',
-    ( "There is%1 if you believe there is.",
-      "It is possible that there is%1.")),
-
-# e.g. "is it possible?", "is this true?"
-    (r'is(.*)\?',
-    ( "%1 is not relevant.",
-      "Does this matter?")),
-
-# non-specific question
-    (r'(.*)\?',
-    ( "Do you think %1?",
-      "You seek the truth. Does the truth seek you?",
-      "If you intentionally pursue the answers to your questions, the answers become hard to see.",
-      "The answer to your question cannot be told. It must be experienced.")),
-
-# expression of hate of form "I hate you" or "Kelly hates cheese"
-    (r'(.*) (hate[s]?)|(dislike[s]?)|(don\'t like)(.*)',
-    ( "Perhaps it is not about hating %2, but about hate from within.",
-      "Weeds only grow when we dislike them",
-      "Hate is a very strong emotion.")),
-
-# statement containing the word 'truth'
-    (r'(.*) truth(.*)',
-    ( "Seek truth, and truth will seek you.",
-      "Remember, it is not the spoon which bends - only yourself.",
-      "The search for truth is a long journey.")),
-
-# desire to do an action
-# e.g. "I want to go shopping"
-    (r'i want to (.*)',
-    ( "You may %1 if your heart truly desires to.",
-      "You may have to %1.")),
-
-# desire for an object
-# e.g. "I want a pony"
-    (r'i want (.*)',
-    ( "Does your heart truly desire %1?",
-      "Is this a desire of the heart, or of the mind?")),
-
-# e.g. "I can't wait" or "I can't do this"
-    (r'i can\'t (.*)',
-    ( "What we can and can't do is a limitation of the mind.",
-      "There are limitations of the body, and limitations of the mind.",
-      "Have you tried to%1 with a clear mind?")),
-
-# "I think.." indicates uncertainty. e.g. "I think so."
-# problem: exceptions...
-# e.g. "I think, therefore I am"
-    (r'i think (.*)',
-    ( "Uncertainty in an uncertain world.",
-     "Indeed, how can we be certain of anything in such uncertain times.",
-     "Are you not, in fact, certain that%1?")),
-
-# "I feel...emotions/sick/light-headed..."
-    (r'i feel (.*)',
-    ( "Your body and your emotions are both symptoms of your mind."
-      "What do you believe is the root of such feelings?",
-      "Feeling%1 can be a sign of your state-of-mind.")),
-
-
-# exclaimation mark indicating emotion
-# e.g. "Wow!" or "No!"
-    (r'(.*)!',
-    ( "I sense that you are feeling emotional today.",
-      "You need to calm your emotions.")),
-
-# because [statement]
-# e.g. "because I said so"
-    (r'because (.*)',
-    ( "Does knowning the reasons behind things help you to understand"
-      " the things themselves?",
-      "If%1, what else must be true?")),
-
-# yes or no - raise an issue of certainty/correctness
-    (r'(yes)|(no)',
-    ( "Is there certainty in an uncertain world?",
-      "It is better to be right than to be certain.")),
-
-# sentence containing word 'love'
-    (r'(.*)love(.*)',
-    ( "Think of the trees: they let the birds perch and fly with no intention to call them when they come, and no longing for their return when they fly away. Let your heart be like the trees.",
-      "Free love!")),
-
-# sentence containing word 'understand' - r
-    (r'(.*)understand(.*)',
-    ( "If you understand, things are just as they are;"
-      " if you do not understand, things are just as they are.",
-      "Imagination is more important than knowledge.")),
-
-# 'I', 'me', 'my' - person is talking about themself.
-# this breaks down when words contain these - eg 'Thyme', 'Irish'
-    (r'(.*)(me )|( me)|(my)|(mine)|(i)(.*)',
-    ( "'I', 'me', 'my'... these are selfish expressions.",
-      "Have you ever considered that you might be a selfish person?",
-      "Try to consider others, not just yourself.",
-      "Think not just of yourself, but of others.")),
-
-# 'you' starting a sentence
-# e.g. "you stink!"
-    (r'you (.*)',
-    ( "My path is not of conern to you.",
-      "I am but one, and you but one more.")),
-
-# say goodbye with some extra Zen wisdom.
-    (r'exit',
-    ( "Farewell. The obstacle is the path.",
-      "Farewell. Life is a journey, not a destination.",
-      "Good bye. We are cups, constantly and quietly being filled."
-      "\nThe trick is knowning how to tip ourselves over and let the beautiful stuff out.")),
-
-
-# fall through case -
-# when stumped, respond with generic zen wisdom
-#
-    (r'(.*)',
-    ( "When you're enlightened, every word is wisdom.",
-      "Random talk is useless.",
-      "The reverse side also has a reverse side.",
-      "Form is emptiness, and emptiness is form.",
-      "I pour out a cup of water. Is the cup empty?"))
+    # Zen Chatbot opens with the line "Welcome, my child." The usual
+    # response will be a greeting problem: 'good' matches "good morning",
+    # "good day" etc, but also "good grief!"  and other sentences starting
+    # with the word 'good' that may not be a greeting
+    (
+        r"(hello(.*))|(good [a-zA-Z]+)",
+        (
+            "The path to enlightenment is often difficult to see.",
+            "Greetings. I sense your mind is troubled. Tell me of your troubles.",
+            "Ask the question you have come to ask.",
+            "Hello. Do you seek englightenment?",
+        ),
+    ),
+    # "I need" and "I want" can be followed by a thing (eg 'help')
+    # or an action (eg 'to see you')
+    #
+    # This is a problem with this style of response -
+    # person:    "I need you"
+    # chatbot:    "me can be achieved by hard work and dedication of the mind"
+    # i.e. 'you' is not really a thing that can be mapped this way, so this
+    # interpretation only makes sense for some inputs
+    #
+    (
+        r"i need (.*)",
+        (
+            "%1 can be achieved by hard work and dedication of the mind.",
+            "%1 is not a need, but a desire of the mind. Clear your mind of such concerns.",
+            "Focus your mind on%1, and you will find what you need.",
+        ),
+    ),
+    (
+        r"i want (.*)",
+        (
+            "Desires of the heart will distract you from the path to enlightenment.",
+            "Will%1 help you attain enlightenment?",
+            "Is%1 a desire of the mind, or of the heart?",
+        ),
+    ),
+    # why questions are separated into three types:
+    # "why..I"     e.g. "why am I here?" "Why do I like cake?"
+    # "why..you"    e.g. "why are you here?" "Why won't you tell me?"
+    # "why..."    e.g. "Why is the sky blue?"
+    # problems:
+    #     person:  "Why can't you tell me?"
+    #     chatbot: "Are you sure I tell you?"
+    # - this style works for positives (e.g. "why do you like cake?")
+    #   but does not work for negatives (e.g. "why don't you like cake?")
+    (r"why (.*) i (.*)\?", ("You%1%2?", "Perhaps you only think you%1%2")),
+    (r"why (.*) you(.*)\?", ("Why%1 you%2?", "%2 I%1", "Are you sure I%2?")),
+    (r"why (.*)\?", ("I cannot tell you why%1.", "Why do you think %1?")),
+    # e.g. "are you listening?", "are you a duck"
+    (
+        r"are you (.*)\?",
+        ("Maybe%1, maybe not%1.", "Whether I am%1 or not is God's business."),
+    ),
+    # e.g. "am I a duck?", "am I going to die?"
+    (
+        r"am i (.*)\?",
+        ("Perhaps%1, perhaps not%1.", "Whether you are%1 or not is not for me to say."),
+    ),
+    # what questions, e.g. "what time is it?"
+    # problems:
+    #     person:  "What do you want?"
+    #    chatbot: "Seek truth, not what do me want."
+    (r"what (.*)\?", ("Seek truth, not what%1.", "What%1 should not concern you.")),
+    # how questions, e.g. "how do you do?"
+    (
+        r"how (.*)\?",
+        (
+            "How do you suppose?",
+            "Will an answer to that really help in your search for enlightenment?",
+            "Ask yourself not how, but why.",
+        ),
+    ),
+    # can questions, e.g. "can you run?", "can you come over here please?"
+    (
+        r"can you (.*)\?",
+        (
+            "I probably can, but I may not.",
+            "Maybe I can%1, and maybe I cannot.",
+            "I can do all, and I can do nothing.",
+        ),
+    ),
+    # can questions, e.g. "can I have some cake?", "can I know truth?"
+    (
+        r"can i (.*)\?",
+        (
+            "You can%1 if you believe you can%1, and have a pure spirit.",
+            "Seek truth and you will know if you can%1.",
+        ),
+    ),
+    # e.g. "It is raining" - implies the speaker is certain of a fact
+    (
+        r"it is (.*)",
+        (
+            "How can you be certain that%1, when you do not even know yourself?",
+            "Whether it is%1 or not does not change the way the world is.",
+        ),
+    ),
+    # e.g. "is there a doctor in the house?"
+    (
+        r"is there (.*)\?",
+        ("There is%1 if you believe there is.", "It is possible that there is%1."),
+    ),
+    # e.g. "is it possible?", "is this true?"
+    (r"is(.*)\?", ("%1 is not relevant.", "Does this matter?")),
+    # non-specific question
+    (
+        r"(.*)\?",
+        (
+            "Do you think %1?",
+            "You seek the truth. Does the truth seek you?",
+            "If you intentionally pursue the answers to your questions, the answers become hard to see.",
+            "The answer to your question cannot be told. It must be experienced.",
+        ),
+    ),
+    # expression of hate of form "I hate you" or "Kelly hates cheese"
+    (
+        r"(.*) (hate[s]?)|(dislike[s]?)|(don\'t like)(.*)",
+        (
+            "Perhaps it is not about hating %2, but about hate from within.",
+            "Weeds only grow when we dislike them",
+            "Hate is a very strong emotion.",
+        ),
+    ),
+    # statement containing the word 'truth'
+    (
+        r"(.*) truth(.*)",
+        (
+            "Seek truth, and truth will seek you.",
+            "Remember, it is not the spoon which bends - only yourself.",
+            "The search for truth is a long journey.",
+        ),
+    ),
+    # desire to do an action
+    # e.g. "I want to go shopping"
+    (
+        r"i want to (.*)",
+        ("You may %1 if your heart truly desires to.", "You may have to %1."),
+    ),
+    # desire for an object
+    # e.g. "I want a pony"
+    (
+        r"i want (.*)",
+        (
+            "Does your heart truly desire %1?",
+            "Is this a desire of the heart, or of the mind?",
+        ),
+    ),
+    # e.g. "I can't wait" or "I can't do this"
+    (
+        r"i can\'t (.*)",
+        (
+            "What we can and can't do is a limitation of the mind.",
+            "There are limitations of the body, and limitations of the mind.",
+            "Have you tried to%1 with a clear mind?",
+        ),
+    ),
+    # "I think.." indicates uncertainty. e.g. "I think so."
+    # problem: exceptions...
+    # e.g. "I think, therefore I am"
+    (
+        r"i think (.*)",
+        (
+            "Uncertainty in an uncertain world.",
+            "Indeed, how can we be certain of anything in such uncertain times.",
+            "Are you not, in fact, certain that%1?",
+        ),
+    ),
+    # "I feel...emotions/sick/light-headed..."
+    (
+        r"i feel (.*)",
+        (
+            "Your body and your emotions are both symptoms of your mind."
+            "What do you believe is the root of such feelings?",
+            "Feeling%1 can be a sign of your state-of-mind.",
+        ),
+    ),
+    # exclamation mark indicating emotion
+    # e.g. "Wow!" or "No!"
+    (
+        r"(.*)!",
+        (
+            "I sense that you are feeling emotional today.",
+            "You need to calm your emotions.",
+        ),
+    ),
+    # because [statement]
+    # e.g. "because I said so"
+    (
+        r"because (.*)",
+        (
+            "Does knowning the reasons behind things help you to understand"
+            " the things themselves?",
+            "If%1, what else must be true?",
+        ),
+    ),
+    # yes or no - raise an issue of certainty/correctness
+    (
+        r"(yes)|(no)",
+        (
+            "Is there certainty in an uncertain world?",
+            "It is better to be right than to be certain.",
+        ),
+    ),
+    # sentence containing word 'love'
+    (
+        r"(.*)love(.*)",
+        (
+            "Think of the trees: they let the birds perch and fly with no intention to call them when they come, and no longing for their return when they fly away. Let your heart be like the trees.",
+            "Free love!",
+        ),
+    ),
+    # sentence containing word 'understand' - r
+    (
+        r"(.*)understand(.*)",
+        (
+            "If you understand, things are just as they are;"
+            " if you do not understand, things are just as they are.",
+            "Imagination is more important than knowledge.",
+        ),
+    ),
+    # 'I', 'me', 'my' - person is talking about themselves.
+    # this breaks down when words contain these - e.g. 'Thyme', 'Irish'
+    (
+        r"(.*)(me )|( me)|(my)|(mine)|(i)(.*)",
+        (
+            "'I', 'me', 'my'... these are selfish expressions.",
+            "Have you ever considered that you might be a selfish person?",
+            "Try to consider others, not just yourself.",
+            "Think not just of yourself, but of others.",
+        ),
+    ),
+    # 'you' starting a sentence
+    # e.g. "you stink!"
+    (
+        r"you (.*)",
+        ("My path is not of conern to you.", "I am but one, and you but one more."),
+    ),
+    # say goodbye with some extra Zen wisdom.
+    (
+        r"exit",
+        (
+            "Farewell. The obstacle is the path.",
+            "Farewell. Life is a journey, not a destination.",
+            "Good bye. We are cups, constantly and quietly being filled."
+            "\nThe trick is knowning how to tip ourselves over and let the beautiful stuff out.",
+        ),
+    ),
+    # fall through case -
+    # when stumped, respond with generic zen wisdom
+    #
+    (
+        r"(.*)",
+        (
+            "When you're enlightened, every word is wisdom.",
+            "Random talk is useless.",
+            "The reverse side also has a reverse side.",
+            "Form is emptiness, and emptiness is form.",
+            "I pour out a cup of water. Is the cup empty?",
+        ),
+    ),
 )
 
 zen_chatbot = Chat(responses, reflections)
 
+
 def zen_chat():
-    print('*'*75)
+    print("*" * 75)
     print("Zen Chatbot!".center(75))
-    print('*'*75)
+    print("*" * 75)
     print('"Look beyond mere words and letters - look into your mind"'.center(75))
     print("* Talk your way to truth with Zen Chatbot.")
     print("* Type 'quit' when you have had enough.")
-    print('*'*75)
+    print("*" * 75)
     print("Welcome, my child.")
 
     zen_chatbot.converse()
 
+
 def demo():
     zen_chat()
 
+
 if __name__ == "__main__":
     demo()
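
For reference, a minimal sketch (not part of this commit) of how the response table above is consumed by nltk.chat.util.Chat: respond() takes the first pattern that matches (patterns are compiled with re.IGNORECASE) and fills each %N placeholder with the reflected capture group. The substitution step prepends a space to every inserted word, which is why the templates above are written "on%1" rather than "on %1".

from nltk.chat.util import Chat, reflections

# Hypothetical two-rule table, same shape as the `responses` tuple above.
pairs = (
    (r"i need (.*)",
     ("Focus your mind on%1, and you will find what you need.",)),
    (r"(.*)", ("Random talk is useless.",)),
)

bot = Chat(pairs, reflections)
print(bot.respond("I need help"))
# -> Focus your mind on help, and you will find what you need.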
diff --git a/nlp_resource_data/nltk/chat/zen.pyc b/nlp_resource_data/nltk/chat/zen.pyc
deleted file mode 100755 (executable)
index 87a0630..0000000
Binary files a/nlp_resource_data/nltk/chat/zen.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 8520202..3ec1e19
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunkers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -155,14 +155,23 @@ zero-length assertions).
 from nltk.data import load
 
 from nltk.chunk.api import ChunkParserI
-from nltk.chunk.util import (ChunkScore, accuracy, tagstr2tree, conllstr2tree,
-                             conlltags2tree, tree2conlltags, tree2conllstr, tree2conlltags,
-                             ieerstr2tree)
+from nltk.chunk.util import (
+    ChunkScore,
+    accuracy,
+    tagstr2tree,
+    conllstr2tree,
+    conlltags2tree,
+    tree2conlltags,
+    tree2conllstr,
+    tree2conlltags,
+    ieerstr2tree,
+)
 from nltk.chunk.regexp import RegexpChunkParser, RegexpParser
 
 # Standard treebank POS tagger
-_BINARY_NE_CHUNKER = 'chunkers/maxent_ne_chunker/english_ace_binary.pickle'
-_MULTICLASS_NE_CHUNKER = 'chunkers/maxent_ne_chunker/english_ace_multiclass.pickle'
+_BINARY_NE_CHUNKER = "chunkers/maxent_ne_chunker/english_ace_binary.pickle"
+_MULTICLASS_NE_CHUNKER = "chunkers/maxent_ne_chunker/english_ace_multiclass.pickle"
+
 
 def ne_chunk(tagged_tokens, binary=False):
     """
@@ -176,6 +185,7 @@ def ne_chunk(tagged_tokens, binary=False):
     chunker = load(chunker_pickle)
     return chunker.parse(tagged_tokens)
 
+
 def ne_chunk_sents(tagged_sentences, binary=False):
     """
     Use NLTK's currently recommended named entity chunker to chunk the
@@ -187,4 +197,3 @@ def ne_chunk_sents(tagged_sentences, binary=False):
         chunker_pickle = _MULTICLASS_NE_CHUNKER
     chunker = load(chunker_pickle)
     return chunker.parse_sents(tagged_sentences)
-
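
A hedged usage sketch for the reformatted ne_chunk() above; it assumes the maxent_ne_chunker and words NLTK data packages (plus a tokenizer and POS tagger) are installed:

from nltk import ne_chunk, pos_tag, word_tokenize

# ne_chunk() expects POS-tagged tokens and returns a Tree whose named-entity
# subtrees are labelled PERSON, GPE, etc. (or simply NE when binary=True).
tree = ne_chunk(pos_tag(word_tokenize("Mark Twain lived in Hartford.")))
print(tree)
# Output, roughly:
# (S (PERSON Mark/NNP Twain/NNP) lived/VBD in/IN (GPE Hartford/NNP) ./.)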
old mode 100755 (executable)
new mode 100644 (file)
similarity index 80%
rename from nlp_resource_data/nltk/chunk/__init__.pyc
rename to nlp_resource_data/nltk/chunk/__pycache__/__init__.cpython-37.pyc
index 8ad6b0d..9e922d0
Binary files a/nlp_resource_data/nltk/chunk/__init__.pyc and b/nlp_resource_data/nltk/chunk/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/chunk/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/chunk/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f9acf6d
Binary files /dev/null and b/nlp_resource_data/nltk/chunk/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/chunk/__pycache__/named_entity.cpython-37.pyc b/nlp_resource_data/nltk/chunk/__pycache__/named_entity.cpython-37.pyc
new file mode 100644 (file)
index 0000000..fa26906
Binary files /dev/null and b/nlp_resource_data/nltk/chunk/__pycache__/named_entity.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
similarity index 56%
rename from nlp_resource_data/nltk/chunk/regexp.pyc
rename to nlp_resource_data/nltk/chunk/__pycache__/regexp.cpython-37.pyc
index 15a5aed..48374fb
Binary files a/nlp_resource_data/nltk/chunk/regexp.pyc and b/nlp_resource_data/nltk/chunk/__pycache__/regexp.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/chunk/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/chunk/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f9a8212
Binary files /dev/null and b/nlp_resource_data/nltk/chunk/__pycache__/util.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index 5e41f7a..1e4af77
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunk parsing API
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com> (minor additions)
 # URL: <http://nltk.org/>
@@ -14,6 +14,7 @@ from nltk.parse import ParserI
 
 from nltk.chunk.util import ChunkScore
 
+
 class ChunkParserI(ParserI):
     """
     A processing interface for identifying non-overlapping groups in
@@ -22,6 +23,7 @@ class ChunkParserI(ParserI):
     ``ParserI``, ``ChunkParserI`` guarantees that the ``parse()`` method
     will always generate a parse.
     """
+
     def parse(self, tokens):
         """
         Return the best chunk structure for the given tokens
@@ -48,4 +50,3 @@ class ChunkParserI(ParserI):
         for correct in gold:
             chunkscore.score(correct, self.parse(correct.leaves()))
         return chunkscore
-
diff --git a/nlp_resource_data/nltk/chunk/api.pyc b/nlp_resource_data/nltk/chunk/api.pyc
deleted file mode 100755 (executable)
index 86a759a..0000000
Binary files a/nlp_resource_data/nltk/chunk/api.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 9867b0a..863ee99
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunk parsing API
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -8,7 +8,6 @@
 """
 Named entity chunker
 """
-from __future__ import print_function
 
 import os, re, pickle
 from xml.etree import ElementTree as ET
@@ -27,26 +26,29 @@ from nltk.data import find
 from nltk.chunk.api import ChunkParserI
 from nltk.chunk.util import ChunkScore
 
+
 class NEChunkParserTagger(ClassifierBasedTagger):
     """
     The IOB tagger used by the chunk parser.
     """
+
     def __init__(self, train):
         ClassifierBasedTagger.__init__(
-            self, train=train,
-            classifier_builder=self._classifier_builder)
+            self, train=train, classifier_builder=self._classifier_builder
+        )
 
     def _classifier_builder(self, train):
-        return MaxentClassifier.train(train, algorithm='megam',
-                                           gaussian_prior_sigma=1,
-                                           trace=2)
+        return MaxentClassifier.train(
+            train, algorithm="megam", gaussian_prior_sigma=1, trace=2
+        )
 
     def _english_wordlist(self):
         try:
             wl = self._en_wordlist
         except AttributeError:
             from nltk.corpus import words
-            self._en_wordlist = set(words.words('en-basic'))
+
+            self._en_wordlist = set(words.words("en-basic"))
             wl = self._en_wordlist
         return wl
 
@@ -58,60 +60,62 @@ class NEChunkParserTagger(ClassifierBasedTagger):
             prevpos = prevprevpos = None
             prevshape = prevtag = prevprevtag = None
         elif index == 1:
-            prevword = tokens[index-1][0].lower()
+            prevword = tokens[index - 1][0].lower()
             prevprevword = None
-            prevpos = simplify_pos(tokens[index-1][1])
+            prevpos = simplify_pos(tokens[index - 1][1])
             prevprevpos = None
-            prevtag = history[index-1][0]
+            prevtag = history[index - 1][0]
             prevshape = prevprevtag = None
         else:
-            prevword = tokens[index-1][0].lower()
-            prevprevword = tokens[index-2][0].lower()
-            prevpos = simplify_pos(tokens[index-1][1])
-            prevprevpos = simplify_pos(tokens[index-2][1])
-            prevtag = history[index-1]
-            prevprevtag = history[index-2]
+            prevword = tokens[index - 1][0].lower()
+            prevprevword = tokens[index - 2][0].lower()
+            prevpos = simplify_pos(tokens[index - 1][1])
+            prevprevpos = simplify_pos(tokens[index - 2][1])
+            prevtag = history[index - 1]
+            prevprevtag = history[index - 2]
             prevshape = shape(prevword)
-        if index == len(tokens)-1:
+        if index == len(tokens) - 1:
             nextword = nextnextword = None
             nextpos = nextnextpos = None
-        elif index == len(tokens)-2:
-            nextword = tokens[index+1][0].lower()
-            nextpos = tokens[index+1][1].lower()
+        elif index == len(tokens) - 2:
+            nextword = tokens[index + 1][0].lower()
+            nextpos = tokens[index + 1][1].lower()
             nextnextword = None
             nextnextpos = None
         else:
-            nextword = tokens[index+1][0].lower()
-            nextpos = tokens[index+1][1].lower()
-            nextnextword = tokens[index+2][0].lower()
-            nextnextpos = tokens[index+2][1].lower()
+            nextword = tokens[index + 1][0].lower()
+            nextpos = tokens[index + 1][1].lower()
+            nextnextword = tokens[index + 2][0].lower()
+            nextnextpos = tokens[index + 2][1].lower()
 
         # 89.6
         features = {
-            'bias': True,
-            'shape': shape(word),
-            'wordlen': len(word),
-            'prefix3': word[:3].lower(),
-            'suffix3': word[-3:].lower(),
-            'pos': pos,
-            'word': word,
-            'en-wordlist': (word in self._english_wordlist()),
-            'prevtag': prevtag,
-            'prevpos': prevpos,
-            'nextpos': nextpos,
-            'prevword': prevword,
-            'nextword': nextword,
-            'word+nextpos': '{0}+{1}'.format(word.lower(), nextpos),
-            'pos+prevtag': '{0}+{1}'.format(pos, prevtag),
-            'shape+prevtag': '{0}+{1}'.format(prevshape, prevtag),
-            }
+            "bias": True,
+            "shape": shape(word),
+            "wordlen": len(word),
+            "prefix3": word[:3].lower(),
+            "suffix3": word[-3:].lower(),
+            "pos": pos,
+            "word": word,
+            "en-wordlist": (word in self._english_wordlist()),
+            "prevtag": prevtag,
+            "prevpos": prevpos,
+            "nextpos": nextpos,
+            "prevword": prevword,
+            "nextword": nextword,
+            "word+nextpos": "{0}+{1}".format(word.lower(), nextpos),
+            "pos+prevtag": "{0}+{1}".format(pos, prevtag),
+            "shape+prevtag": "{0}+{1}".format(prevshape, prevtag),
+        }
 
         return features
 
+
 class NEChunkParser(ChunkParserI):
     """
     Expected input: list of pos-tagged words
     """
+
     def __init__(self, train):
         self._train(train)
 
@@ -133,16 +137,15 @@ class NEChunkParser(ChunkParserI):
         """
         Convert a list of tagged tokens to a chunk-parse tree.
         """
-        sent = Tree('S', [])
+        sent = Tree("S", [])
 
-        for (tok,tag) in tagged_tokens:
-            if tag == 'O':
+        for (tok, tag) in tagged_tokens:
+            if tag == "O":
                 sent.append(tok)
-            elif tag.startswith('B-'):
+            elif tag.startswith("B-"):
                 sent.append(Tree(tag[2:], [tok]))
-            elif tag.startswith('I-'):
-                if (sent and isinstance(sent[-1], Tree) and
-                    sent[-1].label() == tag[2:]):
+            elif tag.startswith("I-"):
+                if sent and isinstance(sent[-1], Tree) and sent[-1].label() == tag[2:]:
                     sent[-1].append(tok)
                 else:
                     sent.append(Tree(tag[2:], [tok]))
@@ -159,110 +162,124 @@ class NEChunkParser(ChunkParserI):
                 if len(child) == 0:
                     print("Warning -- empty chunk in sentence")
                     continue
-                toks.append((child[0], 'B-{0}'.format(child.label())))
+                toks.append((child[0], "B-{0}".format(child.label())))
                 for tok in child[1:]:
-                    toks.append((tok, 'I-{0}'.format(child.label())))
+                    toks.append((tok, "I-{0}".format(child.label())))
             else:
-                toks.append((child, 'O'))
+                toks.append((child, "O"))
         return toks
 
+
 def shape(word):
-    if re.match('[0-9]+(\.[0-9]*)?|[0-9]*\.[0-9]+$', word, re.UNICODE):
-        return 'number'
-    elif re.match('\W+$', word, re.UNICODE):
-        return 'punct'
-    elif re.match('\w+$', word, re.UNICODE):
+    if re.match("[0-9]+(\.[0-9]*)?|[0-9]*\.[0-9]+$", word, re.UNICODE):
+        return "number"
+    elif re.match("\W+$", word, re.UNICODE):
+        return "punct"
+    elif re.match("\w+$", word, re.UNICODE):
         if word.istitle():
-            return 'upcase'
+            return "upcase"
         elif word.islower():
-            return 'downcase'
+            return "downcase"
         else:
-            return 'mixedcase'
+            return "mixedcase"
     else:
-        return 'other'
+        return "other"
+
 
 def simplify_pos(s):
-    if s.startswith('V'): return "V"
-    else: return s.split('-')[0]
+    if s.startswith("V"):
+        return "V"
+    else:
+        return s.split("-")[0]
+
 
 def postag_tree(tree):
     # Part-of-speech tagging.
     words = tree.leaves()
     tag_iter = (pos for (word, pos) in pos_tag(words))
-    newtree = Tree('S', [])
+    newtree = Tree("S", [])
     for child in tree:
         if isinstance(child, Tree):
             newtree.append(Tree(child.label(), []))
             for subchild in child:
-                newtree[-1].append( (subchild, next(tag_iter)) )
+                newtree[-1].append((subchild, next(tag_iter)))
         else:
-            newtree.append( (child, next(tag_iter)) )
+            newtree.append((child, next(tag_iter)))
     return newtree
 
-def load_ace_data(roots, fmt='binary', skip_bnews=True):
+
+def load_ace_data(roots, fmt="binary", skip_bnews=True):
     for root in roots:
         for root, dirs, files in os.walk(root):
-            if root.endswith('bnews') and skip_bnews:
+            if root.endswith("bnews") and skip_bnews:
                 continue
             for f in files:
-                if f.endswith('.sgm'):
+                if f.endswith(".sgm"):
                     for sent in load_ace_file(os.path.join(root, f), fmt):
                         yield sent
 
+
 def load_ace_file(textfile, fmt):
-    print('  - {0}'.format(os.path.split(textfile)[1]))
-    annfile = textfile+'.tmx.rdc.xml'
+    print("  - {0}".format(os.path.split(textfile)[1]))
+    annfile = textfile + ".tmx.rdc.xml"
 
     # Read the xml file, and get a list of entities
     entities = []
-    with open(annfile, 'r') as infile:
+    with open(annfile, "r") as infile:
         xml = ET.parse(infile).getroot()
-    for entity in xml.findall('document/entity'):
-        typ = entity.find('entity_type').text
-        for mention in entity.findall('entity_mention'):
-            if mention.get('TYPE') != 'NAME': continue # only NEs
-            s = int(mention.find('head/charseq/start').text)
-            e = int(mention.find('head/charseq/end').text)+1
-            entities.append( (s, e, typ) )
+    for entity in xml.findall("document/entity"):
+        typ = entity.find("entity_type").text
+        for mention in entity.findall("entity_mention"):
+            if mention.get("TYPE") != "NAME":
+                continue  # only NEs
+            s = int(mention.find("head/charseq/start").text)
+            e = int(mention.find("head/charseq/end").text) + 1
+            entities.append((s, e, typ))
 
     # Read the text file, and mark the entities.
-    with open(textfile, 'r') as infile:
+    with open(textfile, "r") as infile:
         text = infile.read()
 
     # Strip XML tags, since they don't count towards the indices
-    text = re.sub('<(?!/?TEXT)[^>]+>', '', text)
+    text = re.sub("<(?!/?TEXT)[^>]+>", "", text)
 
     # Blank out anything before/after <TEXT>
-    def subfunc(m): return ' '*(m.end()-m.start()-6)
-    text = re.sub('[\s\S]*<TEXT>', subfunc, text)
-    text = re.sub('</TEXT>[\s\S]*', '', text)
+    def subfunc(m):
+        return " " * (m.end() - m.start() - 6)
+
+    text = re.sub("[\s\S]*<TEXT>", subfunc, text)
+    text = re.sub("</TEXT>[\s\S]*", "", text)
 
     # Simplify quotes
     text = re.sub("``", ' "', text)
     text = re.sub("''", '" ', text)
 
-    entity_types = set(typ for (s,e,typ) in entities)
+    entity_types = set(typ for (s, e, typ) in entities)
 
     # Binary distinction (NE or not NE)
-    if fmt == 'binary':
+    if fmt == "binary":
         i = 0
-        toks = Tree('S', [])
-        for (s,e,typ) in sorted(entities):
-            if s < i: s = i # Overlapping!  Deal with this better?
-            if e <= s: continue
+        toks = Tree("S", [])
+        for (s, e, typ) in sorted(entities):
+            if s < i:
+                s = i  # Overlapping!  Deal with this better?
+            if e <= s:
+                continue
             toks.extend(word_tokenize(text[i:s]))
-            toks.append(Tree('NE', text[s:e].split()))
+            toks.append(Tree("NE", text[s:e].split()))
             i = e
         toks.extend(word_tokenize(text[i:]))
         yield toks
 
     # Multiclass distinction (NE type)
-    elif fmt == 'multiclass':
+    elif fmt == "multiclass":
         i = 0
-        toks = Tree('S', [])
-        for (s,e,typ) in sorted(entities):
-            if s < i: s = i # Overlapping!  Deal with this better?
-            if e <= s: continue
+        toks = Tree("S", [])
+        for (s, e, typ) in sorted(entities):
+            if s < i:
+                s = i  # Overlapping!  Deal with this better?
+            if e <= s:
+                continue
             toks.extend(word_tokenize(text[i:s]))
             toks.append(Tree(typ, text[s:e].split()))
             i = e
@@ -270,7 +287,8 @@ def load_ace_file(textfile, fmt):
         yield toks
 
     else:
-        raise ValueError('bad fmt value')
+        raise ValueError("bad fmt value")
+
 
 # This probably belongs in a more general-purpose location (as does
 # the parse_to_tagged function).
@@ -279,53 +297,56 @@ def cmp_chunks(correct, guessed):
     guessed = NEChunkParser._parse_to_tagged(guessed)
     ellipsis = False
     for (w, ct), (w, gt) in zip(correct, guessed):
-        if ct == gt == 'O':
+        if ct == gt == "O":
             if not ellipsis:
                 print("  {:15} {:15} {2}".format(ct, gt, w))
-                print('  {:15} {:15} {2}'.format('...', '...', '...'))
+                print("  {:15} {:15} {2}".format("...", "...", "..."))
                 ellipsis = True
         else:
             ellipsis = False
             print("  {:15} {:15} {2}".format(ct, gt, w))
 
-def build_model(fmt='binary'):
-    print('Loading training data...')
-    train_paths = [find('corpora/ace_data/ace.dev'),
-                   find('corpora/ace_data/ace.heldout'),
-                   find('corpora/ace_data/bbn.dev'),
-                   find('corpora/ace_data/muc.dev')]
+
+def build_model(fmt="binary"):
+    print("Loading training data...")
+    train_paths = [
+        find("corpora/ace_data/ace.dev"),
+        find("corpora/ace_data/ace.heldout"),
+        find("corpora/ace_data/bbn.dev"),
+        find("corpora/ace_data/muc.dev"),
+    ]
     train_trees = load_ace_data(train_paths, fmt)
     train_data = [postag_tree(t) for t in train_trees]
-    print('Training...')
+    print("Training...")
     cp = NEChunkParser(train_data)
     del train_data
 
-    print('Loading eval data...')
-    eval_paths = [find('corpora/ace_data/ace.eval')]
+    print("Loading eval data...")
+    eval_paths = [find("corpora/ace_data/ace.eval")]
     eval_trees = load_ace_data(eval_paths, fmt)
     eval_data = [postag_tree(t) for t in eval_trees]
 
-    print('Evaluating...')
+    print("Evaluating...")
     chunkscore = ChunkScore()
     for i, correct in enumerate(eval_data):
         guess = cp.parse(correct.leaves())
         chunkscore.score(correct, guess)
-        if i < 3: cmp_chunks(correct, guess)
+        if i < 3:
+            cmp_chunks(correct, guess)
     print(chunkscore)
 
-    outfilename = '/tmp/ne_chunker_{0}.pickle'.format(fmt)
-    print('Saving chunker to {0}...'.format(outfilename))
+    outfilename = "/tmp/ne_chunker_{0}.pickle".format(fmt)
+    print("Saving chunker to {0}...".format(outfilename))
 
-    with open(outfilename, 'wb') as outfile:
+    with open(outfilename, "wb") as outfile:
         pickle.dump(cp, outfile, -1)
 
     return cp
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     # Make sure that the pickled object has the right class name:
     from nltk.chunk.named_entity import build_model
 
-    build_model('binary')
-    build_model('multiclass')
-
+    build_model("binary")
+    build_model("multiclass")
diff --git a/nlp_resource_data/nltk/chunk/named_entity.pyc b/nlp_resource_data/nltk/chunk/named_entity.pyc
deleted file mode 100755 (executable)
index d8feefb..0000000
Binary files a/nlp_resource_data/nltk/chunk/named_entity.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 63855b0..f0e910c
@@ -1,26 +1,21 @@
 # Natural Language Toolkit: Regular Expression Chunkers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com> (minor additions)
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals
-from __future__ import division
 
 import re
 
-from six import string_types
-
 from nltk.tree import Tree
 from nltk.chunk.api import ChunkParserI
-from nltk.compat import python_2_unicode_compatible, unicode_repr
 
 ##//////////////////////////////////////////////////////
 ##  ChunkString
 ##//////////////////////////////////////////////////////
 
-@python_2_unicode_compatible
+
 class ChunkString(object):
     """
     A string-based encoding of a particular chunking of a text.
@@ -58,18 +53,19 @@ class ChunkString(object):
     :cvar IN_CHINK_PATTERN: A zero-width regexp pattern string that
         will only match positions that are in chinks.
     """
-    CHUNK_TAG_CHAR = r'[^\{\}<>]'
-    CHUNK_TAG = r'(<%s+?>)' % CHUNK_TAG_CHAR
 
-    IN_CHUNK_PATTERN = r'(?=[^\{]*\})'
-    IN_CHINK_PATTERN = r'(?=[^\}]*(\{|$))'
+    CHUNK_TAG_CHAR = r"[^\{\}<>]"
+    CHUNK_TAG = r"(<%s+?>)" % CHUNK_TAG_CHAR
+
+    IN_CHUNK_PATTERN = r"(?=[^\{]*\})"
+    IN_CHINK_PATTERN = r"(?=[^\}]*(\{|$))"
 
     # These are used by _verify
-    _CHUNK = r'(\{%s+?\})+?' % CHUNK_TAG
-    _CHINK = r'(%s+?)+?' % CHUNK_TAG
-    _VALID = re.compile(r'^(\{?%s\}?)*?$' % CHUNK_TAG)
-    _BRACKETS = re.compile('[^\{\}]+')
-    _BALANCED_BRACKETS = re.compile(r'(\{\})*$')
+    _CHUNK = r"(\{%s+?\})+?" % CHUNK_TAG
+    _CHINK = r"(%s+?)+?" % CHUNK_TAG
+    _VALID = re.compile(r"^(\{?%s\}?)*?$" % CHUNK_TAG)
+    _BRACKETS = re.compile("[^\{\}]+")
+    _BALANCED_BRACKETS = re.compile(r"(\{\})*$")
 
     def __init__(self, chunk_struct, debug_level=1):
         """
@@ -95,7 +91,7 @@ class ChunkString(object):
         self._root_label = chunk_struct.label()
         self._pieces = chunk_struct[:]
         tags = [self._tag(tok) for tok in self._pieces]
-        self._str = '<' + '><'.join(tags) + '>'
+        self._str = "<" + "><".join(tags) + ">"
         self._debug = debug_level
 
     def _tag(self, tok):
@@ -104,8 +100,7 @@ class ChunkString(object):
         elif isinstance(tok, Tree):
             return tok.label()
         else:
-            raise ValueError('chunk structures must contain tagged '
-                             'tokens or trees')
+            raise ValueError("chunk structures must contain tagged " "tokens or trees")
 
     def _verify(self, s, verify_tags):
         """
@@ -125,28 +120,32 @@ class ChunkString(object):
         """
         # Check overall form
         if not ChunkString._VALID.match(s):
-            raise ValueError('Transformation generated invalid '
-                             'chunkstring:\n  %s' % s)
+            raise ValueError(
+                "Transformation generated invalid " "chunkstring:\n  %s" % s
+            )
 
         # Check that parens are balanced.  If the string is long, we
         # have to do this in pieces, to avoid a maximum recursion
         # depth limit for regular expressions.
-        brackets = ChunkString._BRACKETS.sub('', s)
+        brackets = ChunkString._BRACKETS.sub("", s)
         for i in range(1 + len(brackets) // 5000):
-            substr = brackets[i*5000:i*5000+5000]
+            substr = brackets[i * 5000 : i * 5000 + 5000]
             if not ChunkString._BALANCED_BRACKETS.match(substr):
-                raise ValueError('Transformation generated invalid '
-                                 'chunkstring:\n  %s' % s)
+                raise ValueError(
+                    "Transformation generated invalid " "chunkstring:\n  %s" % s
+                )
 
-        if verify_tags<=0: return
+        if verify_tags <= 0:
+            return
 
-        tags1 = (re.split(r'[\{\}<>]+', s))[1:-1]
+        tags1 = (re.split(r"[\{\}<>]+", s))[1:-1]
         tags2 = [self._tag(piece) for piece in self._pieces]
         if tags1 != tags2:
-            raise ValueError('Transformation generated invalid '
-                             'chunkstring: tag changed')
+            raise ValueError(
+                "Transformation generated invalid " "chunkstring: tag changed"
+            )
 
-    def to_chunkstruct(self, chunk_label='CHUNK'):
+    def to_chunkstruct(self, chunk_label="CHUNK"):
         """
         Return the chunk structure encoded by this ``ChunkString``.
 
@@ -154,17 +153,18 @@ class ChunkString(object):
         :raise ValueError: If a transformation has generated an
             invalid chunkstring.
         """
-        if self._debug > 0: self._verify(self._str, 1)
+        if self._debug > 0:
+            self._verify(self._str, 1)
 
         # Use this alternating list to create the chunkstruct.
         pieces = []
         index = 0
         piece_in_chunk = 0
-        for piece in re.split('[{}]', self._str):
+        for piece in re.split("[{}]", self._str):
 
             # Find the list of tokens contained in this piece.
-            length = piece.count('<')
-            subsequence = self._pieces[index:index+length]
+            length = piece.count("<")
+            subsequence = self._pieces[index : index + length]
 
             # Add this list of tokens to our pieces.
             if piece_in_chunk:
@@ -209,10 +209,11 @@ class ChunkString(object):
         # The substitution might have generated "empty chunks"
         # (substrings of the form "{}").  Remove them, so they don't
         # interfere with other transformations.
-        s = re.sub('\{\}', '', s)
+        s = re.sub("\{\}", "", s)
 
         # Make sure that the transformation was legal.
-        if self._debug > 1: self._verify(s, self._debug-2)
+        if self._debug > 1:
+            self._verify(s, self._debug - 2)
 
         # Commit the transformation.
         self._str = s
@@ -226,7 +227,7 @@ class ChunkString(object):
 
         :rtype: str
         """
-        return '<ChunkString: %s>' % unicode_repr(self._str)
+        return "<ChunkString: %s>" % repr(self._str)
 
     def __str__(self):
         """
@@ -238,16 +239,18 @@ class ChunkString(object):
        :rtype: str
         """
         # Add spaces to make everything line up.
-        str = re.sub(r'>(?!\})', r'> ', self._str)
-        str = re.sub(r'([^\{])<', r'\1 <', str)
-        if str[0] == '<': str = ' ' + str
+        str = re.sub(r">(?!\})", r"> ", self._str)
+        str = re.sub(r"([^\{])<", r"\1 <", str)
+        if str[0] == "<":
+            str = " " + str
         return str
 
+
 ##//////////////////////////////////////////////////////
 ##  Chunking Rules
 ##//////////////////////////////////////////////////////
 
-@python_2_unicode_compatible
+
 class RegexpChunkRule(object):
     """
     A rule specifying how to modify the chunking in a ``ChunkString``,
@@ -273,6 +276,7 @@ class RegexpChunkRule(object):
     of angle-bracket delimited tags.  Furthermore, this transformation
     may not result in nested or mismatched bracketing.
     """
+
     def __init__(self, regexp, repl, descr):
         """
         Construct a new RegexpChunkRule.
@@ -291,7 +295,7 @@ class RegexpChunkRule(object):
         :param descr: A short description of the purpose and/or effect
             of this rule.
         """
-        if isinstance(regexp, string_types):
+        if isinstance(regexp, str):
             regexp = re.compile(regexp)
         self._repl = repl
         self._descr = descr
@@ -333,8 +337,13 @@ class RegexpChunkRule(object):
 
         :rtype: str
         """
-        return ('<RegexpChunkRule: '+unicode_repr(self._regexp.pattern)+
-                '->'+unicode_repr(self._repl)+'>')
+        return (
+            "<RegexpChunkRule: "
+            + repr(self._regexp.pattern)
+            + "->"
+            + repr(self._repl)
+            + ">"
+        )
 
     @staticmethod
     def fromstring(s):
@@ -356,34 +365,33 @@ class RegexpChunkRule(object):
         <ChunkRule: '<DT>?<NN.*>+'>
         """
         # Split off the comment (but don't split on '\#')
-        m = re.match(r'(?P<rule>(\\.|[^#])*)(?P<comment>#.*)?', s)
-        rule = m.group('rule').strip()
-        comment = (m.group('comment') or '')[1:].strip()
+        m = re.match(r"(?P<rule>(\\.|[^#])*)(?P<comment>#.*)?", s)
+        rule = m.group("rule").strip()
+        comment = (m.group("comment") or "")[1:].strip()
 
         # Pattern bodies: chunk, chink, split, merge
         try:
             if not rule:
-                raise ValueError('Empty chunk pattern')
-            if rule[0] == '{' and rule[-1] == '}':
+                raise ValueError("Empty chunk pattern")
+            if rule[0] == "{" and rule[-1] == "}":
                 return ChunkRule(rule[1:-1], comment)
-            elif rule[0] == '}' and rule[-1] == '{':
+            elif rule[0] == "}" and rule[-1] == "{":
                 return ChinkRule(rule[1:-1], comment)
-            elif '}{' in rule:
-                left, right = rule.split('}{')
+            elif "}{" in rule:
+                left, right = rule.split("}{")
                 return SplitRule(left, right, comment)
-            elif '{}' in rule:
-                left, right = rule.split('{}')
+            elif "{}" in rule:
+                left, right = rule.split("{}")
                 return MergeRule(left, right, comment)
-            elif re.match('[^{}]*{[^{}]*}[^{}]*', rule):
-                left, chunk, right = re.split('[{}]', rule)
+            elif re.match("[^{}]*{[^{}]*}[^{}]*", rule):
+                left, chunk, right = re.split("[{}]", rule)
                 return ChunkRuleWithContext(left, chunk, right, comment)
             else:
-                raise ValueError('Illegal chunk pattern: %s' % rule)
+                raise ValueError("Illegal chunk pattern: %s" % rule)
         except (ValueError, re.error):
-            raise ValueError('Illegal chunk pattern: %s' % rule)
+            raise ValueError("Illegal chunk pattern: %s" % rule)
 
 
-@python_2_unicode_compatible
 class ChunkRule(RegexpChunkRule):
     """
     A rule specifying how to add chunks to a ``ChunkString``, using a
@@ -392,6 +400,7 @@ class ChunkRule(RegexpChunkRule):
     already part of a chunk, and create a new chunk containing that
     substring.
     """
+
     def __init__(self, tag_pattern, descr):
 
         """
@@ -407,10 +416,11 @@ class ChunkRule(RegexpChunkRule):
             of this rule.
         """
         self._pattern = tag_pattern
-        regexp = re.compile('(?P<chunk>%s)%s' %
-                            (tag_pattern2re_pattern(tag_pattern),
-                             ChunkString.IN_CHINK_PATTERN))
-        RegexpChunkRule.__init__(self, regexp, '{\g<chunk>}', descr)
+        regexp = re.compile(
+            "(?P<chunk>%s)%s"
+            % (tag_pattern2re_pattern(tag_pattern), ChunkString.IN_CHINK_PATTERN)
+        )
+        RegexpChunkRule.__init__(self, regexp, "{\g<chunk>}", descr)
 
     def __repr__(self):
         """
@@ -424,9 +434,9 @@ class ChunkRule(RegexpChunkRule):
 
         :rtype: str
         """
-        return '<ChunkRule: '+unicode_repr(self._pattern)+'>'
+        return "<ChunkRule: " + repr(self._pattern) + ">"
+
 
-@python_2_unicode_compatible
 class ChinkRule(RegexpChunkRule):
     """
     A rule specifying how to remove chinks to a ``ChunkString``,
@@ -435,6 +445,7 @@ class ChinkRule(RegexpChunkRule):
     tag pattern and that is contained in a chunk, and remove it
     from that chunk, thus creating two new chunks.
     """
+
     def __init__(self, tag_pattern, descr):
         """
         Construct a new ``ChinkRule``.
@@ -450,10 +461,11 @@ class ChinkRule(RegexpChunkRule):
             of this rule.
         """
         self._pattern = tag_pattern
-        regexp = re.compile('(?P<chink>%s)%s' %
-                            (tag_pattern2re_pattern(tag_pattern),
-                             ChunkString.IN_CHUNK_PATTERN))
-        RegexpChunkRule.__init__(self, regexp, '}\g<chink>{', descr)
+        regexp = re.compile(
+            "(?P<chink>%s)%s"
+            % (tag_pattern2re_pattern(tag_pattern), ChunkString.IN_CHUNK_PATTERN)
+        )
+        RegexpChunkRule.__init__(self, regexp, "}\g<chink>{", descr)
 
     def __repr__(self):
         """
@@ -467,10 +479,9 @@ class ChinkRule(RegexpChunkRule):
 
         :rtype: str
         """
-        return '<ChinkRule: '+unicode_repr(self._pattern)+'>'
+        return "<ChinkRule: " + repr(self._pattern) + ">"
 
 
-@python_2_unicode_compatible
 class UnChunkRule(RegexpChunkRule):
     """
     A rule specifying how to remove chunks to a ``ChunkString``,
@@ -478,6 +489,7 @@ class UnChunkRule(RegexpChunkRule):
     ``ChunkString``, it will find any complete chunk that matches this
     tag pattern, and un-chunk it.
     """
+
     def __init__(self, tag_pattern, descr):
         """
         Construct a new ``UnChunkRule``.
@@ -492,9 +504,8 @@ class UnChunkRule(RegexpChunkRule):
             of this rule.
         """
         self._pattern = tag_pattern
-        regexp = re.compile('\{(?P<chunk>%s)\}' %
-                            tag_pattern2re_pattern(tag_pattern))
-        RegexpChunkRule.__init__(self, regexp, '\g<chunk>', descr)
+        regexp = re.compile("\{(?P<chunk>%s)\}" % tag_pattern2re_pattern(tag_pattern))
+        RegexpChunkRule.__init__(self, regexp, "\g<chunk>", descr)
 
     def __repr__(self):
         """
@@ -508,10 +519,9 @@ class UnChunkRule(RegexpChunkRule):
 
         :rtype: str
         """
-        return '<UnChunkRule: '+unicode_repr(self._pattern)+'>'
+        return "<UnChunkRule: " + repr(self._pattern) + ">"
 
 
-@python_2_unicode_compatible
 class MergeRule(RegexpChunkRule):
     """
     A rule specifying how to merge chunks in a ``ChunkString``, using
@@ -521,6 +531,7 @@ class MergeRule(RegexpChunkRule):
     beginning matches right pattern.  It will then merge those two
     chunks into a single chunk.
     """
+
     def __init__(self, left_tag_pattern, right_tag_pattern, descr):
         """
         Construct a new ``MergeRule``.
@@ -551,10 +562,14 @@ class MergeRule(RegexpChunkRule):
 
         self._left_tag_pattern = left_tag_pattern
         self._right_tag_pattern = right_tag_pattern
-        regexp = re.compile('(?P<left>%s)}{(?=%s)' %
-                            (tag_pattern2re_pattern(left_tag_pattern),
-                             tag_pattern2re_pattern(right_tag_pattern)))
-        RegexpChunkRule.__init__(self, regexp, '\g<left>', descr)
+        regexp = re.compile(
+            "(?P<left>%s)}{(?=%s)"
+            % (
+                tag_pattern2re_pattern(left_tag_pattern),
+                tag_pattern2re_pattern(right_tag_pattern),
+            )
+        )
+        RegexpChunkRule.__init__(self, regexp, "\g<left>", descr)
 
     def __repr__(self):
         """
@@ -568,11 +583,15 @@ class MergeRule(RegexpChunkRule):
 
         :rtype: str
         """
-        return ('<MergeRule: '+unicode_repr(self._left_tag_pattern)+', '+
-                unicode_repr(self._right_tag_pattern)+'>')
+        return (
+            "<MergeRule: "
+            + repr(self._left_tag_pattern)
+            + ", "
+            + repr(self._right_tag_pattern)
+            + ">"
+        )
 
 
-@python_2_unicode_compatible
 class SplitRule(RegexpChunkRule):
     """
     A rule specifying how to split chunks in a ``ChunkString``, using
@@ -582,6 +601,7 @@ class SplitRule(RegexpChunkRule):
     then split the chunk into two new chunks, at the point between the
     two pattern matches.
     """
+
     def __init__(self, left_tag_pattern, right_tag_pattern, descr):
         """
         Construct a new ``SplitRule``.
@@ -611,10 +631,14 @@ class SplitRule(RegexpChunkRule):
 
         self._left_tag_pattern = left_tag_pattern
         self._right_tag_pattern = right_tag_pattern
-        regexp = re.compile('(?P<left>%s)(?=%s)' %
-                            (tag_pattern2re_pattern(left_tag_pattern),
-                             tag_pattern2re_pattern(right_tag_pattern)))
-        RegexpChunkRule.__init__(self, regexp, r'\g<left>}{', descr)
+        regexp = re.compile(
+            "(?P<left>%s)(?=%s)"
+            % (
+                tag_pattern2re_pattern(left_tag_pattern),
+                tag_pattern2re_pattern(right_tag_pattern),
+            )
+        )
+        RegexpChunkRule.__init__(self, regexp, r"\g<left>}{", descr)
 
     def __repr__(self):
         """
@@ -628,11 +652,15 @@ class SplitRule(RegexpChunkRule):
 
        :rtype: str
         """
-        return ('<SplitRule: '+unicode_repr(self._left_tag_pattern)+', '+
-                unicode_repr(self._right_tag_pattern)+'>')
+        return (
+            "<SplitRule: "
+            + repr(self._left_tag_pattern)
+            + ", "
+            + repr(self._right_tag_pattern)
+            + ">"
+        )
 
 
-@python_2_unicode_compatible
 class ExpandLeftRule(RegexpChunkRule):
     """
     A rule specifying how to expand chunks in a ``ChunkString`` to the left,
@@ -642,6 +670,7 @@ class ExpandLeftRule(RegexpChunkRule):
     end matches left pattern.  It will then expand the chunk to incorporate
     the new material on the left.
     """
+
     def __init__(self, left_tag_pattern, right_tag_pattern, descr):
         """
         Construct a new ``ExpandRightRule``.
@@ -672,10 +701,14 @@ class ExpandLeftRule(RegexpChunkRule):
 
         self._left_tag_pattern = left_tag_pattern
         self._right_tag_pattern = right_tag_pattern
-        regexp = re.compile('(?P<left>%s)\{(?P<right>%s)' %
-                            (tag_pattern2re_pattern(left_tag_pattern),
-                             tag_pattern2re_pattern(right_tag_pattern)))
-        RegexpChunkRule.__init__(self, regexp, '{\g<left>\g<right>', descr)
+        regexp = re.compile(
+            "(?P<left>%s)\{(?P<right>%s)"
+            % (
+                tag_pattern2re_pattern(left_tag_pattern),
+                tag_pattern2re_pattern(right_tag_pattern),
+            )
+        )
+        RegexpChunkRule.__init__(self, regexp, "{\g<left>\g<right>", descr)
 
     def __repr__(self):
         """
@@ -689,11 +722,15 @@ class ExpandLeftRule(RegexpChunkRule):
 
         :rtype: str
         """
-        return ('<ExpandLeftRule: '+unicode_repr(self._left_tag_pattern)+', '+
-                unicode_repr(self._right_tag_pattern)+'>')
+        return (
+            "<ExpandLeftRule: "
+            + repr(self._left_tag_pattern)
+            + ", "
+            + repr(self._right_tag_pattern)
+            + ">"
+        )
 
 
-@python_2_unicode_compatible
 class ExpandRightRule(RegexpChunkRule):
     """
     A rule specifying how to expand chunks in a ``ChunkString`` to the
@@ -703,6 +740,7 @@ class ExpandRightRule(RegexpChunkRule):
     a chink whose beginning matches right pattern.  It will then
     expand the chunk to incorporate the new material on the right.
     """
+
     def __init__(self, left_tag_pattern, right_tag_pattern, descr):
         """
         Construct a new ``ExpandRightRule``.
@@ -733,10 +771,14 @@ class ExpandRightRule(RegexpChunkRule):
 
         self._left_tag_pattern = left_tag_pattern
         self._right_tag_pattern = right_tag_pattern
-        regexp = re.compile('(?P<left>%s)\}(?P<right>%s)' %
-                            (tag_pattern2re_pattern(left_tag_pattern),
-                             tag_pattern2re_pattern(right_tag_pattern)))
-        RegexpChunkRule.__init__(self, regexp, '\g<left>\g<right>}', descr)
+        regexp = re.compile(
+            "(?P<left>%s)\}(?P<right>%s)"
+            % (
+                tag_pattern2re_pattern(left_tag_pattern),
+                tag_pattern2re_pattern(right_tag_pattern),
+            )
+        )
+        RegexpChunkRule.__init__(self, regexp, "\g<left>\g<right>}", descr)
 
     def __repr__(self):
         """
@@ -750,11 +792,15 @@ class ExpandRightRule(RegexpChunkRule):
 
         :rtype: str
         """
-        return ('<ExpandRightRule: '+unicode_repr(self._left_tag_pattern)+', '+
-                unicode_repr(self._right_tag_pattern)+'>')
+        return (
+            "<ExpandRightRule: "
+            + repr(self._left_tag_pattern)
+            + ", "
+            + repr(self._right_tag_pattern)
+            + ">"
+        )
 
 
-@python_2_unicode_compatible
 class ChunkRuleWithContext(RegexpChunkRule):
     """
     A rule specifying how to add chunks to a ``ChunkString``, using
@@ -770,8 +816,14 @@ class ChunkRuleWithContext(RegexpChunkRule):
     rule matches; therefore, if you need to find overlapping matches,
     you will need to apply your rule more than once.
     """
-    def __init__(self, left_context_tag_pattern, chunk_tag_pattern,
-                 right_context_tag_pattern, descr):
+
+    def __init__(
+        self,
+        left_context_tag_pattern,
+        chunk_tag_pattern,
+        right_context_tag_pattern,
+        descr,
+    ):
         """
         Construct a new ``ChunkRuleWithContext``.
 
@@ -800,12 +852,16 @@ class ChunkRuleWithContext(RegexpChunkRule):
         self._left_context_tag_pattern = left_context_tag_pattern
         self._chunk_tag_pattern = chunk_tag_pattern
         self._right_context_tag_pattern = right_context_tag_pattern
-        regexp = re.compile('(?P<left>%s)(?P<chunk>%s)(?P<right>%s)%s' %
-                            (tag_pattern2re_pattern(left_context_tag_pattern),
-                             tag_pattern2re_pattern(chunk_tag_pattern),
-                             tag_pattern2re_pattern(right_context_tag_pattern),
-                             ChunkString.IN_CHINK_PATTERN))
-        replacement = r'\g<left>{\g<chunk>}\g<right>'
+        regexp = re.compile(
+            "(?P<left>%s)(?P<chunk>%s)(?P<right>%s)%s"
+            % (
+                tag_pattern2re_pattern(left_context_tag_pattern),
+                tag_pattern2re_pattern(chunk_tag_pattern),
+                tag_pattern2re_pattern(right_context_tag_pattern),
+                ChunkString.IN_CHINK_PATTERN,
+            )
+        )
+        replacement = r"\g<left>{\g<chunk>}\g<right>"
         RegexpChunkRule.__init__(self, regexp, replacement, descr)
 
     def __repr__(self):
@@ -820,9 +876,12 @@ class ChunkRuleWithContext(RegexpChunkRule):
 
         :rtype: str
         """
-        return '<ChunkRuleWithContext:  %r, %r, %r>' % (
-            self._left_context_tag_pattern, self._chunk_tag_pattern,
-            self._right_context_tag_pattern)
+        return "<ChunkRuleWithContext:  %r, %r, %r>" % (
+            self._left_context_tag_pattern,
+            self._chunk_tag_pattern,
+            self._right_context_tag_pattern,
+        )
+
 
 ##//////////////////////////////////////////////////////
 ##  Tag Pattern Format Conversion
@@ -830,12 +889,9 @@ class ChunkRuleWithContext(RegexpChunkRule):
 
 # this should probably be made more strict than it is -- e.g., it
 # currently accepts 'foo'.
-CHUNK_TAG_PATTERN = re.compile(r'^((%s|<%s>)*)$' %
-                                ('([^\{\}<>]|\{\d+,?\}|\{\d*,\d+\})+',
-                                 '[^\{\}<>]+'))
-
-
-
+CHUNK_TAG_PATTERN = re.compile(
+    r"^((%s|<%s>)*)$" % ("([^\{\}<>]|\{\d+,?\}|\{\d*,\d+\})+", "[^\{\}<>]+")
+)
 
 
 def tag_pattern2re_pattern(tag_pattern):
@@ -875,13 +931,13 @@ def tag_pattern2re_pattern(tag_pattern):
         ``tag_pattern``.
     """
     # Clean up the regular expression
-    tag_pattern = re.sub(r'\s', '', tag_pattern)
-    tag_pattern = re.sub(r'<', '(<(', tag_pattern)
-    tag_pattern = re.sub(r'>', ')>)', tag_pattern)
+    tag_pattern = re.sub(r"\s", "", tag_pattern)
+    tag_pattern = re.sub(r"<", "(<(", tag_pattern)
+    tag_pattern = re.sub(r">", ")>)", tag_pattern)
 
     # Check the regular expression
     if not CHUNK_TAG_PATTERN.match(tag_pattern):
-        raise ValueError('Bad tag pattern: %r' % tag_pattern)
+        raise ValueError("Bad tag pattern: %r" % tag_pattern)
 
     # Replace "." with CHUNK_TAG_CHAR.
     # We have to do this after, since it adds {}[]<>s, which would
@@ -892,10 +948,11 @@ def tag_pattern2re_pattern(tag_pattern):
     def reverse_str(str):
         lst = list(str)
         lst.reverse()
-        return ''.join(lst)
+        return "".join(lst)
+
     tc_rev = reverse_str(ChunkString.CHUNK_TAG_CHAR)
     reversed = reverse_str(tag_pattern)
-    reversed = re.sub(r'\.(?!\\(\\\\)*($|[^\\]))', tc_rev, reversed)
+    reversed = re.sub(r"\.(?!\\(\\\\)*($|[^\\]))", tc_rev, reversed)
     tag_pattern = reverse_str(reversed)
 
     return tag_pattern
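
A quick check of what the conversion above produces (the second output follows from the '.' widening step, so treat it as approximate):

from nltk.chunk.regexp import tag_pattern2re_pattern

print(tag_pattern2re_pattern("<NN>+"))
# -> (<(NN)>)+
print(tag_pattern2re_pattern("<NN.*>"))
# -> (<(NN[^\{\}<>]*)>)   ('.' is widened so it cannot cross a tag boundary)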
@@ -905,7 +962,7 @@ def tag_pattern2re_pattern(tag_pattern):
 ##  RegexpChunkParser
 ##//////////////////////////////////////////////////////
 
-@python_2_unicode_compatible
+
 class RegexpChunkParser(ChunkParserI):
     """
     A regular expression based chunk parser.  ``RegexpChunkParser`` uses a
@@ -927,7 +984,8 @@ class RegexpChunkParser(ChunkParserI):
     :ivar _trace: The default level of tracing.
 
     """
-    def __init__(self, rules, chunk_label='NP', root_label='S', trace=0):
+
+    def __init__(self, rules, chunk_label="NP", root_label="S", trace=0):
         """
         Construct a new ``RegexpChunkParser``.
 
@@ -966,14 +1024,14 @@ class RegexpChunkParser(ChunkParserI):
         :param verbose: Whether output should be verbose.
         :rtype: None
         """
-        print('# Input:')
+        print("# Input:")
         print(chunkstr)
         for rule in self._rules:
             rule.apply(chunkstr)
             if verbose:
-                print('#', rule.descr()+' ('+unicode_repr(rule)+'):')
+                print("#", rule.descr() + " (" + repr(rule) + "):")
             else:
-                print('#', rule.descr()+':')
+                print("#", rule.descr() + ":")
             print(chunkstr)
 
     def _notrace_apply(self, chunkstr):
@@ -1009,7 +1067,7 @@ class RegexpChunkParser(ChunkParserI):
             used to define this ``RegexpChunkParser``.
         """
         if len(chunk_struct) == 0:
-            print('Warning: parsing empty text')
+            print("Warning: parsing empty text")
             return Tree(self._root_label, [])
 
         try:
@@ -1018,13 +1076,14 @@ class RegexpChunkParser(ChunkParserI):
             chunk_struct = Tree(self._root_label, chunk_struct)
 
         # Use the default trace value?
-        if trace is None: trace = self._trace
+        if trace is None:
+            trace = self._trace
 
         chunkstr = ChunkString(chunk_struct)
 
         # Apply the sequence of rules to the chunkstring.
         if trace:
-            verbose = (trace>1)
+            verbose = trace > 1
             self._trace_apply(chunkstr, verbose)
         else:
             self._notrace_apply(chunkstr)
@@ -1057,18 +1116,19 @@ class RegexpChunkParser(ChunkParserI):
         for rule in self._rules:
             margin = max(margin, len(rule.descr()))
         if margin < 35:
-            format = "    %" + repr(-(margin+3)) + "s%s\n"
+            format = "    %" + repr(-(margin + 3)) + "s%s\n"
         else:
             format = "    %s\n      %s\n"
         for rule in self._rules:
-            s += format % (rule.descr(), unicode_repr(rule))
+            s += format % (rule.descr(), repr(rule))
         return s[:-1]
 
+
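
Reviewer note: a minimal end-to-end sketch of the class converted above, using
a single hand-written rule; ChunkRule lives in the same module but is not
shown in this hunk, and the rule text and labels are chosen for illustration:

>>> from nltk.tree import Tree
>>> from nltk.chunk.regexp import ChunkRule, RegexpChunkParser
>>> rule = ChunkRule("<DT>?<JJ>*<NN>", "chunk optional det, adjectives, noun")
>>> parser = RegexpChunkParser([rule], chunk_label="NP")
>>> print(parser.parse(Tree("S", [("the", "DT"), ("little", "JJ"), ("cat", "NN")])))
(S (NP the/DT little/JJ cat/NN))
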
 ##//////////////////////////////////////////////////////
 ##  Chunk Grammar
 ##//////////////////////////////////////////////////////
 
-@python_2_unicode_compatible
+
 class RegexpParser(ChunkParserI):
     """
     A grammar based chunk parser.  ``chunk.RegexpParser`` uses a set of
@@ -1110,7 +1170,8 @@ class RegexpParser(ChunkParserI):
     :ivar _stages: The list of parsing stages corresponding to the grammar
 
     """
-    def __init__(self, grammar, root_label='S', loop=1, trace=0):
+
+    def __init__(self, grammar, root_label="S", loop=1, trace=0):
         """
         Create a new chunk parser, from the given start state
         and set of chunk patterns.
@@ -1132,14 +1193,17 @@ class RegexpParser(ChunkParserI):
         self._grammar = grammar
         self._loop = loop
 
-        if isinstance(grammar, string_types):
+        if isinstance(grammar, str):
             self._read_grammar(grammar, root_label, trace)
         else:
             # Make sure the grammar looks like it has the right type:
-            type_err = ('Expected string or list of RegexpChunkParsers '
-                        'for the grammar.')
-            try: grammar = list(grammar)
-            except: raise TypeError(type_err)
+            type_err = (
+                "Expected string or list of RegexpChunkParsers " "for the grammar."
+            )
+            try:
+                grammar = list(grammar)
+            except Exception:
+                raise TypeError(type_err)
             for elt in grammar:
                 if not isinstance(elt, RegexpChunkParser):
                     raise TypeError(type_err)
@@ -1152,21 +1216,22 @@ class RegexpParser(ChunkParserI):
         """
         rules = []
         lhs = None
-        for line in grammar.split('\n'):
+        for line in grammar.split("\n"):
             line = line.strip()
 
             # New stage begins if there's an unescaped ':'
-            m = re.match('(?P<nonterminal>(\\.|[^:])*)(:(?P<rule>.*))', line)
+            m = re.match(r"(?P<nonterminal>(\.|[^:])*)(:(?P<rule>.*))", line)
             if m:
                 # Record the stage that we just completed.
                 self._add_stage(rules, lhs, root_label, trace)
                 # Start a new stage.
-                lhs = m.group('nonterminal').strip()
+                lhs = m.group("nonterminal").strip()
                 rules = []
-                line = m.group('rule').strip()
+                line = m.group("rule").strip()
 
             # Skip blank & comment-only lines
-            if line=='' or line.startswith('#'): continue
+            if line == "" or line.startswith("#"):
+                continue
 
             # Add the rule
             rules.append(RegexpChunkRule.fromstring(line))
@@ -1180,9 +1245,10 @@ class RegexpParser(ChunkParserI):
         """
         if rules != []:
             if not lhs:
-                raise ValueError('Expected stage marker (eg NP:)')
-            parser = RegexpChunkParser(rules, chunk_label=lhs,
-                                       root_label=root_label, trace=trace)
+                raise ValueError("Expected stage marker (e.g. NP:)")
+            parser = RegexpChunkParser(
+                rules, chunk_label=lhs, root_label=root_label, trace=trace
+            )
             self._stages.append(parser)
 
     def parse(self, chunk_struct, trace=None):
@@ -1202,7 +1268,8 @@ class RegexpParser(ChunkParserI):
         :return: the chunked output.
         :rtype: Tree
         """
-        if trace is None: trace = self._trace
+        if trace is None:
+            trace = self._trace
         for i in range(self._loop):
             for parser in self._stages:
                 chunk_struct = parser.parse(chunk_struct, trace=trace)
@@ -1227,10 +1294,12 @@ class RegexpParser(ChunkParserI):
             s += "%s\n" % parser
         return s[:-1]
 
+
 ##//////////////////////////////////////////////////////
 ##  Demonstration code
 ##//////////////////////////////////////////////////////
 
+
 def demo_eval(chunkparser, text):
     """
     Demonstration code for evaluating a chunk parser, using a
@@ -1254,45 +1323,46 @@ def demo_eval(chunkparser, text):
     # Evaluate our chunk parser.
     chunkscore = chunk.ChunkScore()
 
-    for sentence in text.split('\n'):
+    for sentence in text.split("\n"):
         print(sentence)
         sentence = sentence.strip()
-        if not sentence: continue
+        if not sentence:
+            continue
         gold = chunk.tagstr2tree(sentence)
         tokens = gold.leaves()
-        test = chunkparser.parse(Tree('S', tokens), trace=1)
+        test = chunkparser.parse(Tree("S", tokens), trace=1)
         chunkscore.score(gold, test)
         print()
 
-    print('/'+('='*75)+'\\')
-    print('Scoring', chunkparser)
-    print(('-'*77))
-    print('Precision: %5.1f%%' % (chunkscore.precision()*100), ' '*4, end=' ')
-    print('Recall: %5.1f%%' % (chunkscore.recall()*100), ' '*6, end=' ')
-    print('F-Measure: %5.1f%%' % (chunkscore.f_measure()*100))
-
+    print("/" + ("=" * 75) + "\\")
+    print("Scoring", chunkparser)
+    print(("-" * 77))
+    print("Precision: %5.1f%%" % (chunkscore.precision() * 100), " " * 4, end=" ")
+    print("Recall: %5.1f%%" % (chunkscore.recall() * 100), " " * 6, end=" ")
+    print("F-Measure: %5.1f%%" % (chunkscore.f_measure() * 100))
 
     # Missed chunks.
     if chunkscore.missed():
-        print('Missed:')
+        print("Missed:")
         missed = chunkscore.missed()
         for chunk in missed[:10]:
-            print('  ', ' '.join(map(str,chunk)))
+            print("  ", " ".join(map(str, chunk)))
         if len(chunkscore.missed()) > 10:
-            print('  ...')
+            print("  ...")
 
     # Incorrect chunks.
     if chunkscore.incorrect():
-        print('Incorrect:')
+        print("Incorrect:")
         incorrect = chunkscore.incorrect()
         for chunk in incorrect[:10]:
-            print('  ', ' '.join(map(str,chunk)))
+            print("  ", " ".join(map(str, chunk)))
         if len(chunkscore.incorrect()) > 10:
-            print('  ...')
+            print("  ...")
 
-    print('\\'+('='*75)+'/')
+    print("\\" + ("=" * 75) + "/")
     print()
 
+
 def demo():
     """
     A demonstration for the ``RegexpChunkParser`` class.  A single text is
@@ -1308,10 +1378,10 @@ def demo():
     [ John/NNP ] thinks/VBZ [ Mary/NN ] saw/VBD [ the/DT cat/NN ] sit/VB on/IN [ the/DT mat/NN ]./.
     """
 
-    print('*'*75)
-    print('Evaluation text:')
+    print("*" * 75)
+    print("Evaluation text:")
     print(text)
-    print('*'*75)
+    print("*" * 75)
     print()
 
     grammar = r"""
@@ -1348,7 +1418,7 @@ def demo():
     cp = chunk.RegexpParser(grammar)
     demo_eval(cp, text)
 
-# Evaluation
+    # Evaluation
 
     from nltk.corpus import conll2000
 
@@ -1356,8 +1426,7 @@ def demo():
     print("Demonstration of empty grammar:")
 
     cp = chunk.RegexpParser("")
-    print(chunk.accuracy(cp, conll2000.chunked_sents('test.txt',
-                                                     chunk_types=('NP',))))
+    print(chunk.accuracy(cp, conll2000.chunked_sents("test.txt", chunk_types=("NP",))))
 
     print()
     print("Demonstration of accuracy evaluation using CoNLL tags:")
@@ -1369,7 +1438,7 @@ def demo():
       <DT|JJ>{}<NN.*>     # merge det/adj with nouns
     """
     cp = chunk.RegexpParser(grammar)
-    print(chunk.accuracy(cp, conll2000.chunked_sents('test.txt')[:5]))
+    print(chunk.accuracy(cp, conll2000.chunked_sents("test.txt")[:5]))
 
     print()
     print("Demonstration of tagged token input")
@@ -1382,9 +1451,21 @@ def demo():
     VP: {<VB.*><NP|PP>*}    # VP = verb words + NPs and PPs
     """
     cp = chunk.RegexpParser(grammar)
-    print(cp.parse([("the","DT"), ("little","JJ"), ("cat", "NN"),
-                    ("sat", "VBD"), ("on", "IN"), ("the", "DT"),
-                    ("mat", "NN"), (".", ".")]))
-
-if __name__ == '__main__':
+    print(
+        cp.parse(
+            [
+                ("the", "DT"),
+                ("little", "JJ"),
+                ("cat", "NN"),
+                ("sat", "VBD"),
+                ("on", "IN"),
+                ("the", "DT"),
+                ("mat", "NN"),
+                (".", "."),
+            ]
+        )
+    )
+
+
+if __name__ == "__main__":
     demo()
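
Reviewer note: the stage machinery above composes; a two-stage grammar
exercising the string-grammar path (grammar and sentence invented for
illustration, output indicative):

>>> from nltk.chunk import RegexpParser
>>> cascade = RegexpParser(r'''
...   NP: {<DT>?<JJ>*<NN>}   # noun phrases
...   PP: {<IN><NP>}         # prepositional phrases
... ''')
>>> print(cascade.parse([("the", "DT"), ("cat", "NN"), ("on", "IN"),
...                      ("the", "DT"), ("mat", "NN")]))
(S (NP the/DT cat/NN) (PP on/IN (NP the/DT mat/NN)))
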
old mode 100755 (executable)
new mode 100644 (file)
index 0a99dc6..35ee79d
@@ -1,24 +1,24 @@
 # Natural Language Toolkit: Chunk format conversions
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com> (minor additions)
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals, division
 
 import re
 
 from nltk.tree import Tree
 from nltk.tag.mapping import map_tag
 from nltk.tag.util import str2tuple
-from nltk.compat import python_2_unicode_compatible
 
 ##//////////////////////////////////////////////////////
 ## EVALUATION
 ##//////////////////////////////////////////////////////
 
 from nltk.metrics import accuracy as _accuracy
+
+
 def accuracy(chunker, gold):
     """
     Score the accuracy of the chunker against the gold standard.
@@ -39,8 +39,8 @@ def accuracy(chunker, gold):
         gold_tags += tree2conlltags(gold_tree)
         test_tags += tree2conlltags(test_tree)
 
-#    print 'GOLD:', gold_tags[:50]
-#    print 'TEST:', test_tags[:50]
+    #    print 'GOLD:', gold_tags[:50]
+    #    print 'TEST:', test_tags[:50]
     return _accuracy(gold_tags, test_tags)
 
 
@@ -111,16 +111,17 @@ class ChunkScore(object):
     :type _fn_num: int
     :ivar _fn_num: Number of false negatives.
     """
+
     def __init__(self, **kwargs):
         self._correct = set()
         self._guessed = set()
         self._tp = set()
         self._fp = set()
         self._fn = set()
-        self._max_tp = kwargs.get('max_tp_examples', 100)
-        self._max_fp = kwargs.get('max_fp_examples', 100)
-        self._max_fn = kwargs.get('max_fn_examples', 100)
-        self._chunk_label = kwargs.get('chunk_label', '.*')
+        self._max_tp = kwargs.get("max_tp_examples", 100)
+        self._max_fp = kwargs.get("max_fp_examples", 100)
+        self._max_fn = kwargs.get("max_fn_examples", 100)
+        self._chunk_label = kwargs.get("chunk_label", ".*")
         self._tp_num = 0
         self._fp_num = 0
         self._fn_num = 0
@@ -131,14 +132,14 @@ class ChunkScore(object):
         self._measuresNeedUpdate = False
 
     def _updateMeasures(self):
-        if (self._measuresNeedUpdate):
-           self._tp = self._guessed & self._correct
-           self._fn = self._correct - self._guessed
-           self._fp = self._guessed - self._correct
-           self._tp_num = len(self._tp)
-           self._fp_num = len(self._fp)
-           self._fn_num = len(self._fn)
-           self._measuresNeedUpdate = False
+        if self._measuresNeedUpdate:
+            self._tp = self._guessed & self._correct
+            self._fn = self._correct - self._guessed
+            self._fp = self._guessed - self._correct
+            self._tp_num = len(self._tp)
+            self._fp_num = len(self._fp)
+            self._fn_num = len(self._fn)
+            self._measuresNeedUpdate = False
 
     def score(self, correct, guessed):
         """
@@ -165,9 +166,9 @@ class ChunkScore(object):
             # is too deeply nested to be printed in CoNLL format."
             correct_tags = guessed_tags = ()
         self._tags_total += len(correct_tags)
-        self._tags_correct += sum(1 for (t,g) in zip(guessed_tags,
-                                                     correct_tags)
-                                  if t==g)
+        self._tags_correct += sum(
+            1 for (t, g) in zip(guessed_tags, correct_tags) if t == g
+        )
 
     def accuracy(self):
         """
@@ -177,8 +178,9 @@ class ChunkScore(object):
 
         :rtype: float
         """
-        if self._tags_total == 0: return 1
-        return self._tags_correct/self._tags_total
+        if self._tags_total == 0:
+            return 1
+        return self._tags_correct / self._tags_total
 
     def precision(self):
         """
@@ -189,8 +191,10 @@ class ChunkScore(object):
         """
         self._updateMeasures()
         div = self._tp_num + self._fp_num
-        if div == 0: return 0
-        else: return self._tp_num / div
+        if div == 0:
+            return 0
+        else:
+            return self._tp_num / div
 
     def recall(self):
         """
@@ -201,8 +205,10 @@ class ChunkScore(object):
         """
         self._updateMeasures()
         div = self._tp_num + self._fn_num
-        if div == 0: return 0
-        else: return self._tp_num / div
+        if div == 0:
+            return 0
+        else:
+            return self._tp_num / div
 
     def f_measure(self, alpha=0.5):
         """
@@ -219,9 +225,9 @@ class ChunkScore(object):
         self._updateMeasures()
         p = self.precision()
         r = self.recall()
-        if p == 0 or r == 0:    # what if alpha is 0 or 1?
+        if p == 0 or r == 0:  # what if alpha is 0 or 1?
             return 0
-        return 1/(alpha/p + (1-alpha)/r)
+        return 1 / (alpha / p + (1 - alpha) / r)
 
     def missed(self):
         """
@@ -276,7 +282,7 @@ class ChunkScore(object):
 
         :rtype: str
         """
-        return '<ChunkScoring of '+repr(len(self))+' chunks>'
+        return "<ChunkScoring of " + repr(len(self)) + " chunks>"
 
     def __str__(self):
         """
@@ -287,11 +293,14 @@ class ChunkScore(object):
 
         :rtype: str
         """
-        return ("ChunkParse score:\n" +
-                ("    IOB Accuracy: {:5.1f}%%\n".format(self.accuracy()*100)) +
-                ("    Precision:    {:5.1f}%%\n".format(self.precision()*100)) +
-                ("    Recall:       {:5.1f}%%\n".format(self.recall()*100))+
-                ("    F-Measure:    {:5.1f}%%".format(self.f_measure()*100)))
+        return (
+            "ChunkParse score:\n"
+            + ("    IOB Accuracy: {:5.1f}%%\n".format(self.accuracy() * 100))
+            + ("    Precision:    {:5.1f}%%\n".format(self.precision() * 100))
+            + ("    Recall:       {:5.1f}%%\n".format(self.recall() * 100))
+            + ("    F-Measure:    {:5.1f}%%".format(self.f_measure() * 100))
+        )
+
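
Reviewer note: a quick round trip through the reindented scoring logic above,
with two hand-built trees. The numbers follow from the definitions (tp=1,
fp=0, fn=1), and F at alpha=0.5 is the harmonic mean 1/(0.5/1.0 + 0.5/0.5) = 2/3:

>>> from nltk.chunk import ChunkScore, tagstr2tree
>>> gold = tagstr2tree("[ the/DT cat/NN ] sat/VBD [ the/DT mat/NN ]")
>>> guess = tagstr2tree("[ the/DT cat/NN ] sat/VBD the/DT mat/NN")
>>> cs = ChunkScore()
>>> cs.score(gold, guess)
>>> cs.precision(), cs.recall()
(1.0, 0.5)
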
 
 # extract chunks, and assign unique id, the absolute position of
 # the first word of the chunk
@@ -308,8 +317,9 @@ def _chunksets(t, count, chunk_label):
     return set(chunks)
 
 
-def tagstr2tree(s, chunk_label="NP", root_label="S", sep='/',
-                source_tagset=None, target_tagset=None):
+def tagstr2tree(
+    s, chunk_label="NP", root_label="S", sep="/", source_tagset=None, target_tagset=None
+):
     """
     Divide a string of bracketed tagged text into
     chunks and unchunked tokens, and produce a Tree.
@@ -327,20 +337,20 @@ def tagstr2tree(s, chunk_label="NP", root_label="S", sep='/',
     :rtype: Tree
     """
 
-    WORD_OR_BRACKET = re.compile(r'\[|\]|[^\[\]\s]+')
+    WORD_OR_BRACKET = re.compile(r"\[|\]|[^\[\]\s]+")
 
     stack = [Tree(root_label, [])]
     for match in WORD_OR_BRACKET.finditer(s):
         text = match.group()
-        if text[0] == '[':
+        if text[0] == "[":
             if len(stack) != 1:
-                raise ValueError('Unexpected [ at char {:d}'.format(match.start()))
+                raise ValueError("Unexpected [ at char {:d}".format(match.start()))
             chunk = Tree(chunk_label, [])
             stack[-1].append(chunk)
             stack.append(chunk)
-        elif text[0] == ']':
+        elif text[0] == "]":
             if len(stack) != 2:
-                raise ValueError('Unexpected ] at char {:d}'.format(match.start()))
+                raise ValueError("Unexpected ] at char {:d}".format(match.start()))
             stack.pop()
         else:
             if sep is None:
@@ -352,13 +362,16 @@ def tagstr2tree(s, chunk_label="NP", root_label="S", sep='/',
                 stack[-1].append((word, tag))
 
     if len(stack) != 1:
-        raise ValueError('Expected ] at char {:d}'.format(len(s)))
+        raise ValueError("Expected ] at char {:d}".format(len(s)))
     return stack[0]
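
Reviewer note: a one-liner for the bracket format accepted above (input
invented for illustration):

>>> from nltk.chunk import tagstr2tree
>>> print(tagstr2tree("[ John/NNP ] saw/VBD [ the/DT cat/NN ]"))
(S (NP John/NNP) saw/VBD (NP the/DT cat/NN))
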
 
+
 ### CONLL
 
-_LINE_RE = re.compile('(\S+)\s+(\S+)\s+([IOB])-?(\S+)?')
-def conllstr2tree(s, chunk_types=('NP', 'PP', 'VP'), root_label="S"):
+_LINE_RE = re.compile(r"(\S+)\s+(\S+)\s+([IOB])-?(\S+)?")
+
+
+def conllstr2tree(s, chunk_types=("NP", "PP", "VP"), root_label="S"):
     """
     Return a chunk structure for a single sentence
     encoded in the given CONLL 2000 style string.
@@ -378,28 +391,29 @@ def conllstr2tree(s, chunk_types=('NP', 'PP', 'VP'), root_label="S"):
 
     stack = [Tree(root_label, [])]
 
-    for lineno, line in enumerate(s.split('\n')):
-        if not line.strip(): continue
+    for lineno, line in enumerate(s.split("\n")):
+        if not line.strip():
+            continue
 
         # Decode the line.
         match = _LINE_RE.match(line)
         if match is None:
-            raise ValueError('Error on line {:d}'.format(lineno))
+            raise ValueError("Error on line {:d}".format(lineno))
         (word, tag, state, chunk_type) = match.groups()
 
         # If it's a chunk type we don't care about, treat it as O.
-        if (chunk_types is not None and
-            chunk_type not in chunk_types):
-            state = 'O'
+        if chunk_types is not None and chunk_type not in chunk_types:
+            state = "O"
 
         # For "Begin"/"Outside", finish any completed chunks -
         # also do so for "Inside" which don't match the previous token.
-        mismatch_I = state == 'I' and chunk_type != stack[-1].label()
-        if state in 'BO' or mismatch_I:
-            if len(stack) == 2: stack.pop()
+        mismatch_I = state == "I" and chunk_type != stack[-1].label()
+        if state in "BO" or mismatch_I:
+            if len(stack) == 2:
+                stack.pop()
 
         # For "Begin", start a new chunk.
-        if state == 'B' or mismatch_I:
+        if state == "B" or mismatch_I:
             chunk = Tree(chunk_type, [])
             stack[-1].append(chunk)
             stack.append(chunk)
@@ -409,6 +423,7 @@ def conllstr2tree(s, chunk_types=('NP', 'PP', 'VP'), root_label="S"):
 
     return stack[0]
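
Reviewer note: the chunk_types filter above downgrades unlisted chunk labels
to O rather than erroring; a minimal sketch:

>>> from nltk.chunk import conllstr2tree
>>> print(conllstr2tree("the DT B-NP\ncat NN I-NP\nsat VBD B-VP", chunk_types=("NP",)))
(S (NP the/DT cat/NN) sat/VBD)
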
 
+
 def tree2conlltags(t):
     """
     Return a list of 3-tuples containing ``(word, tag, IOB-tag)``.
@@ -426,15 +441,19 @@ def tree2conlltags(t):
             prefix = "B-"
             for contents in child:
                 if isinstance(contents, Tree):
-                    raise ValueError("Tree is too deeply nested to be printed in CoNLL format")
-                tags.append((contents[0], contents[1], prefix+category))
+                    raise ValueError(
+                        "Tree is too deeply nested to be printed in CoNLL format"
+                    )
+                tags.append((contents[0], contents[1], prefix + category))
                 prefix = "I-"
         except AttributeError:
             tags.append((child[0], child[1], "O"))
     return tags
 
-def conlltags2tree(sentence, chunk_types=('NP','PP','VP'),
-                   root_label='S', strict=False):
+
+def conlltags2tree(
+    sentence, chunk_types=("NP", "PP", "VP"), root_label="S", strict=False
+):
     """
     Convert the CoNLL IOB format to a tree.
     """
@@ -445,25 +464,29 @@ def conlltags2tree(sentence, chunk_types=('NP','PP','VP'),
                 raise ValueError("Bad conll tag sequence")
             else:
                 # Treat as O
-                tree.append((word,postag))
-        elif chunktag.startswith('B-'):
-            tree.append(Tree(chunktag[2:], [(word,postag)]))
-        elif chunktag.startswith('I-'):
-            if (len(tree)==0 or not isinstance(tree[-1], Tree) or
-                tree[-1].label() != chunktag[2:]):
+                tree.append((word, postag))
+        elif chunktag.startswith("B-"):
+            tree.append(Tree(chunktag[2:], [(word, postag)]))
+        elif chunktag.startswith("I-"):
+            if (
+                len(tree) == 0
+                or not isinstance(tree[-1], Tree)
+                or tree[-1].label() != chunktag[2:]
+            ):
                 if strict:
                     raise ValueError("Bad conll tag sequence")
                 else:
                     # Treat as B-*
-                    tree.append(Tree(chunktag[2:], [(word,postag)]))
+                    tree.append(Tree(chunktag[2:], [(word, postag)]))
             else:
-                tree[-1].append((word,postag))
-        elif chunktag == 'O':
-            tree.append((word,postag))
+                tree[-1].append((word, postag))
+        elif chunktag == "O":
+            tree.append((word, postag))
         else:
             raise ValueError("Bad conll tag {0!r}".format(chunktag))
     return tree
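
Reviewer note: tree2conlltags and conlltags2tree above are inverses on
well-formed IOB input; a spot check with hand-written triples:

>>> from nltk.chunk import conlltags2tree, tree2conlltags
>>> tags = [("the", "DT", "B-NP"), ("cat", "NN", "I-NP"), ("sat", "VBD", "O")]
>>> tree2conlltags(conlltags2tree(tags)) == tags
True
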
 
+
 def tree2conllstr(t):
     """
     Return a multiline string where each line contains a word, tag and IOB tag.
@@ -474,52 +497,73 @@ def tree2conllstr(t):
     :rtype: str
     """
     lines = [" ".join(token) for token in tree2conlltags(t)]
-    return '\n'.join(lines)
+    return "\n".join(lines)
+
 
 ### IEER
 
-_IEER_DOC_RE = re.compile(r'<DOC>\s*'
-                          r'(<DOCNO>\s*(?P<docno>.+?)\s*</DOCNO>\s*)?'
-                          r'(<DOCTYPE>\s*(?P<doctype>.+?)\s*</DOCTYPE>\s*)?'
-                          r'(<DATE_TIME>\s*(?P<date_time>.+?)\s*</DATE_TIME>\s*)?'
-                          r'<BODY>\s*'
-                          r'(<HEADLINE>\s*(?P<headline>.+?)\s*</HEADLINE>\s*)?'
-                          r'<TEXT>(?P<text>.*?)</TEXT>\s*'
-                          r'</BODY>\s*</DOC>\s*', re.DOTALL)
+_IEER_DOC_RE = re.compile(
+    r"<DOC>\s*"
+    r"(<DOCNO>\s*(?P<docno>.+?)\s*</DOCNO>\s*)?"
+    r"(<DOCTYPE>\s*(?P<doctype>.+?)\s*</DOCTYPE>\s*)?"
+    r"(<DATE_TIME>\s*(?P<date_time>.+?)\s*</DATE_TIME>\s*)?"
+    r"<BODY>\s*"
+    r"(<HEADLINE>\s*(?P<headline>.+?)\s*</HEADLINE>\s*)?"
+    r"<TEXT>(?P<text>.*?)</TEXT>\s*"
+    r"</BODY>\s*</DOC>\s*",
+    re.DOTALL,
+)
 
 _IEER_TYPE_RE = re.compile(r'<b_\w+\s+[^>]*?type="(?P<type>\w+)"')
 
+
 def _ieer_read_text(s, root_label):
     stack = [Tree(root_label, [])]
     # s will be None if there is no headline in the text
     # return the empty list in place of a Tree
     if s is None:
         return []
-    for piece_m in re.finditer('<[^>]+>|[^\s<]+', s):
+    for piece_m in re.finditer(r"<[^>]+>|[^\s<]+", s):
         piece = piece_m.group()
         try:
-            if piece.startswith('<b_'):
+            if piece.startswith("<b_"):
                 m = _IEER_TYPE_RE.match(piece)
-                if m is None: print('XXXX', piece)
-                chunk = Tree(m.group('type'), [])
+                if m is None:
+                    print("XXXX", piece)
+                chunk = Tree(m.group("type"), [])
                 stack[-1].append(chunk)
                 stack.append(chunk)
-            elif piece.startswith('<e_'):
+            elif piece.startswith("<e_"):
                 stack.pop()
-#           elif piece.startswith('<'):
-#               print "ERROR:", piece
-#               raise ValueError # Unexpected HTML
+            #           elif piece.startswith('<'):
+            #               print "ERROR:", piece
+            #               raise ValueError # Unexpected HTML
             else:
                 stack[-1].append(piece)
         except (IndexError, ValueError):
-            raise ValueError('Bad IEER string (error at character {:d})'.format \
-                             (piece_m.start()))
+            raise ValueError(
+                "Bad IEER string (error at character {:d})".format(piece_m.start())
+            )
     if len(stack) != 1:
-        raise ValueError('Bad IEER string')
+        raise ValueError("Bad IEER string")
     return stack[0]
 
-def ieerstr2tree(s, chunk_types = ['LOCATION', 'ORGANIZATION', 'PERSON', 'DURATION',
-               'DATE', 'CARDINAL', 'PERCENT', 'MONEY', 'MEASURE'], root_label="S"):
+
+def ieerstr2tree(
+    s,
+    chunk_types=[
+        "LOCATION",
+        "ORGANIZATION",
+        "PERSON",
+        "DURATION",
+        "DATE",
+        "CARDINAL",
+        "PERCENT",
+        "MONEY",
+        "MEASURE",
+    ],
+    root_label="S",
+):
     """
     Return a chunk structure containing the chunked tagged text that is
     encoded in the given IEER style string.
@@ -536,14 +580,14 @@ def ieerstr2tree(s, chunk_types = ['LOCATION', 'ORGANIZATION', 'PERSON', 'DURATI
     m = _IEER_DOC_RE.match(s)
     if m:
         return {
-            'text': _ieer_read_text(m.group('text'), root_label),
-            'docno': m.group('docno'),
-            'doctype': m.group('doctype'),
-            'date_time': m.group('date_time'),
+            "text": _ieer_read_text(m.group("text"), root_label),
+            "docno": m.group("docno"),
+            "doctype": m.group("doctype"),
+            "date_time": m.group("date_time"),
             #'headline': m.group('headline')
             # we want to capture NEs in the headline too!
-            'headline': _ieer_read_text(m.group('headline'), root_label),
-            }
+            "headline": _ieer_read_text(m.group("headline"), root_label),
+        }
     else:
         return _ieer_read_text(s, root_label)
 
@@ -552,7 +596,8 @@ def demo():
 
     s = "[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, will/MD join/VB [ the/DT board/NN ] ./."
     import nltk
-    t = nltk.chunk.tagstr2tree(s, chunk_label='NP')
+
+    t = nltk.chunk.tagstr2tree(s, chunk_label="NP")
     t.pprint()
     print()
 
@@ -586,7 +631,7 @@ better JJR I-ADJP
 . . O
 """
 
-    conll_tree = conllstr2tree(s, chunk_types=('NP', 'PP'))
+    conll_tree = conllstr2tree(s, chunk_types=("NP", "PP"))
     conll_tree.pprint()
 
     # Demonstrate CoNLL output
@@ -595,6 +640,5 @@ better JJR I-ADJP
     print()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
-
diff --git a/nlp_resource_data/nltk/chunk/util.pyc b/nlp_resource_data/nltk/chunk/util.pyc
deleted file mode 100755 (executable)
index 3e7a886..0000000
Binary files a/nlp_resource_data/nltk/chunk/util.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 2acfbfa..36b5059
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Classifiers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -57,7 +57,7 @@ to the classifier:
     >>> from nltk.corpus import gutenberg
     >>> for fileid in gutenberg.fileids(): # doctest: +SKIP
     ...     doc = gutenberg.words(fileid) # doctest: +SKIP
-    ...     print fileid, classifier.classify(document_features(doc)) # doctest: +SKIP
+    ...     print(fileid, classifier.classify(document_features(doc))) # doctest: +SKIP
 
 The parameters that a feature detector expects will vary, depending on
 the task and the needs of the feature detector.  For example, a
@@ -91,8 +91,11 @@ from nltk.classify.decisiontree import DecisionTreeClassifier
 from nltk.classify.rte_classify import rte_classifier, rte_features, RTEFeatureExtractor
 from nltk.classify.util import accuracy, apply_features, log_likelihood
 from nltk.classify.scikitlearn import SklearnClassifier
-from nltk.classify.maxent import (MaxentClassifier, BinaryMaxentFeatureEncoding,
-                                  TypedMaxentFeatureEncoding,
-                                  ConditionalExponentialClassifier)
+from nltk.classify.maxent import (
+    MaxentClassifier,
+    BinaryMaxentFeatureEncoding,
+    TypedMaxentFeatureEncoding,
+    ConditionalExponentialClassifier,
+)
 from nltk.classify.senna import Senna
 from nltk.classify.textcat import TextCat
old mode 100755 (executable)
new mode 100644 (file)
similarity index 68%
rename from nlp_resource_data/nltk/classify/__init__.pyc
rename to nlp_resource_data/nltk/classify/__pycache__/__init__.cpython-37.pyc
index 4a9eded..7df3038
Binary files a/nlp_resource_data/nltk/classify/__init__.pyc and b/nlp_resource_data/nltk/classify/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8584b79
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/decisiontree.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/decisiontree.cpython-37.pyc
new file mode 100644 (file)
index 0000000..5f56ab9
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/decisiontree.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/maxent.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/maxent.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f15741b
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/maxent.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/megam.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/megam.cpython-37.pyc
new file mode 100644 (file)
index 0000000..bee0930
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/megam.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/naivebayes.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/naivebayes.cpython-37.pyc
new file mode 100644 (file)
index 0000000..c78ac74
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/naivebayes.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/positivenaivebayes.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/positivenaivebayes.cpython-37.pyc
new file mode 100644 (file)
index 0000000..82d8249
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/positivenaivebayes.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/rte_classify.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/rte_classify.cpython-37.pyc
new file mode 100644 (file)
index 0000000..3045dfd
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/rte_classify.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/scikitlearn.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/scikitlearn.cpython-37.pyc
new file mode 100644 (file)
index 0000000..048bb8a
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/scikitlearn.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/senna.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/senna.cpython-37.pyc
new file mode 100644 (file)
index 0000000..286c8c5
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/senna.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/svm.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/svm.cpython-37.pyc
new file mode 100644 (file)
index 0000000..38da623
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/svm.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/tadm.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/tadm.cpython-37.pyc
new file mode 100644 (file)
index 0000000..649d9d1
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/tadm.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/textcat.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/textcat.cpython-37.pyc
new file mode 100644 (file)
index 0000000..c0b9144
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/textcat.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..d46f923
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/util.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/classify/__pycache__/weka.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/weka.cpython-37.pyc
new file mode 100644 (file)
index 0000000..cf63549
Binary files /dev/null and b/nlp_resource_data/nltk/classify/__pycache__/weka.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index fc32b0d..ba6d88e
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Classifier Interface
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com> (minor additions)
 # URL: <http://nltk.org/>
@@ -21,9 +21,10 @@ that each text belongs to zero or more categories.
 from nltk.internals import overridden
 
 ##//////////////////////////////////////////////////////
-#{ Classification Interfaces
+# { Classification Interfaces
 ##//////////////////////////////////////////////////////
 
+
 class ClassifierI(object):
     """
     A processing interface for labeling tokens with a single category
@@ -38,6 +39,7 @@ class ClassifierI(object):
     Subclasses may define:
       - either ``prob_classify()`` or ``prob_classify_many()`` (or both)
     """
+
     def labels(self):
         """
         :return: the list of category labels used by this classifier.
@@ -101,6 +103,7 @@ class MultiClassifierI(object):
     Subclasses may define:
       - either ``prob_classify()`` or ``prob_classify_many()`` (or both)
     """
+
     def labels(self):
         """
         :return: the list of category labels used by this classifier.
@@ -190,4 +193,3 @@ class MultiClassifierI(object):
 #         of ``featuresets``.
 #         """
 #         raise NotImplementedError()
-
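
Reviewer note: the contract documented above (labels() plus either classify()
or classify_many()) is easy to check with a stub, since classify_many() falls
back to per-token classify(); the stub is invented for illustration:

>>> from nltk.classify.api import ClassifierI
>>> class Majority(ClassifierI):
...     def labels(self):
...         return ["yes", "no"]
...     def classify(self, featureset):
...         return "yes"
>>> Majority().classify_many([{"f": 1}, {"f": 2}])
['yes', 'yes']
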
diff --git a/nlp_resource_data/nltk/classify/api.pyc b/nlp_resource_data/nltk/classify/api.pyc
deleted file mode 100755 (executable)
index 735c40b..0000000
Binary files a/nlp_resource_data/nltk/classify/api.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 2bf5742..10c784c
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Decision Tree Classifiers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -10,15 +10,13 @@ A classifier model that decides which label to assign to a token on
 the basis of a tree structure, where branches correspond to conditions
 on feature values, and leaves correspond to label assignments.
 """
-from __future__ import print_function, unicode_literals, division
 
 from collections import defaultdict
 
 from nltk.probability import FreqDist, MLEProbDist, entropy
 from nltk.classify.api import ClassifierI
-from nltk.compat import python_2_unicode_compatible
 
-@python_2_unicode_compatible
+
 class DecisionTreeClassifier(ClassifierI):
     def __init__(self, label, feature_name=None, decisions=None, default=None):
         """
@@ -69,9 +67,9 @@ class DecisionTreeClassifier(ClassifierI):
         for featureset, label in labeled_featuresets:
             if self.classify(featureset) != label:
                 errors += 1
-        return errors/len(labeled_featuresets)
+        return errors / len(labeled_featuresets)
 
-    def pretty_format(self, width=70, prefix='', depth=4):
+    def pretty_format(self, width=70, prefix="", depth=4):
         """
         Return a string containing a pretty-printed version of this
         decision tree.  Each line in this string corresponds to a
@@ -80,23 +78,27 @@ class DecisionTreeClassifier(ClassifierI):
         """
         # [xx] display default!!
         if self._fname is None:
-            n = width-len(prefix)-15
-            return '{0}{1} {2}\n'.format(prefix, '.'*n, self._label)
+            n = width - len(prefix) - 15
+            return "{0}{1} {2}\n".format(prefix, "." * n, self._label)
         s = ''
-        for i, (fval, result) in enumerate(sorted(self._decisions.items())):
+        for i, (fval, result) in enumerate(
+            sorted(
+                self._decisions.items(),
+                key=lambda item: (item[0] in [None, False, True], str(item[0]).lower()),
+            )
+        ):
             hdr = '{0}{1}={2}? '.format(prefix, self._fname, fval)
-            n = width-15-len(hdr)
-            s += '{0}{1} {2}\n'.format(hdr, '.'*(n), result._label)
-            if result._fname is not None and depth>1:
-                s += result.pretty_format(width, prefix+'  ', depth-1)
+            n = width - 15 - len(hdr)
+            s += "{0}{1} {2}\n".format(hdr, "." * (n), result._label)
+            if result._fname is not None and depth > 1:
+                s += result.pretty_format(width, prefix + "  ", depth - 1)
         if self._default is not None:
-            n = width-len(prefix)-21
-            s += '{0}else: {1} {2}\n'.format(prefix, '.'*n, self._default._label)
-            if self._default._fname is not None and depth>1:
-                s += self._default.pretty_format(width, prefix+'  ', depth-1)
+            n = width - len(prefix) - 21
+            s += "{0}else: {1} {2}\n".format(prefix, "." * n, self._default._label)
+            if self._default._fname is not None and depth > 1:
+                s += self._default.pretty_format(width, prefix + "  ", depth - 1)
         return s
 
-    def pseudocode(self, prefix='', depth=4):
+    def pseudocode(self, prefix="", depth=4):
         """
         Return a string representation of this decision tree that
         expresses the decisions it makes as a nested set of pseudocode
@@ -105,31 +107,41 @@ class DecisionTreeClassifier(ClassifierI):
         if self._fname is None:
             return "{0}return {1!r}\n".format(prefix, self._label)
         s = ''
-        for (fval, result) in sorted(self._decisions.items()):
+        for (fval, result) in sorted(
+            self._decisions.items(),
+            key=lambda item: (item[0] in [None, False, True], str(item[0]).lower()),
+        ):
             s += '{0}if {1} == {2!r}: '.format(prefix, self._fname, fval)
-            if result._fname is not None and depth>1:
-                s += '\n'+result.pseudocode(prefix+'  ', depth-1)
+            if result._fname is not None and depth > 1:
+                s += "\n" + result.pseudocode(prefix + "  ", depth - 1)
             else:
-                s += 'return {0!r}\n'.format(result._label)
+                s += "return {0!r}\n".format(result._label)
         if self._default is not None:
             if len(self._decisions) == 1:
-                s += '{0}if {1} != {2!r}: '.format(prefix, self._fname,
-                                         list(self._decisions.keys())[0])
+                s += "{0}if {1} != {2!r}: ".format(
+                    prefix, self._fname, list(self._decisions.keys())[0]
+                )
             else:
-                s += '{0}else: '.format(prefix)
-            if self._default._fname is not None and depth>1:
-                s += '\n'+self._default.pseudocode(prefix+'  ', depth-1)
+                s += "{0}else: ".format(prefix)
+            if self._default._fname is not None and depth > 1:
+                s += "\n" + self._default.pseudocode(prefix + "  ", depth - 1)
             else:
-                s += 'return {0!r}\n'.format(self._default._label)
+                s += "return {0!r}\n".format(self._default._label)
         return s
 
     def __str__(self):
         return self.pretty_format()
 
     @staticmethod
-    def train(labeled_featuresets, entropy_cutoff=0.05, depth_cutoff=100,
-              support_cutoff=10, binary=False, feature_values=None,
-              verbose=False):
+    def train(
+        labeled_featuresets,
+        entropy_cutoff=0.05,
+        depth_cutoff=100,
+        support_cutoff=10,
+        binary=False,
+        feature_values=None,
+        verbose=False,
+    ):
         """
         :param binary: If true, then treat all feature/value pairs as
             individual binary features, rather than using a single n-way
@@ -151,67 +163,98 @@ class DecisionTreeClassifier(ClassifierI):
         # Start with a stump.
         if not binary:
             tree = DecisionTreeClassifier.best_stump(
-                feature_names, labeled_featuresets, verbose)
+                feature_names, labeled_featuresets, verbose
+            )
         else:
             tree = DecisionTreeClassifier.best_binary_stump(
-                feature_names, labeled_featuresets, feature_values, verbose)
+                feature_names, labeled_featuresets, feature_values, verbose
+            )
 
         # Refine the stump.
-        tree.refine(labeled_featuresets, entropy_cutoff, depth_cutoff-1,
-                    support_cutoff, binary, feature_values, verbose)
+        tree.refine(
+            labeled_featuresets,
+            entropy_cutoff,
+            depth_cutoff - 1,
+            support_cutoff,
+            binary,
+            feature_values,
+            verbose,
+        )
 
         # Return it
         return tree
 
     @staticmethod
     def leaf(labeled_featuresets):
-        label = FreqDist(label for (featureset, label)
-                         in labeled_featuresets).max()
+        label = FreqDist(label for (featureset, label) in labeled_featuresets).max()
         return DecisionTreeClassifier(label)
 
     @staticmethod
     def stump(feature_name, labeled_featuresets):
-        label = FreqDist(label for (featureset, label)
-                         in labeled_featuresets).max()
+        label = FreqDist(label for (featureset, label) in labeled_featuresets).max()
 
         # Find the best label for each value.
-        freqs = defaultdict(FreqDist) # freq(label|value)
+        freqs = defaultdict(FreqDist)  # freq(label|value)
         for featureset, label in labeled_featuresets:
             feature_value = featureset.get(feature_name)
             freqs[feature_value][label] += 1
 
-        decisions = dict((val, DecisionTreeClassifier(freqs[val].max()))
-                         for val in freqs)
+        decisions = dict(
+            (val, DecisionTreeClassifier(freqs[val].max())) for val in freqs
+        )
         return DecisionTreeClassifier(label, feature_name, decisions)
 
-    def refine(self, labeled_featuresets, entropy_cutoff, depth_cutoff,
-               support_cutoff, binary=False, feature_values=None,
-               verbose=False):
-        if len(labeled_featuresets) <= support_cutoff: return
-        if self._fname is None: return
-        if depth_cutoff <= 0: return
+    def refine(
+        self,
+        labeled_featuresets,
+        entropy_cutoff,
+        depth_cutoff,
+        support_cutoff,
+        binary=False,
+        feature_values=None,
+        verbose=False,
+    ):
+        if len(labeled_featuresets) <= support_cutoff:
+            return
+        if self._fname is None:
+            return
+        if depth_cutoff <= 0:
+            return
         for fval in self._decisions:
-            fval_featuresets = [(featureset, label) for (featureset, label)
-                                in labeled_featuresets
-                                if featureset.get(self._fname) == fval]
+            fval_featuresets = [
+                (featureset, label)
+                for (featureset, label) in labeled_featuresets
+                if featureset.get(self._fname) == fval
+            ]
 
-            label_freqs = FreqDist(label for (featureset, label)
-                                   in fval_featuresets)
+            label_freqs = FreqDist(label for (featureset, label) in fval_featuresets)
             if entropy(MLEProbDist(label_freqs)) > entropy_cutoff:
                 self._decisions[fval] = DecisionTreeClassifier.train(
-                    fval_featuresets, entropy_cutoff, depth_cutoff,
-                    support_cutoff, binary, feature_values, verbose)
+                    fval_featuresets,
+                    entropy_cutoff,
+                    depth_cutoff,
+                    support_cutoff,
+                    binary,
+                    feature_values,
+                    verbose,
+                )
         if self._default is not None:
-            default_featuresets = [(featureset, label) for (featureset, label)
-                                   in labeled_featuresets
-                                   if featureset.get(self._fname) not in
-                                   self._decisions]
-            label_freqs = FreqDist(label for (featureset, label)
-                                   in default_featuresets)
+            default_featuresets = [
+                (featureset, label)
+                for (featureset, label) in labeled_featuresets
+                if featureset.get(self._fname) not in self._decisions
+            ]
+            label_freqs = FreqDist(label for (featureset, label) in default_featuresets)
             if entropy(MLEProbDist(label_freqs)) > entropy_cutoff:
                 self._default = DecisionTreeClassifier.train(
-                    default_featuresets, entropy_cutoff, depth_cutoff,
-                    support_cutoff, binary, feature_values, verbose)
+                    default_featuresets,
+                    entropy_cutoff,
+                    depth_cutoff,
+                    support_cutoff,
+                    binary,
+                    feature_values,
+                    verbose,
+                )
 
     @staticmethod
     def best_stump(feature_names, labeled_featuresets, verbose=False):
@@ -224,14 +267,18 @@ class DecisionTreeClassifier(ClassifierI):
                 best_error = stump_error
                 best_stump = stump
         if verbose:
-            print(('best stump for {:6d} toks uses {:20} err={:6.4f}'.format \
-                   (len(labeled_featuresets), best_stump._fname, best_error)))
+            print(
+                "best stump for {:6d} toks uses {:20} err={:6.4f}".format(
+                    len(labeled_featuresets), best_stump._fname, best_error
+                )
+            )
         return best_stump
 
     @staticmethod
     def binary_stump(feature_name, feature_value, labeled_featuresets):
-        label = FreqDist(label for (featureset, label)
-                         in labeled_featuresets).max()
+        label = FreqDist(label for (featureset, label) in labeled_featuresets).max()
 
         # Find the best label for each value.
         pos_fdist = FreqDist()
@@ -242,7 +289,6 @@ class DecisionTreeClassifier(ClassifierI):
             else:
                 neg_fdist[label] += 1
 
-
         decisions = {}
         default = label
         # But hopefully we have observations!
@@ -254,42 +300,55 @@ class DecisionTreeClassifier(ClassifierI):
         return DecisionTreeClassifier(label, feature_name, decisions, default)
 
     @staticmethod
-    def best_binary_stump(feature_names, labeled_featuresets, feature_values,
-                          verbose=False):
+    def best_binary_stump(
+        feature_names, labeled_featuresets, feature_values, verbose=False
+    ):
         best_stump = DecisionTreeClassifier.leaf(labeled_featuresets)
         best_error = best_stump.error(labeled_featuresets)
         for fname in feature_names:
             for fval in feature_values[fname]:
                 stump = DecisionTreeClassifier.binary_stump(
-                    fname, fval, labeled_featuresets)
+                    fname, fval, labeled_featuresets
+                )
                 stump_error = stump.error(labeled_featuresets)
                 if stump_error < best_error:
                     best_error = stump_error
                     best_stump = stump
         if verbose:
             if best_stump._decisions:
-                descr = '{0}={1}'.format(best_stump._fname,
-                                         list(best_stump._decisions.keys())[0])
+                descr = "{0}={1}".format(
+                    best_stump._fname, list(best_stump._decisions.keys())[0]
+                )
             else:
-                descr = '(default)'
-            print(('best stump for {:6d} toks uses {:20} err={:6.4f}'.format \
-                   (len(labeled_featuresets), descr, best_error)))
+                descr = "(default)"
+            print(
+                "best stump for {:6d} toks uses {:20} err={:6.4f}".format(
+                    len(labeled_featuresets), descr, best_error
+                )
+            )
         return best_stump
 
+
 ##//////////////////////////////////////////////////////
 ##  Demo
 ##//////////////////////////////////////////////////////
 
+
 def f(x):
     return DecisionTreeClassifier.train(x, binary=True, verbose=True)
 
+
 def demo():
     from nltk.classify.util import names_demo, binary_names_demo_features
-    classifier = names_demo(f, #DecisionTreeClassifier.train,
-                            binary_names_demo_features)
-    print(classifier.pp(depth=7))
+
+    classifier = names_demo(
+        f,  # DecisionTreeClassifier.train
+        binary_names_demo_features,
+    )
+    print(classifier.pretty_format(depth=7))
     print(classifier.pseudocode(depth=7))
 
-if __name__ == '__main__':
-    demo()
 
+if __name__ == "__main__":
+    demo()
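
Reviewer note: a self-contained training sketch for the reflowed
train()/refine() path above (toy featuresets invented for illustration):

>>> from nltk.classify import DecisionTreeClassifier
>>> train = [({"last_letter": "a"}, "female"), ({"last_letter": "k"}, "male"),
...          ({"last_letter": "a"}, "female"), ({"last_letter": "o"}, "male")]
>>> tree = DecisionTreeClassifier.train(train)
>>> tree.classify({"last_letter": "k"})
'male'
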
diff --git a/nlp_resource_data/nltk/classify/decisiontree.pyc b/nlp_resource_data/nltk/classify/decisiontree.pyc
deleted file mode 100755 (executable)
index c66ab20..0000000
Binary files a/nlp_resource_data/nltk/classify/decisiontree.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index f067394..7a03f81
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Maximum Entropy Classifiers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Dmitry Chichkov <dchichkov@gmail.com> (TypedMaxentFeatureEncoding)
 # URL: <http://nltk.org/>
@@ -51,8 +51,6 @@ For all values of ``feat_val`` and ``some_label``.  This mapping is
 performed by classes that implement the ``MaxentFeatureEncodingI``
 interface.
 """
-from __future__ import print_function, unicode_literals
-
 try:
     import numpy
 except ImportError:
@@ -62,26 +60,22 @@ import tempfile
 import os
 from collections import defaultdict
 
-from six import integer_types
-
-from nltk import compat
 from nltk.data import gzip_open_unicode
 from nltk.util import OrderedDict
 from nltk.probability import DictionaryProbDist
 
 from nltk.classify.api import ClassifierI
 from nltk.classify.util import CutoffChecker, accuracy, log_likelihood
-from nltk.classify.megam import (call_megam,
-                                 write_megam_file, parse_megam_weights)
+from nltk.classify.megam import call_megam, write_megam_file, parse_megam_weights
 from nltk.classify.tadm import call_tadm, write_tadm_file, parse_tadm_weights
 
-__docformat__ = 'epytext en'
+__docformat__ = "epytext en"
 
 ######################################################################
-#{ Classifier Model
+# { Classifier Model
 ######################################################################
 
-@compat.python_2_unicode_compatible
+
 class MaxentClassifier(ClassifierI):
     """
     A maximum entropy classifier (also known as a "conditional
@@ -100,6 +94,7 @@ class MaxentClassifier(ClassifierI):
 
       dotprod(a,b) = sum(x*y for (x,y) in zip(a,b))
     """
+
     def __init__(self, encoding, weights, logarithmic=True):
         """
         Construct a new maxent classifier model.  Typically, new
@@ -120,7 +115,7 @@ class MaxentClassifier(ClassifierI):
         self._encoding = encoding
         self._weights = weights
         self._logarithmic = logarithmic
-        #self._logarithmic = False
+        # self._logarithmic = False
         assert encoding.length() == len(weights)
 
     def labels(self):
@@ -163,8 +158,7 @@ class MaxentClassifier(ClassifierI):
                 prob_dict[label] = prod
 
         # Normalize the dictionary to give a probability distribution
-        return DictionaryProbDist(prob_dict, log=self._logarithmic,
-                                  normalize=True)
+        return DictionaryProbDist(prob_dict, log=self._logarithmic, normalize=True)
 
     def explain(self, featureset, columns=4):
         """
@@ -173,62 +167,94 @@ class MaxentClassifier(ClassifierI):
         probabilities of each label for that featureset.
         """
         descr_width = 50
-        TEMPLATE = '  %-'+str(descr_width-2)+'s%s%8.3f'
+        TEMPLATE = "  %-" + str(descr_width - 2) + "s%s%8.3f"
 
         pdist = self.prob_classify(featureset)
         labels = sorted(pdist.samples(), key=pdist.prob, reverse=True)
         labels = labels[:columns]
-        print('  Feature'.ljust(descr_width)+''.join(
-            '%8s' % (("%s" % l)[:7]) for l in labels))
-        print('  '+'-'*(descr_width-2+8*len(labels)))
+        print(
+            "  Feature".ljust(descr_width)
+            + "".join("%8s" % (("%s" % l)[:7]) for l in labels)
+        )
+        print("  " + "-" * (descr_width - 2 + 8 * len(labels)))
         sums = defaultdict(int)
         for i, label in enumerate(labels):
             feature_vector = self._encoding.encode(featureset, label)
-            feature_vector.sort(key=lambda fid__: abs(self._weights[fid__[0]]),
-                                reverse=True)
+            feature_vector.sort(
+                key=lambda fid__: abs(self._weights[fid__[0]]), reverse=True
+            )
             for (f_id, f_val) in feature_vector:
                 if self._logarithmic:
                     score = self._weights[f_id] * f_val
-                else: score = self._weights[f_id] ** f_val
+                else:
+                    score = self._weights[f_id] ** f_val
                 descr = self._encoding.describe(f_id)
-                descr = descr.split(' and label is ')[0] # hack
-                descr += ' (%s)' % f_val                 # hack
+                descr = descr.split(" and label is ")[0]  # hack
+                descr += " (%s)" % f_val  # hack
                 if len(descr) > 47:
-                    descr = descr[:44]+'...'
-                print(TEMPLATE % (descr, i*8*' ', score))
+                    descr = descr[:44] + "..."
+                print(TEMPLATE % (descr, i * 8 * " ", score))
                 sums[label] += score
-        print('  '+'-'*(descr_width-1+8*len(labels)))
-        print('  TOTAL:'.ljust(descr_width)+''.join(
-            '%8.3f' % sums[l] for l in labels))
-        print('  PROBS:'.ljust(descr_width)+''.join(
-            '%8.3f' % pdist.prob(l) for l in labels))
-
-    def show_most_informative_features(self, n=10, show='all'):
+        print("  " + "-" * (descr_width - 1 + 8 * len(labels)))
+        print(
+            "  TOTAL:".ljust(descr_width) + "".join("%8.3f" % sums[l] for l in labels)
+        )
+        print(
+            "  PROBS:".ljust(descr_width)
+            + "".join("%8.3f" % pdist.prob(l) for l in labels)
+        )
+
+    def most_informative_features(self, n=10):
+        """
+        Generates the ranked list of informative features from most to least.
+        """
+        if hasattr(self, "_most_informative_features"):
+            return self._most_informative_features[:n]
+        else:
+            self._most_informative_features = sorted(
+                list(range(len(self._weights))),
+                key=lambda fid: abs(self._weights[fid]),
+                reverse=True,
+            )
+            return self._most_informative_features[:n]
+
+    def show_most_informative_features(self, n=10, show="all"):
         """
         :param show: 'all', 'neg', or 'pos' (show only negative-weight or positive-weight features)
+        :type show: str
+        :param n: The number of top features to show
+        :type n: int
         """
-        fids = sorted(list(range(len(self._weights))),
-                      key=lambda fid: abs(self._weights[fid]),
-                      reverse=True)
-        if show == 'pos':
+        # Use None to get the full list of ranked features.
+        fids = self.most_informative_features(None)
+        if show == "pos":
             fids = [fid for fid in fids if self._weights[fid] > 0]
-        elif show == 'neg':
+        elif show == "neg":
             fids = [fid for fid in fids if self._weights[fid] < 0]
         for fid in fids[:n]:
-            print('%8.3f %s' % (self._weights[fid],
-                                self._encoding.describe(fid)))
+            print("%8.3f %s" % (self._weights[fid], self._encoding.describe(fid)))
 
     def __repr__(self):
-        return ('<ConditionalExponentialClassifier: %d labels, %d features>' %
-                (len(self._encoding.labels()), self._encoding.length()))
+        return "<ConditionalExponentialClassifier: %d labels, %d features>" % (
+            len(self._encoding.labels()),
+            self._encoding.length(),
+        )
 
     #: A list of the algorithm names that are accepted for the
     #: ``train()`` method's ``algorithm`` parameter.
-    ALGORITHMS = ['GIS', 'IIS', 'MEGAM', 'TADM']
+    ALGORITHMS = ["GIS", "IIS", "MEGAM", "TADM"]
 
     @classmethod
-    def train(cls, train_toks, algorithm=None, trace=3, encoding=None,
-              labels=None, gaussian_prior_sigma=0, **cutoffs):
+    def train(
+        cls,
+        train_toks,
+        algorithm=None,
+        trace=3,
+        encoding=None,
+        labels=None,
+        gaussian_prior_sigma=0,
+        **cutoffs
+    ):
         """
         Train a new maxent classifier based on the given corpus of
         training samples.  This classifier will have its weights
@@ -281,32 +307,42 @@ class MaxentClassifier(ClassifierI):
               log likelihood by less than ``v``.
         """
         if algorithm is None:
-            algorithm = 'iis'
+            algorithm = "iis"
         for key in cutoffs:
-            if key not in ('max_iter', 'min_ll', 'min_lldelta',
-                           'max_acc', 'min_accdelta', 'count_cutoff',
-                           'norm', 'explicit', 'bernoulli'):
-                raise TypeError('Unexpected keyword arg %r' % key)
+            if key not in (
+                "max_iter",
+                "min_ll",
+                "min_lldelta",
+                "max_acc",
+                "min_accdelta",
+                "count_cutoff",
+                "norm",
+                "explicit",
+                "bernoulli",
+            ):
+                raise TypeError("Unexpected keyword arg %r" % key)
         algorithm = algorithm.lower()
-        if algorithm == 'iis':
+        if algorithm == "iis":
             return train_maxent_classifier_with_iis(
-                train_toks, trace, encoding, labels, **cutoffs)
-        elif algorithm == 'gis':
+                train_toks, trace, encoding, labels, **cutoffs
+            )
+        elif algorithm == "gis":
             return train_maxent_classifier_with_gis(
-                train_toks, trace, encoding, labels, **cutoffs)
-        elif algorithm == 'megam':
+                train_toks, trace, encoding, labels, **cutoffs
+            )
+        elif algorithm == "megam":
             return train_maxent_classifier_with_megam(
-                train_toks, trace, encoding, labels,
-                gaussian_prior_sigma, **cutoffs)
-        elif algorithm == 'tadm':
+                train_toks, trace, encoding, labels, gaussian_prior_sigma, **cutoffs
+            )
+        elif algorithm == "tadm":
             kwargs = cutoffs
-            kwargs['trace'] = trace
-            kwargs['encoding'] = encoding
-            kwargs['labels'] = labels
-            kwargs['gaussian_prior_sigma'] = gaussian_prior_sigma
+            kwargs["trace"] = trace
+            kwargs["encoding"] = encoding
+            kwargs["labels"] = labels
+            kwargs["gaussian_prior_sigma"] = gaussian_prior_sigma
             return TadmMaxentClassifier.train(train_toks, **kwargs)
         else:
-            raise ValueError('Unknown algorithm %s' % algorithm)
+            raise ValueError("Unknown algorithm %s" % algorithm)
 
 
 #: Alias for MaxentClassifier.
@@ -314,9 +350,10 @@ ConditionalExponentialClassifier = MaxentClassifier
 
 
 ######################################################################
-#{ Feature Encodings
+# { Feature Encodings
 ######################################################################
 
+
 class MaxentFeatureEncodingI(object):
     """
     A mapping that converts a set of input-feature values to a vector
@@ -339,6 +376,7 @@ class MaxentFeatureEncodingI(object):
     input-feature values and labels that are present in a given
     corpus.
     """
+
     def encode(self, featureset, label):
         """
         Given a (featureset, label) pair, return the corresponding
@@ -388,11 +426,13 @@ class MaxentFeatureEncodingI(object):
         """
         raise NotImplementedError()
 
+
 class FunctionBackedMaxentFeatureEncoding(MaxentFeatureEncodingI):
     """
     A feature encoding that calls a user-supplied function to map a
     given featureset/label pair to a sparse joint-feature vector.
     """
+
     def __init__(self, func, length, labels):
         """
         Construct a new feature encoding based on the given function.
@@ -431,7 +471,8 @@ class FunctionBackedMaxentFeatureEncoding(MaxentFeatureEncodingI):
         return self._labels
 
     def describe(self, fid):
-        return 'no description available'
+        return "no description available"
+
 
 class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
     """
@@ -473,8 +514,8 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
     These always-on features allow the maxent model to directly model
     the prior probabilities of each label.
     """
-    def __init__(self, labels, mapping, unseen_features=False,
-                 alwayson_features=False):
+
+    def __init__(self, labels, mapping, unseen_features=False, alwayson_features=False):
         """
         :param labels: A list of the \"known labels\" for this encoding.
 
@@ -492,8 +533,10 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
            features in the generated joint-feature vectors.
         """
         if set(mapping.values()) != set(range(len(mapping))):
-            raise ValueError('Mapping values must be exactly the '
-                             'set of integers from 0...len(mapping)')
+            raise ValueError(
+                "Mapping values must be exactly the "
+                "set of integers from 0...len(mapping)"
+            )
 
         self._labels = list(labels)
         """A list of attested labels."""
@@ -511,14 +554,16 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
         """dict mapping from fname -> fid"""
 
         if alwayson_features:
-            self._alwayson = dict((label, i+self._length)
-                                  for (i, label) in enumerate(labels))
+            self._alwayson = dict(
+                (label, i + self._length) for (i, label) in enumerate(labels)
+            )
             self._length += len(self._alwayson)
 
         if unseen_features:
             fnames = set(fname for (fname, fval, label) in mapping)
-            self._unseen = dict((fname, i+self._length)
-                                for (i, fname) in enumerate(fnames))
+            self._unseen = dict(
+                (fname, i + self._length) for (i, fname) in enumerate(fnames)
+            )
             self._length += len(fnames)
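To make the constructor's mapping contract concrete: the feature ids must form exactly the dense range 0 to len(mapping)-1. A tiny hypothetical mapping that passes the check performed above:

    >>> mapping = {('f1', True, 'x'): 0, ('f1', True, 'y'): 1}
    >>> set(mapping.values()) == set(range(len(mapping)))
    True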
 
     def encode(self, featureset, label):
@@ -536,7 +581,7 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
                 # Have we seen this fname/fval combination with any label?
                 for label2 in self._labels:
                     if (fname, fval, label2) in self._mapping:
-                        break # we've seen this fname/fval combo
+                        break  # we've seen this fname/fval combo
                 # We haven't -- fire the unseen-value feature
                 else:
                     if fname in self._unseen:
@@ -550,28 +595,28 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
 
     def describe(self, f_id):
         # Inherit docs.
-        if not isinstance(f_id, integer_types):
-            raise TypeError('describe() expected an int')
+        if not isinstance(f_id, int):
+            raise TypeError("describe() expected an int")
         try:
             self._inv_mapping
         except AttributeError:
-            self._inv_mapping = [-1]*len(self._mapping)
+            self._inv_mapping = [-1] * len(self._mapping)
             for (info, i) in self._mapping.items():
                 self._inv_mapping[i] = info
 
         if f_id < len(self._mapping):
             (fname, fval, label) = self._inv_mapping[f_id]
-            return '%s==%r and label is %r' % (fname, fval, label)
+            return "%s==%r and label is %r" % (fname, fval, label)
         elif self._alwayson and f_id in self._alwayson.values():
             for (label, f_id2) in self._alwayson.items():
                 if f_id == f_id2:
-                    return 'label is %r' % label
+                    return "label is %r" % label
         elif self._unseen and f_id in self._unseen.values():
             for (fname, f_id2) in self._unseen.items():
                 if f_id == f_id2:
-                    return '%s is unseen' % fname
+                    return "%s is unseen" % fname
         else:
-            raise ValueError('Bad feature id')
+            raise ValueError("Bad feature id")
 
     def labels(self):
         # Inherit docs.
@@ -609,13 +654,13 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
         :param options: Extra parameters for the constructor, such as
             ``unseen_features`` and ``alwayson_features``.
         """
-        mapping = {}              # maps (fname, fval, label) -> fid
-        seen_labels = set()       # The set of labels we've encountered
+        mapping = {}  # maps (fname, fval, label) -> fid
+        seen_labels = set()  # The set of labels we've encountered
         count = defaultdict(int)  # maps (fname, fval) -> count
 
         for (tok, label) in train_toks:
             if labels and label not in labels:
-                raise ValueError('Unexpected label %s' % label)
+                raise ValueError("Unexpected label %s" % label)
             seen_labels.add(label)
 
             # Record each of the features.
@@ -633,6 +678,7 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI):
             labels = seen_labels
         return cls(labels, mapping, **options)
 
+
 class GISEncoding(BinaryMaxentFeatureEncoding):
     """
     A binary feature encoding which adds one new joint-feature to the
@@ -647,8 +693,10 @@ class GISEncoding(BinaryMaxentFeatureEncoding):
       - The feature vector must sum to a constant non-negative number
         for every token.
     """
-    def __init__(self, labels, mapping, unseen_features=False,
-                 alwayson_features=False, C=None):
+
+    def __init__(
+        self, labels, mapping, unseen_features=False, alwayson_features=False, C=None
+    ):
         """
         :param C: The correction constant.  The value of the correction
             feature is based on this value.  In particular, its value is
             ``C - sum([v for (f, v) in encoding])``.
         :seealso: ``BinaryMaxentFeatureEncoding.__init__``
         :seealso: ``BinaryMaxentFeatureEncoding.__init__``
         """
         BinaryMaxentFeatureEncoding.__init__(
-            self, labels, mapping, unseen_features, alwayson_features)
+            self, labels, mapping, unseen_features, alwayson_features
+        )
         if C is None:
-            C = len(set(fname for (fname, fval, label) in mapping))+1
+            C = len(set(fname for (fname, fval, label) in mapping)) + 1
         self._C = C
 
     @property
@@ -675,8 +724,8 @@ class GISEncoding(BinaryMaxentFeatureEncoding):
         # Add a correction feature.
         total = sum(v for (f, v) in encoding)
         if total >= self._C:
-            raise ValueError('Correction feature is not high enough!')
-        encoding.append((base_length, self._C-total))
+            raise ValueError("Correction feature is not high enough!")
+        encoding.append((base_length, self._C - total))
 
         # Return the result
         return encoding
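A worked instance of the correction feature appended above, with hypothetical numbers (C=4, two base features summing to 2, correction fid equal to the base length):

    >>> C, base_length = 4, 10
    >>> encoding = [(0, 1), (3, 1)]
    >>> total = sum(v for (f, v) in encoding)
    >>> encoding + [(base_length, C - total)]
    [(0, 1), (3, 1), (10, 2)]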
@@ -686,19 +735,18 @@ class GISEncoding(BinaryMaxentFeatureEncoding):
 
     def describe(self, f_id):
         if f_id == BinaryMaxentFeatureEncoding.length(self):
-            return 'Correction feature (%s)' % self._C
+            return "Correction feature (%s)" % self._C
         else:
             return BinaryMaxentFeatureEncoding.describe(self, f_id)
 
 
 class TadmEventMaxentFeatureEncoding(BinaryMaxentFeatureEncoding):
-    def __init__(self, labels, mapping, unseen_features=False,
-                       alwayson_features=False):
+    def __init__(self, labels, mapping, unseen_features=False, alwayson_features=False):
         self._mapping = OrderedDict(mapping)
         self._label_mapping = OrderedDict()
-        BinaryMaxentFeatureEncoding.__init__(self, labels, self._mapping,
-                                             unseen_features,
-                                             alwayson_features)
+        BinaryMaxentFeatureEncoding.__init__(
+            self, labels, self._mapping, unseen_features, alwayson_features
+        )
 
     def encode(self, featureset, label):
         encoding = []
@@ -710,8 +758,9 @@ class TadmEventMaxentFeatureEncoding(BinaryMaxentFeatureEncoding):
                     self._label_mapping[value] = len(self._label_mapping)
                 else:
                     self._label_mapping[value] = value
-            encoding.append((self._mapping[(feature, label)],
-                             self._label_mapping[value]))
+            encoding.append(
+                (self._mapping[(feature, label)], self._label_mapping[value])
+            )
         return encoding
 
     def labels(self):
@@ -803,8 +852,8 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
     These always-on features allow the maxent model to directly model
     the prior probabilities of each label.
     """
-    def __init__(self, labels, mapping, unseen_features=False,
-                 alwayson_features=False):
+
+    def __init__(self, labels, mapping, unseen_features=False, alwayson_features=False):
         """
         :param labels: A list of the \"known labels\" for this encoding.
 
@@ -822,8 +871,10 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
            features in the generated joint-feature vectors.
         """
         if set(mapping.values()) != set(range(len(mapping))):
-            raise ValueError('Mapping values must be exactly the '
-                             'set of integers from 0...len(mapping)')
+            raise ValueError(
+                "Mapping values must be exactly the "
+                "set of integers from 0...len(mapping)"
+            )
 
         self._labels = list(labels)
         """A list of attested labels."""
@@ -841,14 +892,16 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
         """dict mapping from fname -> fid"""
 
         if alwayson_features:
-            self._alwayson = dict((label, i+self._length)
-                                  for (i, label) in enumerate(labels))
+            self._alwayson = dict(
+                (label, i + self._length) for (i, label) in enumerate(labels)
+            )
             self._length += len(self._alwayson)
 
         if unseen_features:
             fnames = set(fname for (fname, fval, label) in mapping)
-            self._unseen = dict((fname, i+self._length)
-                                for (i, fname) in enumerate(fnames))
+            self._unseen = dict(
+                (fname, i + self._length) for (i, fname) in enumerate(fnames)
+            )
             self._length += len(fnames)
 
     def encode(self, featureset, label):
@@ -857,11 +910,10 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
 
         # Convert input-features to joint-features:
         for fname, fval in featureset.items():
-            if isinstance(fval, (integer_types, float)):
+            if isinstance(fval, (int, float)):
                 # Known feature name & value:
                 if (fname, type(fval), label) in self._mapping:
-                    encoding.append((self._mapping[fname, type(fval),
-                                                   label], fval))
+                    encoding.append((self._mapping[fname, type(fval), label], fval))
             else:
                 # Known feature name & value:
                 if (fname, fval, label) in self._mapping:
@@ -872,13 +924,12 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
                     # Have we seen this fname/fval combination with any label?
                     for label2 in self._labels:
                         if (fname, fval, label2) in self._mapping:
-                            break # we've seen this fname/fval combo
+                            break  # we've seen this fname/fval combo
                     # We haven't -- fire the unseen-value feature
                     else:
                         if fname in self._unseen:
                             encoding.append((self._unseen[fname], 1))
 
-
         # Add always-on features:
         if self._alwayson and label in self._alwayson:
             encoding.append((self._alwayson[label], 1))
@@ -887,28 +938,28 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
 
     def describe(self, f_id):
         # Inherit docs.
-        if not isinstance(f_id, integer_types):
-            raise TypeError('describe() expected an int')
+        if not isinstance(f_id, int):
+            raise TypeError("describe() expected an int")
         try:
             self._inv_mapping
         except AttributeError:
-            self._inv_mapping = [-1]*len(self._mapping)
+            self._inv_mapping = [-1] * len(self._mapping)
             for (info, i) in self._mapping.items():
                 self._inv_mapping[i] = info
 
         if f_id < len(self._mapping):
             (fname, fval, label) = self._inv_mapping[f_id]
-            return '%s==%r and label is %r' % (fname, fval, label)
+            return "%s==%r and label is %r" % (fname, fval, label)
         elif self._alwayson and f_id in self._alwayson.values():
             for (label, f_id2) in self._alwayson.items():
                 if f_id == f_id2:
-                    return 'label is %r' % label
+                    return "label is %r" % label
         elif self._unseen and f_id in self._unseen.values():
             for (fname, f_id2) in self._unseen.items():
                 if f_id == f_id2:
-                    return '%s is unseen' % fname
+                    return "%s is unseen" % fname
         else:
-            raise ValueError('Bad feature id')
+            raise ValueError("Bad feature id")
 
     def labels(self):
         # Inherit docs.
@@ -949,13 +1000,13 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
         :param options: Extra parameters for the constructor, such as
             ``unseen_features`` and ``alwayson_features``.
         """
-        mapping = {}              # maps (fname, fval, label) -> fid
-        seen_labels = set()       # The set of labels we've encountered
+        mapping = {}  # maps (fname, fval, label) -> fid
+        seen_labels = set()  # The set of labels we've encountered
         count = defaultdict(int)  # maps (fname, fval) -> count
 
         for (tok, label) in train_toks:
             if labels and label not in labels:
-                raise ValueError('Unexpected label %s' % label)
+                raise ValueError("Unexpected label %s" % label)
             seen_labels.add(label)
 
             # Record each of the features.
@@ -975,14 +1026,14 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI):
         return cls(labels, mapping, **options)
 
 
-
-
 ######################################################################
-#{ Classifier Trainer: Generalized Iterative Scaling
+# { Classifier Trainer: Generalized Iterative Scaling
 ######################################################################
 
-def train_maxent_classifier_with_gis(train_toks, trace=3, encoding=None,
-                                     labels=None, **cutoffs):
+
+def train_maxent_classifier_with_gis(
+    train_toks, trace=3, encoding=None, labels=None, **cutoffs
+):
     """
     Train a new ``ConditionalExponentialClassifier``, using the given
     training samples, using the Generalized Iterative Scaling
@@ -992,21 +1043,23 @@ def train_maxent_classifier_with_gis(train_toks, trace=3, encoding=None,
 
     :see: ``train_maxent_classifier()`` for parameter descriptions.
     """
-    cutoffs.setdefault('max_iter', 100)
+    cutoffs.setdefault("max_iter", 100)
     cutoffchecker = CutoffChecker(cutoffs)
 
     # Construct an encoding from the training data.
     if encoding is None:
         encoding = GISEncoding.train(train_toks, labels=labels)
 
-    if not hasattr(encoding, 'C'):
-        raise TypeError('The GIS algorithm requires an encoding that '
-                        'defines C (e.g., GISEncoding).')
+    if not hasattr(encoding, "C"):
+        raise TypeError(
+            "The GIS algorithm requires an encoding that "
+            "defines C (e.g., GISEncoding)."
+        )
 
     # Cinv is the inverse of the sum of each joint feature vector.
     # This controls the learning rate: higher Cinv (or lower C) gives
     # faster learning.
-    Cinv = 1.0/encoding.C
+    Cinv = 1.0 / encoding.C
 
     # Count how many times each feature occurs in the training data.
     empirical_fcount = calculate_empirical_fcount(train_toks, encoding)
@@ -1016,7 +1069,7 @@ def train_maxent_classifier_with_gis(train_toks, trace=3, encoding=None,
 
     # Build the classifier.  Start with weight=0 for each attested
     # feature, and weight=-infinity for each unattested feature.
-    weights = numpy.zeros(len(empirical_fcount), 'd')
+    weights = numpy.zeros(len(empirical_fcount), "d")
     for fid in unattested:
         weights[fid] = numpy.NINF
     classifier = ConditionalExponentialClassifier(encoding, weights)
@@ -1026,11 +1079,11 @@ def train_maxent_classifier_with_gis(train_toks, trace=3, encoding=None,
     del empirical_fcount
 
     if trace > 0:
-        print('  ==> Training (%d iterations)' % cutoffs['max_iter'])
+        print("  ==> Training (%d iterations)" % cutoffs["max_iter"])
     if trace > 2:
         print()
-        print('      Iteration    Log Likelihood    Accuracy')
-        print('      ---------------------------------------')
+        print("      Iteration    Log Likelihood    Accuracy")
+        print("      ---------------------------------------")
 
     # Train the classifier.
     try:
@@ -1039,12 +1092,13 @@ def train_maxent_classifier_with_gis(train_toks, trace=3, encoding=None,
                 ll = cutoffchecker.ll or log_likelihood(classifier, train_toks)
                 acc = cutoffchecker.acc or accuracy(classifier, train_toks)
                 iternum = cutoffchecker.iter
-                print('     %9d    %14.5f    %9.3f' % (iternum, ll, acc))
+                print("     %9d    %14.5f    %9.3f" % (iternum, ll, acc))
 
             # Use the model to estimate the number of times each
             # feature should occur in the training data.
             estimated_fcount = calculate_estimated_fcount(
-                classifier, train_toks, encoding)
+                classifier, train_toks, encoding
+            )
 
             # Take the log of estimated fcount (avoid taking log(0).)
             for fid in unattested:
@@ -1062,20 +1116,21 @@ def train_maxent_classifier_with_gis(train_toks, trace=3, encoding=None,
                 break
 
     except KeyboardInterrupt:
-        print('      Training stopped: keyboard interrupt')
+        print("      Training stopped: keyboard interrupt")
     except:
         raise
 
     if trace > 2:
         ll = log_likelihood(classifier, train_toks)
         acc = accuracy(classifier, train_toks)
-        print('         Final    %14.5f    %9.3f' % (ll, acc))
+        print("         Final    %14.5f    %9.3f" % (ll, acc))
 
-# Return the classifier.
+    # Return the classifier.
     return classifier
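In outline, each GIS iteration above moves every weight by the Cinv-scaled difference between the log empirical and log estimated feature counts. A stripped-down scalar version of that update with hypothetical counts (base-2 logs are assumed here, matching NLTK's logprob convention; the actual log call falls outside this hunk):

    >>> import math
    >>> Cinv = 1.0 / 5.0
    >>> empirical, estimated = 12.0, 8.0
    >>> round((math.log2(empirical) - math.log2(estimated)) * Cinv, 4)
    0.117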
 
+
 def calculate_empirical_fcount(train_toks, encoding):
-    fcount = numpy.zeros(encoding.length(), 'd')
+    fcount = numpy.zeros(encoding.length(), "d")
 
     for tok, label in train_toks:
         for (index, val) in encoding.encode(tok, label):
@@ -1083,25 +1138,28 @@ def calculate_empirical_fcount(train_toks, encoding):
 
     return fcount
 
+
 def calculate_estimated_fcount(classifier, train_toks, encoding):
-    fcount = numpy.zeros(encoding.length(), 'd')
+    fcount = numpy.zeros(encoding.length(), "d")
 
     for tok, label in train_toks:
         pdist = classifier.prob_classify(tok)
         for label in pdist.samples():
             prob = pdist.prob(label)
             for (fid, fval) in encoding.encode(tok, label):
-                fcount[fid] += prob*fval
+                fcount[fid] += prob * fval
 
     return fcount
 
 
 ######################################################################
-#{ Classifier Trainer: Improved Iterative Scaling
+# { Classifier Trainer: Improved Iterative Scaling
 ######################################################################
 
-def train_maxent_classifier_with_iis(train_toks, trace=3, encoding=None,
-                                     labels=None, **cutoffs):
+
+def train_maxent_classifier_with_iis(
+    train_toks, trace=3, encoding=None, labels=None, **cutoffs
+):
     """
     Train a new ``ConditionalExponentialClassifier``, using the given
     training samples, using the Improved Iterative Scaling algorithm.
@@ -1111,7 +1169,7 @@ def train_maxent_classifier_with_iis(train_toks, trace=3, encoding=None,
 
     :see: ``train_maxent_classifier()`` for parameter descriptions.
     """
-    cutoffs.setdefault('max_iter', 100)
+    cutoffs.setdefault("max_iter", 100)
     cutoffchecker = CutoffChecker(cutoffs)
 
     # Construct an encoding from the training data.
@@ -1119,8 +1177,7 @@ def train_maxent_classifier_with_iis(train_toks, trace=3, encoding=None,
         encoding = BinaryMaxentFeatureEncoding.train(train_toks, labels=labels)
 
     # Count how many times each feature occurs in the training data.
-    empirical_ffreq = (calculate_empirical_fcount(train_toks, encoding) /
-                       len(train_toks))
+    empirical_ffreq = calculate_empirical_fcount(train_toks, encoding) / len(train_toks)
 
     # Find the nf map, and related variables nfarray and nfident.
     # nf is the sum of the features for a given labeled text.
@@ -1128,7 +1185,7 @@ def train_maxent_classifier_with_iis(train_toks, trace=3, encoding=None,
     # nfarray performs the reverse operation.  nfident is
     # nfarray multiplied by an identity matrix.
     nfmap = calculate_nfmap(train_toks, encoding)
-    nfarray = numpy.array(sorted(nfmap, key=nfmap.__getitem__), 'd')
+    nfarray = numpy.array(sorted(nfmap, key=nfmap.__getitem__), "d")
     nftranspose = numpy.reshape(nfarray, (len(nfarray), 1))
 
     # Check for any features that are not attested in train_toks.
@@ -1136,17 +1193,17 @@ def train_maxent_classifier_with_iis(train_toks, trace=3, encoding=None,
 
     # Build the classifier.  Start with weight=0 for each attested
     # feature, and weight=-infinity for each unattested feature.
-    weights = numpy.zeros(len(empirical_ffreq), 'd')
+    weights = numpy.zeros(len(empirical_ffreq), "d")
     for fid in unattested:
         weights[fid] = numpy.NINF
     classifier = ConditionalExponentialClassifier(encoding, weights)
 
     if trace > 0:
-        print('  ==> Training (%d iterations)' % cutoffs['max_iter'])
+        print("  ==> Training (%d iterations)" % cutoffs["max_iter"])
     if trace > 2:
         print()
-        print('      Iteration    Log Likelihood    Accuracy')
-        print('      ---------------------------------------')
+        print("      Iteration    Log Likelihood    Accuracy")
+        print("      ---------------------------------------")
 
     # Train the classifier.
     try:
@@ -1155,12 +1212,19 @@ def train_maxent_classifier_with_iis(train_toks, trace=3, encoding=None,
                 ll = cutoffchecker.ll or log_likelihood(classifier, train_toks)
                 acc = cutoffchecker.acc or accuracy(classifier, train_toks)
                 iternum = cutoffchecker.iter
-                print('     %9d    %14.5f    %9.3f' % (iternum, ll, acc))
+                print("     %9d    %14.5f    %9.3f" % (iternum, ll, acc))
 
             # Calculate the deltas for this iteration, using Newton's method.
             deltas = calculate_deltas(
-                train_toks, classifier, unattested, empirical_ffreq,
-                nfmap, nfarray, nftranspose, encoding)
+                train_toks,
+                classifier,
+                unattested,
+                empirical_ffreq,
+                nfmap,
+                nfarray,
+                nftranspose,
+                encoding,
+            )
 
             # Use the deltas to update our weights.
             weights = classifier.weights()
@@ -1172,19 +1236,19 @@ def train_maxent_classifier_with_iis(train_toks, trace=3, encoding=None,
                 break
 
     except KeyboardInterrupt:
-        print('      Training stopped: keyboard interrupt')
+        print("      Training stopped: keyboard interrupt")
     except:
         raise
 
-
     if trace > 2:
         ll = log_likelihood(classifier, train_toks)
         acc = accuracy(classifier, train_toks)
-        print('         Final    %14.5f    %9.3f' % (ll, acc))
+        print("         Final    %14.5f    %9.3f" % (ll, acc))
 
     # Return the classifier.
     return classifier
 
+
 def calculate_nfmap(train_toks, encoding):
     """
     Construct a map that can be used to compress ``nf`` (which is
@@ -1212,8 +1276,17 @@ def calculate_nfmap(train_toks, encoding):
             nfset.add(sum(val for (id, val) in encoding.encode(tok, label)))
     return dict((nf, i) for (i, nf) in enumerate(nfset))
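A tiny worked case of the compression performed above: the set of observed feature sums maps onto dense indices (a set iterates in arbitrary order, so this sketch sorts it for a stable result):

    >>> nfset = {1, 3, 7}
    >>> dict((nf, i) for (i, nf) in enumerate(sorted(nfset)))
    {1: 0, 3: 1, 7: 2}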
 
-def calculate_deltas(train_toks, classifier, unattested, ffreq_empirical,
-                     nfmap, nfarray, nftranspose, encoding):
+
+def calculate_deltas(
+    train_toks,
+    classifier,
+    unattested,
+    ffreq_empirical,
+    nfmap,
+    nfarray,
+    nftranspose,
+    encoding,
+):
     """
     Calculate the update values for the classifier weights for
     this iteration of IIS.  These update weights are the value of
@@ -1280,12 +1353,12 @@ def calculate_deltas(train_toks, classifier, unattested, ffreq_empirical,
     NEWTON_CONVERGE = 1e-12
     MAX_NEWTON = 300
 
-    deltas = numpy.ones(encoding.length(), 'd')
+    deltas = numpy.ones(encoding.length(), "d")
 
     # Precompute the A matrix:
     # A[nf][id] = sum ( p(fs) * p(label|fs) * f(fs,label) )
     # over all label,fs s.t. num_features[label,fs]=nf
-    A = numpy.zeros((len(nfmap), encoding.length()), 'd')
+    A = numpy.zeros((len(nfmap), encoding.length()), "d")
 
     for tok, label in train_toks:
         dist = classifier.prob_classify(tok)
@@ -1323,24 +1396,24 @@ def calculate_deltas(train_toks, classifier, unattested, ffreq_empirical,
         deltas -= (ffreq_empirical - sum1) / -sum2
 
         # We can stop once we converge.
-        n_error = (numpy.sum(abs((ffreq_empirical-sum1)))/
-                   numpy.sum(abs(deltas)))
+        n_error = numpy.sum(abs((ffreq_empirical - sum1))) / numpy.sum(abs(deltas))
         if n_error < NEWTON_CONVERGE:
             return deltas
 
     return deltas
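The loop above is Newton's method, run for every feature in parallel, on the equation sum1(delta) = empirical frequency. A one-dimensional analogue of the same iteration, solving exp(d) = 2 so the known answer is ln 2:

    >>> import math
    >>> d = 1.0
    >>> for _ in range(5):
    ...     d -= (math.exp(d) - 2.0) / math.exp(d)
    >>> round(d, 4)
    0.6931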
 
+
 ######################################################################
-#{ Classifier Trainer: megam
+# { Classifier Trainer: megam
 ######################################################################
 
 # [xx] possible extension: add support for using implicit file format;
 # this would need to put requirements on what encoding is used.  But
 # we may need this for other maxent classifier trainers that require
 # implicit formats anyway.
-def train_maxent_classifier_with_megam(train_toks, trace=3, encoding=None,
-                                       labels=None, gaussian_prior_sigma=0,
-                                       **kwargs):
+def train_maxent_classifier_with_megam(
+    train_toks, trace=3, encoding=None, labels=None, gaussian_prior_sigma=0, **kwargs
+):
     """
     Train a new ``ConditionalExponentialClassifier``, using the given
     training samples, using the external ``megam`` library.  This
@@ -1354,65 +1427,66 @@ def train_maxent_classifier_with_megam(train_toks, trace=3, encoding=None,
 
     explicit = True
     bernoulli = True
-    if 'explicit' in kwargs:
-        explicit = kwargs['explicit']
-    if 'bernoulli' in kwargs:
-        bernoulli = kwargs['bernoulli']
+    if "explicit" in kwargs:
+        explicit = kwargs["explicit"]
+    if "bernoulli" in kwargs:
+        bernoulli = kwargs["bernoulli"]
 
     # Construct an encoding from the training data.
     if encoding is None:
         # Count cutoff can also be controlled by megam with the -minfc
         # option. Not sure where the best place for it is.
-        count_cutoff = kwargs.get('count_cutoff', 0)
-        encoding = BinaryMaxentFeatureEncoding.train(train_toks, count_cutoff,
-                                                     labels=labels,
-                                                     alwayson_features=True)
+        count_cutoff = kwargs.get("count_cutoff", 0)
+        encoding = BinaryMaxentFeatureEncoding.train(
+            train_toks, count_cutoff, labels=labels, alwayson_features=True
+        )
     elif labels is not None:
-        raise ValueError('Specify encoding or labels, not both')
+        raise ValueError("Specify encoding or labels, not both")
 
     # Write a training file for megam.
     try:
-        fd, trainfile_name = tempfile.mkstemp(prefix='nltk-')
-        with open(trainfile_name, 'w') as trainfile:
-            write_megam_file(train_toks, encoding, trainfile,
-                             explicit=explicit, bernoulli=bernoulli)
+        fd, trainfile_name = tempfile.mkstemp(prefix="nltk-")
+        with open(trainfile_name, "w") as trainfile:
+            write_megam_file(
+                train_toks, encoding, trainfile, explicit=explicit, bernoulli=bernoulli
+            )
         os.close(fd)
     except (OSError, IOError, ValueError) as e:
-        raise ValueError('Error while creating megam training file: %s' % e)
+        raise ValueError("Error while creating megam training file: %s" % e)
 
     # Run megam on the training file.
     options = []
-    options += ['-nobias', '-repeat', '10']
+    options += ["-nobias", "-repeat", "10"]
     if explicit:
-        options += ['-explicit']
+        options += ["-explicit"]
     if not bernoulli:
-        options += ['-fvals']
+        options += ["-fvals"]
     if gaussian_prior_sigma:
         # Lambda is just the precision of the Gaussian prior, i.e. it's the
         # inverse variance, so the parameter conversion is 1.0/sigma**2.
         # See http://www.umiacs.umd.edu/~hal/docs/daume04cg-bfgs.pdf.
-        inv_variance = 1.0 / gaussian_prior_sigma**2
+        inv_variance = 1.0 / gaussian_prior_sigma ** 2
     else:
         inv_variance = 0
-    options += ['-lambda', '%.2f' % inv_variance, '-tune']
+    options += ["-lambda", "%.2f" % inv_variance, "-tune"]
     if trace < 3:
-        options += ['-quiet']
-    if 'max_iter' in kwargs:
-        options += ['-maxi', '%s' % kwargs['max_iter']]
-    if 'll_delta' in kwargs:
+        options += ["-quiet"]
+    if "max_iter" in kwargs:
+        options += ["-maxi", "%s" % kwargs["max_iter"]]
+    if "ll_delta" in kwargs:
         # [xx] this is actually a perplexity delta, not a log
         # likelihood delta
-        options += ['-dpp', '%s' % abs(kwargs['ll_delta'])]
-    if hasattr(encoding, 'cost'):
-        options += ['-multilabel']  # each possible la
-    options += ['multiclass', trainfile_name]
+        options += ["-dpp", "%s" % abs(kwargs["ll_delta"])]
+    if hasattr(encoding, "cost"):
+        options += ["-multilabel"]  # each possible la
+    options += ["multiclass", trainfile_name]
     stdout = call_megam(options)
-    # print './megam_i686.opt ', ' '.join(options)
+    # print('./megam_i686.opt ', ' '.join(options))
     # Delete the training file
     try:
         os.remove(trainfile_name)
     except (OSError, IOError) as e:
-        print('Warning: unable to delete %s: %s' % (trainfile_name, e))
+        print("Warning: unable to delete %s: %s" % (trainfile_name, e))
 
     # Parse the generated weight vector.
     weights = parse_megam_weights(stdout, encoding.length(), explicit)
@@ -1423,56 +1497,58 @@ def train_maxent_classifier_with_megam(train_toks, trace=3, encoding=None,
     # Build the classifier
     return MaxentClassifier(encoding, weights)
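One numeric detail worth pinning down from the function above: megam's ``-lambda`` is the precision (inverse variance) of the Gaussian prior, so a standard deviation converts as 1/sigma**2. With a hypothetical sigma of 2:

    >>> sigma = 2.0
    >>> 1.0 / sigma ** 2
    0.25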
 
+
 ######################################################################
-#{ Classifier Trainer: tadm
+# { Classifier Trainer: tadm
 ######################################################################
 
+
 class TadmMaxentClassifier(MaxentClassifier):
     @classmethod
     def train(cls, train_toks, **kwargs):
-        algorithm = kwargs.get('algorithm', 'tao_lmvm')
-        trace = kwargs.get('trace', 3)
-        encoding = kwargs.get('encoding', None)
-        labels = kwargs.get('labels', None)
-        sigma = kwargs.get('gaussian_prior_sigma', 0)
-        count_cutoff = kwargs.get('count_cutoff', 0)
-        max_iter = kwargs.get('max_iter')
-        ll_delta = kwargs.get('min_lldelta')
+        algorithm = kwargs.get("algorithm", "tao_lmvm")
+        trace = kwargs.get("trace", 3)
+        encoding = kwargs.get("encoding", None)
+        labels = kwargs.get("labels", None)
+        sigma = kwargs.get("gaussian_prior_sigma", 0)
+        count_cutoff = kwargs.get("count_cutoff", 0)
+        max_iter = kwargs.get("max_iter")
+        ll_delta = kwargs.get("min_lldelta")
 
         # Construct an encoding from the training data.
         if not encoding:
-            encoding = TadmEventMaxentFeatureEncoding.train(train_toks,
-                                                            count_cutoff,
-                                                            labels=labels)
+            encoding = TadmEventMaxentFeatureEncoding.train(
+                train_toks, count_cutoff, labels=labels
+            )
 
-        trainfile_fd, trainfile_name = \
-            tempfile.mkstemp(prefix='nltk-tadm-events-', suffix='.gz')
-        weightfile_fd, weightfile_name = \
-            tempfile.mkstemp(prefix='nltk-tadm-weights-')
+        trainfile_fd, trainfile_name = tempfile.mkstemp(
+            prefix="nltk-tadm-events-", suffix=".gz"
+        )
+        weightfile_fd, weightfile_name = tempfile.mkstemp(prefix="nltk-tadm-weights-")
 
-        trainfile = gzip_open_unicode(trainfile_name, 'w')
+        trainfile = gzip_open_unicode(trainfile_name, "w")
         write_tadm_file(train_toks, encoding, trainfile)
         trainfile.close()
 
         options = []
-        options.extend(['-monitor'])
-        options.extend(['-method', algorithm])
+        options.extend(["-monitor"])
+        options.extend(["-method", algorithm])
         if sigma:
-            options.extend(['-l2', '%.6f' % sigma**2])
+            options.extend(["-l2", "%.6f" % sigma ** 2])
         if max_iter:
-            options.extend(['-max_it', '%d' % max_iter])
+            options.extend(["-max_it", "%d" % max_iter])
         if ll_delta:
-            options.extend(['-fatol', '%.6f' % abs(ll_delta)])
-        options.extend(['-events_in', trainfile_name])
-        options.extend(['-params_out', weightfile_name])
+            options.extend(["-fatol", "%.6f" % abs(ll_delta)])
+        options.extend(["-events_in", trainfile_name])
+        options.extend(["-params_out", weightfile_name])
         if trace < 3:
-            options.extend(['2>&1'])
+            options.extend(["2>&1"])
         else:
-            options.extend(['-summary'])
+            options.extend(["-summary"])
 
         call_tadm(options)
 
-        with open(weightfile_name, 'r') as weightfile:
+        with open(weightfile_name, "r") as weightfile:
             weights = parse_tadm_weights(weightfile)
 
         os.remove(trainfile_name)
@@ -1484,12 +1560,15 @@ class TadmMaxentClassifier(MaxentClassifier):
         # Build the classifier
         return cls(encoding, weights)
 
+
 ######################################################################
-#{ Demo
+# { Demo
 ######################################################################
 def demo():
     from nltk.classify.util import names_demo
+
     classifier = names_demo(MaxentClassifier.train)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/classify/maxent.pyc b/nlp_resource_data/nltk/classify/maxent.pyc
deleted file mode 100755 (executable)
index 51f13dc..0000000
Binary files a/nlp_resource_data/nltk/classify/maxent.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 2db484d..6a80b7d
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Interface to Megam Classifier
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -22,24 +22,22 @@ for details.
 
 .. _megam: http://www.umiacs.umd.edu/~hal/megam/index.html
 """
-from __future__ import print_function
-
 import subprocess
 
-from six import string_types
-
-from nltk import compat
 from nltk.internals import find_binary
+
 try:
     import numpy
 except ImportError:
     numpy = None
 
 ######################################################################
-#{ Configuration
+# { Configuration
 ######################################################################
 
 _megam_bin = None
+
+
 def config_megam(bin=None):
     """
     Configure NLTK's interface to the ``megam`` maxent optimization
@@ -52,17 +50,20 @@ def config_megam(bin=None):
     """
     global _megam_bin
     _megam_bin = find_binary(
-        'megam', bin,
-        env_vars=['MEGAM'],
-        binary_names=['megam.opt', 'megam', 'megam_686', 'megam_i686.opt'],
-        url='http://www.umiacs.umd.edu/~hal/megam/index.html')
+        "megam",
+        bin,
+        env_vars=["MEGAM"],
+        binary_names=["megam.opt", "megam", "megam_686", "megam_i686.opt"],
+        url="http://www.umiacs.umd.edu/~hal/megam/index.html",
+    )
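A hedged usage note for the helper above; the path is hypothetical and the call only succeeds if a megam binary is actually installed there:

    >>> from nltk.classify.megam import config_megam
    >>> config_megam('/usr/local/bin/megam')  # doctest: +SKIP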
+
 
 ######################################################################
-#{ Megam Interface Functions
+# { Megam Interface Functions
 ######################################################################
 
-def write_megam_file(train_toks, encoding, stream,
-                     bernoulli=True, explicit=True):
+
+def write_megam_file(train_toks, encoding, stream, bernoulli=True, explicit=True):
     """
     Generate an input file for ``megam`` based on the given corpus of
     classified tokens.
@@ -99,28 +100,28 @@ def write_megam_file(train_toks, encoding, stream,
     # Write the file, which contains one line per instance.
     for featureset, label in train_toks:
         # First, the instance number (or, in the weighted multiclass case, the cost of each label).
-        if hasattr(encoding, 'cost'):
-            stream.write(':'.join(str(encoding.cost(featureset, label, l))
-                                  for l in labels))
+        if hasattr(encoding, "cost"):
+            stream.write(
+                ":".join(str(encoding.cost(featureset, label, l)) for l in labels)
+            )
         else:
-            stream.write('%d' % labelnum[label])
+            stream.write("%d" % labelnum[label])
 
         # For implicit file formats, just list the features that fire
         # for this instance's actual label.
         if not explicit:
-            _write_megam_features(encoding.encode(featureset, label),
-                                  stream, bernoulli)
+            _write_megam_features(encoding.encode(featureset, label), stream, bernoulli)
 
         # For explicit formats, list the features that would fire for
         # any of the possible labels.
         else:
             for l in labels:
-                stream.write(' #')
-                _write_megam_features(encoding.encode(featureset, l),
-                                      stream, bernoulli)
+                stream.write(" #")
+                _write_megam_features(encoding.encode(featureset, l), stream, bernoulli)
 
         # End of the instance.
-        stream.write('\n')
+        stream.write("\n")
+
 
 def parse_megam_weights(s, features_count, explicit=True):
     """
@@ -129,36 +130,40 @@ def parse_megam_weights(s, features_count, explicit=True):
     vector.  This function does not currently handle bias features.
     """
     if numpy is None:
-        raise ValueError('This function requires that numpy be installed')
-    assert explicit, 'non-explicit not supported yet'
-    lines = s.strip().split('\n')
-    weights = numpy.zeros(features_count, 'd')
+        raise ValueError("This function requires that numpy be installed")
+    assert explicit, "non-explicit not supported yet"
+    lines = s.strip().split("\n")
+    weights = numpy.zeros(features_count, "d")
     for line in lines:
         if line.strip():
             fid, weight = line.split()
             weights[int(fid)] = float(weight)
     return weights
 
+
 def _write_megam_features(vector, stream, bernoulli):
     if not vector:
-        raise ValueError('MEGAM classifier requires the use of an '
-                         'always-on feature.')
+        raise ValueError(
+            "MEGAM classifier requires the use of an " "always-on feature."
+        )
     for (fid, fval) in vector:
         if bernoulli:
             if fval == 1:
-                stream.write(' %s' % fid)
+                stream.write(" %s" % fid)
             elif fval != 0:
-                raise ValueError('If bernoulli=True, then all'
-                                 'features must be binary.')
+                raise ValueError(
+                    "If bernoulli=True, then all" "features must be binary."
+                )
         else:
-            stream.write(' %s %s' % (fid, fval))
+            stream.write(" %s %s" % (fid, fval))
+
 
 def call_megam(args):
     """
     Call the ``megam`` binary with the given arguments.
     """
-    if isinstance(args, string_types):
-        raise TypeError('args should be a list of strings')
+    if isinstance(args, str):
+        raise TypeError("args should be a list of strings")
     if _megam_bin is None:
         config_megam()
 
@@ -171,9 +176,9 @@ def call_megam(args):
     if p.returncode != 0:
         print()
         print(stderr)
-        raise OSError('megam command failed!')
+        raise OSError("megam command failed!")
 
-    if isinstance(stdout, string_types):
+    if isinstance(stdout, str):
         return stdout
     else:
-        return stdout.decode('utf-8')
+        return stdout.decode("utf-8")
diff --git a/nlp_resource_data/nltk/classify/megam.pyc b/nlp_resource_data/nltk/classify/megam.pyc
deleted file mode 100755 (executable)
index 8eef864..0000000
Binary files a/nlp_resource_data/nltk/classify/megam.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index b547a7a..abfed1a
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Naive Bayes Classifiers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -29,7 +29,6 @@ sum to one:
 |  P(label|features) = --------------------------------------------
 |                        SUM[l]( P(l) * P(f1|l) * ... * P(fn|l) )
 """
-from __future__ import print_function, unicode_literals
 
 from collections import defaultdict
 
@@ -40,6 +39,7 @@ from nltk.classify.api import ClassifierI
 ##  Naive Bayes Classifier
 ##//////////////////////////////////////////////////////
 
+
 class NaiveBayesClassifier(ClassifierI):
     """
     A Naive Bayes classifier.  Naive Bayes classifiers are
@@ -60,6 +60,7 @@ class NaiveBayesClassifier(ClassifierI):
     you generally should not use 'None' as a feature value for one of
     your own features.
     """
+
     def __init__(self, label_probdist, feature_probdist):
         """
         :param label_probdist: P(label), the probability distribution
@@ -97,7 +98,7 @@ class NaiveBayesClassifier(ClassifierI):
                 if (label, fname) in self._feature_probdist:
                     break
             else:
-                #print 'Ignoring unseen feature %s' % fname
+                # print('Ignoring unseen feature %s' % fname)
                 del featureset[fname]
 
         # Find the log probabilty of each label, given the features.
@@ -116,33 +117,41 @@ class NaiveBayesClassifier(ClassifierI):
                     # nb: This case will never come up if the
                     # classifier was created by
                     # NaiveBayesClassifier.train().
-                    logprob[label] += sum_logs([]) # = -INF.
+                    logprob[label] += sum_logs([])  # = -INF.
 
         return DictionaryProbDist(logprob, normalize=True, log=True)
 
     def show_most_informative_features(self, n=10):
         # Determine the most relevant features, and display them.
         cpdist = self._feature_probdist
-        print('Most Informative Features')
+        print("Most Informative Features")
 
         for (fname, fval) in self.most_informative_features(n):
+
             def labelprob(l):
                 return cpdist[l, fname].prob(fval)
 
-            labels = sorted([l for l in self._labels
-                             if fval in cpdist[l, fname].samples()],
-                            key=labelprob)
+            labels = sorted(
+                [l for l in self._labels if fval in cpdist[l, fname].samples()],
+                key=lambda element: (-labelprob(element), element),
+                reverse=True
+            )
             if len(labels) == 1:
                 continue
             l0 = labels[0]
             l1 = labels[-1]
             if cpdist[l0, fname].prob(fval) == 0:
-                ratio = 'INF'
+                ratio = "INF"
             else:
-                ratio = '%8.1f' % (cpdist[l1, fname].prob(fval) /
-                                   cpdist[l0, fname].prob(fval))
-            print(('%24s = %-14r %6s : %-6s = %s : 1.0' %
-                   (fname, fval, ("%s" % l1)[:6], ("%s" % l0)[:6], ratio)))
+                ratio = "%8.1f" % (
+                    cpdist[l1, fname].prob(fval) / cpdist[l0, fname].prob(fval)
+                )
+            print(
+                (
+                    "%24s = %-14r %6s : %-6s = %s : 1.0"
+                    % (fname, fval, ("%s" % l1)[:6], ("%s" % l0)[:6], ratio)
+                )
+            )
 
     def most_informative_features(self, n=100):
         """
@@ -154,29 +163,33 @@ class NaiveBayesClassifier(ClassifierI):
 
         |  max[ P(fname=fval|label1) / P(fname=fval|label2) ]
         """
-        # The set of (fname, fval) pairs used by this classifier.
-        features = set()
-        # The max & min probability associated w/ each (fname, fval)
-        # pair.  Maps (fname,fval) -> float.
-        maxprob = defaultdict(lambda: 0.0)
-        minprob = defaultdict(lambda: 1.0)
-
-        for (label, fname), probdist in self._feature_probdist.items():
-            for fval in probdist.samples():
-                feature = (fname, fval)
-                features.add(feature)
-                p = probdist.prob(fval)
-                maxprob[feature] = max(p, maxprob[feature])
-                minprob[feature] = min(p, minprob[feature])
-                if minprob[feature] == 0:
-                    features.discard(feature)
-
-        # Convert features to a list, & sort it by how informative
-        # features are.
-        features = sorted(features,
-                          key=lambda feature_:
-                          minprob[feature_]/maxprob[feature_])
-        return features[:n]
+        if hasattr(self, "_most_informative_features"):
+            return self._most_informative_features[:n]
+        else:
+            # The set of (fname, fval) pairs used by this classifier.
+            features = set()
+            # The max & min probability associated w/ each (fname, fval)
+            # pair.  Maps (fname,fval) -> float.
+            maxprob = defaultdict(lambda: 0.0)
+            minprob = defaultdict(lambda: 1.0)
+
+            for (label, fname), probdist in self._feature_probdist.items():
+                for fval in probdist.samples():
+                    feature = (fname, fval)
+                    features.add(feature)
+                    p = probdist.prob(fval)
+                    maxprob[feature] = max(p, maxprob[feature])
+                    minprob[feature] = min(p, minprob[feature])
+                    if minprob[feature] == 0:
+                        features.discard(feature)
+
+            # Convert features to a list, & sort it by how informative
+            # features are.
+            self._most_informative_features = sorted(
+                features,
+                key=lambda feature_: (
+                    minprob[feature_] / maxprob[feature_],
+                    feature_[0],
+                    feature_[1] in [None, False, True],
+                    str(feature_[1]).lower(),
+                ),
+            )
+        return self._most_informative_features[:n]
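The sort key above treats a (fname, fval) pair as informative when its minimum probability across labels is small relative to its maximum. A toy pair of hypothetical per-label probabilities and the resulting ratio:

    >>> minprob, maxprob = 0.01, 0.50
    >>> minprob / maxprob
    0.02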
 
     @classmethod
     def train(cls, labeled_featuresets, estimator=ELEProbDist):
@@ -227,16 +240,18 @@ class NaiveBayesClassifier(ClassifierI):
 
         return cls(label_probdist, feature_probdist)
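For orientation, a minimal end-to-end run of the classifier built above on toy data (the feature name and labels are illustrative only):

    >>> from nltk.classify import NaiveBayesClassifier
    >>> train = [({'last_letter': 'a'}, 'female'), ({'last_letter': 'k'}, 'male')]
    >>> nb = NaiveBayesClassifier.train(train)
    >>> nb.classify({'last_letter': 'a'})
    'female'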
 
+
 ##//////////////////////////////////////////////////////
 ##  Demo
 ##//////////////////////////////////////////////////////
 
+
 def demo():
     from nltk.classify.util import names_demo
+
     classifier = names_demo(NaiveBayesClassifier.train)
     classifier.show_most_informative_features()
 
-if __name__ == '__main__':
-    demo()
-
 
+if __name__ == "__main__":
+    demo()
diff --git a/nlp_resource_data/nltk/classify/naivebayes.pyc b/nlp_resource_data/nltk/classify/naivebayes.pyc
deleted file mode 100755 (executable)
index 3a7aa44..0000000
Binary files a/nlp_resource_data/nltk/classify/naivebayes.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index c8f5511..58621f1
@@ -59,8 +59,8 @@ The features of a sentence are simply the words it contains:
 
 We use the sports sentences as positive examples, the mixed ones as unlabeled examples:
 
-    >>> positive_featuresets = list(map(features, sports_sentences))
-    >>> unlabeled_featuresets = list(map(features, various_sentences))
+    >>> positive_featuresets = map(features, sports_sentences)
+    >>> unlabeled_featuresets = map(features, various_sentences)
     >>> classifier = PositiveNaiveBayesClassifier.train(positive_featuresets,
     ...                                                 unlabeled_featuresets)
 
@@ -85,15 +85,20 @@ from nltk.classify.naivebayes import NaiveBayesClassifier
 ##  Positive Naive Bayes Classifier
 ##//////////////////////////////////////////////////////
 
+
 class PositiveNaiveBayesClassifier(NaiveBayesClassifier):
     @staticmethod
-    def train(positive_featuresets, unlabeled_featuresets, positive_prob_prior=0.5,
-              estimator=ELEProbDist):
+    def train(
+        positive_featuresets,
+        unlabeled_featuresets,
+        positive_prob_prior=0.5,
+        estimator=ELEProbDist,
+    ):
         """
-        :param positive_featuresets: A list of featuresets that are known as positive
+        :param positive_featuresets: An iterable of featuresets that are known as positive
             examples (i.e., their label is ``True``).
 
-        :param unlabeled_featuresets: A list of featuresets whose label is unknown.
+        :param unlabeled_featuresets: An iterable of featuresets whose label is unknown.
 
         :param positive_prob_prior: A prior estimate of the probability of the label
             ``True`` (default 0.5).
@@ -104,28 +109,30 @@ class PositiveNaiveBayesClassifier(NaiveBayesClassifier):
         fnames = set()
 
         # Count up how many times each feature value occurred in positive examples.
+        num_positive_examples = 0
         for featureset in positive_featuresets:
             for fname, fval in featureset.items():
                 positive_feature_freqdist[fname][fval] += 1
                 feature_values[fname].add(fval)
                 fnames.add(fname)
+            num_positive_examples += 1
 
         # Count up how many times each feature value occurred in unlabeled examples.
+        num_unlabeled_examples = 0
         for featureset in unlabeled_featuresets:
             for fname, fval in featureset.items():
                 unlabeled_feature_freqdist[fname][fval] += 1
                 feature_values[fname].add(fval)
                 fnames.add(fname)
+            num_unlabeled_examples += 1
 
         # If a feature didn't have a value given for an instance, then we assume that
         # it gets the implicit value 'None'.
-        num_positive_examples = len(positive_featuresets)
         for fname in fnames:
             count = positive_feature_freqdist[fname].N()
             positive_feature_freqdist[fname][None] += num_positive_examples - count
             feature_values[fname].add(None)
 
-        num_unlabeled_examples = len(unlabeled_featuresets)
         for fname in fnames:
             count = unlabeled_feature_freqdist[fname].N()
             unlabeled_feature_freqdist[fname][None] += num_unlabeled_examples - count
@@ -134,8 +141,9 @@ class PositiveNaiveBayesClassifier(NaiveBayesClassifier):
         negative_prob_prior = 1.0 - positive_prob_prior
 
         # Create the P(label) distribution.
-        label_probdist = DictionaryProbDist({True: positive_prob_prior,
-                                             False: negative_prob_prior})
+        label_probdist = DictionaryProbDist(
+            {True: positive_prob_prior, False: negative_prob_prior}
+        )
 
         # Create the P(fval|label, fname) distribution.
         feature_probdist = {}
@@ -147,24 +155,27 @@ class PositiveNaiveBayesClassifier(NaiveBayesClassifier):
             global_probdist = estimator(freqdist, bins=len(feature_values[fname]))
             negative_feature_probs = {}
             for fval in feature_values[fname]:
-                prob = (global_probdist.prob(fval)
-                        - positive_prob_prior *
-                        feature_probdist[True, fname].prob(fval)) \
-                        / negative_prob_prior
+                prob = (
+                    global_probdist.prob(fval)
+                    - positive_prob_prior * feature_probdist[True, fname].prob(fval)
+                ) / negative_prob_prior
                 # TODO: We need to add some kind of smoothing here, instead of
                 # setting negative probabilities to zero and normalizing.
                 negative_feature_probs[fval] = max(prob, 0.0)
-            feature_probdist[False, fname] = DictionaryProbDist(negative_feature_probs,
-                                                                normalize=True)
+            feature_probdist[False, fname] = DictionaryProbDist(
+                negative_feature_probs, normalize=True
+            )
 
         return PositiveNaiveBayesClassifier(label_probdist, feature_probdist)
 
+
 ##//////////////////////////////////////////////////////
 ##  Demo
 ##//////////////////////////////////////////////////////
 
+
 def demo():
     from nltk.classify.util import partial_names_demo
+
     classifier = partial_names_demo(PositiveNaiveBayesClassifier.train)
     classifier.show_most_informative_features()
-
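
The negative distribution above follows from treating the unlabeled data as a mixture, P(fval) = p * P(fval|True) + (1 - p) * P(fval|False); solving for P(fval|False) gives the expression in the loop. A worked example with made-up numbers:

    # Worked example of the P(fval|False) derivation (toy numbers, not a real corpus).
    positive_prob_prior = 0.5
    p_fval = 0.30                      # global estimate from the unlabeled data
    p_fval_given_true = 0.50           # estimate from the positive examples

    p_fval_given_false = (p_fval - positive_prob_prior * p_fval_given_true) / (
        1.0 - positive_prob_prior
    )
    print(p_fval_given_false)          # ~0.1 (clipped at 0.0 if it came out negative)
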
diff --git a/nlp_resource_data/nltk/classify/positivenaivebayes.pyc b/nlp_resource_data/nltk/classify/positivenaivebayes.pyc
deleted file mode 100755 (executable)
index 92faaf5..0000000
Binary files a/nlp_resource_data/nltk/classify/positivenaivebayes.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/classify/rte_classify.py b/nlp_resource_data/nltk/classify/rte_classify.py
old mode 100755 (executable)
new mode 100644 (file)
index f396d23..0be8c81
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: RTE Classifier
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -16,17 +16,18 @@ the hypothesis is more informative than (i.e not entailed by) the text.
 TO DO: better Named Entity classification
 TO DO: add lemmatization
 """
-from __future__ import print_function
 
 from nltk.tokenize import RegexpTokenizer
 from nltk.classify.util import accuracy, check_megam_config
 from nltk.classify.maxent import MaxentClassifier
 
+
 class RTEFeatureExtractor(object):
     """
     This builds a bag of words for both the text and the hypothesis after
     throwing away some stopwords, then calculates overlap and difference.
     """
+
     def __init__(self, rtepair, stop=True, use_lemmatize=False):
         """
         :param rtepair: a ``RTEPair`` from which features should be extracted
@@ -34,16 +35,32 @@ class RTEFeatureExtractor(object):
         :type stop: bool
         """
         self.stop = stop
-        self.stopwords = set(['a', 'the', 'it', 'they', 'of', 'in', 'to', 'is',
-                              'have', 'are', 'were', 'and', 'very', '.', ','])
-
-        self.negwords = set(['no', 'not', 'never', 'failed', 'rejected',
-                             'denied'])
+        self.stopwords = set(
+            [
+                "a",
+                "the",
+                "it",
+                "they",
+                "of",
+                "in",
+                "to",
+                "is",
+                "have",
+                "are",
+                "were",
+                "and",
+                "very",
+                ".",
+                ",",
+            ]
+        )
+
+        self.negwords = set(["no", "not", "never", "failed", "rejected", "denied"])
         # Try to tokenize so that abbreviations, monetary amounts, email
         # addresses, URLs are single tokens.
-        tokenizer = RegexpTokenizer('[\w.@:/]+|\w+|\$[\d.]+')
+        tokenizer = RegexpTokenizer("[\w.@:/]+|\w+|\$[\d.]+")
 
-        #Get the set of word types for text and hypothesis
+        # Get the set of word types for text and hypothesis
         self.text_tokens = tokenizer.tokenize(rtepair.text)
         self.hyp_tokens = tokenizer.tokenize(rtepair.hyp)
         self.text_words = set(self.text_tokens)
@@ -61,7 +78,6 @@ class RTEFeatureExtractor(object):
         self._hyp_extra = self.hyp_words - self.text_words
         self._txt_extra = self.text_words - self.hyp_words
 
-
     def overlap(self, toktype, debug=False):
         """
         Compute the overlap between text and hypothesis.
@@ -70,11 +86,11 @@ class RTEFeatureExtractor(object):
         :type toktype: 'ne' or 'word'
         """
         ne_overlap = set(token for token in self._overlap if self._ne(token))
-        if toktype == 'ne':
+        if toktype == "ne":
             if debug:
                 print("ne overlap", ne_overlap)
             return ne_overlap
-        elif toktype == 'word':
+        elif toktype == "word":
             if debug:
                 print("word overlap", self._overlap - ne_overlap)
             return self._overlap - ne_overlap
@@ -89,9 +105,9 @@ class RTEFeatureExtractor(object):
         :type toktype: 'ne' or 'word'
         """
         ne_extra = set(token for token in self._hyp_extra if self._ne(token))
-        if toktype == 'ne':
+        if toktype == "ne":
             return ne_extra
-        elif toktype == 'word':
+        elif toktype == "word":
             return self._hyp_extra - ne_extra
         else:
             raise ValueError("Type not recognized: '%s'" % toktype)
@@ -122,13 +138,13 @@ class RTEFeatureExtractor(object):
 def rte_features(rtepair):
     extractor = RTEFeatureExtractor(rtepair)
     features = {}
-    features['alwayson'] = True
-    features['word_overlap'] = len(extractor.overlap('word'))
-    features['word_hyp_extra'] = len(extractor.hyp_extra('word'))
-    features['ne_overlap'] = len(extractor.overlap('ne'))
-    features['ne_hyp_extra'] = len(extractor.hyp_extra('ne'))
-    features['neg_txt'] = len(extractor.negwords & extractor.text_words)
-    features['neg_hyp'] = len(extractor.negwords & extractor.hyp_words)
+    features["alwayson"] = True
+    features["word_overlap"] = len(extractor.overlap("word"))
+    features["word_hyp_extra"] = len(extractor.hyp_extra("word"))
+    features["ne_overlap"] = len(extractor.overlap("ne"))
+    features["ne_hyp_extra"] = len(extractor.hyp_extra("ne"))
+    features["neg_txt"] = len(extractor.negwords & extractor.text_words)
+    features["neg_hyp"] = len(extractor.negwords & extractor.hyp_words)
     return features
 
 
@@ -138,23 +154,26 @@ def rte_featurize(rte_pairs):
 
 def rte_classifier(algorithm):
     from nltk.corpus import rte as rte_corpus
-    train_set  = rte_corpus.pairs(['rte1_dev.xml', 'rte2_dev.xml', 'rte3_dev.xml'])
-    test_set = rte_corpus.pairs(['rte1_test.xml', 'rte2_test.xml', 'rte3_test.xml'])
+
+    train_set = rte_corpus.pairs(["rte1_dev.xml", "rte2_dev.xml", "rte3_dev.xml"])
+    test_set = rte_corpus.pairs(["rte1_test.xml", "rte2_test.xml", "rte3_test.xml"])
     featurized_train_set = rte_featurize(train_set)
     featurized_test_set = rte_featurize(test_set)
     # Train the classifier
-    print('Training classifier...')
-    if algorithm in ['megam', 'BFGS']: # MEGAM based algorithms.
+    print("Training classifier...")
+    if algorithm in ["megam", "BFGS"]:  # MEGAM based algorithms.
         # Ensure that MEGAM is configured first.
         check_megam_config()
         clf = lambda x: MaxentClassifier.train(featurized_train_set, algorithm)
-    elif algorithm in ['GIS', 'IIS']: # Use default GIS/IIS MaxEnt algorithm
+    elif algorithm in ["GIS", "IIS"]:  # Use default GIS/IIS MaxEnt algorithm
         clf = MaxentClassifier.train(featurized_train_set, algorithm)
     else:
-        err_msg = str("RTEClassifier only supports these algorithms:\n "
-                      "'megam', 'BFGS', 'GIS', 'IIS'.\n")
+        err_msg = str(
+            "RTEClassifier only supports these algorithms:\n "
+            "'megam', 'BFGS', 'GIS', 'IIS'.\n"
+        )
         raise Exception(err_msg)
-    print('Testing classifier...')
+    print("Testing classifier...")
     acc = accuracy(clf, featurized_test_set)
-    print('Accuracy: %6.4f' % acc)
+    print("Accuracy: %6.4f" % acc)
     return clf
diff --git a/nlp_resource_data/nltk/classify/rte_classify.pyc b/nlp_resource_data/nltk/classify/rte_classify.pyc
deleted file mode 100755 (executable)
index 05f7b45..0000000
Binary files a/nlp_resource_data/nltk/classify/rte_classify.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/classify/scikitlearn.py b/nlp_resource_data/nltk/classify/scikitlearn.py
old mode 100755 (executable)
new mode 100644 (file)
index b7c7b6d..90b450b
@@ -30,13 +30,9 @@ best 1000 features:
 ...                      ('nb', MultinomialNB())])
 >>> classif = SklearnClassifier(pipeline)
 """
-from __future__ import print_function, unicode_literals
-
-from six.moves import zip
 
 from nltk.classify.api import ClassifierI
 from nltk.probability import DictionaryProbDist
-from nltk import compat
 
 try:
     from sklearn.feature_extraction import DictVectorizer
@@ -44,10 +40,9 @@ try:
 except ImportError:
     pass
 
-__all__ = ['SklearnClassifier']
+__all__ = ["SklearnClassifier"]
 
 
-@compat.python_2_unicode_compatible
 class SklearnClassifier(ClassifierI):
     """Wrapper for scikit-learn classifiers."""
 
@@ -122,13 +117,13 @@ class SklearnClassifier(ClassifierI):
 
     def _make_probdist(self, y_proba):
         classes = self._encoder.classes_
-        return DictionaryProbDist(dict((classes[i], p)
-                                       for i, p in enumerate(y_proba)))
+        return DictionaryProbDist(dict((classes[i], p) for i, p in enumerate(y_proba)))
 
 
 # skip doctests if scikit-learn is not installed
 def setup_module(module):
     from nose import SkipTest
+
     try:
         import sklearn
     except ImportError:
@@ -143,11 +138,15 @@ if __name__ == "__main__":
     # Bernoulli Naive Bayes is designed for binary classification. We set the
     # binarize option to False since we know we're passing boolean features.
     print("scikit-learn Naive Bayes:")
-    names_demo(SklearnClassifier(BernoulliNB(binarize=False)).train,
-               features=names_demo_features)
+    names_demo(
+        SklearnClassifier(BernoulliNB(binarize=False)).train,
+        features=names_demo_features,
+    )
 
     # The C parameter on logistic regression (MaxEnt) controls regularization.
     # The higher it's set, the less regularized the classifier is.
     print("\n\nscikit-learn logistic regression:")
-    names_demo(SklearnClassifier(LogisticRegression(C=1000)).train,
-               features=names_demo_features)
+    names_demo(
+        SklearnClassifier(LogisticRegression(C=1000)).train,
+        features=names_demo_features,
+    )
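
A minimal usage sketch for the wrapper, assuming scikit-learn is installed; the two featuresets below are made up:

    # Train the wrapper on labeled featuresets (dict -> label pairs).
    from sklearn.naive_bayes import BernoulliNB
    from nltk.classify.scikitlearn import SklearnClassifier

    train = [
        ({"a": True, "b": False}, "pos"),
        ({"a": False, "b": True}, "neg"),
    ]
    classif = SklearnClassifier(BernoulliNB(binarize=False)).train(train)
    print(classif.classify({"a": True, "b": False}))   # expected 'pos'
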
diff --git a/nlp_resource_data/nltk/classify/scikitlearn.pyc b/nlp_resource_data/nltk/classify/scikitlearn.pyc
deleted file mode 100755 (executable)
index baa3b0b..0000000
Binary files a/nlp_resource_data/nltk/classify/scikitlearn.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/classify/senna.py b/nlp_resource_data/nltk/classify/senna.py
old mode 100755 (executable)
new mode 100644 (file)
index d8d71db..35bd402
@@ -1,7 +1,7 @@
 # encoding: utf-8
 # Natural Language Toolkit: Senna Interface
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Rami Al-Rfou' <ralrfou@cs.stonybrook.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -29,7 +29,6 @@ The input is:
 
 Note: Unit tests for this module can be found in test/unit/test_senna.py
 
-    >>> from __future__ import unicode_literals
     >>> from nltk.classify import Senna
     >>> pipeline = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner'])
     >>> sent = 'Dusseldorf is an international business center'.split()
@@ -38,44 +37,40 @@ Note: Unit tests for this module can be found in test/unit/test_senna.py
     ('international', 'I-NP', 'O', 'JJ'), ('business', 'I-NP', 'O', 'NN'), ('center', 'I-NP', 'O', 'NN')]
 """
 
-
-from __future__ import unicode_literals
 from os import path, sep, environ
 from subprocess import Popen, PIPE
 from platform import architecture, system
 
-from six import text_type
-
 from nltk.tag.api import TaggerI
-from nltk.compat import python_2_unicode_compatible
 
-_senna_url = 'http://ml.nec-labs.com/senna/'
+_senna_url = "http://ml.nec-labs.com/senna/"
 
 
-@python_2_unicode_compatible
 class Senna(TaggerI):
 
-    SUPPORTED_OPERATIONS = ['pos', 'chk', 'ner']
+    SUPPORTED_OPERATIONS = ["pos", "chk", "ner"]
 
-    def __init__(self, senna_path, operations, encoding='utf-8'):
+    def __init__(self, senna_path, operations, encoding="utf-8"):
         self._encoding = encoding
         self._path = path.normpath(senna_path) + sep
 
         # Verifies the existence of the executable on the self._path first
-        #senna_binary_file_1 = self.executable(self._path)
+        # senna_binary_file_1 = self.executable(self._path)
         exe_file_1 = self.executable(self._path)
         if not path.isfile(exe_file_1):
             # Check for the system environment
-            if 'SENNA' in environ:
-                #self._path = path.join(environ['SENNA'],'')
-                self._path = path.normpath(environ['SENNA']) + sep
+            if "SENNA" in environ:
+                # self._path = path.join(environ['SENNA'],'')
+                self._path = path.normpath(environ["SENNA"]) + sep
                 exe_file_2 = self.executable(self._path)
                 if not path.isfile(exe_file_2):
-                    raise OSError("Senna executable expected at %s or %s but not found" % (exe_file_1,exe_file_2))
+                    raise OSError(
+                        "Senna executable expected at %s or %s but not found"
+                        % (exe_file_1, exe_file_2)
+                    )
 
         self.operations = operations
 
-
     def executable(self, base_path):
         """
         The function that determines the system specific binary that should be
@@ -83,16 +78,16 @@ class Senna(TaggerI):
         be used.
         """
         os_name = system()
-        if os_name == 'Linux':
+        if os_name == "Linux":
             bits = architecture()[0]
-            if bits == '64bit':
-                return path.join(base_path, 'senna-linux64')
-            return path.join(base_path, 'senna-linux32')
-        if os_name == 'Windows':
-            return path.join(base_path, 'senna-win32.exe')
-        if os_name == 'Darwin':
-            return path.join(base_path, 'senna-osx')
-        return path.join(base_path, 'senna')
+            if bits == "64bit":
+                return path.join(base_path, "senna-linux64")
+            return path.join(base_path, "senna-linux32")
+        if os_name == "Windows":
+            return path.join(base_path, "senna-win32.exe")
+        if os_name == "Darwin":
+            return path.join(base_path, "senna-osx")
+        return path.join(base_path, "senna")
 
     def _map(self):
         """
@@ -104,7 +99,7 @@ class Senna(TaggerI):
         for operation in Senna.SUPPORTED_OPERATIONS:
             if operation in self.operations:
                 _map[operation] = i
-                i+= 1
+                i += 1
         return _map
 
     def tag(self, tokens):
@@ -122,16 +117,24 @@ class Senna(TaggerI):
         encoding = self._encoding
 
         if not path.isfile(self.executable(self._path)):
-            raise OSError("Senna executable expected at %s but not found" % self.executable(self._path))
-
+            raise OSError(
+                "Senna executable expected at %s but not found"
+                % self.executable(self._path)
+            )
 
         # Build the senna command to run the tagger
-        _senna_cmd = [self.executable(self._path), '-path', self._path, '-usrtokens', '-iobtags']
-        _senna_cmd.extend(['-'+op for op in self.operations])
+        _senna_cmd = [
+            self.executable(self._path),
+            "-path",
+            self._path,
+            "-usrtokens",
+            "-iobtags",
+        ]
+        _senna_cmd.extend(["-" + op for op in self.operations])
 
         # Serialize the actual sentences to a temporary string
-        _input = '\n'.join((' '.join(x) for x in sentences))+'\n'
-        if isinstance(_input, text_type) and encoding:
+        _input = "\n".join((" ".join(x) for x in sentences)) + "\n"
+        if isinstance(_input, str) and encoding:
             _input = _input.encode(encoding)
 
         # Run the tagger and get the output
@@ -141,7 +144,7 @@ class Senna(TaggerI):
 
         # Check the return code.
         if p.returncode != 0:
-            raise RuntimeError('Senna command failed! Details: %s' % stderr)
+            raise RuntimeError("Senna command failed! Details: %s" % stderr)
 
         if encoding:
             senna_output = stdout.decode(encoding)
@@ -157,18 +160,19 @@ class Senna(TaggerI):
                 sentence_index += 1
                 token_index = 0
                 continue
-            tags = tagged_word.split('\t')
+            tags = tagged_word.split("\t")
             result = {}
             for tag in map_:
-              result[tag] = tags[map_[tag]].strip()
+                result[tag] = tags[map_[tag]].strip()
             try:
-              result['word'] = sentences[sentence_index][token_index]
+                result["word"] = sentences[sentence_index][token_index]
             except IndexError:
-              raise IndexError(
-                "Misalignment error occurred at sentence number %d. Possible reason"
-                " is that the sentence size exceeded the maximum size. Check the "
-                "documentation of Senna class for more information."
-                % sentence_index)
+                raise IndexError(
+                    "Misalignment error occurred at sentence number %d. Possible reason"
+                    " is that the sentence size exceeded the maximum size. Check the "
+                    "documentation of Senna class for more information."
+                    % sentence_index
+                )
             tagged_sentences[-1].append(result)
             token_index += 1
         return tagged_sentences
@@ -177,7 +181,8 @@ class Senna(TaggerI):
 # skip doctests if Senna is not installed
 def setup_module(module):
     from nose import SkipTest
+
     try:
-        tagger = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner'])
+        tagger = Senna("/usr/share/senna-v3.0", ["pos", "chk", "ner"])
     except OSError:
         raise SkipTest("Senna executable not found")
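
A sketch of the operation-to-column mapping built by _map() above: output columns follow SUPPORTED_OPERATIONS order, restricted to the operations actually requested (the selection below is hypothetical):

    # Same loop as _map(), outside the class for illustration.
    SUPPORTED_OPERATIONS = ["pos", "chk", "ner"]
    operations = ["pos", "ner"]        # hypothetical user selection

    _map, i = {}, 0
    for operation in SUPPORTED_OPERATIONS:
        if operation in operations:
            _map[operation] = i
            i += 1
    print(_map)                        # {'pos': 0, 'ner': 1}
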
diff --git a/nlp_resource_data/nltk/classify/senna.pyc b/nlp_resource_data/nltk/classify/senna.pyc
deleted file mode 100755 (executable)
index c08c733..0000000
Binary files a/nlp_resource_data/nltk/classify/senna.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/classify/svm.py b/nlp_resource_data/nltk/classify/svm.py
old mode 100755 (executable)
new mode 100644 (file)
index 98a4008..544f859
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: SVM-based classifier
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Leon Derczynski <leon@dcs.shef.ac.uk>
 #
 # URL: <http://nltk.org/>
@@ -10,6 +10,8 @@ nltk.classify.svm was deprecated. For classification based
 on support vector machines SVMs use nltk.classify.scikitlearn
 (or `scikit-learn <http://scikit-learn.org>`_ directly).
 """
+
+
 class SvmClassifier(object):
     def __init__(self, *args, **kwargs):
         raise NotImplementedError(__doc__)
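
As the docstring suggests, SVM classification now goes through nltk.classify.scikitlearn (or scikit-learn directly); a hedged sketch of that replacement, with invented training data:

    # Suggested replacement for the deprecated module: wrap sklearn's SVC.
    from sklearn.svm import SVC
    from nltk.classify.scikitlearn import SklearnClassifier

    train = [({"f": 1.0}, "A"), ({"f": -1.0}, "B")]
    svm_classifier = SklearnClassifier(SVC()).train(train)
    print(svm_classifier.classify({"f": 0.8}))   # expected 'A'
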
diff --git a/nlp_resource_data/nltk/classify/svm.pyc b/nlp_resource_data/nltk/classify/svm.pyc
deleted file mode 100755 (executable)
index a5c547e..0000000
Binary files a/nlp_resource_data/nltk/classify/svm.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/classify/tadm.py b/nlp_resource_data/nltk/classify/tadm.py
old mode 100755 (executable)
new mode 100644 (file)
index 615523c..8780699
@@ -1,30 +1,29 @@
 # Natural Language Toolkit: Interface to TADM Classifier
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Joseph Frazee <jfrazee@mail.utexas.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals
 
 import sys
 import subprocess
 
-from six import string_types
-
 from nltk.internals import find_binary
+
 try:
     import numpy
 except ImportError:
     pass
 
 _tadm_bin = None
+
+
 def config_tadm(bin=None):
     global _tadm_bin
     _tadm_bin = find_binary(
-        'tadm', bin,
-        env_vars=['TADM'],
-        binary_names=['tadm'],
-        url='http://tadm.sf.net')
+        "tadm", bin, env_vars=["TADM"], binary_names=["tadm"], url="http://tadm.sf.net"
+    )
+
 
 def write_tadm_file(train_toks, encoding, stream):
     """
@@ -48,17 +47,18 @@ def write_tadm_file(train_toks, encoding, stream):
     # http://sf.net/forum/forum.php?thread_id=1675097&forum_id=473054
     labels = encoding.labels()
     for featureset, label in train_toks:
-        length_line = '%d\n' % len(labels)
+        length_line = "%d\n" % len(labels)
         stream.write(length_line)
         for known_label in labels:
             v = encoding.encode(featureset, known_label)
-            line = '%d %d %s\n' % (
+            line = "%d %d %s\n" % (
                 int(label == known_label),
                 len(v),
-                ' '.join('%d %d' % u for u in v)
+                " ".join("%d %d" % u for u in v),
             )
             stream.write(line)
 
+
 def parse_tadm_weights(paramfile):
     """
     Given the stdout output generated by ``tadm`` when training a
@@ -68,14 +68,15 @@ def parse_tadm_weights(paramfile):
     weights = []
     for line in paramfile:
         weights.append(float(line.strip()))
-    return numpy.array(weights, 'd')
+    return numpy.array(weights, "d")
+
 
 def call_tadm(args):
     """
     Call the ``tadm`` binary with the given arguments.
     """
-    if isinstance(args, string_types):
-        raise TypeError('args should be a list of strings')
+    if isinstance(args, str):
+        raise TypeError("args should be a list of strings")
     if _tadm_bin is None:
         config_tadm()
 
@@ -88,26 +89,33 @@ def call_tadm(args):
     if p.returncode != 0:
         print()
         print(stderr)
-        raise OSError('tadm command failed!')
+        raise OSError("tadm command failed!")
+
 
 def names_demo():
     from nltk.classify.util import names_demo
     from nltk.classify.maxent import TadmMaxentClassifier
+
     classifier = names_demo(TadmMaxentClassifier.train)
 
+
 def encoding_demo():
     import sys
     from nltk.classify.maxent import TadmEventMaxentFeatureEncoding
-    tokens = [({'f0':1, 'f1':1, 'f3':1}, 'A'),
-              ({'f0':1, 'f2':1, 'f4':1}, 'B'),
-              ({'f0':2, 'f2':1, 'f3':1, 'f4':1}, 'A')]
+
+    tokens = [
+        ({"f0": 1, "f1": 1, "f3": 1}, "A"),
+        ({"f0": 1, "f2": 1, "f4": 1}, "B"),
+        ({"f0": 2, "f2": 1, "f3": 1, "f4": 1}, "A"),
+    ]
     encoding = TadmEventMaxentFeatureEncoding.train(tokens)
     write_tadm_file(tokens, encoding, sys.stdout)
     print()
     for i in range(encoding.length()):
-        print('%s --> %d' % (encoding.describe(i), i))
+        print("%s --> %d" % (encoding.describe(i), i))
     print()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     encoding_demo()
     names_demo()
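
A sketch of the event-file layout that write_tadm_file() emits, substituting a hand-built encoding for TadmEventMaxentFeatureEncoding:

    # One training token, encoded per label as (feature_id, count) pairs.
    import sys

    labels = ["A", "B"]
    encoded = {"A": [(0, 1), (1, 1)], "B": [(0, 1), (2, 1)]}
    true_label = "A"

    sys.stdout.write("%d\n" % len(labels))
    for known_label in labels:
        v = encoded[known_label]
        sys.stdout.write(
            "%d %d %s\n"
            % (int(true_label == known_label), len(v), " ".join("%d %d" % u for u in v))
        )
    # Output:
    # 2
    # 1 2 0 1 1 1
    # 0 2 0 1 2 1
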
diff --git a/nlp_resource_data/nltk/classify/tadm.pyc b/nlp_resource_data/nltk/classify/tadm.pyc
deleted file mode 100755 (executable)
index 886063a..0000000
Binary files a/nlp_resource_data/nltk/classify/tadm.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/classify/textcat.py b/nlp_resource_data/nltk/classify/textcat.py
old mode 100755 (executable)
new mode 100644 (file)
index 10c3ad2..97545d5
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Language ID module using TextCat algorithm
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Avital Pekker <avital.pekker@utoronto.ca>
 #
 # URL: <http://nltk.org/>
 """
 A module for language identification using the TextCat algorithm.
 An implementation of the text categorization algorithm
-presented in Cavnar, W. B. and J. M. Trenkle, 
+presented in Cavnar, W. B. and J. M. Trenkle,
 "N-Gram-Based Text Categorization".
 
-The algorithm takes advantage of Zipf's law and uses 
+The algorithm takes advantage of Zipf's law and uses
 n-gram frequencies to profile languages and text-yet to
 be identified-then compares using a distance measure.
 
 Language n-grams are provided by the "An Crubadan"
-project. A corpus reader was created seperately to read
+project. A corpus reader was created separately to read
 those files.
 
 For details regarding the algorithm, see:
@@ -28,17 +28,10 @@ For details about An Crubadan, see:
 http://borel.slu.edu/crubadan/index.html
 """
 
-# Ensure that literal strings default to unicode rather than str.
-from __future__ import print_function, unicode_literals
+from sys import maxsize
 
-from nltk.compat import PY3
 from nltk.util import trigrams
 
-if PY3:
-    from sys import maxsize
-else:
-    from sys import maxint
-
 # Note: this is NOT "re" you're likely used to. The regex module
 # is an alternative to the standard re module that supports
 # Unicode codepoint properties with the \p{} syntax.
@@ -51,43 +44,47 @@ except ImportError:
 ##  Language identification using TextCat
 ######################################################################
 
+
 class TextCat(object):
 
     _corpus = None
     fingerprints = {}
     _START_CHAR = "<"
     _END_CHAR = ">"
-    
+
     last_distances = {}
-    
+
     def __init__(self):
         if not re:
-            raise EnvironmentError("classify.textcat requires the regex module that "
-                                   "supports unicode. Try '$ pip install regex' and "
-                                   "see https://pypi.python.org/pypi/regex for "
-                                   "further details.")
+            raise EnvironmentError(
+                "classify.textcat requires the regex module that "
+                "supports unicode. Try '$ pip install regex' and "
+                "see https://pypi.python.org/pypi/regex for "
+                "further details."
+            )
 
         from nltk.corpus import crubadan
+
         self._corpus = crubadan
         # Load all language ngrams into cache
         for lang in self._corpus.langs():
             self._corpus.lang_freq(lang)
-        
+
     def remove_punctuation(self, text):
-        ''' Get rid of punctuation except apostrophes '''
+        """ Get rid of punctuation except apostrophes """
         return re.sub(r"[^\P{P}\']+", "", text)
-    
+
     def profile(self, text):
-        ''' Create FreqDist of trigrams within text '''
+        """ Create FreqDist of trigrams within text """
         from nltk import word_tokenize, FreqDist
 
         clean_text = self.remove_punctuation(text)
         tokens = word_tokenize(clean_text)
-        
+
         fingerprint = FreqDist()
         for t in tokens:
             token_trigram_tuples = trigrams(self._START_CHAR + t + self._END_CHAR)
-            token_trigrams = [''.join(tri) for tri in token_trigram_tuples]
+            token_trigrams = ["".join(tri) for tri in token_trigram_tuples]
 
             for cur_trigram in token_trigrams:
                 if cur_trigram in fingerprint:
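
The fingerprint built here is over character trigrams of each token padded with the start/end markers; for example:

    # Character trigrams of a padded token, as in profile() above.
    from nltk.util import trigrams

    token = "<" + "cat" + ">"                 # _START_CHAR + token + _END_CHAR
    print(["".join(tri) for tri in trigrams(token)])
    # ['<ca', 'cat', 'at>']
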
@@ -96,10 +93,10 @@ class TextCat(object):
                     fingerprint[cur_trigram] = 1
 
         return fingerprint
-        
+
     def calc_dist(self, lang, trigram, text_profile):
-        ''' Calculate the "out-of-place" measure between the
-            text and language profile for a single trigram '''
+        """ Calculate the "out-of-place" measure between the
+            text and language profile for a single trigram """
 
         lang_fd = self._corpus.lang_freq(lang)
         dist = 0
@@ -108,23 +105,20 @@ class TextCat(object):
             idx_lang_profile = list(lang_fd.keys()).index(trigram)
             idx_text = list(text_profile.keys()).index(trigram)
 
-            #print(idx_lang_profile, ", ", idx_text)
-            dist = abs(idx_lang_profile - idx_text) 
+            # print(idx_lang_profile, ", ", idx_text)
+            dist = abs(idx_lang_profile - idx_text)
         else:
             # Arbitrary but should be larger than
             # any possible trigram file length
             # in terms of total lines
-            if PY3:
-                dist = maxsize
-            else:
-                dist = maxint
+            dist = maxsize
 
         return dist
-        
+
     def lang_dists(self, text):
-        ''' Calculate the "out-of-place" measure between
-            the text and all languages '''
-        
+        """ Calculate the "out-of-place" measure between
+            the text and all languages """
+
         distances = {}
         profile = self.profile(text)
         # For all the languages
@@ -134,36 +128,47 @@ class TextCat(object):
             lang_dist = 0
             for trigram in profile:
                 lang_dist += self.calc_dist(lang, trigram, profile)
-        
+
             distances[lang] = lang_dist
-            
+
         return distances
-    
+
     def guess_language(self, text):
-        ''' Find the language with the min distance
-            to the text and return its ISO 639-3 code '''
+        """ Find the language with the min distance
+            to the text and return its ISO 639-3 code """
         self.last_distances = self.lang_dists(text)
-        
+
         return min(self.last_distances, key=self.last_distances.get)
         #################################################')
 
+
 def demo():
     from nltk.corpus import udhr
 
-    langs = ['Kurdish-UTF8', 'Abkhaz-UTF8', 'Farsi_Persian-UTF8',
-             'Hindi-UTF8', 'Hawaiian-UTF8', 'Russian-UTF8', 'Vietnamese-UTF8',
-             'Serbian_Srpski-UTF8','Esperanto-UTF8']
-
-    friendly = {'kmr':'Northern Kurdish',
-                'abk':'Abkhazian',
-                'pes':'Iranian Persian',
-                'hin':'Hindi',
-                'haw':'Hawaiian',
-                'rus':'Russian',
-                'vie':'Vietnamese',
-                'srp':'Serbian',
-                'epo':'Esperanto'}
-        
+    langs = [
+        "Kurdish-UTF8",
+        "Abkhaz-UTF8",
+        "Farsi_Persian-UTF8",
+        "Hindi-UTF8",
+        "Hawaiian-UTF8",
+        "Russian-UTF8",
+        "Vietnamese-UTF8",
+        "Serbian_Srpski-UTF8",
+        "Esperanto-UTF8",
+    ]
+
+    friendly = {
+        "kmr": "Northern Kurdish",
+        "abk": "Abkhazian",
+        "pes": "Iranian Persian",
+        "hin": "Hindi",
+        "haw": "Hawaiian",
+        "rus": "Russian",
+        "vie": "Vietnamese",
+        "srp": "Serbian",
+        "epo": "Esperanto",
+    }
+
     tc = TextCat()
 
     for cur_lang in langs:
@@ -172,22 +177,22 @@ def demo():
         rows = len(raw_sentences) - 1
         cols = list(map(len, raw_sentences))
 
-        sample = ''
-          
+        sample = ""
+
         # Generate a sample text of the language
         for i in range(0, rows):
-            cur_sent = ''
+            cur_sent = ""
             for j in range(0, cols[i]):
-                cur_sent += ' ' + raw_sentences[i][j]
-            
+                cur_sent += " " + raw_sentences[i][j]
+
             sample += cur_sent
-          
+
         # Try to detect what it is
-        print('Language snippet: ' + sample[0:140] + '...')
+        print("Language snippet: " + sample[0:140] + "...")
         guess = tc.guess_language(sample)
-        print('Language detection: %s (%s)' % (guess, friendly[guess]))
-        print('#' * 140)
+        print("Language detection: %s (%s)" % (guess, friendly[guess]))
+        print("#" * 140)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
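
A standalone sketch of the "out-of-place" measure from calc_dist(), with two toy rank-ordered profiles; unseen trigrams take the maxsize penalty exactly as above:

    # Distance per trigram = difference of its ranks in the two profiles.
    from sys import maxsize

    lang_profile = ["the", "he ", "e a"]      # most frequent trigram first
    text_profile = ["e a", "the", "xyz"]

    dist = 0
    for trigram in text_profile:
        if trigram in lang_profile:
            dist += abs(lang_profile.index(trigram) - text_profile.index(trigram))
        else:
            dist += maxsize                   # unseen trigram: maximal penalty
    print(dist)                               # huge: 'xyz' is out of profile
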
diff --git a/nlp_resource_data/nltk/classify/textcat.pyc b/nlp_resource_data/nltk/classify/textcat.pyc
deleted file mode 100755 (executable)
index ce0a5cb..0000000
Binary files a/nlp_resource_data/nltk/classify/textcat.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/classify/util.py b/nlp_resource_data/nltk/classify/util.py
old mode 100755 (executable)
new mode 100644 (file)
index bc80933..b9d1986
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Classifier Utility Functions
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com> (minor additions)
 # URL: <http://nltk.org/>
@@ -9,16 +9,15 @@
 """
 Utility functions and classes for classifiers.
 """
-from __future__ import print_function, division
 
 import math
 
-#from nltk.util import Deprecated
-import nltk.classify.util # for accuracy & log_likelihood
+# from nltk.util import Deprecated
+import nltk.classify.util  # for accuracy & log_likelihood
 from nltk.util import LazyMap
 
 ######################################################################
-#{ Helper Functions
+# { Helper Functions
 ######################################################################
 
 # alternative name possibility: 'map_featurefunc()'?
@@ -61,12 +60,15 @@ def apply_features(feature_func, toks, labeled=None):
     if labeled is None:
         labeled = toks and isinstance(toks[0], (tuple, list))
     if labeled:
+
         def lazy_func(labeled_token):
             return (feature_func(labeled_token[0]), labeled_token[1])
+
         return LazyMap(lazy_func, toks)
     else:
         return LazyMap(feature_func, toks)
 
+
 def attested_labels(tokens):
     """
     :return: A list of all labels that are attested in the given list
@@ -78,11 +80,13 @@ def attested_labels(tokens):
     """
     return tuple(set(label for (tok, label) in tokens))
 
+
 def log_likelihood(classifier, gold):
     results = classifier.prob_classify_many([fs for (fs, l) in gold])
     ll = [pdist.prob(l) for ((fs, l), pdist) in zip(gold, results)]
     return math.log(sum(ll) / len(ll))
 
+
 def accuracy(classifier, gold):
     results = classifier.classify_many([fs for (fs, l) in gold])
     correct = [l == r for ((fs, l), r) in zip(gold, results)]
@@ -91,6 +95,7 @@ def accuracy(classifier, gold):
     else:
         return 0
 
+
 class CutoffChecker(object):
     """
     A helper class that implements cutoff checks based on number of
@@ -99,12 +104,13 @@ class CutoffChecker(object):
     Accuracy cutoffs are also implemented, but they're almost never
     a good idea to use.
     """
+
     def __init__(self, cutoffs):
         self.cutoffs = cutoffs.copy()
-        if 'min_ll' in cutoffs:
-            cutoffs['min_ll'] = -abs(cutoffs['min_ll'])
-        if 'min_lldelta' in cutoffs:
-            cutoffs['min_lldelta'] = abs(cutoffs['min_lldelta'])
+        if "min_ll" in cutoffs:
+            cutoffs["min_ll"] = -abs(cutoffs["min_ll"])
+        if "min_lldelta" in cutoffs:
+            cutoffs["min_lldelta"] = abs(cutoffs["min_lldelta"])
         self.ll = None
         self.acc = None
         self.iter = 1
@@ -112,66 +118,76 @@ class CutoffChecker(object):
     def check(self, classifier, train_toks):
         cutoffs = self.cutoffs
         self.iter += 1
-        if 'max_iter' in cutoffs and self.iter >= cutoffs['max_iter']:
-            return True # iteration cutoff.
+        if "max_iter" in cutoffs and self.iter >= cutoffs["max_iter"]:
+            return True  # iteration cutoff.
 
         new_ll = nltk.classify.util.log_likelihood(classifier, train_toks)
         if math.isnan(new_ll):
             return True
 
-        if 'min_ll' in cutoffs or 'min_lldelta' in cutoffs:
-            if 'min_ll' in cutoffs and new_ll >= cutoffs['min_ll']:
-                return True # log likelihood cutoff
-            if ('min_lldelta' in cutoffs and self.ll and
-                ((new_ll - self.ll) <= abs(cutoffs['min_lldelta']))):
-                return True # log likelihood delta cutoff
+        if "min_ll" in cutoffs or "min_lldelta" in cutoffs:
+            if "min_ll" in cutoffs and new_ll >= cutoffs["min_ll"]:
+                return True  # log likelihood cutoff
+            if (
+                "min_lldelta" in cutoffs
+                and self.ll
+                and ((new_ll - self.ll) <= abs(cutoffs["min_lldelta"]))
+            ):
+                return True  # log likelihood delta cutoff
             self.ll = new_ll
 
-        if 'max_acc' in cutoffs or 'min_accdelta' in cutoffs:
-            new_acc = nltk.classify.util.log_likelihood(
-                classifier, train_toks)
-            if 'max_acc' in cutoffs and new_acc >= cutoffs['max_acc']:
-                return True # log likelihood cutoff
-            if ('min_accdelta' in cutoffs and self.acc and
-                ((new_acc - self.acc) <= abs(cutoffs['min_accdelta']))):
-                return True # log likelihood delta cutoff
+        if "max_acc" in cutoffs or "min_accdelta" in cutoffs:
+            new_acc = nltk.classify.util.log_likelihood(classifier, train_toks)
+            if "max_acc" in cutoffs and new_acc >= cutoffs["max_acc"]:
+                return True  # log likelihood cutoff
+            if (
+                "min_accdelta" in cutoffs
+                and self.acc
+                and ((new_acc - self.acc) <= abs(cutoffs["min_accdelta"]))
+            ):
+                return True  # log likelihood delta cutoff
             self.acc = new_acc
 
-            return False # no cutoff reached.
+            return False  # no cutoff reached.
+
 
 ######################################################################
-#{ Demos
+# { Demos
 ######################################################################
 
+
 def names_demo_features(name):
     features = {}
-    features['alwayson'] = True
-    features['startswith'] = name[0].lower()
-    features['endswith'] = name[-1].lower()
-    for letter in 'abcdefghijklmnopqrstuvwxyz':
-        features['count(%s)' % letter] = name.lower().count(letter)
-        features['has(%s)' % letter] = letter in name.lower()
+    features["alwayson"] = True
+    features["startswith"] = name[0].lower()
+    features["endswith"] = name[-1].lower()
+    for letter in "abcdefghijklmnopqrstuvwxyz":
+        features["count(%s)" % letter] = name.lower().count(letter)
+        features["has(%s)" % letter] = letter in name.lower()
     return features
 
+
 def binary_names_demo_features(name):
     features = {}
-    features['alwayson'] = True
-    features['startswith(vowel)'] = name[0].lower() in 'aeiouy'
-    features['endswith(vowel)'] = name[-1].lower() in 'aeiouy'
-    for letter in 'abcdefghijklmnopqrstuvwxyz':
-        features['count(%s)' % letter] = name.lower().count(letter)
-        features['has(%s)' % letter] = letter in name.lower()
-        features['startswith(%s)' % letter] = (letter == name[0].lower())
-        features['endswith(%s)' % letter] = (letter == name[-1].lower())
+    features["alwayson"] = True
+    features["startswith(vowel)"] = name[0].lower() in "aeiouy"
+    features["endswith(vowel)"] = name[-1].lower() in "aeiouy"
+    for letter in "abcdefghijklmnopqrstuvwxyz":
+        features["count(%s)" % letter] = name.lower().count(letter)
+        features["has(%s)" % letter] = letter in name.lower()
+        features["startswith(%s)" % letter] = letter == name[0].lower()
+        features["endswith(%s)" % letter] = letter == name[-1].lower()
     return features
 
+
 def names_demo(trainer, features=names_demo_features):
     from nltk.corpus import names
     import random
 
     # Construct a list of classified names, using the names corpus.
-    namelist = ([(name, 'male') for name in names.words('male.txt')] +
-                [(name, 'female') for name in names.words('female.txt')])
+    namelist = [(name, "male") for name in names.words("male.txt")] + [
+        (name, "female") for name in names.words("female.txt")
+    ]
 
     # Randomly split the names into a test & train set.
     random.seed(123456)
@@ -180,42 +196,42 @@ def names_demo(trainer, features=names_demo_features):
     test = namelist[5000:5500]
 
     # Train up a classifier.
-    print('Training classifier...')
-    classifier = trainer( [(features(n), g) for (n, g) in train] )
+    print("Training classifier...")
+    classifier = trainer([(features(n), g) for (n, g) in train])
 
     # Run the classifier on the test data.
-    print('Testing classifier...')
+    print("Testing classifier...")
     acc = accuracy(classifier, [(features(n), g) for (n, g) in test])
-    print('Accuracy: %6.4f' % acc)
+    print("Accuracy: %6.4f" % acc)
 
     # For classifiers that can find probabilities, show the log
     # likelihood and some sample probability distributions.
     try:
         test_featuresets = [features(n) for (n, g) in test]
         pdists = classifier.prob_classify_many(test_featuresets)
-        ll = [pdist.logprob(gold)
-              for ((name, gold), pdist) in zip(test, pdists)]
-        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
+        ll = [pdist.logprob(gold) for ((name, gold), pdist) in zip(test, pdists)]
+        print("Avg. log likelihood: %6.4f" % (sum(ll) / len(test)))
         print()
-        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
+        print("Unseen Names      P(Male)  P(Female)\n" + "-" * 40)
         for ((name, gender), pdist) in list(zip(test, pdists))[:5]:
-            if gender == 'male':
-                fmt = '  %-15s *%6.4f   %6.4f'
+            if gender == "male":
+                fmt = "  %-15s *%6.4f   %6.4f"
             else:
-                fmt = '  %-15s  %6.4f  *%6.4f'
-            print(fmt % (name, pdist.prob('male'), pdist.prob('female')))
+                fmt = "  %-15s  %6.4f  *%6.4f"
+            print(fmt % (name, pdist.prob("male"), pdist.prob("female")))
     except NotImplementedError:
         pass
 
     # Return the classifier
     return classifier
 
+
 def partial_names_demo(trainer, features=names_demo_features):
     from nltk.corpus import names
     import random
 
-    male_names = names.words('male.txt')
-    female_names = names.words('female.txt')
+    male_names = names.words("male.txt")
+    female_names = names.words("female.txt")
 
     random.seed(654321)
     random.shuffle(male_names)
@@ -228,35 +244,35 @@ def partial_names_demo(trainer, features=names_demo_features):
     unlabeled = map(features, male_names[2000:2500] + female_names[:500])
 
     # Create a test set with correctly-labeled male and female names
-    test = [(name, True) for name in male_names[2500:2750]] \
-        + [(name, False) for name in female_names[500:750]]
+    test = [(name, True) for name in male_names[2500:2750]] + [
+        (name, False) for name in female_names[500:750]
+    ]
 
     random.shuffle(test)
 
     # Train up a classifier.
-    print('Training classifier...')
+    print("Training classifier...")
     classifier = trainer(positive, unlabeled)
 
     # Run the classifier on the test data.
-    print('Testing classifier...')
+    print("Testing classifier...")
     acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
-    print('Accuracy: %6.4f' % acc)
+    print("Accuracy: %6.4f" % acc)
 
     # For classifiers that can find probabilities, show the log
     # likelihood and some sample probability distributions.
     try:
         test_featuresets = [features(n) for (n, m) in test]
         pdists = classifier.prob_classify_many(test_featuresets)
-        ll = [pdist.logprob(gold)
-              for ((name, gold), pdist) in zip(test, pdists)]
-        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
+        ll = [pdist.logprob(gold) for ((name, gold), pdist) in zip(test, pdists)]
+        print("Avg. log likelihood: %6.4f" % (sum(ll) / len(test)))
         print()
-        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
+        print("Unseen Names      P(Male)  P(Female)\n" + "-" * 40)
-        for ((name, is_male), pdist) in zip(test, pdists)[:5]:
+        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
             if is_male == True:
-                fmt = '  %-15s *%6.4f   %6.4f'
+                fmt = "  %-15s *%6.4f   %6.4f"
             else:
-                fmt = '  %-15s  %6.4f  *%6.4f'
+                fmt = "  %-15s  %6.4f  *%6.4f"
             print(fmt % (name, pdist.prob(True), pdist.prob(False)))
     except NotImplementedError:
         pass
@@ -264,13 +280,16 @@ def partial_names_demo(trainer, features=names_demo_features):
     # Return the classifier
     return classifier
 
+
 _inst_cache = {}
+
+
 def wsd_demo(trainer, word, features, n=1000):
     from nltk.corpus import senseval
     import random
 
     # Get the instances.
-    print('Reading data...')
+    print("Reading data...")
     global _inst_cache
     if word not in _inst_cache:
         _inst_cache[word] = [(i, i.senses[0]) for i in senseval.instances(word)]
@@ -278,32 +297,31 @@ def wsd_demo(trainer, word, features, n=1000):
     if n > len(instances):
         n = len(instances)
     senses = list(set(l for (i, l) in instances))
-    print('  Senses: ' + ' '.join(senses))
+    print("  Senses: " + " ".join(senses))
 
     # Randomly split the names into a test & train set.
-    print('Splitting into test & train...')
+    print("Splitting into test & train...")
     random.seed(123456)
     random.shuffle(instances)
-    train = instances[:int(.8*n)]
-    test = instances[int(.8*n):n]
+    train = instances[: int(0.8 * n)]
+    test = instances[int(0.8 * n) : n]
 
     # Train up a classifier.
-    print('Training classifier...')
+    print("Training classifier...")
     classifier = trainer([(features(i), l) for (i, l) in train])
 
     # Run the classifier on the test data.
-    print('Testing classifier...')
+    print("Testing classifier...")
     acc = accuracy(classifier, [(features(i), l) for (i, l) in test])
-    print('Accuracy: %6.4f' % acc)
+    print("Accuracy: %6.4f" % acc)
 
     # For classifiers that can find probabilities, show the log
     # likelihood and some sample probability distributions.
     try:
         test_featuresets = [features(i) for (i, n) in test]
         pdists = classifier.prob_classify_many(test_featuresets)
-        ll = [pdist.logprob(gold)
-              for ((name, gold), pdist) in zip(test, pdists)]
-        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
+        ll = [pdist.logprob(gold) for ((name, gold), pdist) in zip(test, pdists)]
+        print("Avg. log likelihood: %6.4f" % (sum(ll) / len(test)))
     except NotImplementedError:
         pass
 
@@ -311,14 +329,15 @@ def wsd_demo(trainer, word, features, n=1000):
     return classifier
 
 
-
-def check_megam_config(self):
+def check_megam_config():
     """
     Checks whether the MEGAM binary is configured.
     """
     try:
         _megam_bin
     except NameError:
-        err_msg = str("Please configure your megam binary first, e.g.\n"
-                      ">>> nltk.config_megam('/usr/bin/local/megam')")
+        err_msg = str(
+            "Please configure your megam binary first, e.g.\n"
+            ">>> nltk.config_megam('/usr/bin/local/megam')"
+        )
         raise NameError(err_msg)
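
For reference, what names_demo_features() produces for a single name (self-contained copy of the function above, output abridged):

    def names_demo_features(name):
        features = {"alwayson": True,
                    "startswith": name[0].lower(),
                    "endswith": name[-1].lower()}
        for letter in "abcdefghijklmnopqrstuvwxyz":
            features["count(%s)" % letter] = name.lower().count(letter)
            features["has(%s)" % letter] = letter in name.lower()
        return features

    fs = names_demo_features("Anna")
    print(fs["startswith"], fs["endswith"], fs["count(a)"], fs["has(n)"])
    # a a 2 True
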
diff --git a/nlp_resource_data/nltk/classify/util.pyc b/nlp_resource_data/nltk/classify/util.pyc
deleted file mode 100755 (executable)
index 9d91693..0000000
Binary files a/nlp_resource_data/nltk/classify/util.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/classify/weka.py b/nlp_resource_data/nltk/classify/weka.py
old mode 100755 (executable)
new mode 100644 (file)
index 2c0ab4b..3bfb311
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Interface to Weka Classifiers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -8,7 +8,7 @@
 """
 Classifiers that make use of the external 'Weka' package.
 """
-from __future__ import print_function
+
 import time
 import tempfile
 import os
@@ -17,19 +17,21 @@ import re
 import zipfile
 from sys import stdin
 
-from six import integer_types, string_types
-
 from nltk.probability import DictionaryProbDist
 from nltk.internals import java, config_java
 
 from nltk.classify.api import ClassifierI
 
 _weka_classpath = None
-_weka_search = ['.',
-                '/usr/share/weka',
-                '/usr/local/share/weka',
-                '/usr/lib/weka',
-                '/usr/local/lib/weka',]
+_weka_search = [
+    ".",
+    "/usr/share/weka",
+    "/usr/local/share/weka",
+    "/usr/lib/weka",
+    "/usr/local/lib/weka",
+]
+
+
 def config_weka(classpath=None):
     global _weka_classpath
 
@@ -41,25 +43,29 @@ def config_weka(classpath=None):
 
     if _weka_classpath is None:
         searchpath = _weka_search
-        if 'WEKAHOME' in os.environ:
-            searchpath.insert(0, os.environ['WEKAHOME'])
+        if "WEKAHOME" in os.environ:
+            searchpath.insert(0, os.environ["WEKAHOME"])
 
         for path in searchpath:
-            if os.path.exists(os.path.join(path, 'weka.jar')):
-                _weka_classpath = os.path.join(path, 'weka.jar')
+            if os.path.exists(os.path.join(path, "weka.jar")):
+                _weka_classpath = os.path.join(path, "weka.jar")
                 version = _check_weka_version(_weka_classpath)
                 if version:
-                    print(('[Found Weka: %s (version %s)]' %
-                           (_weka_classpath, version)))
+                    print(
+                        ("[Found Weka: %s (version %s)]" % (_weka_classpath, version))
+                    )
                 else:
-                    print('[Found Weka: %s]' % _weka_classpath)
+                    print("[Found Weka: %s]" % _weka_classpath)
                 _check_weka_version(_weka_classpath)
 
     if _weka_classpath is None:
-        raise LookupError('Unable to find weka.jar!  Use config_weka() '
-                          'or set the WEKAHOME environment variable. '
-                          'For more information about Weka, please see '
-                          'http://www.cs.waikato.ac.nz/ml/weka/')
+        raise LookupError(
+            "Unable to find weka.jar!  Use config_weka() "
+            "or set the WEKAHOME environment variable. "
+            "For more information about Weka, please see "
+            "http://www.cs.waikato.ac.nz/ml/weka/"
+        )
+
 
 def _check_weka_version(jar):
     try:
@@ -70,22 +76,23 @@ def _check_weka_version(jar):
         return None
     try:
         try:
-            return zf.read('weka/core/version.txt')
+            return zf.read("weka/core/version.txt")
         except KeyError:
             return None
     finally:
         zf.close()
 
+
 class WekaClassifier(ClassifierI):
     def __init__(self, formatter, model_filename):
         self._formatter = formatter
         self._model = model_filename
 
     def prob_classify_many(self, featuresets):
-        return self._classify_many(featuresets, ['-p', '0', '-distribution'])
+        return self._classify_many(featuresets, ["-p", "0", "-distribution"])
 
     def classify_many(self, featuresets):
-        return self._classify_many(featuresets, ['-p', '0'])
+        return self._classify_many(featuresets, ["-p", "0"])
 
     def _classify_many(self, featuresets, options):
         # Make sure we can find java & weka.
@@ -94,28 +101,37 @@ class WekaClassifier(ClassifierI):
         temp_dir = tempfile.mkdtemp()
         try:
             # Write the test data file.
-            test_filename = os.path.join(temp_dir, 'test.arff')
+            test_filename = os.path.join(temp_dir, "test.arff")
             self._formatter.write(test_filename, featuresets)
 
             # Call weka to classify the data.
-            cmd = ['weka.classifiers.bayes.NaiveBayes',
-                   '-l', self._model, '-T', test_filename] + options
-            (stdout, stderr) = java(cmd, classpath=_weka_classpath,
-                                    stdout=subprocess.PIPE,
-                                    stderr=subprocess.PIPE)
+            cmd = [
+                "weka.classifiers.bayes.NaiveBayes",
+                "-l",
+                self._model,
+                "-T",
+                test_filename,
+            ] + options
+            (stdout, stderr) = java(
+                cmd,
+                classpath=_weka_classpath,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
 
             # Check if something went wrong:
             if stderr and not stdout:
-                if 'Illegal options: -distribution' in stderr:
-                    raise ValueError('The installed version of weka does '
-                                     'not support probability distribution '
-                                     'output.')
+                if "Illegal options: -distribution" in stderr:
+                    raise ValueError(
+                        "The installed version of weka does "
+                        "not support probability distribution "
+                        "output."
+                    )
                 else:
-                    raise ValueError('Weka failed to generate output:\n%s'
-                                     % stderr)
+                    raise ValueError("Weka failed to generate output:\n%s" % stderr)
 
             # Parse weka's output.
-            return self.parse_weka_output(stdout.decode(stdin.encoding).split('\n'))
+            return self.parse_weka_output(stdout.decode(stdin.encoding).split("\n"))
 
         finally:
             for f in os.listdir(temp_dir):
@@ -123,37 +139,44 @@ class WekaClassifier(ClassifierI):
             os.rmdir(temp_dir)
 
     def parse_weka_distribution(self, s):
-        probs = [float(v) for v in re.split('[*,]+', s) if v.strip()]
+        probs = [float(v) for v in re.split("[*,]+", s) if v.strip()]
         probs = dict(zip(self._formatter.labels(), probs))
         return DictionaryProbDist(probs)
 
     def parse_weka_output(self, lines):
         # Strip unwanted text from stdout
-        for i,line in enumerate(lines):
+        for i, line in enumerate(lines):
             if line.strip().startswith("inst#"):
                 lines = lines[i:]
                 break
 
-        if lines[0].split() == ['inst#', 'actual', 'predicted',
-                                'error', 'prediction']:
-            return [line.split()[2].split(':')[1]
-                    for line in lines[1:] if line.strip()]
-        elif lines[0].split() == ['inst#', 'actual', 'predicted',
-                                  'error', 'distribution']:
-            return [self.parse_weka_distribution(line.split()[-1])
-                    for line in lines[1:] if line.strip()]
+        if lines[0].split() == ["inst#", "actual", "predicted", "error", "prediction"]:
+            return [line.split()[2].split(":")[1] for line in lines[1:] if line.strip()]
+        elif lines[0].split() == [
+            "inst#",
+            "actual",
+            "predicted",
+            "error",
+            "distribution",
+        ]:
+            return [
+                self.parse_weka_distribution(line.split()[-1])
+                for line in lines[1:]
+                if line.strip()
+            ]
 
         # is this safe?
-        elif re.match(r'^0 \w+ [01]\.[0-9]* \?\s*$', lines[0]):
+        elif re.match(r"^0 \w+ [01]\.[0-9]* \?\s*$", lines[0]):
             return [line.split()[1] for line in lines if line.strip()]
 
         else:
             for line in lines[:10]:
                 print(line)
-            raise ValueError('Unhandled output format -- your version '
-                             'of weka may not be supported.\n'
-                             '  Header: %s' % lines[0])
-
+            raise ValueError(
+                "Unhandled output format -- your version "
+                "of weka may not be supported.\n"
+                "  Header: %s" % lines[0]
+            )
 
     # [xx] full list of classifiers (some may be abstract?):
     # ADTree, AODE, BayesNet, ComplementNaiveBayes, ConjunctiveRule,
@@ -169,16 +192,23 @@ class WekaClassifier(ClassifierI):
     # VotedPerceptron, Winnow, ZeroR
 
     _CLASSIFIER_CLASS = {
-        'naivebayes': 'weka.classifiers.bayes.NaiveBayes',
-        'C4.5': 'weka.classifiers.trees.J48',
-        'log_regression': 'weka.classifiers.functions.Logistic',
-        'svm': 'weka.classifiers.functions.SMO',
-        'kstar': 'weka.classifiers.lazy.KStar',
-        'ripper': 'weka.classifiers.rules.JRip',
-        }
+        "naivebayes": "weka.classifiers.bayes.NaiveBayes",
+        "C4.5": "weka.classifiers.trees.J48",
+        "log_regression": "weka.classifiers.functions.Logistic",
+        "svm": "weka.classifiers.functions.SMO",
+        "kstar": "weka.classifiers.lazy.KStar",
+        "ripper": "weka.classifiers.rules.JRip",
+    }
+
     @classmethod
-    def train(cls, model_filename, featuresets,
-              classifier='naivebayes', options=[], quiet=True):
+    def train(
+        cls,
+        model_filename,
+        featuresets,
+        classifier="naivebayes",
+        options=[],
+        quiet=True,
+    ):
         # Make sure we can find java & weka.
         config_weka()
 
@@ -188,7 +218,7 @@ class WekaClassifier(ClassifierI):
         temp_dir = tempfile.mkdtemp()
         try:
             # Write the training data file.
-            train_filename = os.path.join(temp_dir, 'train.arff')
+            train_filename = os.path.join(temp_dir, "train.arff")
             formatter.write(train_filename, featuresets)
 
             if classifier in cls._CLASSIFIER_CLASS:
@@ -196,14 +226,15 @@ class WekaClassifier(ClassifierI):
             elif classifier in cls._CLASSIFIER_CLASS.values():
                 javaclass = classifier
             else:
-                raise ValueError('Unknown classifier %s' % classifier)
+                raise ValueError("Unknown classifier %s" % classifier)
 
             # Train the weka model.
-            cmd = [javaclass, '-d', model_filename, '-t', train_filename]
+            cmd = [javaclass, "-d", model_filename, "-t", train_filename]
             cmd += list(options)
             if quiet:
                 stdout = subprocess.PIPE
-            else: stdout = None
+            else:
+                stdout = None
             java(cmd, classpath=_weka_classpath, stdout=stdout)
 
             # Return the new classifier.
@@ -245,8 +276,8 @@ class ARFF_Formatter:
 
     def write(self, outfile, tokens):
         """Writes ARFF data to a file for the given data."""
-        if not hasattr(outfile, 'write'):
-            outfile = open(outfile, 'w')
+        if not hasattr(outfile, "write"):
+            outfile = open(outfile, "w")
         outfile.write(self.format(tokens))
         outfile.close()
 
@@ -265,18 +296,18 @@ class ARFF_Formatter:
         for tok, label in tokens:
             for (fname, fval) in tok.items():
                 if issubclass(type(fval), bool):
-                    ftype = '{True, False}'
-                elif issubclass(type(fval), (integer_types, float, bool)):
-                    ftype = 'NUMERIC'
-                elif issubclass(type(fval), string_types):
-                    ftype = 'STRING'
+                    ftype = "{True, False}"
+                elif issubclass(type(fval), (int, float, bool)):
+                    ftype = "NUMERIC"
+                elif issubclass(type(fval), str):
+                    ftype = "STRING"
                 elif fval is None:
-                    continue # can't tell the type.
+                    continue  # can't tell the type.
                 else:
-                    raise ValueError('Unsupported value type %r' % ftype)
+                    raise ValueError("Unsupported value type %r" % ftype)
 
                 if features.get(fname, ftype) != ftype:
-                    raise ValueError('Inconsistent type for %s' % fname)
+                    raise ValueError("Inconsistent type for %s" % fname)
                 features[fname] = ftype
         features = sorted(features.items())
 
@@ -285,19 +316,21 @@ class ARFF_Formatter:
     def header_section(self):
         """Returns an ARFF header as a string."""
         # Header comment.
-        s = ('% Weka ARFF file\n' +
-             '% Generated automatically by NLTK\n' +
-             '%% %s\n\n' % time.ctime())
+        s = (
+            "% Weka ARFF file\n"
+            + "% Generated automatically by NLTK\n"
+            + "%% %s\n\n" % time.ctime()
+        )
 
         # Relation name
-        s += '@RELATION rel\n\n'
+        s += "@RELATION rel\n\n"
 
         # Input attribute specifications
         for fname, ftype in self._features:
-            s += '@ATTRIBUTE %-30r %s\n' % (fname, ftype)
+            s += "@ATTRIBUTE %-30r %s\n" % (fname, ftype)
 
         # Label attribute specification
-        s += '@ATTRIBUTE %-30r {%s}\n' % ('-label-', ','.join(self._labels))
+        s += "@ATTRIBUTE %-30r {%s}\n" % ("-label-", ",".join(self._labels))
 
         return s
 
@@ -319,28 +352,29 @@ class ARFF_Formatter:
             tokens = [(tok, None) for tok in tokens]
 
         # Data section
-        s = '\n@DATA\n'
+        s = "\n@DATA\n"
         for (tok, label) in tokens:
             for fname, ftype in self._features:
-                s += '%s,' % self._fmt_arff_val(tok.get(fname))
-            s += '%s\n' % self._fmt_arff_val(label)
+                s += "%s," % self._fmt_arff_val(tok.get(fname))
+            s += "%s\n" % self._fmt_arff_val(label)
 
         return s
 
     def _fmt_arff_val(self, fval):
         if fval is None:
-            return '?'
-        elif isinstance(fval, (bool, integer_types)):
-            return '%s' % fval
+            return "?"
+        elif isinstance(fval, (bool, int)):
+            return "%s" % fval
         elif isinstance(fval, float):
-            return '%r' % fval
+            return "%r" % fval
         else:
-            return '%r' % fval
+            return "%r" % fval
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     from nltk.classify.util import names_demo, binary_names_demo_features
+
     def make_classifier(featuresets):
-        return WekaClassifier.train('/tmp/name.model', featuresets,
-                                    'C4.5')
+        return WekaClassifier.train("/tmp/name.model", featuresets, "C4.5")
+
     classifier = names_demo(make_classifier, binary_names_demo_features)
diff --git a/nlp_resource_data/nltk/classify/weka.pyc b/nlp_resource_data/nltk/classify/weka.pyc
deleted file mode 100755 (executable)
index 0c25340..0000000
Binary files a/nlp_resource_data/nltk/classify/weka.pyc and /dev/null differ
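
For illustration, a minimal sketch of what parse_weka_distribution() in the hunk above does with one value from Weka's -distribution output column (the label set here is hypothetical; Weka marks the predicted class with "*"):

    import re
    from nltk.probability import DictionaryProbDist

    labels = ["male", "female"]  # hypothetical label order from the ARFF header
    s = "*0.9,0.1"               # one "distribution" column value from Weka
    # split on '*' and ',' exactly as parse_weka_distribution does
    probs = [float(v) for v in re.split("[*,]+", s) if v.strip()]
    dist = DictionaryProbDist(dict(zip(labels, probs)))
    assert abs(dist.prob("male") - 0.9) < 1e-9
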
diff --git a/nlp_resource_data/nltk/cli.py b/nlp_resource_data/nltk/cli.py
new file mode 100644 (file)
index 0000000..01ff3d0
--- /dev/null
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: NLTK Command-Line Interface
+#
+# Copyright (C) 2001-2020 NLTK Project
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+
+from functools import partial
+from itertools import chain
+from tqdm import tqdm
+
+import click
+
+from nltk import word_tokenize
+from nltk.util import parallelize_preprocess
+
+CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
+
+
+@click.group(context_settings=CONTEXT_SETTINGS)
+@click.version_option()
+def cli():
+    pass
+
+
+@cli.command("tokenize")
+@click.option(
+    "--language",
+    "-l",
+    default="en",
+    help="The language for the Punkt sentence tokenization.",
+)
+@click.option(
+    "--preserve-line",
+    # NOTE: no short flag here; "-l" already belongs to --language above.
+    default=True,
+    is_flag=True,
+    help="An option to preserve each line and not sentence-tokenize it.",
+)
+@click.option("--processes", "-j", default=1, help="No. of processes.")
+@click.option("--encoding", "-e", default="utf8", help="Specify encoding of file.")
+@click.option(
+    "--delimiter", "-d", default=" ", help="Specify delimiter to join the tokens."
+)
+def tokenize_file(language, preserve_line, processes, encoding, delimiter):
+    """ This command tokenizes text stream using nltk.word_tokenize """
+    with click.get_text_stream("stdin", encoding=encoding) as fin:
+        with click.get_text_stream("stdout", encoding=encoding) as fout:
+            # If it's a single process, joblib parallelization is slower,
+            # so just process line by line normally.
+            if processes == 1:
+                for line in tqdm(fin.readlines()):
+                    print(delimiter.join(word_tokenize(line)), end="\n", file=fout)
+            else:
+                for outline in parallelize_preprocess(
+                    word_tokenize, fin.readlines(), processes, progress_bar=True
+                ):
+                    print(delimiter.join(outline), end="\n", file=fout)
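
The single-process branch of tokenize_file above boils down to this standalone sketch (assuming the required tokenizer data is installed; the delimiter value mirrors the option default):

    import sys
    from nltk import word_tokenize

    delimiter = " "  # mirrors --delimiter's default
    for line in sys.stdin:
        # tokenize each input line and re-join the tokens with the delimiter
        print(delimiter.join(word_tokenize(line)), file=sys.stdout)
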
old mode 100755 (executable)
new mode 100644 (file)
index bf37a77..2310947
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Clusterers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -79,8 +79,12 @@ objects. nltk_contrib.unimelb.tacohn.SparseArrays may be used for
 efficiency when required.
 """
 
-from nltk.cluster.util import (VectorSpaceClusterer, Dendrogram,
-                               euclidean_distance, cosine_distance)
+from nltk.cluster.util import (
+    VectorSpaceClusterer,
+    Dendrogram,
+    euclidean_distance,
+    cosine_distance,
+)
 from nltk.cluster.kmeans import KMeansClusterer
 from nltk.cluster.gaac import GAAClusterer
 from nltk.cluster.em import EMClusterer
old mode 100755 (executable)
new mode 100644 (file)
similarity index 87%
rename from nlp_resource_data/nltk/cluster/__init__.pyc
rename to nlp_resource_data/nltk/cluster/__pycache__/__init__.cpython-37.pyc
index 9ed4138..a5235cb
Binary files a/nlp_resource_data/nltk/cluster/__init__.pyc and b/nlp_resource_data/nltk/cluster/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/cluster/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/cluster/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0cbec0e
Binary files /dev/null and b/nlp_resource_data/nltk/cluster/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/cluster/__pycache__/em.cpython-37.pyc b/nlp_resource_data/nltk/cluster/__pycache__/em.cpython-37.pyc
new file mode 100644 (file)
index 0000000..40edf72
Binary files /dev/null and b/nlp_resource_data/nltk/cluster/__pycache__/em.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/cluster/__pycache__/gaac.cpython-37.pyc b/nlp_resource_data/nltk/cluster/__pycache__/gaac.cpython-37.pyc
new file mode 100644 (file)
index 0000000..14f29d8
Binary files /dev/null and b/nlp_resource_data/nltk/cluster/__pycache__/gaac.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/cluster/__pycache__/kmeans.cpython-37.pyc b/nlp_resource_data/nltk/cluster/__pycache__/kmeans.cpython-37.pyc
new file mode 100644 (file)
index 0000000..c9b1d4e
Binary files /dev/null and b/nlp_resource_data/nltk/cluster/__pycache__/kmeans.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/cluster/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/cluster/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..5403a35
Binary files /dev/null and b/nlp_resource_data/nltk/cluster/__pycache__/util.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index 8679324..eb43b57
@@ -1,21 +1,21 @@
 # Natural Language Toolkit: Clusterer Interfaces
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 # Porting: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 from abc import ABCMeta, abstractmethod
-from six import add_metaclass
 
 from nltk.probability import DictionaryProbDist
 
-@add_metaclass(ABCMeta)
-class ClusterI(object):
+
+class ClusterI(metaclass=ABCMeta):
     """
     Interface covering basic clustering functionality.
     """
+
     @abstractmethod
     def cluster(self, vectors, assign_clusters=False):
         """
diff --git a/nlp_resource_data/nltk/cluster/api.pyc b/nlp_resource_data/nltk/cluster/api.pyc
deleted file mode 100755 (executable)
index 68a9bac..0000000
Binary files a/nlp_resource_data/nltk/cluster/api.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 54b42f5..a93d19c
@@ -1,19 +1,18 @@
 # Natural Language Toolkit: Expectation Maximization Clusterer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals
+
 try:
     import numpy
 except ImportError:
     pass
 
-from nltk.compat import python_2_unicode_compatible
 from nltk.cluster.util import VectorSpaceClusterer
 
-@python_2_unicode_compatible
+
 class EMClusterer(VectorSpaceClusterer):
     """
     The Gaussian EM clusterer models the vectors as being produced by
@@ -29,9 +28,16 @@ class EMClusterer(VectorSpaceClusterer):
     the likelihood of the data does not significantly increase.
     """
 
-    def __init__(self, initial_means, priors=None, covariance_matrices=None,
-                       conv_threshold=1e-6, bias=0.1, normalise=False,
-                       svd_dimensions=None):
+    def __init__(
+        self,
+        initial_means,
+        priors=None,
+        covariance_matrices=None,
+        conv_threshold=1e-6,
+        bias=0.1,
+        normalise=False,
+        svd_dimensions=None,
+    ):
         """
         Creates an EM clusterer with the given starting parameters,
         convergence threshold and vector mangling parameters.
@@ -73,49 +79,49 @@ class EMClusterer(VectorSpaceClusterer):
         means = self._means
         priors = self._priors
         if not priors:
-            priors = self._priors = numpy.ones(self._num_clusters,
-                                        numpy.float64) / self._num_clusters
+            priors = self._priors = (
+                numpy.ones(self._num_clusters, numpy.float64) / self._num_clusters
+            )
         covariances = self._covariance_matrices
         if not covariances:
-            covariances = self._covariance_matrices = \
-                [ numpy.identity(dimensions, numpy.float64)
-                  for i in range(self._num_clusters) ]
+            covariances = self._covariance_matrices = [
+                numpy.identity(dimensions, numpy.float64)
+                for i in range(self._num_clusters)
+            ]
 
         # do the E and M steps until the likelihood plateaus
         lastl = self._loglikelihood(vectors, priors, means, covariances)
         converged = False
 
         while not converged:
-            if trace: print('iteration; loglikelihood', lastl)
+            if trace:
+                print("iteration; loglikelihood", lastl)
             # E-step, calculate hidden variables, h[i,j]
-            h = numpy.zeros((len(vectors), self._num_clusters),
-                numpy.float64)
+            h = numpy.zeros((len(vectors), self._num_clusters), numpy.float64)
             for i in range(len(vectors)):
                 for j in range(self._num_clusters):
-                    h[i,j] = priors[j] * self._gaussian(means[j],
-                                               covariances[j], vectors[i])
-                h[i,:] /= sum(h[i,:])
+                    h[i, j] = priors[j] * self._gaussian(
+                        means[j], covariances[j], vectors[i]
+                    )
+                h[i, :] /= sum(h[i, :])
 
             # M-step, update parameters - cvm, p, mean
             for j in range(self._num_clusters):
                 covariance_before = covariances[j]
-                new_covariance = numpy.zeros((dimensions, dimensions),
-                            numpy.float64)
+                new_covariance = numpy.zeros((dimensions, dimensions), numpy.float64)
                 new_mean = numpy.zeros(dimensions, numpy.float64)
                 sum_hj = 0.0
                 for i in range(len(vectors)):
                     delta = vectors[i] - means[j]
-                    new_covariance += h[i,j] * \
-                        numpy.multiply.outer(delta, delta)
-                    sum_hj += h[i,j]
-                    new_mean += h[i,j] * vectors[i]
+                    new_covariance += h[i, j] * numpy.multiply.outer(delta, delta)
+                    sum_hj += h[i, j]
+                    new_mean += h[i, j] * vectors[i]
                 covariances[j] = new_covariance / sum_hj
                 means[j] = new_mean / sum_hj
                 priors[j] = sum_hj / len(vectors)
 
                 # bias term to stop covariance matrix being singular
-                covariances[j] += self._bias * \
-                    numpy.identity(dimensions, numpy.float64)
+                covariances[j] += self._bias * numpy.identity(dimensions, numpy.float64)
 
             # calculate likelihood - FIXME: may be broken
             l = self._loglikelihood(vectors, priors, means, covariances)
@@ -128,28 +134,29 @@ class EMClusterer(VectorSpaceClusterer):
     def classify_vectorspace(self, vector):
         best = None
         for j in range(self._num_clusters):
-            p = self._priors[j] * self._gaussian(self._means[j],
-                                    self._covariance_matrices[j], vector)
+            p = self._priors[j] * self._gaussian(
+                self._means[j], self._covariance_matrices[j], vector
+            )
             if not best or p > best[0]:
                 best = (p, j)
         return best[1]
 
     def likelihood_vectorspace(self, vector, cluster):
         cid = self.cluster_names().index(cluster)
-        return self._priors[cluster] * self._gaussian(self._means[cluster],
-                                self._covariance_matrices[cluster], vector)
+        return self._priors[cluster] * self._gaussian(
+            self._means[cluster], self._covariance_matrices[cluster], vector
+        )
 
     def _gaussian(self, mean, cvm, x):
         m = len(mean)
-        assert cvm.shape == (m, m), \
-            'bad sized covariance matrix, %s' % str(cvm.shape)
+        assert cvm.shape == (m, m), "bad sized covariance matrix, %s" % str(cvm.shape)
         try:
             det = numpy.linalg.det(cvm)
             inv = numpy.linalg.inv(cvm)
             a = det ** -0.5 * (2 * numpy.pi) ** (-m / 2.0)
             dx = x - mean
             print(dx, inv)
-            b = -0.5 * numpy.dot( numpy.dot(dx, inv), dx)
+            b = -0.5 * numpy.dot(numpy.dot(dx, inv), dx)
             return a * numpy.exp(b)
         except OverflowError:
             # happens when the exponent is negative infinity - i.e. b = 0
@@ -161,13 +168,13 @@ class EMClusterer(VectorSpaceClusterer):
         for vector in vectors:
             p = 0
             for j in range(len(priors)):
-                p += priors[j] * \
-                         self._gaussian(means[j], covariances[j], vector)
+                p += priors[j] * self._gaussian(means[j], covariances[j], vector)
             llh += numpy.log(p)
         return llh
 
     def __repr__(self):
-        return '<EMClusterer means=%s>' % list(self._means)
+        return "<EMClusterer means=%s>" % list(self._means)
+
 
 def demo():
     """
@@ -184,64 +191,28 @@ def demo():
     clusterer = cluster.EMClusterer(means, bias=0.1)
     clusters = clusterer.cluster(vectors, True, trace=True)
 
-    print('Clustered:', vectors)
-    print('As:       ', clusters)
+    print("Clustered:", vectors)
+    print("As:       ", clusters)
     print()
 
     for c in range(2):
-        print('Cluster:', c)
-        print('Prior:  ', clusterer._priors[c])
-        print('Mean:   ', clusterer._means[c])
-        print('Covar:  ', clusterer._covariance_matrices[c])
+        print("Cluster:", c)
+        print("Prior:  ", clusterer._priors[c])
+        print("Mean:   ", clusterer._means[c])
+        print("Covar:  ", clusterer._covariance_matrices[c])
         print()
 
     # classify a new vector
     vector = numpy.array([2, 2])
-    print('classify(%s):' % vector, end=' ')
+    print("classify(%s):" % vector, end=" ")
     print(clusterer.classify(vector))
 
     # show the classification probabilities
     vector = numpy.array([2, 2])
-    print('classification_probdist(%s):' % vector)
+    print("classification_probdist(%s):" % vector)
     pdist = clusterer.classification_probdist(vector)
     for sample in pdist.samples():
-        print('%s => %.0f%%' % (sample,
-                    pdist.prob(sample) *100))
+        print("%s => %.0f%%" % (sample, pdist.prob(sample) * 100))
 
-#
-#     The following demo code is broken.
-#
-#     # use a set of tokens with 2D indices
-#     vectors = [numpy.array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0], [2, 3], [3, 1]]]
-
-#     # test the EM clusterer with means given by k-means (2) and
-#     # dimensionality reduction
-#     clusterer = cluster.KMeans(2, euclidean_distance, svd_dimensions=1)
-#     print 'Clusterer:', clusterer
-#     clusters = clusterer.cluster(vectors)
-#     means = clusterer.means()
-#     print 'Means:', clusterer.means()
-#     print
-
-#     clusterer = cluster.EMClusterer(means, svd_dimensions=1)
-#     clusters = clusterer.cluster(vectors, True)
-#     print 'Clusterer:', clusterer
-#     print 'Clustered:', str(vectors)[:60], '...'
-#     print 'As:', str(clusters)[:60], '...'
-#     print
-
-#     # classify a new vector
-#     vector = numpy.array([3, 3])
-#     print 'classify(%s):' % vector,
-#     print clusterer.classify(vector)
-#     print
-
-#     # show the classification probabilities
-#     vector = numpy.array([2.2, 2])
-#     print 'classification_probdist(%s)' % vector
-#     pdist = clusterer.classification_probdist(vector)
-#     for sample in pdist:
-#         print '%s => %.0f%%' % (sample, pdist.prob(sample) *100)
-
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/cluster/em.pyc b/nlp_resource_data/nltk/cluster/em.pyc
deleted file mode 100755 (executable)
index eeabc37..0000000
Binary files a/nlp_resource_data/nltk/cluster/em.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 2ec63c4..436ef98
@@ -1,10 +1,9 @@
 # Natural Language Toolkit: Group Average Agglomerative Clusterer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals, division
 
 try:
     import numpy
@@ -12,9 +11,8 @@ except ImportError:
     pass
 
 from nltk.cluster.util import VectorSpaceClusterer, Dendrogram, cosine_distance
-from nltk.compat import python_2_unicode_compatible
 
-@python_2_unicode_compatible
+
 class GAAClusterer(VectorSpaceClusterer):
     """
     The Group Average Agglomerative starts with each of the N vectors as singleton
@@ -37,21 +35,22 @@ class GAAClusterer(VectorSpaceClusterer):
     def cluster(self, vectors, assign_clusters=False, trace=False):
         # stores the merge order
         self._dendrogram = Dendrogram(
-            [numpy.array(vector, numpy.float64) for vector in vectors])
+            [numpy.array(vector, numpy.float64) for vector in vectors]
+        )
         return VectorSpaceClusterer.cluster(self, vectors, assign_clusters, trace)
 
     def cluster_vectorspace(self, vectors, trace=False):
         # variables describing the initial situation
         N = len(vectors)
-        cluster_len = [1]*N
+        cluster_len = [1] * N
         cluster_count = N
         index_map = numpy.arange(N)
 
         # construct the similarity matrix
         dims = (N, N)
-        dist = numpy.ones(dims, dtype=numpy.float)*numpy.inf
+        dist = numpy.ones(dims, dtype=numpy.float64) * numpy.inf  # numpy.float is deprecated
         for i in range(N):
-            for j in range(i+1, N):
+            for j in range(i + 1, N):
                 dist[i, j] = cosine_distance(vectors[i], vectors[j])
 
         while cluster_count > max(self._num_clusters, 1):
@@ -67,13 +66,13 @@ class GAAClusterer(VectorSpaceClusterer):
             dist[j, :] = numpy.inf
 
             # merge the clusters
-            cluster_len[i] = cluster_len[i]+cluster_len[j]
+            cluster_len[i] = cluster_len[i] + cluster_len[j]
             self._dendrogram.merge(index_map[i], index_map[j])
             cluster_count -= 1
 
             # update the index map to reflect the indexes if we
             # had removed j
-            index_map[j+1:] -= 1
+            index_map[j + 1 :] -= 1
             index_map[j] = N
 
         self.update_clusters(self._num_clusters)
@@ -84,16 +83,18 @@ class GAAClusterer(VectorSpaceClusterer):
         # number of points in the clusters i and j
         i_weight = cluster_len[i]
         j_weight = cluster_len[j]
-        weight_sum = i_weight+j_weight
+        weight_sum = i_weight + j_weight
 
         # update for x<i
-        dist[:i, i] = dist[:i, i]*i_weight + dist[:i, j]*j_weight
+        dist[:i, i] = dist[:i, i] * i_weight + dist[:i, j] * j_weight
         dist[:i, i] /= weight_sum
         # update for i<x<j
-        dist[i, i+1:j] = dist[i, i+1:j]*i_weight + dist[i+1:j, j]*j_weight
+        dist[i, i + 1 : j] = (
+            dist[i, i + 1 : j] * i_weight + dist[i + 1 : j, j] * j_weight
+        )
         # update for i<j<x
-        dist[i, j+1:] = dist[i, j+1:]*i_weight + dist[j, j+1:]*j_weight
-        dist[i, i+1:] /= weight_sum
+        dist[i, j + 1 :] = dist[i, j + 1 :] * i_weight + dist[j, j + 1 :] * j_weight
+        dist[i, i + 1 :] /= weight_sum
 
     def update_clusters(self, num_clusters):
         clusters = self._dendrogram.groups(num_clusters)
@@ -133,7 +134,8 @@ class GAAClusterer(VectorSpaceClusterer):
         return self._num_clusters
 
     def __repr__(self):
-        return '<GroupAverageAgglomerative Clusterer n=%d>' % self._num_clusters
+        return "<GroupAverageAgglomerative Clusterer n=%d>" % self._num_clusters
+
 
 def demo():
     """
@@ -149,9 +151,9 @@ def demo():
     clusterer = GAAClusterer(4)
     clusters = clusterer.cluster(vectors, True)
 
-    print('Clusterer:', clusterer)
-    print('Clustered:', vectors)
-    print('As:', clusters)
+    print("Clusterer:", clusterer)
+    print("Clustered:", vectors)
+    print("As:", clusters)
     print()
 
     # show the dendrogram
@@ -159,10 +161,10 @@ def demo():
 
     # classify a new vector
     vector = numpy.array([3, 3])
-    print('classify(%s):' % vector, end=' ')
+    print("classify(%s):" % vector, end=" ")
     print(clusterer.classify(vector))
     print()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/cluster/gaac.pyc b/nlp_resource_data/nltk/cluster/gaac.pyc
deleted file mode 100755 (executable)
index db3db2d..0000000
Binary files a/nlp_resource_data/nltk/cluster/gaac.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 2da6c7c..389ff68
@@ -1,10 +1,9 @@
 # Natural Language Toolkit: K-Means Clusterer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals, division
 
 import copy
 import random
@@ -17,10 +16,8 @@ except ImportError:
 
 
 from nltk.cluster.util import VectorSpaceClusterer
-from nltk.compat import python_2_unicode_compatible
 
 
-@python_2_unicode_compatible
 class KMeansClusterer(VectorSpaceClusterer):
     """
     The K-means clusterer starts with k arbitrary chosen means then allocates
@@ -32,10 +29,18 @@ class KMeansClusterer(VectorSpaceClusterer):
     commonly occurring output means are chosen.
     """
 
-    def __init__(self, num_means, distance, repeats=1,
-                       conv_test=1e-6, initial_means=None,
-                       normalise=False, svd_dimensions=None,
-                       rng=None, avoid_empty_clusters=False):
+    def __init__(
+        self,
+        num_means,
+        distance,
+        repeats=1,
+        conv_test=1e-6,
+        initial_means=None,
+        normalise=False,
+        svd_dimensions=None,
+        rng=None,
+        avoid_empty_clusters=False,
+    ):
 
         """
         :param  num_means:  the number of means to use (may use fewer)
@@ -70,16 +75,17 @@ class KMeansClusterer(VectorSpaceClusterer):
         assert repeats >= 1
         assert not (initial_means and repeats > 1)
         self._repeats = repeats
-        self._rng = (rng if rng else random.Random())
+        self._rng = rng if rng else random.Random()
         self._avoid_empty_clusters = avoid_empty_clusters
 
     def cluster_vectorspace(self, vectors, trace=False):
         if self._means and self._repeats > 1:
-            print('Warning: means will be discarded for subsequent trials')
+            print("Warning: means will be discarded for subsequent trials")
 
         meanss = []
         for trial in range(self._repeats):
-            if trace: print('k-means trial', trial)
+            if trace:
+                print("k-means trial", trial)
             if not self._means or trial > 1:
                 self._means = self._rng.sample(list(vectors), self._num_means)
             self._cluster_vectorspace(vectors, trace)
@@ -116,9 +122,10 @@ class KMeansClusterer(VectorSpaceClusterer):
                     index = self.classify_vectorspace(vector)
                     clusters[index].append(vector)
 
-                if trace: print('iteration')
-                #for i in range(self._num_means):
-                    #print '  mean', i, 'allocated', len(clusters[i]), 'vectors'
+                if trace:
+                    print("iteration")
+                # for i in range(self._num_means):
+                # print '  mean', i, 'allocated', len(clusters[i]), 'vectors'
 
                 # recalculate cluster means by computing the centroid of each cluster
                 new_means = list(map(self._centroid, clusters, self._means))
@@ -165,23 +172,26 @@ class KMeansClusterer(VectorSpaceClusterer):
             centroid = copy.copy(mean)
             for vector in cluster:
                 centroid += vector
-            return centroid / (1+len(cluster))
+            return centroid / (1 + len(cluster))
         else:
             if not len(cluster):
-                sys.stderr.write('Error: no centroid defined for empty cluster.\n')
-                sys.stderr.write('Try setting argument \'avoid_empty_clusters\' to True\n')
-                assert(False)
+                sys.stderr.write("Error: no centroid defined for empty cluster.\n")
+                sys.stderr.write(
+                    "Try setting argument 'avoid_empty_clusters' to True\n"
+                )
+                assert False
             centroid = copy.copy(cluster[0])
             for vector in cluster[1:]:
                 centroid += vector
             return centroid / len(cluster)
 
     def __repr__(self):
-        return '<KMeansClusterer means=%s repeats=%d>' % \
-                    (self._means, self._repeats)
+        return "<KMeansClusterer means=%s repeats=%d>" % (self._means, self._repeats)
+
 
 #################################################################################
 
+
 def demo():
     # example from figure 14.9, page 517, Manning and Schutze
 
@@ -193,9 +203,9 @@ def demo():
     clusterer = KMeansClusterer(2, euclidean_distance, initial_means=means)
     clusters = clusterer.cluster(vectors, True, trace=True)
 
-    print('Clustered:', vectors)
-    print('As:', clusters)
-    print('Means:', clusterer.means())
+    print("Clustered:", vectors)
+    print("As:", clusters)
+    print("Means:", clusterer.means())
     print()
 
     vectors = [numpy.array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0], [2, 3], [3, 1]]]
@@ -205,17 +215,17 @@ def demo():
 
     clusterer = KMeansClusterer(2, euclidean_distance, repeats=10)
     clusters = clusterer.cluster(vectors, True)
-    print('Clustered:', vectors)
-    print('As:', clusters)
-    print('Means:', clusterer.means())
+    print("Clustered:", vectors)
+    print("As:", clusters)
+    print("Means:", clusterer.means())
     print()
 
     # classify a new vector
     vector = numpy.array([3, 3])
-    print('classify(%s):' % vector, end=' ')
+    print("classify(%s):" % vector, end=" ")
     print(clusterer.classify(vector))
     print()
 
-if __name__ == '__main__':
-    demo()
 
+if __name__ == "__main__":
+    demo()
diff --git a/nlp_resource_data/nltk/cluster/kmeans.pyc b/nlp_resource_data/nltk/cluster/kmeans.pyc
deleted file mode 100755 (executable)
index bc5fc15..0000000
Binary files a/nlp_resource_data/nltk/cluster/kmeans.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 08930aa..c7ab691
@@ -1,11 +1,10 @@
 # Natural Language Toolkit: Clusterer Utilities
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 # Contributor: J Richard Snape
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals, division
 from abc import abstractmethod
 
 import copy
@@ -18,7 +17,6 @@ except ImportError:
     pass
 
 from nltk.cluster.api import ClusterI
-from nltk.compat import python_2_unicode_compatible
 
 
 class VectorSpaceClusterer(ClusterI):
@@ -27,6 +25,7 @@ class VectorSpaceClusterer(ClusterI):
     Optionally performs singular value decomposition to reduce the
     dimensionality.
     """
+
     def __init__(self, normalise=False, svd_dimensions=None):
         """
         :param normalise:       should vectors be normalised to length 1
@@ -48,12 +47,12 @@ class VectorSpaceClusterer(ClusterI):
 
         # use SVD to reduce the dimensionality
         if self._svd_dimensions and self._svd_dimensions < len(vectors[0]):
-            [u, d, vt] = numpy.linalg.svd(numpy.transpose(
-                            numpy.array(vectors)))
-            S = d[:self._svd_dimensions] * \
-                numpy.identity(self._svd_dimensions, numpy.float64)
-            T = u[:, :self._svd_dimensions]
-            Dt = vt[:self._svd_dimensions, :]
+            [u, d, vt] = numpy.linalg.svd(numpy.transpose(numpy.array(vectors)))
+            S = d[: self._svd_dimensions] * numpy.identity(
+                self._svd_dimensions, numpy.float64
+            )
+            T = u[:, : self._svd_dimensions]
+            Dt = vt[: self._svd_dimensions, :]
             vectors = numpy.transpose(numpy.dot(S, Dt))
             self._Tt = numpy.transpose(T)
 
@@ -96,7 +95,7 @@ class VectorSpaceClusterer(ClusterI):
         Returns the likelihood of the vector belonging to the cluster.
         """
         predicted = self.classify_vectorspace(vector)
-        return (1.0 if cluster == predicted else 0.0)
+        return 1.0 if cluster == predicted else 0.0
 
     def vector(self, vector):
         """
@@ -129,8 +128,7 @@ def cosine_distance(u, v):
     Returns 1 minus the cosine of the angle between vectors v and u. This is
     equal to 1 - (u.v / |u||v|).
     """
-    return 1 - (numpy.dot(u, v) / (
-                sqrt(numpy.dot(u, u)) * sqrt(numpy.dot(v, v))))
+    return 1 - (numpy.dot(u, v) / (sqrt(numpy.dot(u, u)) * sqrt(numpy.dot(v, v))))
 
 
 class _DendrogramNode(object):
@@ -176,7 +174,6 @@ class _DendrogramNode(object):
         return cosine_distance(self._value, comparator._value) < 0
 
 
-@python_2_unicode_compatible
 class Dendrogram(object):
     """
     Represents a dendrogram, a tree with a specified branching order.  This
@@ -231,7 +228,7 @@ class Dendrogram(object):
         """
 
         # ASCII rendering characters
-        JOIN, HLINK, VLINK = '+', '-', '|'
+        JOIN, HLINK, VLINK = "+", "-", "|"
 
         # find the root (or create one)
         if len(self._items) > 1:
@@ -251,19 +248,18 @@ class Dendrogram(object):
         rhalf = int(width - lhalf - 1)
 
         # display functions
-        def format(centre, left=' ', right=' '):
-            return '%s%s%s' % (lhalf*left, centre, right*rhalf)
+        def format(centre, left=" ", right=" "):
+            return "%s%s%s" % (lhalf * left, centre, right * rhalf)
 
         def display(str):
             stdout.write(str)
 
         # for each merge, top down
         queue = [(root._value, root)]
-        verticals = [format(' ') for leaf in leaves]
+        verticals = [format(" ") for leaf in leaves]
         while queue:
             priority, node = queue.pop()
-            child_left_leaf = list(map(
-                                lambda c: c.leaves(False)[0], node._children))
+            child_left_leaf = list(map(lambda c: c.leaves(False)[0], node._children))
             indices = list(map(leaves.index, child_left_leaf))
             if child_left_leaf:
                 min_idx = min(indices)
@@ -271,9 +267,9 @@ class Dendrogram(object):
             for i in range(len(leaves)):
                 if leaves[i] in child_left_leaf:
                     if i == min_idx:
-                        display(format(JOIN, ' ', HLINK))
+                        display(format(JOIN, " ", HLINK))
                     elif i == max_idx:
-                        display(format(JOIN, HLINK, ' '))
+                        display(format(JOIN, HLINK, " "))
                     else:
                         display(format(JOIN, HLINK, HLINK))
                     verticals[i] = format(VLINK)
@@ -281,7 +277,7 @@ class Dendrogram(object):
                     display(format(HLINK, HLINK, HLINK))
                 else:
                     display(verticals[i])
-            display('\n')
+            display("\n")
             for child in node._children:
                 if child._children:
                     queue.append((child._value, child))
@@ -289,11 +285,11 @@ class Dendrogram(object):
 
             for vertical in verticals:
                 display(vertical)
-            display('\n')
+            display("\n")
 
         # finally, display the last line
-        display(''.join(item.center(width) for item in last_row))
-        display('\n')
+        display("".join(item.center(width) for item in last_row))
+        display("\n")
 
     def __repr__(self):
         if len(self._items) > 1:
@@ -301,4 +297,4 @@ class Dendrogram(object):
         else:
             root = self._items[0]
         leaves = root.leaves(False)
-        return '<Dendrogram with %d leaves>' % len(leaves)
+        return "<Dendrogram with %d leaves>" % len(leaves)
diff --git a/nlp_resource_data/nltk/cluster/util.pyc b/nlp_resource_data/nltk/cluster/util.pyc
deleted file mode 100755 (executable)
index 9d704a3..0000000
Binary files a/nlp_resource_data/nltk/cluster/util.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 1107f7d..efbb78c
@@ -1,36 +1,29 @@
 # Natural Language Toolkit: Collections
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, absolute_import
 
-import locale
-import re
-import types
-import textwrap
-import pydoc
 import bisect
-import os
-from itertools import islice, chain, combinations
+from itertools import islice, chain
 from functools import total_ordering
-from collections import defaultdict, deque, Counter
 
-from six import text_type
+# this unused import is for python 2.7
+from collections import defaultdict, deque, Counter
 
 from nltk.internals import slice_bounds, raise_unorderable_types
-from nltk.compat import python_2_unicode_compatible
 
 
 ##########################################################################
 # Ordered Dictionary
 ##########################################################################
 
+
 class OrderedDict(dict):
     def __init__(self, data=None, **kwargs):
-        self._keys = self.keys(data, kwargs.get('keys'))
-        self._default_factory = kwargs.get('default_factory')
+        self._keys = self.keys(data, kwargs.get("keys"))
+        self._default_factory = kwargs.get("default_factory")
         if data is None:
             dict.__init__(self)
         else:
@@ -79,14 +72,16 @@ class OrderedDict(dict):
                 assert len(data) == len(keys)
                 return keys
             else:
-                assert isinstance(data, dict) or \
-                       isinstance(data, OrderedDict) or \
-                       isinstance(data, list)
+                assert (
+                    isinstance(data, dict)
+                    or isinstance(data, OrderedDict)
+                    or isinstance(data, list)
+                )
                 if isinstance(data, dict) or isinstance(data, OrderedDict):
                     return data.keys()
                 elif isinstance(data, list):
                     return [key for (key, value) in data]
-        elif '_keys' in self.__dict__:
+        elif "_keys" in self.__dict__:
             return self._keys
         else:
             return []
@@ -115,12 +110,13 @@ class OrderedDict(dict):
         # returns iterator under python 3
         return map(self.get, self._keys)
 
+
 ######################################################################
 # Lazy Sequences
 ######################################################################
 
+
 @total_ordering
-@python_2_unicode_compatible
 class AbstractLazySequence(object):
     """
     An abstract base class for read-only sequences whose values are
@@ -141,12 +137,13 @@ class AbstractLazySequence(object):
     Subclasses are required to define two methods: ``__len__()``
     and ``iterate_from()``.
     """
+
     def __len__(self):
         """
         Return the number of tokens in the corpus file underlying this
         corpus view.
         """
-        raise NotImplementedError('should be implemented by subclass')
+        raise NotImplementedError("should be implemented by subclass")
 
     def iterate_from(self, start):
         """
@@ -155,7 +152,7 @@ class AbstractLazySequence(object):
         ``start``.  If ``start>=len(self)``, then this iterator will
         generate no tokens.
         """
-        raise NotImplementedError('should be implemented by subclass')
+        raise NotImplementedError("should be implemented by subclass")
 
     def __getitem__(self, i):
         """
@@ -167,13 +164,15 @@ class AbstractLazySequence(object):
             return LazySubsequence(self, start, stop)
         else:
             # Handle negative indices
-            if i < 0: i += len(self)
-            if i < 0: raise IndexError('index out of range')
+            if i < 0:
+                i += len(self)
+            if i < 0:
+                raise IndexError("index out of range")
             # Use iterate_from to extract it.
             try:
                 return next(self.iterate_from(i))
             except StopIteration:
-                raise IndexError('index out of range')
+                raise IndexError("index out of range")
 
     def __iter__(self):
         """Return an iterator that generates the tokens in the corpus
@@ -182,7 +181,7 @@ class AbstractLazySequence(object):
 
     def count(self, value):
         """Return the number of times this list contains ``value``."""
-        return sum(1 for elt in self if elt==value)
+        return sum(1 for elt in self if elt == value)
 
     def index(self, value, start=None, stop=None):
         """Return the index of the first occurrence of ``value`` in this
@@ -191,8 +190,9 @@ class AbstractLazySequence(object):
         slice bounds -- i.e., they count from the end of the list."""
         start, stop = slice_bounds(self, slice(start, stop))
         for i, elt in enumerate(islice(self, start, stop)):
-            if elt == value: return i+start
-        raise ValueError('index(x): x not in list')
+            if elt == value:
+                return i + start
+        raise ValueError("index(x): x not in list")
 
     def __contains__(self, value):
         """Return true if this list contains ``value``."""
@@ -215,6 +215,7 @@ class AbstractLazySequence(object):
         return LazyConcatenation([self] * count)
 
     _MAX_REPR_SIZE = 60
+
     def __repr__(self):
         """
         Return a string representation for this corpus view that is
@@ -227,11 +228,11 @@ class AbstractLazySequence(object):
             pieces.append(repr(elt))
             length += len(pieces[-1]) + 2
             if length > self._MAX_REPR_SIZE and len(pieces) > 2:
-                return '[%s, ...]' % text_type(', ').join(pieces[:-1])
-        return '[%s]' % text_type(', ').join(pieces)
+                return "[%s, ...]" % ", ".join(pieces[:-1])
+        return "[%s]" % ", ".join(pieces)
 
     def __eq__(self, other):
-        return (type(self) == type(other) and list(self) == list(other))
+        return type(self) == type(other) and list(self) == list(other)
 
     def __ne__(self, other):
         return not self == other
@@ -245,8 +246,7 @@ class AbstractLazySequence(object):
         """
         :raise ValueError: Corpus view objects are unhashable.
         """
-        raise ValueError('%s objects are unhashable' %
-                         self.__class__.__name__)
+        raise ValueError("%s objects are unhashable" % self.__class__.__name__)
 
 
 class LazySubsequence(AbstractLazySequence):
@@ -271,8 +271,8 @@ class LazySubsequence(AbstractLazySequence):
         of a list) or greater than the length of ``source``.
         """
         # If the slice is small enough, just use a tuple.
-        if stop-start < cls.MIN_SIZE:
-            return list(islice(source.iterate_from(start), stop-start))
+        if stop - start < cls.MIN_SIZE:
+            return list(islice(source.iterate_from(start), stop - start))
         else:
             return object.__new__(cls)
 
@@ -285,8 +285,9 @@ class LazySubsequence(AbstractLazySequence):
         return self._stop - self._start
 
     def iterate_from(self, start):
-        return islice(self._source.iterate_from(start+self._start),
-                      max(0, len(self)-start))
+        return islice(
+            self._source.iterate_from(start + self._start), max(0, len(self) - start)
+        )
 
 
 class LazyConcatenation(AbstractLazySequence):
@@ -297,20 +298,22 @@ class LazyConcatenation(AbstractLazySequence):
     between offsets in the concatenated lists and offsets in the
     sublists.
     """
+
     def __init__(self, list_of_lists):
         self._list = list_of_lists
         self._offsets = [0]
 
     def __len__(self):
         if len(self._offsets) <= len(self._list):
-            for tok in self.iterate_from(self._offsets[-1]): pass
+            for tok in self.iterate_from(self._offsets[-1]):
+                pass
         return self._offsets[-1]
 
     def iterate_from(self, start_index):
         if start_index < self._offsets[-1]:
-            sublist_index = bisect.bisect_right(self._offsets, start_index)-1
+            sublist_index = bisect.bisect_right(self._offsets, start_index) - 1
         else:
-            sublist_index = len(self._offsets)-1
+            sublist_index = len(self._offsets) - 1
 
         index = self._offsets[sublist_index]
 
@@ -321,15 +324,17 @@ class LazyConcatenation(AbstractLazySequence):
             sublist_iter = islice(self._list, sublist_index, None)
 
         for sublist in sublist_iter:
-            if sublist_index == (len(self._offsets)-1):
-                assert index+len(sublist) >= self._offsets[-1], (
-                        'offests not monotonic increasing!')
-                self._offsets.append(index+len(sublist))
+            if sublist_index == (len(self._offsets) - 1):
+                assert (
+                    index + len(sublist) >= self._offsets[-1]
+                ), "offests not monotonic increasing!"
+                self._offsets.append(index + len(sublist))
             else:
-                assert self._offsets[sublist_index+1] == index+len(sublist), (
-                        'inconsistent list value (num elts)')
+                assert self._offsets[sublist_index + 1] == index + len(
+                    sublist
+                ), "inconsistent list value (num elts)"
 
-            for value in sublist[max(0, start_index-index):]:
+            for value in sublist[max(0, start_index - index) :]:
                 yield value
 
             index += len(sublist)
@@ -370,6 +375,7 @@ class LazyMap(AbstractLazySequence):
     using a ``LazyMap`` can significantly reduce memory usage when
     training and running classifiers.
     """
+
     def __init__(self, function, *lists, **config):
         """
         :param function: The function that should be applied to
@@ -380,18 +386,19 @@ class LazyMap(AbstractLazySequence):
             by this lazy map.  (default=5)
         """
         if not lists:
-            raise TypeError('LazyMap requires at least two args')
+            raise TypeError("LazyMap requires at least two args")
 
         self._lists = lists
         self._func = function
-        self._cache_size = config.get('cache_size', 5)
-        self._cache = ({} if self._cache_size > 0 else None)
+        self._cache_size = config.get("cache_size", 5)
+        self._cache = {} if self._cache_size > 0 else None
 
         # If you just take bool() of sum() here _all_lazy will be true just
         # in case n >= 1 list is an AbstractLazySequence.  Presumably this
         # isn't what's intended.
-        self._all_lazy = sum(isinstance(lst, AbstractLazySequence)
-                             for lst in lists) == len(lists)
+        self._all_lazy = sum(
+            isinstance(lst, AbstractLazySequence) for lst in lists
+        ) == len(lists)
 
     def iterate_from(self, index):
         # Special case: one lazy sublist
@@ -403,8 +410,10 @@ class LazyMap(AbstractLazySequence):
         # Special case: one non-lazy sublist
         elif len(self._lists) == 1:
             while True:
-                try: yield self._func(self._lists[0][index])
-                except IndexError: return
+                try:
+                    yield self._func(self._lists[0][index])
+                except IndexError:
+                    return
                 index += 1
 
         # Special case: n lazy sublists
@@ -413,8 +422,10 @@ class LazyMap(AbstractLazySequence):
             while True:
                 elements = []
                 for iterator in iterators:
-                    try: elements.append(next(iterator))
-                    except: elements.append(None)
+                    try:
+                        elements.append(next(iterator))
+                    except:  # FIXME: What is this except really catching? StopIteration?
+                        elements.append(None)
                 if elements == [None] * len(self._lists):
                     return
                 yield self._func(*elements)
@@ -423,12 +434,15 @@ class LazyMap(AbstractLazySequence):
         # general case
         else:
             while True:
-                try: elements = [lst[index] for lst in self._lists]
+                try:
+                    elements = [lst[index] for lst in self._lists]
                 except IndexError:
                     elements = [None] * len(self._lists)
                     for i, lst in enumerate(self._lists):
-                        try: elements[i] = lst[index]
-                        except IndexError: pass
+                        try:
+                            elements[i] = lst[index]
+                        except IndexError:
+                            pass
                     if elements == [None] * len(self._lists):
                         return
                 yield self._func(*elements)
@@ -440,19 +454,22 @@ class LazyMap(AbstractLazySequence):
             return LazyMap(self._func, *sliced_lists)
         else:
             # Handle negative indices
-            if index < 0: index += len(self)
-            if index < 0: raise IndexError('index out of range')
+            if index < 0:
+                index += len(self)
+            if index < 0:
+                raise IndexError("index out of range")
             # Check the cache
             if self._cache is not None and index in self._cache:
                 return self._cache[index]
             # Calculate the value
-            try: val = next(self.iterate_from(index))
+            try:
+                val = next(self.iterate_from(index))
             except StopIteration:
-                raise IndexError('index out of range')
+                raise IndexError("index out of range")
             # Update the cache
             if self._cache is not None:
                 if len(self._cache) > self._cache_size:
-                    self._cache.popitem() # discard random entry
+                    self._cache.popitem()  # discard random entry
                 self._cache[index] = val
             # Return the value
             return val
@@ -492,6 +509,7 @@ class LazyZip(LazyMap):
     avoiding the creation of an additional long sequence, memory usage can be
     significantly reduced.
     """
+
     def __init__(self, *lists):
         """
         :param lists: the underlying lists
@@ -546,12 +564,14 @@ class LazyEnumerate(LazyZip):
         """
         LazyZip.__init__(self, range(len(lst)), lst)
 
+
 class LazyIteratorList(AbstractLazySequence):
     """
     Wraps an iterator, loading its elements on demand
     and making them subscriptable.
     __repr__ displays only the first few elements.
     """
+
     def __init__(self, it, known_len=None):
         self._it = it
         self._len = known_len
@@ -567,11 +587,11 @@ class LazyIteratorList(AbstractLazySequence):
 
     def iterate_from(self, start):
         """Create a new iterator over this list starting at the given offset."""
-        while len(self._cache)<start:
+        while len(self._cache) < start:
             v = next(self._it)
             self._cache.append(v)
         i = start
-        while i<len(self._cache):
+        while i < len(self._cache):
             yield self._cache[i]
             i += 1
         while True:
@@ -588,15 +608,17 @@ class LazyIteratorList(AbstractLazySequence):
         """Return a list concatenating other with self."""
         return type(self)(chain(other, self))
 
+
 ######################################################################
 # Trie Implementation
 ######################################################################
-class Trie(defaultdict):
+class Trie(dict):
     """A Trie implementation for strings"""
+
     LEAF = True
 
     def __init__(self, strings=None):
-        """Builds a Trie object, which is built around a ``defaultdict``
+        """Builds a Trie object, which is built around a ``dict``
 
         If ``strings`` is provided, it will add the ``strings``, which
         consist of a ``list`` of ``strings``, to the Trie.
@@ -607,7 +629,7 @@ class Trie(defaultdict):
         :type strings: list(str)
 
         """
-        defaultdict.__init__(self, Trie)
+        super(Trie, self).__init__()
         if strings:
             for string in strings:
                 self.insert(string)
@@ -621,9 +643,11 @@ class Trie(defaultdict):
         :Example:
 
         >>> from nltk.collections import Trie
-        >>> trie = Trie(["ab"])
-        >>> trie
-        defaultdict(<class 'nltk.collections.Trie'>, {'a': defaultdict(<class 'nltk.collections.Trie'>, {'b': defaultdict(<class 'nltk.collections.Trie'>, {True: None})})})
+        >>> trie = Trie(["abc", "def"])
+        >>> expected = {'a': {'b': {'c': {True: None}}}, \
+                        'd': {'e': {'f': {True: None}}}}
+        >>> trie == expected
+        True
 
         """
         if len(string):
@@ -632,56 +656,6 @@ class Trie(defaultdict):
             # mark the string is complete
             self[Trie.LEAF] = None
 
-    def __str__(self):
-        return str(self.as_dict())
-
-    def as_dict(self, d=None):
-        """Convert ``defaultdict`` to common ``dict`` representation.
-
-        :param: A defaultdict containing strings mapped to nested defaultdicts.
-            This is the structure of the trie. (Default is None)
-        :type: defaultdict(str -> defaultdict)
-        :return: Even though ``defaultdict`` is a subclass of ``dict`` and thus
-            can be converted to a simple ``dict`` using ``dict()``, in our case
-            it's a nested ``defaultdict``, so here's a quick trick to provide to
-            us the ``dict`` representation of the ``Trie`` without
-            ``defaultdict(<class 'nltk.collections.Trie'>, ...``
-        :rtype: dict(str -> dict(bool -> None))
-            Note: there can be an arbitrarily deeply nested
-            ``dict(str -> dict(str -> dict(..))``, but the last
-            level will have ``dict(str -> dict(bool -> None))``
-
-        :Example:
-
-        >>> from nltk.collections import Trie
-        >>> trie = Trie(["abc", "def"])
-        >>> expected = {'a': {'b': {'c': {True: None}}}, 'd': {'e': {'f': {True: None}}}}
-        >>> trie.as_dict() == expected
-        True
-
-        """
-        def _default_to_regular(d):
-            """
-            Source: http://stackoverflow.com/a/26496899/4760801
-
-            :param d: Nested ``defaultdict`` to convert to regular ``dict``
-            :type d: defaultdict(str -> defaultdict(...))
-            :return: A dict representation of the defaultdict
-            :rtype: dict(str -> dict(str -> ...))
-
-            :Example:
-
-            >>> from collections import defaultdict
-            >>> d = defaultdict(defaultdict)
-            >>> d["one"]["two"] = "three"
-            >>> d
-            defaultdict(<type 'collections.defaultdict'>, {'one': defaultdict(None, {'two': 'three'})})
-            >>> _default_to_regular(d)
-            {'one': {'two': 'three'}}
-
-            """
-            if isinstance(d, defaultdict):
-                d = {k: _default_to_regular(v) for k, v in d.items()}
-            return d
-
-        return _default_to_regular(self)
+    def __missing__(self, key):
+        self[key] = Trie()
+        return self[key]
diff --git a/nlp_resource_data/nltk/collections.pyc b/nlp_resource_data/nltk/collections.pyc
deleted file mode 100755 (executable)
index 128856b..0000000
Binary files a/nlp_resource_data/nltk/collections.pyc and /dev/null differ
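For reference, a quick sanity check of the converted dict-based Trie above; a minimal sketch that assumes the nltk package from this commit is importable (the expected dict mirrors the doctest in the hunk, plus one extra insert):

    from nltk.collections import Trie

    # Build and extend a trie; equality against a plain dict works because
    # Trie subclasses dict and autovivifies children via __missing__.
    trie = Trie(["abc", "def"])
    trie.insert("abd")
    assert trie == {
        "a": {"b": {"c": {True: None}, "d": {True: None}}},
        "d": {"e": {"f": {True: None}}},
    }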
diff --git a/nlp_resource_data/nltk/collocations.py b/nlp_resource_data/nltk/collocations.py
old mode 100755 (executable)
new mode 100644 (file)
index 0b5a1f5..150e29b
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Collocations and Association Measures
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Joel Nothman <jnothman@student.usyd.edu.au>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
@@ -23,7 +23,6 @@ these functionalities, dependent on being provided a function which scores a
 ngram given appropriate frequency counts. A number of standard association
 measures are provided in bigram_measures and trigram_measures.
 """
-from __future__ import print_function
 
 # Possible TODOs:
 # - consider the distinction between f(x,_) and f(x) and whether our
@@ -32,11 +31,17 @@ from __future__ import print_function
 #   and unigram counts (raw_freq, pmi, student_t)
 
 import itertools as _itertools
-from six import iteritems
 
 from nltk.probability import FreqDist
 from nltk.util import ngrams
-from nltk.metrics import ContingencyMeasures, BigramAssocMeasures, TrigramAssocMeasures
+
+# these two unused imports are referenced in collocations.doctest
+from nltk.metrics import (
+    ContingencyMeasures,
+    BigramAssocMeasures,
+    TrigramAssocMeasures,
+    QuadgramAssocMeasures,
+)
 from nltk.metrics.spearman import ranks_from_scores, spearman_correlation
 
 
@@ -57,34 +62,42 @@ class AbstractCollocationFinder(object):
         self.ngram_fd = ngram_fd
 
     @classmethod
-    def _build_new_documents(cls, documents, window_size, pad_left=False, pad_right=False, pad_symbol=None):
-        '''
+    def _build_new_documents(
+        cls, documents, window_size, pad_left=False, pad_right=False, pad_symbol=None
+    ):
+        """
         Pad the document with the place holder according to the window_size
-        '''
+        """
         padding = (pad_symbol,) * (window_size - 1)
         if pad_right:
-            return _itertools.chain.from_iterable(_itertools.chain(doc, padding) for doc in documents)
+            return _itertools.chain.from_iterable(
+                _itertools.chain(doc, padding) for doc in documents
+            )
         if pad_left:
-            return _itertools.chain.from_iterable(_itertools.chain(padding, doc) for doc in documents)
+            return _itertools.chain.from_iterable(
+                _itertools.chain(padding, doc) for doc in documents
+            )
 
     @classmethod
     def from_documents(cls, documents):
         """Constructs a collocation finder given a collection of documents,
         each of which is a list (or iterable) of tokens.
         """
-        #return cls.from_words(_itertools.chain(*documents))
-        return cls.from_words(cls._build_new_documents(documents, cls.default_ws, pad_right=True))
+        # return cls.from_words(_itertools.chain(*documents))
+        return cls.from_words(
+            cls._build_new_documents(documents, cls.default_ws, pad_right=True)
+        )
 
     @staticmethod
     def _ngram_freqdist(words, n):
-        return FreqDist(tuple(words[i:i + n]) for i in range(len(words) - 1))
+        return FreqDist(tuple(words[i : i + n]) for i in range(len(words) - 1))
 
     def _apply_filter(self, fn=lambda ngram, freq: False):
         """Generic filter removes ngrams from the frequency distribution
         if the function returns True when passed an ngram tuple.
         """
         tmp_ngram = FreqDist()
-        for ngram, freq in iteritems(self.ngram_fd):
+        for ngram, freq in self.ngram_fd.items():
             if not fn(ngram, freq):
                 tmp_ngram[ngram] = freq
         self.ngram_fd = tmp_ngram
@@ -140,6 +153,7 @@ class BigramCollocationFinder(AbstractCollocationFinder):
     association measures. It is often useful to use from_words() rather than
     constructing an instance directly.
     """
+
     default_ws = 2
 
     def __init__(self, word_fd, bigram_fd, window_size=2):
@@ -190,6 +204,7 @@ class TrigramCollocationFinder(AbstractCollocationFinder):
     association measures. It is often useful to use from_words() rather than
     constructing an instance directly.
     """
+
     default_ws = 3
 
     def __init__(self, word_fd, bigram_fd, wildcard_fd, trigram_fd):
@@ -249,16 +264,14 @@ class TrigramCollocationFinder(AbstractCollocationFinder):
         n_ixx = self.word_fd[w1]
         n_xix = self.word_fd[w2]
         n_xxi = self.word_fd[w3]
-        return score_fn(n_iii,
-                        (n_iix, n_ixi, n_xii),
-                        (n_ixx, n_xix, n_xxi),
-                        n_all)
+        return score_fn(n_iii, (n_iix, n_ixi, n_xii), (n_ixx, n_xix, n_xxi), n_all)
 
 
 class QuadgramCollocationFinder(AbstractCollocationFinder):
     """A tool for the finding and ranking of quadgram collocations or other association measures.
     It is often useful to use from_words() rather than constructing an instance directly.
     """
+
     default_ws = 4
 
     def __init__(self, word_fd, quadgram_fd, ii, iii, ixi, ixxi, iixi, ixii):
@@ -330,16 +343,22 @@ class QuadgramCollocationFinder(AbstractCollocationFinder):
         n_xixx = self.word_fd[w2]
         n_xxix = self.word_fd[w3]
         n_xxxi = self.word_fd[w4]
-        return score_fn(n_iiii,
-                        (n_iiix, n_iixi, n_ixii, n_xiii),
-                        (n_iixx, n_ixix, n_ixxi, n_xixi, n_xxii, n_xiix),
-                        (n_ixxx, n_xixx, n_xxix, n_xxxi),
-                        n_all)
+        return score_fn(
+            n_iiii,
+            (n_iiix, n_iixi, n_ixii, n_xiii),
+            (n_iixx, n_ixix, n_ixxi, n_xixi, n_xxii, n_xiix),
+            (n_ixxx, n_xixx, n_xxix, n_xxxi),
+            n_all,
+        )
 
 
 def demo(scorer=None, compare_scorer=None):
     """Finds bigram collocations in the files of the WebText corpus."""
-    from nltk.metrics import BigramAssocMeasures, spearman_correlation, ranks_from_scores
+    from nltk.metrics import (
+        BigramAssocMeasures,
+        spearman_correlation,
+        ranks_from_scores,
+    )
 
     if scorer is None:
         scorer = BigramAssocMeasures.likelihood_ratio
@@ -348,41 +367,46 @@ def demo(scorer=None, compare_scorer=None):
 
     from nltk.corpus import stopwords, webtext
 
-    ignored_words = stopwords.words('english')
+    ignored_words = stopwords.words("english")
     word_filter = lambda w: len(w) < 3 or w.lower() in ignored_words
 
     for file in webtext.fileids():
-        words = [word.lower()
-                 for word in webtext.words(file)]
+        words = [word.lower() for word in webtext.words(file)]
 
         cf = BigramCollocationFinder.from_words(words)
         cf.apply_freq_filter(3)
         cf.apply_word_filter(word_filter)
 
-        corr = spearman_correlation(ranks_from_scores(cf.score_ngrams(scorer)),
-                                    ranks_from_scores(cf.score_ngrams(compare_scorer)))
+        corr = spearman_correlation(
+            ranks_from_scores(cf.score_ngrams(scorer)),
+            ranks_from_scores(cf.score_ngrams(compare_scorer)),
+        )
         print(file)
-        print('\t', [' '.join(tup) for tup in cf.nbest(scorer, 15)])
-        print('\t Correlation to %s: %0.4f' % (compare_scorer.__name__, corr))
+        print("\t", [" ".join(tup) for tup in cf.nbest(scorer, 15)])
+        print("\t Correlation to %s: %0.4f" % (compare_scorer.__name__, corr))
+
 
 # Slows down loading too much
 # bigram_measures = BigramAssocMeasures()
 # trigram_measures = TrigramAssocMeasures()
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     import sys
     from nltk.metrics import BigramAssocMeasures
 
     try:
-        scorer = eval('BigramAssocMeasures.' + sys.argv[1])
+        scorer = eval("BigramAssocMeasures." + sys.argv[1])
     except IndexError:
         scorer = None
     try:
-        compare_scorer = eval('BigramAssocMeasures.' + sys.argv[2])
+        compare_scorer = eval("BigramAssocMeasures." + sys.argv[2])
     except IndexError:
         compare_scorer = None
 
     demo(scorer, compare_scorer)
 
-__all__ = ['BigramCollocationFinder',
-           'TrigramCollocationFinder', 'QuadgramCollocationFinder']
+__all__ = [
+    "BigramCollocationFinder",
+    "TrigramCollocationFinder",
+    "QuadgramCollocationFinder",
+]
diff --git a/nlp_resource_data/nltk/collocations.pyc b/nlp_resource_data/nltk/collocations.pyc
deleted file mode 100755 (executable)
index 7487fce..0000000
Binary files a/nlp_resource_data/nltk/collocations.pyc and /dev/null differ
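The collocation API exercised by demo() reduces to the pattern below; a minimal sketch (the token stream is illustrative, not from the WebText corpus used in the demo):

    from nltk.collocations import BigramCollocationFinder
    from nltk.metrics import BigramAssocMeasures

    # Any iterable of tokens works; from_words builds the frequency tables.
    words = "the quick brown fox saw the quick brown dog".split()
    finder = BigramCollocationFinder.from_words(words)
    finder.apply_freq_filter(2)  # drop bigrams seen fewer than 2 times
    print(finder.nbest(BigramAssocMeasures.likelihood_ratio, 3))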
diff --git a/nlp_resource_data/nltk/compat.py b/nlp_resource_data/nltk/compat.py
old mode 100755 (executable)
new mode 100644 (file)
index 8efda7e..163a200
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Compatibility
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 #
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import absolute_import, print_function
 import os
-import sys
-from functools import update_wrapper, wraps
-import fractions
-import unicodedata
-
-from six import string_types, text_type
-
-# Python 2/3 compatibility layer. Based on six.
-
-PY3 = sys.version_info[0] == 3
-
-if PY3:
-    def get_im_class(meth):
-        return meth.__self__.__class__
-
-    import io
-    StringIO = io.StringIO
-    BytesIO = io.BytesIO
-
-    from datetime import timezone
-    UTC = timezone.utc
-
-    from tempfile import TemporaryDirectory
-
-else:
-    def get_im_class(meth):
-        return meth.im_class
-
-    try:
-        from cStringIO import StringIO
-    except ImportError:
-        from StringIO import StringIO
-    BytesIO = StringIO
-
-    from datetime import tzinfo, timedelta
-
-    ZERO = timedelta(0)
-    HOUR = timedelta(hours=1)
-
-    # A UTC class for python 2.7
-    class UTC(tzinfo):
-        """UTC"""
-
-        def utcoffset(self, dt):
-            return ZERO
-
-        def tzname(self, dt):
-            return "UTC"
-
-        def dst(self, dt):
-            return ZERO
-
-    UTC = UTC()
-
-    import csv
-    import codecs
-    import cStringIO
-
-    class UnicodeWriter:
-        """
-        A CSV writer which will write rows to CSV file "f",
-        which is encoded in the given encoding.
-        see https://docs.python.org/2/library/csv.html
-        """
-
-        def __init__(self, f, dialect=csv.excel, encoding="utf-8",
-                     errors='replace', **kwds):
-            # Redirect output to a queue
-            self.queue = cStringIO.StringIO()
-            self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
-            self.stream = f
-            encoder_cls = codecs.getincrementalencoder(encoding)
-            self.encoder = encoder_cls(errors=errors)
-
-        def encode(self, data):
-            if isinstance(data, string_types):
-                return data.encode("utf-8")
-            else:
-                return data
-
-        def writerow(self, row):
-            self.writer.writerow([self.encode(s) for s in row])
-            # Fetch UTF-8 output from the queue ...
-            data = self.queue.getvalue()
-            data = data.decode("utf-8")
-            # ... and reencode it into the target encoding
-            data = self.encoder.encode(data, 'replace')
-            # write to the target stream
-            self.stream.write(data)
-            # empty queue
-            self.queue.truncate(0)
-
-    import warnings as _warnings
-    import os as _os
-    from tempfile import mkdtemp
-
-    class TemporaryDirectory(object):
-        """Create and return a temporary directory.  This has the same
-        behavior as mkdtemp but can be used as a context manager.  For
-        example:
-
-            with TemporaryDirectory() as tmpdir:
-                ...
-
-        Upon exiting the context, the directory and everything contained
-        in it are removed.
-
-        http://stackoverflow.com/questions/19296146/tempfile-temporarydirectory-context-manager-in-python-2-7
-        """
-
-        def __init__(self, suffix="", prefix="tmp", dir=None):
-            self._closed = False
-            self.name = None  # Handle mkdtemp raising an exception
-            self.name = mkdtemp(suffix, prefix, dir)
-
-        def __repr__(self):
-            return "<{} {!r}>".format(self.__class__.__name__, self.name)
-
-        def __enter__(self):
-            return self.name
-
-        def cleanup(self, _warn=False):
-            if self.name and not self._closed:
-                try:
-                    self._rmtree(self.name)
-                except (TypeError, AttributeError) as ex:
-                    # Issue #10188: Emit a warning on stderr
-                    # if the directory could not be cleaned
-                    # up due to missing globals
-                    if "None" not in str(ex):
-                        raise
-                    print("ERROR: {!r} while cleaning up {!r}".format(ex,
-                                                                      self),
-                          file=sys.stderr)
-                    return
-                self._closed = True
-                if _warn:
-                    self._warn("Implicitly cleaning up {!r}".format(self),
-                               Warning)
-
-        def __exit__(self, exc, value, tb):
-            self.cleanup()
-
-        def __del__(self):
-            # Issue a Warning if implicit cleanup needed
-            self.cleanup(_warn=True)
-
-        # XXX (ncoghlan): The following code attempts to make
-        # this class tolerant of the module nulling out process
-        # that happens during CPython interpreter shutdown
-        # Alas, it doesn't actually manage it. See issue #10188
-        _listdir = staticmethod(_os.listdir)
-        _path_join = staticmethod(_os.path.join)
-        _isdir = staticmethod(_os.path.isdir)
-        _islink = staticmethod(_os.path.islink)
-        _remove = staticmethod(_os.remove)
-        _rmdir = staticmethod(_os.rmdir)
-        _warn = _warnings.warn
-
-        def _rmtree(self, path):
-            # Essentially a stripped down version of shutil.rmtree.  We can't
-            # use globals because they may be None'ed out at shutdown.
-            for name in self._listdir(path):
-                fullname = self._path_join(path, name)
-                try:
-                    isdir = (self._isdir(fullname) and not
-                             self._islink(fullname))
-                except OSError:
-                    isdir = False
-                if isdir:
-                    self._rmtree(fullname)
-                else:
-                    try:
-                        self._remove(fullname)
-                    except OSError:
-                        pass
-            try:
-                self._rmdir(path)
-            except OSError:
-                pass
+from functools import wraps
 
 # ======= Compatibility for datasets that care about Python versions ========
 
 # The following datasets have a /PY3 subdirectory containing
 # a full copy of the data which has been re-encoded or repickled.
-DATA_UPDATES = [("chunkers", "maxent_ne_chunker"),
-                ("help", "tagsets"),
-                ("taggers", "maxent_treebank_pos_tagger"),
-                ("tokenizers", "punkt")]
+DATA_UPDATES = [
+    ("chunkers", "maxent_ne_chunker"),
+    ("help", "tagsets"),
+    ("taggers", "maxent_treebank_pos_tagger"),
+    ("tokenizers", "punkt"),
+]
 
 _PY3_DATA_UPDATES = [os.path.join(*path_list) for path_list in DATA_UPDATES]
 
-
 def add_py3_data(path):
-    if PY3:
-        for item in _PY3_DATA_UPDATES:
-            if item in str(path) and "/PY3" not in str(path):
-                pos = path.index(item) + len(item)
-                if path[pos:pos + 4] == ".zip":
-                    pos += 4
-                path = path[:pos] + "/PY3" + path[pos:]
-                break
+    for item in _PY3_DATA_UPDATES:
+        if item in str(path) and "/PY3" not in str(path):
+            pos = path.index(item) + len(item)
+            if path[pos : pos + 4] == ".zip":
+                pos += 4
+            path = path[:pos] + "/PY3" + path[pos:]
+            break
     return path
 
 
@@ -219,147 +39,5 @@ def py3_data(init_func):
     def _decorator(*args, **kwargs):
         args = (args[0], add_py3_data(args[1])) + args[2:]
         return init_func(*args, **kwargs)
-    return wraps(init_func)(_decorator)
 
-
-# ======= Compatibility layer for __str__ and __repr__ ==========
-def remove_accents(text):
-
-    if isinstance(text, bytes):
-        text = text.decode('ascii')
-
-    category = unicodedata.category  # this gives a small (~10%) speedup
-    return ''.join(
-        c for c in unicodedata.normalize('NFKD', text) if category(c) != 'Mn'
-    )
-
-
-# Select the best transliteration method:
-try:
-    # Older versions of Unidecode are licensed under Artistic License;
-    # assume an older version is installed.
-    from unidecode import unidecode as transliterate
-except ImportError:
-    try:
-        # text-unidecode implementation is worse than Unidecode
-        # implementation so Unidecode is preferred.
-        from text_unidecode import unidecode as transliterate
-    except ImportError:
-        # This transliteration method should be enough
-        # for many Western languages.
-        transliterate = remove_accents
-
-
-def python_2_unicode_compatible(klass):
-    """
-    This decorator defines __unicode__ method and fixes
-    __repr__ and __str__ methods under Python 2.
-
-    To support Python 2 and 3 with a single code base,
-    define __str__ and __repr__ methods returning unicode
-    text and apply this decorator to the class.
-
-    Original __repr__ and __str__ would be available
-    as unicode_repr and __unicode__ (under both Python 2
-    and Python 3).
-    """
-
-    if not issubclass(klass, object):
-        raise ValueError("This decorator doesn't work for old-style classes")
-
-    # both __unicode__ and unicode_repr are public because they
-    # may be useful in console under Python 2.x
-
-    # if __str__ or __repr__ are not overriden in a subclass,
-    # they may be already fixed by this decorator in a parent class
-    # and we shouldn't them again
-
-    if not _was_fixed(klass.__str__):
-        klass.__unicode__ = klass.__str__
-        if not PY3:
-            klass.__str__ = _7bit(_transliterated(klass.__unicode__))
-
-    if not _was_fixed(klass.__repr__):
-        klass.unicode_repr = klass.__repr__
-        if not PY3:
-            klass.__repr__ = _7bit(klass.unicode_repr)
-
-    return klass
-
-
-def unicode_repr(obj):
-    """
-    For classes that was fixed with @python_2_unicode_compatible
-    ``unicode_repr`` returns ``obj.unicode_repr()``; for unicode strings
-    the result is returned without "u" letter (to make output the
-    same under Python 2.x and Python 3.x); for other variables
-    it is the same as ``repr``.
-    """
-    if PY3:
-        return repr(obj)
-
-    # Python 2.x
-    if hasattr(obj, 'unicode_repr'):
-        return obj.unicode_repr()
-
-    if isinstance(obj, text_type):
-        return repr(obj)[1:]  # strip "u" letter from output
-
-    return repr(obj)
-
-
-def _transliterated(method):
-    def wrapper(self):
-        return transliterate(method(self))
-
-    update_wrapper(wrapper, method, ["__name__", "__doc__"])
-    if hasattr(method, "_nltk_compat_7bit"):
-        wrapper._nltk_compat_7bit = method._nltk_compat_7bit
-
-    wrapper._nltk_compat_transliterated = True
-    return wrapper
-
-
-def _7bit(method):
-    def wrapper(self):
-        return method(self).encode('ascii', 'backslashreplace')
-
-    update_wrapper(wrapper, method, ["__name__", "__doc__"])
-
-    if hasattr(method, "_nltk_compat_transliterated"):
-        wrapper._nltk_compat_transliterated = (
-            method._nltk_compat_transliterated
-        )
-
-    wrapper._nltk_compat_7bit = True
-    return wrapper
-
-
-def _was_fixed(method):
-    return (getattr(method, "_nltk_compat_7bit", False) or
-            getattr(method, "_nltk_compat_transliterated", False))
-
-
-class Fraction(fractions.Fraction):
-    """
-    This is a simplified backwards compatible version of fractions.Fraction
-    from Python >=3.5. It adds the `_normalize` parameter such that it does
-    not normalize the denominator to the Greatest Common Divisor (gcd) when
-    the numerator is 0.
-
-    This is most probably only used by the nltk.translate.bleu_score.py where
-    numerator and denominator of the different ngram precisions are mutable.
-    But the idea of "mutable" fraction might not be applicable to other usages,
-    See http://stackoverflow.com/questions/34561265
-
-    This objects should be deprecated once NLTK stops supporting Python < 3.5
-    See https://github.com/nltk/nltk/issues/1330
-    """
-    def __new__(cls, numerator=0, denominator=None, _normalize=True):
-        cls = super(Fraction, cls).__new__(cls, numerator, denominator)
-        # To emulate fraction.Fraction.from_float across Python >=2.7,
-        # check that numerator is an integer and denominator is not None.
-        if not _normalize and type(numerator) == int and denominator:
-            cls._numerator = numerator
-            cls._denominator = denominator
-        return cls
+    return wraps(init_func)(_decorator)
diff --git a/nlp_resource_data/nltk/compat.pyc b/nlp_resource_data/nltk/compat.pyc
deleted file mode 100755 (executable)
index 2f1580a..0000000
Binary files a/nlp_resource_data/nltk/compat.pyc and /dev/null differ
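With the Python 2 branch removed, add_py3_data now rewrites matching paths unconditionally; a small sketch of the effect on POSIX (the path below is hypothetical):

    from nltk.compat import add_py3_data

    # "tokenizers/punkt" is a DATA_UPDATES entry, so "/PY3" is spliced
    # in after the ".zip" component of this hypothetical path.
    path = "corpora/tokenizers/punkt.zip/english.pickle"
    print(add_py3_data(path))  # corpora/tokenizers/punkt.zip/PY3/english.pickle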
diff --git a/nlp_resource_data/nltk/corpus/__init__.py b/nlp_resource_data/nltk/corpus/__init__.py
old mode 100755 (executable)
new mode 100644 (file)
index d9ccb54..b305c95
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus Readers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -66,211 +66,360 @@ from nltk.corpus.util import LazyCorpusLoader
 from nltk.corpus.reader import *
 
 abc = LazyCorpusLoader(
-    'abc', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding=[
-            ('science', 'latin_1'),
-            ('rural', 'utf8')])
-alpino = LazyCorpusLoader(
-    'alpino', AlpinoCorpusReader, tagset='alpino')
+    "abc",
+    PlaintextCorpusReader,
+    r"(?!\.).*\.txt",
+    encoding=[("science", "latin_1"), ("rural", "utf8")],
+)
+alpino = LazyCorpusLoader("alpino", AlpinoCorpusReader, tagset="alpino")
 brown = LazyCorpusLoader(
-    'brown', CategorizedTaggedCorpusReader, r'c[a-z]\d\d',
-    cat_file='cats.txt', tagset='brown', encoding="ascii")
+    "brown",
+    CategorizedTaggedCorpusReader,
+    r"c[a-z]\d\d",
+    cat_file="cats.txt",
+    tagset="brown",
+    encoding="ascii",
+)
 cess_cat = LazyCorpusLoader(
-    'cess_cat', BracketParseCorpusReader, r'(?!\.).*\.tbf',
-    tagset='unknown', encoding='ISO-8859-15')
+    "cess_cat",
+    BracketParseCorpusReader,
+    r"(?!\.).*\.tbf",
+    tagset="unknown",
+    encoding="ISO-8859-15",
+)
 cess_esp = LazyCorpusLoader(
-    'cess_esp', BracketParseCorpusReader, r'(?!\.).*\.tbf',
-    tagset='unknown', encoding='ISO-8859-15')
-cmudict = LazyCorpusLoader(
-    'cmudict', CMUDictCorpusReader, ['cmudict'])
-comtrans = LazyCorpusLoader(
-    'comtrans', AlignedCorpusReader, r'(?!\.).*\.txt')
+    "cess_esp",
+    BracketParseCorpusReader,
+    r"(?!\.).*\.tbf",
+    tagset="unknown",
+    encoding="ISO-8859-15",
+)
+cmudict = LazyCorpusLoader("cmudict", CMUDictCorpusReader, ["cmudict"])
+comtrans = LazyCorpusLoader("comtrans", AlignedCorpusReader, r"(?!\.).*\.txt")
 comparative_sentences = LazyCorpusLoader(
-    'comparative_sentences', ComparativeSentencesCorpusReader, r'labeledSentences\.txt',
-    encoding='latin-1')
+    "comparative_sentences",
+    ComparativeSentencesCorpusReader,
+    r"labeledSentences\.txt",
+    encoding="latin-1",
+)
 conll2000 = LazyCorpusLoader(
-    'conll2000', ConllChunkCorpusReader,
-    ['train.txt', 'test.txt'], ('NP','VP','PP'),
-    tagset='wsj', encoding='ascii')
+    "conll2000",
+    ConllChunkCorpusReader,
+    ["train.txt", "test.txt"],
+    ("NP", "VP", "PP"),
+    tagset="wsj",
+    encoding="ascii",
+)
 conll2002 = LazyCorpusLoader(
-    'conll2002', ConllChunkCorpusReader, '.*\.(test|train).*',
-    ('LOC', 'PER', 'ORG', 'MISC'), encoding='utf-8')
+    "conll2002",
+    ConllChunkCorpusReader,
+    ".*\.(test|train).*",
+    ("LOC", "PER", "ORG", "MISC"),
+    encoding="utf-8",
+)
 conll2007 = LazyCorpusLoader(
-    'conll2007', DependencyCorpusReader, '.*\.(test|train).*', encoding=[
-        ('eus', 'ISO-8859-2'),
-        ('esp', 'utf8')])
-crubadan = LazyCorpusLoader(
-    'crubadan', CrubadanCorpusReader, '.*\.txt')
+    "conll2007",
+    DependencyCorpusReader,
+    ".*\.(test|train).*",
+    encoding=[("eus", "ISO-8859-2"), ("esp", "utf8")],
+)
+crubadan = LazyCorpusLoader("crubadan", CrubadanCorpusReader, ".*\.txt")
 dependency_treebank = LazyCorpusLoader(
-    'dependency_treebank', DependencyCorpusReader, '.*\.dp',
-    encoding='ascii')
+    "dependency_treebank", DependencyCorpusReader, ".*\.dp", encoding="ascii"
+)
 floresta = LazyCorpusLoader(
-    'floresta', BracketParseCorpusReader, r'(?!\.).*\.ptb', '#',
-    tagset='unknown', encoding='ISO-8859-15')
+    "floresta",
+    BracketParseCorpusReader,
+    r"(?!\.).*\.ptb",
+    "#",
+    tagset="unknown",
+    encoding="ISO-8859-15",
+)
 framenet15 = LazyCorpusLoader(
-    'framenet_v15', FramenetCorpusReader, ['frRelation.xml','frameIndex.xml','fulltextIndex.xml','luIndex.xml','semTypes.xml'])
+    "framenet_v15",
+    FramenetCorpusReader,
+    [
+        "frRelation.xml",
+        "frameIndex.xml",
+        "fulltextIndex.xml",
+        "luIndex.xml",
+        "semTypes.xml",
+    ],
+)
 framenet = LazyCorpusLoader(
-    'framenet_v17', FramenetCorpusReader, ['frRelation.xml','frameIndex.xml','fulltextIndex.xml','luIndex.xml','semTypes.xml'])
+    "framenet_v17",
+    FramenetCorpusReader,
+    [
+        "frRelation.xml",
+        "frameIndex.xml",
+        "fulltextIndex.xml",
+        "luIndex.xml",
+        "semTypes.xml",
+    ],
+)
 gazetteers = LazyCorpusLoader(
-    'gazetteers', WordListCorpusReader, r'(?!LICENSE|\.).*\.txt',
-    encoding='ISO-8859-2')
+    "gazetteers", WordListCorpusReader, r"(?!LICENSE|\.).*\.txt", encoding="ISO-8859-2"
+)
 genesis = LazyCorpusLoader(
-    'genesis', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding=[
-        ('finnish|french|german', 'latin_1'),
-        ('swedish', 'cp865'),
-        ('.*', 'utf_8')])
+    "genesis",
+    PlaintextCorpusReader,
+    r"(?!\.).*\.txt",
+    encoding=[
+        ("finnish|french|german", "latin_1"),
+        ("swedish", "cp865"),
+        (".*", "utf_8"),
+    ],
+)
 gutenberg = LazyCorpusLoader(
-    'gutenberg', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='latin1')
-ieer = LazyCorpusLoader(
-    'ieer', IEERCorpusReader, r'(?!README|\.).*')
+    "gutenberg", PlaintextCorpusReader, r"(?!\.).*\.txt", encoding="latin1"
+)
+ieer = LazyCorpusLoader("ieer", IEERCorpusReader, r"(?!README|\.).*")
 inaugural = LazyCorpusLoader(
-    'inaugural', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='latin1')
+    "inaugural", PlaintextCorpusReader, r"(?!\.).*\.txt", encoding="latin1"
+)
 # [XX] This should probably just use TaggedCorpusReader:
 indian = LazyCorpusLoader(
-    'indian', IndianCorpusReader, r'(?!\.).*\.pos',
-    tagset='unknown', encoding='utf8')
+    "indian", IndianCorpusReader, r"(?!\.).*\.pos", tagset="unknown", encoding="utf8"
+)
 
-jeita = LazyCorpusLoader(
-    'jeita', ChasenCorpusReader, r'.*\.chasen', encoding='utf-8')
-knbc = LazyCorpusLoader(
-    'knbc/corpus1', KNBCorpusReader, r'.*/KN.*', encoding='euc-jp')
-lin_thesaurus = LazyCorpusLoader(
-    'lin_thesaurus', LinThesaurusCorpusReader, r'.*\.lsp')
+jeita = LazyCorpusLoader("jeita", ChasenCorpusReader, r".*\.chasen", encoding="utf-8")
+knbc = LazyCorpusLoader("knbc/corpus1", KNBCorpusReader, r".*/KN.*", encoding="euc-jp")
+lin_thesaurus = LazyCorpusLoader("lin_thesaurus", LinThesaurusCorpusReader, r".*\.lsp")
 mac_morpho = LazyCorpusLoader(
-    'mac_morpho', MacMorphoCorpusReader, r'(?!\.).*\.txt',
-    tagset='unknown', encoding='latin-1')
+    "mac_morpho",
+    MacMorphoCorpusReader,
+    r"(?!\.).*\.txt",
+    tagset="unknown",
+    encoding="latin-1",
+)
 machado = LazyCorpusLoader(
-    'machado', PortugueseCategorizedPlaintextCorpusReader,
-    r'(?!\.).*\.txt', cat_pattern=r'([a-z]*)/.*', encoding='latin-1')
+    "machado",
+    PortugueseCategorizedPlaintextCorpusReader,
+    r"(?!\.).*\.txt",
+    cat_pattern=r"([a-z]*)/.*",
+    encoding="latin-1",
+)
 masc_tagged = LazyCorpusLoader(
-    'masc_tagged', CategorizedTaggedCorpusReader, r'(spoken|written)/.*\.txt',
-    cat_file='categories.txt', tagset='wsj', encoding="utf-8", sep="_")
+    "masc_tagged",
+    CategorizedTaggedCorpusReader,
+    r"(spoken|written)/.*\.txt",
+    cat_file="categories.txt",
+    tagset="wsj",
+    encoding="utf-8",
+    sep="_",
+)
 movie_reviews = LazyCorpusLoader(
-    'movie_reviews', CategorizedPlaintextCorpusReader,
-    r'(?!\.).*\.txt', cat_pattern=r'(neg|pos)/.*',
-    encoding='ascii')
+    "movie_reviews",
+    CategorizedPlaintextCorpusReader,
+    r"(?!\.).*\.txt",
+    cat_pattern=r"(neg|pos)/.*",
+    encoding="ascii",
+)
 multext_east = LazyCorpusLoader(
-    'mte_teip5', MTECorpusReader, r'(oana).*\.xml', encoding="utf-8")
+    "mte_teip5", MTECorpusReader, r"(oana).*\.xml", encoding="utf-8"
+)
 names = LazyCorpusLoader(
-    'names', WordListCorpusReader, r'(?!\.).*\.txt', encoding='ascii')
+    "names", WordListCorpusReader, r"(?!\.).*\.txt", encoding="ascii"
+)
 nps_chat = LazyCorpusLoader(
-    'nps_chat', NPSChatCorpusReader, r'(?!README|\.).*\.xml', tagset='wsj')
+    "nps_chat", NPSChatCorpusReader, r"(?!README|\.).*\.xml", tagset="wsj"
+)
 opinion_lexicon = LazyCorpusLoader(
-    'opinion_lexicon', OpinionLexiconCorpusReader, r'(\w+)\-words\.txt',
-    encoding='ISO-8859-2')
+    "opinion_lexicon",
+    OpinionLexiconCorpusReader,
+    r"(\w+)\-words\.txt",
+    encoding="ISO-8859-2",
+)
 ppattach = LazyCorpusLoader(
-    'ppattach', PPAttachmentCorpusReader, ['training', 'test', 'devset'])
+    "ppattach", PPAttachmentCorpusReader, ["training", "test", "devset"]
+)
 product_reviews_1 = LazyCorpusLoader(
-    'product_reviews_1', ReviewsCorpusReader, r'^(?!Readme).*\.txt', encoding='utf8')
+    "product_reviews_1", ReviewsCorpusReader, r"^(?!Readme).*\.txt", encoding="utf8"
+)
 product_reviews_2 = LazyCorpusLoader(
-    'product_reviews_2', ReviewsCorpusReader, r'^(?!Readme).*\.txt', encoding='utf8')
+    "product_reviews_2", ReviewsCorpusReader, r"^(?!Readme).*\.txt", encoding="utf8"
+)
 pros_cons = LazyCorpusLoader(
-    'pros_cons', ProsConsCorpusReader, r'Integrated(Cons|Pros)\.txt',
-    cat_pattern=r'Integrated(Cons|Pros)\.txt', encoding='ISO-8859-2')
-ptb = LazyCorpusLoader( # Penn Treebank v3: WSJ and Brown portions
-    'ptb', CategorizedBracketParseCorpusReader, r'(WSJ/\d\d/WSJ_\d\d|BROWN/C[A-Z]/C[A-Z])\d\d.MRG',
-    cat_file='allcats.txt', tagset='wsj')
+    "pros_cons",
+    ProsConsCorpusReader,
+    r"Integrated(Cons|Pros)\.txt",
+    cat_pattern=r"Integrated(Cons|Pros)\.txt",
+    encoding="ISO-8859-2",
+)
+ptb = LazyCorpusLoader(  # Penn Treebank v3: WSJ and Brown portions
+    "ptb",
+    CategorizedBracketParseCorpusReader,
+    r"(WSJ/\d\d/WSJ_\d\d|BROWN/C[A-Z]/C[A-Z])\d\d.MRG",
+    cat_file="allcats.txt",
+    tagset="wsj",
+)
 qc = LazyCorpusLoader(
-    'qc', StringCategoryCorpusReader, ['train.txt', 'test.txt'], encoding='ISO-8859-2')
+    "qc", StringCategoryCorpusReader, ["train.txt", "test.txt"], encoding="ISO-8859-2"
+)
 reuters = LazyCorpusLoader(
-    'reuters', CategorizedPlaintextCorpusReader, '(training|test).*',
-    cat_file='cats.txt', encoding='ISO-8859-2')
-rte = LazyCorpusLoader(
-    'rte', RTECorpusReader, r'(?!\.).*\.xml')
-senseval = LazyCorpusLoader(
-    'senseval', SensevalCorpusReader, r'(?!\.).*\.pos')
+    "reuters",
+    CategorizedPlaintextCorpusReader,
+    "(training|test).*",
+    cat_file="cats.txt",
+    encoding="ISO-8859-2",
+)
+rte = LazyCorpusLoader("rte", RTECorpusReader, r"(?!\.).*\.xml")
+senseval = LazyCorpusLoader("senseval", SensevalCorpusReader, r"(?!\.).*\.pos")
 sentence_polarity = LazyCorpusLoader(
-    'sentence_polarity', CategorizedSentencesCorpusReader, r'rt-polarity\.(neg|pos)',
-    cat_pattern=r'rt-polarity\.(neg|pos)', encoding='utf-8')
+    "sentence_polarity",
+    CategorizedSentencesCorpusReader,
+    r"rt-polarity\.(neg|pos)",
+    cat_pattern=r"rt-polarity\.(neg|pos)",
+    encoding="utf-8",
+)
 sentiwordnet = LazyCorpusLoader(
-    'sentiwordnet', SentiWordNetCorpusReader, 'SentiWordNet_3.0.0.txt', encoding='utf-8')
-shakespeare = LazyCorpusLoader(
-    'shakespeare', XMLCorpusReader, r'(?!\.).*\.xml')
+    "sentiwordnet", SentiWordNetCorpusReader, "SentiWordNet_3.0.0.txt", encoding="utf-8"
+)
+shakespeare = LazyCorpusLoader("shakespeare", XMLCorpusReader, r"(?!\.).*\.xml")
 sinica_treebank = LazyCorpusLoader(
-    'sinica_treebank', SinicaTreebankCorpusReader, ['parsed'],
-    tagset='unknown', encoding='utf-8')
+    "sinica_treebank",
+    SinicaTreebankCorpusReader,
+    ["parsed"],
+    tagset="unknown",
+    encoding="utf-8",
+)
 state_union = LazyCorpusLoader(
-    'state_union', PlaintextCorpusReader, r'(?!\.).*\.txt',
-    encoding='ISO-8859-2')
+    "state_union", PlaintextCorpusReader, r"(?!\.).*\.txt", encoding="ISO-8859-2"
+)
 stopwords = LazyCorpusLoader(
-    'stopwords', WordListCorpusReader, r'(?!README|\.).*', encoding='utf8')
+    "stopwords", WordListCorpusReader, r"(?!README|\.).*", encoding="utf8"
+)
 subjectivity = LazyCorpusLoader(
-    'subjectivity', CategorizedSentencesCorpusReader, r'(quote.tok.gt9|plot.tok.gt9)\.5000',
-    cat_map={'quote.tok.gt9.5000':['subj'], 'plot.tok.gt9.5000':['obj']}, encoding='latin-1')
+    "subjectivity",
+    CategorizedSentencesCorpusReader,
+    r"(quote.tok.gt9|plot.tok.gt9)\.5000",
+    cat_map={"quote.tok.gt9.5000": ["subj"], "plot.tok.gt9.5000": ["obj"]},
+    encoding="latin-1",
+)
 swadesh = LazyCorpusLoader(
-    'swadesh', SwadeshCorpusReader, r'(?!README|\.).*', encoding='utf8')
+    "swadesh", SwadeshCorpusReader, r"(?!README|\.).*", encoding="utf8"
+)
 swadesh110 = LazyCorpusLoader(
-    'panlex_swadesh', SwadeshCorpusReader, r'swadesh110/.*\.txt', encoding='utf8')
+    'panlex_swadesh', PanlexSwadeshCorpusReader, r'swadesh110/.*\.txt', encoding='utf8'
+)
 swadesh207 = LazyCorpusLoader(
-    'panlex_swadesh', SwadeshCorpusReader, r'swadesh207/.*\.txt', encoding='utf8')
-switchboard = LazyCorpusLoader(
-    'switchboard', SwitchboardCorpusReader, tagset='wsj')
-timit = LazyCorpusLoader(
-    'timit', TimitCorpusReader)
+    'panlex_swadesh', PanlexSwadeshCorpusReader, r'swadesh207/.*\.txt', encoding='utf8'
+)
+switchboard = LazyCorpusLoader("switchboard", SwitchboardCorpusReader, tagset="wsj")
+timit = LazyCorpusLoader("timit", TimitCorpusReader)
 timit_tagged = LazyCorpusLoader(
-    'timit', TimitTaggedCorpusReader, '.+\.tags',
-    tagset='wsj', encoding='ascii')
+    "timit", TimitTaggedCorpusReader, ".+\.tags", tagset="wsj", encoding="ascii"
+)
 toolbox = LazyCorpusLoader(
-    'toolbox', ToolboxCorpusReader, r'(?!.*(README|\.)).*\.(dic|txt)')
+    "toolbox", ToolboxCorpusReader, r"(?!.*(README|\.)).*\.(dic|txt)"
+)
 treebank = LazyCorpusLoader(
-    'treebank/combined', BracketParseCorpusReader, r'wsj_.*\.mrg',
-    tagset='wsj', encoding='ascii')
+    "treebank/combined",
+    BracketParseCorpusReader,
+    r"wsj_.*\.mrg",
+    tagset="wsj",
+    encoding="ascii",
+)
 treebank_chunk = LazyCorpusLoader(
-    'treebank/tagged', ChunkedCorpusReader, r'wsj_.*\.pos',
-    sent_tokenizer=RegexpTokenizer(r'(?<=/\.)\s*(?![^\[]*\])', gaps=True),
-    para_block_reader=tagged_treebank_para_block_reader, tagset='wsj', encoding='ascii')
+    "treebank/tagged",
+    ChunkedCorpusReader,
+    r"wsj_.*\.pos",
+    sent_tokenizer=RegexpTokenizer(r"(?<=/\.)\s*(?![^\[]*\])", gaps=True),
+    para_block_reader=tagged_treebank_para_block_reader,
+    tagset="wsj",
+    encoding="ascii",
+)
 treebank_raw = LazyCorpusLoader(
-    'treebank/raw', PlaintextCorpusReader, r'wsj_.*', encoding='ISO-8859-2')
-twitter_samples = LazyCorpusLoader(
-    'twitter_samples', TwitterCorpusReader, '.*\.json')
-udhr = LazyCorpusLoader(
-    'udhr', UdhrCorpusReader)
-udhr2 = LazyCorpusLoader(
-    'udhr2', PlaintextCorpusReader, r'.*\.txt', encoding='utf8')
+    "treebank/raw", PlaintextCorpusReader, r"wsj_.*", encoding="ISO-8859-2"
+)
+twitter_samples = LazyCorpusLoader("twitter_samples", TwitterCorpusReader, ".*\.json")
+udhr = LazyCorpusLoader("udhr", UdhrCorpusReader)
+udhr2 = LazyCorpusLoader("udhr2", PlaintextCorpusReader, r".*\.txt", encoding="utf8")
 universal_treebanks = LazyCorpusLoader(
-    'universal_treebanks_v20', ConllCorpusReader, r'.*\.conll',
-    columntypes = ('ignore', 'words', 'ignore', 'ignore', 'pos',
-                   'ignore', 'ignore', 'ignore', 'ignore', 'ignore'))
-verbnet = LazyCorpusLoader(
-    'verbnet', VerbnetCorpusReader, r'(?!\.).*\.xml')
+    "universal_treebanks_v20",
+    ConllCorpusReader,
+    r".*\.conll",
+    columntypes=(
+        "ignore",
+        "words",
+        "ignore",
+        "ignore",
+        "pos",
+        "ignore",
+        "ignore",
+        "ignore",
+        "ignore",
+        "ignore",
+    ),
+)
+verbnet = LazyCorpusLoader("verbnet", VerbnetCorpusReader, r"(?!\.).*\.xml")
 webtext = LazyCorpusLoader(
-    'webtext', PlaintextCorpusReader, r'(?!README|\.).*\.txt', encoding='ISO-8859-2')
+    "webtext", PlaintextCorpusReader, r"(?!README|\.).*\.txt", encoding="ISO-8859-2"
+)
 wordnet = LazyCorpusLoader(
-    'wordnet', WordNetCorpusReader,
-    LazyCorpusLoader('omw', CorpusReader, r'.*/wn-data-.*\.tab', encoding='utf8'))
-wordnet_ic = LazyCorpusLoader(
-    'wordnet_ic', WordNetICCorpusReader, '.*\.dat')
+    "wordnet",
+    WordNetCorpusReader,
+    LazyCorpusLoader("omw", CorpusReader, r".*/wn-data-.*\.tab", encoding="utf8"),
+)
+wordnet_ic = LazyCorpusLoader("wordnet_ic", WordNetICCorpusReader, ".*\.dat")
 words = LazyCorpusLoader(
-    'words', WordListCorpusReader, r'(?!README|\.).*', encoding='ascii')
+    "words", WordListCorpusReader, r"(?!README|\.).*", encoding="ascii"
+)
 
 # defined after treebank
 propbank = LazyCorpusLoader(
-    'propbank', PropbankCorpusReader,
-    'prop.txt', 'frames/.*\.xml', 'verbs.txt',
-    lambda filename: re.sub(r'^wsj/\d\d/', '', filename),
-    treebank) # Must be defined *after* treebank corpus.
+    "propbank",
+    PropbankCorpusReader,
+    "prop.txt",
+    "frames/.*\.xml",
+    "verbs.txt",
+    lambda filename: re.sub(r"^wsj/\d\d/", "", filename),
+    treebank,
+)  # Must be defined *after* treebank corpus.
 nombank = LazyCorpusLoader(
-    'nombank.1.0', NombankCorpusReader,
-    'nombank.1.0', 'frames/.*\.xml', 'nombank.1.0.words',
-    lambda filename: re.sub(r'^wsj/\d\d/', '', filename),
-    treebank) # Must be defined *after* treebank corpus.
+    "nombank.1.0",
+    NombankCorpusReader,
+    "nombank.1.0",
+    "frames/.*\.xml",
+    "nombank.1.0.words",
+    lambda filename: re.sub(r"^wsj/\d\d/", "", filename),
+    treebank,
+)  # Must be defined *after* treebank corpus.
 propbank_ptb = LazyCorpusLoader(
-    'propbank', PropbankCorpusReader,
-    'prop.txt', 'frames/.*\.xml', 'verbs.txt',
+    "propbank",
+    PropbankCorpusReader,
+    "prop.txt",
+    "frames/.*\.xml",
+    "verbs.txt",
     lambda filename: filename.upper(),
-    ptb) # Must be defined *after* ptb corpus.
+    ptb,
+)  # Must be defined *after* ptb corpus.
 nombank_ptb = LazyCorpusLoader(
-    'nombank.1.0', NombankCorpusReader,
-    'nombank.1.0', 'frames/.*\.xml', 'nombank.1.0.words',
+    "nombank.1.0",
+    NombankCorpusReader,
+    "nombank.1.0",
+    "frames/.*\.xml",
+    "nombank.1.0.words",
     lambda filename: filename.upper(),
-    ptb) # Must be defined *after* ptb corpus.
+    ptb,
+)  # Must be defined *after* ptb corpus.
 semcor = LazyCorpusLoader(
-    'semcor', SemcorCorpusReader, r'brown./tagfiles/br-.*\.xml',
-    wordnet) # Must be defined *after* wordnet corpus.
+    "semcor", SemcorCorpusReader, r"brown./tagfiles/br-.*\.xml", wordnet
+)  # Must be defined *after* wordnet corpus.
 
 nonbreaking_prefixes = LazyCorpusLoader(
-    'nonbreaking_prefixes', NonbreakingPrefixesCorpusReader, r'(?!README|\.).*', encoding='utf8')
+    "nonbreaking_prefixes",
+    NonbreakingPrefixesCorpusReader,
+    r"(?!README|\.).*",
+    encoding="utf8",
+)
 perluniprops = LazyCorpusLoader(
-    'perluniprops', UnicharsCorpusReader, r'(?!README|\.).*', nltk_data_subdir='misc', encoding='utf8')
+    "perluniprops",
+    UnicharsCorpusReader,
+    r"(?!README|\.).*",
+    nltk_data_subdir="misc",
+    encoding="utf8",
+)
 
 # mwa_ppdb = LazyCorpusLoader(
 #     'mwa_ppdb', MWAPPDBCorpusReader, r'(?!README|\.).*', nltk_data_subdir='misc', encoding='utf8')
@@ -288,22 +437,22 @@ perluniprops = LazyCorpusLoader(
 # nkjp = LazyCorpusLoader(
 #     'nkjp', NKJPCorpusReader, r'', encoding='utf8')
 #
-#panlex_lite = LazyCorpusLoader(
+# panlex_lite = LazyCorpusLoader(
 #    'panlex_lite', PanLexLiteCorpusReader)
 #
 # ycoe = LazyCorpusLoader(
 #     'ycoe', YCOECorpusReader)
 #
 # corpus not available with NLTK; these lines caused help(nltk.corpus) to break
-#hebrew_treebank = LazyCorpusLoader(
+# hebrew_treebank = LazyCorpusLoader(
 #    'hebrew_treebank', BracketParseCorpusReader, r'.*\.txt')
 
-
+# FIXME:  override any imported demo from various corpora, see https://github.com/nltk/nltk/issues/2116
 def demo():
     # This is out-of-date:
     abc.demo()
     brown.demo()
-#    chat80.demo()
+    #    chat80.demo()
     cmudict.demo()
     conll2000.demo()
     conll2002.demo()
@@ -325,17 +474,20 @@ def demo():
     udhr.demo()
     webtext.demo()
     words.demo()
+
+
 #    ycoe.demo()
 
-if __name__ == '__main__':
-    #demo()
+if __name__ == "__main__":
+    # demo()
     pass
 
 # ** this is for nose **
 # unload all corpus after tests
 def teardown_module(module=None):
     import nltk.corpus
+
     for name in dir(nltk.corpus):
         obj = getattr(nltk.corpus, name, None)
-        if isinstance(obj, CorpusReader) and hasattr(obj, '_unload'):
+        if isinstance(obj, CorpusReader) and hasattr(obj, "_unload"):
             obj._unload()
diff --git a/nlp_resource_data/nltk/corpus/__init__.pyc b/nlp_resource_data/nltk/corpus/__init__.pyc
deleted file mode 100755 (executable)
index 3d1a4cc..0000000
Binary files a/nlp_resource_data/nltk/corpus/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/corpus/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..414d9b7
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/__pycache__/europarl_raw.cpython-37.pyc b/nlp_resource_data/nltk/corpus/__pycache__/europarl_raw.cpython-37.pyc
new file mode 100644 (file)
index 0000000..01cbbb4
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/__pycache__/europarl_raw.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/corpus/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..3a0893f
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/__pycache__/util.cpython-37.pyc differ
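Every loader above is a LazyCorpusLoader, so no disk access happens at import time; a minimal sketch of the deferred load (assumes the stopwords data package was fetched beforehand with nltk.download("stopwords")):

    from nltk.corpus import stopwords  # nothing is read from disk yet

    # First real access transparently loads the corpus.
    print(stopwords.words("english")[:5])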
diff --git a/nlp_resource_data/nltk/corpus/europarl_raw.py b/nlp_resource_data/nltk/corpus/europarl_raw.py
old mode 100755 (executable)
new mode 100644 (file)
index a8e62a5..a4caa7b
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Europarl Corpus Readers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author:  Nitin Madnani <nmadnani@umiacs.umd.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -11,34 +11,45 @@ from nltk.corpus.reader import *
 
 # Create a new corpus reader instance for each European language
 danish = LazyCorpusLoader(
-    'europarl_raw/danish', EuroparlCorpusReader, r'ep-.*\.da', encoding='utf-8')
+    "europarl_raw/danish", EuroparlCorpusReader, r"ep-.*\.da", encoding="utf-8"
+)
 
 dutch = LazyCorpusLoader(
-    'europarl_raw/dutch', EuroparlCorpusReader, r'ep-.*\.nl', encoding='utf-8')
+    "europarl_raw/dutch", EuroparlCorpusReader, r"ep-.*\.nl", encoding="utf-8"
+)
 
 english = LazyCorpusLoader(
-    'europarl_raw/english', EuroparlCorpusReader, r'ep-.*\.en', encoding='utf-8')
+    "europarl_raw/english", EuroparlCorpusReader, r"ep-.*\.en", encoding="utf-8"
+)
 
 finnish = LazyCorpusLoader(
-    'europarl_raw/finnish', EuroparlCorpusReader, r'ep-.*\.fi', encoding='utf-8')
+    "europarl_raw/finnish", EuroparlCorpusReader, r"ep-.*\.fi", encoding="utf-8"
+)
 
 french = LazyCorpusLoader(
-    'europarl_raw/french', EuroparlCorpusReader, r'ep-.*\.fr', encoding='utf-8')
+    "europarl_raw/french", EuroparlCorpusReader, r"ep-.*\.fr", encoding="utf-8"
+)
 
 german = LazyCorpusLoader(
-    'europarl_raw/german', EuroparlCorpusReader, r'ep-.*\.de', encoding='utf-8')
+    "europarl_raw/german", EuroparlCorpusReader, r"ep-.*\.de", encoding="utf-8"
+)
 
 greek = LazyCorpusLoader(
-    'europarl_raw/greek', EuroparlCorpusReader, r'ep-.*\.el', encoding='utf-8')
+    "europarl_raw/greek", EuroparlCorpusReader, r"ep-.*\.el", encoding="utf-8"
+)
 
 italian = LazyCorpusLoader(
-    'europarl_raw/italian', EuroparlCorpusReader, r'ep-.*\.it', encoding='utf-8')
+    "europarl_raw/italian", EuroparlCorpusReader, r"ep-.*\.it", encoding="utf-8"
+)
 
 portuguese = LazyCorpusLoader(
-    'europarl_raw/portuguese', EuroparlCorpusReader, r'ep-.*\.pt', encoding='utf-8')
+    "europarl_raw/portuguese", EuroparlCorpusReader, r"ep-.*\.pt", encoding="utf-8"
+)
 
 spanish = LazyCorpusLoader(
-    'europarl_raw/spanish', EuroparlCorpusReader, r'ep-.*\.es', encoding='utf-8')
+    "europarl_raw/spanish", EuroparlCorpusReader, r"ep-.*\.es", encoding="utf-8"
+)
 
 swedish = LazyCorpusLoader(
-    'europarl_raw/swedish', EuroparlCorpusReader, r'ep-.*\.sv', encoding='utf-8')
+    "europarl_raw/swedish", EuroparlCorpusReader, r"ep-.*\.sv", encoding="utf-8"
+)
diff --git a/nlp_resource_data/nltk/corpus/europarl_raw.pyc b/nlp_resource_data/nltk/corpus/europarl_raw.pyc
deleted file mode 100755 (executable)
index 2ce76dc..0000000
Binary files a/nlp_resource_data/nltk/corpus/europarl_raw.pyc and /dev/null differ
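Each language loader exposes the standard EuroparlCorpusReader API; for instance (assumes the europarl_raw data package is installed):

    from nltk.corpus import europarl_raw

    # words()/sents()/chapters() come from the underlying plaintext reader.
    print(europarl_raw.english.words()[:10])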
diff --git a/nlp_resource_data/nltk/corpus/reader/__init__.py b/nlp_resource_data/nltk/corpus/reader/__init__.py
old mode 100755 (executable)
new mode 100644 (file)
index 77e0eb0..a1db6d4
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus Readers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -103,45 +103,81 @@ from nltk.corpus.reader.pros_cons import *
 from nltk.corpus.reader.categorized_sents import *
 from nltk.corpus.reader.comparative_sents import *
 from nltk.corpus.reader.panlex_lite import *
+from nltk.corpus.reader.panlex_swadesh import *
 
 # Make sure that nltk.corpus.reader.bracket_parse gives the module, not
 # the function bracket_parse() defined in nltk.tree:
 from nltk.corpus.reader import bracket_parse
 
 __all__ = [
-    'CorpusReader', 'CategorizedCorpusReader',
-    'PlaintextCorpusReader', 'find_corpus_fileids',
-    'TaggedCorpusReader', 'CMUDictCorpusReader',
-    'ConllChunkCorpusReader', 'WordListCorpusReader',
-    'PPAttachmentCorpusReader', 'SensevalCorpusReader',
-    'IEERCorpusReader', 'ChunkedCorpusReader',
-    'SinicaTreebankCorpusReader', 'BracketParseCorpusReader',
-    'IndianCorpusReader', 'ToolboxCorpusReader',
-    'TimitCorpusReader', 'YCOECorpusReader',
-    'MacMorphoCorpusReader', 'SyntaxCorpusReader',
-    'AlpinoCorpusReader', 'RTECorpusReader',
-    'StringCategoryCorpusReader','EuroparlCorpusReader',
+    'CorpusReader',
+    'CategorizedCorpusReader',
+    'PlaintextCorpusReader',
+    'find_corpus_fileids',
+    'TaggedCorpusReader',
+    'CMUDictCorpusReader',
+    'ConllChunkCorpusReader',
+    'WordListCorpusReader',
+    'PPAttachmentCorpusReader',
+    'SensevalCorpusReader',
+    'IEERCorpusReader',
+    'ChunkedCorpusReader',
+    'SinicaTreebankCorpusReader',
+    'BracketParseCorpusReader',
+    'IndianCorpusReader',
+    'ToolboxCorpusReader',
+    'TimitCorpusReader',
+    'YCOECorpusReader',
+    'MacMorphoCorpusReader',
+    'SyntaxCorpusReader',
+    'AlpinoCorpusReader',
+    'RTECorpusReader',
+    'StringCategoryCorpusReader',
+    'EuroparlCorpusReader',
     'CategorizedBracketParseCorpusReader',
     'CategorizedTaggedCorpusReader',
     'CategorizedPlaintextCorpusReader',
     'PortugueseCategorizedPlaintextCorpusReader',
     'tagged_treebank_para_block_reader',
-    'PropbankCorpusReader', 'VerbnetCorpusReader',
-    'BNCCorpusReader', 'ConllCorpusReader',
-    'XMLCorpusReader', 'NPSChatCorpusReader',
-    'SwadeshCorpusReader', 'WordNetCorpusReader',
-    'WordNetICCorpusReader', 'SwitchboardCorpusReader',
-    'DependencyCorpusReader', 'NombankCorpusReader',
-    'IPIPANCorpusReader', 'Pl196xCorpusReader',
-    'TEICorpusView', 'KNBCorpusReader', 'ChasenCorpusReader',
-    'CHILDESCorpusReader', 'AlignedCorpusReader',
-    'TimitTaggedCorpusReader', 'LinThesaurusCorpusReader',
-    'SemcorCorpusReader', 'FramenetCorpusReader', 'UdhrCorpusReader',
-    'BNCCorpusReader', 'SentiWordNetCorpusReader', 'SentiSynset',
-    'TwitterCorpusReader', 'NKJPCorpusReader', 'CrubadanCorpusReader',
-    'MTECorpusReader', 'ReviewsCorpusReader', 'OpinionLexiconCorpusReader',
-    'ProsConsCorpusReader', 'CategorizedSentencesCorpusReader',
-    'ComparativeSentencesCorpusReader', 'PanLexLiteCorpusReader',
-    'NonbreakingPrefixesCorpusReader', 'UnicharsCorpusReader',
+    'PropbankCorpusReader',
+    'VerbnetCorpusReader',
+    'BNCCorpusReader',
+    'ConllCorpusReader',
+    'XMLCorpusReader',
+    'NPSChatCorpusReader',
+    'SwadeshCorpusReader',
+    'WordNetCorpusReader',
+    'WordNetICCorpusReader',
+    'SwitchboardCorpusReader',
+    'DependencyCorpusReader',
+    'NombankCorpusReader',
+    'IPIPANCorpusReader',
+    'Pl196xCorpusReader',
+    'TEICorpusView',
+    'KNBCorpusReader',
+    'ChasenCorpusReader',
+    'CHILDESCorpusReader',
+    'AlignedCorpusReader',
+    'TimitTaggedCorpusReader',
+    'LinThesaurusCorpusReader',
+    'SemcorCorpusReader',
+    'FramenetCorpusReader',
+    'UdhrCorpusReader',
+    'BNCCorpusReader',
+    'SentiWordNetCorpusReader',
+    'SentiSynset',
+    'TwitterCorpusReader',
+    'NKJPCorpusReader',
+    'CrubadanCorpusReader',
+    'MTECorpusReader',
+    'ReviewsCorpusReader',
+    'OpinionLexiconCorpusReader',
+    'ProsConsCorpusReader',
+    'CategorizedSentencesCorpusReader',
+    'ComparativeSentencesCorpusReader',
+    'PanLexLiteCorpusReader',
+    'NonbreakingPrefixesCorpusReader',
+    'UnicharsCorpusReader',
     'MWAPPDBCorpusReader',
+    'PanlexSwadeshCorpusReader',
 ]
diff --git a/nlp_resource_data/nltk/corpus/reader/__init__.pyc b/nlp_resource_data/nltk/corpus/reader/__init__.pyc
deleted file mode 100755 (executable)
index c947e12..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/__init__.pyc and /dev/null differ
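The net effect for callers is that the new Panlex Swadesh reader is importable like any other; a minimal sketch (listing fileids assumes the panlex_swadesh data package is installed):

    from nltk.corpus.reader import PanlexSwadeshCorpusReader  # now star-exported
    from nltk.corpus import swadesh110  # backed by PanlexSwadeshCorpusReader

    print(swadesh110.fileids()[:3])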
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..dfc3584
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/aligned.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/aligned.cpython-37.pyc
new file mode 100644 (file)
index 0000000..6012d40
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/aligned.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..b576aec
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/bnc.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/bnc.cpython-37.pyc
new file mode 100644 (file)
index 0000000..56e8750
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/bnc.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/bracket_parse.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/bracket_parse.cpython-37.pyc
new file mode 100644 (file)
index 0000000..45e4401
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/bracket_parse.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/categorized_sents.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/categorized_sents.cpython-37.pyc
new file mode 100644 (file)
index 0000000..93e0803
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/categorized_sents.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/chasen.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/chasen.cpython-37.pyc
new file mode 100644 (file)
index 0000000..3e85205
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/chasen.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/childes.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/childes.cpython-37.pyc
new file mode 100644 (file)
index 0000000..4f05bc6
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/childes.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/chunked.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/chunked.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e2dc4e3
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/chunked.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/cmudict.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/cmudict.cpython-37.pyc
new file mode 100644 (file)
index 0000000..02fec50
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/cmudict.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/comparative_sents.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/comparative_sents.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8a041c1
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/comparative_sents.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/conll.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/conll.cpython-37.pyc
new file mode 100644 (file)
index 0000000..481bebb
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/conll.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/crubadan.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/crubadan.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e9f3a3e
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/crubadan.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/dependency.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/dependency.cpython-37.pyc
new file mode 100644 (file)
index 0000000..9584742
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/dependency.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/framenet.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/framenet.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ea6b5ae
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/framenet.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/ieer.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/ieer.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ef6c803
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/ieer.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/indian.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/indian.cpython-37.pyc
new file mode 100644 (file)
index 0000000..16ce5c3
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/indian.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/ipipan.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/ipipan.cpython-37.pyc
new file mode 100644 (file)
index 0000000..3e6ca29
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/ipipan.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/knbc.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/knbc.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0d5c4bb
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/knbc.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/lin.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/lin.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f720098
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/lin.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/mte.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/mte.cpython-37.pyc
new file mode 100644 (file)
index 0000000..eb277f4
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/mte.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/nkjp.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/nkjp.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ee49da0
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/nkjp.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/nombank.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/nombank.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a6ec885
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/nombank.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/nps_chat.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/nps_chat.cpython-37.pyc
new file mode 100644 (file)
index 0000000..943fbd0
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/nps_chat.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/opinion_lexicon.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/opinion_lexicon.cpython-37.pyc
new file mode 100644 (file)
index 0000000..1454152
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/opinion_lexicon.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/panlex_lite.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/panlex_lite.cpython-37.pyc
new file mode 100644 (file)
index 0000000..acb862f
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/panlex_lite.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/panlex_swadesh.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/panlex_swadesh.cpython-37.pyc
new file mode 100644 (file)
index 0000000..9665b8a
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/panlex_swadesh.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/pl196x.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/pl196x.cpython-37.pyc
new file mode 100644 (file)
index 0000000..6ebaa5a
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/pl196x.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/plaintext.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/plaintext.cpython-37.pyc
new file mode 100644 (file)
index 0000000..d68d226
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/plaintext.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/ppattach.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/ppattach.cpython-37.pyc
new file mode 100644 (file)
index 0000000..10bcd17
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/ppattach.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/propbank.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/propbank.cpython-37.pyc
new file mode 100644 (file)
index 0000000..dc95c18
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/propbank.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/pros_cons.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/pros_cons.cpython-37.pyc
new file mode 100644 (file)
index 0000000..415780d
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/pros_cons.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/reviews.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/reviews.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0823ce4
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/reviews.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/rte.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/rte.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e7277c5
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/rte.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/semcor.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/semcor.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7cb913f
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/semcor.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/senseval.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/senseval.cpython-37.pyc
new file mode 100644 (file)
index 0000000..505a1a6
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/senseval.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/sentiwordnet.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/sentiwordnet.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e3593be
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/sentiwordnet.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/sinica_treebank.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/sinica_treebank.cpython-37.pyc
new file mode 100644 (file)
index 0000000..1d59b53
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/sinica_treebank.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/string_category.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/string_category.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f1f7b05
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/string_category.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/switchboard.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/switchboard.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7a23927
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/switchboard.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/tagged.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/tagged.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f9a64ca
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/tagged.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/timit.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/timit.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2132d55
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/timit.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/toolbox.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/toolbox.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e194636
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/toolbox.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/twitter.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/twitter.cpython-37.pyc
new file mode 100644 (file)
index 0000000..b2dc425
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/twitter.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/udhr.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/udhr.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0e4efeb
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/udhr.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f7acada
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/util.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/verbnet.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/verbnet.cpython-37.pyc
new file mode 100644 (file)
index 0000000..4089e5d
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/verbnet.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/wordlist.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/wordlist.cpython-37.pyc
new file mode 100644 (file)
index 0000000..3cbb84f
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/wordlist.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/wordnet.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/wordnet.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ac95692
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/wordnet.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/xmldocs.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/xmldocs.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0495bcb
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/xmldocs.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/ycoe.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/ycoe.cpython-37.pyc
new file mode 100644 (file)
index 0000000..da94d7d
Binary files /dev/null and b/nlp_resource_data/nltk/corpus/reader/__pycache__/ycoe.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/corpus/reader/aligned.py b/nlp_resource_data/nltk/corpus/reader/aligned.py
old mode 100755 (executable)
new mode 100644 (file)
index 0b341c9..a97fecc
@@ -1,29 +1,37 @@
 # Natural Language Toolkit: Aligned Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # Author: Steven Bird <stevenbird1@gmail.com>
 # For license information, see LICENSE.TXT
 
-from six import string_types
-
 from nltk.tokenize import WhitespaceTokenizer, RegexpTokenizer
 from nltk.translate import AlignedSent, Alignment
 
 from nltk.corpus.reader.api import CorpusReader
-from nltk.corpus.reader.util import StreamBackedCorpusView, concat,\
-    read_alignedsent_block
+from nltk.corpus.reader.util import (
+    StreamBackedCorpusView,
+    concat,
+    read_alignedsent_block,
+)
+
 
 class AlignedCorpusReader(CorpusReader):
     """
     Reader for corpora of word-aligned sentences.  Tokens are assumed
     to be separated by whitespace.  Sentences begin on separate lines.
     """
-    def __init__(self, root, fileids,
-                 sep='/', word_tokenizer=WhitespaceTokenizer(),
-                 sent_tokenizer=RegexpTokenizer('\n', gaps=True),
-                 alignedsent_block_reader=read_alignedsent_block,
-                 encoding='latin1'):
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        sep="/",
+        word_tokenizer=WhitespaceTokenizer(),
+        sent_tokenizer=RegexpTokenizer("\n", gaps=True),
+        alignedsent_block_reader=read_alignedsent_block,
+        encoding="latin1",
+    ):
         """
         Construct a new Aligned Corpus reader for a set of documents
         located at the given root directory.  Example usage:
@@ -45,8 +53,10 @@ class AlignedCorpusReader(CorpusReader):
         :return: the given file(s) as a single string.
         :rtype: str
         """
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def words(self, fileids=None):
@@ -55,11 +65,20 @@ class AlignedCorpusReader(CorpusReader):
             and punctuation symbols.
         :rtype: list(str)
         """
-        return concat([AlignedSentCorpusView(fileid, enc, False, False,
-                                             self._word_tokenizer,
-                                             self._sent_tokenizer,
-                                             self._alignedsent_block_reader)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                AlignedSentCorpusView(
+                    fileid,
+                    enc,
+                    False,
+                    False,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._alignedsent_block_reader,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def sents(self, fileids=None):
         """
@@ -68,22 +87,41 @@ class AlignedCorpusReader(CorpusReader):
             strings.
         :rtype: list(list(str))
         """
-        return concat([AlignedSentCorpusView(fileid, enc, False, True,
-                                             self._word_tokenizer,
-                                             self._sent_tokenizer,
-                                             self._alignedsent_block_reader)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                AlignedSentCorpusView(
+                    fileid,
+                    enc,
+                    False,
+                    True,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._alignedsent_block_reader,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def aligned_sents(self, fileids=None):
         """
         :return: the given file(s) as a list of AlignedSent objects.
         :rtype: list(AlignedSent)
         """
-        return concat([AlignedSentCorpusView(fileid, enc, True, True,
-                                             self._word_tokenizer,
-                                             self._sent_tokenizer,
-                                             self._alignedsent_block_reader)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                AlignedSentCorpusView(
+                    fileid,
+                    enc,
+                    True,
+                    True,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._alignedsent_block_reader,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
 
 class AlignedSentCorpusView(StreamBackedCorpusView):
     """
@@ -91,8 +129,17 @@ class AlignedSentCorpusView(StreamBackedCorpusView):
     ``AlignedSentCorpusView`` objects are typically created by
     ``AlignedCorpusReader`` (not directly by nltk users).
     """
-    def __init__(self, corpus_file, encoding, aligned, group_by_sent,
-                 word_tokenizer, sent_tokenizer, alignedsent_block_reader):
+
+    def __init__(
+        self,
+        corpus_file,
+        encoding,
+        aligned,
+        group_by_sent,
+        word_tokenizer,
+        sent_tokenizer,
+        alignedsent_block_reader,
+    ):
         self._aligned = aligned
         self._group_by_sent = group_by_sent
         self._word_tokenizer = word_tokenizer
@@ -101,11 +148,15 @@ class AlignedSentCorpusView(StreamBackedCorpusView):
         StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding)
 
     def read_block(self, stream):
-        block = [self._word_tokenizer.tokenize(sent_str)
-                 for alignedsent_str in self._alignedsent_block_reader(stream)
-                 for sent_str in self._sent_tokenizer.tokenize(alignedsent_str)]
+        block = [
+            self._word_tokenizer.tokenize(sent_str)
+            for alignedsent_str in self._alignedsent_block_reader(stream)
+            for sent_str in self._sent_tokenizer.tokenize(alignedsent_str)
+        ]
         if self._aligned:
-            block[2] = Alignment.fromstring(" ".join(block[2])) # kludge; we shouldn't have tokenized the alignment string
+            block[2] = Alignment.fromstring(
+                " ".join(block[2])
+            )  # kludge; we shouldn't have tokenized the alignment string
             block = [AlignedSent(*block)]
         elif self._group_by_sent:
             block = [block[0]]
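
For orientation, the view above consumes blocks of three whitespace-tokenized lines per entry: the source sentence, its translation, and the alignment string that block[2] re-joins into an Alignment. A minimal usage sketch, assuming a hypothetical directory of files in that format:

    from nltk.corpus.reader import AlignedCorpusReader

    # Hypothetical corpus root; each file holds a source line, a target line,
    # and an alignment line (e.g. "0-0 1-1 2-2") per sentence pair.
    reader = AlignedCorpusReader('/tmp/aligned_corpus', r'.*\.txt')

    for asent in reader.aligned_sents()[:3]:
        print(asent.words)      # tokens of the source sentence
        print(asent.mots)       # tokens of the translation
        print(asent.alignment)  # nltk.translate.Alignment object
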
diff --git a/nlp_resource_data/nltk/corpus/reader/aligned.pyc b/nlp_resource_data/nltk/corpus/reader/aligned.pyc
deleted file mode 100755 (executable)
index 44f5996..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/aligned.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/api.py b/nlp_resource_data/nltk/corpus/reader/api.py
old mode 100755 (executable)
new mode 100644 (file)
index fae5a11..98b3f5e
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: API for Corpus Readers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -9,21 +9,17 @@
 """
 API for corpus readers.
 """
-from __future__ import unicode_literals
 
 import os
 import re
 from collections import defaultdict
 from itertools import chain
 
-from six import string_types
-
-from nltk import compat
 from nltk.data import PathPointer, FileSystemPathPointer, ZipFilePathPointer
 
 from nltk.corpus.reader.util import *
 
-@compat.python_2_unicode_compatible
+
 class CorpusReader(object):
     """
     A base class for "corpus reader" classes, each of which can be
@@ -43,7 +39,7 @@ class CorpusReader(object):
     be used to select which portion of the corpus should be returned.
     """
 
-    def __init__(self, root, fileids, encoding='utf8', tagset=None):
+    def __init__(self, root, fileids, encoding="utf8", tagset=None):
         """
         :type root: PathPointer or str
         :param root: A path pointer identifying the root directory for
@@ -75,18 +71,18 @@ class CorpusReader(object):
               tagged_...() methods.
         """
         # Convert the root to a path pointer, if necessary.
-        if isinstance(root, string_types) and not isinstance(root, PathPointer):
-            m = re.match('(.*\.zip)/?(.*)$|', root)
+        if isinstance(root, str) and not isinstance(root, PathPointer):
+            m = re.match("(.*\.zip)/?(.*)$|", root)
             zipfile, zipentry = m.groups()
             if zipfile:
                 root = ZipFilePathPointer(zipfile, zipentry)
             else:
                 root = FileSystemPathPointer(root)
         elif not isinstance(root, PathPointer):
-            raise TypeError('CorpusReader: expected a string or a PathPointer')
+            raise TypeError("CorpusReader: expected a string or a PathPointer")
 
         # If `fileids` is a regexp, then expand it.
-        if isinstance(fileids, string_types):
+        if isinstance(fileids, str):
             fileids = find_corpus_fileids(root, fileids)
 
         self._fileids = fileids
@@ -116,10 +112,10 @@ class CorpusReader(object):
 
     def __repr__(self):
         if isinstance(self._root, ZipFilePathPointer):
-            path = '%s/%s' % (self._root.zipfile.filename, self._root.entry)
+            path = "%s/%s" % (self._root.zipfile.filename, self._root.entry)
         else:
-            path = '%s' % self._root.path
-        return '<%s in %r>' % (self.__class__.__name__, path)
+            path = "%s" % self._root.path
+        return "<%s in %r>" % (self.__class__.__name__, path)
 
     def ensure_loaded(self):
         """
@@ -128,7 +124,7 @@ class CorpusReader(object):
         make sure a corpus is loaded -- e.g., in case a user wants to
         do help(some_corpus).
         """
-        pass # no need to actually do anything.
+        pass  # no need to actually do anything.
 
     def readme(self):
         """
@@ -166,8 +162,7 @@ class CorpusReader(object):
         """
         return self._root.join(fileid)
 
-    def abspaths(self, fileids=None, include_encoding=False,
-                 include_fileid=False):
+    def abspaths(self, fileids=None, include_encoding=False, include_fileid=False):
         """
         Return a list of the absolute paths for all fileids in this corpus;
         or for the given list of fileids, if specified.
@@ -187,7 +182,7 @@ class CorpusReader(object):
         """
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
 
         paths = [self._root.join(f) for f in fileids]
@@ -224,17 +219,23 @@ class CorpusReader(object):
         else:
             return self._encoding
 
-    def _get_root(self): return self._root
-    root = property(_get_root, doc="""
+    def _get_root(self):
+        return self._root
+
+    root = property(
+        _get_root,
+        doc="""
         The directory where this corpus is stored.
 
-        :type: PathPointer""")
+        :type: PathPointer""",
+    )
 
 
 ######################################################################
-#{ Corpora containing categorized items
+# { Corpora containing categorized items
 ######################################################################
 
+
 class CategorizedCorpusReader(object):
     """
     A mixin class used to aid in the implementation of corpus readers
@@ -274,35 +275,35 @@ class CategorizedCorpusReader(object):
         more than one argument is specified, an exception will be
         raised.
         """
-        self._f2c = None #: file-to-category mapping
-        self._c2f = None #: category-to-file mapping
-
-        self._pattern = None #: regexp specifying the mapping
-        self._map = None #: dict specifying the mapping
-        self._file = None #: fileid of file containing the mapping
-        self._delimiter = None #: delimiter for ``self._file``
-
-        if 'cat_pattern' in kwargs:
-            self._pattern = kwargs['cat_pattern']
-            del kwargs['cat_pattern']
-        elif 'cat_map' in kwargs:
-            self._map = kwargs['cat_map']
-            del kwargs['cat_map']
-        elif 'cat_file' in kwargs:
-            self._file = kwargs['cat_file']
-            del kwargs['cat_file']
-            if 'cat_delimiter' in kwargs:
-                self._delimiter = kwargs['cat_delimiter']
-                del kwargs['cat_delimiter']
+        self._f2c = None  #: file-to-category mapping
+        self._c2f = None  #: category-to-file mapping
+
+        self._pattern = None  #: regexp specifying the mapping
+        self._map = None  #: dict specifying the mapping
+        self._file = None  #: fileid of file containing the mapping
+        self._delimiter = None  #: delimiter for ``self._file``
+
+        if "cat_pattern" in kwargs:
+            self._pattern = kwargs["cat_pattern"]
+            del kwargs["cat_pattern"]
+        elif "cat_map" in kwargs:
+            self._map = kwargs["cat_map"]
+            del kwargs["cat_map"]
+        elif "cat_file" in kwargs:
+            self._file = kwargs["cat_file"]
+            del kwargs["cat_file"]
+            if "cat_delimiter" in kwargs:
+                self._delimiter = kwargs["cat_delimiter"]
+                del kwargs["cat_delimiter"]
         else:
-            raise ValueError('Expected keyword argument cat_pattern or '
-                             'cat_map or cat_file.')
+            raise ValueError(
+                "Expected keyword argument cat_pattern or " "cat_map or cat_file."
+            )
 
-
-        if ('cat_pattern' in kwargs or 'cat_map' in kwargs or
-            'cat_file' in kwargs):
-            raise ValueError('Specify exactly one of: cat_pattern, '
-                             'cat_map, cat_file.')
+        if "cat_pattern" in kwargs or "cat_map" in kwargs or "cat_file" in kwargs:
+            raise ValueError(
+                "Specify exactly one of: cat_pattern, " "cat_map, cat_file."
+            )
 
     def _init(self):
         self._f2c = defaultdict(set)
@@ -323,8 +324,10 @@ class CategorizedCorpusReader(object):
                 line = line.strip()
                 file_id, categories = line.split(self._delimiter, 1)
                 if file_id not in self.fileids():
-                    raise ValueError('In category mapping file %s: %s '
-                                     'not found' % (self._file, file_id))
+                    raise ValueError(
+                        "In category mapping file %s: %s "
+                        "not found" % (self._file, file_id)
+                    )
                 for category in categories.split(self._delimiter):
                     self._add(file_id, category)
 
@@ -341,7 +344,7 @@ class CategorizedCorpusReader(object):
             self._init()
         if fileids is None:
             return sorted(self._c2f)
-        if isinstance(fileids, string_types):
+        if isinstance(fileids, str):
             fileids = [fileids]
         return sorted(set.union(*[self._f2c[d] for d in fileids]))
 
@@ -352,23 +355,24 @@ class CategorizedCorpusReader(object):
         """
         if categories is None:
             return super(CategorizedCorpusReader, self).fileids()
-        elif isinstance(categories, string_types):
+        elif isinstance(categories, str):
             if self._f2c is None:
                 self._init()
             if categories in self._c2f:
                 return sorted(self._c2f[categories])
             else:
-                raise ValueError('Category %s not found' % categories)
+                raise ValueError("Category %s not found" % categories)
         else:
             if self._f2c is None:
                 self._init()
             return sorted(set.union(*[self._c2f[c] for c in categories]))
 
+
 ######################################################################
-#{ Treebank readers
+# { Treebank readers
 ######################################################################
 
-#[xx] is it worth it to factor this out?
+# [xx] is it worth it to factor this out?
 class SyntaxCorpusReader(CorpusReader):
     """
     An abstract base class for reading corpora consisting of
@@ -383,50 +387,76 @@ class SyntaxCorpusReader(CorpusReader):
       - ``_parse``, which takes a block and returns a list of parsed
         sentences.
     """
+
     def _parse(self, s):
         raise NotImplementedError()
+
     def _word(self, s):
         raise NotImplementedError()
+
     def _tag(self, s):
         raise NotImplementedError()
+
     def _read_block(self, stream):
         raise NotImplementedError()
 
     def raw(self, fileids=None):
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def parsed_sents(self, fileids=None):
         reader = self._read_parsed_sent_block
-        return concat([StreamBackedCorpusView(fileid, reader, encoding=enc)
-                       for fileid, enc in self.abspaths(fileids, True)])
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, reader, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_sents(self, fileids=None, tagset=None):
         def reader(stream):
             return self._read_tagged_sent_block(stream, tagset)
-        return concat([StreamBackedCorpusView(fileid, reader, encoding=enc)
-                       for fileid, enc in self.abspaths(fileids, True)])
+
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, reader, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, True)
+            ]
+        )
 
     def sents(self, fileids=None):
         reader = self._read_sent_block
-        return concat([StreamBackedCorpusView(fileid, reader, encoding=enc)
-                       for fileid, enc in self.abspaths(fileids, True)])
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, reader, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_words(self, fileids=None, tagset=None):
         def reader(stream):
             return self._read_tagged_word_block(stream, tagset)
-        return concat([StreamBackedCorpusView(fileid, reader, encoding=enc)
-                       for fileid, enc in self.abspaths(fileids, True)])
+
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, reader, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, True)
+            ]
+        )
 
     def words(self, fileids=None):
-        return concat([StreamBackedCorpusView(fileid,
-                                              self._read_word_block,
-                                              encoding=enc)
-                       for fileid, enc in self.abspaths(fileids, True)])
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_word_block, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, True)
+            ]
+        )
 
-    #------------------------------------------------------------
-    #{ Block Readers
+    # ------------------------------------------------------------
+    # { Block Readers
 
     def _read_word_block(self, stream):
         return list(chain(*self._read_sent_block(stream)))
@@ -438,11 +468,12 @@ class SyntaxCorpusReader(CorpusReader):
         return list(filter(None, [self._word(t) for t in self._read_block(stream)]))
 
     def _read_tagged_sent_block(self, stream, tagset=None):
-        return list(filter(None, [self._tag(t, tagset)
-                             for t in self._read_block(stream)]))
+        return list(
+            filter(None, [self._tag(t, tagset) for t in self._read_block(stream)])
+        )
 
     def _read_parsed_sent_block(self, stream):
         return list(filter(None, [self._parse(t) for t in self._read_block(stream)]))
 
-    #} End of Block Readers
-    #------------------------------------------------------------
+    # } End of Block Readers
+    # ------------------------------------------------------------
diff --git a/nlp_resource_data/nltk/corpus/reader/api.pyc b/nlp_resource_data/nltk/corpus/reader/api.pyc
deleted file mode 100755 (executable)
index d927c3f..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/api.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/bnc.py b/nlp_resource_data/nltk/corpus/reader/bnc.py
old mode 100755 (executable)
new mode 100644 (file)
index 01ad9a1..4f3f148
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Plaintext Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -57,7 +57,7 @@ class BNCCorpusReader(XMLCorpusReader):
             word tokens.  Otherwise, leave the spaces on the tokens.
         :param stem: If true, then use word stems instead of word strings.
         """
-        tag = 'c5' if c5 else 'pos'
+        tag = "c5" if c5 else "pos"
         return self._views(fileids, False, tag, strip_space, stem)
 
     def sents(self, fileids=None, strip_space=True, stem=False):
@@ -85,13 +85,20 @@ class BNCCorpusReader(XMLCorpusReader):
             word tokens.  Otherwise, leave the spaces on the tokens.
         :param stem: If true, then use word stems instead of word strings.
         """
-        tag = 'c5' if c5 else 'pos'
-        return self._views(fileids, sent=True, tag=tag, strip_space=strip_space, stem=stem)
+        tag = "c5" if c5 else "pos"
+        return self._views(
+            fileids, sent=True, tag=tag, strip_space=strip_space, stem=stem
+        )
 
     def _views(self, fileids=None, sent=False, tag=False, strip_space=True, stem=False):
         """A helper function that instantiates BNCWordViews or the list of words/sentences."""
         f = BNCWordView if self._lazy else self._words
-        return concat([f(fileid, sent, tag, strip_space, stem) for fileid in self.abspaths(fileids)])
+        return concat(
+            [
+                f(fileid, sent, tag, strip_space, stem)
+                for fileid in self.abspaths(fileids)
+            ]
+        )
 
     def _words(self, fileid, bracket_sent, tag, strip_space, stem):
         """
@@ -107,7 +114,7 @@ class BNCCorpusReader(XMLCorpusReader):
         result = []
 
         xmldoc = ElementTree.parse(fileid).getroot()
-        for xmlsent in xmldoc.findall('.//s'):
+        for xmlsent in xmldoc.findall(".//s"):
             sent = []
             for xmlword in _all_xmlwords_in(xmlsent):
                 word = xmlword.text
@@ -116,14 +123,14 @@ class BNCCorpusReader(XMLCorpusReader):
                 if strip_space or stem:
                     word = word.strip()
                 if stem:
-                    word = xmlword.get('hw', word)
-                if tag == 'c5':
-                    word = (word, xmlword.get('c5'))
-                elif tag == 'pos':
-                    word = (word, xmlword.get('pos', xmlword.get('c5')))
+                    word = xmlword.get("hw", word)
+                if tag == "c5":
+                    word = (word, xmlword.get("c5"))
+                elif tag == "pos":
+                    word = (word, xmlword.get("pos", xmlword.get("c5")))
                 sent.append(word)
             if bracket_sent:
-                result.append(BNCSentence(xmlsent.attrib['n'], sent))
+                result.append(BNCSentence(xmlsent.attrib["n"], sent))
             else:
                 result.extend(sent)
 
@@ -135,7 +142,7 @@ def _all_xmlwords_in(elt, result=None):
     if result is None:
         result = []
     for child in elt:
-        if child.tag in ('c', 'w'):
+        if child.tag in ("c", "w"):
             result.append(child)
         else:
             _all_xmlwords_in(child, result)
@@ -147,6 +154,7 @@ class BNCSentence(list):
     A list of words, augmented by an attribute ``num`` used to record
     the sentence identifier (the ``n`` attribute from the XML).
     """
+
     def __init__(self, num, items):
         self.num = num
         list.__init__(self, items)
@@ -158,7 +166,7 @@ class BNCWordView(XMLCorpusView):
     """
 
     tags_to_ignore = set(
-        ['pb', 'gap', 'vocal', 'event', 'unclear', 'shift', 'pause', 'align']
+        ["pb", "gap", "vocal", "event", "unclear", "shift", "pause", "align"]
     )
     """These tags are ignored. For their description refer to the
     technical documentation, for example,
@@ -175,9 +183,9 @@ class BNCWordView(XMLCorpusView):
         :param stem: If true, then substitute stems for words.
         """
         if sent:
-            tagspec = '.*/s'
+            tagspec = ".*/s"
         else:
-            tagspec = '.*/s/(.*/)?(c|w)'
+            tagspec = ".*/s/(.*/)?(c|w)"
         self._sent = sent
         self._tag = tag
         self._strip_space = strip_space
@@ -192,7 +200,7 @@ class BNCWordView(XMLCorpusView):
 
         # Read in a tasty header.
         self._open()
-        self.read_block(self._stream, '.*/teiHeader$', self.handle_header)
+        self.read_block(self._stream, ".*/teiHeader$", self.handle_header)
         self.close()
 
         # Reset tag context.
@@ -200,24 +208,22 @@ class BNCWordView(XMLCorpusView):
 
     def handle_header(self, elt, context):
         # Set up some metadata!
-        titles = elt.findall('titleStmt/title')
+        titles = elt.findall("titleStmt/title")
         if titles:
-            self.title = '\n'.join(title.text.strip() for title in titles)
+            self.title = "\n".join(title.text.strip() for title in titles)
 
-        authors = elt.findall('titleStmt/author')
+        authors = elt.findall("titleStmt/author")
         if authors:
-            self.author = '\n'.join(author.text.strip() for author in authors)
+            self.author = "\n".join(author.text.strip() for author in authors)
 
-        editors = elt.findall('titleStmt/editor')
+        editors = elt.findall("titleStmt/editor")
         if editors:
-            self.editor = '\n'.join(editor.text.strip() for editor in editors)
+            self.editor = "\n".join(editor.text.strip() for editor in editors)
 
-        resps = elt.findall('titleStmt/respStmt')
+        resps = elt.findall("titleStmt/respStmt")
         if resps:
-            self.resps = '\n\n'.join(
-                '\n'.join(
-                    resp_elt.text.strip() for resp_elt in resp
-                ) for resp in resps
+            self.resps = "\n\n".join(
+                "\n".join(resp_elt.text.strip() for resp_elt in resp) for resp in resps
             )
 
     def handle_elt(self, elt, context):
@@ -233,20 +239,20 @@ class BNCWordView(XMLCorpusView):
         if self._strip_space or self._stem:
             word = word.strip()
         if self._stem:
-            word = elt.get('hw', word)
-        if self._tag == 'c5':
-            word = (word, elt.get('c5'))
-        elif self._tag == 'pos':
-            word = (word, elt.get('pos', elt.get('c5')))
+            word = elt.get("hw", word)
+        if self._tag == "c5":
+            word = (word, elt.get("c5"))
+        elif self._tag == "pos":
+            word = (word, elt.get("pos", elt.get("c5")))
         return word
 
     def handle_sent(self, elt):
         sent = []
         for child in elt:
-            if child.tag in ('mw', 'hi', 'corr', 'trunc'):
+            if child.tag in ("mw", "hi", "corr", "trunc"):
                 sent += [self.handle_word(w) for w in child]
-            elif child.tag in ('w', 'c'):
+            elif child.tag in ("w", "c"):
                 sent.append(self.handle_word(child))
             elif child.tag not in self.tags_to_ignore:
-                raise ValueError('Unexpected element %s' % child.tag)
-        return BNCSentence(elt.attrib['n'], sent)
+                raise ValueError("Unexpected element %s" % child.tag)
+        return BNCSentence(elt.attrib["n"], sent)
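
To see the c5/pos switch above in action: c5=True yields the fine-grained CLAWS-5 tag stored in each word element's c5 attribute, while the default prefers the simplified pos attribute and falls back to c5. A minimal sketch, assuming a locally unpacked BNC XML sample (the root path and fileid pattern are assumptions):

    from nltk.corpus.reader import BNCCorpusReader

    # Hypothetical location of BNC XML texts.
    bnc = BNCCorpusReader(root='/tmp/BNC/Texts', fileids=r'[A-K]/\w*/\w*\.xml')

    print(bnc.words()[:8])                # bare tokens
    print(bnc.tagged_words(c5=True)[:4])  # (word, C5 tag) pairs
    print(bnc.tagged_words()[:4])         # (word, simplified POS) pairs
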
diff --git a/nlp_resource_data/nltk/corpus/reader/bnc.pyc b/nlp_resource_data/nltk/corpus/reader/bnc.pyc
deleted file mode 100755 (executable)
index 1c4da62..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/bnc.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/bracket_parse.py b/nlp_resource_data/nltk/corpus/reader/bracket_parse.py
old mode 100755 (executable)
new mode 100644 (file)
index 0944075..9a958c4
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Penn Treebank Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -18,10 +18,11 @@ from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
 # we use [^\s()]+ instead of \S+? to avoid matching ()
-SORTTAGWRD = re.compile(r'\((\d+) ([^\s()]+) ([^\s()]+)\)') 
-TAGWORD = re.compile(r'\(([^\s()]+) ([^\s()]+)\)')
-WORD = re.compile(r'\([^\s()]+ ([^\s()]+)\)')
-EMPTY_BRACKETS = re.compile(r'\s*\(\s*\(')
+SORTTAGWRD = re.compile(r"\((\d+) ([^\s()]+) ([^\s()]+)\)")
+TAGWORD = re.compile(r"\(([^\s()]+) ([^\s()]+)\)")
+WORD = re.compile(r"\([^\s()]+ ([^\s()]+)\)")
+EMPTY_BRACKETS = re.compile(r"\s*\(\s*\(")
+
 
 class BracketParseCorpusReader(SyntaxCorpusReader):
     """
@@ -30,9 +31,16 @@ class BracketParseCorpusReader(SyntaxCorpusReader):
     e.g. "(S (NP (DT the) (JJ little) (NN dog)) (VP (VBD barked)))".
 
     """
-    def __init__(self, root, fileids, comment_char=None,
-                 detect_blocks='unindented_paren', encoding='utf8',
-                 tagset=None):
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        comment_char=None,
+        detect_blocks="unindented_paren",
+        encoding="utf8",
+        tagset=None,
+    ):
         """
         :param root: The root directory for this corpus.
         :param fileids: A list or regexp specifying the fileids in this corpus.
@@ -46,33 +54,32 @@ class BracketParseCorpusReader(SyntaxCorpusReader):
               for normalizing or converting the POS tags returned by the
               tagged_...() methods.
         """
+        # FIXME: Why is it inheriting from SyntaxCorpusReader but initializing
+        #       from CorpusReader?
         CorpusReader.__init__(self, root, fileids, encoding)
         self._comment_char = comment_char
         self._detect_blocks = detect_blocks
         self._tagset = tagset
 
     def _read_block(self, stream):
-        if self._detect_blocks == 'sexpr':
+        if self._detect_blocks == "sexpr":
             return read_sexpr_block(stream, comment_char=self._comment_char)
-        elif self._detect_blocks == 'blankline':
+        elif self._detect_blocks == "blankline":
             return read_blankline_block(stream)
-        elif self._detect_blocks == 'unindented_paren':
+        elif self._detect_blocks == "unindented_paren":
             # Tokens start with unindented left parens.
-            toks = read_regexp_block(stream, start_re=r'^\(')
+            toks = read_regexp_block(stream, start_re=r"^\(")
             # Strip any comments out of the tokens.
             if self._comment_char:
-                toks = [re.sub('(?m)^%s.*'%re.escape(self._comment_char),
-                               '', tok)
-                        for tok in toks]
+                toks = [
+                    re.sub("(?m)^%s.*" % re.escape(self._comment_char), "", tok)
+                    for tok in toks
+                ]
             return toks
         else:
-            assert 0, 'bad block type'
+            assert 0, "bad block type"
 
     def _normalize(self, t):
-        # If there's an empty set of brackets surrounding the actual
-        # parse, then strip them off.
-        if EMPTY_BRACKETS.match(t):
-            t = t.strip()[1:-1]
         # Replace leaves of the form (!), (,), with (! !), (, ,)
         t = re.sub(r"\((.)\)", r"(\1 \1)", t)
         # Replace leaves of the form (tag word root) with (tag word)
@@ -81,40 +88,52 @@ class BracketParseCorpusReader(SyntaxCorpusReader):
 
     def _parse(self, t):
         try:
-            return Tree.fromstring(self._normalize(t))
+            tree = Tree.fromstring(self._normalize(t))
+            # If there's an empty node at the top, strip it off
+            if tree.label() == '' and len(tree) == 1:
+                return tree[0]
+            else:
+                return tree
 
         except ValueError as e:
             sys.stderr.write("Bad tree detected; trying to recover...\n")
             # Try to recover, if we can:
-            if e.args == ('mismatched parens',):
+            if e.args == ("mismatched parens",):
                 for n in range(1, 5):
                     try:
-                        v = Tree(self._normalize(t+')'*n))
-                        sys.stderr.write("  Recovered by adding %d close "
-                                         "paren(s)\n" % n)
+                        v = Tree(self._normalize(t + ")" * n))
+                        sys.stderr.write(
+                            "  Recovered by adding %d close " "paren(s)\n" % n
+                        )
                         return v
-                    except ValueError: pass
+                    except ValueError:
+                        pass
             # Try something else:
             sys.stderr.write("  Recovered by returning a flat parse.\n")
-            #sys.stderr.write(' '.join(t.split())+'\n')
-            return Tree('S', self._tag(t))
+            # sys.stderr.write(' '.join(t.split())+'\n')
+            return Tree("S", self._tag(t))
 
     def _tag(self, t, tagset=None):
-        tagged_sent = [(w,p) for (p,w) in TAGWORD.findall(self._normalize(t))]
+        tagged_sent = [(w, p) for (p, w) in TAGWORD.findall(self._normalize(t))]
         if tagset and tagset != self._tagset:
-            tagged_sent = [(w, map_tag(self._tagset, tagset, p)) for (w,p) in tagged_sent]
+            tagged_sent = [
+                (w, map_tag(self._tagset, tagset, p)) for (w, p) in tagged_sent
+            ]
         return tagged_sent
 
     def _word(self, t):
         return WORD.findall(self._normalize(t))
 
-class CategorizedBracketParseCorpusReader(CategorizedCorpusReader,
-                                          BracketParseCorpusReader):
+
+class CategorizedBracketParseCorpusReader(
+    CategorizedCorpusReader, BracketParseCorpusReader
+):
     """
     A reader for parsed corpora whose documents are
     divided into categories based on their file identifiers.
     @author: Nathan Schneider <nschneid@cs.cmu.edu>
     """
+
     def __init__(self, *args, **kwargs):
         """
         Initialize the corpus reader.  Categorization arguments
@@ -129,41 +148,54 @@ class CategorizedBracketParseCorpusReader(CategorizedCorpusReader,
 
     def _resolve(self, fileids, categories):
         if fileids is not None and categories is not None:
-            raise ValueError('Specify fileids or categories, not both')
+            raise ValueError("Specify fileids or categories, not both")
         if categories is not None:
             return self.fileids(categories)
         else:
             return fileids
+
     def raw(self, fileids=None, categories=None):
-        return BracketParseCorpusReader.raw(
-            self, self._resolve(fileids, categories))
+        return BracketParseCorpusReader.raw(self, self._resolve(fileids, categories))
+
     def words(self, fileids=None, categories=None):
-        return BracketParseCorpusReader.words(
-            self, self._resolve(fileids, categories))
+        return BracketParseCorpusReader.words(self, self._resolve(fileids, categories))
+
     def sents(self, fileids=None, categories=None):
-        return BracketParseCorpusReader.sents(
-            self, self._resolve(fileids, categories))
+        return BracketParseCorpusReader.sents(self, self._resolve(fileids, categories))
+
     def paras(self, fileids=None, categories=None):
-        return BracketParseCorpusReader.paras(
-            self, self._resolve(fileids, categories))
+        return BracketParseCorpusReader.paras(self, self._resolve(fileids, categories))
+
     def tagged_words(self, fileids=None, categories=None, tagset=None):
         return BracketParseCorpusReader.tagged_words(
-            self, self._resolve(fileids, categories), tagset)
+            self, self._resolve(fileids, categories), tagset
+        )
+
     def tagged_sents(self, fileids=None, categories=None, tagset=None):
         return BracketParseCorpusReader.tagged_sents(
-            self, self._resolve(fileids, categories), tagset)
+            self, self._resolve(fileids, categories), tagset
+        )
+
     def tagged_paras(self, fileids=None, categories=None, tagset=None):
         return BracketParseCorpusReader.tagged_paras(
-            self, self._resolve(fileids, categories), tagset)
+            self, self._resolve(fileids, categories), tagset
+        )
+
     def parsed_words(self, fileids=None, categories=None):
         return BracketParseCorpusReader.parsed_words(
-            self, self._resolve(fileids, categories))
+            self, self._resolve(fileids, categories)
+        )
+
     def parsed_sents(self, fileids=None, categories=None):
         return BracketParseCorpusReader.parsed_sents(
-            self, self._resolve(fileids, categories))
+            self, self._resolve(fileids, categories)
+        )
+
     def parsed_paras(self, fileids=None, categories=None):
         return BracketParseCorpusReader.parsed_paras(
-            self, self._resolve(fileids, categories))
+            self, self._resolve(fileids, categories)
+        )
+
 
 class AlpinoCorpusReader(BracketParseCorpusReader):
     """
@@ -172,19 +204,24 @@ class AlpinoCorpusReader(BracketParseCorpusReader):
     Unfortunately this puts punctuation and some other words out of the sentence
     order in the xml element tree. This is no good for tag_ and word_:
     _tag and _word will be overridden to use a non-default new parameter 'ordered'
-    to the overridden _normalize function. The _parse function can then remain 
+    to the overridden _normalize function. The _parse function can then remain
     untouched.
     """
-    def __init__(self, root, encoding='ISO-8859-1', tagset=None):
-        BracketParseCorpusReader.__init__(self, root, 'alpino\.xml',
-                                 detect_blocks='blankline',
-                                 encoding=encoding,
-                                 tagset=tagset)
 
-    def _normalize(self, t, ordered = False):
+    def __init__(self, root, encoding="ISO-8859-1", tagset=None):
+        BracketParseCorpusReader.__init__(
+            self,
+            root,
+            "alpino\.xml",
+            detect_blocks="blankline",
+            encoding=encoding,
+            tagset=tagset,
+        )
+
+    def _normalize(self, t, ordered=False):
         """Normalize the xml sentence element in t.
-        The sentence elements <alpino_ds>, although embedded in a few overall 
-        xml elements, are seperated by blank lines. That's how the reader can 
+        The sentence elements <alpino_ds>, although embedded in a few overall
+        xml elements, are separated by blank lines. That's how the reader can
         deliver them one at a time.
         Each sentence has a few category subnodes that are of no use to us.
         The remaining word nodes may or may not appear in the proper order.
@@ -192,7 +229,7 @@ class AlpinoCorpusReader(BracketParseCorpusReader):
         - begin : the position of the word in the sentence
         - pos   : Part of Speech: the Tag
         - word  : the actual word
+        The return value is a string with all xml elements replaced by
+        The return value is a string with all xml elementes replaced by
         clauses: either a cat clause with nested clauses, or a word clause.
         The order of the bracket clauses closely follows the xml.
         If ordered == True, the word clauses include an order sequence number.
@@ -203,8 +240,12 @@ class AlpinoCorpusReader(BracketParseCorpusReader):
         # convert XML to sexpr notation
         t = re.sub(r'  <node .*? cat="(\w+)".*>', r"(\1", t)
         if ordered:
-            t = re.sub(r'  <node. *?begin="(\d+)".*? pos="(\w+)".*? word="([^"]+)".*?/>', r"(\1 \2 \3)", t)
-        else: 
+            t = re.sub(
+                r'  <node. *?begin="(\d+)".*? pos="(\w+)".*? word="([^"]+)".*?/>',
+                r"(\1 \2 \3)",
+                t,
+            )
+        else:
             t = re.sub(r'  <node .*?pos="(\w+)".*? word="([^"]+)".*?/>', r"(\1 \2)", t)
         t = re.sub(r"  </node>", r")", t)
         t = re.sub(r"<sentence>.*</sentence>", r"", t)
@@ -212,16 +253,20 @@ class AlpinoCorpusReader(BracketParseCorpusReader):
         return t
 
     def _tag(self, t, tagset=None):
-        tagged_sent = [(int(o), w, p) for (o,p,w) in SORTTAGWRD.findall(self._normalize(t, ordered = True))]
+        tagged_sent = [
+            (int(o), w, p)
+            for (o, p, w) in SORTTAGWRD.findall(self._normalize(t, ordered=True))
+        ]
         tagged_sent.sort()
         if tagset and tagset != self._tagset:
-            tagged_sent = [(w, map_tag(self._tagset, tagset, p)) for (o,w,p) in tagged_sent]
+            tagged_sent = [
+                (w, map_tag(self._tagset, tagset, p)) for (o, w, p) in tagged_sent
+            ]
         else:
-            tagged_sent = [(w,p) for (o,w,p) in tagged_sent]
+            tagged_sent = [(w, p) for (o, w, p) in tagged_sent]
         return tagged_sent
 
     def _word(self, t):
         """Return a correctly ordered list if words"""
         tagged_sent = self._tag(t)
-        return [w for (w,p) in tagged_sent]      
-
+        return [w for (w, p) in tagged_sent]
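
As a worked example of the bracketed format quoted in the BracketParseCorpusReader docstring, the sketch below writes one tree to a temporary corpus and reads it back; it is self-contained, so no real treebank is needed:

    import os
    import tempfile

    from nltk.corpus.reader import BracketParseCorpusReader

    root = tempfile.mkdtemp()
    with open(os.path.join(root, 'sample.mrg'), 'w') as f:
        f.write('(S (NP (DT the) (JJ little) (NN dog)) (VP (VBD barked)))\n')

    reader = BracketParseCorpusReader(root, r'.*\.mrg')
    print(reader.words())            # ['the', 'little', 'dog', 'barked']
    print(reader.tagged_sents()[0])  # [('the', 'DT'), ('little', 'JJ'), ...]
    print(reader.parsed_sents()[0])  # an nltk.tree.Tree rooted at S
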
diff --git a/nlp_resource_data/nltk/corpus/reader/bracket_parse.pyc b/nlp_resource_data/nltk/corpus/reader/bracket_parse.pyc
deleted file mode 100755 (executable)
index 627a986..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/bracket_parse.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index fa139c2..0c597d5
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Categorized Sentences Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -34,11 +34,11 @@ Related papers:
     sentiment categorization with respect to rating scales". Proceedings of the
     ACL, 2005.
 """
-from six import string_types
 
 from nltk.corpus.reader.api import *
 from nltk.tokenize import *
 
+
 class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader):
     """
     A reader for corpora in which each row represents a single instance, mainly
@@ -72,8 +72,15 @@ class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader):
 
     CorpusView = StreamBackedCorpusView
 
-    def __init__(self, root, fileids, word_tokenizer=WhitespaceTokenizer(),
-                 sent_tokenizer=None, encoding='utf8', **kwargs):
+    def __init__(
+        self,
+        root,
+        fileids,
+        word_tokenizer=WhitespaceTokenizer(),
+        sent_tokenizer=None,
+        encoding="utf8",
+        **kwargs
+    ):
         """
         :param root: The root directory for the corpus.
         :param fileids: a list or regexp specifying the fileids in the corpus.
@@ -91,7 +98,7 @@ class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader):
 
     def _resolve(self, fileids, categories):
         if fileids is not None and categories is not None:
-            raise ValueError('Specify fileids or categories, not both')
+            raise ValueError("Specify fileids or categories, not both")
         if categories is not None:
             return self.fileids(categories)
         else:
@@ -109,7 +116,7 @@ class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader):
         fileids = self._resolve(fileids, categories)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
@@ -134,10 +141,14 @@ class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader):
         fileids = self._resolve(fileids, categories)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
-        return concat([self.CorpusView(path, self._read_sent_block, encoding=enc)
-            for (path, enc, fileid) in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_sent_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def words(self, fileids=None, categories=None):
         """
@@ -154,20 +165,28 @@ class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader):
         fileids = self._resolve(fileids, categories)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
-        return concat([self.CorpusView(path, self._read_word_block, encoding=enc)
-            for (path, enc, fileid) in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_word_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def _read_sent_block(self, stream):
         sents = []
-        for i in range(20): # Read 20 lines at a time.
+        for i in range(20):  # Read 20 lines at a time.
             line = stream.readline()
             if not line:
                 continue
             if self._sent_tokenizer:
-                sents.extend([self._word_tokenizer.tokenize(sent)
-                              for sent in self._sent_tokenizer.tokenize(line)])
+                sents.extend(
+                    [
+                        self._word_tokenizer.tokenize(sent)
+                        for sent in self._sent_tokenizer.tokenize(line)
+                    ]
+                )
             else:
                 sents.append(self._word_tokenizer.tokenize(line))
         return sents
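
As a usage sketch for the reader above (the root path and file names are
hypothetical; cat_pattern is the standard CategorizedCorpusReader keyword for
deriving categories from fileids):

from nltk.corpus.reader import CategorizedSentencesCorpusReader

# Hypothetical layout: one file per category, one instance per line,
# e.g. rt-polarity.pos and rt-polarity.neg under /path/to/corpus.
reader = CategorizedSentencesCorpusReader(
    "/path/to/corpus",                  # assumed corpus root
    r"rt-polarity\.(neg|pos)",          # assumed fileid pattern
    cat_pattern=r"rt-polarity\.(\w+)",  # category taken from the fileid suffix
)
print(reader.categories())                 # ['neg', 'pos']
print(reader.sents(categories="pos")[:1])  # first tokenized instance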
diff --git a/nlp_resource_data/nltk/corpus/reader/categorized_sents.pyc b/nlp_resource_data/nltk/corpus/reader/categorized_sents.pyc
deleted file mode 100755 (executable)
index 7ea0ec1..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/categorized_sents.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index eaf85dc..0d0cc5e
@@ -1,61 +1,78 @@
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Masato Hagiwara <hagisan@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 # For more information, see http://lilyx.net/pages/nltkjapanesecorpus.html
-from __future__ import print_function
 
 import sys
 
-from six import string_types
-
 from nltk.corpus.reader import util
 
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
-class ChasenCorpusReader(CorpusReader):
 
-    def __init__(self, root, fileids, encoding='utf8', sent_splitter=None):
+class ChasenCorpusReader(CorpusReader):
+    def __init__(self, root, fileids, encoding="utf8", sent_splitter=None):
         self._sent_splitter = sent_splitter
         CorpusReader.__init__(self, root, fileids, encoding)
 
     def raw(self, fileids=None):
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def words(self, fileids=None):
-        return concat([ChasenCorpusView(fileid, enc,
-                                        False, False, False, self._sent_splitter)
-            for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChasenCorpusView(fileid, enc, False, False, False, self._sent_splitter)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_words(self, fileids=None):
-        return concat([ChasenCorpusView(fileid, enc,
-                                        True, False, False, self._sent_splitter)
-            for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChasenCorpusView(fileid, enc, True, False, False, self._sent_splitter)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def sents(self, fileids=None):
-        return concat([ChasenCorpusView(fileid, enc,
-                                        False, True, False, self._sent_splitter)
-            for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChasenCorpusView(fileid, enc, False, True, False, self._sent_splitter)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_sents(self, fileids=None):
-        return concat([ChasenCorpusView(fileid, enc,
-                                        True, True, False, self._sent_splitter)
-            for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChasenCorpusView(fileid, enc, True, True, False, self._sent_splitter)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def paras(self, fileids=None):
-        return concat([ChasenCorpusView(fileid, enc,
-                                        False, True, True, self._sent_splitter)
-            for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChasenCorpusView(fileid, enc, False, True, True, self._sent_splitter)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_paras(self, fileids=None):
-        return concat([ChasenCorpusView(fileid, enc,
-                                        True, True, True, self._sent_splitter)
-            for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChasenCorpusView(fileid, enc, True, True, True, self._sent_splitter)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
 
 class ChasenCorpusView(StreamBackedCorpusView):
@@ -64,15 +81,21 @@ class ChasenCorpusView(StreamBackedCorpusView):
     but it uses a fixed word and sentence tokenizer.
     """
 
-    def __init__(self, corpus_file, encoding,
-                 tagged, group_by_sent, group_by_para, sent_splitter=None):
+    def __init__(
+        self,
+        corpus_file,
+        encoding,
+        tagged,
+        group_by_sent,
+        group_by_para,
+        sent_splitter=None,
+    ):
         self._tagged = tagged
         self._group_by_sent = group_by_sent
         self._group_by_para = group_by_para
         self._sent_splitter = sent_splitter
         StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding)
 
-
     def read_block(self, stream):
         """Reads one paragraph at a time."""
         block = []
@@ -83,23 +106,24 @@ class ChasenCorpusView(StreamBackedCorpusView):
             sent = []
             for line in para_str.splitlines():
 
-                _eos = line.strip() == 'EOS'
-                _cells = line.split('\t')
-                w = (_cells[0], '\t'.join(_cells[1:]))
-                if not _eos: sent.append(w)
+                _eos = line.strip() == "EOS"
+                _cells = line.split("\t")
+                w = (_cells[0], "\t".join(_cells[1:]))
+                if not _eos:
+                    sent.append(w)
 
                 if _eos or (self._sent_splitter and self._sent_splitter(w)):
                     if not self._tagged:
-                        sent = [w for (w,t) in sent]
+                        sent = [w for (w, t) in sent]
                     if self._group_by_sent:
                         para.append(sent)
                     else:
                         para.extend(sent)
                     sent = []
 
-            if len(sent)>0:
+            if len(sent) > 0:
                 if not self._tagged:
-                    sent = [w for (w,t) in sent]
+                    sent = [w for (w, t) in sent]
 
                 if self._group_by_sent:
                     para.append(sent)
@@ -113,28 +137,32 @@ class ChasenCorpusView(StreamBackedCorpusView):
 
         return block
 
+
 def demo():
 
     import nltk
     from nltk.corpus.util import LazyCorpusLoader
 
-    jeita = LazyCorpusLoader(
-        'jeita', ChasenCorpusReader, r'.*chasen', encoding='utf-8')
-    print('/'.join( jeita.words()[22100:22140] ))
+    jeita = LazyCorpusLoader("jeita", ChasenCorpusReader, r".*chasen", encoding="utf-8")
+    print("/".join(jeita.words()[22100:22140]))
 
+    print(
+        "\nEOS\n".join(
+            "\n".join("%s/%s" % (w[0], w[1].split("\t")[2]) for w in sent)
+            for sent in jeita.tagged_sents()[2170:2173]
+        )
+    )
 
-    print('\nEOS\n'.join('\n'.join("%s/%s" % (w[0],w[1].split('\t')[2]) for w in sent)
-                          for sent in jeita.tagged_sents()[2170:2173]))
 
 def test():
 
     from nltk.corpus.util import LazyCorpusLoader
 
-    jeita = LazyCorpusLoader(
-        'jeita', ChasenCorpusReader, r'.*chasen', encoding='utf-8')
+    jeita = LazyCorpusLoader("jeita", ChasenCorpusReader, r".*chasen", encoding="utf-8")
+
+    assert isinstance(jeita.tagged_words()[0][1], str)
 
-    assert isinstance(jeita.tagged_words()[0][1], string_types)
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
     test()
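
For reference, a minimal sketch of the line format read_block consumes above
(one token per line, surface form followed by tab-separated morphological
fields, an EOS line closing each sentence; the sample tokens are invented):

# An invented two-token ChaSen paragraph.
para = "犬\tイヌ\t犬\t名詞-一般\nです\tデス\tです\t助動詞\nEOS\n"

sent = []
for line in para.splitlines():
    if line.strip() == "EOS":  # sentence boundary, as in ChasenCorpusView
        break
    cells = line.split("\t")
    sent.append((cells[0], "\t".join(cells[1:])))  # (surface, feature string)

print([w for (w, t) in sent])     # untagged view: ['犬', 'です']
print(sent[0][1].split("\t")[2])  # third feature field, as demo() prints: 名詞-一般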
diff --git a/nlp_resource_data/nltk/corpus/reader/chasen.pyc b/nlp_resource_data/nltk/corpus/reader/chasen.pyc
deleted file mode 100755 (executable)
index 45a8040..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/chasen.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 0b092f1..1d163c6
@@ -1,6 +1,6 @@
 # CHILDES XML Corpus Reader
 
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Tomonori Nagano <tnagano@gc.cuny.edu>
 #         Alexis Dimitriadis <A.Dimitriadis@uu.nl>
 # URL: <http://nltk.org/>
@@ -9,13 +9,11 @@
 """
 Corpus reader for the XML version of the CHILDES corpus.
 """
-from __future__ import print_function, division
 
-__docformat__ = 'epytext en'
+__docformat__ = "epytext en"
 
 import re
 from collections import defaultdict
-from six import string_types
 
 from nltk.util import flatten, LazyMap, LazyConcatenation
 
@@ -23,25 +21,34 @@ from nltk.corpus.reader.util import concat
 from nltk.corpus.reader.xmldocs import XMLCorpusReader, ElementTree
 
 # to resolve the namespace issue
-NS = 'http://www.talkbank.org/ns/talkbank'
+NS = "http://www.talkbank.org/ns/talkbank"
+
 
 class CHILDESCorpusReader(XMLCorpusReader):
     """
     Corpus reader for the XML version of the CHILDES corpus.
-    The CHILDES corpus is available at ``http://childes.psy.cmu.edu/``. The XML
-    version of CHILDES is located at ``http://childes.psy.cmu.edu/data-xml/``.
+    The CHILDES corpus is available at ``https://childes.talkbank.org/``. The XML
+    version of CHILDES is located at ``https://childes.talkbank.org/data-xml/``.
     Copy the needed parts of the CHILDES XML corpus into the NLTK data directory
     (``nltk_data/corpora/CHILDES/``).
 
     For access to the file text use the usual nltk functions,
     ``words()``, ``sents()``, ``tagged_words()`` and ``tagged_sents()``.
     """
+
     def __init__(self, root, fileids, lazy=True):
         XMLCorpusReader.__init__(self, root, fileids)
         self._lazy = lazy
 
-    def words(self, fileids=None, speaker='ALL', stem=False,
-            relation=False, strip_space=True, replace=False):
+    def words(
+        self,
+        fileids=None,
+        speaker="ALL",
+        stem=False,
+        relation=False,
+        strip_space=True,
+        replace=False,
+    ):
         """
         :return: the given file(s) as a list of words
         :rtype: list(str)
@@ -58,18 +65,30 @@ class CHILDESCorpusReader(XMLCorpusReader):
         :param replace: If true, then use the replaced (intended) word instead
             of the original word (e.g., 'wat' will be replaced with 'watch')
         """
-        sent=None
-        pos=False
+        sent = None
+        pos = False
         if not self._lazy:
-            return [self._get_words(fileid, speaker, sent, stem, relation,
-                pos, strip_space, replace) for fileid in self.abspaths(fileids)]
-
-        get_words = lambda fileid: self._get_words(fileid, speaker, sent, stem, relation,
-            pos, strip_space, replace)
+            return [
+                self._get_words(
+                    fileid, speaker, sent, stem, relation, pos, strip_space, replace
+                )
+                for fileid in self.abspaths(fileids)
+            ]
+
+        get_words = lambda fileid: self._get_words(
+            fileid, speaker, sent, stem, relation, pos, strip_space, replace
+        )
         return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids)))
 
-    def tagged_words(self, fileids=None, speaker='ALL', stem=False,
-            relation=False, strip_space=True, replace=False):
+    def tagged_words(
+        self,
+        fileids=None,
+        speaker="ALL",
+        stem=False,
+        relation=False,
+        strip_space=True,
+        replace=False,
+    ):
         """
         :return: the given file(s) as a list of tagged
             words and punctuation symbols, encoded as tuples
@@ -88,18 +107,30 @@ class CHILDESCorpusReader(XMLCorpusReader):
         :param replace: If true, then use the replaced (intended) word instead
             of the original word (e.g., 'wat' will be replaced with 'watch')
         """
-        sent=None
-        pos=True
+        sent = None
+        pos = True
         if not self._lazy:
-            return [self._get_words(fileid, speaker, sent, stem, relation,
-                pos, strip_space, replace) for fileid in self.abspaths(fileids)]
-
-        get_words = lambda fileid: self._get_words(fileid, speaker, sent, stem, relation,
-            pos, strip_space, replace)
+            return [
+                self._get_words(
+                    fileid, speaker, sent, stem, relation, pos, strip_space, replace
+                )
+                for fileid in self.abspaths(fileids)
+            ]
+
+        get_words = lambda fileid: self._get_words(
+            fileid, speaker, sent, stem, relation, pos, strip_space, replace
+        )
         return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids)))
 
-    def sents(self, fileids=None, speaker='ALL', stem=False,
-            relation=None, strip_space=True, replace=False):
+    def sents(
+        self,
+        fileids=None,
+        speaker="ALL",
+        stem=False,
+        relation=None,
+        strip_space=True,
+        replace=False,
+    ):
         """
         :return: the given file(s) as a list of sentences or utterances, each
             encoded as a list of word strings.
@@ -118,18 +149,30 @@ class CHILDESCorpusReader(XMLCorpusReader):
         :param replace: If true, then use the replaced (intended) word instead
             of the original word (e.g., 'wat' will be replaced with 'watch')
         """
-        sent=True
-        pos=False
+        sent = True
+        pos = False
         if not self._lazy:
-            return [self._get_words(fileid, speaker, sent, stem, relation,
-                pos, strip_space, replace) for fileid in self.abspaths(fileids)]
-
-        get_words = lambda fileid: self._get_words(fileid, speaker, sent, stem, relation,
-            pos, strip_space, replace)
+            return [
+                self._get_words(
+                    fileid, speaker, sent, stem, relation, pos, strip_space, replace
+                )
+                for fileid in self.abspaths(fileids)
+            ]
+
+        get_words = lambda fileid: self._get_words(
+            fileid, speaker, sent, stem, relation, pos, strip_space, replace
+        )
         return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids)))
 
-    def tagged_sents(self, fileids=None, speaker='ALL', stem=False,
-            relation=None, strip_space=True, replace=False):
+    def tagged_sents(
+        self,
+        fileids=None,
+        speaker="ALL",
+        stem=False,
+        relation=None,
+        strip_space=True,
+        replace=False,
+    ):
         """
         :return: the given file(s) as a list of
             sentences, each encoded as a list of ``(word,tag)`` tuples.
@@ -148,14 +191,19 @@ class CHILDESCorpusReader(XMLCorpusReader):
         :param replace: If true, then use the replaced (intended) word instead
             of the original word (e.g., 'wat' will be replaced with 'watch')
         """
-        sent=True
-        pos=True
+        sent = True
+        pos = True
         if not self._lazy:
-            return [self._get_words(fileid, speaker, sent, stem, relation,
-                pos, strip_space, replace) for fileid in self.abspaths(fileids)]
-
-        get_words = lambda fileid: self._get_words(fileid, speaker, sent, stem, relation,
-            pos, strip_space, replace)
+            return [
+                self._get_words(
+                    fileid, speaker, sent, stem, relation, pos, strip_space, replace
+                )
+                for fileid in self.abspaths(fileids)
+            ]
+
+        get_words = lambda fileid: self._get_words(
+            fileid, speaker, sent, stem, relation, pos, strip_space, replace
+        )
         return LazyConcatenation(LazyMap(get_words, self.abspaths(fileids)))
 
     def corpus(self, fileids=None):
@@ -192,13 +240,14 @@ class CHILDESCorpusReader(XMLCorpusReader):
         xmldoc = ElementTree.parse(fileid).getroot()
         # getting participants' data
         pat = dictOfDicts()
-        for participant in xmldoc.findall('.//{%s}Participants/{%s}participant'
-                                          % (NS,NS)):
-            for (key,value) in participant.items():
-                pat[participant.get('id')][key] = value
+        for participant in xmldoc.findall(
+            ".//{%s}Participants/{%s}participant" % (NS, NS)
+        ):
+            for (key, value) in participant.items():
+                pat[participant.get("id")][key] = value
         return pat
 
-    def age(self, fileids=None, speaker='CHI', month=False):
+    def age(self, fileids=None, speaker="CHI", month=False):
         """
         :return: the given file(s) as string or int
         :rtype: list or int
@@ -206,18 +255,19 @@ class CHILDESCorpusReader(XMLCorpusReader):
         :param month: If true, return months instead of year-month-date
         """
         if not self._lazy:
-            return [self._get_age(fileid, speaker, month)
-                for fileid in self.abspaths(fileids)]
+            return [
+                self._get_age(fileid, speaker, month)
+                for fileid in self.abspaths(fileids)
+            ]
         get_age = lambda fileid: self._get_age(fileid, speaker, month)
         return LazyMap(get_age, self.abspaths(fileids))
 
     def _get_age(self, fileid, speaker, month):
         xmldoc = ElementTree.parse(fileid).getroot()
-        for pat in xmldoc.findall('.//{%s}Participants/{%s}participant'
-                                  % (NS,NS)):
+        for pat in xmldoc.findall(".//{%s}Participants/{%s}participant" % (NS, NS)):
             try:
-                if pat.get('id') == speaker:
-                    age = pat.get('age')
+                if pat.get("id") == speaker:
+                    age = pat.get("age")
                     if month:
                         age = self.convert_age(age)
                     return age
@@ -227,8 +277,8 @@ class CHILDESCorpusReader(XMLCorpusReader):
 
     def convert_age(self, age_year):
         "Caclculate age in months from a string in CHILDES format"
-        m = re.match("P(\d+)Y(\d+)M?(\d?\d?)D?",age_year)
-        age_month = int(m.group(1))*12 + int(m.group(2))
+        m = re.match("P(\d+)Y(\d+)M?(\d?\d?)D?", age_year)
+        age_month = int(m.group(1)) * 12 + int(m.group(2))
         try:
             if int(m.group(3)) > 15:
                 age_month += 1
@@ -237,40 +287,50 @@ class CHILDESCorpusReader(XMLCorpusReader):
             pass
         return age_month
 
-    def MLU(self, fileids=None, speaker='CHI'):
+    def MLU(self, fileids=None, speaker="CHI"):
         """
         :return: the mean length of utterance (MLU) for the given file(s), as a float
         :rtype: list(float)
         """
         if not self._lazy:
-            return [self._getMLU(fileid, speaker=speaker)
-                for fileid in self.abspaths(fileids)]
+            return [
+                self._getMLU(fileid, speaker=speaker)
+                for fileid in self.abspaths(fileids)
+            ]
         get_MLU = lambda fileid: self._getMLU(fileid, speaker=speaker)
         return LazyMap(get_MLU, self.abspaths(fileids))
 
     def _getMLU(self, fileid, speaker):
-        sents = self._get_words(fileid, speaker=speaker, sent=True, stem=True,
-                    relation=False, pos=True, strip_space=True, replace=True)
+        sents = self._get_words(
+            fileid,
+            speaker=speaker,
+            sent=True,
+            stem=True,
+            relation=False,
+            pos=True,
+            strip_space=True,
+            replace=True,
+        )
         results = []
         lastSent = []
         numFillers = 0
         sentDiscount = 0
         for sent in sents:
-            posList = [pos for (word,pos) in sent]
+            posList = [pos for (word, pos) in sent]
             # if any part of the sentence is unintelligible
-            if any(pos == 'unk' for pos in posList):
-                next
+            if any(pos == "unk" for pos in posList):
+                continue
             # if the sentence is null
             elif sent == []:
-                next
+                continue
             # if the sentence is the same as the last sent
             elif sent == lastSent:
-                next
+                continue
             else:
-                results.append([word for (word,pos) in sent])
+                results.append([word for (word, pos) in sent])
                 # count number of fillers
-                if len(set(['co',None]).intersection(posList)) > 0:
-                    numFillers += posList.count('co')
+                if len(set(["co", None]).intersection(posList)) > 0:
+                    numFillers += posList.count("co")
                     numFillers += posList.count(None)
                     sentDiscount += 1
             lastSent = sent
@@ -278,124 +338,163 @@ class CHILDESCorpusReader(XMLCorpusReader):
             thisWordList = flatten(results)
             # count number of morphemes
             # (e.g., 'read' = 1 morpheme but 'read-PAST' is 2 morphemes)
-            numWords = len(flatten([word.split('-')
-                                          for word in thisWordList])) - numFillers
+            numWords = (
+                len(flatten([word.split("-") for word in thisWordList])) - numFillers
+            )
             numSents = len(results) - sentDiscount
-            mlu = numWords/numSents
+            mlu = numWords / numSents
         except ZeroDivisionError:
             mlu = 0
         # return {'mlu':mlu,'wordNum':numWords,'sentNum':numSents}
         return mlu
 
-    def _get_words(self, fileid, speaker, sent, stem, relation, pos,
-            strip_space, replace):
-        if isinstance(speaker, string_types) and speaker != 'ALL':  # ensure we have a list of speakers
-            speaker = [ speaker ]
+    def _get_words(
+        self, fileid, speaker, sent, stem, relation, pos, strip_space, replace
+    ):
+        if (
+            isinstance(speaker, str) and speaker != "ALL"
+        ):  # ensure we have a list of speakers
+            speaker = [speaker]
         xmldoc = ElementTree.parse(fileid).getroot()
         # processing each xml doc
         results = []
-        for xmlsent in xmldoc.findall('.//{%s}u' % NS):
+        for xmlsent in xmldoc.findall(".//{%s}u" % NS):
             sents = []
             # select speakers
-            if speaker == 'ALL' or xmlsent.get('who') in speaker:
-                for xmlword in xmlsent.findall('.//{%s}w' % NS):
-                    infl = None ; suffixStem = None; suffixTag = None
+            if speaker == "ALL" or xmlsent.get("who") in speaker:
+                for xmlword in xmlsent.findall(".//{%s}w" % NS):
+                    infl = None
+                    suffixStem = None
+                    suffixTag = None
                     # getting replaced words
-                    if replace and xmlsent.find('.//{%s}w/{%s}replacement'
-                                                % (NS,NS)):
-                        xmlword = xmlsent.find('.//{%s}w/{%s}replacement/{%s}w'
-                                               % (NS,NS,NS))
-                    elif replace and xmlsent.find('.//{%s}w/{%s}wk' % (NS,NS)):
-                        xmlword = xmlsent.find('.//{%s}w/{%s}wk' % (NS,NS))
+                    if replace and xmlsent.find(".//{%s}w/{%s}replacement" % (NS, NS)):
+                        xmlword = xmlsent.find(
+                            ".//{%s}w/{%s}replacement/{%s}w" % (NS, NS, NS)
+                        )
+                    elif replace and xmlsent.find(".//{%s}w/{%s}wk" % (NS, NS)):
+                        xmlword = xmlsent.find(".//{%s}w/{%s}wk" % (NS, NS))
                     # get text
                     if xmlword.text:
                         word = xmlword.text
                     else:
-                        word = ''
+                        word = ""
                     # strip tailing space
                     if strip_space:
                         word = word.strip()
                     # stem
                     if relation or stem:
                         try:
-                            xmlstem = xmlword.find('.//{%s}stem' % NS)
+                            xmlstem = xmlword.find(".//{%s}stem" % NS)
                             word = xmlstem.text
                         except AttributeError as e:
                             pass
                         # if there is an inflection
                         try:
-                            xmlinfl = xmlword.find('.//{%s}mor/{%s}mw/{%s}mk'
-                                                   % (NS,NS,NS))
-                            word += '-' + xmlinfl.text
+                            xmlinfl = xmlword.find(
+                                ".//{%s}mor/{%s}mw/{%s}mk" % (NS, NS, NS)
+                            )
+                            word += "-" + xmlinfl.text
                         except:
                             pass
                         # if there is a suffix
                         try:
-                            xmlsuffix = xmlword.find('.//{%s}mor/{%s}mor-post/{%s}mw/{%s}stem'
-                                                     % (NS,NS,NS,NS))
+                            xmlsuffix = xmlword.find(
+                                ".//{%s}mor/{%s}mor-post/{%s}mw/{%s}stem"
+                                % (NS, NS, NS, NS)
+                            )
                             suffixStem = xmlsuffix.text
                         except AttributeError:
                             suffixStem = ""
                         if suffixStem:
-                            word += "~"+suffixStem
+                            word += "~" + suffixStem
                     # pos
                     if relation or pos:
                         try:
                             xmlpos = xmlword.findall(".//{%s}c" % NS)
                             xmlpos2 = xmlword.findall(".//{%s}s" % NS)
                             if xmlpos2 != []:
-                                tag = xmlpos[0].text+":"+xmlpos2[0].text
+                                tag = xmlpos[0].text + ":" + xmlpos2[0].text
                             else:
                                 tag = xmlpos[0].text
-                        except (AttributeError,IndexError) as e:
+                        except (AttributeError, IndexError) as e:
                             tag = ""
                         try:
-                            xmlsuffixpos = xmlword.findall('.//{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}c'
-                                                     % (NS,NS,NS,NS,NS))
-                            xmlsuffixpos2 = xmlword.findall('.//{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}s'
-                                                     % (NS,NS,NS,NS,NS))
+                            xmlsuffixpos = xmlword.findall(
+                                ".//{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}c"
+                                % (NS, NS, NS, NS, NS)
+                            )
+                            xmlsuffixpos2 = xmlword.findall(
+                                ".//{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}s"
+                                % (NS, NS, NS, NS, NS)
+                            )
                             if xmlsuffixpos2:
-                                suffixTag = xmlsuffixpos[0].text+":"+xmlsuffixpos2[0].text
+                                suffixTag = (
+                                    xmlsuffixpos[0].text + ":" + xmlsuffixpos2[0].text
+                                )
                             else:
                                 suffixTag = xmlsuffixpos[0].text
                         except:
                             pass
                         if suffixTag:
-                            tag += "~"+suffixTag
+                            tag += "~" + suffixTag
                         word = (word, tag)
                     # relational
                     # the gold standard is stored in
                     # <mor></mor><mor type="trn"><gra type="grt">
                     if relation == True:
-                        for xmlstem_rel in xmlword.findall('.//{%s}mor/{%s}gra'
-                                                           % (NS,NS)):
-                            if not xmlstem_rel.get('type') == 'grt':
-                                word = (word[0], word[1],
-                                        xmlstem_rel.get('index')
-                                        + "|" + xmlstem_rel.get('head')
-                                        + "|" + xmlstem_rel.get('relation'))
+                        for xmlstem_rel in xmlword.findall(
+                            ".//{%s}mor/{%s}gra" % (NS, NS)
+                        ):
+                            if not xmlstem_rel.get("type") == "grt":
+                                word = (
+                                    word[0],
+                                    word[1],
+                                    xmlstem_rel.get("index")
+                                    + "|"
+                                    + xmlstem_rel.get("head")
+                                    + "|"
+                                    + xmlstem_rel.get("relation"),
+                                )
                             else:
-                                word = (word[0], word[1], word[2],
-                                        word[0], word[1],
-                                        xmlstem_rel.get('index')
-                                        + "|" + xmlstem_rel.get('head')
-                                        + "|" + xmlstem_rel.get('relation'))
+                                word = (
+                                    word[0],
+                                    word[1],
+                                    word[2],
+                                    word[0],
+                                    word[1],
+                                    xmlstem_rel.get("index")
+                                    + "|"
+                                    + xmlstem_rel.get("head")
+                                    + "|"
+                                    + xmlstem_rel.get("relation"),
+                                )
                         try:
-                            for xmlpost_rel in xmlword.findall('.//{%s}mor/{%s}mor-post/{%s}gra'
-                                                               % (NS,NS,NS)):
-                                if not xmlpost_rel.get('type') == 'grt':
-                                    suffixStem = (suffixStem[0],
-                                                  suffixStem[1],
-                                                  xmlpost_rel.get('index')
-                                                  + "|" + xmlpost_rel.get('head')
-                                                  + "|" + xmlpost_rel.get('relation'))
+                            for xmlpost_rel in xmlword.findall(
+                                ".//{%s}mor/{%s}mor-post/{%s}gra" % (NS, NS, NS)
+                            ):
+                                if not xmlpost_rel.get("type") == "grt":
+                                    suffixStem = (
+                                        suffixStem[0],
+                                        suffixStem[1],
+                                        xmlpost_rel.get("index")
+                                        + "|"
+                                        + xmlpost_rel.get("head")
+                                        + "|"
+                                        + xmlpost_rel.get("relation"),
+                                    )
                                 else:
-                                    suffixStem = (suffixStem[0], suffixStem[1],
-                                                  suffixStem[2], suffixStem[0],
-                                                  suffixStem[1],
-                                                  xmlpost_rel.get('index')
-                                                  + "|" + xmlpost_rel.get('head')
-                                                  + "|" + xmlpost_rel.get('relation'))
+                                    suffixStem = (
+                                        suffixStem[0],
+                                        suffixStem[1],
+                                        suffixStem[2],
+                                        suffixStem[0],
+                                        suffixStem[1],
+                                        xmlpost_rel.get("index")
+                                        + "|"
+                                        + xmlpost_rel.get("head")
+                                        + "|"
+                                        + xmlpost_rel.get("relation"),
+                                    )
                         except:
                             pass
                     sents.append(word)
@@ -405,7 +504,6 @@ class CHILDESCorpusReader(XMLCorpusReader):
                     results.extend(sents)
         return LazyMap(lambda x: x, results)
 
-
     # Ready-to-use browser opener
 
     """
@@ -413,8 +511,7 @@ class CHILDESCorpusReader(XMLCorpusReader):
     shouldn't need to be changed, unless CHILDES changes the configuration
     of their server or unless the user sets up their own corpus webserver.
     """
-    childes_url_base = r'http://childes.psy.cmu.edu/browser/index.php?url='
-
+    childes_url_base = r"https://childes.talkbank.org/browser/index.php?url="
 
     def webview_file(self, fileid, urlbase=None):
         """Map a corpus file to its web version on the CHILDES website,
@@ -437,27 +534,27 @@ class CHILDESCorpusReader(XMLCorpusReader):
         corpus root points to the Cornell folder, urlbase='Eng-USA/Cornell'.
         """
 
-        import webbrowser, re
+        import webbrowser
 
         if urlbase:
-            path = urlbase+"/"+fileid
+            path = urlbase + "/" + fileid
         else:
             full = self.root + "/" + fileid
-            full = re.sub(r'\\', '/', full)
-            if '/childes/' in full.lower():
+            full = re.sub(r"\\", "/", full)
+            if "/childes/" in full.lower():
                 # Discard /data-xml/ if present
-                path = re.findall(r'(?i)/childes(?:/data-xml)?/(.*)\.xml', full)[0]
-            elif 'eng-usa' in full.lower():
-                path = 'Eng-USA/' + re.findall(r'/(?i)Eng-USA/(.*)\.xml', full)[0]
+                path = re.findall(r"(?i)/childes(?:/data-xml)?/(.*)\.xml", full)[0]
+            elif "eng-usa" in full.lower():
+                path = "Eng-USA/" + re.findall(r"/(?i)Eng-USA/(.*)\.xml", full)[0]
             else:
                 path = fileid
 
         # Strip ".xml" and add ".cha", as necessary:
-        if path.endswith('.xml'):
+        if path.endswith(".xml"):
             path = path[:-4]
 
-        if not path.endswith('.cha'):
-            path = path+'.cha'
+        if not path.endswith(".cha"):
+            path = path + ".cha"
 
         url = self.childes_url_base + path
 
@@ -467,7 +564,6 @@ class CHILDESCorpusReader(XMLCorpusReader):
         # raw_input("Hit Return to continue")
 
 
-
 def demo(corpus_root=None):
     """
     The CHILDES corpus should be manually downloaded and saved
@@ -475,29 +571,40 @@ def demo(corpus_root=None):
     """
     if not corpus_root:
         from nltk.data import find
-        corpus_root = find('corpora/childes/data-xml/Eng-USA/')
+
+        corpus_root = find("corpora/childes/data-xml/Eng-USA/")
 
     try:
-        childes = CHILDESCorpusReader(corpus_root, '.*.xml')
+        childes = CHILDESCorpusReader(corpus_root, ".*.xml")
         # describe all corpus
         for file in childes.fileids()[:5]:
-            corpus = ''
-            corpus_id = ''
-            for (key,value) in childes.corpus(file)[0].items():
-                if key == "Corpus": corpus = value
-                if key == "Id": corpus_id = value
-            print('Reading', corpus,corpus_id,' .....')
-            print("words:", childes.words(file)[:7],"...")
-            print("words with replaced words:", childes.words(file, replace=True)[:7]," ...")
-            print("words with pos tags:", childes.tagged_words(file)[:7]," ...")
-            print("words (only MOT):", childes.words(file, speaker='MOT')[:7], "...")
-            print("words (only CHI):", childes.words(file, speaker='CHI')[:7], "...")
-            print("stemmed words:", childes.words(file, stem=True)[:7]," ...")
-            print("words with relations and pos-tag:", childes.words(file, relation=True)[:5]," ...")
-            print("sentence:", childes.sents(file)[:2]," ...")
+            corpus = ""
+            corpus_id = ""
+            for (key, value) in childes.corpus(file)[0].items():
+                if key == "Corpus":
+                    corpus = value
+                if key == "Id":
+                    corpus_id = value
+            print("Reading", corpus, corpus_id, " .....")
+            print("words:", childes.words(file)[:7], "...")
+            print(
+                "words with replaced words:",
+                childes.words(file, replace=True)[:7],
+                " ...",
+            )
+            print("words with pos tags:", childes.tagged_words(file)[:7], " ...")
+            print("words (only MOT):", childes.words(file, speaker="MOT")[:7], "...")
+            print("words (only CHI):", childes.words(file, speaker="CHI")[:7], "...")
+            print("stemmed words:", childes.words(file, stem=True)[:7], " ...")
+            print(
+                "words with relations and pos-tag:",
+                childes.words(file, relation=True)[:5],
+                " ...",
+            )
+            print("sentence:", childes.sents(file)[:2], " ...")
             for (participant, values) in childes.participants(file)[0].items():
-                    for (key, value) in values.items():
-                        print("\tparticipant", participant, key, ":", value)
+                for (key, value) in values.items():
+                    print("\tparticipant", participant, key, ":", value)
             print("num of sent:", len(childes.sents(file)))
             print("num of morphemes:", len(childes.words(file, stem=True)))
             print("age:", childes.age(file))
@@ -506,16 +613,18 @@ def demo(corpus_root=None):
             print()
 
     except LookupError as e:
-        print("""The CHILDES corpus, or the parts you need, should be manually
-        downloaded from http://childes.psy.cmu.edu/data-xml/ and saved at
+        print(
+            """The CHILDES corpus, or the parts you need, should be manually
+        downloaded from https://childes.talkbank.org/data-xml/ and saved at
         [NLTK_Data_Dir]/corpora/childes/
             Alternatively, you can call the demo with the path to a portion of the CHILDES corpus, e.g.:
         demo('/path/to/childes/data-xml/Eng-USA/')
-        """)
-        #corpus_root_http = urllib2.urlopen('http://childes.psy.cmu.edu/data-xml/Eng-USA/Bates.zip')
-        #corpus_root_http_bates = zipfile.ZipFile(cStringIO.StringIO(corpus_root_http.read()))
+        """
+        )
+        # corpus_root_http = urllib2.urlopen('https://childes.talkbank.org/data-xml/Eng-USA/Bates.zip')
+        # corpus_root_http_bates = zipfile.ZipFile(cStringIO.StringIO(corpus_root_http.read()))
         ##this fails
-        #childes = CHILDESCorpusReader(corpus_root_http_bates,corpus_root_http_bates.namelist())
+        # childes = CHILDESCorpusReader(corpus_root_http_bates,corpus_root_http_bates.namelist())
 
 
 if __name__ == "__main__":
diff --git a/nlp_resource_data/nltk/corpus/reader/childes.pyc b/nlp_resource_data/nltk/corpus/reader/childes.pyc
deleted file mode 100755 (executable)
index 922081e..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/childes.pyc and /dev/null differ
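
Similarly, the morpheme counting inside _getMLU above, isolated on toy
sentences (the filler and repeated-sentence discounts are ignored here):
stems are split on '-', so 'read-PAST' counts as two morphemes.

from nltk.util import flatten

# Toy utterances in the stemmed form _getMLU works with.
sents = [["you", "read-PAST", "it"], ["yes"]]
words = flatten(sents)
num_morphemes = len(flatten([w.split("-") for w in words]))  # 5: you/read/PAST/it/yes
print(num_morphemes / len(sents))  # MLU = 2.5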
old mode 100755 (executable)
new mode 100644 (file)
index b87ae06..bb32832
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Chunked Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -13,8 +13,6 @@ documents.
 
 import os.path, codecs
 
-from six import string_types
-
 import nltk
 from nltk.corpus.reader.bracket_parse import BracketParseCorpusReader
 from nltk.tree import Tree
@@ -23,6 +21,7 @@ from nltk.chunk import tagstr2tree
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
+
 class ChunkedCorpusReader(CorpusReader):
     """
     Reader for chunked (and optionally tagged) corpora.  Paragraphs
@@ -34,11 +33,18 @@ class ChunkedCorpusReader(CorpusReader):
     on blank lines; sentences are listed one per line; and sentences
     are parsed into chunk trees using ``nltk.chunk.tagstr2tree``.
     """
-    def __init__(self, root, fileids, extension='',
-                 str2chunktree=tagstr2tree,
-                 sent_tokenizer=RegexpTokenizer('\n', gaps=True),
-                 para_block_reader=read_blankline_block,
-                 encoding='utf8', tagset=None):
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        extension="",
+        str2chunktree=tagstr2tree,
+        sent_tokenizer=RegexpTokenizer("\n", gaps=True),
+        para_block_reader=read_blankline_block,
+        encoding="utf8",
+        tagset=None,
+    ):
         """
         :param root: The root directory for this corpus.
         :param fileids: A list or regexp specifying the fileids in this corpus.
@@ -53,8 +59,10 @@ class ChunkedCorpusReader(CorpusReader):
         :return: the given file(s) as a single string.
         :rtype: str
         """
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def words(self, fileids=None):
@@ -63,8 +71,12 @@ class ChunkedCorpusReader(CorpusReader):
             and punctuation symbols.
         :rtype: list(str)
         """
-        return concat([ChunkedCorpusView(f, enc, 0, 0, 0, 0, *self._cv_args)
-                       for (f, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChunkedCorpusView(f, enc, 0, 0, 0, 0, *self._cv_args)
+                for (f, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def sents(self, fileids=None):
         """
@@ -73,8 +85,12 @@ class ChunkedCorpusReader(CorpusReader):
             strings.
         :rtype: list(list(str))
         """
-        return concat([ChunkedCorpusView(f, enc, 0, 1, 0, 0, *self._cv_args)
-                       for (f, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChunkedCorpusView(f, enc, 0, 1, 0, 0, *self._cv_args)
+                for (f, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def paras(self, fileids=None):
         """
@@ -83,8 +99,12 @@ class ChunkedCorpusReader(CorpusReader):
             in turn encoded as lists of word strings.
         :rtype: list(list(list(str)))
         """
-        return concat([ChunkedCorpusView(f, enc, 0, 1, 1, 0, *self._cv_args)
-                       for (f, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChunkedCorpusView(f, enc, 0, 1, 1, 0, *self._cv_args)
+                for (f, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_words(self, fileids=None, tagset=None):
         """
@@ -93,8 +113,14 @@ class ChunkedCorpusReader(CorpusReader):
             ``(word,tag)``.
         :rtype: list(tuple(str,str))
         """
-        return concat([ChunkedCorpusView(f, enc, 1, 0, 0, 0, *self._cv_args, target_tagset=tagset)
-                       for (f, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChunkedCorpusView(
+                    f, enc, 1, 0, 0, 0, *self._cv_args, target_tagset=tagset
+                )
+                for (f, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_sents(self, fileids=None, tagset=None):
         """
@@ -103,8 +129,14 @@ class ChunkedCorpusReader(CorpusReader):
 
         :rtype: list(list(tuple(str,str)))
         """
-        return concat([ChunkedCorpusView(f, enc, 1, 1, 0, 0, *self._cv_args, target_tagset=tagset)
-                       for (f, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChunkedCorpusView(
+                    f, enc, 1, 1, 0, 0, *self._cv_args, target_tagset=tagset
+                )
+                for (f, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_paras(self, fileids=None, tagset=None):
         """
@@ -113,8 +145,14 @@ class ChunkedCorpusReader(CorpusReader):
             in turn encoded as lists of ``(word,tag)`` tuples.
         :rtype: list(list(list(tuple(str,str))))
         """
-        return concat([ChunkedCorpusView(f, enc, 1, 1, 1, 0, *self._cv_args, target_tagset=tagset)
-                       for (f, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChunkedCorpusView(
+                    f, enc, 1, 1, 1, 0, *self._cv_args, target_tagset=tagset
+                )
+                for (f, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def chunked_words(self, fileids=None, tagset=None):
         """
@@ -125,8 +163,14 @@ class ChunkedCorpusReader(CorpusReader):
             trees over ``(word,tag)`` tuples or word strings.
         :rtype: list(tuple(str,str) and Tree)
         """
-        return concat([ChunkedCorpusView(f, enc, 1, 0, 0, 1, *self._cv_args, target_tagset=tagset)
-                       for (f, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChunkedCorpusView(
+                    f, enc, 1, 0, 0, 1, *self._cv_args, target_tagset=tagset
+                )
+                for (f, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def chunked_sents(self, fileids=None, tagset=None):
         """
@@ -137,8 +181,14 @@ class ChunkedCorpusReader(CorpusReader):
             tags).
         :rtype: list(Tree)
         """
-        return concat([ChunkedCorpusView(f, enc, 1, 1, 0, 1, *self._cv_args, target_tagset=tagset)
-                       for (f, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChunkedCorpusView(
+                    f, enc, 1, 1, 0, 1, *self._cv_args, target_tagset=tagset
+                )
+                for (f, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def chunked_paras(self, fileids=None, tagset=None):
         """
@@ -149,16 +199,34 @@ class ChunkedCorpusReader(CorpusReader):
             has tags) or word strings (if the corpus has no tags).
         :rtype: list(list(Tree))
         """
-        return concat([ChunkedCorpusView(f, enc, 1, 1, 1, 1, *self._cv_args, target_tagset=tagset)
-                       for (f, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ChunkedCorpusView(
+                    f, enc, 1, 1, 1, 1, *self._cv_args, target_tagset=tagset
+                )
+                for (f, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def _read_block(self, stream):
         return [tagstr2tree(t) for t in read_blankline_block(stream)]
 
+
 class ChunkedCorpusView(StreamBackedCorpusView):
-    def __init__(self, fileid, encoding, tagged, group_by_sent,
-                 group_by_para, chunked, str2chunktree, sent_tokenizer,
-                 para_block_reader, source_tagset=None, target_tagset=None):
+    def __init__(
+        self,
+        fileid,
+        encoding,
+        tagged,
+        group_by_sent,
+        group_by_para,
+        chunked,
+        str2chunktree,
+        sent_tokenizer,
+        para_block_reader,
+        source_tagset=None,
+        target_tagset=None,
+    ):
         StreamBackedCorpusView.__init__(self, fileid, encoding=encoding)
         self._tagged = tagged
         self._group_by_sent = group_by_sent
@@ -175,8 +243,11 @@ class ChunkedCorpusView(StreamBackedCorpusView):
         for para_str in self._para_block_reader(stream):
             para = []
             for sent_str in self._sent_tokenizer.tokenize(para_str):
-                sent = self._str2chunktree(sent_str, source_tagset=self._source_tagset,
-                                           target_tagset=self._target_tagset)
+                sent = self._str2chunktree(
+                    sent_str,
+                    source_tagset=self._source_tagset,
+                    target_tagset=self._target_tagset,
+                )
 
                 # If requested, throw away the tags.
                 if not self._tagged:
@@ -208,5 +279,5 @@ class ChunkedCorpusView(StreamBackedCorpusView):
             elif isinstance(child, tuple):
                 tree[i] = child[0]
             else:
-                raise ValueError('expected child to be Tree or tuple')
+                raise ValueError("expected child to be Tree or tuple")
         return tree
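
A brief sketch of what the default str2chunktree (nltk.chunk.tagstr2tree, as
named in the class docstring above) produces for one bracketed, tagged line;
the sample sentence is invented:

from nltk.chunk import tagstr2tree

# One sentence in the default chunked-corpus notation: square brackets
# delimit chunks, word/TAG pairs carry the tags.
line = "[ The/DT dog/NN ] barked/VBD at/IN [ the/DT cat/NN ]"
tree = tagstr2tree(line)
print(tree)  # (S (NP The/DT dog/NN) barked/VBD at/IN (NP the/DT cat/NN))

ChunkedCorpusView then flattens or prunes this tree according to its tagged,
group_by_sent, group_by_para and chunked flags.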
diff --git a/nlp_resource_data/nltk/corpus/reader/chunked.pyc b/nlp_resource_data/nltk/corpus/reader/chunked.pyc
deleted file mode 100755 (executable)
index c004ebe..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/chunked.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 6009dad..ba1cdf9
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Carnegie Mellon Pronouncing Dictionary Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -45,32 +45,31 @@ Y       yield   Y IY L D       Z       zee     Z IY
 ZH      seizure S IY ZH ER
 """
 
-import codecs
-
-from six import string_types
-
-from nltk import compat
 from nltk.util import Index
 
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
+
 class CMUDictCorpusReader(CorpusReader):
     def entries(self):
         """
         :return: the cmudict lexicon as a list of entries
         containing (word, transcriptions) tuples.
         """
-        return concat([StreamBackedCorpusView(fileid, read_cmudict_block,
-                                              encoding=enc)
-                       for fileid, enc in self.abspaths(None, True)])
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, read_cmudict_block, encoding=enc)
+                for fileid, enc in self.abspaths(None, True)
+            ]
+        )
 
     def raw(self):
         """
         :return: the cmudict lexicon as a raw string.
         """
         fileids = self._fileids
-        if isinstance(fileids, string_types):
+        if isinstance(fileids, str):
             fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
@@ -87,11 +86,13 @@ class CMUDictCorpusReader(CorpusReader):
         """
         return dict(Index(self.entries()))
 
+
 def read_cmudict_block(stream):
     entries = []
-    while len(entries) < 100: # Read 100 at a time.
+    while len(entries) < 100:  # Read 100 at a time.
         line = stream.readline()
-        if line == '': return entries # end of file.
+        if line == "":
+            return entries  # end of file.
         pieces = line.split()
-        entries.append( (pieces[0].lower(), pieces[2:]) )
+        entries.append((pieces[0].lower(), pieces[2:]))
     return entries
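
A short usage sketch for the reader above, via the standard corpus loader
(assumes the cmudict data has been installed with nltk.download('cmudict');
the printed phones are indicative). Note that entries() keeps pieces[2:],
skipping each line's second column, the numeric pronunciation-variant index:

from nltk.corpus import cmudict

pron = cmudict.dict()        # word -> list of transcriptions, built from entries()
print(pron["natural"][0])    # e.g. ['N', 'AE1', 'CH', 'ER0', 'AH0', 'L']
print(cmudict.entries()[0])  # first (word, transcription) pair in the lexicon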
diff --git a/nlp_resource_data/nltk/corpus/reader/cmudict.pyc b/nlp_resource_data/nltk/corpus/reader/cmudict.pyc
deleted file mode 100755 (executable)
index 70b4973..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/cmudict.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 1d81049..9d6fcdb
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Comparative Sentence Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -35,26 +35,33 @@ Related papers:
 """
 import re
 
-from six import string_types
-
 from nltk.corpus.reader.api import *
 from nltk.tokenize import *
 
 # Regular expressions for dataset components
-STARS = re.compile(r'^\*+$')
-COMPARISON = re.compile(r'<cs-[1234]>')
-CLOSE_COMPARISON = re.compile(r'</cs-[1234]>')
-GRAD_COMPARISON = re.compile(r'<cs-[123]>')
-NON_GRAD_COMPARISON = re.compile(r'<cs-4>')
+STARS = re.compile(r"^\*+$")
+COMPARISON = re.compile(r"<cs-[1234]>")
+CLOSE_COMPARISON = re.compile(r"</cs-[1234]>")
+GRAD_COMPARISON = re.compile(r"<cs-[123]>")
+NON_GRAD_COMPARISON = re.compile(r"<cs-4>")
 ENTITIES_FEATS = re.compile(r"(\d)_((?:[\.\w\s/-](?!\d_))+)")
-KEYWORD = re.compile(r'\((?!.*\()(.*)\)$')
+KEYWORD = re.compile(r"\((?!.*\()(.*)\)$")
+
 
 class Comparison(object):
     """
     A Comparison represents a comparative sentence and its constituents.
     """
-    def __init__(self, text=None, comp_type=None, entity_1=None, entity_2=None,
-                 feature=None, keyword=None):
+
+    def __init__(
+        self,
+        text=None,
+        comp_type=None,
+        entity_1=None,
+        entity_2=None,
+        feature=None,
+        keyword=None,
+    ):
         """
         :param text: a string (optionally tokenized) containing a comparison.
         :param comp_type: an integer defining the type of comparison expressed.
@@ -73,9 +80,18 @@ class Comparison(object):
         self.keyword = keyword
 
     def __repr__(self):
-        return ("Comparison(text=\"{}\", comp_type={}, entity_1=\"{}\", entity_2=\"{}\", "
-                "feature=\"{}\", keyword=\"{}\")").format(self.text, self.comp_type,
-                self.entity_1, self.entity_2, self.feature, self.keyword)
+        return (
+            'Comparison(text="{}", comp_type={}, entity_1="{}", entity_2="{}", '
+            'feature="{}", keyword="{}")'
+        ).format(
+            self.text,
+            self.comp_type,
+            self.entity_1,
+            self.entity_2,
+            self.feature,
+            self.keyword,
+        )
+
 
 class ComparativeSentencesCorpusReader(CorpusReader):
     """
@@ -94,10 +110,17 @@ class ComparativeSentencesCorpusReader(CorpusReader):
         >>> len(comparative_sentences.comparisons())
         853
     """
+
     CorpusView = StreamBackedCorpusView
 
-    def __init__(self, root, fileids, word_tokenizer=WhitespaceTokenizer(),
-                 sent_tokenizer=None, encoding='utf8'):
+    def __init__(
+        self,
+        root,
+        fileids,
+        word_tokenizer=WhitespaceTokenizer(),
+        sent_tokenizer=None,
+        encoding="utf8",
+    ):
         """
         :param root: The root directory for this corpus.
         :param fileids: a list or regexp specifying the fileids in this corpus.
@@ -122,10 +145,14 @@ class ComparativeSentencesCorpusReader(CorpusReader):
         """
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
-        return concat([self.CorpusView(path, self._read_comparison_block, encoding=enc)
-            for (path, enc, fileid) in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_comparison_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def keywords(self, fileids=None):
         """
@@ -136,11 +163,14 @@ class ComparativeSentencesCorpusReader(CorpusReader):
         :return: the set of keywords and comparative phrases used in the corpus.
         :rtype: set(str)
         """
-        all_keywords = concat([self.CorpusView(path, self._read_keyword_block, encoding=enc)
-                       for (path, enc, fileid)
-                       in self.abspaths(fileids, True, True)])
-
-        keywords_set = set([keyword.lower() for keyword in all_keywords if keyword])
+        all_keywords = concat(
+            [
+                self.CorpusView(path, self._read_keyword_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+        keywords_set = set(keyword.lower() for keyword in all_keywords if keyword)
         return keywords_set
 
     def keywords_readme(self):
@@ -165,7 +195,7 @@ class ComparativeSentencesCorpusReader(CorpusReader):
         """
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
@@ -185,8 +215,12 @@ class ComparativeSentencesCorpusReader(CorpusReader):
             strings, if no word tokenizer is specified).
         :rtype: list(list(str)) or list(str)
         """
-        return concat([self.CorpusView(path, self._read_sent_block, encoding=enc)
-            for (path, enc, fileid) in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_sent_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def words(self, fileids=None):
         """
@@ -197,15 +231,18 @@ class ComparativeSentencesCorpusReader(CorpusReader):
         :return: the given file(s) as a list of words and punctuation symbols.
         :rtype: list(str)
         """
-        return concat([self.CorpusView(path, self._read_word_block, encoding=enc)
-                       for (path, enc, fileid)
-                       in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_word_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def _read_comparison_block(self, stream):
         while True:
             line = stream.readline()
             if not line:
-                return [] # end of file.
+                return []  # end of file.
             comparison_tags = re.findall(COMPARISON, line)
             if comparison_tags:
                 grad_comparisons = re.findall(GRAD_COMPARISON, line)
@@ -222,17 +259,19 @@ class ComparativeSentencesCorpusReader(CorpusReader):
                 if grad_comparisons:
                     # Each comparison tag has its own relations on a separate line
                     for comp in grad_comparisons:
-                        comp_type = int(re.match(r'<cs-(\d)>', comp).group(1))
-                        comparison = Comparison(text=comparison_text, comp_type=comp_type)
+                        comp_type = int(re.match(r"<cs-(\d)>", comp).group(1))
+                        comparison = Comparison(
+                            text=comparison_text, comp_type=comp_type
+                        )
                         line = stream.readline()
                         entities_feats = ENTITIES_FEATS.findall(line)
                         if entities_feats:
                             for (code, entity_feat) in entities_feats:
-                                if code == '1':
+                                if code == "1":
                                     comparison.entity_1 = entity_feat.strip()
-                                elif code == '2':
+                                elif code == "2":
                                     comparison.entity_2 = entity_feat.strip()
-                                elif code == '3':
+                                elif code == "3":
                                     comparison.feature = entity_feat.strip()
                         keyword = KEYWORD.findall(line)
                         if keyword:
@@ -243,8 +282,10 @@ class ComparativeSentencesCorpusReader(CorpusReader):
                 if non_grad_comparisons:
                     for comp in non_grad_comparisons:
                         # comp_type in this case should always be 4.
-                        comp_type = int(re.match(r'<cs-(\d)>', comp).group(1))
-                        comparison = Comparison(text=comparison_text, comp_type=comp_type)
+                        comp_type = int(re.match(r"<cs-(\d)>", comp).group(1))
+                        comparison = Comparison(
+                            text=comparison_text, comp_type=comp_type
+                        )
                         comparison_bundle.append(comparison)
                 # Flatten the list of comparisons before returning them
                 # return concat([comparison_bundle])
@@ -265,11 +306,16 @@ class ComparativeSentencesCorpusReader(CorpusReader):
                     if re.match(STARS, line):
                         break
                 continue
-            if not re.findall(COMPARISON, line) and not ENTITIES_FEATS.findall(line) \
-            and not re.findall(CLOSE_COMPARISON, line):
+            if (
+                not re.findall(COMPARISON, line)
+                and not ENTITIES_FEATS.findall(line)
+                and not re.findall(CLOSE_COMPARISON, line)
+            ):
                 if self._sent_tokenizer:
-                    return [self._word_tokenizer.tokenize(sent)
-                        for sent in self._sent_tokenizer.tokenize(line)]
+                    return [
+                        self._word_tokenizer.tokenize(sent)
+                        for sent in self._sent_tokenizer.tokenize(line)
+                    ]
                 else:
                     return [self._word_tokenizer.tokenize(line)]
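
To make the annotation parsing above concrete, here is a small sketch of what `ENTITIES_FEATS` and `KEYWORD` extract from a relation line; the line itself is invented, but follows the `N_value ... (keyword)` layout these expressions expect (the reader then calls `.strip()` on each captured value):

    import re

    ENTITIES_FEATS = re.compile(r"(\d)_((?:[\.\w\s/-](?!\d_))+)")
    KEYWORD = re.compile(r"\((?!.*\()(.*)\)$")

    line = "1_Nikon 2_Canon 3_picture quality (better)"
    print(ENTITIES_FEATS.findall(line))
    # [('1', 'Nikon'), ('2', 'Canon'), ('3', 'picture quality ')]
    print(KEYWORD.findall(line))
    # ['better']
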
 
diff --git a/nlp_resource_data/nltk/corpus/reader/comparative_sents.pyc b/nlp_resource_data/nltk/corpus/reader/comparative_sents.pyc
deleted file mode 100755 (executable)
index 972ab0d..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/comparative_sents.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 34d559f..e138a1b
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: CONLL Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 Read CoNLL-style chunk fileids.
 """
 
-from __future__ import unicode_literals
-
-import os
-import codecs
 import textwrap
 
-from six import string_types
-
-from nltk import compat
 from nltk.tree import Tree
 from nltk.util import LazyMap, LazyConcatenation
 from nltk.tag import map_tag
@@ -26,6 +19,7 @@ from nltk.tag import map_tag
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
+
 class ConllCorpusReader(CorpusReader):
     """
     A corpus reader for CoNLL-style files.  These files consist of a
@@ -35,7 +29,11 @@ class ConllCorpusReader(CorpusReader):
     annotation type.  The set of columns used by CoNLL-style files can
     vary from corpus to corpus; the ``ConllCorpusReader`` constructor
     therefore takes an argument, ``columntypes``, which is used to
-    specify the columns that are used by a given corpus.
+    specify the columns that are used by a given corpus. By default,
+    columns are split on consecutive whitespace; with the
+    ``separator`` argument you can set an explicit string to split on
+    (e.g. ``'\t'``).
+
 
     @todo: Add support for reading from corpora where different
         parallel files contain different columns.
@@ -48,50 +46,63 @@ class ConllCorpusReader(CorpusReader):
         document at a time (eg parsed_documents()).
     """
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Column Types
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
-    WORDS = 'words'   #: column type for words
-    POS = 'pos'       #: column type for part-of-speech tags
-    TREE = 'tree'     #: column type for parse trees
-    CHUNK = 'chunk'   #: column type for chunk structures
-    NE = 'ne'         #: column type for named entities
-    SRL = 'srl'       #: column type for semantic role labels
-    IGNORE = 'ignore' #: column type for column that should be ignored
+    WORDS = "words"  #: column type for words
+    POS = "pos"  #: column type for part-of-speech tags
+    TREE = "tree"  #: column type for parse trees
+    CHUNK = "chunk"  #: column type for chunk structures
+    NE = "ne"  #: column type for named entities
+    SRL = "srl"  #: column type for semantic role labels
+    IGNORE = "ignore"  #: column type for column that should be ignored
 
     #: A list of all column types supported by the conll corpus reader.
     COLUMN_TYPES = (WORDS, POS, TREE, CHUNK, NE, SRL, IGNORE)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Constructor
-    #/////////////////////////////////////////////////////////////////
-
-    def __init__(self, root, fileids, columntypes,
-                 chunk_types=None, root_label='S', pos_in_tree=False,
-                 srl_includes_roleset=True, encoding='utf8',
-                 tree_class=Tree, tagset=None):
+    # /////////////////////////////////////////////////////////////////
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        columntypes,
+        chunk_types=None,
+        root_label="S",
+        pos_in_tree=False,
+        srl_includes_roleset=True,
+        encoding="utf8",
+        tree_class=Tree,
+        tagset=None,
+        separator=None,
+    ):
         for columntype in columntypes:
             if columntype not in self.COLUMN_TYPES:
-                raise ValueError('Bad column type %r' % columntype)
-        if isinstance(chunk_types, string_types):
+                raise ValueError("Bad column type %r" % columntype)
+        if isinstance(chunk_types, str):
             chunk_types = [chunk_types]
         self._chunk_types = chunk_types
-        self._colmap = dict((c,i) for (i,c) in enumerate(columntypes))
+        self._colmap = dict((c, i) for (i, c) in enumerate(columntypes))
         self._pos_in_tree = pos_in_tree
-        self._root_label = root_label # for chunks
+        self._root_label = root_label  # for chunks
         self._srl_includes_roleset = srl_includes_roleset
         self._tree_class = tree_class
         CorpusReader.__init__(self, root, fileids, encoding)
         self._tagset = tagset
+        self.sep = separator
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Data Access Methods
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def raw(self, fileids=None):
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def words(self, fileids=None):
@@ -104,39 +115,48 @@ class ConllCorpusReader(CorpusReader):
 
     def tagged_words(self, fileids=None, tagset=None):
         self._require(self.WORDS, self.POS)
+
         def get_tagged_words(grid):
             return self._get_tagged_words(grid, tagset)
-        return LazyConcatenation(LazyMap(get_tagged_words,
-                                         self._grids(fileids)))
+
+        return LazyConcatenation(LazyMap(get_tagged_words, self._grids(fileids)))
 
     def tagged_sents(self, fileids=None, tagset=None):
         self._require(self.WORDS, self.POS)
+
         def get_tagged_words(grid):
             return self._get_tagged_words(grid, tagset)
+
         return LazyMap(get_tagged_words, self._grids(fileids))
 
-    def chunked_words(self, fileids=None, chunk_types=None,
-                      tagset=None):
+    def chunked_words(self, fileids=None, chunk_types=None, tagset=None):
         self._require(self.WORDS, self.POS, self.CHUNK)
-        if chunk_types is None: chunk_types = self._chunk_types
-        def get_chunked_words(grid): # capture chunk_types as local var
+        if chunk_types is None:
+            chunk_types = self._chunk_types
+
+        def get_chunked_words(grid):  # capture chunk_types as local var
             return self._get_chunked_words(grid, chunk_types, tagset)
-        return LazyConcatenation(LazyMap(get_chunked_words,
-                                         self._grids(fileids)))
 
-    def chunked_sents(self, fileids=None, chunk_types=None,
-                      tagset=None):
+        return LazyConcatenation(LazyMap(get_chunked_words, self._grids(fileids)))
+
+    def chunked_sents(self, fileids=None, chunk_types=None, tagset=None):
         self._require(self.WORDS, self.POS, self.CHUNK)
-        if chunk_types is None: chunk_types = self._chunk_types
-        def get_chunked_words(grid): # capture chunk_types as local var
+        if chunk_types is None:
+            chunk_types = self._chunk_types
+
+        def get_chunked_words(grid):  # capture chunk_types as local var
             return self._get_chunked_words(grid, chunk_types, tagset)
+
         return LazyMap(get_chunked_words, self._grids(fileids))
 
     def parsed_sents(self, fileids=None, pos_in_tree=None, tagset=None):
         self._require(self.WORDS, self.POS, self.TREE)
-        if pos_in_tree is None: pos_in_tree = self._pos_in_tree
-        def get_parsed_sent(grid): # capture pos_in_tree as local var
+        if pos_in_tree is None:
+            pos_in_tree = self._pos_in_tree
+
+        def get_parsed_sent(grid):  # capture pos_in_tree as local var
             return self._get_parsed_sent(grid, pos_in_tree, tagset)
+
         return LazyMap(get_parsed_sent, self._grids(fileids))
 
     def srl_spans(self, fileids=None):
@@ -145,11 +165,15 @@ class ConllCorpusReader(CorpusReader):
 
     def srl_instances(self, fileids=None, pos_in_tree=None, flatten=True):
         self._require(self.WORDS, self.POS, self.TREE, self.SRL)
-        if pos_in_tree is None: pos_in_tree = self._pos_in_tree
-        def get_srl_instances(grid): # capture pos_in_tree as local var
+        if pos_in_tree is None:
+            pos_in_tree = self._pos_in_tree
+
+        def get_srl_instances(grid):  # capture pos_in_tree as local var
             return self._get_srl_instances(grid, pos_in_tree)
+
         result = LazyMap(get_srl_instances, self._grids(fileids))
-        if flatten: result = LazyConcatenation(result)
+        if flatten:
+            result = LazyConcatenation(result)
         return result
 
     def iob_words(self, fileids=None, tagset=None):
@@ -160,8 +184,10 @@ class ConllCorpusReader(CorpusReader):
         :type fileids: None or str or list
         """
         self._require(self.WORDS, self.POS, self.CHUNK)
+
         def get_iob_words(grid):
             return self._get_iob_words(grid, tagset)
+
         return LazyConcatenation(LazyMap(get_iob_words, self._grids(fileids)))
 
     def iob_sents(self, fileids=None, tagset=None):
@@ -172,91 +198,101 @@ class ConllCorpusReader(CorpusReader):
         :type fileids: None or str or list
         """
         self._require(self.WORDS, self.POS, self.CHUNK)
+
         def get_iob_words(grid):
             return self._get_iob_words(grid, tagset)
+
         return LazyMap(get_iob_words, self._grids(fileids))
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Grid Reading
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _grids(self, fileids=None):
         # n.b.: we could cache the object returned here (keyed on
         # fileids), which would let us reuse the same corpus view for
         # different things (eg srl and parse trees).
-        return concat([StreamBackedCorpusView(fileid, self._read_grid_block,
-                                              encoding=enc)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_grid_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def _read_grid_block(self, stream):
         grids = []
         for block in read_blankline_block(stream):
             block = block.strip()
-            if not block: continue
+            if not block:
+                continue
 
-            grid = [line.split() for line in block.split('\n')]
+            grid = [line.split(self.sep) for line in block.split("\n")]
 
             # If there's a docstart row, then discard. ([xx] eventually it
             # would be good to actually use it)
-            if grid[0][self._colmap.get('words', 0)] == '-DOCSTART-':
+            if grid[0][self._colmap.get("words", 0)] == "-DOCSTART-":
                 del grid[0]
 
             # Check that the grid is consistent.
             for row in grid:
                 if len(row) != len(grid[0]):
-                    raise ValueError('Inconsistent number of columns:\n%s'
-                                     % block)
+                    raise ValueError("Inconsistent number of columns:\n%s" % block)
             grids.append(grid)
         return grids
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Transforms
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # given a grid, transform it into some representation (e.g.,
     # a list of words or a parse tree).
 
     def _get_words(self, grid):
-        return self._get_column(grid, self._colmap['words'])
+        return self._get_column(grid, self._colmap["words"])
 
     def _get_tagged_words(self, grid, tagset=None):
-        pos_tags = self._get_column(grid, self._colmap['pos'])
+        pos_tags = self._get_column(grid, self._colmap["pos"])
         if tagset and tagset != self._tagset:
             pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
-        return list(zip(self._get_column(grid, self._colmap['words']), pos_tags))
+        return list(zip(self._get_column(grid, self._colmap["words"]), pos_tags))
 
     def _get_iob_words(self, grid, tagset=None):
-        pos_tags = self._get_column(grid, self._colmap['pos'])
+        pos_tags = self._get_column(grid, self._colmap["pos"])
         if tagset and tagset != self._tagset:
             pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
-        return list(zip(self._get_column(grid, self._colmap['words']), pos_tags,
-                   self._get_column(grid, self._colmap['chunk'])))
+        return list(
+            zip(
+                self._get_column(grid, self._colmap["words"]),
+                pos_tags,
+                self._get_column(grid, self._colmap["chunk"]),
+            )
+        )
 
     def _get_chunked_words(self, grid, chunk_types, tagset=None):
         # n.b.: this method is very similar to conllstr2tree.
-        words = self._get_column(grid, self._colmap['words'])
-        pos_tags = self._get_column(grid, self._colmap['pos'])
+        words = self._get_column(grid, self._colmap["words"])
+        pos_tags = self._get_column(grid, self._colmap["pos"])
         if tagset and tagset != self._tagset:
             pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
-        chunk_tags = self._get_column(grid, self._colmap['chunk'])
+        chunk_tags = self._get_column(grid, self._colmap["chunk"])
 
         stack = [Tree(self._root_label, [])]
 
         for (word, pos_tag, chunk_tag) in zip(words, pos_tags, chunk_tags):
-            if chunk_tag == 'O':
-                state, chunk_type = 'O', ''
+            if chunk_tag == "O":
+                state, chunk_type = "O", ""
             else:
-                (state, chunk_type) = chunk_tag.split('-')
+                (state, chunk_type) = chunk_tag.split("-")
             # If it's a chunk we don't care about, treat it as O.
             if chunk_types is not None and chunk_type not in chunk_types:
-                state = 'O'
+                state = "O"
             # Treat a mismatching I like a B.
-            if state == 'I' and chunk_type != stack[-1].label():
-                state = 'B'
+            if state == "I" and chunk_type != stack[-1].label():
+                state = "B"
             # For B or I: close any open chunks
-            if state in 'BO' and len(stack) == 2:
+            if state in "BO" and len(stack) == 2:
                 stack.pop()
             # For B: start a new chunk.
-            if state == 'B':
+            if state == "B":
                 new_chunk = Tree(chunk_type, [])
                 stack[-1].append(new_chunk)
                 stack.append(new_chunk)
@@ -266,32 +302,38 @@ class ConllCorpusReader(CorpusReader):
         return stack[0]
 
     def _get_parsed_sent(self, grid, pos_in_tree, tagset=None):
-        words = self._get_column(grid, self._colmap['words'])
-        pos_tags = self._get_column(grid, self._colmap['pos'])
+        words = self._get_column(grid, self._colmap["words"])
+        pos_tags = self._get_column(grid, self._colmap["pos"])
         if tagset and tagset != self._tagset:
             pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
-        parse_tags = self._get_column(grid, self._colmap['tree'])
+        parse_tags = self._get_column(grid, self._colmap["tree"])
 
-        treestr = ''
+        treestr = ""
         for (word, pos_tag, parse_tag) in zip(words, pos_tags, parse_tags):
-            if word == '(': word = '-LRB-'
-            if word == ')': word = '-RRB-'
-            if pos_tag == '(': pos_tag = '-LRB-'
-            if pos_tag == ')': pos_tag = '-RRB-'
-            (left, right) = parse_tag.split('*')
-            right = right.count(')')*')' # only keep ')'.
-            treestr += '%s (%s %s) %s' % (left, pos_tag, word, right)
+            if word == "(":
+                word = "-LRB-"
+            if word == ")":
+                word = "-RRB-"
+            if pos_tag == "(":
+                pos_tag = "-LRB-"
+            if pos_tag == ")":
+                pos_tag = "-RRB-"
+            (left, right) = parse_tag.split("*")
+            right = right.count(")") * ")"  # only keep ')'.
+            treestr += "%s (%s %s) %s" % (left, pos_tag, word, right)
         try:
             tree = self._tree_class.fromstring(treestr)
         except (ValueError, IndexError):
-            tree = self._tree_class.fromstring('(%s %s)' %
-                                          (self._root_label, treestr))
+            tree = self._tree_class.fromstring("(%s %s)" % (self._root_label, treestr))
 
         if not pos_in_tree:
             for subtree in tree.subtrees():
                 for i, child in enumerate(subtree):
-                    if (isinstance(child, Tree) and len(child)==1 and
-                        isinstance(child[0], string_types)):
+                    if (
+                        isinstance(child, Tree)
+                        and len(child) == 1
+                        and isinstance(child[0], str)
+                    ):
                         subtree[i] = (child[0], child.label())
 
         return tree
@@ -301,29 +343,29 @@ class ConllCorpusReader(CorpusReader):
         list of list of (start, end), tag) tuples
         """
         if self._srl_includes_roleset:
-            predicates = self._get_column(grid, self._colmap['srl']+1)
-            start_col = self._colmap['srl']+2
+            predicates = self._get_column(grid, self._colmap["srl"] + 1)
+            start_col = self._colmap["srl"] + 2
         else:
-            predicates = self._get_column(grid, self._colmap['srl'])
-            start_col = self._colmap['srl']+1
+            predicates = self._get_column(grid, self._colmap["srl"])
+            start_col = self._colmap["srl"] + 1
 
         # Count how many predicates there are.  This tells us how many
         # columns to expect for SRL data.
-        num_preds = len([p for p in predicates if p != '-'])
+        num_preds = len([p for p in predicates if p != "-"])
 
         spanlists = []
         for i in range(num_preds):
-            col = self._get_column(grid, start_col+i)
+            col = self._get_column(grid, start_col + i)
             spanlist = []
             stack = []
             for wordnum, srl_tag in enumerate(col):
-                (left, right) = srl_tag.split('*')
-                for tag in left.split('('):
+                (left, right) = srl_tag.split("*")
+                for tag in left.split("("):
                     if tag:
                         stack.append((tag, wordnum))
-                for i in range(right.count(')')):
+                for i in range(right.count(")")):
                     (tag, start) = stack.pop()
-                    spanlist.append( ((start, wordnum+1), tag) )
+                    spanlist.append(((start, wordnum + 1), tag))
             spanlists.append(spanlist)
 
         return spanlists
@@ -332,52 +374,56 @@ class ConllCorpusReader(CorpusReader):
         tree = self._get_parsed_sent(grid, pos_in_tree)
         spanlists = self._get_srl_spans(grid)
         if self._srl_includes_roleset:
-            predicates = self._get_column(grid, self._colmap['srl']+1)
-            rolesets = self._get_column(grid, self._colmap['srl'])
+            predicates = self._get_column(grid, self._colmap["srl"] + 1)
+            rolesets = self._get_column(grid, self._colmap["srl"])
         else:
-            predicates = self._get_column(grid, self._colmap['srl'])
+            predicates = self._get_column(grid, self._colmap["srl"])
             rolesets = [None] * len(predicates)
 
         instances = ConllSRLInstanceList(tree)
         for wordnum, predicate in enumerate(predicates):
-            if predicate == '-': continue
+            if predicate == "-":
+                continue
             # Decide which spanlist to use.  Don't assume that they're
             # sorted in the same order as the predicates (even though
             # they usually are).
             for spanlist in spanlists:
                 for (start, end), tag in spanlist:
-                    if wordnum in range(start,end) and tag in ('V', 'C-V'):
+                    if wordnum in range(start, end) and tag in ("V", "C-V"):
                         break
-                else: continue
+                else:
+                    continue
                 break
             else:
-                raise ValueError('No srl column found for %r' % predicate)
-            instances.append(ConllSRLInstance(tree, wordnum, predicate,
-                                              rolesets[wordnum], spanlist))
+                raise ValueError("No srl column found for %r" % predicate)
+            instances.append(
+                ConllSRLInstance(tree, wordnum, predicate, rolesets[wordnum], spanlist)
+            )
 
         return instances
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Helper Methods
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _require(self, *columntypes):
         for columntype in columntypes:
             if columntype not in self._colmap:
-                raise ValueError('This corpus does not contain a %s '
-                                 'column.' % columntype)
+                raise ValueError(
+                    "This corpus does not contain a %s " "column." % columntype
+                )
 
     @staticmethod
     def _get_column(grid, column_index):
         return [grid[i][column_index] for i in range(len(grid))]
 
 
-@compat.python_2_unicode_compatible
 class ConllSRLInstance(object):
     """
     An SRL instance from a CoNLL corpus, which identifies and
     provides labels for the arguments of a single verb.
     """
+
     # [xx] add inst.core_arguments, inst.argm_arguments?
 
     def __init__(self, tree, verb_head, verb_stem, roleset, tagged_spans):
@@ -417,36 +463,44 @@ class ConllSRLInstance(object):
 
         # Fill in the self.verb and self.arguments values.
         for (start, end), tag in tagged_spans:
-            if tag in ('V', 'C-V'):
+            if tag in ("V", "C-V"):
                 self.verb += list(range(start, end))
             else:
-                self.arguments.append( ((start, end), tag) )
+                self.arguments.append(((start, end), tag))
 
     def __repr__(self):
-        plural = len(self.arguments)!=1 and 's' or ''
-        return '<ConllSRLInstance for %r with %d argument%s>' % (
-            (self.verb_stem, len(self.arguments), plural))
+        # Originally: plural = len(self.arguments) != 1 and 's' or ''
+        plural = "s" if len(self.arguments) != 1 else ""
+        return "<ConllSRLInstance for %r with %d argument%s>" % (
+            (self.verb_stem, len(self.arguments), plural)
+        )
 
     def pprint(self):
-        verbstr = ' '.join(self.words[i][0] for i in self.verb)
-        hdr = 'SRL for %r (stem=%r):\n' % (verbstr, self.verb_stem)
-        s = ''
+        verbstr = " ".join(self.words[i][0] for i in self.verb)
+        hdr = "SRL for %r (stem=%r):\n" % (verbstr, self.verb_stem)
+        s = ""
         for i, word in enumerate(self.words):
-            if isinstance(word, tuple): word = word[0]
+            if isinstance(word, tuple):
+                word = word[0]
             for (start, end), argid in self.arguments:
-                if i == start: s += '[%s ' % argid
-                if i == end: s += '] '
-            if i in self.verb: word = '<<%s>>' % word
-            s += word + ' '
-        return hdr + textwrap.fill(s.replace(' ]', ']'),
-                                   initial_indent='    ',
-                                   subsequent_indent='    ')
-
-@compat.python_2_unicode_compatible
+                if i == start:
+                    s += "[%s " % argid
+                if i == end:
+                    s += "] "
+            if i in self.verb:
+                word = "<<%s>>" % word
+            s += word + " "
+        return hdr + textwrap.fill(
+            s.replace(" ]", "]"), initial_indent="    ", subsequent_indent="    "
+        )
+
+
 class ConllSRLInstanceList(list):
     """
     Set of instances for a single sentence
     """
+
     def __init__(self, tree, instances=()):
         self.tree = tree
         list.__init__(self, instances)
@@ -458,66 +512,76 @@ class ConllSRLInstanceList(list):
         # Sanity check: trees should be the same
         for inst in self:
             if inst.tree != self.tree:
-                raise ValueError('Tree mismatch!')
+                raise ValueError("Tree mismatch!")
 
         # If desired, add trees:
         if include_tree:
             words = self.tree.leaves()
             pos = [None] * len(words)
-            synt = ['*'] * len(words)
+            synt = ["*"] * len(words)
             self._tree2conll(self.tree, 0, words, pos, synt)
 
-        s = ''
+        s = ""
         for i in range(len(words)):
             # optional tree columns
             if include_tree:
-                s += '%-20s ' % words[i]
-                s += '%-8s ' % pos[i]
-                s += '%15s*%-8s ' % tuple(synt[i].split('*'))
+                s += "%-20s " % words[i]
+                s += "%-8s " % pos[i]
+                s += "%15s*%-8s " % tuple(synt[i].split("*"))
 
             # verb head column
             for inst in self:
                 if i == inst.verb_head:
-                    s += '%-20s ' % inst.verb_stem
+                    s += "%-20s " % inst.verb_stem
                     break
             else:
-                s += '%-20s ' % '-'
+                s += "%-20s " % "-"
             # Remaining columns: self
             for inst in self:
-                argstr = '*'
+                argstr = "*"
                 for (start, end), argid in inst.tagged_spans:
-                    if i==start: argstr = '(%s%s' % (argid, argstr)
-                    if i==(end-1): argstr += ')'
-                s += '%-12s ' % argstr
-            s += '\n'
+                    if i == start:
+                        argstr = "(%s%s" % (argid, argstr)
+                    if i == (end - 1):
+                        argstr += ")"
+                s += "%-12s " % argstr
+            s += "\n"
         return s
 
     def _tree2conll(self, tree, wordnum, words, pos, synt):
         assert isinstance(tree, Tree)
-        if len(tree) == 1 and isinstance(tree[0], string_types):
+        if len(tree) == 1 and isinstance(tree[0], str):
             pos[wordnum] = tree.label()
             assert words[wordnum] == tree[0]
-            return wordnum+1
+            return wordnum + 1
         elif len(tree) == 1 and isinstance(tree[0], tuple):
             assert len(tree[0]) == 2
             pos[wordnum], pos[wordnum] = tree[0]
-            return wordnum+1
+            return wordnum + 1
         else:
-            synt[wordnum] = '(%s%s' % (tree.label(), synt[wordnum])
+            synt[wordnum] = "(%s%s" % (tree.label(), synt[wordnum])
             for child in tree:
-                wordnum = self._tree2conll(child, wordnum, words,
-                                                  pos, synt)
-            synt[wordnum-1] += ')'
+                wordnum = self._tree2conll(child, wordnum, words, pos, synt)
+            synt[wordnum - 1] += ")"
             return wordnum
 
+
 class ConllChunkCorpusReader(ConllCorpusReader):
     """
     A ConllCorpusReader whose data file contains three columns: words,
     pos, and chunk.
     """
-    def __init__(self, root, fileids, chunk_types, encoding='utf8',
-                 tagset=None):
+
+    def __init__(
+        self, root, fileids, chunk_types, encoding="utf8", tagset=None, separator=None
+    ):
         ConllCorpusReader.__init__(
-            self, root, fileids, ('words', 'pos', 'chunk'),
-            chunk_types=chunk_types, encoding=encoding,
-            tagset=tagset)
+            self,
+            root,
+            fileids,
+            ("words", "pos", "chunk"),
+            chunk_types=chunk_types,
+            encoding=encoding,
+            tagset=tagset,
+            separator=separator,
+        )
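
Besides the Python 3 and formatting changes, the substantive addition in this file is the `separator` argument, stored as `self.sep` and used in `_read_grid_block` so that columns can be split on an explicit delimiter instead of arbitrary whitespace. A usage sketch; the root directory and fileid pattern are placeholders:

    from nltk.corpus.reader import ConllCorpusReader

    reader = ConllCorpusReader(
        root="corpora/my_conll",            # hypothetical local directory
        fileids=r".*\.conll",
        columntypes=("words", "pos", "chunk"),
        separator="\t",  # tab-separated columns; tokens may contain spaces
    )
    for sent in reader.tagged_sents():
        print(sent)
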
diff --git a/nlp_resource_data/nltk/corpus/reader/conll.pyc b/nlp_resource_data/nltk/corpus/reader/conll.pyc
deleted file mode 100755 (executable)
index 4b5866a..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/conll.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 84f603e..1831236
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: An Crubadan N-grams Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Avital Pekker <avital.pekker@utoronto.ca>
 #
 # URL: <http://nltk.org/>
@@ -19,98 +19,89 @@ For details about An Crubadan, this data, and its potential uses, see:
 http://borel.slu.edu/crubadan/index.html
 """
 
-from __future__ import print_function, unicode_literals
-
 import re
-from nltk.compat import PY3
 from os import path
+
 from nltk.corpus.reader import CorpusReader
 from nltk.probability import FreqDist
 from nltk.data import ZipFilePathPointer
 
+
 class CrubadanCorpusReader(CorpusReader):
     """
     A corpus reader used to access the An Crubadan language n-gram files.
     """
-    
-    _LANG_MAPPER_FILE = 'table.txt'
+
+    _LANG_MAPPER_FILE = "table.txt"
     _all_lang_freq = {}
-    
-    def __init__(self, root, fileids, encoding='utf8', tagset=None):
-        super(CrubadanCorpusReader, self).__init__(root, fileids, encoding='utf8')
+
+    def __init__(self, root, fileids, encoding="utf8", tagset=None):
+        super(CrubadanCorpusReader, self).__init__(root, fileids, encoding="utf8")
         self._lang_mapping_data = []
         self._load_lang_mapping_data()
-        
+
     def lang_freq(self, lang):
-        ''' Return n-gram FreqDist for a specific language
-            given ISO 639-3 language code '''
-        
+        """ Return n-gram FreqDist for a specific language
+            given ISO 639-3 language code """
+
         if lang not in self._all_lang_freq:
             self._all_lang_freq[lang] = self._load_lang_ngrams(lang)
 
         return self._all_lang_freq[lang]
-    
+
     def langs(self):
-        ''' Return a list of supported languages as ISO 639-3 codes '''
+        """ Return a list of supported languages as ISO 639-3 codes """
         return [row[1] for row in self._lang_mapping_data]
-            
+
     def iso_to_crubadan(self, lang):
-        ''' Return internal Crubadan code based on ISO 639-3 code '''
+        """ Return internal Crubadan code based on ISO 639-3 code """
         for i in self._lang_mapping_data:
             if i[1].lower() == lang.lower():
                 return i[0]
-    
+
     def crubadan_to_iso(self, lang):
-        ''' Return ISO 639-3 code given internal Crubadan code '''
+        """ Return ISO 639-3 code given internal Crubadan code """
         for i in self._lang_mapping_data:
             if i[0].lower() == lang.lower():
                 return i[1]
-    
+
     def _load_lang_mapping_data(self):
-        ''' Load language mappings between codes and description from table.txt '''
+        """ Load language mappings between codes and description from table.txt """
         if isinstance(self.root, ZipFilePathPointer):
-            raise RuntimeError("Please install the 'crubadan' corpus first, use nltk.download()")
-        
+            raise RuntimeError(
+                "Please install the 'crubadan' corpus first, use nltk.download()"
+            )
+
         mapper_file = path.join(self.root, self._LANG_MAPPER_FILE)
         if self._LANG_MAPPER_FILE not in self.fileids():
             raise RuntimeError("Could not find language mapper file: " + mapper_file)
 
-        if PY3:
-            raw = open(mapper_file, 'r', encoding='utf-8').read().strip()
-        else:
-            raw = open(mapper_file, 'rU').read().decode('utf-8').strip()
+        raw = open(mapper_file, "r", encoding="utf-8").read().strip()
+
+        self._lang_mapping_data = [row.split("\t") for row in raw.split("\n")]
 
-        self._lang_mapping_data = [row.split('\t') for row in raw.split('\n')]
-        
     def _load_lang_ngrams(self, lang):
-        ''' Load single n-gram language file given the ISO 639-3 language code
-            and return its FreqDist '''
+        """ Load single n-gram language file given the ISO 639-3 language code
+            and return its FreqDist """
 
         if lang not in self.langs():
             raise RuntimeError("Unsupported language.")
 
         crubadan_code = self.iso_to_crubadan(lang)
-        ngram_file = path.join(self.root, crubadan_code + '-3grams.txt')
+        ngram_file = path.join(self.root, crubadan_code + "-3grams.txt")
 
         if not path.isfile(ngram_file):
             raise RuntimeError("No N-gram file found for requested language.")
 
         counts = FreqDist()
-        if PY3:
-            f = open(ngram_file, 'r', encoding='utf-8')
-        else:
-            f = open(ngram_file, 'rU')
+        f = open(ngram_file, "r", encoding="utf-8")
 
         for line in f:
-            if PY3:
-                data = line.split(' ')
-            else:
-                data = line.decode('utf8').split(' ')
+            data = line.split(" ")
 
-            ngram = data[1].strip('\n')
+            ngram = data[1].strip("\n")
             freq = int(data[0])
-            
+
             counts[ngram] = freq
-            
+
         return counts
-        
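
With the `PY3` checks gone, the n-gram loading reduces to a single `open(..., encoding='utf-8')` call. A standalone sketch of the loop above, with a context manager added so the file handle is closed (the code above leaves it open):

    from nltk.probability import FreqDist

    def load_ngrams(path):
        counts = FreqDist()
        with open(path, encoding="utf-8") as f:
            for line in f:                     # each line: '<freq> <ngram>'
                data = line.split(" ")
                counts[data[1].strip("\n")] = int(data[0])
        return counts
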
diff --git a/nlp_resource_data/nltk/corpus/reader/crubadan.pyc b/nlp_resource_data/nltk/corpus/reader/crubadan.pyc
deleted file mode 100755 (executable)
index ed7be9e..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/crubadan.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index c8a3a39..4314fbd
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dependency Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Kepa Sarasola <kepa.sarasola@ehu.es>
 #         Iker Manterola <returntothehangar@hotmail.com>
 #
@@ -15,13 +15,19 @@ from nltk.tokenize import *
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
-class DependencyCorpusReader(SyntaxCorpusReader):
-
-    def __init__(self, root, fileids, encoding='utf8',
-                 word_tokenizer=TabTokenizer(),
-                 sent_tokenizer=RegexpTokenizer('\n', gaps=True),
-                 para_block_reader=read_blankline_block):
 
+class DependencyCorpusReader(SyntaxCorpusReader):
+    def __init__(
+        self,
+        root,
+        fileids,
+        encoding="utf8",
+        word_tokenizer=TabTokenizer(),
+        sent_tokenizer=RegexpTokenizer("\n", gaps=True),
+        para_block_reader=read_blankline_block,
+    ):
+        # FIXME: Why is it inheriting from SyntaxCorpusReader but initializing
+        #       from CorpusReader?
         CorpusReader.__init__(self, root, fileids, encoding)
 
     #########################################################
@@ -41,32 +47,59 @@ class DependencyCorpusReader(SyntaxCorpusReader):
         return concat(result)
 
     def words(self, fileids=None):
-        return concat([DependencyCorpusView(fileid, False, False, False, encoding=enc)
-                       for fileid, enc in self.abspaths(fileids, include_encoding=True)])
+        return concat(
+            [
+                DependencyCorpusView(fileid, False, False, False, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, include_encoding=True)
+            ]
+        )
 
     def tagged_words(self, fileids=None):
-        return concat([DependencyCorpusView(fileid, True, False, False, encoding=enc)
-                       for fileid, enc in self.abspaths(fileids, include_encoding=True)])
+        return concat(
+            [
+                DependencyCorpusView(fileid, True, False, False, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, include_encoding=True)
+            ]
+        )
 
     def sents(self, fileids=None):
-        return concat([DependencyCorpusView(fileid, False, True, False, encoding=enc)
-                       for fileid, enc in self.abspaths(fileids, include_encoding=True)])
+        return concat(
+            [
+                DependencyCorpusView(fileid, False, True, False, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, include_encoding=True)
+            ]
+        )
 
     def tagged_sents(self, fileids=None):
-            return concat([DependencyCorpusView(fileid, True, True, False, encoding=enc)
-                           for fileid, enc in self.abspaths(fileids, include_encoding=True)])
+        return concat(
+            [
+                DependencyCorpusView(fileid, True, True, False, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, include_encoding=True)
+            ]
+        )
 
     def parsed_sents(self, fileids=None):
-        sents=concat([DependencyCorpusView(fileid, False, True, True, encoding=enc)
-                      for fileid, enc in self.abspaths(fileids, include_encoding=True)])
+        sents = concat(
+            [
+                DependencyCorpusView(fileid, False, True, True, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, include_encoding=True)
+            ]
+        )
         return [DependencyGraph(sent) for sent in sents]
 
 
 class DependencyCorpusView(StreamBackedCorpusView):
-    _DOCSTART = '-DOCSTART- -DOCSTART- O\n' #dokumentu hasiera definitzen da
-
-    def __init__(self, corpus_file, tagged, group_by_sent, dependencies,
-                 chunk_types=None, encoding='utf8'):
+    _DOCSTART = "-DOCSTART- -DOCSTART- O\n"  # marks the document start
+
+    def __init__(
+        self,
+        corpus_file,
+        tagged,
+        group_by_sent,
+        dependencies,
+        chunk_types=None,
+        encoding="utf8",
+    ):
         self._tagged = tagged
         self._dependencies = dependencies
         self._group_by_sent = group_by_sent
@@ -78,17 +111,17 @@ class DependencyCorpusView(StreamBackedCorpusView):
         sent = read_blankline_block(stream)[0].strip()
         # Strip off the docstart marker, if present.
         if sent.startswith(self._DOCSTART):
-            sent = sent[len(self._DOCSTART):].lstrip()
+            sent = sent[len(self._DOCSTART) :].lstrip()
 
         # extract word and tag from any of the formats
         if not self._dependencies:
-            lines = [line.split('\t') for line in sent.split('\n')]
+            lines = [line.split("\t") for line in sent.split("\n")]
             if len(lines[0]) == 3 or len(lines[0]) == 4:
                 sent = [(line[0], line[1]) for line in lines]
             elif len(lines[0]) == 10:
                 sent = [(line[1], line[4]) for line in lines]
             else:
-                raise ValueError('Unexpected number of fields in dependency tree file')
+                raise ValueError("Unexpected number of fields in dependency tree file")
 
             # discard tags if they weren't requested
             if not self._tagged:
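
The block reader above accepts two tab-separated layouts: a 3- or 4-column word/tag format, taking (word, tag) from columns 0 and 1, and the 10-column CoNLL format, taking them from columns 1 and 4. A small sketch with invented lines:

    simple = "dogs\tNNS\tB-NP"                           # word, tag, chunk
    conllx = "1\tdogs\tdog\tN\tNNS\t_\t2\tSUBJ\t_\t_"    # 10-column CoNLL
    for raw in (simple, conllx):
        cols = raw.split("\t")
        if len(cols) in (3, 4):
            word, tag = cols[0], cols[1]
        elif len(cols) == 10:
            word, tag = cols[1], cols[4]
        else:
            raise ValueError("Unexpected number of fields in dependency tree file")
        print(word, tag)  # 'dogs NNS' for both layouts
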
diff --git a/nlp_resource_data/nltk/corpus/reader/dependency.pyc b/nlp_resource_data/nltk/corpus/reader/dependency.pyc
deleted file mode 100755 (executable)
index 8047c8c..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/dependency.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 344efb4..4eaa6d1
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Framenet Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors: Chuck Wooters <wooters@icsi.berkeley.edu>,
 #          Nathan Schneider <nathan.schneider@georgetown.edu>
 # URL: <http://nltk.org/>
 """
 Corpus reader for the FrameNet 1.7 lexicon and corpus.
 """
-from __future__ import print_function, unicode_literals
 
-import os, sys
+import os
 import re
 import textwrap
 import itertools
+import sys
 import types
+from collections import defaultdict, OrderedDict
+from operator import itemgetter
+from itertools import zip_longest
 
-from six import string_types, text_type
-from six.moves import zip_longest
+from pprint import pprint
 
-from collections import defaultdict, OrderedDict
-from pprint import pprint, pformat
-from nltk.internals import ElementWrapper
 from nltk.corpus.reader import XMLCorpusReader, XMLCorpusView
-from nltk.compat import python_2_unicode_compatible
-from nltk.util import AbstractLazySequence, LazyConcatenation, LazyMap, LazyIteratorList
 
-__docformat__ = 'epytext en'
+from nltk.util import LazyConcatenation, LazyMap, LazyIteratorList
+
+__docformat__ = "epytext en"
+
 
 def mimic_wrap(lines, wrap_at=65, **kwargs):
     """
     Wrap the first of 'lines' with textwrap and the remaining lines at exactly the same
     positions as the first.
     """
-    l0 = textwrap.fill(lines[0], wrap_at, drop_whitespace=False).split('\n')
+    l0 = textwrap.fill(lines[0], wrap_at, drop_whitespace=False).split("\n")
     yield l0
 
     def _(line):
         il0 = 0
-        while line and il0<len(l0)-1:
-            yield line[:len(l0[il0])]
-            line = line[len(l0[il0]):]
+        while line and il0 < len(l0) - 1:
+            yield line[: len(l0[il0])]
+            line = line[len(l0[il0]) :]
             il0 += 1
-        if line: # Remaining stuff on this line past the end of the mimicked line.
+        if line:  # Remaining stuff on this line past the end of the mimicked line.
             # So just textwrap this line.
-            for ln in textwrap.fill(line, wrap_at, drop_whitespace=False).split('\n'):
+            for ln in textwrap.fill(line, wrap_at, drop_whitespace=False).split("\n"):
                 yield ln
 
     for l in lines[1:]:
         yield list(_(l))
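
mimic_wrap wraps only the first line with textwrap, then cuts every following line at the same column positions, which keeps vertically aligned rows (such as a sentence and an annotation line rendered beneath it) aligned after wrapping. A hedged usage sketch with invented, column-aligned input:

    sent = "the quick brown fox jumps over the lazy dog today"
    tags = "DT  JJ    JJ    NN  VBZ   IN   DT  JJ   NN  NN   "
    wrapped = list(mimic_wrap([sent, tags], wrap_at=25))
    for toks, tgs in zip(*wrapped):
        print(toks)
        print(tgs)
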
 
-def _pretty_longstring(defstr, prefix='', wrap_at=65):
+
+def _pretty_longstring(defstr, prefix="", wrap_at=65):
 
     """
     Helper function for pretty-printing a long string.
@@ -64,10 +65,11 @@ def _pretty_longstring(defstr, prefix='', wrap_at=65):
     """
 
     outstr = ""
-    for line in textwrap.fill(defstr, wrap_at).split('\n'):
-        outstr += prefix + line + '\n'
+    for line in textwrap.fill(defstr, wrap_at).split("\n"):
+        outstr += prefix + line + "\n"
     return outstr
 
+
 def _pretty_any(obj):
 
     """
@@ -81,15 +83,16 @@ def _pretty_any(obj):
 
     outstr = ""
     for k in obj:
-        if isinstance(obj[k], string_types) and len(obj[k]) > 65:
+        if isinstance(obj[k], str) and len(obj[k]) > 65:
             outstr += "[{0}]\n".format(k)
-            outstr += "{0}".format(_pretty_longstring(obj[k], prefix='  '))
-            outstr += '\n'
+            outstr += "{0}".format(_pretty_longstring(obj[k], prefix="  "))
+            outstr += "\n"
         else:
             outstr += "[{0}] {1}\n".format(k, obj[k])
 
     return outstr
 
+
 def _pretty_semtype(st):
 
     """
@@ -102,24 +105,30 @@ def _pretty_semtype(st):
     """
 
     semkeys = st.keys()
-    if len(semkeys) == 1: return "<None>"
+    if len(semkeys) == 1:
+        return "<None>"
 
     outstr = ""
     outstr += "semantic type ({0.ID}): {0.name}\n".format(st)
-    if 'abbrev' in semkeys:
+    if "abbrev" in semkeys:
         outstr += "[abbrev] {0}\n".format(st.abbrev)
-    if 'definition' in semkeys:
+    if "definition" in semkeys:
         outstr += "[definition]\n"
-        outstr += _pretty_longstring(st.definition,'  ')
+        outstr += _pretty_longstring(st.definition, "  ")
     outstr += "[rootType] {0}({1})\n".format(st.rootType.name, st.rootType.ID)
     if st.superType is None:
         outstr += "[superType] <None>\n"
     else:
         outstr += "[superType] {0}({1})\n".format(st.superType.name, st.superType.ID)
     outstr += "[subTypes] {0} subtypes\n".format(len(st.subTypes))
-    outstr += "  " + ", ".join('{0}({1})'.format(x.name, x.ID) for x in st.subTypes) + '\n'*(len(st.subTypes)>0)
+    outstr += (
+        "  "
+        + ", ".join("{0}({1})".format(x.name, x.ID) for x in st.subTypes)
+        + "\n" * (len(st.subTypes) > 0)
+    )
     return outstr
 
+
 def _pretty_frame_relation_type(freltyp):
 
     """
@@ -130,9 +139,12 @@ def _pretty_frame_relation_type(freltyp):
     :return: A nicely formatted string representation of the frame relation type.
     :rtype: str
     """
-    outstr = "<frame relation type ({0.ID}): {0.superFrameName} -- {0.name} -> {0.subFrameName}>".format(freltyp)
+    outstr = "<frame relation type ({0.ID}): {0.superFrameName} -- {0.name} -> {0.subFrameName}>".format(
+        freltyp
+    )
     return outstr
 
+
 def _pretty_frame_relation(frel):
 
     """
@@ -143,9 +155,12 @@ def _pretty_frame_relation(frel):
     :return: A nicely formatted string representation of the frame relation.
     :rtype: str
     """
-    outstr = "<{0.type.superFrameName}={0.superFrameName} -- {0.type.name} -> {0.type.subFrameName}={0.subFrameName}>".format(frel)
+    outstr = "<{0.type.superFrameName}={0.superFrameName} -- {0.type.name} -> {0.type.subFrameName}={0.subFrameName}>".format(
+        frel
+    )
     return outstr
 
+
 def _pretty_fe_relation(ferel):
 
     """
@@ -156,9 +171,12 @@ def _pretty_fe_relation(ferel):
     :return: A nicely formatted string representation of the FE relation.
     :rtype: str
     """
-    outstr = "<{0.type.superFrameName}={0.frameRelation.superFrameName}.{0.superFEName} -- {0.type.name} -> {0.type.subFrameName}={0.frameRelation.subFrameName}.{0.subFEName}>".format(ferel)
+    outstr = "<{0.type.superFrameName}={0.frameRelation.superFrameName}.{0.superFEName} -- {0.type.name} -> {0.type.subFrameName}={0.frameRelation.subFrameName}.{0.subFEName}>".format(
+        ferel
+    )
     return outstr
 
+
 def _pretty_lu(lu):
 
     """
@@ -173,36 +191,47 @@ def _pretty_lu(lu):
     lukeys = lu.keys()
     outstr = ""
     outstr += "lexical unit ({0.ID}): {0.name}\n\n".format(lu)
-    if 'definition' in lukeys:
+    if "definition" in lukeys:
         outstr += "[definition]\n"
-        outstr += _pretty_longstring(lu.definition,'  ')
-    if 'frame' in lukeys:
-        outstr += "\n[frame] {0}({1})\n".format(lu.frame.name,lu.frame.ID)
-    if 'incorporatedFE' in lukeys:
+        outstr += _pretty_longstring(lu.definition, "  ")
+    if "frame" in lukeys:
+        outstr += "\n[frame] {0}({1})\n".format(lu.frame.name, lu.frame.ID)
+    if "incorporatedFE" in lukeys:
         outstr += "\n[incorporatedFE] {0}\n".format(lu.incorporatedFE)
-    if 'POS' in lukeys:
+    if "POS" in lukeys:
         outstr += "\n[POS] {0}\n".format(lu.POS)
-    if 'status' in lukeys:
+    if "status" in lukeys:
         outstr += "\n[status] {0}\n".format(lu.status)
-    if 'totalAnnotated' in lukeys:
-        outstr += "\n[totalAnnotated] {0} annotated examples\n".format(lu.totalAnnotated)
-    if 'lexemes' in lukeys:
-        outstr += "\n[lexemes] {0}\n".format(' '.join('{0}/{1}'.format(lex.name,lex.POS) for lex in lu.lexemes))
-    if 'semTypes' in lukeys:
+    if "totalAnnotated" in lukeys:
+        outstr += "\n[totalAnnotated] {0} annotated examples\n".format(
+            lu.totalAnnotated
+        )
+    if "lexemes" in lukeys:
+        outstr += "\n[lexemes] {0}\n".format(
+            " ".join("{0}/{1}".format(lex.name, lex.POS) for lex in lu.lexemes)
+        )
+    if "semTypes" in lukeys:
         outstr += "\n[semTypes] {0} semantic types\n".format(len(lu.semTypes))
-        outstr += "  "*(len(lu.semTypes)>0) + ", ".join('{0}({1})'.format(x.name, x.ID) for x in lu.semTypes) + '\n'*(len(lu.semTypes)>0)
-    if 'URL' in lukeys:
+        outstr += (
+            "  " * (len(lu.semTypes) > 0)
+            + ", ".join("{0}({1})".format(x.name, x.ID) for x in lu.semTypes)
+            + "\n" * (len(lu.semTypes) > 0)
+        )
+    if "URL" in lukeys:
         outstr += "\n[URL] {0}\n".format(lu.URL)
-    if 'subCorpus' in lukeys:
+    if "subCorpus" in lukeys:
         subc = [x.name for x in lu.subCorpus]
         outstr += "\n[subCorpus] {0} subcorpora\n".format(len(lu.subCorpus))
-        for line in textwrap.fill(", ".join(sorted(subc)), 60).split('\n'):
+        for line in textwrap.fill(", ".join(sorted(subc)), 60).split("\n"):
             outstr += "  {0}\n".format(line)
-    if 'exemplars' in lukeys:
-        outstr += "\n[exemplars] {0} sentences across all subcorpora\n".format(len(lu.exemplars))
+    if "exemplars" in lukeys:
+        outstr += "\n[exemplars] {0} sentences across all subcorpora\n".format(
+            len(lu.exemplars)
+        )
 
     return outstr
 
+
 def _pretty_exemplars(exemplars, lu):
     """
     Helper function for pretty-printing a list of exemplar sentences for a lexical unit.
@@ -215,11 +244,12 @@ def _pretty_exemplars(exemplars, lu):
 
     outstr = ""
     outstr += "exemplar sentences for {0.name} in {0.frame.name}:\n\n".format(lu)
-    for i,sent in enumerate(exemplars):
+    for i, sent in enumerate(exemplars):
         outstr += "[{0}] {1}\n".format(i, sent.text)
     outstr += "\n"
     return outstr
 
+
 def _pretty_fulltext_sentences(sents):
     """
     Helper function for pretty-printing a list of annotated sentences for a full-text document.
@@ -232,13 +262,16 @@ def _pretty_fulltext_sentences(sents):
 
     outstr = ""
     outstr += "full-text document ({0.ID}) {0.name}:\n\n".format(sents)
-    outstr += "[corpid] {0.corpid}\n[corpname] {0.corpname}\n[description] {0.description}\n[URL] {0.URL}\n\n".format(sents)
+    outstr += "[corpid] {0.corpid}\n[corpname] {0.corpname}\n[description] {0.description}\n[URL] {0.URL}\n\n".format(
+        sents
+    )
     outstr += "[sentence]\n".format(sents)
-    for i,sent in enumerate(sents.sentence):
+    for i, sent in enumerate(sents.sentence):
         outstr += "[{0}] {1}\n".format(i, sent.text)
     outstr += "\n"
     return outstr
 
+
 def _pretty_fulltext_sentence(sent):
     """
     Helper function for pretty-printing an annotated sentence from a full-text document.
@@ -250,14 +283,17 @@ def _pretty_fulltext_sentence(sent):
     """
 
     outstr = ""
-    outstr += "full-text sentence ({0.ID}) in {1}:\n\n".format(sent, sent.doc.get('name',sent.doc.description))
+    outstr += "full-text sentence ({0.ID}) in {1}:\n\n".format(
+        sent, sent.doc.get("name", sent.doc.description)
+    )
     outstr += "\n[POS] {0} tags\n".format(len(sent.POS))
     outstr += "\n[POS_tagset] {0}\n\n".format(sent.POS_tagset)
     outstr += "[text] + [annotationSet]\n\n"
-    outstr += sent._ascii() # -> _annotation_ascii()
+    outstr += sent._ascii()  # -> _annotation_ascii()
     outstr += "\n"
     return outstr
 
+
 def _pretty_pos(aset):
     """
     Helper function for pretty-printing a sentence with its POS tags.
@@ -269,35 +305,42 @@ def _pretty_pos(aset):
     """
 
     outstr = ""
-    outstr += "POS annotation set ({0.ID}) {0.POS_tagset} in sentence {0.sent.ID}:\n\n".format(aset)
+    outstr += "POS annotation set ({0.ID}) {0.POS_tagset} in sentence {0.sent.ID}:\n\n".format(
+        aset
+    )
 
     # list the target spans and their associated aset index
     overt = sorted(aset.POS)
 
     sent = aset.sent
     s0 = sent.text
-    s1 = ''
-    s2 = ''
+    s1 = ""
+    s2 = ""
     i = 0
     adjust = 0
-    for j,k,lbl in overt:
-        assert j>=i,('Overlapping targets?',(j,k,lbl))
-        s1 += ' '*(j-i) + '-'*(k-j)
-        if len(lbl)>(k-j):
+    for j, k, lbl in overt:
+        assert j >= i, ("Overlapping targets?", (j, k, lbl))
+        s1 += " " * (j - i) + "-" * (k - j)
+        if len(lbl) > (k - j):
             # add space in the sentence to make room for the annotation index
-            amt = len(lbl)-(k-j)
-            s0 = s0[:k+adjust]+ '~'*amt + s0[k+adjust:] # '~' to prevent line wrapping
-            s1 = s1[:k+adjust]+ ' '*amt + s1[k+adjust:]
+            amt = len(lbl) - (k - j)
+            s0 = (
+                s0[: k + adjust] + "~" * amt + s0[k + adjust :]
+            )  # '~' to prevent line wrapping
+            s1 = s1[: k + adjust] + " " * amt + s1[k + adjust :]
             adjust += amt
-        s2 += ' '*(j-i) + lbl.ljust(k-j)
+        s2 += " " * (j - i) + lbl.ljust(k - j)
         i = k
 
     long_lines = [s0, s1, s2]
 
-    outstr += '\n\n'.join(map('\n'.join, zip_longest(*mimic_wrap(long_lines), fillvalue=' '))).replace('~',' ')
+    outstr += "\n\n".join(
+        map("\n".join, zip_longest(*mimic_wrap(long_lines), fillvalue=" "))
+    ).replace("~", " ")
     outstr += "\n"
     return outstr
 
+
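The column-alignment idea in _pretty_pos, reduced to a sketch: underline each non-overlapping (start, end, label) span and print the labels beneath it. (The real helper additionally widens the sentence with '~' padding when a label is longer than its span; that part is omitted here.)

    def underline(text, spans):
        # spans: sorted, non-overlapping (start, end, label) triples
        dashes, labels = "", ""
        i = 0
        for j, k, lbl in spans:
            dashes += " " * (j - i) + "-" * (k - j)
            labels += " " * (j - i) + lbl[: k - j].ljust(k - j)
            i = k
        return "\n".join([text, dashes, labels])

    print(underline("the cat sat", [(0, 3, "DT"), (4, 7, "NN"), (8, 11, "VBD")]))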
 def _pretty_annotation(sent, aset_level=False):
     """
     Helper function for pretty-printing an exemplar sentence for a lexical unit.
@@ -313,19 +356,31 @@ def _pretty_annotation(sent, aset_level=False):
     sentkeys = sent.keys()
     outstr = "annotation set" if aset_level else "exemplar sentence"
     outstr += " ({0.ID}):\n".format(sent)
-    if aset_level: # TODO: any UNANN exemplars?
+    if aset_level:  # TODO: any UNANN exemplars?
         outstr += "\n[status] {0}\n".format(sent.status)
-    for k in ('corpID', 'docID', 'paragNo', 'sentNo', 'aPos'):
+    for k in ("corpID", "docID", "paragNo", "sentNo", "aPos"):
         if k in sentkeys:
             outstr += "[{0}] {1}\n".format(k, sent[k])
-    outstr += "\n[LU] ({0.ID}) {0.name} in {0.frame.name}\n".format(sent.LU) if sent.LU else '\n[LU] Not found!'
-    outstr += "\n[frame] ({0.ID}) {0.name}\n".format(sent.frame)    # redundant with above, but .frame is convenient
+    outstr += (
+        "\n[LU] ({0.ID}) {0.name} in {0.frame.name}\n".format(sent.LU)
+        if sent.LU
+        else "\n[LU] Not found!"
+    )
+    outstr += "\n[frame] ({0.ID}) {0.name}\n".format(
+        sent.frame
+    )  # redundant with above, but .frame is convenient
     if not aset_level:
-        outstr += "\n[annotationSet] {0} annotation sets\n".format(len(sent.annotationSet))
+        outstr += "\n[annotationSet] {0} annotation sets\n".format(
+            len(sent.annotationSet)
+        )
         outstr += "\n[POS] {0} tags\n".format(len(sent.POS))
         outstr += "\n[POS_tagset] {0}\n".format(sent.POS_tagset)
-    outstr += "\n[GF] {0} relation{1}\n".format(len(sent.GF), "s" if len(sent.GF)!=1 else "")
-    outstr += "\n[PT] {0} phrase{1}\n".format(len(sent.PT), "s" if len(sent.PT)!=1 else "")
+    outstr += "\n[GF] {0} relation{1}\n".format(
+        len(sent.GF), "s" if len(sent.GF) != 1 else ""
+    )
+    outstr += "\n[PT] {0} phrase{1}\n".format(
+        len(sent.PT), "s" if len(sent.PT) != 1 else ""
+    )
     """
     Special Layers
     --------------
@@ -352,10 +407,10 @@ def _pretty_annotation(sent, aset_level=False):
     Gov (governor), X. Gov and X always cooccur.
 
     >>> from nltk.corpus import framenet as fn
->>> def f(luRE, lyr, ignore=set()):
-...   for i,ex in enumerate(fn.exemplars(luRE)):
-...     if lyr in ex and ex[lyr] and set(zip(*ex[lyr])[2]) - ignore:
-...       print(i,ex[lyr])
+    >>> def f(luRE, lyr, ignore=set()):
+    ...   for i,ex in enumerate(fn.exemplars(luRE)):
+    ...     if lyr in ex and ex[lyr] and set(zip(*ex[lyr])[2]) - ignore:
+    ...       print(i,ex[lyr])
 
     - Verb: Asp, Non-Asp
     - Noun: Cop, Supp, Ctrlr, Gov, X
@@ -365,202 +420,245 @@ def _pretty_annotation(sent, aset_level=False):
     - Scon: (none)
     - Art: (none)
     """
-    for lyr in ('NER', 'WSL', 'Other', 'Sent'):
+    for lyr in ("NER", "WSL", "Other", "Sent"):
         if lyr in sent and sent[lyr]:
-            outstr += "\n[{0}] {1} entr{2}\n".format(lyr, len(sent[lyr]), "ies" if len(sent[lyr])!=1 else "y")
+            outstr += "\n[{0}] {1} entr{2}\n".format(
+                lyr, len(sent[lyr]), "ies" if len(sent[lyr]) != 1 else "y"
+            )
     outstr += "\n[text] + [Target] + [FE]"
     # POS-specific layers: syntactically important words that are neither the target
     # nor the FEs. Include these along with the first FE layer but with '^' underlining.
-    for lyr in ('Verb', 'Noun', 'Adj', 'Adv', 'Prep', 'Scon', 'Art'):
+    for lyr in ("Verb", "Noun", "Adj", "Adv", "Prep", "Scon", "Art"):
         if lyr in sent and sent[lyr]:
             outstr += " + [{0}]".format(lyr)
-    if 'FE2' in sentkeys:
+    if "FE2" in sentkeys:
         outstr += " + [FE2]"
-        if 'FE3' in sentkeys:
+        if "FE3" in sentkeys:
             outstr += " + [FE3]"
     outstr += "\n\n"
-    outstr += sent._ascii() # -> _annotation_ascii()
+    outstr += sent._ascii()  # -> _annotation_ascii()
     outstr += "\n"
 
     return outstr
 
+
 def _annotation_ascii(sent):
-    '''
+    """
     Given a sentence or FE annotation set, construct the width-limited string showing
     an ASCII visualization of the sentence's annotations, calling either
     _annotation_ascii_frames() or _annotation_ascii_FEs() as appropriate.
     This will be attached as a method to appropriate AttrDict instances
     and called in the full pretty-printing of the instance.
-    '''
-    if sent._type=='fulltext_sentence' or ('annotationSet' in sent and len(sent.annotationSet)>2):
+    """
+    if sent._type == "fulltext_sentence" or (
+        "annotationSet" in sent and len(sent.annotationSet) > 2
+    ):
         # a full-text sentence OR sentence with multiple targets.
         # (multiple targets = >2 annotation sets, because the first annotation set is POS.)
         return _annotation_ascii_frames(sent)
-    else:   # an FE annotation set, or an LU sentence with 1 target
+    else:  # an FE annotation set, or an LU sentence with 1 target
         return _annotation_ascii_FEs(sent)
 
+
 def _annotation_ascii_frames(sent):
-    '''
+    """
     ASCII string rendering of the sentence along with its targets and frame names.
     Called for all full-text sentences, as well as the few LU sentences with multiple
     targets (e.g., fn.lu(6412).exemplars[82] has two want.v targets).
     Line-wrapped to limit the display width.
-    '''
+    """
     # list the target spans and their associated aset index
     overt = []
-    for a,aset in enumerate(sent.annotationSet[1:]):
-        for j,k in aset.Target:
-            indexS = "[{0}]".format(a+1)
-            if aset.status=='UNANN' or aset.LU.status=='Problem':
+    for a, aset in enumerate(sent.annotationSet[1:]):
+        for j, k in aset.Target:
+            indexS = "[{0}]".format(a + 1)
+            if aset.status == "UNANN" or aset.LU.status == "Problem":
                 indexS += " "
-                if aset.status=='UNANN':
-                    indexS += "!" # warning indicator that there is a frame annotation but no FE annotation
-                if aset.LU.status=='Problem':
-                    indexS += "?" # warning indicator that there is a missing LU definition (because the LU has Problem status)
-            overt.append((j,k,aset.LU.frame.name,indexS))
+                if aset.status == "UNANN":
+                    indexS += (
+                        "!"
+                    )  # warning indicator that there is a frame annotation but no FE annotation
+                if aset.LU.status == "Problem":
+                    indexS += (
+                        "?"
+                    )  # warning indicator that there is a missing LU definition (because the LU has Problem status)
+            overt.append((j, k, aset.LU.frame.name, indexS))
     overt = sorted(overt)
 
     duplicates = set()
-    for o,(j,k,fname,asetIndex) in enumerate(overt):
-        if o>0 and j<=overt[o-1][1]:
+    for o, (j, k, fname, asetIndex) in enumerate(overt):
+        if o > 0 and j <= overt[o - 1][1]:
             # multiple annotation sets on the same target
             # (e.g. due to a coordination construction or multiple annotators)
-            if overt[o-1][:2]==(j,k) and overt[o-1][2]==fname:    # same target, same frame
+            if (
+                overt[o - 1][:2] == (j, k) and overt[o - 1][2] == fname
+            ):  # same target, same frame
                 # splice indices together
-                combinedIndex = overt[o-1][3] + asetIndex    # e.g., '[1][2]', '[1]! [2]'
-                combinedIndex = combinedIndex.replace(' !', '! ').replace(' ?', '? ')
-                overt[o-1] = overt[o-1][:3]+(combinedIndex,)
+                combinedIndex = (
+                    overt[o - 1][3] + asetIndex
+                )  # e.g., '[1][2]', '[1]! [2]'
+                combinedIndex = combinedIndex.replace(" !", "! ").replace(" ?", "? ")
+                overt[o - 1] = overt[o - 1][:3] + (combinedIndex,)
                 duplicates.add(o)
-            else:   # different frames, same or overlapping targets
+            else:  # different frames, same or overlapping targets
                 s = sent.text
-                for j,k,fname,asetIndex in overt:
-                    s += '\n' + asetIndex + ' ' + sent.text[j:k] + ' :: ' + fname
-                s += '\n(Unable to display sentence with targets marked inline due to overlap)'
+                for j, k, fname, asetIndex in overt:
+                    s += "\n" + asetIndex + " " + sent.text[j:k] + " :: " + fname
+                s += "\n(Unable to display sentence with targets marked inline due to overlap)"
                 return s
     for o in reversed(sorted(duplicates)):
         del overt[o]
 
     s0 = sent.text
-    s1 = ''
-    s11 = ''
-    s2 = ''
+    s1 = ""
+    s11 = ""
+    s2 = ""
     i = 0
     adjust = 0
     fAbbrevs = OrderedDict()
-    for j,k,fname,asetIndex in overt:
-        if not j>=i:
-            assert j>=i,('Overlapping targets?'+(' UNANN' if any(aset.status=='UNANN' for aset in sent.annotationSet[1:]) else ''),(j,k,asetIndex))
-        s1 += ' '*(j-i) + '*'*(k-j)
-        short = fname[:k-j]
-        if (k-j)<len(fname):
+    for j, k, fname, asetIndex in overt:
+        if not j >= i:
+            assert j >= i, (
+                "Overlapping targets?"
+                + (
+                    " UNANN"
+                    if any(aset.status == "UNANN" for aset in sent.annotationSet[1:])
+                    else ""
+                ),
+                (j, k, asetIndex),
+            )
+        s1 += " " * (j - i) + "*" * (k - j)
+        short = fname[: k - j]
+        if (k - j) < len(fname):
             r = 0
             while short in fAbbrevs:
-                if fAbbrevs[short]==fname:
+                if fAbbrevs[short] == fname:
                     break
                 r += 1
-                short = fname[:k-j-1] + str(r)
-            else:   # short not in fAbbrevs
+                short = fname[: k - j - 1] + str(r)
+            else:  # short not in fAbbrevs
                 fAbbrevs[short] = fname
-        s11 += ' '*(j-i) + short.ljust(k-j)
-        if len(asetIndex)>(k-j):
+        s11 += " " * (j - i) + short.ljust(k - j)
+        if len(asetIndex) > (k - j):
             # add space in the sentence to make room for the annotation index
-            amt = len(asetIndex)-(k-j)
-            s0 = s0[:k+adjust]+ '~'*amt + s0[k+adjust:] # '~' to prevent line wrapping
-            s1 = s1[:k+adjust]+ ' '*amt + s1[k+adjust:]
-            s11 = s11[:k+adjust]+ ' '*amt + s11[k+adjust:]
+            amt = len(asetIndex) - (k - j)
+            s0 = (
+                s0[: k + adjust] + "~" * amt + s0[k + adjust :]
+            )  # '~' to prevent line wrapping
+            s1 = s1[: k + adjust] + " " * amt + s1[k + adjust :]
+            s11 = s11[: k + adjust] + " " * amt + s11[k + adjust :]
             adjust += amt
-        s2 += ' '*(j-i) + asetIndex.ljust(k-j)
+        s2 += " " * (j - i) + asetIndex.ljust(k - j)
         i = k
 
     long_lines = [s0, s1, s11, s2]
 
-    outstr = '\n\n'.join(map('\n'.join, zip_longest(*mimic_wrap(long_lines), fillvalue=' '))).replace('~',' ')
-    outstr += '\n'
+    outstr = "\n\n".join(
+        map("\n".join, zip_longest(*mimic_wrap(long_lines), fillvalue=" "))
+    ).replace("~", " ")
+    outstr += "\n"
     if fAbbrevs:
-        outstr += ' ('+', '.join('='.join(pair) for pair in fAbbrevs.items())+')'
-        assert len(fAbbrevs)==len(dict(fAbbrevs)),'Abbreviation clash'
+        outstr += " (" + ", ".join("=".join(pair) for pair in fAbbrevs.items()) + ")"
+        assert len(fAbbrevs) == len(dict(fAbbrevs)), "Abbreviation clash"
 
     return outstr
 
+
 def _annotation_ascii_FE_layer(overt, ni, feAbbrevs):
-    '''Helper for _annotation_ascii_FEs().'''
-    s1 = ''
-    s2 = ''
+    """Helper for _annotation_ascii_FEs()."""
+    s1 = ""
+    s2 = ""
     i = 0
-    for j,k,fename in overt:
-        s1 += ' '*(j-i) + ('^' if fename.islower() else '-')*(k-j)
-        short = fename[:k-j]
-        if len(fename)>len(short):
+    for j, k, fename in overt:
+        s1 += " " * (j - i) + ("^" if fename.islower() else "-") * (k - j)
+        short = fename[: k - j]
+        if len(fename) > len(short):
             r = 0
             while short in feAbbrevs:
-                if feAbbrevs[short]==fename:
+                if feAbbrevs[short] == fename:
                     break
                 r += 1
-                short = fename[:k-j-1] + str(r)
-            else:   # short not in feAbbrevs
+                short = fename[: k - j - 1] + str(r)
+            else:  # short not in feAbbrevs
                 feAbbrevs[short] = fename
-        s2 += ' '*(j-i) + short.ljust(k-j)
+        s2 += " " * (j - i) + short.ljust(k - j)
         i = k
 
-    sNI = ''
+    sNI = ""
     if ni:
-        sNI += ' ['+', '.join(':'.join(x) for x in sorted(ni.items()))+']'
-    return [s1,s2,sNI]
+        sNI += " [" + ", ".join(":".join(x) for x in sorted(ni.items())) + "]"
+    return [s1, s2, sNI]
+
 
 def _annotation_ascii_FEs(sent):
-    '''
+    """
     ASCII string rendering of the sentence along with a single target and its FEs.
     Secondary and tertiary FE layers are included if present.
     'sent' can be an FE annotation set or an LU sentence with a single target.
     Line-wrapped to limit the display width.
-    '''
+    """
     feAbbrevs = OrderedDict()
-    posspec = []    # POS-specific layer spans (e.g., Supp[ort], Cop[ula])
+    posspec = []  # POS-specific layer spans (e.g., Supp[ort], Cop[ula])
     posspec_separate = False
-    for lyr in ('Verb', 'Noun', 'Adj', 'Adv', 'Prep', 'Scon', 'Art'):
+    for lyr in ("Verb", "Noun", "Adj", "Adv", "Prep", "Scon", "Art"):
         if lyr in sent and sent[lyr]:
-            for a,b,lbl in sent[lyr]:
-                if lbl=='X': # skip this, which covers an entire phrase typically containing the target and all its FEs
+            for a, b, lbl in sent[lyr]:
+                if (
+                    lbl == "X"
+                ):  # skip this, which covers an entire phrase typically containing the target and all its FEs
                     # (but do display the Gov)
                     continue
-                if any(1 for x,y,felbl in sent.FE[0] if x<=a<y or a<=x<b):
+                if any(1 for x, y, felbl in sent.FE[0] if x <= a < y or a <= x < b):
                     # overlap between one of the POS-specific layers and first FE layer
-                    posspec_separate = True # show POS-specific layers on a separate line
-                posspec.append((a,b,lbl.lower().replace('-',''))) # lowercase Cop=>cop, Non-Asp=>nonasp, etc. to distinguish from FE names
+                    posspec_separate = (
+                        True
+                    )  # show POS-specific layers on a separate line
+                posspec.append(
+                    (a, b, lbl.lower().replace("-", ""))
+                )  # lowercase Cop=>cop, Non-Asp=>nonasp, etc. to distinguish from FE names
     if posspec_separate:
         POSSPEC = _annotation_ascii_FE_layer(posspec, {}, feAbbrevs)
-    FE1 = _annotation_ascii_FE_layer(sorted(sent.FE[0] + (posspec if not posspec_separate else [])), sent.FE[1], feAbbrevs)
+    FE1 = _annotation_ascii_FE_layer(
+        sorted(sent.FE[0] + (posspec if not posspec_separate else [])),
+        sent.FE[1],
+        feAbbrevs,
+    )
     FE2 = FE3 = None
-    if 'FE2' in sent:
+    if "FE2" in sent:
         FE2 = _annotation_ascii_FE_layer(sent.FE2[0], sent.FE2[1], feAbbrevs)
-        if 'FE3' in sent:
+        if "FE3" in sent:
             FE3 = _annotation_ascii_FE_layer(sent.FE3[0], sent.FE3[1], feAbbrevs)
 
-    for i,j in sent.Target:
+    for i, j in sent.Target:
         FE1span, FE1name, FE1exp = FE1
-        if len(FE1span)<j:
-            FE1span += ' '*(j-len(FE1span))
-        if len(FE1name)<j:
-            FE1name += ' '*(j-len(FE1name))
+        if len(FE1span) < j:
+            FE1span += " " * (j - len(FE1span))
+        if len(FE1name) < j:
+            FE1name += " " * (j - len(FE1name))
             FE1[1] = FE1name
-        FE1[0] = FE1span[:i] + FE1span[i:j].replace(' ','*').replace('-','=') + FE1span[j:]
+        FE1[0] = (
+            FE1span[:i] + FE1span[i:j].replace(" ", "*").replace("-", "=") + FE1span[j:]
+        )
     long_lines = [sent.text]
     if posspec_separate:
         long_lines.extend(POSSPEC[:2])
-    long_lines.extend([FE1[0], FE1[1]+FE1[2]]) # lines with no length limit
+    long_lines.extend([FE1[0], FE1[1] + FE1[2]])  # lines with no length limit
     if FE2:
-        long_lines.extend([FE2[0], FE2[1]+FE2[2]])
+        long_lines.extend([FE2[0], FE2[1] + FE2[2]])
         if FE3:
-            long_lines.extend([FE3[0], FE3[1]+FE3[2]])
-    long_lines.append('')
-    outstr = '\n'.join(map('\n'.join, zip_longest(*mimic_wrap(long_lines), fillvalue=' ')))
+            long_lines.extend([FE3[0], FE3[1] + FE3[2]])
+    long_lines.append("")
+    outstr = "\n".join(
+        map("\n".join, zip_longest(*mimic_wrap(long_lines), fillvalue=" "))
+    )
     if feAbbrevs:
-        outstr += '('+', '.join('='.join(pair) for pair in feAbbrevs.items())+')'
-        assert len(feAbbrevs)==len(dict(feAbbrevs)),'Abbreviation clash'
+        outstr += "(" + ", ".join("=".join(pair) for pair in feAbbrevs.items()) + ")"
+        assert len(feAbbrevs) == len(dict(feAbbrevs)), "Abbreviation clash"
     outstr += "\n"
 
     return outstr
 
+
 def _pretty_fe(fe):
 
     """
@@ -573,35 +671,38 @@ def _pretty_fe(fe):
     """
     fekeys = fe.keys()
     outstr = ""
-    outstr += "frame element ({0.ID}): {0.name}\n    of {1.name}({1.ID})\n".format(fe, fe.frame)
-    if 'definition' in fekeys:
+    outstr += "frame element ({0.ID}): {0.name}\n    of {1.name}({1.ID})\n".format(
+        fe, fe.frame
+    )
+    if "definition" in fekeys:
         outstr += "[definition]\n"
-        outstr += _pretty_longstring(fe.definition,'  ')
-    if 'abbrev' in fekeys:
+        outstr += _pretty_longstring(fe.definition, "  ")
+    if "abbrev" in fekeys:
         outstr += "[abbrev] {0}\n".format(fe.abbrev)
-    if 'coreType' in fekeys:
+    if "coreType" in fekeys:
         outstr += "[coreType] {0}\n".format(fe.coreType)
-    if 'requiresFE' in fekeys:
+    if "requiresFE" in fekeys:
         outstr += "[requiresFE] "
         if fe.requiresFE is None:
             outstr += "<None>\n"
         else:
             outstr += "{0}({1})\n".format(fe.requiresFE.name, fe.requiresFE.ID)
-    if 'excludesFE' in fekeys:
+    if "excludesFE" in fekeys:
         outstr += "[excludesFE] "
         if fe.excludesFE is None:
             outstr += "<None>\n"
         else:
             outstr += "{0}({1})\n".format(fe.excludesFE.name, fe.excludesFE.ID)
-    if 'semType' in fekeys:
+    if "semType" in fekeys:
         outstr += "[semType] "
         if fe.semType is None:
             outstr += "<None>\n"
         else:
-            outstr += "\n  " + "{0}({1})".format(fe.semType.name, fe.semType.ID) + '\n'
+            outstr += "\n  " + "{0}({1})".format(fe.semType.name, fe.semType.ID) + "\n"
 
     return outstr
 
+
 def _pretty_frame(frame):
 
     """
@@ -617,42 +718,65 @@ def _pretty_frame(frame):
     outstr += "frame ({0.ID}): {0.name}\n\n".format(frame)
     outstr += "[URL] {0}\n\n".format(frame.URL)
     outstr += "[definition]\n"
-    outstr += _pretty_longstring(frame.definition, '  ') + '\n'
+    outstr += _pretty_longstring(frame.definition, "  ") + "\n"
 
     outstr += "[semTypes] {0} semantic types\n".format(len(frame.semTypes))
-    outstr += "  "*(len(frame.semTypes)>0) + ", ".join("{0}({1})".format(x.name, x.ID) for x in frame.semTypes) + '\n'*(len(frame.semTypes)>0)
+    outstr += (
+        "  " * (len(frame.semTypes) > 0)
+        + ", ".join("{0}({1})".format(x.name, x.ID) for x in frame.semTypes)
+        + "\n" * (len(frame.semTypes) > 0)
+    )
 
-    outstr += "\n[frameRelations] {0} frame relations\n".format(len(frame.frameRelations))
-    outstr += '  ' + '\n  '.join(repr(frel) for frel in frame.frameRelations) + '\n'
+    outstr += "\n[frameRelations] {0} frame relations\n".format(
+        len(frame.frameRelations)
+    )
+    outstr += "  " + "\n  ".join(repr(frel) for frel in frame.frameRelations) + "\n"
 
     outstr += "\n[lexUnit] {0} lexical units\n".format(len(frame.lexUnit))
     lustrs = []
-    for luName,lu in sorted(frame.lexUnit.items()):
-        tmpstr = '{0} ({1})'.format(luName, lu.ID)
+    for luName, lu in sorted(frame.lexUnit.items()):
+        tmpstr = "{0} ({1})".format(luName, lu.ID)
         lustrs.append(tmpstr)
-    outstr += "{0}\n".format(_pretty_longstring(', '.join(lustrs),prefix='  '))
+    outstr += "{0}\n".format(_pretty_longstring(", ".join(lustrs), prefix="  "))
 
     outstr += "\n[FE] {0} frame elements\n".format(len(frame.FE))
     fes = {}
-    for feName,fe in sorted(frame.FE.items()):
+    for feName, fe in sorted(frame.FE.items()):
         try:
             fes[fe.coreType].append("{0} ({1})".format(feName, fe.ID))
         except KeyError:
             fes[fe.coreType] = []
             fes[fe.coreType].append("{0} ({1})".format(feName, fe.ID))
-    for ct in sorted(fes.keys(), key=lambda ct2: ['Core','Core-Unexpressed','Peripheral','Extra-Thematic'].index(ct2)):
-        outstr += "{0:>16}: {1}\n".format(ct, ', '.join(sorted(fes[ct])))
-
-    outstr += "\n[FEcoreSets] {0} frame element core sets\n".format(len(frame.FEcoreSets))
-    outstr += "  " + '\n  '.join(", ".join([x.name for x in coreSet]) for coreSet in frame.FEcoreSets) + '\n'
+    for ct in sorted(
+        fes.keys(),
+        key=lambda ct2: [
+            "Core",
+            "Core-Unexpressed",
+            "Peripheral",
+            "Extra-Thematic",
+        ].index(ct2),
+    ):
+        outstr += "{0:>16}: {1}\n".format(ct, ", ".join(sorted(fes[ct])))
+
+    outstr += "\n[FEcoreSets] {0} frame element core sets\n".format(
+        len(frame.FEcoreSets)
+    )
+    outstr += (
+        "  "
+        + "\n  ".join(
+            ", ".join([x.name for x in coreSet]) for coreSet in frame.FEcoreSets
+        )
+        + "\n"
+    )
 
     return outstr
 
+
 class FramenetError(Exception):
 
     """An exception class for framenet-related errors."""
 
-@python_2_unicode_compatible
+
 class AttrDict(dict):
 
     """A class that wraps a dict and allows accessing the keys of the
@@ -672,70 +796,76 @@ class AttrDict(dict):
 
     def __init__(self, *args, **kwargs):
         super(AttrDict, self).__init__(*args, **kwargs)
-        #self.__dict__ = self
+        # self.__dict__ = self
 
     def __setattr__(self, name, value):
         self[name] = value
+
     def __getattr__(self, name):
-        if name=='_short_repr':
+        if name == "_short_repr":
             return self._short_repr
         return self[name]
+
     def __getitem__(self, name):
-        v = super(AttrDict,self).__getitem__(name)
-        if isinstance(v,Future):
+        v = super(AttrDict, self).__getitem__(name)
+        if isinstance(v, Future):
             return v._data()
         return v
 
     def _short_repr(self):
-        if '_type' in self:
-            if self['_type'].endswith('relation'):
+        if "_type" in self:
+            if self["_type"].endswith("relation"):
                 return self.__repr__()
             try:
-                return "<{0} ID={1} name={2}>".format(self['_type'], self['ID'], self['name'])
+                return "<{0} ID={1} name={2}>".format(
+                    self["_type"], self["ID"], self["name"]
+                )
             except KeyError:
-                try:    # no ID--e.g., for _type=lusubcorpus
-                    return "<{0} name={1}>".format(self['_type'], self['name'])
-                except KeyError:    # no name--e.g., for _type=lusentence
-                    return "<{0} ID={1}>".format(self['_type'], self['ID'])
+                try:  # no ID--e.g., for _type=lusubcorpus
+                    return "<{0} name={1}>".format(self["_type"], self["name"])
+                except KeyError:  # no name--e.g., for _type=lusentence
+                    return "<{0} ID={1}>".format(self["_type"], self["ID"])
         else:
             return self.__repr__()
 
     def _str(self):
         outstr = ""
 
-        if not '_type' in self:
+        if "_type" not in self:
             outstr = _pretty_any(self)
-        elif self['_type'] == 'frame':
+        elif self["_type"] == "frame":
             outstr = _pretty_frame(self)
-        elif self['_type'] == 'fe':
+        elif self["_type"] == "fe":
             outstr = _pretty_fe(self)
-        elif self['_type'] == 'lu':
+        elif self["_type"] == "lu":
             outstr = _pretty_lu(self)
-        elif self['_type'] == 'luexemplars': # list of ALL exemplars for LU
+        elif self["_type"] == "luexemplars":  # list of ALL exemplars for LU
             outstr = _pretty_exemplars(self, self[0].LU)
-        elif self['_type'] == 'fulltext_annotation': # list of all sentences for full-text doc
+        elif (
+            self["_type"] == "fulltext_annotation"
+        ):  # list of all sentences for full-text doc
             outstr = _pretty_fulltext_sentences(self)
-        elif self['_type'] == 'lusentence':
+        elif self["_type"] == "lusentence":
             outstr = _pretty_annotation(self)
-        elif self['_type'] == 'fulltext_sentence':
+        elif self["_type"] == "fulltext_sentence":
             outstr = _pretty_fulltext_sentence(self)
-        elif self['_type'] in ('luannotationset', 'fulltext_annotationset'):
+        elif self["_type"] in ("luannotationset", "fulltext_annotationset"):
             outstr = _pretty_annotation(self, aset_level=True)
-        elif self['_type'] == 'posannotationset':
+        elif self["_type"] == "posannotationset":
             outstr = _pretty_pos(self)
-        elif self['_type'] == 'semtype':
+        elif self["_type"] == "semtype":
             outstr = _pretty_semtype(self)
-        elif self['_type'] == 'framerelationtype':
+        elif self["_type"] == "framerelationtype":
             outstr = _pretty_frame_relation_type(self)
-        elif self['_type'] == 'framerelation':
+        elif self["_type"] == "framerelation":
             outstr = _pretty_frame_relation(self)
-        elif self['_type'] == 'ferelation':
+        elif self["_type"] == "ferelation":
             outstr = _pretty_fe_relation(self)
         else:
             outstr = _pretty_any(self)
 
         # ensure result is unicode string prior to applying the
-        # @python_2_unicode_compatible decorator (because non-ASCII characters
+        # str() conversion (because non-ASCII characters
         # could in principle occur in the data and would trigger an encoding error when
         # passed as arguments to str.format()).
         # assert isinstance(outstr, unicode) # not in Python 3.2
@@ -743,104 +873,118 @@ class AttrDict(dict):
 
     def __str__(self):
         return self._str()
+
     def __repr__(self):
         return self.__str__()
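The attribute-access pattern of AttrDict, reduced to its core as a hypothetical standalone class (without the Future resolution and _short_repr machinery above):

    class SimpleAttrDict(dict):
        # attribute reads and writes are forwarded to dict item access
        def __setattr__(self, name, value):
            self[name] = value

        def __getattr__(self, name):
            try:
                return self[name]
            except KeyError:
                raise AttributeError(name)

    d = SimpleAttrDict(ID=256, name="example.n")  # illustrative values
    assert d.name == d["name"] == "example.n"
    d.POS = "N"
    assert d["POS"] == "N"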
 
-@python_2_unicode_compatible
+
 class SpecialList(list):
     """
     A list subclass which adds a '_type' attribute for special printing
     (similar to an AttrDict, though this is NOT an AttrDict subclass).
     """
+
     def __init__(self, typ, *args, **kwargs):
-        super(SpecialList,self).__init__(*args, **kwargs)
+        super(SpecialList, self).__init__(*args, **kwargs)
         self._type = typ
 
     def _str(self):
         outstr = ""
 
         assert self._type
-        if len(self)==0:
+        if len(self) == 0:
             outstr = "[]"
-        elif self._type == 'luexemplars': # list of ALL exemplars for LU
+        elif self._type == "luexemplars":  # list of ALL exemplars for LU
             outstr = _pretty_exemplars(self, self[0].LU)
         else:
-            assert False,self._type
+            assert False, self._type
         return outstr
 
     def __str__(self):
         return self._str()
+
     def __repr__(self):
         return self.__str__()
 
+
 class Future(object):
     """
     Wraps and acts as a proxy for a value to be loaded lazily (on demand).
     Adapted from https://gist.github.com/sergey-miryanov/2935416
     """
+
     def __init__(self, loader, *args, **kwargs):
         """
         :param loader: when called with no arguments, returns the value to be stored
         :type loader: callable
         """
-        super (Future, self).__init__(*args, **kwargs)
+        super(Future, self).__init__(*args, **kwargs)
         self._loader = loader
         self._d = None
+
     def _data(self):
         if callable(self._loader):
             self._d = self._loader()
-            self._loader = None # the data is now cached
+            self._loader = None  # the data is now cached
         return self._d
 
-    def __nonzero__(self):
+    def __bool__(self):  # Python 3 truth hook; __nonzero__ was Python 2 only
         return bool(self._data())
+
     def __len__(self):
         return len(self._data())
 
     def __setitem__(self, key, value):
-        return self._data ().__setitem__(key, value)
+        return self._data().__setitem__(key, value)
+
     def __getitem__(self, key):
-        return self._data ().__getitem__(key)
+        return self._data().__getitem__(key)
+
     def __getattr__(self, key):
-        return self._data().__getattr__(key)
+        return getattr(self._data(), key)  # works even if the value defines no __getattr__
 
     def __str__(self):
         return self._data().__str__()
+
     def __repr__(self):
         return self._data().__repr__()
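The lazy-proxy pattern Future implements, as a minimal sketch: the loader callable runs once, on first access, and the result is cached thereafter:

    class Lazy:
        def __init__(self, loader):
            self._loader = loader  # called once, on first access
            self._d = None

        def value(self):
            if callable(self._loader):
                self._d = self._loader()
                self._loader = None  # mark the result as cached
            return self._d

    calls = []
    lazy = Lazy(lambda: calls.append("load") or 42)
    assert lazy.value() == 42 and lazy.value() == 42
    assert calls == ["load"]  # the loader ran exactly once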
 
-@python_2_unicode_compatible
+
 class PrettyDict(AttrDict):
     """
     Displays an abbreviated repr of values where possible.
     Inherits from AttrDict, so a callable value will
     be lazily converted to an actual value.
     """
+
     def __init__(self, *args, **kwargs):
-        _BREAK_LINES = kwargs.pop('breakLines', False)
+        _BREAK_LINES = kwargs.pop("breakLines", False)
         super(PrettyDict, self).__init__(*args, **kwargs)
-        dict.__setattr__(self, '_BREAK_LINES', _BREAK_LINES)
+        dict.__setattr__(self, "_BREAK_LINES", _BREAK_LINES)
+
     def __repr__(self):
         parts = []
-        for k,v in sorted(self.items()):
-            kv = repr(k)+': '
+        for k, v in sorted(self.items()):
+            kv = repr(k) + ": "
             try:
                 kv += v._short_repr()
             except AttributeError:
                 kv += repr(v)
             parts.append(kv)
-        return '{'+(',\n ' if self._BREAK_LINES else ', ').join(parts)+'}'
+        return "{" + (",\n " if self._BREAK_LINES else ", ").join(parts) + "}"
+
 
-@python_2_unicode_compatible
 class PrettyList(list):
     """
     Displays an abbreviated repr of only the first several elements, not the whole list.
     """
+
     # from nltk.util
     def __init__(self, *args, **kwargs):
-        self._MAX_REPR_SIZE = kwargs.pop('maxReprSize', 60)
-        self._BREAK_LINES = kwargs.pop('breakLines', False)
+        self._MAX_REPR_SIZE = kwargs.pop("maxReprSize", 60)
+        self._BREAK_LINES = kwargs.pop("breakLines", False)
         super(PrettyList, self).__init__(*args, **kwargs)
+
     def __repr__(self):
         """
         Return a string representation for this corpus view that is
@@ -851,19 +995,25 @@ class PrettyList(list):
         length = 5
 
         for elt in self:
-            pieces.append(elt._short_repr()) # key difference from inherited version: call to _short_repr()
+            pieces.append(
+                elt._short_repr()
+            )  # key difference from inherited version: call to _short_repr()
             length += len(pieces[-1]) + 2
             if self._MAX_REPR_SIZE and length > self._MAX_REPR_SIZE and len(pieces) > 2:
-                return "[%s, ...]" % text_type(',\n ' if self._BREAK_LINES else ', ').join(pieces[:-1])
-        return "[%s]" % text_type(',\n ' if self._BREAK_LINES else ', ').join(pieces)
+                return "[%s, ...]" % str(
+                    ",\n " if self._BREAK_LINES else ", "
+                ).join(pieces[:-1])
+        return "[%s]" % str(",\n " if self._BREAK_LINES else ", ").join(pieces)
+
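PrettyList above and the three lazy views below all use the same truncation scheme; isolated as a sketch, it accumulates element reprs against a character budget and elides the tail once the budget is exceeded:

    def short_list_repr(items, max_size=60):
        pieces, length = [], 5
        for elt in items:
            pieces.append(repr(elt))
            length += len(pieces[-1]) + 2
            if length > max_size and len(pieces) > 2:
                return "[%s, ...]" % ", ".join(pieces[:-1])
        return "[%s]" % ", ".join(pieces)

    print(short_list_repr(range(100)))  # long input -> elided with ', ...'
    print(short_list_repr(range(3)))    # short input -> printed in full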
 
-@python_2_unicode_compatible
 class PrettyLazyMap(LazyMap):
     """
     Displays an abbreviated repr of only the first several elements, not the whole list.
     """
+
     # from nltk.util
     _MAX_REPR_SIZE = 60
+
     def __repr__(self):
         """
         Return a string representation for this corpus view that is
@@ -873,19 +1023,23 @@ class PrettyLazyMap(LazyMap):
         pieces = []
         length = 5
         for elt in self:
-            pieces.append(elt._short_repr()) # key difference from inherited version: call to _short_repr()
+            pieces.append(
+                elt._short_repr()
+            )  # key difference from inherited version: call to _short_repr()
             length += len(pieces[-1]) + 2
             if length > self._MAX_REPR_SIZE and len(pieces) > 2:
-                return "[%s, ...]" % text_type(', ').join(pieces[:-1])
-        return "[%s]" % text_type(', ').join(pieces)
+                return "[%s, ...]" % str(", ").join(pieces[:-1])
+        return "[%s]" % str(", ").join(pieces)
+
 
-@python_2_unicode_compatible
 class PrettyLazyIteratorList(LazyIteratorList):
     """
     Displays an abbreviated repr of only the first several elements, not the whole list.
     """
+
     # from nltk.util
     _MAX_REPR_SIZE = 60
+
     def __repr__(self):
         """
         Return a string representation for this corpus view that is
@@ -895,19 +1049,23 @@ class PrettyLazyIteratorList(LazyIteratorList):
         pieces = []
         length = 5
         for elt in self:
-            pieces.append(elt._short_repr()) # key difference from inherited version: call to _short_repr()
+            pieces.append(
+                elt._short_repr()
+            )  # key difference from inherited version: call to _short_repr()
             length += len(pieces[-1]) + 2
             if length > self._MAX_REPR_SIZE and len(pieces) > 2:
-                return "[%s, ...]" % text_type(', ').join(pieces[:-1])
-        return "[%s]" % text_type(', ').join(pieces)
+                return "[%s, ...]" % str(", ").join(pieces[:-1])
+        return "[%s]" % str(", ").join(pieces)
+
 
-@python_2_unicode_compatible
 class PrettyLazyConcatenation(LazyConcatenation):
     """
     Displays an abbreviated repr of only the first several elements, not the whole list.
     """
+
     # from nltk.util
     _MAX_REPR_SIZE = 60
+
     def __repr__(self):
         """
         Return a string representation for this corpus view that is
@@ -917,11 +1075,13 @@ class PrettyLazyConcatenation(LazyConcatenation):
         pieces = []
         length = 5
         for elt in self:
-            pieces.append(elt._short_repr()) # key difference from inherited version: call to _short_repr()
+            pieces.append(
+                elt._short_repr()
+            )  # key difference from inherited version: call to _short_repr()
             length += len(pieces[-1]) + 2
             if length > self._MAX_REPR_SIZE and len(pieces) > 2:
-                return "[%s, ...]" % text_type(', ').join(pieces[:-1])
-        return "[%s]" % text_type(', ').join(pieces)
+                return "[%s, ...]" % str(", ").join(pieces[:-1])
+        return "[%s]" % str(", ").join(pieces)
 
     def __add__(self, other):
         """Return a list concatenating self with other."""
@@ -944,7 +1104,7 @@ class FramenetCorpusReader(XMLCorpusReader):
     True
     """
 
-    _bad_statuses = ['Problem']
+    _bad_statuses = ["Problem"]
     """
     When loading LUs for a frame, those whose status is in this list will be ignored.
     Due to caching, if user code modifies this, it should do so before loading any data.
@@ -979,14 +1139,14 @@ class FramenetCorpusReader(XMLCorpusReader):
 
         # Indexes used for faster look-ups
         self._frame_idx = None
-        self._cached_frames = {}    # name -> ID
+        self._cached_frames = {}  # name -> ID
         self._lu_idx = None
         self._fulltext_idx = None
         self._semtypes = None
-        self._freltyp_idx = None    # frame relation types (Inheritance, Using, etc.)
-        self._frel_idx = None   # frame-to-frame relation instances
+        self._freltyp_idx = None  # frame relation types (Inheritance, Using, etc.)
+        self._frel_idx = None  # frame-to-frame relation instances
         self._ferel_idx = None  # FE-to-FE relation instances
-        self._frel_f_idx = None # frame-to-frame relations associated with each frame
+        self._frel_f_idx = None  # frame-to-frame relations associated with each frame
 
     def help(self, attrname=None):
         """Display help information summarizing the main methods."""
@@ -998,10 +1158,9 @@ class FramenetCorpusReader(XMLCorpusReader):
         # as it's easier to just call frame().
         # Also not mentioning lu_basic().
 
-
         msg = """
-Citation: Nathan Schneider and Chuck Wooters (2017), 
-"The NLTK FrameNet API: Designing for Discoverability with a Rich Linguistic Resource". 
+Citation: Nathan Schneider and Chuck Wooters (2017),
+"The NLTK FrameNet API: Designing for Discoverability with a Rich Linguistic Resource".
 Proceedings of EMNLP: System Demonstrations. https://arxiv.org/abs/1703.07438
 
 Use the following methods to access data in FrameNet.
@@ -1080,17 +1239,20 @@ warnings(True) to display corpus consistency warnings when loading data
             self._buildrelationindex()  # always load frame relations before frames,
             # otherwise weird ordering effects might result in incomplete information
         self._frame_idx = {}
-        for f in XMLCorpusView(self.abspath("frameIndex.xml"),
-                               'frameIndex/frame', self._handle_elt):
-            self._frame_idx[f['ID']] = f
+        for f in XMLCorpusView(
+            self.abspath("frameIndex.xml"), "frameIndex/frame", self._handle_elt
+        ):
+            self._frame_idx[f["ID"]] = f
 
     def _buildcorpusindex(self):
         # The total number of fulltext annotated documents in Framenet
         # is fairly small (~90) so this index should not be very large
         self._fulltext_idx = {}
-        for doclist in XMLCorpusView(self.abspath("fulltextIndex.xml"),
-                                     'fulltextIndex/corpus',
-                                     self._handle_fulltextindex_elt):
+        for doclist in XMLCorpusView(
+            self.abspath("fulltextIndex.xml"),
+            "fulltextIndex/corpus",
+            self._handle_fulltextindex_elt,
+        ):
             for doc in doclist:
                 self._fulltext_idx[doc.ID] = doc
 
@@ -1098,16 +1260,24 @@ warnings(True) to display corpus consistency warnings when loading data
         # The number of LUs in Framenet is about 13,000 so this index
         # should not be very large
         self._lu_idx = {}
-        for lu in XMLCorpusView(self.abspath("luIndex.xml"),
-                                'luIndex/lu', self._handle_elt):
-            self._lu_idx[lu['ID']] = lu # populate with LU index entries. if any of these
+        for lu in XMLCorpusView(
+            self.abspath("luIndex.xml"), "luIndex/lu", self._handle_elt
+        ):
+            self._lu_idx[
+                lu["ID"]
+            ] = lu  # populate with LU index entries. if any of these
             # are looked up they will be replaced by full LU objects.
 
     def _buildrelationindex(self):
-        #print('building relation index...', file=sys.stderr)
-        freltypes = PrettyList(x for x in XMLCorpusView(self.abspath("frRelation.xml"),
-                                            'frameRelations/frameRelationType',
-                                            self._handle_framerelationtype_elt))
+        # print('building relation index...', file=sys.stderr)
+        freltypes = PrettyList(
+            x
+            for x in XMLCorpusView(
+                self.abspath("frRelation.xml"),
+                "frameRelations/frameRelationType",
+                self._handle_framerelationtype_elt,
+            )
+        )
         self._freltyp_idx = {}
         self._frel_idx = {}
         self._frel_f_idx = defaultdict(set)
@@ -1116,22 +1286,30 @@ warnings(True) to display corpus consistency warnings when loading data
         for freltyp in freltypes:
             self._freltyp_idx[freltyp.ID] = freltyp
             for frel in freltyp.frameRelations:
-                supF = frel.superFrame = frel[freltyp.superFrameName] = Future((lambda fID: lambda: self.frame_by_id(fID))(frel.supID))
-                subF = frel.subFrame = frel[freltyp.subFrameName] = Future((lambda fID: lambda: self.frame_by_id(fID))(frel.subID))
+                supF = frel.superFrame = frel[freltyp.superFrameName] = Future(
+                    (lambda fID: lambda: self.frame_by_id(fID))(frel.supID)
+                )
+                subF = frel.subFrame = frel[freltyp.subFrameName] = Future(
+                    (lambda fID: lambda: self.frame_by_id(fID))(frel.subID)
+                )
                 self._frel_idx[frel.ID] = frel
                 self._frel_f_idx[frel.supID].add(frel.ID)
                 self._frel_f_idx[frel.subID].add(frel.ID)
                 for ferel in frel.feRelations:
                     ferel.superFrame = supF
                     ferel.subFrame = subF
-                    ferel.superFE = Future((lambda fer: lambda: fer.superFrame.FE[fer.superFEName])(ferel))
-                    ferel.subFE = Future((lambda fer: lambda: fer.subFrame.FE[fer.subFEName])(ferel))
+                    ferel.superFE = Future(
+                        (lambda fer: lambda: fer.superFrame.FE[fer.superFEName])(ferel)
+                    )
+                    ferel.subFE = Future(
+                        (lambda fer: lambda: fer.subFrame.FE[fer.subFEName])(ferel)
+                    )
                     self._ferel_idx[ferel.ID] = ferel
-        #print('...done building relation index', file=sys.stderr)
+        # print('...done building relation index', file=sys.stderr)
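The (lambda fID: lambda: self.frame_by_id(fID))(frel.supID) shape above is deliberate: a bare lambda would close over the loop variable itself, so every Future would resolve to the last relation processed. The outer lambda's parameter freezes the current value. A minimal demonstration:

    late = [lambda: i for i in range(3)]                  # all three share one i
    early = [(lambda n: lambda: n)(i) for i in range(3)]  # each freezes its own value
    assert [f() for f in late] == [2, 2, 2]
    assert [f() for f in early] == [0, 1, 2]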
 
     def _warn(self, *message, **kwargs):
         if self._warnings:
-            kwargs.setdefault('file', sys.stderr)
+            kwargs.setdefault("file", sys.stderr)
             print(*message, **kwargs)
 
     def readme(self):
@@ -1213,14 +1391,13 @@ warnings(True) to display corpus consistency warnings when loading data
             raise FramenetError("Unknown document id: {0}".format(fn_docid))
 
         # construct the path name for the xml file containing the document info
-        locpath = os.path.join(
-            "{0}".format(self._root), self._fulltext_dir, xmlfname)
+        locpath = os.path.join("{0}".format(self._root), self._fulltext_dir, xmlfname)
 
         # Grab the top-level xml element containing the fulltext annotation
-        elt = XMLCorpusView(locpath, 'fullTextAnnotation')[0]
+        elt = XMLCorpusView(locpath, "fullTextAnnotation")[0]
         info = self._handle_fulltextannotation_elt(elt)
         # add metadata
-        for k,v in self._fulltext_idx[fn_docid].items():
+        for k, v in self._fulltext_idx[fn_docid].items():
             info[k] = v
         return info
 
@@ -1255,14 +1432,14 @@ warnings(True) to display corpus consistency warnings when loading data
         # get the name of the frame with this id number
         try:
             fentry = self._frame_idx[fn_fid]
-            if '_type' in fentry:
-                return fentry   # full frame object is cached
-            name = fentry['name']
+            if "_type" in fentry:
+                return fentry  # full frame object is cached
+            name = fentry["name"]
         except TypeError:
             self._buildframeindex()
-            name = self._frame_idx[fn_fid]['name']
+            name = self._frame_idx[fn_fid]["name"]
         except KeyError:
-            raise FramenetError('Unknown frame id: {0}'.format(fn_fid))
+            raise FramenetError("Unknown frame id: {0}".format(fn_fid))
 
         return self.frame_by_name(name, ignorekeys, check_cache=False)
 
@@ -1300,35 +1477,37 @@ warnings(True) to display corpus consistency warnings when loading data
 
         # construct the path name for the xml file containing the Frame info
         locpath = os.path.join(
-            "{0}".format(self._root), self._frame_dir, fn_fname + ".xml")
-        #print(locpath, file=sys.stderr)
+            "{0}".format(self._root), self._frame_dir, fn_fname + ".xml"
+        )
+        # print(locpath, file=sys.stderr)
         # Grab the xml for the frame
         try:
-            elt = XMLCorpusView(locpath, 'frame')[0]
+            elt = XMLCorpusView(locpath, "frame")[0]
         except IOError:
-            raise FramenetError('Unknown frame: {0}'.format(fn_fname))
+            raise FramenetError("Unknown frame: {0}".format(fn_fname))
 
         fentry = self._handle_frame_elt(elt, ignorekeys)
         assert fentry
 
-        fentry.URL = self._fnweb_url + '/' + self._frame_dir + '/' + fn_fname + '.xml'
+        fentry.URL = self._fnweb_url + "/" + self._frame_dir + "/" + fn_fname + ".xml"
 
         # INFERENCE RULE: propagate lexical semtypes from the frame to all its LUs
         for st in fentry.semTypes:
-            if st.rootType.name=='Lexical_type':
+            if st.rootType.name == "Lexical_type":
                 for lu in fentry.lexUnit.values():
-                    if not any(x is st for x in lu.semTypes):  # identity containment check
+                    if not any(
+                        x is st for x in lu.semTypes
+                    ):  # identity containment check
                         lu.semTypes.append(st)
 
-
         self._frame_idx[fentry.ID] = fentry
         self._cached_frames[fentry.name] = fentry.ID
-        '''
+        """
         # now set up callables to resolve the LU pointers lazily.
         # (could also do this here--caching avoids infinite recursion.)
         for luName,luinfo in fentry.lexUnit.items():
             fentry.lexUnit[luName] = (lambda luID: Future(lambda: self.lu(luID)))(luinfo.ID)
-        '''
+        """
         return fentry
 
     def frame(self, fn_fid_or_fname, ignorekeys=[]):
@@ -1400,7 +1579,7 @@ warnings(True) to display corpus consistency warnings when loading data
         """
 
         # get the frame info by name or id number
-        if isinstance(fn_fid_or_fname, string_types):
+        if isinstance(fn_fid_or_fname, str):
             f = self.frame_by_name(fn_fid_or_fname, ignorekeys)
         else:
             f = self.frame_by_id(fn_fid_or_fname, ignorekeys)
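A usage sketch of this dispatch (assumes the FrameNet 1.7 data is installed, e.g. via nltk.download('framenet_v17')):

    from nltk.corpus import framenet as fn

    by_name = fn.frame("Communication")  # str argument -> frame_by_name
    by_id = fn.frame(by_name.ID)         # numeric argument -> frame_by_id
    assert by_name.name == by_id.name == "Communication"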
@@ -1421,13 +1600,18 @@ warnings(True) to display corpus consistency warnings when loading data
         search through ALL of the frame XML files in the db.
 
         >>> from nltk.corpus import framenet as fn
-        >>> fn.frames_by_lemma(r'(?i)a little') # doctest: +ELLIPSIS
+        >>> from nltk.corpus.reader.framenet import PrettyList
+        >>> PrettyList(sorted(fn.frames_by_lemma(r'(?i)a little'), key=itemgetter('ID'))) # doctest: +ELLIPSIS
         [<frame ID=189 name=Quanti...>, <frame ID=2001 name=Degree>]
 
         :return: A list of frame objects.
         :rtype: list(AttrDict)
         """
-        return PrettyList(f for f in self.frames() if any(re.search(pat, luName) for luName in f.lexUnit))
+        return PrettyList(
+            f
+            for f in self.frames()
+            if any(re.search(pat, luName) for luName in f.lexUnit)
+        )
 
     def lu_basic(self, fn_luid):
         """
@@ -1441,7 +1625,7 @@ warnings(True) to display corpus consistency warnings when loading data
         >>> lu # doctest: +ELLIPSIS
         {'ID': 256,
          'POS': 'V',
-         'URL': u'https://framenet2.icsi.berkeley.edu/fnReports/data/lu/lu256.xml',
+         'URL': 'https://framenet2.icsi.berkeley.edu/fnReports/data/lu/lu256.xml',
          '_type': 'lu',
          'cBy': ...,
          'cDate': '02/08/2001 01:27:50 PST Thu',
@@ -1460,7 +1644,7 @@ warnings(True) to display corpus consistency warnings when loading data
         :return: Basic information about the lexical unit
         :rtype: dict
         """
-        return self.lu(fn_luid, ignorekeys=['subCorpus', 'exemplars'])
+        return self.lu(fn_luid, ignorekeys=["subCorpus", "exemplars"])
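The ignorekeys mechanism used here (and by lu() below) is plain key filtering over the record dict; a sketch with a hypothetical record:

    def without_keys(record, ignorekeys):
        # the same filtering lu() applies before returning an AttrDict
        return {k: v for k, v in record.items() if k not in ignorekeys}

    record = {"ID": 256, "subCorpus": [], "exemplars": []}  # hypothetical record
    assert without_keys(record, ["subCorpus", "exemplars"]) == {"ID": 256}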
 
     def lu(self, fn_luid, ignorekeys=[], luName=None, frameID=None, frameName=None):
         """
@@ -1599,19 +1783,32 @@ warnings(True) to display corpus consistency warnings when loading data
             # LU not in the index. We create a placeholder by falling back to
             # luName, frameID, and frameName. However, this will not be listed
             # among the LUs for its frame.
-            self._warn('LU ID not found: {0} ({1}) in {2} ({3})'.format(luName, fn_luid, frameName, frameID))
-            luinfo = AttrDict({'_type': 'lu', 'ID': fn_luid, 'name': luName,
-                               'frameID': frameID, 'status': 'Problem'})
+            self._warn(
+                "LU ID not found: {0} ({1}) in {2} ({3})".format(
+                    luName, fn_luid, frameName, frameID
+                )
+            )
+            luinfo = AttrDict(
+                {
+                    "_type": "lu",
+                    "ID": fn_luid,
+                    "name": luName,
+                    "frameID": frameID,
+                    "status": "Problem",
+                }
+            )
             f = self.frame_by_id(luinfo.frameID)
-            assert f.name==frameName,(f.name,frameName)
-            luinfo['frame'] = f
+            assert f.name == frameName, (f.name, frameName)
+            luinfo["frame"] = f
             self._lu_idx[fn_luid] = luinfo
-        elif '_type' not in luinfo:
+        elif "_type" not in luinfo:
             # we only have an index entry for the LU. loading the frame will replace this.
             f = self.frame_by_id(luinfo.frameID)
             luinfo = self._lu_idx[fn_luid]
         if ignorekeys:
-            return AttrDict(dict((k, v) for k, v in luinfo.items() if k not in ignorekeys))
+            return AttrDict(
+                dict((k, v) for k, v in luinfo.items() if k not in ignorekeys)
+            )
 
         return luinfo
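
For reference, a typical call to lu() (a sketch; LU 256 is the ID used in the
lu_basic() doctest above):

    from nltk.corpus import framenet as fn
    lu = fn.lu(256)  # full LU record, with lazy subCorpus/exemplars attached
    # equivalent to fn.lu_basic(256): drop the heavyweight annotation keys
    lu_light = fn.lu(256, ignorekeys=["subCorpus", "exemplars"])
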
 
@@ -1624,39 +1821,45 @@ warnings(True) to display corpus consistency warnings when loading data
 
         fname = "lu{0}.xml".format(fn_luid)
         locpath = os.path.join("{0}".format(self._root), self._lu_dir, fname)
-        #print(locpath, file=sys.stderr)
+        # print(locpath, file=sys.stderr)
         if not self._lu_idx:
             self._buildluindex()
 
         try:
-            elt = XMLCorpusView(locpath, 'lexUnit')[0]
+            elt = XMLCorpusView(locpath, "lexUnit")[0]
         except IOError:
-            raise FramenetError('Unknown LU id: {0}'.format(fn_luid))
+            raise FramenetError("Unknown LU id: {0}".format(fn_luid))
 
         lu2 = self._handle_lexunit_elt(elt, ignorekeys)
-        lu.URL = self._fnweb_url + '/' + self._lu_dir + '/' + fname
+        lu.URL = self._fnweb_url + "/" + self._lu_dir + "/" + fname
         lu.subCorpus = lu2.subCorpus
-        lu.exemplars = SpecialList('luexemplars',
-                                   [sent for subc in lu.subCorpus for sent in subc.sentence])
+        lu.exemplars = SpecialList(
+            "luexemplars", [sent for subc in lu.subCorpus for sent in subc.sentence]
+        )
         for sent in lu.exemplars:
-            sent['LU'] = lu
-            sent['frame'] = lu.frame
+            sent["LU"] = lu
+            sent["frame"] = lu.frame
             for aset in sent.annotationSet:
-                aset['LU'] = lu
-                aset['frame'] = lu.frame
+                aset["LU"] = lu
+                aset["frame"] = lu.frame
 
         return lu
 
     def _loadsemtypes(self):
         """Create the semantic types index."""
         self._semtypes = AttrDict()
-        semtypeXML = [x for x in XMLCorpusView(self.abspath("semTypes.xml"),
-                                             'semTypes/semType',
-                                             self._handle_semtype_elt)]
+        semtypeXML = [
+            x
+            for x in XMLCorpusView(
+                self.abspath("semTypes.xml"),
+                "semTypes/semType",
+                self._handle_semtype_elt,
+            )
+        ]
         for st in semtypeXML:
-            n = st['name']
-            a = st['abbrev']
-            i = st['ID']
+            n = st["name"]
+            a = st["abbrev"]
+            i = st["ID"]
             # Both name and abbrev should be able to retrieve the
             # ID. The ID will retrieve the semantic type dict itself.
             self._semtypes[n] = i
@@ -1669,7 +1872,8 @@ warnings(True) to display corpus consistency warnings when loading data
                 st.superType = self.semtype(st.superType.supID)
                 st.superType.subTypes.append(st)
             else:
-                if st not in roots: roots.append(st)
+                if st not in roots:
+                    roots.append(st)
                 st.rootType = st
         queue = list(roots)
         assert queue
@@ -1678,7 +1882,7 @@ warnings(True) to display corpus consistency warnings when loading data
             for child in st.subTypes:
                 child.rootType = st.rootType
                 queue.append(child)
-        #self.propagate_semtypes()  # apply inferencing over FE relations
+        # self.propagate_semtypes()  # apply inferencing over FE relations
 
     def propagate_semtypes(self):
         """
@@ -1712,23 +1916,30 @@ warnings(True) to display corpus consistency warnings when loading data
                 try:
                     if superST and superST is not subST:
                         # propagate downward
-                        assert subST is None or self.semtype_inherits(subST, superST),(superST.name,ferel,subST.name)
+                        assert subST is None or self.semtype_inherits(subST, superST), (
+                            superST.name,
+                            ferel,
+                            subST.name,
+                        )
                         if subST is None:
                             ferel.subFE.semType = subST = superST
                             changed = True
                             nPropagations += 1
-                    if ferel.type.name in ['Perspective_on', 'Subframe', 'Precedes'] and subST \
-                        and subST is not superST:
+                    if (
+                        ferel.type.name in ["Perspective_on", "Subframe", "Precedes"]
+                        and subST
+                        and subST is not superST
+                    ):
                         # propagate upward
-                        assert superST is None,(superST.name,ferel,subST.name)
+                        assert superST is None, (superST.name, ferel, subST.name)
                         ferel.superFE.semType = superST = subST
                         changed = True
                         nPropagations += 1
                 except AssertionError as ex:
                     # bug in the data! ignore
-                    #print(ex, file=sys.stderr)
+                    # print(ex, file=sys.stderr)
                     continue
-            #print(i, nPropagations, file=sys.stderr)
+            # print(i, nPropagations, file=sys.stderr)
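
The docstring of propagate_semtypes() is elided by this hunk; a sketch of the
intended call pattern (exact counts are FrameNet-version-dependent):

    from nltk.corpus import framenet as fn
    before = sum(1 for f in fn.frames() for fe in f.FE.values() if fe.semType)
    fn.propagate_semtypes()  # fixed-point inference over FE relations
    after = sum(1 for f in fn.frames() for fe in f.FE.values() if fe.semType)
    assert after >= before  # propagation only ever adds semantic types
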
 
     def semtype(self, key):
         """
@@ -1782,7 +1993,7 @@ warnings(True) to display corpus consistency warnings when loading data
         >>> len(fn.frames()) in (1019, 1221)    # FN 1.5 and 1.7, resp.
         True
         >>> x = PrettyList(fn.frames(r'(?i)crim'), maxReprSize=0, breakLines=True)
-        >>> x.sort(key=lambda f: f.ID)
+        >>> x.sort(key=itemgetter('ID'))
         >>> x
         [<frame ID=200 name=Criminal_process>,
          <frame ID=500 name=Criminal_investigation>,
@@ -1842,7 +2053,9 @@ warnings(True) to display corpus consistency warnings when loading data
             fIDs = list(self._frame_idx.keys())
 
         if name is not None:
-            return PrettyList(self.frame(fID) for fID,finfo in self.frame_ids_and_names(name).items())
+            return PrettyList(
+                self.frame(fID) for fID, finfo in self.frame_ids_and_names(name).items()
+            )
         else:
             return PrettyLazyMap(self.frame, fIDs)
 
@@ -1853,10 +2066,14 @@ warnings(True) to display corpus consistency warnings when loading data
         """
         if not self._frame_idx:
             self._buildframeindex()
-        return dict((fID, finfo.name) for fID,finfo in self._frame_idx.items() if name is None or re.search(name, finfo.name) is not None)
+        return dict(
+            (fID, finfo.name)
+            for fID, finfo in self._frame_idx.items()
+            if name is None or re.search(name, finfo.name) is not None
+        )
 
     def fes(self, name=None, frame=None):
-        '''
+        """
         Lists frame element objects. If 'name' is provided, this is treated as
         a case-insensitive regular expression to filter by frame element name.
         (Case-insensitivity is because casing of frame element names is not always
@@ -1888,19 +2105,24 @@ warnings(True) to display corpus consistency warnings when loading data
         :type name: str
         :return: A list of matching frame elements
         :rtype: list(AttrDict)
-        '''
+        """
         # what frames are we searching in?
         if frame is not None:
             if isinstance(frame, int):
                 frames = [self.frame(frame)]
-            elif isinstance(frame, string_types):
+            elif isinstance(frame, str):
                 frames = self.frames(frame)
             else:
                 frames = [frame]
         else:
             frames = self.frames()
 
-        return PrettyList(fe for f in frames for fename,fe in f.FE.items() if name is None or re.search(name, fename, re.I))
+        return PrettyList(
+            fe
+            for f in frames
+            for fename, fe in f.FE.items()
+            if name is None or re.search(name, fename, re.I)
+        )
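
A usage sketch for fes(), matching the parameter handling above (frame may be
an ID, a name pattern, or a frame object; the regex below is illustrative):

    from nltk.corpus import framenet as fn
    fn.fes("(?i)noise")  # FEs whose name matches the pattern, in any frame
    fn.fes(frame=2001)   # every FE of frame ID 2001 (Degree, per the doctest above)
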
 
     def lus(self, name=None, frame=None):
         """
@@ -1911,12 +2133,12 @@ warnings(True) to display corpus consistency warnings when loading data
         >>> from nltk.corpus import framenet as fn
         >>> len(fn.lus()) in (11829, 13572) # FN 1.5 and 1.7, resp.
         True
-        >>> PrettyList(fn.lus(r'(?i)a little'), maxReprSize=0, breakLines=True)
-        [<lu ID=14744 name=a little bit.adv>,
-         <lu ID=14733 name=a little.n>,
-         <lu ID=14743 name=a little.adv>]
-        >>> fn.lus(r'interest', r'(?i)stimulus')
-        [<lu ID=14920 name=interesting.a>, <lu ID=14894 name=interested.a>]
+        >>> PrettyList(sorted(fn.lus(r'(?i)a little'), key=itemgetter('ID')), maxReprSize=0, breakLines=True)
+        [<lu ID=14733 name=a little.n>,
+         <lu ID=14743 name=a little.adv>,
+         <lu ID=14744 name=a little bit.adv>]
+        >>> PrettyList(sorted(fn.lus(r'interest', r'(?i)stimulus'), key=itemgetter('ID')))
+        [<lu ID=14894 name=interested.a>, <lu ID=14920 name=interesting.a>]
 
         A brief intro to Lexical Units (excerpted from "FrameNet II:
         Extended Theory and Practice" by Ruppenhofer et. al., 2010):
@@ -2010,28 +2232,34 @@ warnings(True) to display corpus consistency warnings when loading data
         if not self._lu_idx:
             self._buildluindex()
 
-
-
-        if name is not None:    # match LUs, then restrict by frame
-            result = PrettyList(self.lu(luID) for luID,luName in self.lu_ids_and_names(name).items())
+        if name is not None:  # match LUs, then restrict by frame
+            result = PrettyList(
+                self.lu(luID) for luID, luName in self.lu_ids_and_names(name).items()
+            )
             if frame is not None:
                 if isinstance(frame, int):
                     frameIDs = {frame}
-                elif isinstance(frame, string_types):
+                elif isinstance(frame, str):
                     frameIDs = {f.ID for f in self.frames(frame)}
                 else:
                     frameIDs = {frame.ID}
                 result = PrettyList(lu for lu in result if lu.frame.ID in frameIDs)
-        elif frame is not None: # all LUs in matching frames
+        elif frame is not None:  # all LUs in matching frames
             if isinstance(frame, int):
                 frames = [self.frame(frame)]
-            elif isinstance(frame, string_types):
+            elif isinstance(frame, str):
                 frames = self.frames(frame)
             else:
                 frames = [frame]
-            result = PrettyLazyIteratorList(iter(LazyConcatenation(list(f.lexUnit.values()) for f in frames)))
-        else:   # all LUs
-            luIDs = [luID for luID,lu in self._lu_idx.items() if lu.status not in self._bad_statuses]
+            result = PrettyLazyIteratorList(
+                iter(LazyConcatenation(list(f.lexUnit.values()) for f in frames))
+            )
+        else:  # all LUs
+            luIDs = [
+                luID
+                for luID, lu in self._lu_idx.items()
+                if lu.status not in self._bad_statuses
+            ]
             result = PrettyLazyMap(self.lu, luIDs)
         return result
 
@@ -2042,9 +2270,12 @@ warnings(True) to display corpus consistency warnings when loading data
         """
         if not self._lu_idx:
             self._buildluindex()
-        return {luID: luinfo.name for luID,luinfo in self._lu_idx.items()
-                if luinfo.status not in self._bad_statuses
-                    and (name is None or re.search(name, luinfo.name) is not None)}
+        return {
+            luID: luinfo.name
+            for luID, luinfo in self._lu_idx.items()
+            if luinfo.status not in self._bad_statuses
+            and (name is None or re.search(name, luinfo.name) is not None)
+        }
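
A sketch of lu_ids_and_names(), using the same pattern as the lus() doctest:

    from nltk.corpus import framenet as fn
    ids = fn.lu_ids_and_names(r"(?i)a little")
    # -> {14733: 'a little.n', 14743: 'a little.adv', 14744: 'a little bit.adv'}
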
 
     def docs_metadata(self, name=None):
         """
@@ -2089,7 +2320,9 @@ warnings(True) to display corpus consistency warnings when loading data
         if name is None:
             return ftlist
         else:
-            return PrettyList(x for x in ftlist if re.search(name, x['filename']) is not None)
+            return PrettyList(
+                x for x in ftlist if re.search(name, x["filename"]) is not None
+            )
 
     def docs(self, name=None):
         """
@@ -2116,14 +2349,21 @@ warnings(True) to display corpus consistency warnings when loading data
         """
 
         if exemplars:
-            epart = PrettyLazyIteratorList(sent.frameAnnotation for sent in self.exemplars(luNamePattern))
+            epart = PrettyLazyIteratorList(
+                sent.frameAnnotation for sent in self.exemplars(luNamePattern)
+            )
         else:
             epart = []
 
         if full_text:
             if luNamePattern is not None:
                 matchedLUIDs = set(self.lu_ids_and_names(luNamePattern).keys())
-            ftpart = PrettyLazyIteratorList(aset for sent in self.ft_sents() for aset in sent.annotationSet[1:] if luNamePattern is None or aset.get('luID','CXN_ASET') in matchedLUIDs)
+            ftpart = PrettyLazyIteratorList(
+                aset
+                for sent in self.ft_sents()
+                for aset in sent.annotationSet[1:]
+                if luNamePattern is None or aset.get("luID", "CXN_ASET") in matchedLUIDs
+            )
         else:
             ftpart = []
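
This hunk sits inside annotations(); a usage sketch assuming the standard
NLTK signature annotations(luNamePattern=None, exemplars=True, full_text=True),
with an illustrative LU pattern:

    from nltk.corpus import framenet as fn
    # frame annotation sets for matching LUs, drawn from both exemplar
    # sentences and full-text documents
    asets = fn.annotations("(?i)bake", exemplars=True, full_text=True)
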
 
@@ -2143,22 +2383,26 @@ warnings(True) to display corpus consistency warnings when loading data
         be specified to retrieve sentences with both overt FEs (in either order).
         """
         if fe is None and fe2 is not None:
-            raise FramenetError('exemplars(..., fe=None, fe2=<value>) is not allowed')
+            raise FramenetError("exemplars(..., fe=None, fe2=<value>) is not allowed")
         elif fe is not None and fe2 is not None:
-            if not isinstance(fe2, string_types):
-                if isinstance(fe, string_types):
+            if not isinstance(fe2, str):
+                if isinstance(fe, str):
                     # fe2 is specific to a particular frame. swap fe and fe2 so fe is always used to determine the frame.
                     fe, fe2 = fe2, fe
-                elif fe.frame is not fe2.frame: # ensure frames match
-                    raise FramenetError('exemplars() call with inconsistent `fe` and `fe2` specification (frames must match)')
-        if frame is None and fe is not None and not isinstance(fe, string_types):
+                elif fe.frame is not fe2.frame:  # ensure frames match
+                    raise FramenetError(
+                        "exemplars() call with inconsistent `fe` and `fe2` specification (frames must match)"
+                    )
+        if frame is None and fe is not None and not isinstance(fe, str):
             frame = fe.frame
 
         # narrow down to frames matching criteria
 
-        lusByFrame = defaultdict(list)   # frame name -> matching LUs, if luNamePattern is specified
+        lusByFrame = defaultdict(
+            list
+        )  # frame name -> matching LUs, if luNamePattern is specified
         if frame is not None or luNamePattern is not None:
-            if frame is None or isinstance(frame, string_types):
+            if frame is None or isinstance(frame, str):
                 if luNamePattern is not None:
                     frames = set()
                     for lu in self.lus(luNamePattern, frame=frame):
@@ -2168,27 +2412,39 @@ warnings(True) to display corpus consistency warnings when loading data
                 else:
                     frames = self.frames(frame)
             else:
-                if isinstance(frame,int):
+                if isinstance(frame, int):
                     frames = [self.frame(frame)]
-                else:   # frame object
+                else:  # frame object
                     frames = [frame]
 
                 if luNamePattern is not None:
                     lusByFrame = {frame.name: self.lus(luNamePattern, frame=frame)}
 
             if fe is not None:  # narrow to frames that define this FE
-                if isinstance(fe, string_types):
-                    frames = PrettyLazyIteratorList(f for f in frames if fe in f.FE or any(re.search(fe, ffe, re.I) for ffe in f.FE.keys()))
+                if isinstance(fe, str):
+                    frames = PrettyLazyIteratorList(
+                        f
+                        for f in frames
+                        if fe in f.FE
+                        or any(re.search(fe, ffe, re.I) for ffe in f.FE.keys())
+                    )
                 else:
                     if fe.frame not in frames:
-                        raise FramenetError('exemplars() call with inconsistent `frame` and `fe` specification')
+                        raise FramenetError(
+                            "exemplars() call with inconsistent `frame` and `fe` specification"
+                        )
                     frames = [fe.frame]
 
-                if fe2 is not None: # narrow to frames that ALSO define this FE
-                    if isinstance(fe2, string_types):
-                        frames = PrettyLazyIteratorList(f for f in frames if fe2 in f.FE or any(re.search(fe2, ffe, re.I) for ffe in f.FE.keys()))
+                if fe2 is not None:  # narrow to frames that ALSO define this FE
+                    if isinstance(fe2, str):
+                        frames = PrettyLazyIteratorList(
+                            f
+                            for f in frames
+                            if fe2 in f.FE
+                            or any(re.search(fe2, ffe, re.I) for ffe in f.FE.keys())
+                        )
                     # else we already narrowed it to a single frame
-        else:   # frame, luNamePattern are None. fe, fe2 are None or strings
+        else:  # frame, luNamePattern are None. fe, fe2 are None or strings
             if fe is not None:
                 frames = {ffe.frame.ID for ffe in self.fes(fe)}
                 if fe2 is not None:
@@ -2203,15 +2459,29 @@ warnings(True) to display corpus consistency warnings when loading data
 
         def _matching_exs():
             for f in frames:
-                fes = fes2 = None   # FEs of interest
+                fes = fes2 = None  # FEs of interest
                 if fe is not None:
-                    fes = {ffe for ffe in f.FE.keys() if re.search(fe, ffe, re.I)} if isinstance(fe, string_types) else {fe.name}
+                    fes = (
+                        {ffe for ffe in f.FE.keys() if re.search(fe, ffe, re.I)}
+                        if isinstance(fe, str)
+                        else {fe.name}
+                    )
                     if fe2 is not None:
-                        fes2 = {ffe for ffe in f.FE.keys() if re.search(fe2, ffe, re.I)} if isinstance(fe2, string_types) else {fe2.name}
-
-                for lu in lusByFrame[f.name] if luNamePattern is not None else f.lexUnit.values():
+                        fes2 = (
+                            {ffe for ffe in f.FE.keys() if re.search(fe2, ffe, re.I)}
+                            if isinstance(fe2, str)
+                            else {fe2.name}
+                        )
+
+                for lu in (
+                    lusByFrame[f.name]
+                    if luNamePattern is not None
+                    else f.lexUnit.values()
+                ):
                     for ex in lu.exemplars:
-                        if (fes is None or self._exemplar_of_fes(ex, fes)) and (fes2 is None or self._exemplar_of_fes(ex, fes2)):
+                        if (fes is None or self._exemplar_of_fes(ex, fes)) and (
+                            fes2 is None or self._exemplar_of_fes(ex, fes2)
+                        ):
                             yield ex
 
         return PrettyLazyIteratorList(_matching_exs())
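
A usage sketch for exemplars() (the LU pattern is illustrative; Apply_heat and
its Food and Cook FEs are standard FrameNet examples):

    from nltk.corpus import framenet as fn
    fn.exemplars("(?i)bake")                     # exemplars of matching LUs
    fn.exemplars(frame="Apply_heat", fe="Food")  # sentences with an overt Food FE
    fn.exemplars(frame="Apply_heat", fe="Food", fe2="Cook")  # both FEs overt
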
@@ -2224,9 +2494,9 @@ warnings(True) to display corpus consistency warnings when loading data
         If 'fes' is None, returns all overt FE names.
         """
         overtNames = set(list(zip(*ex.FE[0]))[2]) if ex.FE[0] else set()
-        if 'FE2' in ex:
+        if "FE2" in ex:
             overtNames |= set(list(zip(*ex.FE2[0]))[2]) if ex.FE2[0] else set()
-            if 'FE3' in ex:
+            if "FE3" in ex:
                 overtNames |= set(list(zip(*ex.FE3[0]))[2]) if ex.FE3[0] else set()
         return overtNames & fes if fes is not None else overtNames
 
@@ -2234,15 +2504,16 @@ warnings(True) to display corpus consistency warnings when loading data
         """
         Full-text annotation sentences, optionally filtered by document name.
         """
-        return PrettyLazyIteratorList(sent for d in self.docs(docNamePattern) for sent in d.sentence)
-
+        return PrettyLazyIteratorList(
+            sent for d in self.docs(docNamePattern) for sent in d.sentence
+        )
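
A usage sketch for ft_sents(); the pattern is matched against document
filenames via docs() (the pattern below is illustrative):

    from nltk.corpus import framenet as fn
    sents = fn.ft_sents()        # all full-text annotation sentences (lazy)
    anc = fn.ft_sents("ANC__")   # only documents whose filename matches
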
 
     def frame_relation_types(self):
         """
         Obtain a list of frame relation types.
 
         >>> from nltk.corpus import framenet as fn
-        >>> frts = list(fn.frame_relation_types())
+        >>> frts = sorted(fn.frame_relation_types(), key=itemgetter('ID'))
         >>> isinstance(frts, list)
         True
         >>> len(frts) in (9, 10)    # FN 1.5 and 1.7, resp.
@@ -2305,12 +2576,12 @@ warnings(True) to display corpus consistency warnings when loading data
 
         if relation_type is not None:
             if not isinstance(relation_type, dict):
-                type = [rt for rt in self.frame_relation_types() if rt.name==type][0]
-                assert isinstance(type,dict)
+                relation_type = [
+                    rt
+                    for rt in self.frame_relation_types()
+                    if rt.name == relation_type
+                ][0]
+                assert isinstance(relation_type, dict)
 
         # lookup by 'frame'
         if frame is not None:
-            if isinstance(frame,dict) and 'frameRelations' in frame:
+            if isinstance(frame, dict) and "frameRelations" in frame:
                 rels = PrettyList(frame.frameRelations)
             else:
                 if not isinstance(frame, int):
@@ -2332,18 +2603,30 @@ warnings(True) to display corpus consistency warnings when loading data
         # filter by 'frame2'
         if frame2 is not None:
             if frame is None:
-                raise FramenetError("frame_relations(frame=None, frame2=<value>) is not allowed")
+                raise FramenetError(
+                    "frame_relations(frame=None, frame2=<value>) is not allowed"
+                )
             if not isinstance(frame2, int):
                 if isinstance(frame2, dict):
                     frame2 = frame2.ID
                 else:
                     frame2 = self.frame_by_name(frame2).ID
-            if frame==frame2:
-                raise FramenetError("The two frame arguments to frame_relations() must be different frames")
-            rels = [rel for rel in rels if rel.superFrame.ID==frame2 or rel.subFrame.ID==frame2]
-
-        return PrettyList(sorted(rels,
-                key=lambda frel: (frel.type.ID, frel.superFrameName, frel.subFrameName)))
+            if frame == frame2:
+                raise FramenetError(
+                    "The two frame arguments to frame_relations() must be different frames"
+                )
+            rels = [
+                rel
+                for rel in rels
+                if rel.superFrame.ID == frame2 or rel.subFrame.ID == frame2
+            ]
+
+        return PrettyList(
+            sorted(
+                rels,
+                key=lambda frel: (frel.type.ID, frel.superFrameName, frel.subFrameName),
+            )
+        )
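
A usage sketch for frame_relations() (frame names illustrative; an ID, a name,
or a frame object is accepted, per the branching above):

    from nltk.corpus import framenet as fn
    fn.frame_relations(frame="Cooking_creation")  # all relations of the frame
    fn.frame_relations(frame="Cooking_creation", frame2="Apply_heat")  # between two
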
 
     def fe_relations(self):
         """
@@ -2374,9 +2657,18 @@ warnings(True) to display corpus consistency warnings when loading data
         """
         if not self._ferel_idx:
             self._buildrelationindex()
-        return PrettyList(sorted(self._ferel_idx.values(),
-                key=lambda ferel: (ferel.type.ID, ferel.frameRelation.superFrameName,
-                    ferel.superFEName, ferel.frameRelation.subFrameName, ferel.subFEName)))
+        return PrettyList(
+            sorted(
+                self._ferel_idx.values(),
+                key=lambda ferel: (
+                    ferel.type.ID,
+                    ferel.frameRelation.superFrameName,
+                    ferel.superFEName,
+                    ferel.frameRelation.subFrameName,
+                    ferel.subFEName,
+                ),
+            )
+        )
 
     def semtypes(self):
         """
@@ -2394,7 +2686,9 @@ warnings(True) to display corpus consistency warnings when loading data
         """
         if not self._semtypes:
             self._loadsemtypes()
-        return PrettyList(self._semtypes[i] for i in self._semtypes if isinstance(i, int))
+        return PrettyList(
+            self._semtypes[i] for i in self._semtypes if isinstance(i, int)
+        )
 
     def _load_xml_attributes(self, d, elt):
         """
@@ -2420,8 +2714,13 @@ warnings(True) to display corpus consistency warnings when loading data
             return d
 
         # Ignore these attributes when loading attributes from an xml node
-        ignore_attrs = [ #'cBy', 'cDate', 'mDate', # <-- annotation metadata that could be of interest
-                        'xsi', 'schemaLocation', 'xmlns', 'bgColor', 'fgColor']
+        ignore_attrs = [  #'cBy', 'cDate', 'mDate', # <-- annotation metadata that could be of interest
+            "xsi",
+            "schemaLocation",
+            "xmlns",
+            "bgColor",
+            "fgColor",
+        ]
 
         for attr in attr_dict:
 
@@ -2445,35 +2744,35 @@ warnings(True) to display corpus consistency warnings when loading data
         """
 
         try:
-            '''
+            """
             # Look for boundary issues in markup. (Sometimes FEs are pluralized in definitions.)
             m = re.search(r'\w[<][^/]|[<][/][^>]+[>](s\w|[a-rt-z0-9])', data)
             if m:
                 print('Markup boundary:', data[max(0,m.start(0)-10):m.end(0)+10].replace('\n',' '), file=sys.stderr)
-            '''
-
-            data = data.replace('<t>', '')
-            data = data.replace('</t>', '')
-            data = re.sub('<fex name="[^"]+">', '', data)
-            data = data.replace('</fex>', '')
-            data = data.replace('<fen>', '')
-            data = data.replace('</fen>', '')
-            data = data.replace('<m>', '')
-            data = data.replace('</m>', '')
-            data = data.replace('<ment>', '')
-            data = data.replace('</ment>', '')
-            data = data.replace('<ex>', "'")
-            data = data.replace('</ex>', "'")
-            data = data.replace('<gov>', '')
-            data = data.replace('</gov>', '')
-            data = data.replace('<x>', '')
-            data = data.replace('</x>', '')
+            """
+
+            data = data.replace("<t>", "")
+            data = data.replace("</t>", "")
+            data = re.sub('<fex name="[^"]+">', "", data)
+            data = data.replace("</fex>", "")
+            data = data.replace("<fen>", "")
+            data = data.replace("</fen>", "")
+            data = data.replace("<m>", "")
+            data = data.replace("</m>", "")
+            data = data.replace("<ment>", "")
+            data = data.replace("</ment>", "")
+            data = data.replace("<ex>", "'")
+            data = data.replace("</ex>", "'")
+            data = data.replace("<gov>", "")
+            data = data.replace("</gov>", "")
+            data = data.replace("<x>", "")
+            data = data.replace("</x>", "")
 
             # Get rid of <def-root> and </def-root> tags
-            data = data.replace('<def-root>', '')
-            data = data.replace('</def-root>', '')
+            data = data.replace("<def-root>", "")
+            data = data.replace("</def-root>", "")
 
-            data = data.replace('\n', ' ')
+            data = data.replace("\n", " ")
         except AttributeError:
             pass
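
The replace chain above is correct but verbose; a compact regex-based
equivalent (a sketch, not part of this patch — note that <ex>...</ex> becomes
quotes rather than being deleted):

    import re

    def strip_tags(data):
        data = re.sub(r"</?ex>", "'", data)             # <ex> tags become quotes
        data = re.sub(r'<fex name="[^"]+">', "", data)  # opening <fex> carries a name
        data = re.sub(r"</?(?:t|fex|fen|m|ment|gov|x|def-root)>", "", data)
        return data.replace("\n", " ")
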
 
@@ -2500,14 +2799,16 @@ warnings(True) to display corpus consistency warnings when loading data
         corpid = ftinfo.ID
         retlist = []
         for sub in elt:
-            if sub.tag.endswith('document'):
+            if sub.tag.endswith("document"):
                 doc = self._load_xml_attributes(AttrDict(), sub)
-                if 'name' in doc:
+                if "name" in doc:
                     docname = doc.name
                 else:
                     docname = doc.description
                 doc.filename = "{0}__{1}.xml".format(corpname, docname)
-                doc.URL = self._fnweb_url + '/' + self._fulltext_dir + '/' + doc.filename
+                doc.URL = (
+                    self._fnweb_url + "/" + self._fulltext_dir + "/" + doc.filename
+                )
                 doc.corpname = corpname
                 doc.corpid = corpid
                 retlist.append(doc)
@@ -2518,58 +2819,70 @@ warnings(True) to display corpus consistency warnings when loading data
         """Load the info for a Frame from a frame xml file"""
         frinfo = self._load_xml_attributes(AttrDict(), elt)
 
-        frinfo['_type'] = 'frame'
-        frinfo['definition'] = ""
-        frinfo['definitionMarkup'] = ""
-        frinfo['FE'] = PrettyDict()
-        frinfo['FEcoreSets'] = []
-        frinfo['lexUnit'] = PrettyDict()
-        frinfo['semTypes'] = []
+        frinfo["_type"] = "frame"
+        frinfo["definition"] = ""
+        frinfo["definitionMarkup"] = ""
+        frinfo["FE"] = PrettyDict()
+        frinfo["FEcoreSets"] = []
+        frinfo["lexUnit"] = PrettyDict()
+        frinfo["semTypes"] = []
         for k in ignorekeys:
             if k in frinfo:
                 del frinfo[k]
 
         for sub in elt:
-            if sub.tag.endswith('definition') and 'definition' not in ignorekeys:
-                frinfo['definitionMarkup'] = sub.text
-                frinfo['definition'] = self._strip_tags(sub.text)
-            elif sub.tag.endswith('FE') and 'FE' not in ignorekeys:
+            if sub.tag.endswith("definition") and "definition" not in ignorekeys:
+                frinfo["definitionMarkup"] = sub.text
+                frinfo["definition"] = self._strip_tags(sub.text)
+            elif sub.tag.endswith("FE") and "FE" not in ignorekeys:
                 feinfo = self._handle_fe_elt(sub)
-                frinfo['FE'][feinfo.name] = feinfo
-                feinfo['frame'] = frinfo    # backpointer
-            elif sub.tag.endswith('FEcoreSet') and 'FEcoreSet' not in ignorekeys:
+                frinfo["FE"][feinfo.name] = feinfo
+                feinfo["frame"] = frinfo  # backpointer
+            elif sub.tag.endswith("FEcoreSet") and "FEcoreSet" not in ignorekeys:
                 coreset = self._handle_fecoreset_elt(sub)
                 # assumes all FEs have been loaded before coresets
-                frinfo['FEcoreSets'].append(PrettyList(frinfo['FE'][fe.name] for fe in coreset))
-            elif sub.tag.endswith('lexUnit') and 'lexUnit' not in ignorekeys:
+                frinfo["FEcoreSets"].append(
+                    PrettyList(frinfo["FE"][fe.name] for fe in coreset)
+                )
+            elif sub.tag.endswith("lexUnit") and "lexUnit" not in ignorekeys:
                 luentry = self._handle_framelexunit_elt(sub)
-                if luentry['status'] in self._bad_statuses:
+                if luentry["status"] in self._bad_statuses:
                     # problematic LU entry; ignore it
                     continue
-                luentry['frame'] = frinfo
-                luentry['URL'] = self._fnweb_url + '/' + self._lu_dir + '/' + "lu{0}.xml".format(luentry['ID'])
-                luentry['subCorpus'] = Future((lambda lu: lambda: self._lu_file(lu).subCorpus)(luentry))
-                luentry['exemplars'] = Future((lambda lu: lambda: self._lu_file(lu).exemplars)(luentry))
-                frinfo['lexUnit'][luentry.name] = luentry
+                luentry["frame"] = frinfo
+                luentry["URL"] = (
+                    self._fnweb_url
+                    + "/"
+                    + self._lu_dir
+                    + "/"
+                    + "lu{0}.xml".format(luentry["ID"])
+                )
+                luentry["subCorpus"] = Future(
+                    (lambda lu: lambda: self._lu_file(lu).subCorpus)(luentry)
+                )
+                luentry["exemplars"] = Future(
+                    (lambda lu: lambda: self._lu_file(lu).exemplars)(luentry)
+                )
+                frinfo["lexUnit"][luentry.name] = luentry
                 if not self._lu_idx:
                     self._buildluindex()
                 self._lu_idx[luentry.ID] = luentry
-            elif sub.tag.endswith('semType') and 'semTypes' not in ignorekeys:
+            elif sub.tag.endswith("semType") and "semTypes" not in ignorekeys:
                 semtypeinfo = self._load_xml_attributes(AttrDict(), sub)
-                frinfo['semTypes'].append(self.semtype(semtypeinfo.ID))
+                frinfo["semTypes"].append(self.semtype(semtypeinfo.ID))
 
-        frinfo['frameRelations'] = self.frame_relations(frame=frinfo)
+        frinfo["frameRelations"] = self.frame_relations(frame=frinfo)
 
         # resolve 'requires' and 'excludes' links between FEs of this frame
         for fe in frinfo.FE.values():
             if fe.requiresFE:
                 name, ID = fe.requiresFE.name, fe.requiresFE.ID
                 fe.requiresFE = frinfo.FE[name]
-                assert fe.requiresFE.ID==ID
+                assert fe.requiresFE.ID == ID
             if fe.excludesFE:
                 name, ID = fe.excludesFE.name, fe.excludesFE.ID
                 fe.excludesFE = frinfo.FE[name]
-                assert fe.excludesFE.ID==ID
+                assert fe.excludesFE.ID == ID
 
         return frinfo
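
A note on the Future((lambda lu: lambda: ...)(luentry)) construction above:
the immediately-applied outer lambda freezes the current luentry for each
iteration, where a bare closure would see only the loop's final value. A
minimal standalone sketch of the idiom:

    # naive closures all capture the same loop variable
    naive = [lambda: i for i in range(3)]
    print([f() for f in naive])   # [2, 2, 2]

    # an immediately-applied wrapper binds each value separately
    frozen = [(lambda j: (lambda: j))(i) for i in range(3)]
    print([f() for f in frozen])  # [0, 1, 2]
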
 
@@ -2585,32 +2898,32 @@ warnings(True) to display corpus consistency warnings when loading data
     def _handle_framerelationtype_elt(self, elt, *args):
         """Load frame-relation element and its child fe-relation elements from frRelation.xml."""
         info = self._load_xml_attributes(AttrDict(), elt)
-        info['_type'] = 'framerelationtype'
-        info['frameRelations'] = PrettyList()
+        info["_type"] = "framerelationtype"
+        info["frameRelations"] = PrettyList()
 
         for sub in elt:
-            if sub.tag.endswith('frameRelation'):
+            if sub.tag.endswith("frameRelation"):
                 frel = self._handle_framerelation_elt(sub)
-                frel['type'] = info   # backpointer
+                frel["type"] = info  # backpointer
                 for ferel in frel.feRelations:
-                    ferel['type'] = info
-                info['frameRelations'].append(frel)
+                    ferel["type"] = info
+                info["frameRelations"].append(frel)
 
         return info
 
     def _handle_framerelation_elt(self, elt):
         """Load frame-relation element and its child fe-relation elements from frRelation.xml."""
         info = self._load_xml_attributes(AttrDict(), elt)
-        assert info['superFrameName']!=info['subFrameName'],(elt,info)
-        info['_type'] = 'framerelation'
-        info['feRelations'] = PrettyList()
+        assert info["superFrameName"] != info["subFrameName"], (elt, info)
+        info["_type"] = "framerelation"
+        info["feRelations"] = PrettyList()
 
         for sub in elt:
-            if sub.tag.endswith('FERelation'):
+            if sub.tag.endswith("FERelation"):
                 ferel = self._handle_elt(sub)
-                ferel['_type'] = 'ferelation'
-                ferel['frameRelation'] = info   # backpointer
-                info['feRelations'].append(ferel)
+                ferel["_type"] = "ferelation"
+                ferel["frameRelation"] = info  # backpointer
+                info["feRelations"].append(ferel)
 
         return info
 
@@ -2620,16 +2933,16 @@ warnings(True) to display corpus consistency warnings when loading data
         element (which we ignore here) and a bunch of 'sentence'
         elements."""
         info = AttrDict()
-        info['_type'] = 'fulltext_annotation'
-        info['sentence'] = []
+        info["_type"] = "fulltext_annotation"
+        info["sentence"] = []
 
         for sub in elt:
-            if sub.tag.endswith('header'):
+            if sub.tag.endswith("header"):
                 continue  # not used
-            elif sub.tag.endswith('sentence'):
+            elif sub.tag.endswith("sentence"):
                 s = self._handle_fulltext_sentence_elt(sub)
                 s.doc = info
-                info['sentence'].append(s)
+                info["sentence"].append(s)
 
         return info
 
@@ -2638,37 +2951,48 @@ warnings(True) to display corpus consistency warnings when loading data
         'sentence' element contains a "text" and "annotationSet" sub
         elements."""
         info = self._load_xml_attributes(AttrDict(), elt)
-        info['_type'] = "fulltext_sentence"
-        info['annotationSet'] = []
-        info['targets'] = []
+        info["_type"] = "fulltext_sentence"
+        info["annotationSet"] = []
+        info["targets"] = []
         target_spans = set()
-        info['_ascii'] = types.MethodType(_annotation_ascii, info)  # attach a method for this instance
-        info['text'] = ""
+        info["_ascii"] = types.MethodType(
+            _annotation_ascii, info
+        )  # attach a method for this instance
+        info["text"] = ""
 
         for sub in elt:
-            if sub.tag.endswith('text'):
-                info['text'] = self._strip_tags(sub.text)
-            elif sub.tag.endswith('annotationSet'):
-                a = self._handle_fulltextannotationset_elt(sub, is_pos=(len(info['annotationSet'])==0))
-                if 'cxnID' in a: # ignoring construction annotations for now
+            if sub.tag.endswith("text"):
+                info["text"] = self._strip_tags(sub.text)
+            elif sub.tag.endswith("annotationSet"):
+                a = self._handle_fulltextannotationset_elt(
+                    sub, is_pos=(len(info["annotationSet"]) == 0)
+                )
+                if "cxnID" in a:  # ignoring construction annotations for now
                     continue
                 a.sent = info
                 a.text = info.text
-                info['annotationSet'].append(a)
-                if 'Target' in a:
+                info["annotationSet"].append(a)
+                if "Target" in a:
                     for tspan in a.Target:
                         if tspan in target_spans:
-                            self._warn('Duplicate target span "{0}"'.format(info.text[slice(*tspan)]),
-                                tspan, 'in sentence',info['ID'], info.text)
+                            self._warn(
+                                'Duplicate target span "{0}"'.format(
+                                    info.text[slice(*tspan)]
+                                ),
+                                tspan,
+                                "in sentence",
+                                info["ID"],
+                                info.text,
+                            )
                             # this can happen in cases like "chemical and biological weapons"
                             # being annotated as "chemical weapons" and "biological weapons"
                         else:
                             target_spans.add(tspan)
-                    info['targets'].append((a.Target, a.luName, a.frameName))
+                    info["targets"].append((a.Target, a.luName, a.frameName))
 
-        assert info['annotationSet'][0].status=='UNANN'
-        info['POS'] = info['annotationSet'][0].POS
-        info['POS_tagset'] = info['annotationSet'][0].POS_tagset
+        assert info["annotationSet"][0].status == "UNANN"
+        info["POS"] = info["annotationSet"][0].POS
+        info["POS_tagset"] = info["annotationSet"][0].POS_tagset
         return info
 
     def _handle_fulltextannotationset_elt(self, elt, is_pos=False):
@@ -2677,58 +3001,62 @@ warnings(True) to display corpus consistency warnings when loading data
 
         info = self._handle_luannotationset_elt(elt, is_pos=is_pos)
         if not is_pos:
-            info['_type'] = 'fulltext_annotationset'
-            if 'cxnID' not in info: # ignoring construction annotations for now
-                info['LU'] = self.lu(info.luID, luName=info.luName, frameID=info.frameID, frameName=info.frameName)
-                info['frame'] = info.LU.frame
+            info["_type"] = "fulltext_annotationset"
+            if "cxnID" not in info:  # ignoring construction annotations for now
+                info["LU"] = self.lu(
+                    info.luID,
+                    luName=info.luName,
+                    frameID=info.frameID,
+                    frameName=info.frameName,
+                )
+                info["frame"] = info.LU.frame
         return info
 
     def _handle_fulltextlayer_elt(self, elt):
         """Load information from the given 'layer' element. Each
         'layer' contains several "label" elements."""
         info = self._load_xml_attributes(AttrDict(), elt)
-        info['_type'] = 'layer'
-        info['label'] = []
+        info["_type"] = "layer"
+        info["label"] = []
 
         for sub in elt:
-            if sub.tag.endswith('label'):
+            if sub.tag.endswith("label"):
                 l = self._load_xml_attributes(AttrDict(), sub)
-                info['label'].append(l)
+                info["label"].append(l)
 
         return info
 
     def _handle_framelexunit_elt(self, elt):
         """Load the lexical unit info from an xml element in a frame's xml file."""
         luinfo = AttrDict()
-        luinfo['_type'] = 'lu'
+        luinfo["_type"] = "lu"
         luinfo = self._load_xml_attributes(luinfo, elt)
         luinfo["definition"] = ""
         luinfo["definitionMarkup"] = ""
         luinfo["sentenceCount"] = PrettyDict()
-        luinfo['lexemes'] = PrettyList()   # multiword LUs have multiple lexemes
-        luinfo['semTypes'] = PrettyList()  # an LU can have multiple semtypes
+        luinfo["lexemes"] = PrettyList()  # multiword LUs have multiple lexemes
+        luinfo["semTypes"] = PrettyList()  # an LU can have multiple semtypes
 
         for sub in elt:
-            if sub.tag.endswith('definition'):
-                luinfo['definitionMarkup'] = sub.text
-                luinfo['definition'] = self._strip_tags(sub.text)
-            elif sub.tag.endswith('sentenceCount'):
-                luinfo['sentenceCount'] = self._load_xml_attributes(
-                    PrettyDict(), sub)
-            elif sub.tag.endswith('lexeme'):
+            if sub.tag.endswith("definition"):
+                luinfo["definitionMarkup"] = sub.text
+                luinfo["definition"] = self._strip_tags(sub.text)
+            elif sub.tag.endswith("sentenceCount"):
+                luinfo["sentenceCount"] = self._load_xml_attributes(PrettyDict(), sub)
+            elif sub.tag.endswith("lexeme"):
                 lexemeinfo = self._load_xml_attributes(PrettyDict(), sub)
-                if not isinstance(lexemeinfo.name, string_types):
+                if not isinstance(lexemeinfo.name, str):
                     # some lexeme names are ints by default: e.g.,
                     # thousand.num has lexeme with name="1000"
                     lexemeinfo.name = str(lexemeinfo.name)
-                luinfo['lexemes'].append(lexemeinfo)
-            elif sub.tag.endswith('semType'):
+                luinfo["lexemes"].append(lexemeinfo)
+            elif sub.tag.endswith("semType"):
                 semtypeinfo = self._load_xml_attributes(PrettyDict(), sub)
-                luinfo['semTypes'].append(self.semtype(semtypeinfo.ID))
+                luinfo["semTypes"].append(self.semtype(semtypeinfo.ID))
 
         # sort lexemes by 'order' attribute
         # otherwise, e.g., 'write down.v' may have lexemes in wrong order
-        luinfo['lexemes'].sort(key=lambda x: x.order)
+        luinfo["lexemes"].sort(key=lambda x: x.order)
 
         return luinfo
 
@@ -2739,33 +3067,33 @@ warnings(True) to display corpus consistency warnings when loading data
         (which are not included in frame files).
         """
         luinfo = self._load_xml_attributes(AttrDict(), elt)
-        luinfo['_type'] = 'lu'
-        luinfo['definition'] = ""
-        luinfo['definitionMarkup'] = ""
-        luinfo['subCorpus'] = PrettyList()
-        luinfo['lexemes'] = PrettyList()   # multiword LUs have multiple lexemes
-        luinfo['semTypes'] = PrettyList()  # an LU can have multiple semtypes
+        luinfo["_type"] = "lu"
+        luinfo["definition"] = ""
+        luinfo["definitionMarkup"] = ""
+        luinfo["subCorpus"] = PrettyList()
+        luinfo["lexemes"] = PrettyList()  # multiword LUs have multiple lexemes
+        luinfo["semTypes"] = PrettyList()  # an LU can have multiple semtypes
         for k in ignorekeys:
             if k in luinfo:
                 del luinfo[k]
 
         for sub in elt:
-            if sub.tag.endswith('header'):
+            if sub.tag.endswith("header"):
                 continue  # not used
-            elif sub.tag.endswith('valences'):
+            elif sub.tag.endswith("valences"):
                 continue  # not used
-            elif sub.tag.endswith('definition') and 'definition' not in ignorekeys:
-                luinfo['definitionMarkup'] = sub.text
-                luinfo['definition'] = self._strip_tags(sub.text)
-            elif sub.tag.endswith('subCorpus') and 'subCorpus' not in ignorekeys:
+            elif sub.tag.endswith("definition") and "definition" not in ignorekeys:
+                luinfo["definitionMarkup"] = sub.text
+                luinfo["definition"] = self._strip_tags(sub.text)
+            elif sub.tag.endswith("subCorpus") and "subCorpus" not in ignorekeys:
                 sc = self._handle_lusubcorpus_elt(sub)
                 if sc is not None:
-                    luinfo['subCorpus'].append(sc)
-            elif sub.tag.endswith('lexeme') and 'lexeme' not in ignorekeys:
-                luinfo['lexemes'].append(self._load_xml_attributes(PrettyDict(), sub))
-            elif sub.tag.endswith('semType') and 'semType' not in ignorekeys:
+                    luinfo["subCorpus"].append(sc)
+            elif sub.tag.endswith("lexeme") and "lexeme" not in ignorekeys:
+                luinfo["lexemes"].append(self._load_xml_attributes(PrettyDict(), sub))
+            elif sub.tag.endswith("semType") and "semType" not in ignorekeys:
                 semtypeinfo = self._load_xml_attributes(AttrDict(), sub)
-                luinfo['semTypes'].append(self.semtype(semtypeinfo.ID))
+                luinfo["semTypes"].append(self.semtype(semtypeinfo.ID))
 
         return luinfo
 
@@ -2773,167 +3101,221 @@ warnings(True) to display corpus consistency warnings when loading data
         """Load a subcorpus of a lexical unit from the given xml."""
         sc = AttrDict()
         try:
-            sc['name'] = elt.get('name')
+            sc["name"] = elt.get("name")
         except AttributeError:
             return None
-        sc['_type'] = "lusubcorpus"
-        sc['sentence'] = []
+        sc["_type"] = "lusubcorpus"
+        sc["sentence"] = []
 
         for sub in elt:
-            if sub.tag.endswith('sentence'):
+            if sub.tag.endswith("sentence"):
                 s = self._handle_lusentence_elt(sub)
                 if s is not None:
-                    sc['sentence'].append(s)
+                    sc["sentence"].append(s)
 
         return sc
 
     def _handle_lusentence_elt(self, elt):
         """Load a sentence from a subcorpus of an LU from xml."""
         info = self._load_xml_attributes(AttrDict(), elt)
-        info['_type'] = 'lusentence'
-        info['annotationSet'] = []
-        info['_ascii'] = types.MethodType(_annotation_ascii, info)  # attach a method for this instance
+        info["_type"] = "lusentence"
+        info["annotationSet"] = []
+        info["_ascii"] = types.MethodType(
+            _annotation_ascii, info
+        )  # attach a method for this instance
         for sub in elt:
-            if sub.tag.endswith('text'):
-                info['text'] = self._strip_tags(sub.text)
-            elif sub.tag.endswith('annotationSet'):
-                annset = self._handle_luannotationset_elt(sub, is_pos=(len(info['annotationSet'])==0))
+            if sub.tag.endswith("text"):
+                info["text"] = self._strip_tags(sub.text)
+            elif sub.tag.endswith("annotationSet"):
+                annset = self._handle_luannotationset_elt(
+                    sub, is_pos=(len(info["annotationSet"]) == 0)
+                )
                 if annset is not None:
-                    assert annset.status=='UNANN' or 'FE' in annset,annset
-                    if annset.status!='UNANN':
-                        info['frameAnnotation'] = annset
+                    assert annset.status == "UNANN" or "FE" in annset, annset
+                    if annset.status != "UNANN":
+                        info["frameAnnotation"] = annset
                     # copy layer info up to current level
-                    for k in ('Target', 'FE', 'FE2', 'FE3', 'GF', 'PT', 'POS', 'POS_tagset',
-                              'Other', 'Sent', 'Verb', 'Noun', 'Adj', 'Adv', 'Prep', 'Scon', 'Art'):
+                    for k in (
+                        "Target",
+                        "FE",
+                        "FE2",
+                        "FE3",
+                        "GF",
+                        "PT",
+                        "POS",
+                        "POS_tagset",
+                        "Other",
+                        "Sent",
+                        "Verb",
+                        "Noun",
+                        "Adj",
+                        "Adv",
+                        "Prep",
+                        "Scon",
+                        "Art",
+                    ):
                         if k in annset:
                             info[k] = annset[k]
-                    info['annotationSet'].append(annset)
-                    annset['sent'] = info
-                    annset['text'] = info.text
+                    info["annotationSet"].append(annset)
+                    annset["sent"] = info
+                    annset["text"] = info.text
         return info
 
     def _handle_luannotationset_elt(self, elt, is_pos=False):
         """Load an annotation set from a sentence in an subcorpus of an LU"""
         info = self._load_xml_attributes(AttrDict(), elt)
-        info['_type'] = 'posannotationset' if is_pos else 'luannotationset'
-        info['layer'] = []
-        info['_ascii'] = types.MethodType(_annotation_ascii, info)  # attach a method for this instance
+        info["_type"] = "posannotationset" if is_pos else "luannotationset"
+        info["layer"] = []
+        info["_ascii"] = types.MethodType(
+            _annotation_ascii, info
+        )  # attach a method for this instance
 
-        if 'cxnID' in info: # ignoring construction annotations for now.
+        if "cxnID" in info:  # ignoring construction annotations for now.
             return info
 
         for sub in elt:
-            if sub.tag.endswith('layer'):
+            if sub.tag.endswith("layer"):
                 l = self._handle_lulayer_elt(sub)
                 if l is not None:
                     overt = []
-                    ni = {} # null instantiations
+                    ni = {}  # null instantiations
 
-                    info['layer'].append(l)
+                    info["layer"].append(l)
                     for lbl in l.label:
-                        if 'start' in lbl:
-                            thespan = (lbl.start,lbl.end+1,lbl.name)
-                            if l.name not in ('Sent','Other'):  # 'Sent' and 'Other' layers sometimes contain accidental duplicate spans
-                                assert thespan not in overt,(info.ID,l.name,thespan)
+                        if "start" in lbl:
+                            thespan = (lbl.start, lbl.end + 1, lbl.name)
+                            if l.name not in (
+                                "Sent",
+                                "Other",
+                            ):  # 'Sent' and 'Other' layers sometimes contain accidental duplicate spans
+                                assert thespan not in overt, (info.ID, l.name, thespan)
                             overt.append(thespan)
-                        else: # null instantiation
+                        else:  # null instantiation
                             if lbl.name in ni:
-                                self._warn('FE with multiple NI entries:', lbl.name, ni[lbl.name], lbl.itype)
+                                self._warn(
+                                    "FE with multiple NI entries:",
+                                    lbl.name,
+                                    ni[lbl.name],
+                                    lbl.itype,
+                                )
                             else:
                                 ni[lbl.name] = lbl.itype
                     overt = sorted(overt)
 
-                    if l.name=='Target':
+                    if l.name == "Target":
                         if not overt:
-                            self._warn('Skipping empty Target layer in annotation set ID={0}'.format(info.ID))
+                            self._warn(
+                                "Skipping empty Target layer in annotation set ID={0}".format(
+                                    info.ID
+                                )
+                            )
                             continue
-                        assert all(lblname=='Target' for i,j,lblname in overt)
-                        if 'Target' in info:
-                            self._warn('Annotation set {0} has multiple Target layers'.format(info.ID))
+                        assert all(lblname == "Target" for i, j, lblname in overt)
+                        if "Target" in info:
+                            self._warn(
+                                "Annotation set {0} has multiple Target layers".format(
+                                    info.ID
+                                )
+                            )
                         else:
-                            info['Target'] = [(i,j) for (i,j,_) in overt]
-                    elif l.name=='FE':
-                        if l.rank==1:
-                            assert 'FE' not in info
-                            info['FE'] = (overt, ni)
-                            #assert False,info
+                            info["Target"] = [(i, j) for (i, j, _) in overt]
+                    elif l.name == "FE":
+                        if l.rank == 1:
+                            assert "FE" not in info
+                            info["FE"] = (overt, ni)
+                            # assert False,info
                         else:
                             # sometimes there are 3 FE layers! e.g. Change_position_on_a_scale.fall.v
-                            assert 2<=l.rank<=3,l.rank
-                            k = 'FE'+str(l.rank)
+                            assert 2 <= l.rank <= 3, l.rank
+                            k = "FE" + str(l.rank)
                             assert k not in info
                             info[k] = (overt, ni)
-                    elif l.name in ('GF', 'PT'):
-                        assert l.rank==1
+                    elif l.name in ("GF", "PT"):
+                        assert l.rank == 1
                         info[l.name] = overt
-                    elif l.name in ('BNC', 'PENN'):
-                        assert l.rank==1
-                        info['POS'] = overt
-                        info['POS_tagset'] = l.name
+                    elif l.name in ("BNC", "PENN"):
+                        assert l.rank == 1
+                        info["POS"] = overt
+                        info["POS_tagset"] = l.name
                     else:
                         if is_pos:
-                            if l.name not in ('NER', 'WSL'):
-                                self._warn('Unexpected layer in sentence annotationset:', l.name)
+                            if l.name not in ("NER", "WSL"):
+                                self._warn(
+                                    "Unexpected layer in sentence annotationset:",
+                                    l.name,
+                                )
                         else:
-                            if l.name not in ('Sent', 'Verb', 'Noun', 'Adj', 'Adv', 'Prep', 'Scon', 'Art', 'Other'):
-                                self._warn('Unexpected layer in frame annotationset:', l.name)
+                            if l.name not in (
+                                "Sent",
+                                "Verb",
+                                "Noun",
+                                "Adj",
+                                "Adv",
+                                "Prep",
+                                "Scon",
+                                "Art",
+                                "Other",
+                            ):
+                                self._warn(
+                                    "Unexpected layer in frame annotationset:", l.name
+                                )
                         info[l.name] = overt
-        if not is_pos and 'cxnID' not in info:
-            if 'Target' not in info:
-                self._warn('Missing target in annotation set ID={0}'.format(info.ID))
-            assert 'FE' in info
-            if 'FE3' in info:
-                assert 'FE2' in info
+        if not is_pos and "cxnID" not in info:
+            if "Target" not in info:
+                self._warn("Missing target in annotation set ID={0}".format(info.ID))
+            assert "FE" in info
+            if "FE3" in info:
+                assert "FE2" in info
 
         return info
 
     def _handle_lulayer_elt(self, elt):
         """Load a layer from an annotation set"""
         layer = self._load_xml_attributes(AttrDict(), elt)
-        layer['_type'] = 'lulayer'
-        layer['label'] = []
+        layer["_type"] = "lulayer"
+        layer["label"] = []
 
         for sub in elt:
-            if sub.tag.endswith('label'):
+            if sub.tag.endswith("label"):
                 l = self._load_xml_attributes(AttrDict(), sub)
                 if l is not None:
-                    layer['label'].append(l)
+                    layer["label"].append(l)
         return layer
 
     def _handle_fe_elt(self, elt):
         feinfo = self._load_xml_attributes(AttrDict(), elt)
-        feinfo['_type'] = 'fe'
-        feinfo['definition'] = ""
-        feinfo['definitionMarkup'] = ""
-        feinfo['semType'] = None
-        feinfo['requiresFE'] = None
-        feinfo['excludesFE'] = None
+        feinfo["_type"] = "fe"
+        feinfo["definition"] = ""
+        feinfo["definitionMarkup"] = ""
+        feinfo["semType"] = None
+        feinfo["requiresFE"] = None
+        feinfo["excludesFE"] = None
         for sub in elt:
-            if sub.tag.endswith('definition'):
-                feinfo['definitionMarkup'] = sub.text
-                feinfo['definition'] = self._strip_tags(sub.text)
-            elif sub.tag.endswith('semType'):
+            if sub.tag.endswith("definition"):
+                feinfo["definitionMarkup"] = sub.text
+                feinfo["definition"] = self._strip_tags(sub.text)
+            elif sub.tag.endswith("semType"):
                 stinfo = self._load_xml_attributes(AttrDict(), sub)
-                feinfo['semType'] = self.semtype(stinfo.ID)
-            elif sub.tag.endswith('requiresFE'):
-                feinfo['requiresFE'] = self._load_xml_attributes(AttrDict(), sub)
-            elif sub.tag.endswith('excludesFE'):
-                feinfo['excludesFE'] = self._load_xml_attributes(AttrDict(), sub)
+                feinfo["semType"] = self.semtype(stinfo.ID)
+            elif sub.tag.endswith("requiresFE"):
+                feinfo["requiresFE"] = self._load_xml_attributes(AttrDict(), sub)
+            elif sub.tag.endswith("excludesFE"):
+                feinfo["excludesFE"] = self._load_xml_attributes(AttrDict(), sub)
 
         return feinfo
 
     def _handle_semtype_elt(self, elt, tagspec=None):
         semt = self._load_xml_attributes(AttrDict(), elt)
-        semt['_type'] = 'semtype'
-        semt['superType'] = None
-        semt['subTypes'] = PrettyList()
+        semt["_type"] = "semtype"
+        semt["superType"] = None
+        semt["subTypes"] = PrettyList()
         for sub in elt:
             if sub.text is not None:
-                semt['definitionMarkup'] = sub.text
-                semt['definition'] = self._strip_tags(sub.text)
+                semt["definitionMarkup"] = sub.text
+                semt["definition"] = self._strip_tags(sub.text)
             else:
                 supertypeinfo = self._load_xml_attributes(AttrDict(), sub)
-                semt['superType'] = supertypeinfo
+                semt["superType"] = supertypeinfo
                 # the supertype may not have been loaded yet
 
         return semt
@@ -2950,24 +3332,25 @@ def demo():
     # buildindexes(). We do this here just for demo purposes. If the
     # indexes are not built explicitly, they will be built as needed.
     #
-    print('Building the indexes...')
+    print("Building the indexes...")
     fn.buildindexes()
 
     #
     # Get some statistics about the corpus
     #
-    print('Number of Frames:', len(fn.frames()))
-    print('Number of Lexical Units:', len(fn.lus()))
-    print('Number of annotated documents:', len(fn.docs()))
+    print("Number of Frames:", len(fn.frames()))
+    print("Number of Lexical Units:", len(fn.lus()))
+    print("Number of annotated documents:", len(fn.docs()))
     print()
 
     #
     # Frames
     #
-    print('getting frames whose name matches the (case insensitive) regex: "(?i)medical"')
-    medframes = fn.frames(r'(?i)medical')
     print(
-        'Found {0} Frames whose name matches "(?i)medical":'.format(len(medframes)))
+        'getting frames whose name matches the (case insensitive) regex: "(?i)medical"'
+    )
+    medframes = fn.frames(r"(?i)medical")
+    print('Found {0} Frames whose name matches "(?i)medical":'.format(len(medframes)))
     print([(f.name, f.ID) for f in medframes])
 
     #
@@ -2980,26 +3363,28 @@ def demo():
     # get the frame relations
     #
     print(
-        '\nNumber of frame relations for the "{0}" ({1}) frame:'.format(m_frame.name,
-                                                                        m_frame.ID),
-        len(m_frame.frameRelations))
+        '\nNumber of frame relations for the "{0}" ({1}) frame:'.format(
+            m_frame.name, m_frame.ID
+        ),
+        len(m_frame.frameRelations),
+    )
     for fr in m_frame.frameRelations:
-        print('   ', fr)
+        print("   ", fr)
 
     #
     # get the names of the Frame Elements
     #
     print(
         '\nNumber of Frame Elements in the "{0}" frame:'.format(m_frame.name),
-        len(m_frame.FE))
-    print('   ', [x for x in m_frame.FE])
+        len(m_frame.FE),
+    )
+    print("   ", [x for x in m_frame.FE])
 
     #
     # get the names of the "Core" Frame Elements
     #
-    print(
-        '\nThe "core" Frame Elements in the "{0}" frame:'.format(m_frame.name))
-    print('   ', [x.name for x in m_frame.FE.values() if x.coreType == "Core"])
+    print('\nThe "core" Frame Elements in the "{0}" frame:'.format(m_frame.name))
+    print("   ", [x.name for x in m_frame.FE.values() if x.coreType == "Core"])
 
     #
     # get all of the Lexical Units that are incorporated in the
@@ -3007,29 +3392,35 @@ def demo():
     #
     print('\nAll Lexical Units that are incorporated in the "Ailment" FE:')
     m_frame = fn.frame(239)
-    ailment_lus = [x for x in m_frame.lexUnit.values() if 'incorporatedFE' in x and x.incorporatedFE == 'Ailment']
-    print('   ', [x.name for x in ailment_lus])
+    ailment_lus = [
+        x
+        for x in m_frame.lexUnit.values()
+        if "incorporatedFE" in x and x.incorporatedFE == "Ailment"
+    ]
+    print("   ", [x.name for x in ailment_lus])
 
     #
     # get all of the Lexical Units for the frame
     #
-    print('\nNumber of Lexical Units in the "{0}" frame:'.format(m_frame.name),
-          len(m_frame.lexUnit))
-    print('  ', [x.name for x in m_frame.lexUnit.values()][:5], '...')
+    print(
+        '\nNumber of Lexical Units in the "{0}" frame:'.format(m_frame.name),
+        len(m_frame.lexUnit),
+    )
+    print("  ", [x.name for x in m_frame.lexUnit.values()][:5], "...")
 
     #
     # get basic info on the second LU in the frame
     #
-    tmp_id = m_frame.lexUnit['ailment.n'].ID  # grab the id of the specified LU
+    tmp_id = m_frame.lexUnit["ailment.n"].ID  # grab the id of the specified LU
     luinfo = fn.lu_basic(tmp_id)  # get basic info on the LU
-    print('\nInformation on the LU: {0}'.format(luinfo.name))
+    print("\nInformation on the LU: {0}".format(luinfo.name))
     pprint(luinfo)
 
     #
     # Get a list of all of the corpora used for fulltext annotation
     #
-    print('\nNames of all of the corpora used for fulltext annotation:')
-    allcorpora = set([x.corpname for x in fn.docs_metadata()])
+    print("\nNames of all of the corpora used for fulltext annotation:")
+    allcorpora = set(x.corpname for x in fn.docs_metadata())
     pprint(list(allcorpora))
 
     #
@@ -3037,8 +3428,7 @@ def demo():
     #
     firstcorp = list(allcorpora)[0]
     firstcorp_docs = fn.docs(firstcorp)
-    print(
-        '\nNames of the annotated documents in the "{0}" corpus:'.format(firstcorp))
+    print('\nNames of the annotated documents in the "{0}" corpus:'.format(firstcorp))
     pprint([x.filename for x in firstcorp_docs])
 
     #
@@ -3050,8 +3440,11 @@ def demo():
     #       lemmas to frames because each time frames_by_lemma() is
     #       called, it has to search through ALL of the frame XML files
     #       in the db.
-    print('\nSearching for all Frames that have a lemma that matches the regexp: "^run.v$":')
-    pprint(fn.frames_by_lemma(r'^run.v$'))
+    print(
+        '\nSearching for all Frames that have a lemma that matches the regexp: "^run.v$":'
+    )
+    pprint(fn.frames_by_lemma(r"^run.v$"))
+
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
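
A hedged usage note on the NOTE above (not part of the commit itself): frames_by_lemma() rescans every frame XML file on each call, so anchor and escape the pattern, and prefer a direct ID lookup when the frame is already known. This sketch assumes the FrameNet data has been fetched via nltk.download('framenet_v17'):

    from nltk.corpus import framenet as fn

    frame = fn.frame(239)                      # direct lookup by ID: one file read
    frames = fn.frames_by_lemma(r"^run\.v$")   # regex scan: reads ALL frame XML
    print(frame.name, [f.name for f in frames])
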
diff --git a/nlp_resource_data/nltk/corpus/reader/framenet.pyc b/nlp_resource_data/nltk/corpus/reader/framenet.pyc
deleted file mode 100755 (executable)
index 3b56c32..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/framenet.pyc and /dev/null differ
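
An aside on the many *.pyc deletions in this commit: Python 3 no longer writes bytecode next to the source but caches it under __pycache__ with an interpreter tag, which is why the stale Python 2 files go away and cpython-37 files appear instead. A minimal illustration using only the standard library (the filename is arbitrary):

    import importlib.util

    # Prints e.g. '__pycache__/framenet.cpython-37.pyc' under CPython 3.7
    print(importlib.util.cache_from_source("framenet.py"))
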
diff --git a/nlp_resource_data/nltk/corpus/reader/ieer.py b/nlp_resource_data/nltk/corpus/reader/ieer.py
old mode 100755 (executable)
new mode 100644 (file)
index 91b9425..6f80742
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: IEER Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -20,32 +20,28 @@ and filenames were shortened.
 The corpus contains the following files: APW_19980314, APW_19980424,
 APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407.
 """
-from __future__ import unicode_literals
-
-from six import string_types
 
 import nltk
-from nltk import compat
 from nltk.corpus.reader.api import *
 
 #: A dictionary whose keys are the names of documents in this corpus;
 #: and whose values are descriptions of those documents' contents.
 titles = {
-    'APW_19980314': 'Associated Press Weekly, 14 March 1998',
-    'APW_19980424': 'Associated Press Weekly, 24 April 1998',
-    'APW_19980429': 'Associated Press Weekly, 29 April 1998',
-    'NYT_19980315': 'New York Times, 15 March 1998',
-    'NYT_19980403': 'New York Times, 3 April 1998',
-    'NYT_19980407': 'New York Times, 7 April 1998',
-    }
+    "APW_19980314": "Associated Press Weekly, 14 March 1998",
+    "APW_19980424": "Associated Press Weekly, 24 April 1998",
+    "APW_19980429": "Associated Press Weekly, 29 April 1998",
+    "NYT_19980315": "New York Times, 15 March 1998",
+    "NYT_19980403": "New York Times, 3 April 1998",
+    "NYT_19980407": "New York Times, 7 April 1998",
+}
 
 #: A list of all documents in this corpus.
 documents = sorted(titles)
 
-@compat.python_2_unicode_compatible
+
+
 class IEERDocument(object):
-    def __init__(self, text, docno=None, doctype=None,
-                 date_time=None, headline=''):
+    def __init__(self, text, docno=None, doctype=None, date_time=None, headline=""):
         self.text = text
         self.docno = docno
         self.doctype = doctype
@@ -54,38 +50,51 @@ class IEERDocument(object):
 
     def __repr__(self):
         if self.headline:
-            headline = ' '.join(self.headline.leaves())
+            headline = " ".join(self.headline.leaves())
         else:
-            headline = ' '.join([w for w in self.text.leaves()
-                                 if w[:1] != '<'][:12])+'...'
+            headline = (
+                " ".join([w for w in self.text.leaves() if w[:1] != "<"][:12]) + "..."
+            )
         if self.docno is not None:
-            return '<IEERDocument %s: %r>' % (self.docno, headline)
+            return "<IEERDocument %s: %r>" % (self.docno, headline)
         else:
-            return '<IEERDocument: %r>' % headline
+            return "<IEERDocument: %r>" % headline
+
 
 class IEERCorpusReader(CorpusReader):
     """
     """
+
     def raw(self, fileids=None):
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def docs(self, fileids=None):
-        return concat([StreamBackedCorpusView(fileid, self._read_block,
-                                              encoding=enc)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def parsed_docs(self, fileids=None):
-        return concat([StreamBackedCorpusView(fileid,
-                                              self._read_parsed_block,
-                                              encoding=enc)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
-
-    def _read_parsed_block(self,stream):
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_parsed_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def _read_parsed_block(self, stream):
         # TODO: figure out why empty documents are being returned
-        return [self._parse(doc) for doc in self._read_block(stream)
-                if self._parse(doc).docno is not None]
+        return [
+            self._parse(doc)
+            for doc in self._read_block(stream)
+            if self._parse(doc).docno is not None
+        ]
 
     def _parse(self, doc):
         val = nltk.chunk.ieerstr2tree(doc, root_label="DOCUMENT")
@@ -99,14 +108,18 @@ class IEERCorpusReader(CorpusReader):
         # Skip any preamble.
         while True:
             line = stream.readline()
-            if not line: break
-            if line.strip() == '<DOC>': break
+            if not line:
+                break
+            if line.strip() == "<DOC>":
+                break
         out.append(line)
         # Read the document
         while True:
             line = stream.readline()
-            if not line: break
+            if not line:
+                break
             out.append(line)
-            if line.strip() == '</DOC>': break
+            if line.strip() == "</DOC>":
+                break
         # Return the document
-        return ['\n'.join(out)]
+        return ["\n".join(out)]
diff --git a/nlp_resource_data/nltk/corpus/reader/ieer.pyc b/nlp_resource_data/nltk/corpus/reader/ieer.pyc
deleted file mode 100755 (executable)
index 9eed214..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/ieer.pyc and /dev/null differ
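
The <DOC> framing logic in _read_block above is easy to exercise in isolation. A standalone sketch (read_doc_block is a hypothetical name), driven by an in-memory stream instead of a corpus file:

    from io import StringIO

    def read_doc_block(stream):
        out = []
        # Skip any preamble before the opening tag.
        while True:
            line = stream.readline()
            if not line or line.strip() == "<DOC>":
                break
        out.append(line)
        # Accumulate lines until the closing tag (or EOF).
        while True:
            line = stream.readline()
            if not line:
                break
            out.append(line)
            if line.strip() == "</DOC>":
                break
        return ["\n".join(out)]

    print(read_doc_block(StringIO("preamble\n<DOC>\nbody\n</DOC>\n")))
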
diff --git a/nlp_resource_data/nltk/corpus/reader/indian.py b/nlp_resource_data/nltk/corpus/reader/indian.py
old mode 100755 (executable)
new mode 100644 (file)
index 1c50547..0788b54
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Indian Language POS-Tagged Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -18,54 +18,69 @@ Contents:
   - Telugu: IIIT Hyderabad
 """
 
-from six import string_types
-
 from nltk.tag import str2tuple, map_tag
 
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
+
 class IndianCorpusReader(CorpusReader):
     """
     List of words, one per line.  Blank lines are ignored.
     """
+
     def words(self, fileids=None):
-        return concat([IndianCorpusView(fileid, enc,
-                                        False, False)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                IndianCorpusView(fileid, enc, False, False)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_words(self, fileids=None, tagset=None):
         if tagset and tagset != self._tagset:
             tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t)
         else:
             tag_mapping_function = None
-        return concat([IndianCorpusView(fileid, enc,
-                                        True, False, tag_mapping_function)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                IndianCorpusView(fileid, enc, True, False, tag_mapping_function)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def sents(self, fileids=None):
-        return concat([IndianCorpusView(fileid, enc,
-                                        False, True)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                IndianCorpusView(fileid, enc, False, True)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_sents(self, fileids=None, tagset=None):
         if tagset and tagset != self._tagset:
             tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t)
         else:
             tag_mapping_function = None
-        return concat([IndianCorpusView(fileid, enc,
-                                        True, True, tag_mapping_function)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                IndianCorpusView(fileid, enc, True, True, tag_mapping_function)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def raw(self, fileids=None):
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
 
 class IndianCorpusView(StreamBackedCorpusView):
-    def __init__(self, corpus_file, encoding, tagged,
-                 group_by_sent, tag_mapping_function=None):
+    def __init__(
+        self, corpus_file, encoding, tagged, group_by_sent, tag_mapping_function=None
+    ):
         self._tagged = tagged
         self._group_by_sent = group_by_sent
         self._tag_mapping_function = tag_mapping_function
@@ -73,12 +88,13 @@ class IndianCorpusView(StreamBackedCorpusView):
 
     def read_block(self, stream):
         line = stream.readline()
-        if line.startswith('<'):
+        if line.startswith("<"):
             return []
-        sent = [str2tuple(word, sep='_') for word in line.split()]
+        sent = [str2tuple(word, sep="_") for word in line.split()]
         if self._tag_mapping_function:
-            sent = [(w, self._tag_mapping_function(t)) for (w,t) in sent]
-        if not self._tagged: sent = [w for (w,t) in sent]
+            sent = [(w, self._tag_mapping_function(t)) for (w, t) in sent]
+        if not self._tagged:
+            sent = [w for (w, t) in sent]
         if self._group_by_sent:
             return [sent]
         else:
diff --git a/nlp_resource_data/nltk/corpus/reader/indian.pyc b/nlp_resource_data/nltk/corpus/reader/indian.pyc
deleted file mode 100755 (executable)
index f0b521a..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/indian.pyc and /dev/null differ
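
For reference, the word_TAG splitting done by IndianCorpusView.read_block can be reproduced directly with nltk.tag.str2tuple; the sample line here is invented:

    from nltk.tag import str2tuple

    line = "yah_PRP kitaab_NN acchi_JJ hai_VM"
    sent = [str2tuple(w, sep="_") for w in line.split()]
    assert sent[0] == ("yah", "PRP")
    words = [w for (w, t) in sent]   # the untagged view (tagged=False)
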
diff --git a/nlp_resource_data/nltk/corpus/reader/ipipan.py b/nlp_resource_data/nltk/corpus/reader/ipipan.py
old mode 100755 (executable)
new mode 100644 (file)
index bf9b73e..de983dd
@@ -1,26 +1,27 @@
 # Natural Language Toolkit: IPI PAN Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Konrad Goluchowski <kodie@mimuw.edu.pl>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 import functools
 
-from six import string_types
-
 from nltk.corpus.reader.util import StreamBackedCorpusView, concat
 from nltk.corpus.reader.api import CorpusReader
 
+
 def _parse_args(fun):
     @functools.wraps(fun)
     def decorator(self, fileids=None, **kwargs):
-        kwargs.pop('tags', None)
+        kwargs.pop("tags", None)
         if not fileids:
             fileids = self.fileids()
         return fun(self, fileids, **kwargs)
+
     return decorator
 
+
 class IPIPANCorpusReader(CorpusReader):
     """
     Corpus reader designed to work with corpus created by IPI PAN.
@@ -66,88 +67,113 @@ class IPIPANCorpusReader(CorpusReader):
 
         filecontents = []
         for fileid in self._list_morph_files(fileids):
-            with open(fileid, 'r') as infile:
+            with open(fileid, "r") as infile:
                 filecontents.append(infile.read())
-        return ''.join(filecontents)
+        return "".join(filecontents)
 
     def channels(self, fileids=None):
         if not fileids:
             fileids = self.fileids()
-        return self._parse_header(fileids, 'channel')
+        return self._parse_header(fileids, "channel")
 
     def domains(self, fileids=None):
         if not fileids:
             fileids = self.fileids()
-        return self._parse_header(fileids, 'domain')
+        return self._parse_header(fileids, "domain")
 
     def categories(self, fileids=None):
         if not fileids:
             fileids = self.fileids()
-        return [self._map_category(cat)
-                for cat in self._parse_header(fileids, 'keyTerm')]
+        return [
+            self._map_category(cat) for cat in self._parse_header(fileids, "keyTerm")
+        ]
 
     def fileids(self, channels=None, domains=None, categories=None):
-        if channels is not None and domains is not None and \
-                categories is not None:
-            raise ValueError('You can specify only one of channels, domains '
-                             'and categories parameter at once')
-        if channels is None and domains is None and \
-                categories is None:
+        if channels is not None and domains is not None and categories is not None:
+            raise ValueError(
+                "You can specify only one of channels, domains "
+                "and categories parameter at once"
+            )
+        if channels is None and domains is None and categories is None:
             return CorpusReader.fileids(self)
-        if isinstance(channels, string_types):
+        if isinstance(channels, str):
             channels = [channels]
-        if isinstance(domains, string_types):
+        if isinstance(domains, str):
             domains = [domains]
-        if isinstance(categories, string_types):
+        if isinstance(categories, str):
             categories = [categories]
         if channels:
-            return self._list_morph_files_by('channel', channels)
+            return self._list_morph_files_by("channel", channels)
         elif domains:
-            return self._list_morph_files_by('domain', domains)
+            return self._list_morph_files_by("domain", domains)
         else:
-            return self._list_morph_files_by('keyTerm', categories,
-                    map=self._map_category)
+            return self._list_morph_files_by(
+                "keyTerm", categories, map=self._map_category
+            )
 
     @_parse_args
     def sents(self, fileids=None, **kwargs):
-        return concat([self._view(fileid,
-            mode=IPIPANCorpusView.SENTS_MODE, tags=False, **kwargs)
-            for fileid in self._list_morph_files(fileids)])
+        return concat(
+            [
+                self._view(
+                    fileid, mode=IPIPANCorpusView.SENTS_MODE, tags=False, **kwargs
+                )
+                for fileid in self._list_morph_files(fileids)
+            ]
+        )
 
     @_parse_args
     def paras(self, fileids=None, **kwargs):
-        return concat([self._view(fileid,
-            mode=IPIPANCorpusView.PARAS_MODE, tags=False, **kwargs)
-            for fileid in self._list_morph_files(fileids)])
+        return concat(
+            [
+                self._view(
+                    fileid, mode=IPIPANCorpusView.PARAS_MODE, tags=False, **kwargs
+                )
+                for fileid in self._list_morph_files(fileids)
+            ]
+        )
 
     @_parse_args
     def words(self, fileids=None, **kwargs):
-        return concat([self._view(fileid, tags=False, **kwargs)
-            for fileid in self._list_morph_files(fileids)])
+        return concat(
+            [
+                self._view(fileid, tags=False, **kwargs)
+                for fileid in self._list_morph_files(fileids)
+            ]
+        )
 
     @_parse_args
     def tagged_sents(self, fileids=None, **kwargs):
-        return concat([self._view(fileid, mode=IPIPANCorpusView.SENTS_MODE,
-            **kwargs)
-            for fileid in self._list_morph_files(fileids)])
+        return concat(
+            [
+                self._view(fileid, mode=IPIPANCorpusView.SENTS_MODE, **kwargs)
+                for fileid in self._list_morph_files(fileids)
+            ]
+        )
 
     @_parse_args
     def tagged_paras(self, fileids=None, **kwargs):
-        return concat([self._view(fileid, mode=IPIPANCorpusView.PARAS_MODE,
-            **kwargs)
-            for fileid in self._list_morph_files(fileids)])
+        return concat(
+            [
+                self._view(fileid, mode=IPIPANCorpusView.PARAS_MODE, **kwargs)
+                for fileid in self._list_morph_files(fileids)
+            ]
+        )
 
     @_parse_args
     def tagged_words(self, fileids=None, **kwargs):
-        return concat([self._view(fileid, **kwargs)
-            for fileid in self._list_morph_files(fileids)])
+        return concat(
+            [self._view(fileid, **kwargs) for fileid in self._list_morph_files(fileids)]
+        )
 
     def _list_morph_files(self, fileids):
         return [f for f in self.abspaths(fileids)]
 
     def _list_header_files(self, fileids):
-        return [f.replace('morph.xml', 'header.xml')
-                for f in self._list_morph_files(fileids)]
+        return [
+            f.replace("morph.xml", "header.xml")
+            for f in self._list_morph_files(fileids)
+        ]
 
     def _parse_header(self, fileids, tag):
         values = set()
@@ -161,7 +187,7 @@ class IPIPANCorpusReader(CorpusReader):
         fileids = self.fileids()
         ret_fileids = set()
         for f in fileids:
-            fp = self.abspath(f).replace('morph.xml', 'header.xml')
+            fp = self.abspath(f).replace("morph.xml", "header.xml")
             values_list = self._get_tag(fp, tag)
             for value in values_list:
                 if map is not None:
@@ -172,48 +198,56 @@ class IPIPANCorpusReader(CorpusReader):
 
     def _get_tag(self, f, tag):
         tags = []
-        with open(f, 'r') as infile:
+        with open(f, "r") as infile:
             header = infile.read()
         tag_end = 0
         while True:
-            tag_pos = header.find('<'+tag, tag_end)
-            if tag_pos < 0: return tags
-            tag_end = header.find('</'+tag+'>', tag_pos)
-            tags.append(header[tag_pos+len(tag)+2:tag_end])
+            tag_pos = header.find("<" + tag, tag_end)
+            if tag_pos < 0:
+                return tags
+            tag_end = header.find("</" + tag + ">", tag_pos)
+            tags.append(header[tag_pos + len(tag) + 2 : tag_end])
 
     def _map_category(self, cat):
-        pos = cat.find('>')
+        pos = cat.find(">")
         if pos == -1:
             return cat
         else:
-            return cat[pos+1:]
+            return cat[pos + 1 :]
 
     def _view(self, filename, **kwargs):
-        tags = kwargs.pop('tags', True)
-        mode = kwargs.pop('mode', 0)
-        simplify_tags = kwargs.pop('simplify_tags', False)
-        one_tag = kwargs.pop('one_tag', True)
-        disamb_only = kwargs.pop('disamb_only', True)
-        append_no_space = kwargs.pop('append_no_space', False)
-        append_space = kwargs.pop('append_space', False)
-        replace_xmlentities = kwargs.pop('replace_xmlentities', True)
+        tags = kwargs.pop("tags", True)
+        mode = kwargs.pop("mode", 0)
+        simplify_tags = kwargs.pop("simplify_tags", False)
+        one_tag = kwargs.pop("one_tag", True)
+        disamb_only = kwargs.pop("disamb_only", True)
+        append_no_space = kwargs.pop("append_no_space", False)
+        append_space = kwargs.pop("append_space", False)
+        replace_xmlentities = kwargs.pop("replace_xmlentities", True)
 
         if len(kwargs) > 0:
-            raise ValueError('Unexpected arguments: %s' % kwargs.keys())
+            raise ValueError("Unexpected arguments: %s" % kwargs.keys())
         if not one_tag and not disamb_only:
-            raise ValueError('You cannot specify both one_tag=False and '
-                             'disamb_only=False')
+            raise ValueError(
+                "You cannot specify both one_tag=False and " "disamb_only=False"
+            )
         if not tags and (simplify_tags or not one_tag or not disamb_only):
-            raise ValueError('You cannot specify simplify_tags, one_tag or '
-                             'disamb_only with functions other than tagged_*')
-
-        return IPIPANCorpusView(filename,
-                 tags=tags, mode=mode, simplify_tags=simplify_tags,
-                 one_tag=one_tag, disamb_only=disamb_only,
-                 append_no_space=append_no_space,
-                 append_space=append_space,
-                 replace_xmlentities=replace_xmlentities
-                 )
+            raise ValueError(
+                "You cannot specify simplify_tags, one_tag or "
+                "disamb_only with functions other than tagged_*"
+            )
+
+        return IPIPANCorpusView(
+            filename,
+            tags=tags,
+            mode=mode,
+            simplify_tags=simplify_tags,
+            one_tag=one_tag,
+            disamb_only=disamb_only,
+            append_no_space=append_no_space,
+            append_space=append_space,
+            replace_xmlentities=replace_xmlentities,
+        )
 
 
 class IPIPANCorpusView(StreamBackedCorpusView):
@@ -227,14 +261,14 @@ class IPIPANCorpusView(StreamBackedCorpusView):
         self.in_sentence = False
         self.position = 0
 
-        self.show_tags = kwargs.pop('tags', True)
-        self.disamb_only = kwargs.pop('disamb_only', True)
-        self.mode = kwargs.pop('mode', IPIPANCorpusView.WORDS_MODE)
-        self.simplify_tags = kwargs.pop('simplify_tags', False)
-        self.one_tag = kwargs.pop('one_tag', True)
-        self.append_no_space = kwargs.pop('append_no_space', False)
-        self.append_space = kwargs.pop('append_space', False)
-        self.replace_xmlentities = kwargs.pop('replace_xmlentities', True)
+        self.show_tags = kwargs.pop("tags", True)
+        self.disamb_only = kwargs.pop("disamb_only", True)
+        self.mode = kwargs.pop("mode", IPIPANCorpusView.WORDS_MODE)
+        self.simplify_tags = kwargs.pop("simplify_tags", False)
+        self.one_tag = kwargs.pop("one_tag", True)
+        self.append_no_space = kwargs.pop("append_no_space", False)
+        self.append_space = kwargs.pop("append_space", False)
+        self.replace_xmlentities = kwargs.pop("replace_xmlentities", True)
 
     def read_block(self, stream):
         sentence = []
@@ -253,7 +287,7 @@ class IPIPANCorpusView(StreamBackedCorpusView):
                 self._seek(stream)
                 lines = self._read_data(stream)
 
-            if lines == ['']:
+            if lines == [""]:
                 assert not sentences
                 return []
 
@@ -264,14 +298,14 @@ class IPIPANCorpusView(StreamBackedCorpusView):
                 self.in_sentence = True
             elif line.startswith('<chunk type="p"'):
                 pass
-            elif line.startswith('<tok'):
+            elif line.startswith("<tok"):
                 if self.append_space and space and not no_space:
                     self._append_space(sentence)
                 space = True
                 no_space = False
                 orth = ""
                 tags = set()
-            elif line.startswith('</chunk'):
+            elif line.startswith("</chunk"):
                 if self.in_sentence:
                     self.in_sentence = False
                     self._seek(stream)
@@ -286,39 +320,39 @@ class IPIPANCorpusView(StreamBackedCorpusView):
                 elif self.mode == self.PARAS_MODE:
                     self._seek(stream)
                     return [sentences]
-            elif line.startswith('<orth'):
+            elif line.startswith("<orth"):
                 orth = line[6:-7]
                 if self.replace_xmlentities:
-                    orth = orth.replace('&quot;', '"').replace('&amp;', '&')
-            elif line.startswith('<lex'):
-                if not self.disamb_only or line.find('disamb=') != -1:
-                    tag = line[line.index('<ctag')+6 : line.index('</ctag') ]
+                    orth = orth.replace("&quot;", '"').replace("&amp;", "&")
+            elif line.startswith("<lex"):
+                if not self.disamb_only or line.find("disamb=") != -1:
+                    tag = line[line.index("<ctag") + 6 : line.index("</ctag")]
                     tags.add(tag)
-            elif line.startswith('</tok'):
+            elif line.startswith("</tok"):
                 if self.show_tags:
                     if self.simplify_tags:
-                        tags = [t.split(':')[0] for t in tags]
+                        tags = [t.split(":")[0] for t in tags]
                     if not self.one_tag or not self.disamb_only:
                         sentence.append((orth, tuple(tags)))
                     else:
                         sentence.append((orth, tags.pop()))
                 else:
                     sentence.append(orth)
-            elif line.startswith('<ns/>'):
+            elif line.startswith("<ns/>"):
                 if self.append_space:
                     no_space = True
                 if self.append_no_space:
                     if self.show_tags:
-                        sentence.append(('', 'no-space'))
+                        sentence.append(("", "no-space"))
                     else:
-                        sentence.append('')
-            elif line.startswith('</cesAna'):
+                        sentence.append("")
+            elif line.startswith("</cesAna"):
                 pass
 
     def _read_data(self, stream):
         self.position = stream.tell()
         buff = stream.read(4096)
-        lines = buff.split('\n')
+        lines = buff.split("\n")
         lines.reverse()
         return lines
 
@@ -327,6 +361,6 @@ class IPIPANCorpusView(StreamBackedCorpusView):
 
     def _append_space(self, sentence):
         if self.show_tags:
-            sentence.append((' ', 'space'))
+            sentence.append((" ", "space"))
         else:
-            sentence.append(' ')
+            sentence.append(" ")
diff --git a/nlp_resource_data/nltk/corpus/reader/ipipan.pyc b/nlp_resource_data/nltk/corpus/reader/ipipan.pyc
deleted file mode 100755 (executable)
index bb9ff46..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/ipipan.pyc and /dev/null differ
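
The header scanning in _get_tag above is plain string slicing and can be checked standalone; in this toy version get_tag takes the header text directly rather than a file path:

    def get_tag(header, tag):
        tags, tag_end = [], 0
        while True:
            tag_pos = header.find("<" + tag, tag_end)
            if tag_pos < 0:
                return tags
            tag_end = header.find("</" + tag + ">", tag_pos)
            # len(tag) + 2 skips past '<', the tag name and '>'
            tags.append(header[tag_pos + len(tag) + 2 : tag_end])

    assert get_tag("<channel>prasa</channel>", "channel") == ["prasa"]
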
diff --git a/nlp_resource_data/nltk/corpus/reader/knbc.py b/nlp_resource_data/nltk/corpus/reader/knbc.py
old mode 100755 (executable)
new mode 100644 (file)
index 8ad90a7..965a6fe
@@ -1,15 +1,13 @@
 #! /usr/bin/env python
 # KNB Corpus reader
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Masato Hagiwara <hagisan@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 # For more information, see http://lilyx.net/pages/nltkjapanesecorpus.html
-from __future__ import print_function
 
 import re
-from six import string_types
 
 from nltk.parse import DependencyGraph
 
@@ -21,7 +19,7 @@ from nltk.corpus.reader.util import (
 from nltk.corpus.reader.api import SyntaxCorpusReader, CorpusReader
 
 # default function to convert morphlist to str for tree representation
-_morphs2str_default = lambda morphs: '/'.join(m[0] for m in morphs if m[0] != 'EOS')
+_morphs2str_default = lambda morphs: "/".join(m[0] for m in morphs if m[0] != "EOS")
 
 
 class KNBCorpusReader(SyntaxCorpusReader):
@@ -56,12 +54,14 @@ class KNBCorpusReader(SyntaxCorpusReader):
 
     """
 
-    def __init__(self, root, fileids, encoding='utf8', morphs2str=_morphs2str_default):
+    def __init__(self, root, fileids, encoding="utf8", morphs2str=_morphs2str_default):
         """
         Initialize KNBCorpusReader
         morphs2str is a function to convert morphlist to str for tree representation
         for _parse()
         """
+        # FIXME: Why is it inheriting from SyntaxCorpusReader but initializing
+        #        from CorpusReader?
         CorpusReader.__init__(self, root, fileids, encoding)
         self.morphs2str = morphs2str
 
@@ -87,7 +87,7 @@ class KNBCorpusReader(SyntaxCorpusReader):
             if not re.match(r"EOS|\*|\#|\+", line):
                 cells = line.strip().split(" ")
                 # convert cells to morph tuples
-                res.append((cells[0], ' '.join(cells[1:])))
+                res.append((cells[0], " ".join(cells[1:])))
 
         return res
 
@@ -95,7 +95,7 @@ class KNBCorpusReader(SyntaxCorpusReader):
         dg = DependencyGraph()
         i = 0
         for line in t.splitlines():
-            if line[0] in '*+':
+            if line[0] in "*+":
                 # start of bunsetsu or tag
 
                 cells = line.strip().split(" ", 3)
@@ -104,35 +104,30 @@ class KNBCorpusReader(SyntaxCorpusReader):
                 assert m is not None
 
                 node = dg.nodes[i]
-                node.update(
-                    {
-                        'address': i,
-                        'rel': m.group(2),
-                        'word': [],
-                    }
-                )
+                node.update({"address": i, "rel": m.group(2), "word": []})
 
                 dep_parent = int(m.group(1))
 
                 if dep_parent == -1:
                     dg.root = node
                 else:
-                    dg.nodes[dep_parent]['deps'].append(i)
+                    dg.nodes[dep_parent]["deps"].append(i)
 
                 i += 1
-            elif line[0] != '#':
+            elif line[0] != "#":
                 # normal morph
                 cells = line.strip().split(" ")
                 # convert cells to morph tuples
-                morph = cells[0], ' '.join(cells[1:])
-                dg.nodes[i - 1]['word'].append(morph)
+                morph = cells[0], " ".join(cells[1:])
+                dg.nodes[i - 1]["word"].append(morph)
 
         if self.morphs2str:
             for node in dg.nodes.values():
-                node['word'] = self.morphs2str(node['word'])
+                node["word"] = self.morphs2str(node["word"])
 
         return dg.tree()
 
+
 ######################################################################
 # Demo
 ######################################################################
@@ -143,31 +138,38 @@ def demo():
     import nltk
     from nltk.corpus.util import LazyCorpusLoader
 
-    root = nltk.data.find('corpora/knbc/corpus1')
-    fileids = [f for f in find_corpus_fileids(FileSystemPathPointer(root), ".*")
-               if re.search(r"\d\-\d\-[\d]+\-[\d]+", f)]
+    root = nltk.data.find("corpora/knbc/corpus1")
+    fileids = [
+        f
+        for f in find_corpus_fileids(FileSystemPathPointer(root), ".*")
+        if re.search(r"\d\-\d\-[\d]+\-[\d]+", f)
+    ]
 
     def _knbc_fileids_sort(x):
-        cells = x.split('-')
+        cells = x.split("-")
         return (cells[0], int(cells[1]), int(cells[2]), int(cells[3]))
 
-    knbc = LazyCorpusLoader('knbc/corpus1', KNBCorpusReader,
-                            sorted(fileids, key=_knbc_fileids_sort), encoding='euc-jp')
+    knbc = LazyCorpusLoader(
+        "knbc/corpus1",
+        KNBCorpusReader,
+        sorted(fileids, key=_knbc_fileids_sort),
+        encoding="euc-jp",
+    )
 
     print(knbc.fileids()[:10])
-    print(''.join(knbc.words()[:100]))
+    print("".join(knbc.words()[:100]))
 
-    print('\n\n'.join(str(tree) for tree in knbc.parsed_sents()[:2]))
+    print("\n\n".join(str(tree) for tree in knbc.parsed_sents()[:2]))
 
-    knbc.morphs2str = lambda morphs: '/'.join(
-        "%s(%s)" % (m[0], m[1].split(' ')[2]) for m in morphs if m[0] != 'EOS'
-    ).encode('utf-8')
+    knbc.morphs2str = lambda morphs: "/".join(
+        "%s(%s)" % (m[0], m[1].split(" ")[2]) for m in morphs if m[0] != "EOS"
+    ).encode("utf-8")
 
-    print('\n\n'.join('%s' % tree for tree in knbc.parsed_sents()[:2]))
+    print("\n\n".join("%s" % tree for tree in knbc.parsed_sents()[:2]))
 
     print(
-        '\n'.join(
-            ' '.join("%s/%s" % (w[0], w[1].split(' ')[2]) for w in sent)
+        "\n".join(
+            " ".join("%s/%s" % (w[0], w[1].split(" ")[2]) for w in sent)
             for sent in knbc.tagged_sents()[0:2]
         )
     )
@@ -176,12 +178,15 @@ def demo():
 def test():
 
     from nltk.corpus.util import LazyCorpusLoader
+
     knbc = LazyCorpusLoader(
-        'knbc/corpus1', KNBCorpusReader, r'.*/KN.*', encoding='euc-jp')
-    assert isinstance(knbc.words()[0], string_types)
-    assert isinstance(knbc.sents()[0][0], string_types)
+        "knbc/corpus1", KNBCorpusReader, r".*/KN.*", encoding="euc-jp"
+    )
+    assert isinstance(knbc.words()[0], str)
+    assert isinstance(knbc.sents()[0][0], str)
     assert isinstance(knbc.tagged_words()[0], tuple)
     assert isinstance(knbc.tagged_sents()[0][0], tuple)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/corpus/reader/knbc.pyc b/nlp_resource_data/nltk/corpus/reader/knbc.pyc
deleted file mode 100755 (executable)
index f344c95..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/knbc.pyc and /dev/null differ
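
The _morphs2str_default convention above (skip the EOS sentinel, join the surface forms with '/') fits in two lines; the morph data here is invented:

    morphs2str = lambda morphs: "/".join(m[0] for m in morphs if m[0] != "EOS")

    morphs = [("解析", "名詞"), ("する", "動詞"), ("EOS", "")]
    assert morphs2str(morphs) == "解析/する"
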
diff --git a/nlp_resource_data/nltk/corpus/reader/lin.py b/nlp_resource_data/nltk/corpus/reader/lin.py
old mode 100755 (executable)
new mode 100644 (file)
index 49d8a93..613a275
@@ -1,10 +1,9 @@
 # Natural Language Toolkit: Lin's Thesaurus
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Dan Blanchard <dblanchard@ets.org>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.txt
-from __future__ import print_function
 
 import re
 from collections import defaultdict
@@ -22,43 +21,47 @@ class LinThesaurusCorpusReader(CorpusReader):
 
     @staticmethod
     def __defaultdict_factory():
-        ''' Factory for creating defaultdict of defaultdict(dict)s '''
+        """ Factory for creating defaultdict of defaultdict(dict)s """
         return defaultdict(dict)
 
     def __init__(self, root, badscore=0.0):
-        '''
+        """
         Initialize the thesaurus.
 
         :param root: root directory containing thesaurus LISP files
         :type root: C{string}
         :param badscore: the score to give to words which do not appear in each other's sets of synonyms
         :type badscore: C{float}
-        '''
+        """
 
-        super(LinThesaurusCorpusReader, self).__init__(root, r'sim[A-Z]\.lsp')
+        super(LinThesaurusCorpusReader, self).__init__(root, r"sim[A-Z]\.lsp")
         self._thesaurus = defaultdict(LinThesaurusCorpusReader.__defaultdict_factory)
         self._badscore = badscore
-        for path, encoding, fileid in self.abspaths(include_encoding=True, include_fileid=True):
+        for path, encoding, fileid in self.abspaths(
+            include_encoding=True, include_fileid=True
+        ):
             with open(path) as lin_file:
                 first = True
                 for line in lin_file:
                     line = line.strip()
                     # Start of entry
                     if first:
-                        key = LinThesaurusCorpusReader._key_re.sub(r'\1', line)
+                        key = LinThesaurusCorpusReader._key_re.sub(r"\1", line)
                         first = False
                     # End of entry
-                    elif line == '))':
+                    elif line == "))":
                         first = True
                     # Lines with pairs of ngrams and scores
                     else:
-                        split_line = line.split('\t')
+                        split_line = line.split("\t")
                         if len(split_line) == 2:
                             ngram, score = split_line
-                            self._thesaurus[fileid][key][ngram.strip('"')] = float(score)
+                            self._thesaurus[fileid][key][ngram.strip('"')] = float(
+                                score
+                            )
 
     def similarity(self, ngram1, ngram2, fileid=None):
-        '''
+        """
         Returns the similarity score for two ngrams.
 
         :param ngram1: first ngram to compare
@@ -69,7 +72,7 @@ class LinThesaurusCorpusReader(CorpusReader):
         :type fileid: C{string}
         :return: If fileid is specified, just the score for the two ngrams; otherwise,
                  list of tuples of fileids and scores.
-        '''
+        """
         # Entries don't contain themselves, so make sure similarity between item and itself is 1.0
         if ngram1 == ngram2:
             if fileid:
@@ -78,13 +81,26 @@ class LinThesaurusCorpusReader(CorpusReader):
                 return [(fid, 1.0) for fid in self._fileids]
         else:
             if fileid:
-                return self._thesaurus[fileid][ngram1][ngram2] if ngram2 in self._thesaurus[fileid][ngram1] else self._badscore
+                return (
+                    self._thesaurus[fileid][ngram1][ngram2]
+                    if ngram2 in self._thesaurus[fileid][ngram1]
+                    else self._badscore
+                )
             else:
-                return [(fid, (self._thesaurus[fid][ngram1][ngram2] if ngram2 in self._thesaurus[fid][ngram1]
-                                  else self._badscore)) for fid in self._fileids]
+                return [
+                    (
+                        fid,
+                        (
+                            self._thesaurus[fid][ngram1][ngram2]
+                            if ngram2 in self._thesaurus[fid][ngram1]
+                            else self._badscore
+                        ),
+                    )
+                    for fid in self._fileids
+                ]
 
     def scored_synonyms(self, ngram, fileid=None):
-        '''
+        """
         Returns a list of scored synonyms (tuples of synonyms and scores) for the current ngram
 
         :param ngram: ngram to lookup
@@ -94,14 +110,17 @@ class LinThesaurusCorpusReader(CorpusReader):
         :return: If fileid is specified, list of tuples of scores and synonyms; otherwise,
                  list of tuples of fileids and lists, where inner lists consist of tuples of
                  scores and synonyms.
-        '''
+        """
         if fileid:
             return self._thesaurus[fileid][ngram].items()
         else:
-            return [(fileid, self._thesaurus[fileid][ngram].items()) for fileid in self._fileids]
+            return [
+                (fileid, self._thesaurus[fileid][ngram].items())
+                for fileid in self._fileids
+            ]
 
     def synonyms(self, ngram, fileid=None):
-        '''
+        """
         Returns a list of synonyms for the current ngram.
 
         :param ngram: ngram to lookup
@@ -110,27 +129,35 @@ class LinThesaurusCorpusReader(CorpusReader):
         :type fileid: C{string}
         :return: If fileid is specified, list of synonyms; otherwise, list of tuples of fileids and
                  lists, where inner lists contain synonyms.
-        '''
+        """
         if fileid:
             return self._thesaurus[fileid][ngram].keys()
         else:
-            return [(fileid, self._thesaurus[fileid][ngram].keys()) for fileid in self._fileids]
+            return [
+                (fileid, self._thesaurus[fileid][ngram].keys())
+                for fileid in self._fileids
+            ]
 
     def __contains__(self, ngram):
-        '''
+        """
         Determines whether or not the given ngram is in the thesaurus.
 
         :param ngram: ngram to lookup
         :type ngram: C{string}
         :return: whether the given ngram is in the thesaurus.
-        '''
-        return reduce(lambda accum, fileid: accum or (ngram in self._thesaurus[fileid]), self._fileids, False)
+        """
+        return reduce(
+            lambda accum, fileid: accum or (ngram in self._thesaurus[fileid]),
+            self._fileids,
+            False,
+        )
 
 
 ######################################################################
 # Demo
 ######################################################################
 
+
 def demo():
     from nltk.corpus import lin_thesaurus as thes
 
@@ -152,5 +179,5 @@ def demo():
     print(thes.similarity(word1, word2))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/corpus/reader/lin.pyc b/nlp_resource_data/nltk/corpus/reader/lin.pyc
deleted file mode 100755 (executable)
index 59a51ae..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/lin.pyc and /dev/null differ
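
The nested-defaultdict lookup behind similarity() can be mimicked without any corpus files. A hedged toy (thesaurus contents invented; this mirrors only the fileid-given branch above):

    from collections import defaultdict

    badscore = 0.0
    thesaurus = defaultdict(lambda: defaultdict(dict))
    thesaurus["simN.lsp"]["cat"]["dog"] = 0.18

    def similarity(fileid, ngram1, ngram2):
        if ngram1 == ngram2:
            return 1.0  # entries never list themselves
        return thesaurus[fileid][ngram1].get(ngram2, badscore)

    assert similarity("simN.lsp", "cat", "dog") == 0.18
    assert similarity("simN.lsp", "cat", "car") == badscore
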
diff --git a/nlp_resource_data/nltk/corpus/reader/mte.py b/nlp_resource_data/nltk/corpus/reader/mte.py
old mode 100755 (executable)
new mode 100644 (file)
index cd443a1..085f257
@@ -5,8 +5,6 @@ import os
 import re
 from functools import reduce
 
-from six import string_types
-
 from nltk.corpus.reader import concat, TaggedCorpusReader
 from nltk.corpus.reader.xmldocs import XMLCorpusView
 
@@ -14,6 +12,7 @@ from nltk.corpus.reader.xmldocs import XMLCorpusView
 def xpath(root, path, ns):
     return root.findall(path, ns)
 
+
 class MTECorpusView(XMLCorpusView):
     """
     Class for lazy viewing the MTE Corpus.
@@ -23,7 +22,13 @@ class MTECorpusView(XMLCorpusView):
         XMLCorpusView.__init__(self, fileid, tagspec, elt_handler)
 
     def read_block(self, stream, tagspec=None, elt_handler=None):
-        return list(filter(lambda x: x is not None, XMLCorpusView.read_block(self, stream, tagspec, elt_handler)))
+        return list(
+            filter(
+                lambda x: x is not None,
+                XMLCorpusView.read_block(self, stream, tagspec, elt_handler),
+            )
+        )
+
 
 class MTEFileReader:
     """
@@ -31,104 +36,137 @@ class MTEFileReader:
     parses the xml files and does some tag-filtering depending on the
     given method parameters.
     """
-    ns = {'tei': 'http://www.tei-c.org/ns/1.0',
-          'xml': 'http://www.w3.org/XML/1998/namespace'}
-    tag_ns = '{http://www.tei-c.org/ns/1.0}'
-    xml_ns = '{http://www.w3.org/XML/1998/namespace}'
+
+    ns = {
+        "tei": "http://www.tei-c.org/ns/1.0",
+        "xml": "http://www.w3.org/XML/1998/namespace",
+    }
+    tag_ns = "{http://www.tei-c.org/ns/1.0}"
+    xml_ns = "{http://www.w3.org/XML/1998/namespace}"
     word_path = "TEI/text/body/div/div/p/s/(w|c)"
     sent_path = "TEI/text/body/div/div/p/s"
     para_path = "TEI/text/body/div/div/p"
 
-
     def __init__(self, file_path):
         self.__file_path = file_path
 
     @classmethod
-    def _word_elt(self, elt, context):
+    def _word_elt(cls, elt, context):
         return elt.text
 
     @classmethod
-    def _sent_elt(self, elt, context):
-        return [self._word_elt(w, None) for w in xpath(elt, '*', self.ns)]
+    def _sent_elt(cls, elt, context):
+        return [cls._word_elt(w, None) for w in xpath(elt, "*", cls.ns)]
 
     @classmethod
-    def _para_elt(self, elt, context):
-        return [self._sent_elt(s, None) for s in xpath(elt, '*', self.ns)]
+    def _para_elt(cls, elt, context):
+        return [cls._sent_elt(s, None) for s in xpath(elt, "*", cls.ns)]
 
     @classmethod
-    def _tagged_word_elt(self, elt, context):
-        if ('ana' not in elt.attrib):
-            return (elt.text, '')
-
-        if self.__tags == "" and self.__tagset == "msd":
-            return (elt.text, elt.attrib['ana'])
-        elif self.__tags == "" and self.__tagset == "universal":
-            return (elt.text, MTETagConverter.msd_to_universal(elt.attrib['ana']))
+    def _tagged_word_elt(cls, elt, context):
+        if "ana" not in elt.attrib:
+            return (elt.text, "")
+
+        if cls.__tags == "" and cls.__tagset == "msd":
+            return (elt.text, elt.attrib["ana"])
+        elif cls.__tags == "" and cls.__tagset == "universal":
+            return (elt.text, MTETagConverter.msd_to_universal(elt.attrib["ana"]))
         else:
-            tags = re.compile('^' + re.sub("-", ".", self.__tags) + '.*$')
-            if (tags.match(elt.attrib['ana'])):
-                if self.__tagset == "msd":
-                    return (elt.text, elt.attrib['ana'])
+            tags = re.compile("^" + re.sub("-", ".", cls.__tags) + ".*$")
+            if tags.match(elt.attrib["ana"]):
+                if cls.__tagset == "msd":
+                    return (elt.text, elt.attrib["ana"])
                 else:
-                    return (elt.text, MTETagConverter.msd_to_universal(elt.attrib['ana']))
+                    return (
+                        elt.text,
+                        MTETagConverter.msd_to_universal(elt.attrib["ana"]),
+                    )
             else:
                 return None
 
     @classmethod
-    def _tagged_sent_elt(self, elt, context):
-        return list(filter(lambda x: x is not None, [self._tagged_word_elt(w, None) for w in xpath(elt, '*', self.ns)]))
+    def _tagged_sent_elt(cls, elt, context):
+        return list(
+            filter(
+                lambda x: x is not None,
+                [cls._tagged_word_elt(w, None) for w in xpath(elt, "*", cls.ns)],
+            )
+        )
 
     @classmethod
-    def _tagged_para_elt(self, elt, context):
-        return list(filter(lambda x: x is not None, [self._tagged_sent_elt(s, None) for s in xpath(elt, '*', self.ns)]))
+    def _tagged_para_elt(cls, elt, context):
+        return list(
+            filter(
+                lambda x: x is not None,
+                [cls._tagged_sent_elt(s, None) for s in xpath(elt, "*", cls.ns)],
+            )
+        )
 
     @classmethod
-    def _lemma_word_elt(self, elt, context):
-        if ('lemma' not in elt.attrib):
-            return (elt.text, '')
+    def _lemma_word_elt(cls, elt, context):
+        if "lemma" not in elt.attrib:
+            return (elt.text, "")
         else:
-            return (elt.text, elt.attrib['lemma'])
+            return (elt.text, elt.attrib["lemma"])
 
     @classmethod
-    def _lemma_sent_elt(self, elt, context):
-        return [self._lemma_word_elt(w, None) for w in xpath(elt, '*', self.ns)]
+    def _lemma_sent_elt(cls, elt, context):
+        return [cls._lemma_word_elt(w, None) for w in xpath(elt, "*", cls.ns)]
 
     @classmethod
-    def _lemma_para_elt(self, elt, context):
-        return [self._lemma_sent_elt(s, None) for s in xpath(elt, '*', self.ns)]
+    def _lemma_para_elt(cls, elt, context):
+        return [cls._lemma_sent_elt(s, None) for s in xpath(elt, "*", cls.ns)]
 
     def words(self):
-        return MTECorpusView(self.__file_path, MTEFileReader.word_path, MTEFileReader._word_elt)
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.word_path, MTEFileReader._word_elt
+        )
 
     def sents(self):
-        return MTECorpusView(self.__file_path, MTEFileReader.sent_path, MTEFileReader._sent_elt)
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.sent_path, MTEFileReader._sent_elt
+        )
 
     def paras(self):
-        return MTECorpusView(self.__file_path, MTEFileReader.para_path, MTEFileReader._para_elt)
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.para_path, MTEFileReader._para_elt
+        )
 
     def lemma_words(self):
-        return MTECorpusView(self.__file_path, MTEFileReader.word_path, MTEFileReader._lemma_word_elt)
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.word_path, MTEFileReader._lemma_word_elt
+        )
 
     def tagged_words(self, tagset, tags):
         MTEFileReader.__tagset = tagset
         MTEFileReader.__tags = tags
-        return MTECorpusView(self.__file_path, MTEFileReader.word_path, MTEFileReader._tagged_word_elt)
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.word_path, MTEFileReader._tagged_word_elt
+        )
 
     def lemma_sents(self):
-        return MTECorpusView(self.__file_path, MTEFileReader.sent_path, MTEFileReader._lemma_sent_elt)
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.sent_path, MTEFileReader._lemma_sent_elt
+        )
 
     def tagged_sents(self, tagset, tags):
         MTEFileReader.__tagset = tagset
         MTEFileReader.__tags = tags
-        return MTECorpusView(self.__file_path, MTEFileReader.sent_path, MTEFileReader._tagged_sent_elt)
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.sent_path, MTEFileReader._tagged_sent_elt
+        )
 
     def lemma_paras(self):
-        return MTECorpusView(self.__file_path, MTEFileReader.para_path, MTEFileReader._lemma_para_elt)
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.para_path, MTEFileReader._lemma_para_elt
+        )
 
     def tagged_paras(self, tagset, tags):
         MTEFileReader.__tagset = tagset
         MTEFileReader.__tags = tags
-        return MTECorpusView(self.__file_path, MTEFileReader.para_path, MTEFileReader._tagged_para_elt)
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.para_path, MTEFileReader._tagged_para_elt
+        )
 
 
 class MTETagConverter:
@@ -138,9 +176,19 @@ class MTETagConverter:
     """
 
     mapping_msd_universal = {
-        'A': 'ADJ', 'S': 'ADP', 'R': 'ADV', 'C': 'CONJ',
-        'D': 'DET', 'N': 'NOUN', 'M': 'NUM', 'Q': 'PRT',
-        'P': 'PRON', 'V': 'VERB', '.': '.', '-': 'X'}
+        "A": "ADJ",
+        "S": "ADP",
+        "R": "ADV",
+        "C": "CONJ",
+        "D": "DET",
+        "N": "NOUN",
+        "M": "NUM",
+        "Q": "PRT",
+        "P": "PRON",
+        "V": "VERB",
+        ".": ".",
+        "-": "X",
+    }
 
     @staticmethod
     def msd_to_universal(tag):
@@ -153,10 +201,11 @@ class MTETagConverter:
         indicator = tag[0] if not tag[0] == "#" else tag[1]
 
         if not indicator in MTETagConverter.mapping_msd_universal:
-            indicator = '-'
+            indicator = "-"
 
         return MTETagConverter.mapping_msd_universal[indicator]
 
+
 class MTECorpusReader(TaggedCorpusReader):
     """
     Reader for corpora following the TEI-p5 xml scheme, such as MULTEXT-East.
@@ -164,7 +213,7 @@ class MTECorpusReader(TaggedCorpusReader):
     scheme. These tags can be converted to the Universal tagset.
     """
 
-    def __init__(self, root=None, fileids=None, encoding='utf8'):
+    def __init__(self, root=None, fileids=None, encoding="utf8"):
         """
         Construct a new MTECorpusReader for a set of documents
         located at the given root directory.  Example usage:
@@ -179,12 +228,14 @@ class MTECorpusReader(TaggedCorpusReader):
         TaggedCorpusReader.__init__(self, root, fileids, encoding)
 
     def __fileids(self, fileids):
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         # filter out invalid user input
-        fileids = filter(lambda x : x in self._fileids, fileids)
+        fileids = list(filter(lambda x: x in self._fileids, fileids))
         # filter out multext-east source files that are not compatible with the TEI-P5 specification
-        fileids = filter(lambda x : x not in ["oana-bg.xml", "oana-mk.xml"], fileids)
+        fileids = list(filter(lambda x: x not in ["oana-bg.xml", "oana-mk.xml"], fileids))
         if not fileids:
             print("No valid multext-east file specified")
         return fileids
@@ -211,7 +262,12 @@ class MTECorpusReader(TaggedCorpusReader):
         :return: the given file(s) as a list of words and punctuation symbols.
         :rtype: list(str)
         """
-        return  concat([MTEFileReader(os.path.join(self._root, f)).words() for f in self.__fileids(fileids)])
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).words()
+                for f in self.__fileids(fileids)
+            ]
+        )
 
     def sents(self, fileids=None):
         """
@@ -220,7 +276,12 @@ class MTECorpusReader(TaggedCorpusReader):
                  each encoded as a list of word strings
         :rtype: list(list(str))
         """
-        return  concat([MTEFileReader(os.path.join(self._root, f)).sents() for f in self.__fileids(fileids)])
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).sents()
+                for f in self.__fileids(fileids)
+            ]
+        )
 
     def paras(self, fileids=None):
         """
@@ -229,7 +290,12 @@ class MTECorpusReader(TaggedCorpusReader):
                  of sentences, which are in turn encoded as lists of word string
         :rtype: list(list(list(str)))
         """
-        return  concat([MTEFileReader(os.path.join(self._root, f)).paras() for f in self.__fileids(fileids)])
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).paras()
+                for f in self.__fileids(fileids)
+            ]
+        )
 
     def lemma_words(self, fileids=None):
         """
@@ -238,7 +304,12 @@ class MTECorpusReader(TaggedCorpusReader):
                  and punctuation symbols, encoded as tuples (word, lemma)
         :rtype: list(tuple(str,str))
         """
-        return  concat([MTEFileReader(os.path.join(self._root, f)).lemma_words() for f in self.__fileids(fileids)])
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).lemma_words()
+                for f in self.__fileids(fileids)
+            ]
+        )
 
     def tagged_words(self, fileids=None, tagset="msd", tags=""):
         """
@@ -252,7 +323,14 @@ class MTECorpusReader(TaggedCorpusReader):
         :rtype: list(tuple(str, str))
         """
         if tagset == "universal" or tagset == "msd":
-            return concat([MTEFileReader(os.path.join(self._root, f)).tagged_words(tagset, tags) for f in self.__fileids(fileids)])
+            return concat(
+                [
+                    MTEFileReader(os.path.join(self._root, f)).tagged_words(
+                        tagset, tags
+                    )
+                    for f in self.__fileids(fileids)
+                ]
+            )
         else:
             print("Unknown tagset specified.")
 
@@ -264,8 +342,12 @@ class MTECorpusReader(TaggedCorpusReader):
                  lemma (word, lemma)
         :rtype: list(list(tuple(str, str)))
         """
-        return  concat([MTEFileReader(os.path.join(self._root, f)).lemma_sents() for f in self.__fileids(fileids)])
-
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).lemma_sents()
+                for f in self.__fileids(fileids)
+            ]
+        )
 
     def tagged_sents(self, fileids=None, tagset="msd", tags=""):
         """
@@ -279,7 +361,14 @@ class MTECorpusReader(TaggedCorpusReader):
         :rtype: list(list(tuple(str, str)))
         """
         if tagset == "universal" or tagset == "msd":
-            return concat([MTEFileReader(os.path.join(self._root, f)).tagged_sents(tagset, tags) for f in self.__fileids(fileids)])
+            return concat(
+                [
+                    MTEFileReader(os.path.join(self._root, f)).tagged_sents(
+                        tagset, tags
+                    )
+                    for f in self.__fileids(fileids)
+                ]
+            )
         else:
             print("Unknown tagset specified.")
 
@@ -291,7 +380,12 @@ class MTECorpusReader(TaggedCorpusReader):
                  tuples of the word and the corresponding lemma (word, lemma)
         :rtype: list(List(List(tuple(str, str))))
         """
-        return concat([MTEFileReader(os.path.join(self._root, f)).lemma_paras() for f in self.__fileids(fileids)])
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).lemma_paras()
+                for f in self.__fileids(fileids)
+            ]
+        )
 
     def tagged_paras(self, fileids=None, tagset="msd", tags=""):
         """
@@ -306,6 +400,13 @@ class MTECorpusReader(TaggedCorpusReader):
         :rtype: list(list(list(tuple(str, str))))
         """
         if tagset == "universal" or tagset == "msd":
-            return concat([MTEFileReader(os.path.join(self._root, f)).tagged_paras(tagset, tags) for f in self.__fileids(fileids)])
+            return concat(
+                [
+                    MTEFileReader(os.path.join(self._root, f)).tagged_paras(
+                        tagset, tags
+                    )
+                    for f in self.__fileids(fileids)
+                ]
+            )
         else:
             print("Unknown tagset specified.")
diff --git a/nlp_resource_data/nltk/corpus/reader/mte.pyc b/nlp_resource_data/nltk/corpus/reader/mte.pyc
deleted file mode 100755 (executable)
index d5d0838..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/mte.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/nkjp.py b/nlp_resource_data/nltk/corpus/reader/nkjp.py
old mode 100755 (executable)
new mode 100644 (file)
index 6f141a2..23be4b6
@@ -1,19 +1,17 @@
 # Natural Language Toolkit: NKJP Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Gabriela Kaczka
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 import functools
 import os
+import re
 import tempfile
 
-from six import string_types
-
 from nltk.corpus.reader.util import concat
 from nltk.corpus.reader.xmldocs import XMLCorpusReader, XMLCorpusView
-import re
 
 
 def _parse_args(fun):
@@ -21,6 +19,7 @@ def _parse_args(fun):
     Wraps function arguments:
     if fileids are not specified, then the function sets them to the NKJPCorpusReader paths.
     """
+
     @functools.wraps(fun)
     def decorator(self, fileids=None, **kwargs):
         if not fileids:
@@ -36,7 +35,7 @@ class NKJPCorpusReader(XMLCorpusReader):
     HEADER_MODE = 2
     RAW_MODE = 3
 
-    def __init__(self, root, fileids='.*'):
+    def __init__(self, root, fileids=".*"):
         """
         Corpus reader designed to work with the National Corpus of Polish.
         See http://nkjp.pl/ for more details about NKJP.
@@ -54,14 +53,19 @@ class NKJPCorpusReader(XMLCorpusReader):
         x.header(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'])
         x.tagged_words(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'], tags=['subst', 'comp'])
         """
-        if isinstance(fileids, string_types):
-            XMLCorpusReader.__init__(self, root, fileids + '.*/header.xml')
+        if isinstance(fileids, str):
+            XMLCorpusReader.__init__(self, root, fileids + ".*/header.xml")
         else:
-            XMLCorpusReader.__init__(self, root, [fileid + '/header.xml' for fileid in fileids])
+            XMLCorpusReader.__init__(
+                self, root, [fileid + "/header.xml" for fileid in fileids]
+            )
         self._paths = self.get_paths()
 
     def get_paths(self):
-        return [os.path.join(str(self._root), f.split("header.xml")[0]) for f in self._fileids]
+        return [
+            os.path.join(str(self._root), f.split("header.xml")[0])
+            for f in self._fileids
+        ]
 
     def fileids(self):
         """
@@ -74,7 +78,7 @@ class NKJPCorpusReader(XMLCorpusReader):
         """
         Returns a view specialised for use with a particular corpus file.
         """
-        mode = kwargs.pop('mode', NKJPCorpusReader.WORDS_MODE)
+        mode = kwargs.pop("mode", NKJPCorpusReader.WORDS_MODE)
         if mode is NKJPCorpusReader.WORDS_MODE:
             return NKJPCorpus_Morph_View(filename, tags=tags)
         elif mode is NKJPCorpusReader.SENTS_MODE:
@@ -82,10 +86,12 @@ class NKJPCorpusReader(XMLCorpusReader):
         elif mode is NKJPCorpusReader.HEADER_MODE:
             return NKJPCorpus_Header_View(filename, tags=tags)
         elif mode is NKJPCorpusReader.RAW_MODE:
-            return NKJPCorpus_Text_View(filename, tags=tags, mode=NKJPCorpus_Text_View.RAW_MODE)
+            return NKJPCorpus_Text_View(
+                filename, tags=tags, mode=NKJPCorpus_Text_View.RAW_MODE
+            )
 
         else:
-            raise NameError('No such mode!')
+            raise NameError("No such mode!")
 
     def add_root(self, fileid):
         """
@@ -100,18 +106,28 @@ class NKJPCorpusReader(XMLCorpusReader):
         """
         Returns header(s) of specified fileids.
         """
-        return concat([self._view(self.add_root(fileid),
-                                  mode=NKJPCorpusReader.HEADER_MODE, **kwargs).handle_query()
-                       for fileid in fileids])
+        return concat(
+            [
+                self._view(
+                    self.add_root(fileid), mode=NKJPCorpusReader.HEADER_MODE, **kwargs
+                ).handle_query()
+                for fileid in fileids
+            ]
+        )
 
     @_parse_args
     def sents(self, fileids=None, **kwargs):
         """
         Returns sentences in specified fileids.
         """
-        return concat([self._view(self.add_root(fileid),
-                                  mode=NKJPCorpusReader.SENTS_MODE, **kwargs).handle_query()
-                       for fileid in fileids])
+        return concat(
+            [
+                self._view(
+                    self.add_root(fileid), mode=NKJPCorpusReader.SENTS_MODE, **kwargs
+                ).handle_query()
+                for fileid in fileids
+            ]
+        )
 
     @_parse_args
     def words(self, fileids=None, **kwargs):
@@ -119,9 +135,14 @@ class NKJPCorpusReader(XMLCorpusReader):
         Returns words in specified fileids.
         """
 
-        return concat([self._view(self.add_root(fileid),
-                                  mode=NKJPCorpusReader.WORDS_MODE, **kwargs).handle_query()
-                       for fileid in fileids])
+        return concat(
+            [
+                self._view(
+                    self.add_root(fileid), mode=NKJPCorpusReader.WORDS_MODE, **kwargs
+                ).handle_query()
+                for fileid in fileids
+            ]
+        )
 
     @_parse_args
     def tagged_words(self, fileids=None, **kwargs):
@@ -129,23 +150,35 @@ class NKJPCorpusReader(XMLCorpusReader):
         Call with specified tags as a list, e.g. tags=['subst', 'comp'].
         Returns tagged words in specified fileids.
         """
-        tags = kwargs.pop('tags', [])
-        return concat([self._view(self.add_root(fileid),
-                                  mode=NKJPCorpusReader.WORDS_MODE, tags=tags, **kwargs).handle_query()
-                       for fileid in fileids])
+        tags = kwargs.pop("tags", [])
+        return concat(
+            [
+                self._view(
+                    self.add_root(fileid),
+                    mode=NKJPCorpusReader.WORDS_MODE,
+                    tags=tags,
+                    **kwargs
+                ).handle_query()
+                for fileid in fileids
+            ]
+        )
 
     @_parse_args
     def raw(self, fileids=None, **kwargs):
         """
         Returns raw text of the specified fileids.
         """
-        return concat([self._view(self.add_root(fileid),
-                                  mode=NKJPCorpusReader.RAW_MODE, **kwargs).handle_query()
-                       for fileid in fileids])
+        return concat(
+            [
+                self._view(
+                    self.add_root(fileid), mode=NKJPCorpusReader.RAW_MODE, **kwargs
+                ).handle_query()
+                for fileid in fileids
+            ]
+        )
 
 
 class NKJPCorpus_Header_View(XMLCorpusView):
-
     def __init__(self, filename, **kwargs):
         """
         HEADER_MODE
@@ -153,7 +186,7 @@ class NKJPCorpus_Header_View(XMLCorpusView):
         header.xml files in NKJP corpus.
         """
         self.tagspec = ".*/sourceDesc$"
-        XMLCorpusView.__init__(self, filename + 'header.xml', self.tagspec)
+        XMLCorpusView.__init__(self, filename + "header.xml", self.tagspec)
 
     def handle_query(self):
         self._open()
@@ -167,67 +200,74 @@ class NKJPCorpus_Header_View(XMLCorpusView):
         return header
 
     def handle_elt(self, elt, context):
-        titles = elt.findall('bibl/title')
+        titles = elt.findall("bibl/title")
         title = []
         if titles:
-            title = '\n'.join(title.text.strip() for title in titles)
+            title = "\n".join(title.text.strip() for title in titles)
 
-        authors = elt.findall('bibl/author')
+        authors = elt.findall("bibl/author")
         author = []
         if authors:
-            author = '\n'.join(author.text.strip() for author in authors)
+            author = "\n".join(author.text.strip() for author in authors)
 
-        dates = elt.findall('bibl/date')
+        dates = elt.findall("bibl/date")
         date = []
         if dates:
-            date = '\n'.join(date.text.strip() for date in dates)
+            date = "\n".join(date.text.strip() for date in dates)
 
-        publishers = elt.findall('bibl/publisher')
+        publishers = elt.findall("bibl/publisher")
         publisher = []
         if publishers:
-            publisher = '\n'.join(publisher.text.strip() for publisher in publishers)
+            publisher = "\n".join(publisher.text.strip() for publisher in publishers)
 
-        idnos = elt.findall('bibl/idno')
+        idnos = elt.findall("bibl/idno")
         idno = []
         if idnos:
-            idno = '\n'.join(idno.text.strip() for idno in idnos)
+            idno = "\n".join(idno.text.strip() for idno in idnos)
 
-        notes = elt.findall('bibl/note')
+        notes = elt.findall("bibl/note")
         note = []
         if notes:
-            note = '\n'.join(note.text.strip() for note in notes)
+            note = "\n".join(note.text.strip() for note in notes)
 
-        return {'title': title, 'author': author, 'date': date, 'publisher': publisher,
-                'idno': idno, 'note': note}
+        return {
+            "title": title,
+            "author": author,
+            "date": date,
+            "publisher": publisher,
+            "idno": idno,
+            "note": note,
+        }
 
 
-class XML_Tool():
+class XML_Tool:
     """
     Helper class that rewrites an XML file into one without references to the nkjp: namespace.
     That is needed because XMLCorpusView assumes that one can find short substrings
     of XML that are valid XML, which is not true if a namespace is declared at top level.
     """
+
     def __init__(self, root, filename):
         self.read_file = os.path.join(root, filename)
-        self.write_file = tempfile.NamedTemporaryFile(delete=False)
+        self.write_file = tempfile.NamedTemporaryFile("w", delete=False)
 
     def build_preprocessed_file(self):
         try:
-            fr = open(self.read_file, 'r')
+            fr = open(self.read_file, "r")
             fw = self.write_file
-            line = ' '
+            line = " "
             while len(line):
                 line = fr.readline()
-                x = re.split(r'nkjp:[^ ]* ', line)  #in all files
-                ret = ' '.join(x)
-                x = re.split('<nkjp:paren>', ret)   #in ann_segmentation.xml
-                ret = ' '.join(x)
-                x = re.split('</nkjp:paren>', ret)  #in ann_segmentation.xml
-                ret = ' '.join(x)
-                x = re.split('<choice>', ret)   #in ann_segmentation.xml
-                ret = ' '.join(x)
-                x = re.split('</choice>', ret)  #in ann_segmentation.xml
-                ret = ' '.join(x)
+                x = re.split(r"nkjp:[^ ]* ", line)  # in all files
+                ret = " ".join(x)
+                x = re.split("<nkjp:paren>", ret)  # in ann_segmentation.xml
+                ret = " ".join(x)
+                x = re.split("</nkjp:paren>", ret)  # in ann_segmentation.xml
+                ret = " ".join(x)
+                x = re.split("<choice>", ret)  # in ann_segmentation.xml
+                ret = " ".join(x)
+                x = re.split("</choice>", ret)  # in ann_segmentation.xml
+                ret = " ".join(x)
                 fw.write(ret)
             fr.close()
             fw.close()
@@ -238,7 +278,6 @@ class XML_Tool():
 
     def remove_preprocessed_file(self):
         os.remove(self.write_file.name)
-        pass
 
 
 class NKJPCorpus_Segmentation_View(XMLCorpusView):
@@ -248,33 +287,37 @@ class NKJPCorpus_Segmentation_View(XMLCorpusView):
     """
 
     def __init__(self, filename, **kwargs):
-        self.tagspec = '.*p/.*s'
-        #intersperse NKJPCorpus_Text_View
-        self.text_view = NKJPCorpus_Text_View(filename, mode=NKJPCorpus_Text_View.SENTS_MODE)
+        self.tagspec = ".*p/.*s"
+        # intersperse NKJPCorpus_Text_View
+        self.text_view = NKJPCorpus_Text_View(
+            filename, mode=NKJPCorpus_Text_View.SENTS_MODE
+        )
         self.text_view.handle_query()
-        #xml preprocessing
-        self.xml_tool = XML_Tool(filename, 'ann_segmentation.xml')
-        #base class init
-        XMLCorpusView.__init__(self, self.xml_tool.build_preprocessed_file(), self.tagspec)
+        # xml preprocessing
+        self.xml_tool = XML_Tool(filename, "ann_segmentation.xml")
+        # base class init
+        XMLCorpusView.__init__(
+            self, self.xml_tool.build_preprocessed_file(), self.tagspec
+        )
 
     def get_segm_id(self, example_word):
-        return example_word.split('(')[1].split(',')[0]
+        return example_word.split("(")[1].split(",")[0]
 
     def get_sent_beg(self, beg_word):
-        #returns index of beginning letter in sentence
-        return int(beg_word.split(',')[1])
+        # returns index of beginning letter in sentence
+        return int(beg_word.split(",")[1])
 
     def get_sent_end(self, end_word):
-        #returns index of end letter in sentence
-        splitted = end_word.split(')')[0].split(',')
+        # returns index of end letter in sentence
+        splitted = end_word.split(")")[0].split(",")
         return int(splitted[1]) + int(splitted[2])
 
     def get_sentences(self, sent_segm):
-        #returns one sentence
+        # returns one sentence
         id = self.get_segm_id(sent_segm[0])
-        segm = self.text_view.segm_dict[id]    #text segment
+        segm = self.text_view.segm_dict[id]  # text segment
         beg = self.get_sent_beg(sent_segm[0])
-        end = self.get_sent_end(sent_segm[len(sent_segm)-1])
+        end = self.get_sent_end(sent_segm[len(sent_segm) - 1])
         return segm[beg:end]
 
     def remove_choice(self, segm):
@@ -283,8 +326,8 @@ class NKJPCorpus_Segmentation_View(XMLCorpusView):
         prev_txt_nr = -1
         for word in segm:
             txt_nr = self.get_segm_id(word)
-            #get increasing sequence of ids: in case of choice get first possibility
-            if self.get_sent_beg(word) > prev_txt_end-1 or prev_txt_nr != txt_nr:
+            # get increasing sequence of ids: in case of choice get first possibility
+            if self.get_sent_beg(word) > prev_txt_end - 1 or prev_txt_nr != txt_nr:
                 ret.append(word)
                 prev_txt_end = self.get_sent_end(word)
             prev_txt_nr = txt_nr
@@ -312,7 +355,7 @@ class NKJPCorpus_Segmentation_View(XMLCorpusView):
     def handle_elt(self, elt, context):
         ret = []
         for seg in elt:
-            ret.append(seg.get('corresp'))
+            ret.append(seg.get("corresp"))
         return ret
 
 
@@ -321,17 +364,20 @@ class NKJPCorpus_Text_View(XMLCorpusView):
     A stream backed corpus view specialized for use with
     text.xml files in NKJP corpus.
     """
+
     SENTS_MODE = 0
     RAW_MODE = 1
 
     def __init__(self, filename, **kwargs):
-        self.mode = kwargs.pop('mode', 0)
-        self.tagspec = '.*/div/ab'
+        self.mode = kwargs.pop("mode", 0)
+        self.tagspec = ".*/div/ab"
         self.segm_dict = dict()
-        #xml preprocessing
-        self.xml_tool = XML_Tool(filename, 'text.xml')
-        #base class init
-        XMLCorpusView.__init__(self, self.xml_tool.build_preprocessed_file(), self.tagspec)
+        # xml preprocessing
+        self.xml_tool = XML_Tool(filename, "text.xml")
+        # base class init
+        XMLCorpusView.__init__(
+            self, self.xml_tool.build_preprocessed_file(), self.tagspec
+        )
 
     def handle_query(self):
         try:
@@ -356,15 +402,15 @@ class NKJPCorpus_Text_View(XMLCorpusView):
             for part in segm:
                 txt.append(part)
 
-        return [' '.join([segm for segm in txt])]
+        return [" ".join([segm for segm in txt])]
 
     def get_segm_id(self, elt):
         for attr in elt.attrib:
-            if attr.endswith('id'):
+            if attr.endswith("id"):
                 return elt.get(attr)
 
     def handle_elt(self, elt, context):
-        #fill dictionary to use later in sents mode
+        # fill dictionary to use later in sents mode
         if self.mode is NKJPCorpus_Text_View.SENTS_MODE:
             self.segm_dict[self.get_segm_id(elt)] = elt.text
         return elt.text
@@ -377,10 +423,12 @@ class NKJPCorpus_Morph_View(XMLCorpusView):
     """
 
     def __init__(self, filename, **kwargs):
-        self.tags = kwargs.pop('tags', None)
-        self.tagspec = '.*/seg/fs'
-        self.xml_tool = XML_Tool(filename, 'ann_morphosyntax.xml')
-        XMLCorpusView.__init__(self, self.xml_tool.build_preprocessed_file(), self.tagspec)
+        self.tags = kwargs.pop("tags", None)
+        self.tagspec = ".*/seg/fs"
+        self.xml_tool = XML_Tool(filename, "ann_morphosyntax.xml")
+        XMLCorpusView.__init__(
+            self, self.xml_tool.build_preprocessed_file(), self.tagspec
+        )
 
     def handle_query(self):
         try:
@@ -401,29 +449,39 @@ class NKJPCorpus_Morph_View(XMLCorpusView):
             raise Exception
 
     def handle_elt(self, elt, context):
-        word = ''
+        word = ""
         flag = False
         is_not_interp = True
-        #if tags not specified, then always return word
+        # if tags not specified, then always return word
         if self.tags is None:
             flag = True
 
         for child in elt:
 
-            #get word
-            if 'name' in child.keys() and child.attrib['name'] == 'orth':
+            # get word
+            if "name" in child.keys() and child.attrib["name"] == "orth":
                 for symbol in child:
-                    if symbol.tag == 'string':
+                    if symbol.tag == "string":
                         word = symbol.text
-            elif 'name' in child.keys() and child.attrib['name'] == 'interps':
+            elif "name" in child.keys() and child.attrib["name"] == "interps":
                 for symbol in child:
-                    if 'type' in symbol.keys() and symbol.attrib['type'] == 'lex':
+                    if "type" in symbol.keys() and symbol.attrib["type"] == "lex":
                         for symbol2 in symbol:
-                            if 'name' in symbol2.keys() and symbol2.attrib['name'] == 'ctag':
+                            if (
+                                "name" in symbol2.keys()
+                                and symbol2.attrib["name"] == "ctag"
+                            ):
                                 for symbol3 in symbol2:
-                                    if 'value' in symbol3.keys() and self.tags is not None and symbol3.attrib['value'] in self.tags:
+                                    if (
+                                        "value" in symbol3.keys()
+                                        and self.tags is not None
+                                        and symbol3.attrib["value"] in self.tags
+                                    ):
                                         flag = True
-                                    elif 'value' in symbol3.keys() and symbol3.attrib['value'] == 'interp':
+                                    elif (
+                                        "value" in symbol3.keys()
+                                        and symbol3.attrib["value"] == "interp"
+                                    ):
                                         is_not_interp = False
         if flag and is_not_interp:
             return word
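
As a rough usage sketch (the root path and fileid echo the hypothetical
examples in the docstring above; adjust them to a local NKJP checkout):

    from nltk.corpus.reader.nkjp import NKJPCorpusReader

    x = NKJPCorpusReader(root="/home/USER/nltk_data/corpora/nkjp/", fileids="WilkDom")
    print(x.header()[0]["title"])                       # metadata from header.xml
    print(x.words()[:10])                               # orth values from ann_morphosyntax.xml
    print(x.tagged_words(tags=["subst", "comp"])[:10])  # filtered by ctag value
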
diff --git a/nlp_resource_data/nltk/corpus/reader/nkjp.pyc b/nlp_resource_data/nltk/corpus/reader/nkjp.pyc
deleted file mode 100755 (executable)
index 54e51b4..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/nkjp.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/nombank.py b/nlp_resource_data/nltk/corpus/reader/nombank.py
old mode 100755 (executable)
new mode 100644 (file)
index c6d7d16..06740d0
@@ -1,24 +1,21 @@
 # Natural Language Toolkit: NomBank Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors: Paul Bedaride <paul.bedaride@gmail.com>
 #          Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import unicode_literals
 from xml.etree import ElementTree
 from functools import total_ordering
 
-from six import string_types
-
 from nltk.tree import Tree
 from nltk.internals import raise_unorderable_types
-from nltk.compat import python_2_unicode_compatible
 
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
+
 class NombankCorpusReader(CorpusReader):
     """
     Corpus reader for the nombank corpus, which augments the Penn
@@ -32,9 +29,17 @@ class NombankCorpusReader(CorpusReader):
     each "roleset", the frameset file provides descriptions of the
     argument roles, along with examples.
     """
-    def __init__(self, root, nomfile, framefiles='',
-                 nounsfile=None, parse_fileid_xform=None,
-                 parse_corpus=None, encoding='utf8'):
+
+    def __init__(
+        self,
+        root,
+        nomfile,
+        framefiles="",
+        nounsfile=None,
+        parse_fileid_xform=None,
+        parse_corpus=None,
+        encoding="utf8",
+    ):
         """
         :param root: The root directory for this corpus.
         :param nomfile: The name of the file containing the predicate-
@@ -49,17 +54,16 @@ class NombankCorpusReader(CorpusReader):
             corresponding to this corpus.  These parse trees are
             necessary to resolve the tree pointers used by nombank.
         """
+
         # If framefiles is specified as a regexp, expand it.
-        if isinstance(framefiles, string_types):
-            framefiles = find_corpus_fileids(root, framefiles)
-        framefiles = list(framefiles)
+        if isinstance(framefiles, str):
+            framefiles = find_corpus_fileids(root, framefiles)
+        framefiles = list(framefiles)
         # Initialize the corpus reader.
-        CorpusReader.__init__(self, root, [nomfile, nounsfile] + framefiles,
-                              encoding)
+        CorpusReader.__init__(self, root, framefiles, encoding)
 
-        # Record our frame fileids & nom file.
+        # Record our nom file & nouns file.
         self._nomfile = nomfile
-        self._framefiles = framefiles
         self._nounsfile = nounsfile
         self._parse_fileid_xform = parse_fileid_xform
         self._parse_corpus = parse_corpus
@@ -68,8 +72,10 @@ class NombankCorpusReader(CorpusReader):
         """
         :return: the text contents of the given fileids, as a single string.
         """
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def instances(self, baseform=None):
@@ -79,59 +85,63 @@ class NombankCorpusReader(CorpusReader):
         """
         kwargs = {}
         if baseform is not None:
-            kwargs['instance_filter'] = lambda inst: inst.baseform==baseform
-        return StreamBackedCorpusView(self.abspath(self._nomfile),
-                                      lambda stream: self._read_instance_block(stream, **kwargs),
-                                      encoding=self.encoding(self._nomfile))
+            kwargs["instance_filter"] = lambda inst: inst.baseform == baseform
+        return StreamBackedCorpusView(
+            self.abspath(self._nomfile),
+            lambda stream: self._read_instance_block(stream, **kwargs),
+            encoding=self.encoding(self._nomfile),
+        )
 
     def lines(self):
         """
         :return: a corpus view that acts as a list of strings, one for
         each line in the predicate-argument annotation file.
         """
-        return StreamBackedCorpusView(self.abspath(self._nomfile),
-                                      read_line_block,
-                                      encoding=self.encoding(self._nomfile))
+        return StreamBackedCorpusView(
+            self.abspath(self._nomfile),
+            read_line_block,
+            encoding=self.encoding(self._nomfile),
+        )
 
     def roleset(self, roleset_id):
         """
         :return: the xml description for the given roleset.
         """
-        baseform = roleset_id.split('.')[0]
-        baseform = baseform.replace('perc-sign','%')
-        baseform = baseform.replace('oneslashonezero', '1/10').replace('1/10','1-slash-10')
-        framefile = 'frames/%s.xml' % baseform
-        if framefile not in self._framefiles:
-            raise ValueError('Frameset file for %s not found' %
-                             roleset_id)
+        baseform = roleset_id.split(".")[0]
+        baseform = baseform.replace("perc-sign", "%")
+        baseform = baseform.replace("oneslashonezero", "1/10").replace(
+            "1/10", "1-slash-10"
+        )
+        framefile = "frames/%s.xml" % baseform
+        if framefile not in self.fileids():
+            raise ValueError("Frameset file for %s not found" % roleset_id)
 
         # n.b.: The encoding for XML fileids is specified by the file
         # itself; so we ignore self._encoding here.
         etree = ElementTree.parse(self.abspath(framefile).open()).getroot()
-        for roleset in etree.findall('predicate/roleset'):
-            if roleset.attrib['id'] == roleset_id:
+        for roleset in etree.findall("predicate/roleset"):
+            if roleset.attrib["id"] == roleset_id:
                 return roleset
-        raise ValueError('Roleset %s not found in %s' % (roleset_id, framefile))
+        raise ValueError("Roleset %s not found in %s" % (roleset_id, framefile))
 
     def rolesets(self, baseform=None):
         """
         :return: list of xml descriptions for rolesets.
         """
         if baseform is not None:
-            framefile = 'frames/%s.xml' % baseform
-            if framefile not in self._framefiles:
-                raise ValueError('Frameset file for %s not found' %
-                                 baseform)
+            framefile = "frames/%s.xml" % baseform
+            if framefile not in self.fileids():
+                raise ValueError("Frameset file for %s not found" % baseform)
             framefiles = [framefile]
         else:
-            framefiles = self._framefiles
+            framefiles = self.fileids()
 
         rsets = []
         for framefile in framefiles:
             # n.b.: The encoding for XML fileids is specified by the file
             # itself; so we ignore self._encoding here.
             etree = ElementTree.parse(self.abspath(framefile).open()).getroot()
-            rsets.append(etree.findall('predicate/roleset'))
+            rsets.append(etree.findall("predicate/roleset"))
         return LazyConcatenation(rsets)
 
     def nouns(self):
@@ -139,9 +149,11 @@ class NombankCorpusReader(CorpusReader):
         :return: a corpus view that acts as a list of all noun lemmas
         in this corpus (from the nombank.1.0.words file).
         """
-        return StreamBackedCorpusView(self.abspath(self._nounsfile),
-                                      read_line_block,
-                                      encoding=self.encoding(self._nounsfile))
+        return StreamBackedCorpusView(
+            self.abspath(self._nounsfile),
+            read_line_block,
+            encoding=self.encoding(self._nounsfile),
+        )
 
     def _read_instance_block(self, stream, instance_filter=lambda inst: True):
         block = []
@@ -151,22 +163,32 @@ class NombankCorpusReader(CorpusReader):
             line = stream.readline().strip()
             if line:
                 inst = NombankInstance.parse(
-                    line, self._parse_fileid_xform,
-                    self._parse_corpus)
+                    line, self._parse_fileid_xform, self._parse_corpus
+                )
                 if instance_filter(inst):
                     block.append(inst)
 
         return block
 
+
 ######################################################################
-#{ Nombank Instance & related datatypes
+# { Nombank Instance & related datatypes
 ######################################################################
 
-@python_2_unicode_compatible
-class NombankInstance(object):
 
-    def __init__(self, fileid, sentnum, wordnum, baseform, sensenumber,
-                 predicate, predid, arguments, parse_corpus=None):
+class NombankInstance(object):
+    def __init__(
+        self,
+        fileid,
+        sentnum,
+        wordnum,
+        baseform,
+        sensenumber,
+        predicate,
+        predid,
+        arguments,
+        parse_corpus=None,
+    ):
 
         self.fileid = fileid
         """The name of the file containing the parse tree for this
@@ -210,44 +232,57 @@ class NombankInstance(object):
         """The name of the roleset used by this instance's predicate.
         Use ``nombank.roleset() <NombankCorpusReader.roleset>`` to
         look up information about the roleset."""
-        r = self.baseform.replace('%', 'perc-sign')
-        r = r.replace('1/10', '1-slash-10').replace('1-slash-10', 'oneslashonezero')
-        return '%s.%s'%(r, self.sensenumber)
+        r = self.baseform.replace("%", "perc-sign")
+        r = r.replace("1/10", "1-slash-10").replace("1-slash-10", "oneslashonezero")
+        return "%s.%s" % (r, self.sensenumber)
 
     def __repr__(self):
-        return ('<NombankInstance: %s, sent %s, word %s>' %
-                (self.fileid, self.sentnum, self.wordnum))
+        return "<NombankInstance: %s, sent %s, word %s>" % (
+            self.fileid,
+            self.sentnum,
+            self.wordnum,
+        )
 
     def __str__(self):
-        s = '%s %s %s %s %s' % (self.fileid, self.sentnum, self.wordnum,
-                                self.baseform, self.sensenumber)
-        items = self.arguments + ((self.predicate, 'rel'),)
+        s = "%s %s %s %s %s" % (
+            self.fileid,
+            self.sentnum,
+            self.wordnum,
+            self.baseform,
+            self.sensenumber,
+        )
+        items = self.arguments + ((self.predicate, "rel"),)
         for (argloc, argid) in sorted(items):
-            s += ' %s-%s' % (argloc, argid)
+            s += " %s-%s" % (argloc, argid)
         return s
 
     def _get_tree(self):
-        if self.parse_corpus is None: return None
-        if self.fileid not in self.parse_corpus.fileids(): return None
+        if self.parse_corpus is None:
+            return None
+        if self.fileid not in self.parse_corpus.fileids():
+            return None
         return self.parse_corpus.parsed_sents(self.fileid)[self.sentnum]
-    tree = property(_get_tree, doc="""
+
+    tree = property(
+        _get_tree,
+        doc="""
         The parse tree corresponding to this instance, or None if
-        the corresponding tree is not available.""")
+        the corresponding tree is not available.""",
+    )
 
     @staticmethod
     def parse(s, parse_fileid_xform=None, parse_corpus=None):
         pieces = s.split()
         if len(pieces) < 6:
-            raise ValueError('Badly formatted nombank line: %r' % s)
+            raise ValueError("Badly formatted nombank line: %r" % s)
 
         # Divide the line into its basic pieces.
-        (fileid, sentnum, wordnum,
-          baseform, sensenumber) = pieces[:5]
+        (fileid, sentnum, wordnum, baseform, sensenumber) = pieces[:5]
 
         args = pieces[5:]
-        rel = [args.pop(i) for i,p in enumerate(args) if '-rel' in p]
+        rel = [args.pop(i) for i, p in enumerate(args) if "-rel" in p]
         if len(rel) != 1:
-            raise ValueError('Badly formatted nombank line: %r' % s)
+            raise ValueError("Badly formatted nombank line: %r" % s)
 
         # Apply the fileid selector, if any.
         if parse_fileid_xform is not None:
@@ -259,18 +294,28 @@ class NombankInstance(object):
 
         # Parse the predicate location.
 
-        predloc, predid = rel[0].split('-', 1)
+        predloc, predid = rel[0].split("-", 1)
         predicate = NombankTreePointer.parse(predloc)
 
         # Parse the arguments.
         arguments = []
         for arg in args:
-            argloc, argid = arg.split('-', 1)
-            arguments.append( (NombankTreePointer.parse(argloc), argid) )
+            argloc, argid = arg.split("-", 1)
+            arguments.append((NombankTreePointer.parse(argloc), argid))
 
         # Put it all together.
-        return NombankInstance(fileid, sentnum, wordnum, baseform, sensenumber,
-                               predicate, predid, arguments, parse_corpus)
+        return NombankInstance(
+            fileid,
+            sentnum,
+            wordnum,
+            baseform,
+            sensenumber,
+            predicate,
+            predid,
+            arguments,
+            parse_corpus,
+        )
+
 
 class NombankPointer(object):
     """
@@ -286,11 +331,12 @@ class NombankPointer(object):
       chains in a tree.  It consists of a sequence of pieces, which
       can be ``NombankTreePointer`` or ``NombankSplitTreePointer`` pointers.
     """
+
     def __init__(self):
         if self.__class__ == NombankPointer:
             raise NotImplementedError()
 
-@python_2_unicode_compatible
+
 class NombankChainTreePointer(NombankPointer):
     def __init__(self, pieces):
         self.pieces = pieces
@@ -299,14 +345,17 @@ class NombankChainTreePointer(NombankPointer):
            ``NombankTreePointer`` pointers."""
 
     def __str__(self):
-        return '*'.join('%s' % p for p in self.pieces)
+        return "*".join("%s" % p for p in self.pieces)
+
     def __repr__(self):
-        return '<NombankChainTreePointer: %s>' % self
+        return "<NombankChainTreePointer: %s>" % self
+
     def select(self, tree):
-        if tree is None: raise ValueError('Parse tree not avaialable')
-        return Tree('*CHAIN*', [p.select(tree) for p in self.pieces])
+        if tree is None:
+            raise ValueError("Parse tree not avaialable")
+        return Tree("*CHAIN*", [p.select(tree) for p in self.pieces])
+
 
-@python_2_unicode_compatible
 class NombankSplitTreePointer(NombankPointer):
     def __init__(self, pieces):
         self.pieces = pieces
@@ -314,21 +363,25 @@ class NombankSplitTreePointer(NombankPointer):
            all ``NombankTreePointer`` pointers."""
 
     def __str__(self):
-        return ','.join('%s' % p for p in self.pieces)
+        return ",".join("%s" % p for p in self.pieces)
+
     def __repr__(self):
-        return '<NombankSplitTreePointer: %s>' % self
+        return "<NombankSplitTreePointer: %s>" % self
+
     def select(self, tree):
-        if tree is None: raise ValueError('Parse tree not avaialable')
-        return Tree('*SPLIT*', [p.select(tree) for p in self.pieces])
+        if tree is None:
+            raise ValueError("Parse tree not avaialable")
+        return Tree("*SPLIT*", [p.select(tree) for p in self.pieces])
+
 
 @total_ordering
-@python_2_unicode_compatible
 class NombankTreePointer(NombankPointer):
     """
     wordnum:height*wordnum:height*...
     wordnum:height,
 
     """
+
     def __init__(self, wordnum, height):
         self.wordnum = wordnum
         self.height = height
@@ -336,44 +389,45 @@ class NombankTreePointer(NombankPointer):
     @staticmethod
     def parse(s):
         # Deal with chains (xx*yy*zz)
-        pieces = s.split('*')
+        pieces = s.split("*")
         if len(pieces) > 1:
-            return NombankChainTreePointer([NombankTreePointer.parse(elt)
-                                              for elt in pieces])
+            return NombankChainTreePointer(
+                [NombankTreePointer.parse(elt) for elt in pieces]
+            )
 
         # Deal with split args (xx,yy,zz)
-        pieces = s.split(',')
+        pieces = s.split(",")
         if len(pieces) > 1:
-            return NombankSplitTreePointer([NombankTreePointer.parse(elt)
-                                             for elt in pieces])
+            return NombankSplitTreePointer(
+                [NombankTreePointer.parse(elt) for elt in pieces]
+            )
 
         # Deal with normal pointers.
-        pieces = s.split(':')
-        if len(pieces) != 2: raise ValueError('bad nombank pointer %r' % s)
+        pieces = s.split(":")
+        if len(pieces) != 2:
+            raise ValueError("bad nombank pointer %r" % s)
         return NombankTreePointer(int(pieces[0]), int(pieces[1]))
 
     def __str__(self):
-        return '%s:%s' % (self.wordnum, self.height)
+        return "%s:%s" % (self.wordnum, self.height)
 
     def __repr__(self):
-        return 'NombankTreePointer(%d, %d)' % (self.wordnum, self.height)
+        return "NombankTreePointer(%d, %d)" % (self.wordnum, self.height)
 
     def __eq__(self, other):
-        while isinstance(other, (NombankChainTreePointer,
-                                 NombankSplitTreePointer)):
+        while isinstance(other, (NombankChainTreePointer, NombankSplitTreePointer)):
             other = other.pieces[0]
 
         if not isinstance(other, NombankTreePointer):
             return self is other
 
-        return (self.wordnum == other.wordnum and self.height == other.height)
+        return self.wordnum == other.wordnum and self.height == other.height
 
     def __ne__(self, other):
         return not self == other
 
     def __lt__(self, other):
-        while isinstance(other, (NombankChainTreePointer,
-                                 NombankSplitTreePointer)):
+        while isinstance(other, (NombankChainTreePointer, NombankSplitTreePointer)):
             other = other.pieces[0]
 
         if not isinstance(other, NombankTreePointer):
@@ -382,7 +436,8 @@ class NombankTreePointer(NombankPointer):
         return (self.wordnum, -self.height) < (other.wordnum, -other.height)
 
     def select(self, tree):
-        if tree is None: raise ValueError('Parse tree not avaialable')
+        if tree is None:
+            raise ValueError("Parse tree not avaialable")
         return tree[self.treepos(tree)]
 
     def treepos(self, tree):
@@ -390,14 +445,13 @@ class NombankTreePointer(NombankPointer):
         Convert this pointer to a standard 'tree position' pointer,
         given that it points to the given tree.
         """
-        if tree is None: raise ValueError('Parse tree not avaialable')
+        if tree is None:
+            raise ValueError("Parse tree not avaialable")
         stack = [tree]
         treepos = []
 
         wordnum = 0
         while True:
-            #print treepos
-            #print stack[-1]
             # tree node:
             if isinstance(stack[-1], Tree):
                 # Select the next child.
@@ -415,7 +469,7 @@ class NombankTreePointer(NombankPointer):
             # word node:
             else:
                 if wordnum == self.wordnum:
-                    return tuple(treepos[:len(treepos)-self.height-1])
+                    return tuple(treepos[: len(treepos) - self.height - 1])
                 else:
                     wordnum += 1
                     stack.pop()
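
A short sketch of the reader after the port (assumes the nombank corpus and
its parse trees are installed, e.g. via nltk.download("nombank"); the pointer
strings are illustrative):

    from nltk.corpus import nombank
    from nltk.corpus.reader.nombank import NombankTreePointer

    inst = nombank.instances()[0]
    print(inst.roleset, inst.predicate, inst.arguments[:1])

    # parse() handles plain, chain ('*') and split (',') pointers:
    ptr = NombankTreePointer.parse("5:2")
    print(ptr.wordnum, ptr.height)              # -> 5 2
    print(NombankTreePointer.parse("5:2*7:0"))  # chain pointer, prints 5:2*7:0
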
diff --git a/nlp_resource_data/nltk/corpus/reader/nombank.pyc b/nlp_resource_data/nltk/corpus/reader/nombank.pyc
deleted file mode 100755 (executable)
index 506b3eb..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/nombank.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/nps_chat.py b/nlp_resource_data/nltk/corpus/reader/nps_chat.py
old mode 100755 (executable)
new mode 100644 (file)
index a2da13c..8dfd8a5
@@ -1,10 +1,9 @@
 # Natural Language Toolkit: NPS Chat Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import unicode_literals
 
 import re
 import textwrap
@@ -17,32 +16,48 @@ from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 from nltk.corpus.reader.xmldocs import *
 
-class NPSChatCorpusReader(XMLCorpusReader):
 
+class NPSChatCorpusReader(XMLCorpusReader):
     def __init__(self, root, fileids, wrap_etree=False, tagset=None):
         XMLCorpusReader.__init__(self, root, fileids, wrap_etree)
         self._tagset = tagset
 
     def xml_posts(self, fileids=None):
         if self._wrap_etree:
-            return concat([XMLCorpusView(fileid, 'Session/Posts/Post',
-                                         self._wrap_elt)
-                           for fileid in self.abspaths(fileids)])
+            return concat(
+                [
+                    XMLCorpusView(fileid, "Session/Posts/Post", self._wrap_elt)
+                    for fileid in self.abspaths(fileids)
+                ]
+            )
         else:
-            return concat([XMLCorpusView(fileid, 'Session/Posts/Post')
-                           for fileid in self.abspaths(fileids)])
+            return concat(
+                [
+                    XMLCorpusView(fileid, "Session/Posts/Post")
+                    for fileid in self.abspaths(fileids)
+                ]
+            )
 
     def posts(self, fileids=None):
-        return concat([XMLCorpusView(fileid, 'Session/Posts/Post/terminals',
-                                     self._elt_to_words)
-                       for fileid in self.abspaths(fileids)])
+        return concat(
+            [
+                XMLCorpusView(
+                    fileid, "Session/Posts/Post/terminals", self._elt_to_words
+                )
+                for fileid in self.abspaths(fileids)
+            ]
+        )
 
     def tagged_posts(self, fileids=None, tagset=None):
         def reader(elt, handler):
             return self._elt_to_tagged_words(elt, handler, tagset)
-        return concat([XMLCorpusView(fileid, 'Session/Posts/Post/terminals',
-                                     reader)
-                       for fileid in self.abspaths(fileids)])
+
+        return concat(
+            [
+                XMLCorpusView(fileid, "Session/Posts/Post/terminals", reader)
+                for fileid in self.abspaths(fileids)
+            ]
+        )
 
     def words(self, fileids=None):
         return LazyConcatenation(self.posts(fileids))
@@ -54,20 +69,23 @@ class NPSChatCorpusReader(XMLCorpusReader):
         return ElementWrapper(elt)
 
     def _elt_to_words(self, elt, handler):
-        return [self._simplify_username(t.attrib['word'])
-                for t in elt.findall('t')]
+        return [self._simplify_username(t.attrib["word"]) for t in elt.findall("t")]
 
     def _elt_to_tagged_words(self, elt, handler, tagset=None):
-        tagged_post = [(self._simplify_username(t.attrib['word']),
-                        t.attrib['pos']) for t in elt.findall('t')]
+        tagged_post = [
+            (self._simplify_username(t.attrib["word"]), t.attrib["pos"])
+            for t in elt.findall("t")
+        ]
         if tagset and tagset != self._tagset:
-            tagged_post = [(w, map_tag(self._tagset, tagset, t)) for (w, t) in tagged_post]
+            tagged_post = [
+                (w, map_tag(self._tagset, tagset, t)) for (w, t) in tagged_post
+            ]
         return tagged_post
 
     @staticmethod
     def _simplify_username(word):
-        if 'User' in word:
-            word = 'U' + word.split('User', 1)[1]
+        if "User" in word:
+            word = "U" + word.split("User", 1)[1]
         elif isinstance(word, bytes):
-            word = word.decode('ascii')
+            word = word.decode("ascii")
         return word
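
A minimal sketch exercising the ported reader (requires the nps_chat corpus,
e.g. via nltk.download("nps_chat")):

    from nltk.corpus import nps_chat

    print(nps_chat.fileids()[:2])
    print(nps_chat.posts()[0])                              # one post as a token list
    print(nps_chat.tagged_posts(tagset="universal")[0][:5])
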
diff --git a/nlp_resource_data/nltk/corpus/reader/nps_chat.pyc b/nlp_resource_data/nltk/corpus/reader/nps_chat.pyc
deleted file mode 100755 (executable)
index 5fefe8a..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/nps_chat.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/opinion_lexicon.py b/nlp_resource_data/nltk/corpus/reader/opinion_lexicon.py
old mode 100755 (executable)
new mode 100644 (file)
index 0c70278..598db32
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Opinion Lexicon Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -27,15 +27,16 @@ Related papers:
     Comparing Opinions on the Web". Proceedings of the 14th International World
     Wide Web conference (WWW-2005), May 10-14, 2005, Chiba, Japan.
 """
-from six import string_types
 
 from nltk.corpus.reader import WordListCorpusReader
 from nltk.corpus.reader.api import *
 
+
 class IgnoreReadmeCorpusView(StreamBackedCorpusView):
     """
     This CorpusView is used to skip the initial readme block of the corpus.
     """
+
     def __init__(self, *args, **kwargs):
         StreamBackedCorpusView.__init__(self, *args, **kwargs)
         # open self._stream
@@ -82,10 +83,16 @@ class OpinionLexiconCorpusReader(WordListCorpusReader):
         :return: the given file(s) as a list of words and punctuation symbols.
         :rtype: list(str)
         """
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
-        return concat([self.CorpusView(path, self._read_word_block, encoding=enc)
-            for (path, enc, fileid) in self.abspaths(fileids, True, True)])
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
+        return concat(
+            [
+                self.CorpusView(path, self._read_word_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def positive(self):
         """
@@ -94,7 +101,7 @@ class OpinionLexiconCorpusReader(WordListCorpusReader):
         :return: a list of positive words.
         :rtype: list(str)
         """
-        return self.words('positive-words.txt')
+        return self.words("positive-words.txt")
 
     def negative(self):
         """
@@ -103,11 +110,11 @@ class OpinionLexiconCorpusReader(WordListCorpusReader):
         :return: a list of negative words.
         :rtype: list(str)
         """
-        return self.words('negative-words.txt')
+        return self.words("negative-words.txt")
 
     def _read_word_block(self, stream):
         words = []
-        for i in range(20): # Read 20 lines at a time.
+        for i in range(20):  # Read 20 lines at a time.
             line = stream.readline()
             if not line:
                 continue
diff --git a/nlp_resource_data/nltk/corpus/reader/opinion_lexicon.pyc b/nlp_resource_data/nltk/corpus/reader/opinion_lexicon.pyc
deleted file mode 100755 (executable)
index a0cdfa5..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/opinion_lexicon.pyc and /dev/null differ
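
The six.string_types -> str substitution seen in words() above recurs throughout this commit: on Python 3 there is no separate unicode type, so a plain str check suffices. The repeated normalization pattern, as a standalone sketch (normalize_fileids is a hypothetical name, not part of the reader):

    def normalize_fileids(fileids, all_fileids):
        if fileids is None:  # default to every file in the corpus
            fileids = all_fileids
        elif isinstance(fileids, str):  # one fileid becomes a one-item list
            fileids = [fileids]
        return fileids

    print(normalize_fileids("positive-words.txt", ["positive-words.txt", "negative-words.txt"]))
    # -> ['positive-words.txt']
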
diff --git a/nlp_resource_data/nltk/corpus/reader/panlex_lite.py b/nlp_resource_data/nltk/corpus/reader/panlex_lite.py
old mode 100755 (executable)
new mode 100644 (file)
index 08d3399..ab71dc7
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: PanLex Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: David Kamholz <kamholz@panlex.org>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -16,6 +16,7 @@ import sqlite3
 
 from nltk.corpus.reader.api import CorpusReader
 
+
 class PanLexLiteCorpusReader(CorpusReader):
     MEANING_Q = """
         SELECT dnx2.mn, dnx2.uq, dnx2.ap, dnx2.ui, ex2.tt, ex2.lv
@@ -42,12 +43,12 @@ class PanLexLiteCorpusReader(CorpusReader):
     """
 
     def __init__(self, root):
-        self._c = sqlite3.connect(os.path.join(root, 'db.sqlite')).cursor()
+        self._c = sqlite3.connect(os.path.join(root, "db.sqlite")).cursor()
 
         self._uid_lv = {}
         self._lv_uid = {}
 
-        for row in self._c.execute('SELECT uid, lv FROM lv'):
+        for row in self._c.execute("SELECT uid, lv FROM lv"):
             self._uid_lv[row[0]] = row[1]
             self._lv_uid[row[1]] = row[0]
 
@@ -63,10 +64,12 @@ class PanLexLiteCorpusReader(CorpusReader):
         :rtype: list(tuple)
         """
 
-        if lc == None:
-            return self._c.execute('SELECT uid, tt FROM lv ORDER BY uid').fetchall()
+        if lc is None:
+            return self._c.execute("SELECT uid, tt FROM lv ORDER BY uid").fetchall()
         else:
-            return self._c.execute('SELECT uid, tt FROM lv WHERE lc = ? ORDER BY uid', (lc,)).fetchall()
+            return self._c.execute(
+                "SELECT uid, tt FROM lv WHERE lc = ? ORDER BY uid", (lc,)
+            ).fetchall()
 
     def meanings(self, expr_uid, expr_tt):
         """
@@ -88,14 +91,19 @@ class PanLexLiteCorpusReader(CorpusReader):
             uid = self._lv_uid[i[5]]
 
             if not mn in mn_info:
-                mn_info[mn] = { 'uq': i[1], 'ap': i[2], 'ui': i[3], 'ex': { expr_uid: [expr_tt] } }
+                mn_info[mn] = {
+                    "uq": i[1],
+                    "ap": i[2],
+                    "ui": i[3],
+                    "ex": {expr_uid: [expr_tt]},
+                }
 
-            if not uid in mn_info[mn]['ex']:
-                mn_info[mn]['ex'][uid] = []
+            if not uid in mn_info[mn]["ex"]:
+                mn_info[mn]["ex"][uid] = []
 
-            mn_info[mn]['ex'][uid].append(i[4])
+            mn_info[mn]["ex"][uid].append(i[4])
 
-        return [ Meaning(mn, mn_info[mn]) for mn in mn_info ]
+        return [Meaning(mn, mn_info[mn]) for mn in mn_info]
 
     def translations(self, from_uid, from_tt, to_uid):
         """
@@ -107,7 +115,7 @@ class PanLexLiteCorpusReader(CorpusReader):
         :param from_tt: the source expression's text.
         :param to_uid: the target language variety, as a seven-character
             uniform identifier.
-        :return a list of translation tuples. The first element is the expression 
+        :return: a list of translation tuples. The first element is the expression
             text and the second element is the translation quality.
         :rtype: list(tuple)
         """
@@ -117,6 +125,7 @@ class PanLexLiteCorpusReader(CorpusReader):
 
         return self._c.execute(self.TRANSLATION_Q, (from_lv, from_tt, to_lv)).fetchall()
 
+
 class Meaning(dict):
     """
     Represents a single PanLex meaning. A meaning is a translation set derived
@@ -125,35 +134,35 @@ class Meaning(dict):
 
     def __init__(self, mn, attr):
         super(Meaning, self).__init__(**attr)
-        self['mn'] = mn
+        self["mn"] = mn
 
     def id(self):
         """
         :return: the meaning's id.
         :rtype: int
         """
-        return self['mn']
+        return self["mn"]
 
     def quality(self):
         """
         :return: the meaning's source's quality (0=worst, 9=best).
         :rtype: int
         """
-        return self['uq']
+        return self["uq"]
 
     def source(self):
         """
         :return: the meaning's source id.
         :rtype: int
         """
-        return self['ap']
+        return self["ap"]
 
     def source_group(self):
         """
         :return: the meaning's source group id.
         :rtype: int
         """
-        return self['ui']
+        return self["ui"]
 
     def expressions(self):
         """
@@ -162,4 +171,4 @@ class Meaning(dict):
             texts.
         :rtype: dict
         """
-        return self['ex']
+        return self["ex"]
diff --git a/nlp_resource_data/nltk/corpus/reader/panlex_lite.pyc b/nlp_resource_data/nltk/corpus/reader/panlex_lite.pyc
deleted file mode 100755 (executable)
index c57a041..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/panlex_lite.pyc and /dev/null differ
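
Beyond formatting, language_varieties() above picks up one behavioral idiom: `lc == None` becomes the PEP 8 identity check `lc is None`, while the lookup stays a parameterized query. A minimal, self-contained sketch against an in-memory stand-in for db.sqlite (table contents are made up):

    import sqlite3

    cur = sqlite3.connect(":memory:").cursor()  # stand-in for db.sqlite
    cur.execute("CREATE TABLE lv (uid TEXT, tt TEXT, lc TEXT)")
    cur.execute("INSERT INTO lv VALUES ('eng-000', 'English', 'eng')")

    def language_varieties(c, lc=None):
        if lc is None:  # identity comparison with None, per PEP 8
            return c.execute("SELECT uid, tt FROM lv ORDER BY uid").fetchall()
        return c.execute(
            "SELECT uid, tt FROM lv WHERE lc = ? ORDER BY uid", (lc,)
        ).fetchall()

    print(language_varieties(cur, "eng"))  # -> [('eng-000', 'English')]
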
diff --git a/nlp_resource_data/nltk/corpus/reader/panlex_swadesh.py b/nlp_resource_data/nltk/corpus/reader/panlex_swadesh.py
new file mode 100644 (file)
index 0000000..ed46a4b
--- /dev/null
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Word List Corpus Reader
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+
+from collections import namedtuple, defaultdict
+import re
+
+from nltk.tokenize import line_tokenize
+
+from nltk.corpus.reader.wordlist import WordListCorpusReader
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+PanlexLanguage = namedtuple('PanlexLanguage',
+                          ['panlex_uid',  # (1) PanLex UID
+                           'iso639',      # (2) ISO 639 language code
+                           'iso639_type', # (3) ISO 639 language type, see README
+                           'script',      # (4) normal scripts of expressions
+                           'name',        # (5) PanLex default name
+                           'langvar_uid'  # (6) UID of the language variety in which the default name is an expression
+                           ])
+
+class PanlexSwadeshCorpusReader(WordListCorpusReader):
+    """
+    This is a class to read the PanLex Swadesh list from
+
+    David Kamholz, Jonathan Pool, and Susan M. Colowick (2014).
+    PanLex: Building a Resource for Panlingual Lexical Translation.
+    In LREC. http://www.lrec-conf.org/proceedings/lrec2014/pdf/1029_Paper.pdf
+
+    License: CC0 1.0 Universal
+    https://creativecommons.org/publicdomain/zero/1.0/legalcode
+    """
+    def __init__(self, *args, **kwargs):
+        super(PanlexSwadeshCorpusReader, self).__init__(*args, **kwargs)
+        # Find the swadesh size using the fileids' path.
+        self.swadesh_size = re.match(r'swadesh([0-9].*)\/', self.fileids()[0]).group(1)
+        self._languages = {lang.panlex_uid: lang for lang in self.get_languages()}
+        self._macro_languages = self.get_macrolanguages()
+
+    def license(self):
+        print('CC0 1.0 Universal')
+
+    def readme(self):
+        print(self.raw('README'))
+
+    def language_codes(self):
+        return self._languages.keys()
+
+    def get_languages(self):
+        for line in self.raw('langs{}.txt'.format(self.swadesh_size)).split('\n'):
+            if not line.strip(): # Skip empty lines.
+                continue
+            yield PanlexLanguage(*line.strip().split('\t'))
+
+    def get_macrolanguages(self):
+        macro_languages = defaultdict(list)
+        for lang in self._languages.values():
+            macro_languages[lang.iso639].append(lang.panlex_uid)
+        return macro_languages
+
+    def words_by_lang(self, lang_code):
+        """
+        :return: a list of list(str)
+        """
+        fileid = 'swadesh{}/{}.txt'.format(self.swadesh_size, lang_code)
+        return [concept.split('\t') for concept in self.words(fileid)]
+
+    def words_by_iso639(self, iso639_code):
+        """
+        :return: a list of list(str)
+        """
+        fileids = ['swadesh{}/{}.txt'.format(self.swadesh_size, lang_code)
+                   for lang_code in self._macro_languages[iso639_code]]
+        return [concept.split('\t') for fileid in fileids for concept in self.words(fileid)]
+
+    def entries(self, fileids=None):
+        """
+        :return: a tuple of words for the specified fileids.
+        """
+        if not fileids:
+            fileids = self.fileids()
+
+        wordlists = [self.words(f) for f in fileids]
+        return list(zip(*wordlists))
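
Each line of the langs<size>.txt manifest read by get_languages() is tab-separated into the six PanlexLanguage fields. A sketch with a made-up manifest row:

    from collections import namedtuple

    PanlexLanguage = namedtuple(
        "PanlexLanguage",
        ["panlex_uid", "iso639", "iso639_type", "script", "name", "langvar_uid"],
    )

    line = "eng-000\teng\tL\tLatn\tEnglish\teng-000"  # hypothetical row
    lang = PanlexLanguage(*line.strip().split("\t"))
    print(lang.name, lang.iso639)  # -> English eng
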
diff --git a/nlp_resource_data/nltk/corpus/reader/pl196x.py b/nlp_resource_data/nltk/corpus/reader/pl196x.py
old mode 100755 (executable)
new mode 100644 (file)
index 93b8b19..aaf280d
@@ -1,21 +1,19 @@
 # Natural Language Toolkit:
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Piotr Kasprzyk <p.j.kasprzyk@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from six import string_types
-
 from nltk.corpus.reader.api import *
 from nltk.corpus.reader.xmldocs import XMLCorpusReader
 
 
-PARA = re.compile(r'<p(?: [^>]*){0,1}>(.*?)</p>')
-SENT = re.compile(r'<s(?: [^>]*){0,1}>(.*?)</s>')
+PARA = re.compile(r"<p(?: [^>]*){0,1}>(.*?)</p>")
+SENT = re.compile(r"<s(?: [^>]*){0,1}>(.*?)</s>")
 
-TAGGEDWORD = re.compile(r'<([wc](?: [^>]*){0,1}>)(.*?)</[wc]>')
-WORD = re.compile(r'<[wc](?: [^>]*){0,1}>(.*?)</[wc]>')
+TAGGEDWORD = re.compile(r"<([wc](?: [^>]*){0,1}>)(.*?)</[wc]>")
+WORD = re.compile(r"<[wc](?: [^>]*){0,1}>(.*?)</[wc]>")
 
 TYPE = re.compile(r'type="(.*?)"')
 ANA = re.compile(r'ana="(.*?)"')
@@ -24,9 +22,16 @@ TEXTID = re.compile(r'text id="(.*?)"')
 
 
 class TEICorpusView(StreamBackedCorpusView):
-    def __init__(self, corpus_file,
-                 tagged, group_by_sent, group_by_para,
-                 tagset=None, head_len=0, textids=None):
+    def __init__(
+        self,
+        corpus_file,
+        tagged,
+        group_by_sent,
+        group_by_para,
+        tagset=None,
+        head_len=0,
+        textids=None,
+    ):
 
         self._tagged = tagged
         self._textids = textids
@@ -41,22 +46,23 @@ class TEICorpusView(StreamBackedCorpusView):
     def read_block(self, stream):
         block = stream.readlines(self._pagesize)
         block = concat(block)
-        while (block.count('<text id') > block.count('</text>')) \
-                or block.count('<text id') == 0:
+        while (block.count("<text id") > block.count("</text>")) or block.count(
+            "<text id"
+        ) == 0:
             tmp = stream.readline()
             if len(tmp) <= 0:
                 break
             block += tmp
 
-        block = block.replace('\n', '')
+        block = block.replace("\n", "")
 
         textids = TEXTID.findall(block)
         if self._textids:
             for tid in textids:
                 if tid not in self._textids:
                     beg = block.find(tid) - 1
-                    end = block[beg:].find('</text>') + len('</text>')
-                    block = block[:beg] + block[beg + end:]
+                    end = block[beg:].find("</text>") + len("</text>")
+                    block = block[:beg] + block[beg + end :]
 
         output = []
         for para_str in PARA.findall(block):
@@ -65,8 +71,7 @@ class TEICorpusView(StreamBackedCorpusView):
                 if not self._tagged:
                     sent = WORD.findall(sent_str)
                 else:
-                    sent = list(
-                        map(self._parse_tag, TAGGEDWORD.findall(sent_str)))
+                    sent = list(map(self._parse_tag, TAGGEDWORD.findall(sent_str)))
                 if self._group_by_sent:
                     para.append(sent)
                 else:
@@ -79,7 +84,7 @@ class TEICorpusView(StreamBackedCorpusView):
 
     def _parse_tag(self, tag_word_tuple):
         (tag, word) = tag_word_tuple
-        if tag.startswith('w'):
+        if tag.startswith("w"):
             tag = ANA.search(tag).group(1)
         else:  # tag.startswith('c')
             tag = TYPE.search(tag).group(1)
@@ -90,8 +95,8 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader):
     head_len = 2770
 
     def __init__(self, *args, **kwargs):
-        if 'textid_file' in kwargs:
-            self._textids = kwargs['textid_file']
+        if "textid_file" in kwargs:
+            self._textids = kwargs["textid_file"]
         else:
             self._textids = None
 
@@ -107,10 +112,10 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader):
             with open(self._textids) as fp:
                 for line in fp:
                     line = line.strip()
-                    file_id, text_ids = line.split(' ', 1)
+                    file_id, text_ids = line.split(" ", 1)
                     if file_id not in self.fileids():
                         raise ValueError(
-                            'In text_id mapping file %s: %s not found'
+                            "In text_id mapping file %s: %s not found"
                             % (self._textids, file_id)
                         )
                     for text_id in text_ids.split(self._delimiter):
@@ -122,11 +127,18 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader):
 
     def _resolve(self, fileids, categories, textids=None):
         tmp = None
-        if len(filter(lambda accessor: accessor is None,
-                      (fileids, categories, textids))) != 1:
-
-            raise ValueError('Specify exactly one of: fileids, '
-                             'categories or textids')
+        if (
+            len(list(
+                filter(
+                    lambda accessor: accessor is None, (fileids, categories, textids)
+                )
+            ))
+            != 1
+        ):
+
+            raise ValueError(
+                "Specify exactly one of: fileids, " "categories or textids"
+            )
 
         if fileids is not None:
             return fileids, None
@@ -135,12 +147,12 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader):
             return self.fileids(categories), None
 
         if textids is not None:
-            if isinstance(textids, string_types):
+            if isinstance(textids, str):
                 textids = [textids]
             files = sum((self._t2f[t] for t in textids), [])
             tdict = dict()
             for f in files:
-                tdict[f] = (set(self._f2t[f]) & set(textids))
+                tdict[f] = set(self._f2t[f]) & set(textids)
             return files, tdict
 
     def decode_tag(self, tag):
@@ -156,9 +168,10 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader):
         of required chunks---giving much more control to the user.
         """
         fileids, _ = self._resolve(fileids, categories)
-        if fileids is None: return sorted(self._t2f)
+        if fileids is None:
+            return sorted(self._t2f)
 
-        if isinstance(fileids, string_types):
+        if isinstance(fileids, str):
             fileids = [fileids]
         return sorted(sum((self._f2t[d] for d in fileids), []))
 
@@ -166,127 +179,203 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader):
         fileids, textids = self._resolve(fileids, categories, textids)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
 
         if textids:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         False, False, False,
-                                         head_len=self.head_len,
-                                         textids=textids[fileid])
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid),
+                        False,
+                        False,
+                        False,
+                        head_len=self.head_len,
+                        textids=textids[fileid],
+                    )
+                    for fileid in fileids
+                ]
+            )
         else:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         False, False, False,
-                                         head_len=self.head_len)
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid),
+                        False,
+                        False,
+                        False,
+                        head_len=self.head_len,
+                    )
+                    for fileid in fileids
+                ]
+            )
 
     def sents(self, fileids=None, categories=None, textids=None):
         fileids, textids = self._resolve(fileids, categories, textids)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
 
         if textids:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         False, True, False,
-                                         head_len=self.head_len,
-                                         textids=textids[fileid])
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid),
+                        False,
+                        True,
+                        False,
+                        head_len=self.head_len,
+                        textids=textids[fileid],
+                    )
+                    for fileid in fileids
+                ]
+            )
         else:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         False, True, False,
-                                         head_len=self.head_len)
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid), False, True, False, head_len=self.head_len
+                    )
+                    for fileid in fileids
+                ]
+            )
 
     def paras(self, fileids=None, categories=None, textids=None):
         fileids, textids = self._resolve(fileids, categories, textids)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
 
         if textids:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         False, True, True,
-                                         head_len=self.head_len,
-                                         textids=textids[fileid])
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid),
+                        False,
+                        True,
+                        True,
+                        head_len=self.head_len,
+                        textids=textids[fileid],
+                    )
+                    for fileid in fileids
+                ]
+            )
         else:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         False, True, True,
-                                         head_len=self.head_len)
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid), False, True, True, head_len=self.head_len
+                    )
+                    for fileid in fileids
+                ]
+            )
 
     def tagged_words(self, fileids=None, categories=None, textids=None):
         fileids, textids = self._resolve(fileids, categories, textids)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
 
         if textids:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         True, False, False,
-                                         head_len=self.head_len,
-                                         textids=textids[fileid])
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid),
+                        True,
+                        False,
+                        False,
+                        head_len=self.head_len,
+                        textids=textids[fileid],
+                    )
+                    for fileid in fileids
+                ]
+            )
         else:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         True, False, False,
-                                         head_len=self.head_len)
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid), True, False, False, head_len=self.head_len
+                    )
+                    for fileid in fileids
+                ]
+            )
 
     def tagged_sents(self, fileids=None, categories=None, textids=None):
         fileids, textids = self._resolve(fileids, categories, textids)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
 
         if textids:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         True, True, False,
-                                         head_len=self.head_len,
-                                         textids=textids[fileid])
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid),
+                        True,
+                        True,
+                        False,
+                        head_len=self.head_len,
+                        textids=textids[fileid],
+                    )
+                    for fileid in fileids
+                ]
+            )
         else:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         True, True, False,
-                                         head_len=self.head_len)
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid), True, True, False, head_len=self.head_len
+                    )
+                    for fileid in fileids
+                ]
+            )
 
     def tagged_paras(self, fileids=None, categories=None, textids=None):
         fileids, textids = self._resolve(fileids, categories, textids)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
 
         if textids:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         True, True, True,
-                                         head_len=self.head_len,
-                                         textids=textids[fileid])
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid),
+                        True,
+                        True,
+                        True,
+                        head_len=self.head_len,
+                        textids=textids[fileid],
+                    )
+                    for fileid in fileids
+                ]
+            )
         else:
-            return concat([TEICorpusView(self.abspath(fileid),
-                                         True, True, True,
-                                         head_len=self.head_len)
-                           for fileid in fileids])
+            return concat(
+                [
+                    TEICorpusView(
+                        self.abspath(fileid), True, True, True, head_len=self.head_len
+                    )
+                    for fileid in fileids
+                ]
+            )
 
     def xml(self, fileids=None, categories=None):
         fileids, _ = self._resolve(fileids, categories)
         if len(fileids) == 1:
             return XMLCorpusReader.xml(self, fileids[0])
         else:
-            raise TypeError('Expected a single file')
+            raise TypeError("Expected a single file")
 
     def raw(self, fileids=None, categories=None):
         fileids, _ = self._resolve(fileids, categories)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
diff --git a/nlp_resource_data/nltk/corpus/reader/pl196x.pyc b/nlp_resource_data/nltk/corpus/reader/pl196x.pyc
deleted file mode 100755 (executable)
index 9cafe4a..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/pl196x.pyc and /dev/null differ
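
The len(list(filter(...))) wrapping in _resolve() above is one of the few genuinely semantic fixes in this file: Python 2's filter() returned a list, but Python 3's returns a lazy iterator, so the old len(filter(...)) now raises TypeError. A quick check:

    values = ("a", None, None)
    nones = filter(lambda v: v is None, values)
    # len(nones)             # TypeError: object of type 'filter' has no len()
    print(len(list(nones)))  # -> 2; materialize first, as the hunk above does
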
diff --git a/nlp_resource_data/nltk/corpus/reader/plaintext.py b/nlp_resource_data/nltk/corpus/reader/plaintext.py
old mode 100755 (executable)
new mode 100644 (file)
index 332b6aa..17f484b
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Plaintext Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 #         Nitin Madnani <nmadnani@umiacs.umd.edu>
 A reader for corpora that consist of plaintext documents.
 """
 
-from six import string_types
-import codecs
-
 import nltk.data
 from nltk.tokenize import *
 
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
+
 class PlaintextCorpusReader(CorpusReader):
     """
     Reader for corpora that consist of plaintext documents.  Paragraphs
@@ -37,12 +35,15 @@ class PlaintextCorpusReader(CorpusReader):
        ``PlaintextCorpusReader`` may specify alternative corpus view
        classes (e.g., to skip the preface sections of documents.)"""
 
-    def __init__(self, root, fileids,
-                 word_tokenizer=WordPunctTokenizer(),
-                 sent_tokenizer=nltk.data.LazyLoader(
-                     'tokenizers/punkt/english.pickle'),
-                 para_block_reader=read_blankline_block,
-                 encoding='utf8'):
+    def __init__(
+        self,
+        root,
+        fileids,
+        word_tokenizer=WordPunctTokenizer(),
+        sent_tokenizer=nltk.data.LazyLoader("tokenizers/punkt/english.pickle"),
+        para_block_reader=read_blankline_block,
+        encoding="utf8",
+    ):
         """
         Construct a new plaintext corpus reader for a set of documents
         located at the given root directory.  Example usage:
@@ -69,8 +70,10 @@ class PlaintextCorpusReader(CorpusReader):
         :return: the given file(s) as a single string.
         :rtype: str
         """
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         raw_texts = []
         for f in fileids:
             _fin = self.open(f)
@@ -84,9 +87,12 @@ class PlaintextCorpusReader(CorpusReader):
             and punctuation symbols.
         :rtype: list(str)
         """
-        return concat([self.CorpusView(path, self._read_word_block, encoding=enc)
-                       for (path, enc, fileid)
-                       in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_word_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def sents(self, fileids=None):
         """
@@ -96,11 +102,14 @@ class PlaintextCorpusReader(CorpusReader):
         :rtype: list(list(str))
         """
         if self._sent_tokenizer is None:
-            raise ValueError('No sentence tokenizer for this corpus')
+            raise ValueError("No sentence tokenizer for this corpus")
 
-        return concat([self.CorpusView(path, self._read_sent_block, encoding=enc)
-                       for (path, enc, fileid)
-                       in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_sent_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def paras(self, fileids=None):
         """
@@ -110,39 +119,50 @@ class PlaintextCorpusReader(CorpusReader):
         :rtype: list(list(list(str)))
         """
         if self._sent_tokenizer is None:
-            raise ValueError('No sentence tokenizer for this corpus')
+            raise ValueError("No sentence tokenizer for this corpus")
 
-        return concat([self.CorpusView(path, self._read_para_block, encoding=enc)
-                       for (path, enc, fileid)
-                       in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_para_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def _read_word_block(self, stream):
         words = []
-        for i in range(20): # Read 20 lines at a time.
+        for i in range(20):  # Read 20 lines at a time.
             words.extend(self._word_tokenizer.tokenize(stream.readline()))
         return words
 
     def _read_sent_block(self, stream):
         sents = []
         for para in self._para_block_reader(stream):
-            sents.extend([self._word_tokenizer.tokenize(sent)
-                          for sent in self._sent_tokenizer.tokenize(para)])
+            sents.extend(
+                [
+                    self._word_tokenizer.tokenize(sent)
+                    for sent in self._sent_tokenizer.tokenize(para)
+                ]
+            )
         return sents
 
     def _read_para_block(self, stream):
         paras = []
         for para in self._para_block_reader(stream):
-            paras.append([self._word_tokenizer.tokenize(sent)
-                          for sent in self._sent_tokenizer.tokenize(para)])
+            paras.append(
+                [
+                    self._word_tokenizer.tokenize(sent)
+                    for sent in self._sent_tokenizer.tokenize(para)
+                ]
+            )
         return paras
 
 
-class CategorizedPlaintextCorpusReader(CategorizedCorpusReader,
-                                    PlaintextCorpusReader):
+class CategorizedPlaintextCorpusReader(CategorizedCorpusReader, PlaintextCorpusReader):
     """
     A reader for plaintext corpora whose documents are divided into
     categories based on their file identifiers.
     """
+
     def __init__(self, *args, **kwargs):
         """
         Initialize the corpus reader.  Categorization arguments
@@ -155,31 +175,37 @@ class CategorizedPlaintextCorpusReader(CategorizedCorpusReader,
 
     def _resolve(self, fileids, categories):
         if fileids is not None and categories is not None:
-            raise ValueError('Specify fileids or categories, not both')
+            raise ValueError("Specify fileids or categories, not both")
         if categories is not None:
             return self.fileids(categories)
         else:
             return fileids
+
     def raw(self, fileids=None, categories=None):
-        return PlaintextCorpusReader.raw(
-            self, self._resolve(fileids, categories))
+        return PlaintextCorpusReader.raw(self, self._resolve(fileids, categories))
+
     def words(self, fileids=None, categories=None):
-        return PlaintextCorpusReader.words(
-            self, self._resolve(fileids, categories))
+        return PlaintextCorpusReader.words(self, self._resolve(fileids, categories))
+
     def sents(self, fileids=None, categories=None):
-        return PlaintextCorpusReader.sents(
-            self, self._resolve(fileids, categories))
+        return PlaintextCorpusReader.sents(self, self._resolve(fileids, categories))
+
     def paras(self, fileids=None, categories=None):
-        return PlaintextCorpusReader.paras(
-            self, self._resolve(fileids, categories))
+        return PlaintextCorpusReader.paras(self, self._resolve(fileids, categories))
 
-# is there a better way?
+
+# FIXME: Is there a better way? How to not hardcode this?
+#       Possibly, add a language kwarg to CategorizedPlaintextCorpusReader to
+#       override the `sent_tokenizer`.
 class PortugueseCategorizedPlaintextCorpusReader(CategorizedPlaintextCorpusReader):
     def __init__(self, *args, **kwargs):
         CategorizedCorpusReader.__init__(self, kwargs)
-        kwargs['sent_tokenizer'] = nltk.data.LazyLoader('tokenizers/punkt/portuguese.pickle')
+        kwargs["sent_tokenizer"] = nltk.data.LazyLoader(
+            "tokenizers/punkt/portuguese.pickle"
+        )
         PlaintextCorpusReader.__init__(self, *args, **kwargs)
 
+
 class EuroparlCorpusReader(PlaintextCorpusReader):
 
     """
@@ -201,7 +227,7 @@ class EuroparlCorpusReader(PlaintextCorpusReader):
 
     def _read_word_block(self, stream):
         words = []
-        for i in range(20): # Read 20 lines at a time.
+        for i in range(20):  # Read 20 lines at a time.
             words.extend(stream.readline().split())
         return words
 
@@ -224,9 +250,14 @@ class EuroparlCorpusReader(PlaintextCorpusReader):
             in turn encoded as lists of word strings.
         :rtype: list(list(list(str)))
         """
-        return concat([self.CorpusView(fileid, self._read_para_block,
-                                       encoding=enc)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                self.CorpusView(fileid, self._read_para_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def paras(self, fileids=None):
-        raise NotImplementedError('The Europarl corpus reader does not support paragraphs. Please use chapters() instead.')
+        raise NotImplementedError(
+            "The Europarl corpus reader does not support paragraphs. Please use chapters() instead."
+        )
diff --git a/nlp_resource_data/nltk/corpus/reader/plaintext.pyc b/nlp_resource_data/nltk/corpus/reader/plaintext.pyc
deleted file mode 100755 (executable)
index 467729c..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/plaintext.pyc and /dev/null differ
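
A minimal usage sketch of the reader refactored above. The temporary directory and file contents are illustrative only; note that words() relies on the default WordPunctTokenizer and needs no punkt model, unlike sents()/paras():

    import os, tempfile
    from nltk.corpus.reader.plaintext import PlaintextCorpusReader

    root = tempfile.mkdtemp()
    with open(os.path.join(root, "doc1.txt"), "w", encoding="utf8") as f:
        f.write("Hello world. This is a test.\n\nA second paragraph.\n")

    reader = PlaintextCorpusReader(root, r".*\.txt")
    print(reader.words("doc1.txt")[:4])  # -> ['Hello', 'world', '.', 'This']
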
diff --git a/nlp_resource_data/nltk/corpus/reader/ppattach.py b/nlp_resource_data/nltk/corpus/reader/ppattach.py
old mode 100755 (executable)
new mode 100644 (file)
index 9c0ac65..60c2b02
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: PP Attachment Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -37,16 +37,11 @@ Conference.  [http://www.cis.upenn.edu/~adwait/papers/hlt94.ps]
 The PP Attachment Corpus is distributed with NLTK with the permission
 of the author.
 """
-from __future__ import unicode_literals
 
-from six import string_types
-
-from nltk import compat
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
 
-@compat.python_2_unicode_compatible
 class PPAttachment(object):
     def __init__(self, sent, verb, noun1, prep, noun2, attachment):
         self.sent = sent
@@ -57,28 +52,39 @@ class PPAttachment(object):
         self.attachment = attachment
 
     def __repr__(self):
-        return ('PPAttachment(sent=%r, verb=%r, noun1=%r, prep=%r, '
-                'noun2=%r, attachment=%r)' %
-                (self.sent, self.verb, self.noun1, self.prep,
-                 self.noun2, self.attachment))
+        return (
+            "PPAttachment(sent=%r, verb=%r, noun1=%r, prep=%r, "
+            "noun2=%r, attachment=%r)"
+            % (self.sent, self.verb, self.noun1, self.prep, self.noun2, self.attachment)
+        )
+
 
 class PPAttachmentCorpusReader(CorpusReader):
     """
     sentence_id verb noun1 preposition noun2 attachment
     """
+
     def attachments(self, fileids):
-        return concat([StreamBackedCorpusView(fileid, self._read_obj_block,
-                                              encoding=enc)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_obj_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tuples(self, fileids):
-        return concat([StreamBackedCorpusView(fileid, self._read_tuple_block,
-                                              encoding=enc)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_tuple_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def raw(self, fileids=None):
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def _read_tuple_block(self, stream):
diff --git a/nlp_resource_data/nltk/corpus/reader/ppattach.pyc b/nlp_resource_data/nltk/corpus/reader/ppattach.pyc
deleted file mode 100755 (executable)
index 8b4f874..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/ppattach.pyc and /dev/null differ
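
Dropping @compat.python_2_unicode_compatible (and `from __future__ import unicode_literals`) above is safe because on Python 3 __str__ must already return str; the decorator existed only to remap __str__ to __unicode__ under Python 2. The now-plain pattern, sketched with a hypothetical stand-in class:

    class PPAttachmentLike:
        def __init__(self, verb, prep):
            self.verb, self.prep = verb, prep

        def __repr__(self):
            return "PPAttachment(verb=%r, prep=%r)" % (self.verb, self.prep)

    print(PPAttachmentLike("join", "as"))  # PPAttachment(verb='join', prep='as')
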
diff --git a/nlp_resource_data/nltk/corpus/reader/propbank.py b/nlp_resource_data/nltk/corpus/reader/propbank.py
old mode 100755 (executable)
new mode 100644 (file)
index 343858a..7c49edc
@@ -1,23 +1,21 @@
 # Natural Language Toolkit: PropBank Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import unicode_literals
 import re
 from functools import total_ordering
 from xml.etree import ElementTree
 
-from six import string_types
-
 from nltk.tree import Tree
 from nltk.internals import raise_unorderable_types
 
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
+
 class PropbankCorpusReader(CorpusReader):
     """
     Corpus reader for the propbank corpus, which augments the Penn
@@ -31,9 +29,17 @@ class PropbankCorpusReader(CorpusReader):
     each "roleset", the frameset file provides descriptions of the
     argument roles, along with examples.
     """
-    def __init__(self, root, propfile, framefiles='',
-                 verbsfile=None, parse_fileid_xform=None,
-                 parse_corpus=None, encoding='utf8'):
+
+    def __init__(
+        self,
+        root,
+        propfile,
+        framefiles="",
+        verbsfile=None,
+        parse_fileid_xform=None,
+        parse_corpus=None,
+        encoding="utf8",
+    ):
         """
         :param root: The root directory for this corpus.
         :param propfile: The name of the file containing the predicate-
@@ -49,12 +55,11 @@ class PropbankCorpusReader(CorpusReader):
             necessary to resolve the tree pointers used by propbank.
         """
         # If framefiles is specified as a regexp, expand it.
-        if isinstance(framefiles, string_types):
+        if isinstance(framefiles, str):
             framefiles = find_corpus_fileids(root, framefiles)
         framefiles = list(framefiles)
         # Initialize the corpus reader.
-        CorpusReader.__init__(self, root, [propfile, verbsfile] + framefiles,
-                              encoding)
+        CorpusReader.__init__(self, root, [propfile, verbsfile] + framefiles, encoding)
 
         # Record our frame fileids & prop file.
         self._propfile = propfile
@@ -67,8 +72,10 @@ class PropbankCorpusReader(CorpusReader):
         """
         :return: the text contents of the given fileids, as a single string.
         """
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, ): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def instances(self, baseform=None):
@@ -78,47 +85,49 @@ class PropbankCorpusReader(CorpusReader):
         """
         kwargs = {}
         if baseform is not None:
-            kwargs['instance_filter'] = lambda inst: inst.baseform==baseform
-        return StreamBackedCorpusView(self.abspath(self._propfile),
-                                      lambda stream: self._read_instance_block(stream, **kwargs),
-                                      encoding=self.encoding(self._propfile))
+            kwargs["instance_filter"] = lambda inst: inst.baseform == baseform
+        return StreamBackedCorpusView(
+            self.abspath(self._propfile),
+            lambda stream: self._read_instance_block(stream, **kwargs),
+            encoding=self.encoding(self._propfile),
+        )
 
     def lines(self):
         """
         :return: a corpus view that acts as a list of strings, one for
         each line in the predicate-argument annotation file.
         """
-        return StreamBackedCorpusView(self.abspath(self._propfile),
-                                      read_line_block,
-                                      encoding=self.encoding(self._propfile))
+        return StreamBackedCorpusView(
+            self.abspath(self._propfile),
+            read_line_block,
+            encoding=self.encoding(self._propfile),
+        )
 
     def roleset(self, roleset_id):
         """
         :return: the xml description for the given roleset.
         """
-        baseform = roleset_id.split('.')[0]
-        framefile = 'frames/%s.xml' % baseform
+        baseform = roleset_id.split(".")[0]
+        framefile = "frames/%s.xml" % baseform
         if framefile not in self._framefiles:
-            raise ValueError('Frameset file for %s not found' %
-                             roleset_id)
+            raise ValueError("Frameset file for %s not found" % roleset_id)
 
         # n.b.: The encoding for XML fileids is specified by the file
         # itself; so we ignore self._encoding here.
         etree = ElementTree.parse(self.abspath(framefile).open()).getroot()
-        for roleset in etree.findall('predicate/roleset'):
-            if roleset.attrib['id'] == roleset_id:
+        for roleset in etree.findall("predicate/roleset"):
+            if roleset.attrib["id"] == roleset_id:
                 return roleset
-        raise ValueError('Roleset %s not found in %s' % (roleset_id, framefile))
+        raise ValueError("Roleset %s not found in %s" % (roleset_id, framefile))
 
     def rolesets(self, baseform=None):
         """
         :return: list of xml descriptions for rolesets.
         """
         if baseform is not None:
-            framefile = 'frames/%s.xml' % baseform
+            framefile = "frames/%s.xml" % baseform
             if framefile not in self._framefiles:
-                raise ValueError('Frameset file for %s not found' %
-                                 baseform)
+                raise ValueError("Frameset file for %s not found" % baseform)
             framefiles = [framefile]
         else:
             framefiles = self._framefiles
@@ -128,7 +137,7 @@ class PropbankCorpusReader(CorpusReader):
             # n.b.: The encoding for XML fileids is specified by the file
             # itself; so we ignore self._encoding here.
             etree = ElementTree.parse(self.abspath(framefile).open()).getroot()
-            rsets.append(etree.findall('predicate/roleset'))
+            rsets.append(etree.findall("predicate/roleset"))
         return LazyConcatenation(rsets)
 
     def verbs(self):
@@ -136,9 +145,11 @@ class PropbankCorpusReader(CorpusReader):
         :return: a corpus view that acts as a list of all verb lemmas
         in this corpus (from the verbs.txt file).
         """
-        return StreamBackedCorpusView(self.abspath(self._verbsfile),
-                                      read_line_block,
-                                      encoding=self.encoding(self._verbsfile))
+        return StreamBackedCorpusView(
+            self.abspath(self._verbsfile),
+            read_line_block,
+            encoding=self.encoding(self._verbsfile),
+        )
 
     def _read_instance_block(self, stream, instance_filter=lambda inst: True):
         block = []
@@ -148,22 +159,33 @@ class PropbankCorpusReader(CorpusReader):
             line = stream.readline().strip()
             if line:
                 inst = PropbankInstance.parse(
-                    line, self._parse_fileid_xform,
-                    self._parse_corpus)
+                    line, self._parse_fileid_xform, self._parse_corpus
+                )
                 if instance_filter(inst):
                     block.append(inst)
 
         return block
 
+
 ######################################################################
-#{ Propbank Instance & related datatypes
+# { Propbank Instance & related datatypes
 ######################################################################
 
-@compat.python_2_unicode_compatible
-class PropbankInstance(object):
 
-    def __init__(self, fileid, sentnum, wordnum, tagger, roleset,
-                 inflection, predicate, arguments, parse_corpus=None):
+class PropbankInstance(object):
+    def __init__(
+        self,
+        fileid,
+        sentnum,
+        wordnum,
+        tagger,
+        roleset,
+        inflection,
+        predicate,
+        arguments,
+        parse_corpus=None,
+    ):
 
         self.fileid = fileid
         """The name of the file containing the parse tree for this
@@ -209,51 +231,65 @@ class PropbankInstance(object):
     @property
     def baseform(self):
         """The baseform of the predicate."""
-        return self.roleset.split('.')[0]
+        return self.roleset.split(".")[0]
 
     @property
     def sensenumber(self):
         """The sense number of the predicate."""
-        return self.roleset.split('.')[1]
+        return self.roleset.split(".")[1]
 
     @property
     def predid(self):
         """Identifier of the predicate."""
-        return 'rel'
+        return "rel"
 
     def __repr__(self):
-        return ('<PropbankInstance: %s, sent %s, word %s>' %
-                (self.fileid, self.sentnum, self.wordnum))
+        return "<PropbankInstance: %s, sent %s, word %s>" % (
+            self.fileid,
+            self.sentnum,
+            self.wordnum,
+        )
 
     def __str__(self):
-        s = '%s %s %s %s %s %s' % (self.fileid, self.sentnum, self.wordnum,
-                                   self.tagger, self.roleset, self.inflection)
-        items = self.arguments + ((self.predicate, 'rel'),)
+        s = "%s %s %s %s %s %s" % (
+            self.fileid,
+            self.sentnum,
+            self.wordnum,
+            self.tagger,
+            self.roleset,
+            self.inflection,
+        )
+        items = self.arguments + ((self.predicate, "rel"),)
         for (argloc, argid) in sorted(items):
-            s += ' %s-%s' % (argloc, argid)
+            s += " %s-%s" % (argloc, argid)
         return s
 
     def _get_tree(self):
-        if self.parse_corpus is None: return None
-        if self.fileid not in self.parse_corpus.fileids(): return None
+        if self.parse_corpus is None:
+            return None
+        if self.fileid not in self.parse_corpus.fileids():
+            return None
         return self.parse_corpus.parsed_sents(self.fileid)[self.sentnum]
-    tree = property(_get_tree, doc="""
+
+    tree = property(
+        _get_tree,
+        doc="""
         The parse tree corresponding to this instance, or None if
-        the corresponding tree is not available.""")
+        the corresponding tree is not available.""",
+    )
 
     @staticmethod
     def parse(s, parse_fileid_xform=None, parse_corpus=None):
         pieces = s.split()
         if len(pieces) < 7:
-            raise ValueError('Badly formatted propbank line: %r' % s)
+            raise ValueError("Badly formatted propbank line: %r" % s)
 
         # Divide the line into its basic pieces.
-        (fileid, sentnum, wordnum,
-         tagger, roleset, inflection) = pieces[:6]
-        rel = [p for p in pieces[6:] if p.endswith('-rel')]
-        args = [p for p in pieces[6:] if not p.endswith('-rel')]
+        (fileid, sentnum, wordnum, tagger, roleset, inflection) = pieces[:6]
+        rel = [p for p in pieces[6:] if p.endswith("-rel")]
+        args = [p for p in pieces[6:] if not p.endswith("-rel")]
         if len(rel) != 1:
-            raise ValueError('Badly formatted propbank line: %r' % s)
+            raise ValueError("Badly formatted propbank line: %r" % s)
 
         # Apply the fileid selector, if any.
         if parse_fileid_xform is not None:
@@ -272,13 +308,22 @@ class PropbankInstance(object):
         # Parse the arguments.
         arguments = []
         for arg in args:
-            argloc, argid = arg.split('-', 1)
-            arguments.append( (PropbankTreePointer.parse(argloc), argid) )
+            argloc, argid = arg.split("-", 1)
+            arguments.append((PropbankTreePointer.parse(argloc), argid))
 
         # Put it all together.
-        return PropbankInstance(fileid, sentnum, wordnum, tagger,
-                                roleset, inflection, predicate,
-                                arguments, parse_corpus)
+        return PropbankInstance(
+            fileid,
+            sentnum,
+            wordnum,
+            tagger,
+            roleset,
+            inflection,
+            predicate,
+            arguments,
+            parse_corpus,
+        )
+
 
 class PropbankPointer(object):
     """
@@ -294,11 +339,13 @@ class PropbankPointer(object):
         chains in a tree.  It consists of a sequence of pieces, which
         can be ``PropbankTreePointer`` or ``PropbankSplitTreePointer`` pointers.
     """
+
     def __init__(self):
         if self.__class__ == PropbankPointer:
             raise NotImplementedError()
 
-@compat.python_2_unicode_compatible
+
 class PropbankChainTreePointer(PropbankPointer):
     def __init__(self, pieces):
         self.pieces = pieces
@@ -307,15 +354,18 @@ class PropbankChainTreePointer(PropbankPointer):
            ``PropbankTreePointer`` pointers."""
 
     def __str__(self):
-        return '*'.join('%s' % p for p in self.pieces)
+        return "*".join("%s" % p for p in self.pieces)
+
     def __repr__(self):
-        return '<PropbankChainTreePointer: %s>' % self
+        return "<PropbankChainTreePointer: %s>" % self
+
     def select(self, tree):
-        if tree is None: raise ValueError('Parse tree not avaialable')
-        return Tree('*CHAIN*', [p.select(tree) for p in self.pieces])
+        if tree is None:
+            raise ValueError("Parse tree not avaialable")
+        return Tree("*CHAIN*", [p.select(tree) for p in self.pieces])
 
 
-@compat.python_2_unicode_compatible
 class PropbankSplitTreePointer(PropbankPointer):
     def __init__(self, pieces):
         self.pieces = pieces
@@ -323,22 +373,26 @@ class PropbankSplitTreePointer(PropbankPointer):
            all ``PropbankTreePointer`` pointers."""
 
     def __str__(self):
-        return ','.join('%s' % p for p in self.pieces)
+        return ",".join("%s" % p for p in self.pieces)
+
     def __repr__(self):
-        return '<PropbankSplitTreePointer: %s>' % self
+        return "<PropbankSplitTreePointer: %s>" % self
+
     def select(self, tree):
-        if tree is None: raise ValueError('Parse tree not avaialable')
-        return Tree('*SPLIT*', [p.select(tree) for p in self.pieces])
+        if tree is None:
+            raise ValueError("Parse tree not avaialable")
+        return Tree("*SPLIT*", [p.select(tree) for p in self.pieces])
 
 
 @total_ordering
-@compat.python_2_unicode_compatible
 class PropbankTreePointer(PropbankPointer):
     """
     wordnum:height*wordnum:height*...
     wordnum:height,
 
     """
+
     def __init__(self, wordnum, height):
         self.wordnum = wordnum
         self.height = height
@@ -346,44 +400,45 @@ class PropbankTreePointer(PropbankPointer):
     @staticmethod
     def parse(s):
         # Deal with chains (xx*yy*zz)
-        pieces = s.split('*')
+        pieces = s.split("*")
         if len(pieces) > 1:
-            return PropbankChainTreePointer([PropbankTreePointer.parse(elt)
-                                              for elt in pieces])
+            return PropbankChainTreePointer(
+                [PropbankTreePointer.parse(elt) for elt in pieces]
+            )
 
         # Deal with split args (xx,yy,zz)
-        pieces = s.split(',')
+        pieces = s.split(",")
         if len(pieces) > 1:
-            return PropbankSplitTreePointer([PropbankTreePointer.parse(elt)
-                                             for elt in pieces])
+            return PropbankSplitTreePointer(
+                [PropbankTreePointer.parse(elt) for elt in pieces]
+            )
 
         # Deal with normal pointers.
-        pieces = s.split(':')
-        if len(pieces) != 2: raise ValueError('bad propbank pointer %r' % s)
+        pieces = s.split(":")
+        if len(pieces) != 2:
+            raise ValueError("bad propbank pointer %r" % s)
         return PropbankTreePointer(int(pieces[0]), int(pieces[1]))
 
     def __str__(self):
-        return '%s:%s' % (self.wordnum, self.height)
+        return "%s:%s" % (self.wordnum, self.height)
 
     def __repr__(self):
-        return 'PropbankTreePointer(%d, %d)' % (self.wordnum, self.height)
+        return "PropbankTreePointer(%d, %d)" % (self.wordnum, self.height)
 
     def __eq__(self, other):
-        while isinstance(other, (PropbankChainTreePointer,
-                                 PropbankSplitTreePointer)):
+        while isinstance(other, (PropbankChainTreePointer, PropbankSplitTreePointer)):
             other = other.pieces[0]
 
         if not isinstance(other, PropbankTreePointer):
             return self is other
 
-        return (self.wordnum == other.wordnum and self.height == other.height)
+        return self.wordnum == other.wordnum and self.height == other.height
 
     def __ne__(self, other):
         return not self == other
 
     def __lt__(self, other):
-        while isinstance(other, (PropbankChainTreePointer,
-                                 PropbankSplitTreePointer)):
+        while isinstance(other, (PropbankChainTreePointer, PropbankSplitTreePointer)):
             other = other.pieces[0]
 
         if not isinstance(other, PropbankTreePointer):
@@ -392,7 +447,8 @@ class PropbankTreePointer(PropbankPointer):
         return (self.wordnum, -self.height) < (other.wordnum, -other.height)
 
     def select(self, tree):
-        if tree is None: raise ValueError('Parse tree not avaialable')
+        if tree is None:
+            raise ValueError("Parse tree not avaialable")
         return tree[self.treepos(tree)]
 
     def treepos(self, tree):
@@ -400,14 +456,13 @@ class PropbankTreePointer(PropbankPointer):
         Convert this pointer to a standard 'tree position' pointer,
         given that it points to the given tree.
         """
-        if tree is None: raise ValueError('Parse tree not avaialable')
+        if tree is None:
+            raise ValueError("Parse tree not avaialable")
         stack = [tree]
         treepos = []
 
         wordnum = 0
         while True:
-            #print treepos
-            #print stack[-1]
             # tree node:
             if isinstance(stack[-1], Tree):
                 # Select the next child.
@@ -425,36 +480,37 @@ class PropbankTreePointer(PropbankPointer):
             # word node:
             else:
                 if wordnum == self.wordnum:
-                    return tuple(treepos[:len(treepos)-self.height-1])
+                    return tuple(treepos[: len(treepos) - self.height - 1])
                 else:
                     wordnum += 1
                     stack.pop()
 
-@compat.python_2_unicode_compatible
+
 class PropbankInflection(object):
-    #{ Inflection Form
-    INFINITIVE = 'i'
-    GERUND = 'g'
-    PARTICIPLE = 'p'
-    FINITE = 'v'
-    #{ Inflection Tense
-    FUTURE = 'f'
-    PAST = 'p'
-    PRESENT = 'n'
-    #{ Inflection Aspect
-    PERFECT = 'p'
-    PROGRESSIVE = 'o'
-    PERFECT_AND_PROGRESSIVE = 'b'
-    #{ Inflection Person
-    THIRD_PERSON = '3'
-    #{ Inflection Voice
-    ACTIVE = 'a'
-    PASSIVE = 'p'
-    #{ Inflection
-    NONE = '-'
-    #}
-
-    def __init__(self, form='-', tense='-', aspect='-', person='-', voice='-'):
+    # { Inflection Form
+    INFINITIVE = "i"
+    GERUND = "g"
+    PARTICIPLE = "p"
+    FINITE = "v"
+    # { Inflection Tense
+    FUTURE = "f"
+    PAST = "p"
+    PRESENT = "n"
+    # { Inflection Aspect
+    PERFECT = "p"
+    PROGRESSIVE = "o"
+    PERFECT_AND_PROGRESSIVE = "b"
+    # { Inflection Person
+    THIRD_PERSON = "3"
+    # { Inflection Voice
+    ACTIVE = "a"
+    PASSIVE = "p"
+    # { Inflection
+    NONE = "-"
+    # }
+
+    def __init__(self, form="-", tense="-", aspect="-", person="-", voice="-"):
         self.form = form
         self.tense = tense
         self.aspect = aspect
@@ -462,18 +518,17 @@ class PropbankInflection(object):
         self.voice = voice
 
     def __str__(self):
-        return self.form+self.tense+self.aspect+self.person+self.voice
+        return self.form + self.tense + self.aspect + self.person + self.voice
 
     def __repr__(self):
-        return '<PropbankInflection: %s>' % self
+        return "<PropbankInflection: %s>" % self
 
-    _VALIDATE = re.compile(r'[igpv\-][fpn\-][pob\-][3\-][ap\-]$')
+    _VALIDATE = re.compile(r"[igpv\-][fpn\-][pob\-][3\-][ap\-]$")
 
     @staticmethod
     def parse(s):
-        if not isinstance(s, string_types):
-            raise TypeError('expected a string')
-        if (len(s) != 5 or
-            not PropbankInflection._VALIDATE.match(s)):
-            raise ValueError('Bad propbank inflection string %r' % s)
+        if not isinstance(s, str):
+            raise TypeError("expected a string")
+        if len(s) != 5 or not PropbankInflection._VALIDATE.match(s):
+            raise ValueError("Bad propbank inflection string %r" % s)
         return PropbankInflection(*s)
diff --git a/nlp_resource_data/nltk/corpus/reader/propbank.pyc b/nlp_resource_data/nltk/corpus/reader/propbank.pyc
deleted file mode 100755 (executable)
index 52302d3..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/propbank.pyc and /dev/null differ
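
The three s.split(...) branches in PropbankTreePointer.parse above implement the pointer grammar hinted at in the class docstring: a plain wordnum:height pair is a single pointer, '*' joins pointers into a chain, and ',' joins them into a split argument. A minimal, self-contained sketch of that dispatch (the pointer and inflection strings are invented for illustration):

    from nltk.corpus.reader.propbank import (
        PropbankTreePointer, PropbankChainTreePointer,
        PropbankSplitTreePointer, PropbankInflection)

    p = PropbankTreePointer.parse("7:2")            # plain wordnum:height
    assert (p.wordnum, p.height) == (7, 2)

    chain = PropbankTreePointer.parse("7:2*10:0")   # '*' builds a chain pointer
    assert isinstance(chain, PropbankChainTreePointer)

    split = PropbankTreePointer.parse("7:2,10:0")   # ',' builds a split pointer
    assert isinstance(split, PropbankSplitTreePointer)

    infl = PropbankInflection.parse("vp--a")        # finite, past, active
    assert str(infl) == "vp--a"
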
old mode 100755 (executable)
new mode 100644 (file)
index 61e904e..ca9e540
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Pros and Cons Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -27,8 +27,6 @@ Related papers:
 """
 import re
 
-from six import string_types
-
 from nltk.corpus.reader.api import *
 from nltk.tokenize import *
 
@@ -45,10 +43,17 @@ class ProsConsCorpusReader(CategorizedCorpusReader, CorpusReader):
         >>> pros_cons.words('IntegratedPros.txt')
         ['Easy', 'to', 'use', ',', 'economical', '!', ...]
     """
+
     CorpusView = StreamBackedCorpusView
 
-    def __init__(self, root, fileids, word_tokenizer=WordPunctTokenizer(),
-                 encoding='utf8', **kwargs):
+    def __init__(
+        self,
+        root,
+        fileids,
+        word_tokenizer=WordPunctTokenizer(),
+        encoding="utf8",
+        **kwargs
+    ):
         """
         :param root: The root directory for the corpus.
         :param fileids: a list or regexp specifying the fileids in the corpus.
@@ -77,10 +82,14 @@ class ProsConsCorpusReader(CategorizedCorpusReader, CorpusReader):
         fileids = self._resolve(fileids, categories)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
-        return concat([self.CorpusView(path, self._read_sent_block, encoding=enc)
-            for (path, enc, fileid) in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_sent_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def words(self, fileids=None, categories=None):
         """
@@ -97,14 +106,18 @@ class ProsConsCorpusReader(CategorizedCorpusReader, CorpusReader):
         fileids = self._resolve(fileids, categories)
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
-        return concat([self.CorpusView(path, self._read_word_block, encoding=enc)
-            for (path, enc, fileid) in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_word_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def _read_sent_block(self, stream):
         sents = []
-        for i in range(20): # Read 20 lines at a time.
+        for i in range(20):  # Read 20 lines at a time.
             line = stream.readline()
             if not line:
                 continue
@@ -121,7 +134,7 @@ class ProsConsCorpusReader(CategorizedCorpusReader, CorpusReader):
 
     def _resolve(self, fileids, categories):
         if fileids is not None and categories is not None:
-            raise ValueError('Specify fileids or categories, not both')
+            raise ValueError("Specify fileids or categories, not both")
         if categories is not None:
             return self.fileids(categories)
         else:
diff --git a/nlp_resource_data/nltk/corpus/reader/pros_cons.pyc b/nlp_resource_data/nltk/corpus/reader/pros_cons.pyc
deleted file mode 100755 (executable)
index a1daa86..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/pros_cons.pyc and /dev/null differ
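
Since _resolve above rejects passing fileids and categories together, the reader's accessors are strictly either-or. A short usage sketch, assuming the pros_cons corpus has been fetched via nltk.download (the category name below is illustrative; _resolve raises before it is checked):

    import nltk
    from nltk.corpus import pros_cons

    nltk.download('pros_cons')                   # one-time corpus fetch
    print(pros_cons.words('IntegratedPros.txt')[:5])

    try:
        pros_cons.sents('IntegratedPros.txt', categories='Pros')
    except ValueError as err:                    # _resolve rejects both at once
        print(err)                               # Specify fileids or categories, not both
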
old mode 100755 (executable)
new mode 100644 (file)
index 1ce3d25..fc0b61c
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Product Reviews Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -59,26 +59,24 @@ Note: Some of the files (e.g. "ipod.txt", "Canon PowerShot SD500.txt") do not
     consideration.
 """
 
-from __future__ import division
-
-from six import string_types
-
 import re
 
 from nltk.corpus.reader.api import *
 from nltk.tokenize import *
 
-TITLE = re.compile(r'^\[t\](.*)$') # [t] Title
-FEATURES = re.compile(r'((?:(?:\w+\s)+)?\w+)\[((?:\+|\-)\d)\]') # find 'feature' in feature[+3]
-NOTES = re.compile(r'\[(?!t)(p|u|s|cc|cs)\]') # find 'p' in camera[+2][p]
-SENT = re.compile(r'##(.*)$') # find tokenized sentence
+TITLE = re.compile(r"^\[t\](.*)$")  # [t] Title
+FEATURES = re.compile(
+    r"((?:(?:\w+\s)+)?\w+)\[((?:\+|\-)\d)\]"
+)  # find 'feature' in feature[+3]
+NOTES = re.compile(r"\[(?!t)(p|u|s|cc|cs)\]")  # find 'p' in camera[+2][p]
+SENT = re.compile(r"##(.*)$")  # find tokenized sentence
 
 
-@compat.python_2_unicode_compatible
 class Review(object):
     """
     A Review is the main block of a ReviewsCorpusReader.
     """
+
     def __init__(self, title=None, review_lines=None):
         """
         :param title: the title of the review.
@@ -122,15 +120,17 @@ class Review(object):
         return [review_line.sent for review_line in self.review_lines]
 
     def __repr__(self):
-        return 'Review(title=\"{}\", review_lines={})'.format(self.title, self.review_lines)
+        return 'Review(title="{}", review_lines={})'.format(
+            self.title, self.review_lines
+        )
 
 
-@compat.python_2_unicode_compatible
 class ReviewLine(object):
     """
     A ReviewLine represents a sentence of the review, together with (optional)
     annotations of its features and notes about the reviewed item.
     """
+
     def __init__(self, sent, features=None, notes=None):
         self.sent = sent
         if features is None:
@@ -144,8 +144,9 @@ class ReviewLine(object):
             self.notes = notes
 
     def __repr__(self):
-        return ('ReviewLine(features={}, notes={}, sent={})'.format(
-            self.features, self.notes, self.sent))
+        return "ReviewLine(features={}, notes={}, sent={})".format(
+            self.features, self.notes, self.sent
+        )
 
 
 class ReviewsCorpusReader(CorpusReader):
@@ -173,18 +174,18 @@ class ReviewsCorpusReader(CorpusReader):
 
     We can compute stats for specific product features:
 
-        >>> from __future__ import division
         >>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
         >>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
-        >>> # We use float for backward compatibility with division in Python2.7
         >>> mean = tot / n_reviews
         >>> print(n_reviews, tot, mean)
         15 24 1.6
     """
+
     CorpusView = StreamBackedCorpusView
 
-    def __init__(self, root, fileids, word_tokenizer=WordPunctTokenizer(),
-                 encoding='utf8'):
+    def __init__(
+        self, root, fileids, word_tokenizer=WordPunctTokenizer(), encoding="utf8"
+    ):
         """
         :param root: The root directory for the corpus.
         :param fileids: a list or regexp specifying the fileids in the corpus.
@@ -208,10 +209,14 @@ class ReviewsCorpusReader(CorpusReader):
         """
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
-        return concat([self.CorpusView(fileid, self._read_features, encoding=enc)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                self.CorpusView(fileid, self._read_features, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def raw(self, fileids=None):
         """
@@ -222,7 +227,7 @@ class ReviewsCorpusReader(CorpusReader):
         """
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
@@ -243,8 +248,12 @@ class ReviewsCorpusReader(CorpusReader):
         """
         if fileids is None:
             fileids = self._fileids
-        return concat([self.CorpusView(fileid, self._read_review_block, encoding=enc)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                self.CorpusView(fileid, self._read_review_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def sents(self, fileids=None):
         """
@@ -256,9 +265,12 @@ class ReviewsCorpusReader(CorpusReader):
             list of word strings.
         :rtype: list(list(str))
         """
-        return concat([self.CorpusView(path, self._read_sent_block, encoding=enc)
-                       for (path, enc, fileid)
-                       in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_sent_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def words(self, fileids=None):
         """
@@ -270,9 +282,12 @@ class ReviewsCorpusReader(CorpusReader):
         :return: the given file(s) as a list of words and punctuation symbols.
         :rtype: list(str)
         """
-        return concat([self.CorpusView(path, self._read_word_block, encoding=enc)
-                       for (path, enc, fileid)
-                       in self.abspaths(fileids, True, True)])
+        return concat(
+            [
+                self.CorpusView(path, self._read_word_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def _read_features(self, stream):
         features = []
@@ -287,10 +302,12 @@ class ReviewsCorpusReader(CorpusReader):
         while True:
             line = stream.readline()
             if not line:
-                return [] # end of file.
+                return []  # end of file.
             title_match = re.match(TITLE, line)
             if title_match:
-                review = Review(title=title_match.group(1).strip()) # We create a new review
+                review = Review(
+                    title=title_match.group(1).strip()
+                )  # We create a new review
                 break
 
         # Scan until we find another line matching the regexp, or EOF.
@@ -322,7 +339,7 @@ class ReviewsCorpusReader(CorpusReader):
 
     def _read_word_block(self, stream):
         words = []
-        for i in range(20): # Read 20 lines at a time.
+        for i in range(20):  # Read 20 lines at a time.
             line = stream.readline()
             sent = re.findall(SENT, line)
             if sent:
diff --git a/nlp_resource_data/nltk/corpus/reader/reviews.pyc b/nlp_resource_data/nltk/corpus/reader/reviews.pyc
deleted file mode 100755 (executable)
index 103b814..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/reviews.pyc and /dev/null differ
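
The reformatted FEATURES and NOTES patterns are easiest to check against a sample annotation line. A self-contained sketch (the line below is invented, following the feature[+n] / [p] notation described in the comments above):

    import re

    FEATURES = re.compile(r"((?:(?:\w+\s)+)?\w+)\[((?:\+|\-)\d)\]")
    NOTES = re.compile(r"\[(?!t)(p|u|s|cc|cs)\]")

    line = "picture quality[+2][p], battery life[-1]##great pictures , weak battery"
    print(FEATURES.findall(line))  # [('picture quality', '+2'), ('battery life', '-1')]
    print(NOTES.findall(line))     # ['p']
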
old mode 100755 (executable)
new mode 100644 (file)
index 66c702d..9538f47
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: RTE Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author:  Ewan Klein <ewan@inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -32,11 +32,6 @@ In order to provide globally unique IDs for each pair, a new attribute
 file, taking values 1, 2 or 3. The GID is formatted 'm-n', where 'm' is the
 challenge number and 'n' is the pair ID.
 """
-from __future__ import unicode_literals
-
-from six import string_types
-
-from nltk import compat
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 from nltk.corpus.reader.xmldocs import *
@@ -52,13 +47,10 @@ def norm(value_string):
     :rtype: int
     """
 
-    valdict = {"TRUE": 1,
-                     "FALSE": 0,
-                     "YES": 1,
-                     "NO": 0}
+    valdict = {"TRUE": 1, "FALSE": 0, "YES": 1, "NO": 0}
     return valdict[value_string.upper()]
 
-@compat.python_2_unicode_compatible
+
 class RTEPair(object):
     """
     Container for RTE text-hypothesis pairs.
@@ -67,8 +59,18 @@ class RTEPair(object):
     ``entailment`` in RTE2 and RTE3. These both get mapped on to the ``entailment``
     attribute of this class.
     """
-    def __init__(self, pair, challenge=None, id=None, text=None, hyp=None,
-             value=None, task=None, length=None):
+
+    def __init__(
+        self,
+        pair,
+        challenge=None,
+        id=None,
+        text=None,
+        hyp=None,
+        value=None,
+        task=None,
+        length=None,
+    ):
         """
         :param challenge: version of the RTE challenge (i.e., RTE1, RTE2 or RTE3)
         :param id: identifier for the pair
@@ -78,7 +80,7 @@ class RTEPair(object):
         :param task: attribute for the particular NLP task that the data was drawn from
         :param length: attribute for the length of the text of the pair
         """
-        self.challenge =  challenge
+        self.challenge = challenge
         self.id = pair.attrib["id"]
         self.gid = "%s-%s" % (self.challenge, self.id)
         self.text = pair[0].text
@@ -101,9 +103,9 @@ class RTEPair(object):
 
     def __repr__(self):
         if self.challenge:
-            return '<RTEPair: gid=%s-%s>' % (self.challenge, self.id)
+            return "<RTEPair: gid=%s-%s>" % (self.challenge, self.id)
         else:
-            return '<RTEPair: id=%s>' % self.id
+            return "<RTEPair: id=%s>" % self.id
 
 
 class RTECorpusReader(XMLCorpusReader):
@@ -125,12 +127,10 @@ class RTECorpusReader(XMLCorpusReader):
         :rtype: list(RTEPair)
         """
         try:
-            challenge = doc.attrib['challenge']
+            challenge = doc.attrib["challenge"]
         except KeyError:
             challenge = None
-        return [RTEPair(pair, challenge=challenge)
-                for pair in doc.getiterator("pair")]
-
+        return [RTEPair(pair, challenge=challenge) for pair in doc.getiterator("pair")]
 
     def pairs(self, fileids):
         """
@@ -140,5 +140,6 @@ class RTECorpusReader(XMLCorpusReader):
         :type: list
         :rtype: list(RTEPair)
         """
-        if isinstance(fileids, string_types): fileids = [fileids]
+        if isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self._read_etree(self.xml(fileid)) for fileid in fileids])
diff --git a/nlp_resource_data/nltk/corpus/reader/rte.pyc b/nlp_resource_data/nltk/corpus/reader/rte.pyc
deleted file mode 100755 (executable)
index fadecfc..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/rte.pyc and /dev/null differ
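
norm above collapses the two truth vocabularies (TRUE/FALSE in RTE1, YES/NO in RTE2 and RTE3) onto {1, 0}, so pair.value is directly usable as a classifier label. A usage sketch, assuming nltk.download('rte'); note that pairs still calls ElementTree's getiterator, which exists on the Python 3.7 target of this port but was removed in Python 3.9:

    import nltk
    from nltk.corpus import rte
    from nltk.corpus.reader.rte import norm

    assert norm("TRUE") == 1 and norm("no") == 0   # case-folded via .upper()

    nltk.download('rte')
    pair = rte.pairs('rte1_dev.xml')[0]
    print(pair.gid, pair.value, pair.text[:60])
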
old mode 100755 (executable)
new mode 100644 (file)
index 826439f..f04ea45
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: SemCor Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Nathan Schneider <nschneid@cs.cmu.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -8,13 +8,14 @@
 """
 Corpus reader for the SemCor Corpus.
 """
-from __future__ import absolute_import, unicode_literals
-__docformat__ = 'epytext en'
+
+__docformat__ = "epytext en"
 
 from nltk.corpus.reader.api import *
 from nltk.corpus.reader.xmldocs import XMLCorpusReader, XMLCorpusView
 from nltk.tree import Tree
 
+
 class SemcorCorpusReader(XMLCorpusReader):
     """
     Corpus reader for the SemCor Corpus.
@@ -22,6 +23,7 @@ class SemcorCorpusReader(XMLCorpusReader):
     method.  For access to simple word lists and tagged word lists, use
     ``words()``, ``sents()``, ``tagged_words()``, and ``tagged_sents()``.
     """
+
     def __init__(self, root, fileids, wordnet, lazy=True):
         XMLCorpusReader.__init__(self, root, fileids)
         self._lazy = lazy
@@ -32,7 +34,7 @@ class SemcorCorpusReader(XMLCorpusReader):
         :return: the given file(s) as a list of words and punctuation symbols.
         :rtype: list(str)
         """
-        return self._items(fileids, 'word', False, False, False)
+        return self._items(fileids, "word", False, False, False)
 
     def chunks(self, fileids=None):
         """
@@ -41,9 +43,9 @@ class SemcorCorpusReader(XMLCorpusReader):
             that form a unit.
         :rtype: list(list(str))
         """
-        return self._items(fileids, 'chunk', False, False, False)
+        return self._items(fileids, "chunk", False, False, False)
 
-    def tagged_chunks(self, fileids=None, tag=('pos' or 'sem' or 'both')):
+    def tagged_chunks(self, fileids=None, tag=("pos" or "sem" or "both")):
         """
         :return: the given file(s) as a list of tagged chunks, represented
             in tree form.
@@ -56,7 +58,7 @@ class SemcorCorpusReader(XMLCorpusReader):
             have no lemma.  Other chunks not in WordNet have no semantic tag.
             Punctuation tokens have `None` for their part of speech tag.)
         """
-        return self._items(fileids, 'chunk', False, tag!='sem', tag!='pos')
+        return self._items(fileids, "chunk", False, tag != "sem", tag != "pos")
 
     def sents(self, fileids=None):
         """
@@ -64,7 +66,7 @@ class SemcorCorpusReader(XMLCorpusReader):
             as a list of word strings.
         :rtype: list(list(str))
         """
-        return self._items(fileids, 'word', True, False, False)
+        return self._items(fileids, "word", True, False, False)
 
     def chunk_sents(self, fileids=None):
         """
@@ -72,9 +74,9 @@ class SemcorCorpusReader(XMLCorpusReader):
             as a list of chunks.
         :rtype: list(list(list(str)))
         """
-        return self._items(fileids, 'chunk', True, False, False)
+        return self._items(fileids, "chunk", True, False, False)
 
-    def tagged_sents(self, fileids=None, tag=('pos' or 'sem' or 'both')):
+    def tagged_sents(self, fileids=None, tag=("pos" or "sem" or "both")):
         """
         :return: the given file(s) as a list of sentences. Each sentence
             is represented as a list of tagged chunks (in tree form).
@@ -87,17 +89,23 @@ class SemcorCorpusReader(XMLCorpusReader):
             have no lemma.  Other chunks not in WordNet have no semantic tag.
             Punctuation tokens have `None` for their part of speech tag.)
         """
-        return self._items(fileids, 'chunk', True, tag!='sem', tag!='pos')
+        return self._items(fileids, "chunk", True, tag != "sem", tag != "pos")
 
     def _items(self, fileids, unit, bracket_sent, pos_tag, sem_tag):
-        if unit=='word' and not bracket_sent:
+        if unit == "word" and not bracket_sent:
             # the result of the SemcorWordView may be a multiword unit, so the
             # LazyConcatenation will make sure the sentence is flattened
-            _ = lambda *args: LazyConcatenation((SemcorWordView if self._lazy else self._words)(*args))
+            _ = lambda *args: LazyConcatenation(
+                (SemcorWordView if self._lazy else self._words)(*args)
+            )
         else:
             _ = SemcorWordView if self._lazy else self._words
-        return concat([_(fileid, unit, bracket_sent, pos_tag, sem_tag, self._wordnet)
-                       for fileid in self.abspaths(fileids)])
+        return concat(
+            [
+                _(fileid, unit, bracket_sent, pos_tag, sem_tag, self._wordnet)
+                for fileid in self.abspaths(fileids)
+            ]
+        )
 
     def _words(self, fileid, unit, bracket_sent, pos_tag, sem_tag):
         """
@@ -113,21 +121,23 @@ class SemcorCorpusReader(XMLCorpusReader):
         :param sem_tag: Whether to include semantic tags, namely WordNet lemma
             and OOV named entity status.
         """
-        assert unit in ('token', 'word', 'chunk')
+        assert unit in ("token", "word", "chunk")
         result = []
 
         xmldoc = ElementTree.parse(fileid).getroot()
-        for xmlsent in xmldoc.findall('.//s'):
+        for xmlsent in xmldoc.findall(".//s"):
             sent = []
             for xmlword in _all_xmlwords_in(xmlsent):
-                itm = SemcorCorpusReader._word(xmlword, unit, pos_tag, sem_tag, self._wordnet)
-                if unit=='word':
+                itm = SemcorCorpusReader._word(
+                    xmlword, unit, pos_tag, sem_tag, self._wordnet
+                )
+                if unit == "word":
                     sent.extend(itm)
                 else:
                     sent.append(itm)
 
             if bracket_sent:
-                result.append(SemcorSentence(xmlsent.attrib['snum'], sent))
+                result.append(SemcorSentence(xmlsent.attrib["snum"], sent))
             else:
                 result.extend(sent)
 
@@ -138,83 +148,107 @@ class SemcorCorpusReader(XMLCorpusReader):
     def _word(xmlword, unit, pos_tag, sem_tag, wordnet):
         tkn = xmlword.text
         if not tkn:
-            tkn = "" # fixes issue 337?
+            tkn = ""  # fixes issue 337?
 
-        lemma = xmlword.get('lemma', tkn) # lemma or NE class
-        lexsn = xmlword.get('lexsn') # lex_sense (locator for the lemma's sense)
+        lemma = xmlword.get("lemma", tkn)  # lemma or NE class
+        lexsn = xmlword.get("lexsn")  # lex_sense (locator for the lemma's sense)
         if lexsn is not None:
-            sense_key = lemma + '%' + lexsn
-            wnpos = ('n','v','a','r','s')[int(lexsn.split(':')[0])-1]   # see http://wordnet.princeton.edu/man/senseidx.5WN.html
+            sense_key = lemma + "%" + lexsn
+            wnpos = ("n", "v", "a", "r", "s")[
+                int(lexsn.split(":")[0]) - 1
+            ]  # see http://wordnet.princeton.edu/man/senseidx.5WN.html
         else:
             sense_key = wnpos = None
-        redef = xmlword.get('rdf', tkn)        # redefinition--this indicates the lookup string
+        redef = xmlword.get(
+            "rdf", tkn
+        )  # redefinition--this indicates the lookup string
         # does not exactly match the enclosed string, e.g. due to typographical adjustments
         # or discontinuity of a multiword expression. If a redefinition has occurred,
         # the "rdf" attribute holds its inflected form and "lemma" holds its lemma.
         # For NEs, "rdf", "lemma", and "pn" all hold the same value (the NE class).
-        sensenum = xmlword.get('wnsn')  # WordNet sense number
-        isOOVEntity = 'pn' in xmlword.keys()   # a "personal name" (NE) not in WordNet
-        pos = xmlword.get('pos')    # part of speech for the whole chunk (None for punctuation)
+        sensenum = xmlword.get("wnsn")  # WordNet sense number
+        isOOVEntity = "pn" in xmlword.keys()  # a "personal name" (NE) not in WordNet
+        pos = xmlword.get(
+            "pos"
+        )  # part of speech for the whole chunk (None for punctuation)
 
-        if unit=='token':
+        if unit == "token":
             if not pos_tag and not sem_tag:
                 itm = tkn
             else:
-                itm = (tkn,) + ((pos,) if pos_tag else ()) + ((lemma, wnpos, sensenum, isOOVEntity) if sem_tag else ())
+                itm = (
+                    (tkn,)
+                    + ((pos,) if pos_tag else ())
+                    + ((lemma, wnpos, sensenum, isOOVEntity) if sem_tag else ())
+                )
             return itm
         else:
-            ww = tkn.split('_') # TODO: case where punctuation intervenes in MWE
-            if unit=='word':
+            ww = tkn.split("_")  # TODO: case where punctuation intervenes in MWE
+            if unit == "word":
                 return ww
             else:
                 if sensenum is not None:
                     try:
-                        sense = wordnet.lemma_from_key(sense_key)   # Lemma object
+                        sense = wordnet.lemma_from_key(sense_key)  # Lemma object
                     except Exception:
                         # cannot retrieve the wordnet.Lemma object. possible reasons:
                         #  (a) the wordnet corpus is not downloaded;
-                        #  (b) a nonexistant sense is annotated: e.g., such.s.00 triggers: 
+                        #  (b) a nonexistent sense is annotated: e.g., such.s.00 triggers:
                         #  nltk.corpus.reader.wordnet.WordNetError: No synset found for key u'such%5:00:01:specified:00'
                         # solution: just use the lemma name as a string
                         try:
-                            sense = '%s.%s.%02d' % (lemma, wnpos, int(sensenum))    # e.g.: reach.v.02
+                            sense = "%s.%s.%02d" % (
+                                lemma,
+                                wnpos,
+                                int(sensenum),
+                            )  # e.g.: reach.v.02
                         except ValueError:
-                            sense = lemma+'.'+wnpos+'.'+sensenum  # e.g. the sense number may be "2;1"
+                            sense = (
+                                lemma + "." + wnpos + "." + sensenum
+                            )  # e.g. the sense number may be "2;1"
 
                 bottom = [Tree(pos, ww)] if pos_tag else ww
 
                 if sem_tag and isOOVEntity:
                     if sensenum is not None:
-                        return Tree(sense, [Tree('NE', bottom)])
-                    else:      # 'other' NE
-                        return Tree('NE', bottom)
+                        return Tree(sense, [Tree("NE", bottom)])
+                    else:  # 'other' NE
+                        return Tree("NE", bottom)
                 elif sem_tag and sensenum is not None:
                     return Tree(sense, bottom)
                 elif pos_tag:
                     return bottom[0]
                 else:
-                    return bottom # chunk as a list
+                    return bottom  # chunk as a list
+
 
 def _all_xmlwords_in(elt, result=None):
-    if result is None: result = []
+    if result is None:
+        result = []
     for child in elt:
-        if child.tag in ('wf', 'punc'): result.append(child)
-        else: _all_xmlwords_in(child, result)
+        if child.tag in ("wf", "punc"):
+            result.append(child)
+        else:
+            _all_xmlwords_in(child, result)
     return result
 
+
 class SemcorSentence(list):
     """
     A list of words, augmented by an attribute ``num`` used to record
     the sentence identifier (the ``n`` attribute from the XML).
     """
+
     def __init__(self, num, items):
         self.num = num
         list.__init__(self, items)
 
+
 class SemcorWordView(XMLCorpusView):
     """
     A stream backed corpus view specialized for use with the SemCor corpus.
     """
+
     def __init__(self, fileid, unit, bracket_sent, pos_tag, sem_tag, wordnet):
         """
         :param fileid: The name of the underlying file.
@@ -224,8 +258,10 @@ class SemcorWordView(XMLCorpusView):
         :param sem_tag: Whether to include semantic tags, namely WordNet lemma
             and OOV named entity status.
         """
-        if bracket_sent: tagspec = '.*/s'
-        else: tagspec = '.*/s/(punc|wf)'
+        if bracket_sent:
+            tagspec = ".*/s"
+        else:
+            tagspec = ".*/s/(punc|wf)"
 
         self._unit = unit
         self._sent = bracket_sent
@@ -236,21 +272,25 @@ class SemcorWordView(XMLCorpusView):
         XMLCorpusView.__init__(self, fileid, tagspec)
 
     def handle_elt(self, elt, context):
-        if self._sent: return self.handle_sent(elt)
-        else: return self.handle_word(elt)
+        if self._sent:
+            return self.handle_sent(elt)
+        else:
+            return self.handle_word(elt)
 
     def handle_word(self, elt):
-        return SemcorCorpusReader._word(elt, self._unit, self._pos_tag, self._sem_tag, self._wordnet)
+        return SemcorCorpusReader._word(
+            elt, self._unit, self._pos_tag, self._sem_tag, self._wordnet
+        )
 
     def handle_sent(self, elt):
         sent = []
         for child in elt:
-            if child.tag in ('wf','punc'):
+            if child.tag in ("wf", "punc"):
                 itm = self.handle_word(child)
-                if self._unit=='word':
+                if self._unit == "word":
                     sent.extend(itm)
                 else:
                     sent.append(itm)
             else:
-                raise ValueError('Unexpected element %s' % child.tag)
-        return SemcorSentence(elt.attrib['snum'], sent)
+                raise ValueError("Unexpected element %s" % child.tag)
+        return SemcorSentence(elt.attrib["snum"], sent)
diff --git a/nlp_resource_data/nltk/corpus/reader/semcor.pyc b/nlp_resource_data/nltk/corpus/reader/semcor.pyc
deleted file mode 100755 (executable)
index 4998693..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/semcor.pyc and /dev/null differ
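
A detail worth knowing when reading the tagged_chunks / tagged_sents signatures above: the default tag=("pos" or "sem" or "both") is an ordinary expression, and or returns its first truthy operand, so the default is simply "pos". A usage sketch, assuming nltk.download('semcor') and nltk.download('wordnet') (the latter is needed for sense lookups):

    import nltk
    from nltk.corpus import semcor

    assert ("pos" or "sem" or "both") == "pos"   # the default collapses to 'pos'

    nltk.download('semcor'); nltk.download('wordnet')
    print(semcor.words()[:8])
    print(semcor.tagged_chunks(tag='both')[0])   # first chunk as a Tree (POS tag; sense tag where annotated)
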
old mode 100755 (executable)
new mode 100644 (file)
index e8a0f3e..5d1a250
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Senseval 2 Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 #         Steven Bird <stevenbird1@gmail.com> (modifications)
 # URL: <http://nltk.org/>
@@ -21,20 +21,16 @@ The NLTK version of the Senseval 2 files uses well-formed XML.
 Each instance of the ambiguous words "hard", "interest", "line", and "serve"
 is tagged with a sense identifier, and supplied with context.
 """
-from __future__ import print_function, unicode_literals
-
-from six import string_types
 
 import re
 from xml.etree import ElementTree
 
-from nltk import compat
 from nltk.tokenize import *
 
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
-@compat.python_2_unicode_compatible
+
 class SensevalInstance(object):
     def __init__(self, word, position, context, senses):
         self.word = word
@@ -43,32 +39,40 @@ class SensevalInstance(object):
         self.context = context
 
     def __repr__(self):
-        return ('SensevalInstance(word=%r, position=%r, '
-                'context=%r, senses=%r)' %
-                (self.word, self.position, self.context, self.senses))
+        return "SensevalInstance(word=%r, position=%r, " "context=%r, senses=%r)" % (
+            self.word,
+            self.position,
+            self.context,
+            self.senses,
+        )
 
 
 class SensevalCorpusReader(CorpusReader):
     def instances(self, fileids=None):
-        return concat([SensevalCorpusView(fileid, enc)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                SensevalCorpusView(fileid, enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def raw(self, fileids=None):
         """
         :return: the text contents of the given fileids, as a single string.
         """
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def _entry(self, tree):
         elts = []
-        for lexelt in tree.findall('lexelt'):
-            for inst in lexelt.findall('instance'):
-                sense = inst[0].attrib['senseid']
-                context = [(w.text, w.attrib['pos'])
-                           for w in inst[1]]
-                elts.append( (sense, context) )
+        for lexelt in tree.findall("lexelt"):
+            for inst in lexelt.findall("instance"):
+                sense = inst[0].attrib["senseid"]
+                context = [(w.text, w.attrib["pos"]) for w in inst[1]]
+                elts.append((sense, context))
         return elts
 
 
@@ -77,27 +81,27 @@ class SensevalCorpusView(StreamBackedCorpusView):
         StreamBackedCorpusView.__init__(self, fileid, encoding=encoding)
 
         self._word_tokenizer = WhitespaceTokenizer()
-        self._lexelt_starts = [0] # list of streampos
-        self._lexelts = [None] # list of lexelt names
+        self._lexelt_starts = [0]  # list of streampos
+        self._lexelts = [None]  # list of lexelt names
 
     def read_block(self, stream):
         # Decide which lexical element we're in.
-        lexelt_num = bisect.bisect_right(self._lexelt_starts, stream.tell())-1
+        lexelt_num = bisect.bisect_right(self._lexelt_starts, stream.tell()) - 1
         lexelt = self._lexelts[lexelt_num]
 
         instance_lines = []
         in_instance = False
         while True:
             line = stream.readline()
-            if line == '':
+            if line == "":
                 assert instance_lines == []
                 return []
 
             # Start of a lexical element?
-            if line.lstrip().startswith('<lexelt'):
+            if line.lstrip().startswith("<lexelt"):
                 lexelt_num += 1
-                m = re.search('item=("[^"]+"|\'[^\']+\')', line)
-                assert m is not None # <lexelt> has no 'item=...'
+                m = re.search("item=(\"[^\"]+\"|'[^']+')", line)
+                assert m is not None  # <lexelt> has no 'item=...'
                 lexelt = m.group(1)[1:-1]
                 if lexelt_num < len(self._lexelts):
                     assert lexelt == self._lexelts[lexelt_num]
@@ -106,7 +110,7 @@ class SensevalCorpusView(StreamBackedCorpusView):
                     self._lexelt_starts.append(stream.tell())
 
             # Start of an instance?
-            if line.lstrip().startswith('<instance'):
+            if line.lstrip().startswith("<instance"):
                 assert instance_lines == []
                 in_instance = True
 
@@ -115,8 +119,8 @@ class SensevalCorpusView(StreamBackedCorpusView):
                 instance_lines.append(line)
 
             # End of an instance?
-            if line.lstrip().startswith('</instance'):
-                xml_block = '\n'.join(instance_lines)
+            if line.lstrip().startswith("</instance"):
+                xml_block = "\n".join(instance_lines)
                 xml_block = _fixXML(xml_block)
                 inst = ElementTree.fromstring(xml_block)
                 return [self._parse_instance(inst, lexelt)]
@@ -126,78 +130,78 @@ class SensevalCorpusView(StreamBackedCorpusView):
         context = []
         position = None
         for child in instance:
-            if child.tag == 'answer':
-                senses.append(child.attrib['senseid'])
-            elif child.tag == 'context':
+            if child.tag == "answer":
+                senses.append(child.attrib["senseid"])
+            elif child.tag == "context":
                 context += self._word_tokenizer.tokenize(child.text)
                 for cword in child:
-                    if cword.tag == 'compound':
-                        cword = cword[0] # is this ok to do?
+                    if cword.tag == "compound":
+                        cword = cword[0]  # is this ok to do?
 
-                    if cword.tag == 'head':
+                    if cword.tag == "head":
+                        # Some sanity checks:
-                        assert position is None, 'head specified twice'
-                        assert cword.text.strip() or len(cword)==1
-                        assert not (cword.text.strip() and len(cword)==1)
+                        assert position is None, "head specified twice"
+                        assert cword.text.strip() or len(cword) == 1
+                        assert not (cword.text.strip() and len(cword) == 1)
                         # Record the position of the head:
                         position = len(context)
                         # Add on the head word itself:
                         if cword.text.strip():
                             context.append(cword.text.strip())
-                        elif cword[0].tag == 'wf':
-                            context.append((cword[0].text,
-                                            cword[0].attrib['pos']))
+                        elif cword[0].tag == "wf":
+                            context.append((cword[0].text, cword[0].attrib["pos"]))
                             if cword[0].tail:
-                                context += self._word_tokenizer.tokenize(
-                                    cword[0].tail)
+                                context += self._word_tokenizer.tokenize(cword[0].tail)
                         else:
-                            assert False, 'expected CDATA or wf in <head>'
-                    elif cword.tag == 'wf':
-                        context.append((cword.text, cword.attrib['pos']))
-                    elif cword.tag == 's':
-                        pass # Sentence boundary marker.
+                            assert False, "expected CDATA or wf in <head>"
+                    elif cword.tag == "wf":
+                        context.append((cword.text, cword.attrib["pos"]))
+                    elif cword.tag == "s":
+                        pass  # Sentence boundary marker.
 
                     else:
-                        print('ACK', cword.tag)
-                        assert False, 'expected CDATA or <wf> or <head>'
+                        print("ACK", cword.tag)
+                        assert False, "expected CDATA or <wf> or <head>"
                     if cword.tail:
                         context += self._word_tokenizer.tokenize(cword.tail)
             else:
-                assert False, 'unexpected tag %s' % child.tag
+                assert False, "unexpected tag %s" % child.tag
         return SensevalInstance(lexelt, position, context, senses)
 
+
 def _fixXML(text):
     """
     Fix the various issues with Senseval pseudo-XML.
     """
     # <~> or <^> => ~ or ^
-    text = re.sub(r'<([~\^])>', r'\1', text)
+    text = re.sub(r"<([~\^])>", r"\1", text)
     # fix lone &
-    text = re.sub(r'(\s+)\&(\s+)', r'\1&amp;\2', text)
+    text = re.sub(r"(\s+)\&(\s+)", r"\1&amp;\2", text)
     # fix """
-    text = re.sub(r'"""', '\'"\'', text)
+    text = re.sub(r'"""', "'\"'", text)
     # fix <s snum=dd> => <s snum="dd"/>
     text = re.sub(r'(<[^<]*snum=)([^">]+)>', r'\1"\2"/>', text)
     # fix foreign word tag
-    text = re.sub(r'<\&frasl>\s*<p[^>]*>', 'FRASL', text)
+    text = re.sub(r"<\&frasl>\s*<p[^>]*>", "FRASL", text)
     # remove <&I .>
-    text = re.sub(r'<\&I[^>]*>', '', text)
+    text = re.sub(r"<\&I[^>]*>", "", text)
     # fix <{word}>
-    text = re.sub(r'<{([^}]+)}>', r'\1', text)
+    text = re.sub(r"<{([^}]+)}>", r"\1", text)
     # remove <@>, <p>, </p>
-    text = re.sub(r'<(@|/?p)>', r'', text)
+    text = re.sub(r"<(@|/?p)>", r"", text)
     # remove <&M .> and <&T .> and <&Ms .>
-    text = re.sub(r'<&\w+ \.>', r'', text)
+    text = re.sub(r"<&\w+ \.>", r"", text)
     # remove <!DOCTYPE... > lines
-    text = re.sub(r'<!DOCTYPE[^>]*>', r'', text)
+    text = re.sub(r"<!DOCTYPE[^>]*>", r"", text)
     # remove <[hi]> and <[/p]> etc
-    text = re.sub(r'<\[\/?[^>]+\]*>', r'', text)
+    text = re.sub(r"<\[\/?[^>]+\]*>", r"", text)
     # take the thing out of the brackets: <&hellip;>
-    text = re.sub(r'<(\&\w+;)>', r'\1', text)
+    text = re.sub(r"<(\&\w+;)>", r"\1", text)
     # and remove the & for those patterns that aren't regular XML
-    text = re.sub(r'&(?!amp|gt|lt|apos|quot)', r'', text)
+    text = re.sub(r"&(?!amp|gt|lt|apos|quot)", r"", text)
     # fix 'abc <p="foo"/>' style tags - now <wf pos="foo">abc</wf>
-    text = re.sub(r'[ \t]*([^<>\s]+?)[ \t]*<p="([^"]*"?)"/>',
-                  r' <wf pos="\2">\1</wf>', text)
+    text = re.sub(
+        r'[ \t]*([^<>\s]+?)[ \t]*<p="([^"]*"?)"/>', r' <wf pos="\2">\1</wf>', text
+    )
     text = re.sub(r'\s*"\s*<p=\'"\'/>', " <wf pos='\"'>\"</wf>", text)
     return text
diff --git a/nlp_resource_data/nltk/corpus/reader/senseval.pyc b/nlp_resource_data/nltk/corpus/reader/senseval.pyc
deleted file mode 100755 (executable)
index 500f9b5..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/senseval.pyc and /dev/null differ
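
_fixXML absorbs the pseudo-XML quirks before ElementTree ever sees a block; the substitutions are easiest to follow on a concrete string. A sketch (the input string is invented; corpus access assumes nltk.download('senseval')):

    import nltk
    from nltk.corpus import senseval
    from nltk.corpus.reader.senseval import _fixXML

    print(_fixXML('<s snum=16> the <~> hard part'))
    # -> '<s snum="16"/> the ~ hard part'

    nltk.download('senseval')
    inst = senseval.instances('hard.pos')[0]
    print(inst.word, inst.position, inst.senses)
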
old mode 100755 (executable)
new mode 100644 (file)
index afb398b..f0097c2
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: SentiWordNet
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Christopher Potts <cgpotts@stanford.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -37,50 +37,50 @@ http://sentiwordnet.isti.cnr.it/
 """
 
 import re
-from nltk.compat import python_2_unicode_compatible
+
 from nltk.corpus.reader import CorpusReader
 
-@python_2_unicode_compatible
+
 class SentiWordNetCorpusReader(CorpusReader):
-    def __init__(self, root, fileids, encoding='utf-8'):
+    def __init__(self, root, fileids, encoding="utf-8"):
         """
         Construct a new SentiWordNet Corpus Reader, using data from
        the specified file.
-        """        
-        super(SentiWordNetCorpusReader, self).__init__(root, fileids,
-                                                  encoding=encoding)
+        """
+        super(SentiWordNetCorpusReader, self).__init__(root, fileids, encoding=encoding)
         if len(self._fileids) != 1:
-            raise ValueError('Exactly one file must be specified')
+            raise ValueError("Exactly one file must be specified")
         self._db = {}
         self._parse_src_file()
 
     def _parse_src_file(self):
         lines = self.open(self._fileids[0]).read().splitlines()
-        lines = filter((lambda x : not re.search(r"^\s*#", x)), lines)
+        lines = filter((lambda x: not re.search(r"^\s*#", x)), lines)
         for i, line in enumerate(lines):
             fields = [field.strip() for field in re.split(r"\t+", line)]
-            try:            
+            try:
                 pos, offset, pos_score, neg_score, synset_terms, gloss = fields
             except:
-                raise ValueError('Line %s formatted incorrectly: %s\n' % (i, line))
+                raise ValueError("Line %s formatted incorrectly: %s\n" % (i, line))
             if pos and offset:
                 offset = int(offset)
                 self._db[(pos, offset)] = (float(pos_score), float(neg_score))
 
-    def senti_synset(self, *vals):        
+    def senti_synset(self, *vals):
         from nltk.corpus import wordnet as wn
+
         if tuple(vals) in self._db:
             pos_score, neg_score = self._db[tuple(vals)]
             pos, offset = vals
-            if pos == 's':
-                pos = 'a'
-            synset = wn._synset_from_pos_and_offset(pos, offset)
+            if pos == "s":
+                pos = "a"
+            synset = wn.synset_from_pos_and_offset(pos, offset)
             return SentiSynset(pos_score, neg_score, synset)
         else:
             synset = wn.synset(vals[0])
             pos = synset.pos()
-            if pos == 's':
-                pos = 'a'
+            if pos == "s":
+                pos = "a"
             offset = synset.offset()
             if (pos, offset) in self._db:
                 pos_score, neg_score = self._db[(pos, offset)]
@@ -90,23 +90,24 @@ class SentiWordNetCorpusReader(CorpusReader):
 
     def senti_synsets(self, string, pos=None):
         from nltk.corpus import wordnet as wn
+
         sentis = []
         synset_list = wn.synsets(string, pos)
         for synset in synset_list:
             sentis.append(self.senti_synset(synset.name()))
-        sentis = filter(lambda x : x, sentis)
+        sentis = filter(lambda x: x, sentis)
         return sentis
 
     def all_senti_synsets(self):
         from nltk.corpus import wordnet as wn
+
         for key, fields in self._db.items():
             pos, offset = key
             pos_score, neg_score = fields
-            synset = wn._synset_from_pos_and_offset(pos, offset)
+            synset = wn.synset_from_pos_and_offset(pos, offset)
             yield SentiSynset(pos_score, neg_score, synset)
 
 
-@python_2_unicode_compatible
 class SentiSynset(object):
     def __init__(self, pos_score, neg_score, synset):
         self._pos_score = pos_score
@@ -134,4 +135,3 @@ class SentiSynset(object):
 
     def __repr__(self):
         return "Senti" + repr(self.synset)
-                    
diff --git a/nlp_resource_data/nltk/corpus/reader/sentiwordnet.pyc b/nlp_resource_data/nltk/corpus/reader/sentiwordnet.pyc
deleted file mode 100755 (executable)
index cfbfcac..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/sentiwordnet.pyc and /dev/null differ
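
The substantive change above is the move from the private wn._synset_from_pos_and_offset to the public wn.synset_from_pos_and_offset; scores stay keyed on (pos, offset), with satellite adjectives ('s') folded into 'a'. A usage sketch, assuming nltk.download('sentiwordnet') and nltk.download('wordnet'):

    import nltk
    from nltk.corpus import sentiwordnet as swn

    nltk.download('sentiwordnet'); nltk.download('wordnet')

    good = swn.senti_synset('good.a.01')
    print(good.pos_score(), good.neg_score(), good.obj_score())
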
old mode 100755 (executable)
new mode 100644 (file)
index c63f7ad..15b997c
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Sinica Treebank Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -18,7 +18,7 @@ at http://www.sinica.edu.tw/SinicaCorpus/modern_e_wordtype.html
 Language and Knowledge Processing Group, Institute of Information
 Science, Academia Sinica
 
-It is distributed with the Natural Language Toolkit under the terms of
+The data is distributed with the Natural Language Toolkit under the terms of
 the Creative Commons Attribution-NonCommercial-ShareAlike License
 [http://creativecommons.org/licenses/by-nc-sa/2.5/].
 
@@ -38,37 +38,38 @@ Chen Keh-Jiann and Yu-Ming Hsieh (2004) Chinese Treebanks and Grammar
 Extraction, Proceedings of IJCNLP-04, pp560-565.
 """
 
-import os
-import re
-
 from nltk.tree import sinica_parse
 from nltk.tag import map_tag
 
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
-IDENTIFIER = re.compile(r'^#\S+\s')
-APPENDIX = re.compile(r'(?<=\))#.*$')
-TAGWORD = re.compile(r':([^:()|]+):([^:()|]+)')
-WORD = re.compile(r':[^:()|]+:([^:()|]+)')
+IDENTIFIER = re.compile(r"^#\S+\s")
+APPENDIX = re.compile(r"(?<=\))#.*$")
+TAGWORD = re.compile(r":([^:()|]+):([^:()|]+)")
+WORD = re.compile(r":[^:()|]+:([^:()|]+)")
+
 
 class SinicaTreebankCorpusReader(SyntaxCorpusReader):
     """
     Reader for the sinica treebank.
     """
+
     def _read_block(self, stream):
         sent = stream.readline()
-        sent = IDENTIFIER.sub('', sent)
-        sent = APPENDIX.sub('', sent)
+        sent = IDENTIFIER.sub("", sent)
+        sent = APPENDIX.sub("", sent)
         return [sent]
 
     def _parse(self, sent):
         return sinica_parse(sent)
 
     def _tag(self, sent, tagset=None):
-        tagged_sent = [(w,t) for (t,w) in TAGWORD.findall(sent)]
+        tagged_sent = [(w, t) for (t, w) in TAGWORD.findall(sent)]
         if tagset and tagset != self._tagset:
-            tagged_sent = [(w, map_tag(self._tagset, tagset, t)) for (w,t) in tagged_sent]
+            tagged_sent = [
+                (w, map_tag(self._tagset, tagset, t)) for (w, t) in tagged_sent
+            ]
         return tagged_sent
 
     def _word(self, sent):
diff --git a/nlp_resource_data/nltk/corpus/reader/sinica_treebank.pyc b/nlp_resource_data/nltk/corpus/reader/sinica_treebank.pyc
deleted file mode 100755 (executable)
index 57038c1..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/sinica_treebank.pyc and /dev/null differ
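
_tag above depends on TAGWORD capturing (tag, word) pairs from the flat parse string and swapping them into NLTK's (word, tag) order. A self-contained sketch on an invented fragment in the treebank's tag:word notation:

    import re

    TAGWORD = re.compile(r":([^:()|]+):([^:()|]+)")

    sent = "S(agent:NP(Head:Nba:中文)|Head:VC:好)"
    tagged = [(w, t) for (t, w) in TAGWORD.findall(sent)]
    print(tagged)   # [('中文', 'Nba'), ('好', 'VC')]
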
old mode 100755 (executable)
new mode 100644 (file)
index 2afd080..136a62e
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: String Category Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -18,9 +18,6 @@ NUM:date When did Hawaii become a state ?
 """
 
 # based on PPAttachmentCorpusReader
-from six import string_types
-
-from nltk import compat
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
@@ -28,7 +25,7 @@ from nltk.corpus.reader.api import *
 # in nltk, we use the form (data, tag) -- e.g., tagged words and
 # labeled texts for classifiers.
 class StringCategoryCorpusReader(CorpusReader):
-    def __init__(self, root, fileids, delimiter=' ', encoding='utf8'):
+    def __init__(self, root, fileids, delimiter=" ", encoding="utf8"):
         """
         :param root: The root directory for this corpus.
         :param fileids: A list or regexp specifying the fileids in this corpus.
@@ -38,18 +35,25 @@ class StringCategoryCorpusReader(CorpusReader):
         self._delimiter = delimiter
 
     def tuples(self, fileids=None):
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
-        return concat([StreamBackedCorpusView(fileid, self._read_tuple_block,
-                                              encoding=enc)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_tuple_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def raw(self, fileids=None):
         """
         :return: the text contents of the given fileids, as a single string.
         """
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def _read_tuple_block(self, stream):
diff --git a/nlp_resource_data/nltk/corpus/reader/string_category.pyc b/nlp_resource_data/nltk/corpus/reader/string_category.pyc
deleted file mode 100755 (executable)
index b9fdee2..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/string_category.pyc and /dev/null differ
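
A usage sketch for the reader above, against the question-classification corpus NLTK ships it with (assumes nltk.download('qc'); the expected tuple is illustrative):

    import nltk
    from nltk.corpus import qc

    nltk.download('qc')
    print(qc.tuples('train.txt')[0])
    # e.g. ('DESC:manner', 'How did serfdom develop in and then leave Russia ?')
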
old mode 100755 (executable)
new mode 100644 (file)
index f07e2f6..593ef45
@@ -1,20 +1,17 @@
 # Natural Language Toolkit: Switchboard Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import unicode_literals
 import re
 
 from nltk.tag import str2tuple, map_tag
-from nltk import compat
 
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
 
-@compat.python_2_unicode_compatible
 class SwitchboardTurn(list):
     """
     A specialized list object used to encode switchboard utterances.
@@ -23,6 +20,7 @@ class SwitchboardTurn(list):
     speaker identifier and utterance id.  Note that utterance ids
     are only unique within a given discourse.
     """
+
     def __init__(self, words, speaker, id):
         list.__init__(self, words)
         self.speaker = speaker
@@ -30,16 +28,16 @@ class SwitchboardTurn(list):
 
     def __repr__(self):
         if len(self) == 0:
-            text = ''
+            text = ""
         elif isinstance(self[0], tuple):
-            text = ' '.join('%s/%s' % w for w in self)
+            text = " ".join("%s/%s" % w for w in self)
         else:
-            text = ' '.join(self)
-        return '<%s.%s: %r>' % (self.speaker, self.id, text)
+            text = " ".join(self)
+        return "<%s.%s: %r>" % (self.speaker, self.id, text)
 
 
 class SwitchboardCorpusReader(CorpusReader):
-    _FILES = ['tagged']
+    _FILES = ["tagged"]
     # Use the "tagged" file even for non-tagged data methods, since
     # it's tokenized.
 
@@ -48,47 +46,57 @@ class SwitchboardCorpusReader(CorpusReader):
         self._tagset = tagset
 
     def words(self):
-        return StreamBackedCorpusView(self.abspath('tagged'),
-                                      self._words_block_reader)
+        return StreamBackedCorpusView(self.abspath("tagged"), self._words_block_reader)
 
     def tagged_words(self, tagset=None):
         def tagged_words_block_reader(stream):
             return self._tagged_words_block_reader(stream, tagset)
-        return StreamBackedCorpusView(self.abspath('tagged'),
-                                      tagged_words_block_reader)
+
+        return StreamBackedCorpusView(self.abspath("tagged"), tagged_words_block_reader)
 
     def turns(self):
-        return StreamBackedCorpusView(self.abspath('tagged'),
-                                      self._turns_block_reader)
+        return StreamBackedCorpusView(self.abspath("tagged"), self._turns_block_reader)
 
     def tagged_turns(self, tagset=None):
         def tagged_turns_block_reader(stream):
             return self._tagged_turns_block_reader(stream, tagset)
-        return StreamBackedCorpusView(self.abspath('tagged'),
-                                      tagged_turns_block_reader)
+
+        return StreamBackedCorpusView(self.abspath("tagged"), tagged_turns_block_reader)
 
     def discourses(self):
-        return StreamBackedCorpusView(self.abspath('tagged'),
-                                      self._discourses_block_reader)
+        return StreamBackedCorpusView(
+            self.abspath("tagged"), self._discourses_block_reader
+        )
 
     def tagged_discourses(self, tagset=False):
         def tagged_discourses_block_reader(stream):
             return self._tagged_discourses_block_reader(stream, tagset)
-        return StreamBackedCorpusView(self.abspath('tagged'),
-                                      tagged_discourses_block_reader)
+
+        return StreamBackedCorpusView(
+            self.abspath("tagged"), tagged_discourses_block_reader
+        )
 
     def _discourses_block_reader(self, stream):
         # returns at most 1 discourse.  (The other methods depend on this.)
-        return [[self._parse_utterance(u, include_tag=False)
-                 for b in read_blankline_block(stream)
-                 for u in b.split('\n') if u.strip()]]
+        return [
+            [
+                self._parse_utterance(u, include_tag=False)
+                for b in read_blankline_block(stream)
+                for u in b.split("\n")
+                if u.strip()
+            ]
+        ]
 
     def _tagged_discourses_block_reader(self, stream, tagset=None):
         # returns at most 1 discourse.  (The other methods depend on this.)
-        return [[self._parse_utterance(u, include_tag=True,
-                                       tagset=tagset)
-                 for b in read_blankline_block(stream)
-                 for u in b.split('\n') if u.strip()]]
+        return [
+            [
+                self._parse_utterance(u, include_tag=True, tagset=tagset)
+                for b in read_blankline_block(stream)
+                for u in b.split("\n")
+                if u.strip()
+            ]
+        ]
 
     def _turns_block_reader(self, stream):
         return self._discourses_block_reader(stream)[0]
@@ -100,20 +108,19 @@ class SwitchboardCorpusReader(CorpusReader):
         return sum(self._discourses_block_reader(stream)[0], [])
 
     def _tagged_words_block_reader(self, stream, tagset=None):
-        return sum(self._tagged_discourses_block_reader(stream,
-                                                        tagset)[0], [])
+        return sum(self._tagged_discourses_block_reader(stream, tagset)[0], [])
+
+    _UTTERANCE_RE = re.compile("(\w+)\.(\d+)\:\s*(.*)")
+    _SEP = "/"
 
-    _UTTERANCE_RE = re.compile('(\w+)\.(\d+)\:\s*(.*)')
-    _SEP = '/'
     def _parse_utterance(self, utterance, include_tag, tagset=None):
         m = self._UTTERANCE_RE.match(utterance)
         if m is None:
-            raise ValueError('Bad utterance %r' % utterance)
+            raise ValueError("Bad utterance %r" % utterance)
         speaker, id, text = m.groups()
         words = [str2tuple(s, self._SEP) for s in text.split()]
         if not include_tag:
-            words = [w for (w,t) in words]
+            words = [w for (w, t) in words]
         elif tagset and tagset != self._tagset:
-            words = [(w, map_tag(self._tagset, tagset, t)) for (w,t) in words]
+            words = [(w, map_tag(self._tagset, tagset, t)) for (w, t) in words]
         return SwitchboardTurn(words, speaker, id)
-
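
The substantive change in this file, beyond quote normalization, is dropping "from __future__ import unicode_literals" and the @compat.python_2_unicode_compatible decorator: Python 3 has a single unicode str type, so __repr__ can return a plain str without a compatibility wrapper. A minimal sketch of the pattern (a hypothetical stand-in for SwitchboardTurn, not NLTK code):

    class Turn(list):
        """A list of (word, tag) pairs plus speaker metadata."""

        def __init__(self, words, speaker, id):
            list.__init__(self, words)
            self.speaker = speaker
            self.id = id

        def __repr__(self):
            # No decorator needed: on Python 3, str is already unicode.
            text = " ".join("%s/%s" % w for w in self)
            return "<%s.%s: %r>" % (self.speaker, self.id, text)

    print(Turn([("uh", "UH"), ("hello", "UH")], "A", 1))
    # <A.1: 'uh/UH hello/UH'>
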
diff --git a/nlp_resource_data/nltk/corpus/reader/switchboard.pyc b/nlp_resource_data/nltk/corpus/reader/switchboard.pyc
deleted file mode 100755 (executable)
index 42731ae..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/switchboard.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/tagged.py b/nlp_resource_data/nltk/corpus/reader/tagged.py
old mode 100755 (executable)
new mode 100644 (file)
index d7f563d..afd27b1
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tagged Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 #         Jacob Perkins <japerk@gmail.com>
@@ -13,8 +13,6 @@ A reader for corpora whose documents contain part-of-speech-tagged words.
 
 import os
 
-from six import string_types
-
 from nltk.tag import str2tuple, map_tag
 from nltk.tokenize import *
 
@@ -22,6 +20,7 @@ from nltk.corpus.reader.api import *
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.timit import read_timit_block
 
+
 class TaggedCorpusReader(CorpusReader):
     """
     Reader for simple part-of-speech tagged corpora.  Paragraphs are
@@ -37,12 +36,18 @@ class TaggedCorpusReader(CorpusReader):
     constructor.  Part of speech tags are case-normalized to upper
     case.
     """
-    def __init__(self, root, fileids,
-                 sep='/', word_tokenizer=WhitespaceTokenizer(),
-                 sent_tokenizer=RegexpTokenizer('\n', gaps=True),
-                 para_block_reader=read_blankline_block,
-                 encoding='utf8',
-                 tagset=None):
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        sep="/",
+        word_tokenizer=WhitespaceTokenizer(),
+        sent_tokenizer=RegexpTokenizer("\n", gaps=True),
+        para_block_reader=read_blankline_block,
+        encoding="utf8",
+        tagset=None,
+    ):
         """
         Construct a new Tagged Corpus reader for a set of documents
         located at the given root directory.  Example usage:
@@ -65,8 +70,10 @@ class TaggedCorpusReader(CorpusReader):
         :return: the given file(s) as a single string.
         :rtype: str
         """
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
     def words(self, fileids=None):
@@ -75,13 +82,23 @@ class TaggedCorpusReader(CorpusReader):
             and punctuation symbols.
         :rtype: list(str)
         """
-        return concat([TaggedCorpusView(fileid, enc,
-                                        False, False, False,
-                                        self._sep, self._word_tokenizer,
-                                        self._sent_tokenizer,
-                                        self._para_block_reader,
-                                        None)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    False,
+                    False,
+                    False,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    None,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def sents(self, fileids=None):
         """
@@ -90,13 +107,23 @@ class TaggedCorpusReader(CorpusReader):
             strings.
         :rtype: list(list(str))
         """
-        return concat([TaggedCorpusView(fileid, enc,
-                                        False, True, False,
-                                        self._sep, self._word_tokenizer,
-                                        self._sent_tokenizer,
-                                        self._para_block_reader,
-                                        None)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    False,
+                    True,
+                    False,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    None,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def paras(self, fileids=None):
         """
@@ -105,13 +132,23 @@ class TaggedCorpusReader(CorpusReader):
             in turn encoded as lists of word strings.
         :rtype: list(list(list(str)))
         """
-        return concat([TaggedCorpusView(fileid, enc,
-                                        False, True, True,
-                                        self._sep, self._word_tokenizer,
-                                        self._sent_tokenizer,
-                                        self._para_block_reader,
-                                        None)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    False,
+                    True,
+                    True,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    None,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_words(self, fileids=None, tagset=None):
         """
@@ -124,13 +161,23 @@ class TaggedCorpusReader(CorpusReader):
             tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t)
         else:
             tag_mapping_function = None
-        return concat([TaggedCorpusView(fileid, enc,
-                                        True, False, False,
-                                        self._sep, self._word_tokenizer,
-                                        self._sent_tokenizer,
-                                        self._para_block_reader,
-                                        tag_mapping_function)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    True,
+                    False,
+                    False,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    tag_mapping_function,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_sents(self, fileids=None, tagset=None):
         """
@@ -143,13 +190,23 @@ class TaggedCorpusReader(CorpusReader):
             tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t)
         else:
             tag_mapping_function = None
-        return concat([TaggedCorpusView(fileid, enc,
-                                        True, True, False,
-                                        self._sep, self._word_tokenizer,
-                                        self._sent_tokenizer,
-                                        self._para_block_reader,
-                                        tag_mapping_function)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    True,
+                    True,
+                    False,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    tag_mapping_function,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
     def tagged_paras(self, fileids=None, tagset=None):
         """
@@ -162,20 +219,31 @@ class TaggedCorpusReader(CorpusReader):
             tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t)
         else:
             tag_mapping_function = None
-        return concat([TaggedCorpusView(fileid, enc,
-                                        True, True, True,
-                                        self._sep, self._word_tokenizer,
-                                        self._sent_tokenizer,
-                                        self._para_block_reader,
-                                        tag_mapping_function)
-                       for (fileid, enc) in self.abspaths(fileids, True)])
-
-class CategorizedTaggedCorpusReader(CategorizedCorpusReader,
-                                    TaggedCorpusReader):
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    True,
+                    True,
+                    True,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    tag_mapping_function,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+
+class CategorizedTaggedCorpusReader(CategorizedCorpusReader, TaggedCorpusReader):
     """
     A reader for part-of-speech tagged corpora whose documents are
     divided into categories based on their file identifiers.
     """
+
     def __init__(self, *args, **kwargs):
         """
         Initialize the corpus reader.  Categorization arguments
@@ -188,32 +256,39 @@ class CategorizedTaggedCorpusReader(CategorizedCorpusReader,
 
     def _resolve(self, fileids, categories):
         if fileids is not None and categories is not None:
-            raise ValueError('Specify fileids or categories, not both')
+            raise ValueError("Specify fileids or categories, not both")
         if categories is not None:
             return self.fileids(categories)
         else:
             return fileids
+
     def raw(self, fileids=None, categories=None):
-        return TaggedCorpusReader.raw(
-            self, self._resolve(fileids, categories))
+        return TaggedCorpusReader.raw(self, self._resolve(fileids, categories))
+
     def words(self, fileids=None, categories=None):
-        return TaggedCorpusReader.words(
-            self, self._resolve(fileids, categories))
+        return TaggedCorpusReader.words(self, self._resolve(fileids, categories))
+
     def sents(self, fileids=None, categories=None):
-        return TaggedCorpusReader.sents(
-            self, self._resolve(fileids, categories))
+        return TaggedCorpusReader.sents(self, self._resolve(fileids, categories))
+
     def paras(self, fileids=None, categories=None):
-        return TaggedCorpusReader.paras(
-            self, self._resolve(fileids, categories))
+        return TaggedCorpusReader.paras(self, self._resolve(fileids, categories))
+
     def tagged_words(self, fileids=None, categories=None, tagset=None):
         return TaggedCorpusReader.tagged_words(
-            self, self._resolve(fileids, categories), tagset)
+            self, self._resolve(fileids, categories), tagset
+        )
+
     def tagged_sents(self, fileids=None, categories=None, tagset=None):
         return TaggedCorpusReader.tagged_sents(
-            self, self._resolve(fileids, categories), tagset)
+            self, self._resolve(fileids, categories), tagset
+        )
+
     def tagged_paras(self, fileids=None, categories=None, tagset=None):
         return TaggedCorpusReader.tagged_paras(
-            self, self._resolve(fileids, categories), tagset)
+            self, self._resolve(fileids, categories), tagset
+        )
+
 
 class TaggedCorpusView(StreamBackedCorpusView):
     """
@@ -223,9 +298,20 @@ class TaggedCorpusView(StreamBackedCorpusView):
     ``TaggedCorpusView`` objects are typically created by
     ``TaggedCorpusReader`` (not directly by nltk users).
     """
-    def __init__(self, corpus_file, encoding, tagged, group_by_sent,
-                 group_by_para, sep, word_tokenizer, sent_tokenizer,
-                 para_block_reader, tag_mapping_function=None):
+
+    def __init__(
+        self,
+        corpus_file,
+        encoding,
+        tagged,
+        group_by_sent,
+        group_by_para,
+        sep,
+        word_tokenizer,
+        sent_tokenizer,
+        para_block_reader,
+        tag_mapping_function=None,
+    ):
         self._tagged = tagged
         self._group_by_sent = group_by_sent
         self._group_by_para = group_by_para
@@ -242,12 +328,14 @@ class TaggedCorpusView(StreamBackedCorpusView):
         for para_str in self._para_block_reader(stream):
             para = []
             for sent_str in self._sent_tokenizer.tokenize(para_str):
-                sent = [str2tuple(s, self._sep) for s in
-                        self._word_tokenizer.tokenize(sent_str)]
+                sent = [
+                    str2tuple(s, self._sep)
+                    for s in self._word_tokenizer.tokenize(sent_str)
+                ]
                 if self._tag_mapping_function:
-                    sent = [(w, self._tag_mapping_function(t)) for (w,t) in sent]
+                    sent = [(w, self._tag_mapping_function(t)) for (w, t) in sent]
                 if not self._tagged:
-                    sent = [w for (w,t) in sent]
+                    sent = [w for (w, t) in sent]
                 if self._group_by_sent:
                     para.append(sent)
                 else:
@@ -258,6 +346,7 @@ class TaggedCorpusView(StreamBackedCorpusView):
                 block.extend(para)
         return block
 
+
 # needs to implement simplified tags
 class MacMorphoCorpusReader(TaggedCorpusReader):
     """
@@ -268,28 +357,36 @@ class MacMorphoCorpusReader(TaggedCorpusReader):
     ``self.paras()`` and ``self.tagged_paras()`` contains a single
     sentence.
     """
-    def __init__(self, root, fileids, encoding='utf8', tagset=None):
+
+    def __init__(self, root, fileids, encoding="utf8", tagset=None):
         TaggedCorpusReader.__init__(
-            self, root, fileids, sep='_',
+            self,
+            root,
+            fileids,
+            sep="_",
             word_tokenizer=LineTokenizer(),
-            sent_tokenizer=RegexpTokenizer('.*\n'),
+            sent_tokenizer=RegexpTokenizer(".*\n"),
             para_block_reader=self._read_block,
             encoding=encoding,
-            tagset=tagset)
+            tagset=tagset,
+        )
 
     def _read_block(self, stream):
-        return read_regexp_block(stream, r'.*', r'.*_\.')
+        return read_regexp_block(stream, r".*", r".*_\.")
+
 
 class TimitTaggedCorpusReader(TaggedCorpusReader):
     """
     A corpus reader for tagged sentences that are included in the TIMIT corpus.
     """
+
     def __init__(self, *args, **kwargs):
         TaggedCorpusReader.__init__(
-            self, para_block_reader=read_timit_block, *args, **kwargs)
+            self, para_block_reader=read_timit_block, *args, **kwargs
+        )
 
     def paras(self):
-        raise NotImplementedError('use sents() instead')
+        raise NotImplementedError("use sents() instead")
 
     def tagged_paras(self):
-        raise NotImplementedError('use tagged_sents() instead')
+        raise NotImplementedError("use tagged_sents() instead")
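
The six.string_types -> str rewrite recurs throughout this commit and always has the same shape: Python 3 has no separate unicode type, so a plain isinstance(x, str) check replaces the six shim. A self-contained sketch of the fileid-normalization idiom used by raw() above (illustrative names, not NLTK's):

    def normalize_fileids(fileids, default):
        """None -> the default list; a single str -> a one-element list."""
        if fileids is None:
            return list(default)
        if isinstance(fileids, str):  # was six.string_types on Python 2
            return [fileids]
        return list(fileids)

    print(normalize_fileids(None, ["a.txt", "b.txt"]))    # ['a.txt', 'b.txt']
    print(normalize_fileids("a.txt", ["a.txt", "b.txt"])) # ['a.txt']
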
diff --git a/nlp_resource_data/nltk/corpus/reader/tagged.pyc b/nlp_resource_data/nltk/corpus/reader/tagged.pyc
deleted file mode 100755 (executable)
index 4253455..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/tagged.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/timit.py b/nlp_resource_data/nltk/corpus/reader/timit.py
old mode 100755 (executable)
new mode 100644 (file)
index b8346df..7d63248
@@ -118,23 +118,19 @@ The 4 functions are as follows.
    timit.audiodata function.
 
 """
-from __future__ import print_function, unicode_literals
-
 import sys
 import os
 import re
 import tempfile
 import time
 
-from six import string_types
-
-from nltk import compat
 from nltk.tree import Tree
 from nltk.internals import import_from_stdlib
 
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
+
 class TimitCorpusReader(CorpusReader):
     """
     Reader for the TIMIT corpus (or any other corpus with the same
@@ -153,32 +149,32 @@ class TimitCorpusReader(CorpusReader):
       - <utterance-id>.wav: utterance sound file
     """
 
-    _FILE_RE = (r'(\w+-\w+/\w+\.(phn|txt|wav|wrd))|' +
-                r'timitdic\.txt|spkrinfo\.txt')
+    _FILE_RE = r"(\w+-\w+/\w+\.(phn|txt|wav|wrd))|" + r"timitdic\.txt|spkrinfo\.txt"
     """A regexp matching fileids that are used by this corpus reader."""
-    _UTTERANCE_RE = r'\w+-\w+/\w+\.txt'
+    _UTTERANCE_RE = r"\w+-\w+/\w+\.txt"
 
-    def __init__(self, root, encoding='utf8'):
+    def __init__(self, root, encoding="utf8"):
         """
         Construct a new TIMIT corpus reader in the given directory.
         :param root: The root directory for this corpus.
         """
         # Ensure that wave files don't get treated as unicode data:
-        if isinstance(encoding, string_types):
-            encoding = [('.*\.wav', None), ('.*', encoding)]
+        if isinstance(encoding, str):
+            encoding = [(".*\.wav", None), (".*", encoding)]
 
-        CorpusReader.__init__(self, root,
-                              find_corpus_fileids(root, self._FILE_RE),
-                              encoding=encoding)
+        CorpusReader.__init__(
+            self, root, find_corpus_fileids(root, self._FILE_RE), encoding=encoding
+        )
 
-        self._utterances = [name[:-4] for name in
-                            find_corpus_fileids(root, self._UTTERANCE_RE)]
+        self._utterances = [
+            name[:-4] for name in find_corpus_fileids(root, self._UTTERANCE_RE)
+        ]
         """A list of the utterance identifiers for all utterances in
         this corpus."""
 
         self._speakerinfo = None
         self._root = root
-        self.speakers = sorted(set(u.split('/')[0] for u in self._utterances))
+        self.speakers = sorted(set(u.split("/")[0] for u in self._utterances))
 
     def fileids(self, filetype=None):
         """
@@ -192,26 +188,32 @@ class TimitCorpusReader(CorpusReader):
         """
         if filetype is None:
             return CorpusReader.fileids(self)
-        elif filetype in ('txt', 'wrd', 'phn', 'wav'):
-            return ['%s.%s' % (u, filetype) for u in self._utterances]
-        elif filetype == 'metadata':
-            return ['timitdic.txt', 'spkrinfo.txt']
+        elif filetype in ("txt", "wrd", "phn", "wav"):
+            return ["%s.%s" % (u, filetype) for u in self._utterances]
+        elif filetype == "metadata":
+            return ["timitdic.txt", "spkrinfo.txt"]
         else:
-            raise ValueError('Bad value for filetype: %r' % filetype)
+            raise ValueError("Bad value for filetype: %r" % filetype)
 
-    def utteranceids(self, dialect=None, sex=None, spkrid=None,
-                   sent_type=None, sentid=None):
+    def utteranceids(
+        self, dialect=None, sex=None, spkrid=None, sent_type=None, sentid=None
+    ):
         """
         :return: A list of the utterance identifiers for all
         utterances in this corpus, or for the given speaker, dialect
         region, gender, sentence type, or sentence number, if
         specified.
         """
-        if isinstance(dialect, string_types): dialect = [dialect]
-        if isinstance(sex, string_types): sex = [sex]
-        if isinstance(spkrid, string_types): spkrid = [spkrid]
-        if isinstance(sent_type, string_types): sent_type = [sent_type]
-        if isinstance(sentid, string_types): sentid = [sentid]
+        if isinstance(dialect, str):
+            dialect = [dialect]
+        if isinstance(sex, str):
+            sex = [sex]
+        if isinstance(spkrid, str):
+            spkrid = [spkrid]
+        if isinstance(sent_type, str):
+            sent_type = [sent_type]
+        if isinstance(sentid, str):
+            sentid = [sentid]
 
         utterances = self._utterances[:]
         if dialect is not None:
@@ -232,29 +234,34 @@ class TimitCorpusReader(CorpusReader):
         each word.
         """
         _transcriptions = {}
-        for line in self.open('timitdic.txt'):
-            if not line.strip() or line[0] == ';': continue
-            m = re.match(r'\s*(\S+)\s+/(.*)/\s*$', line)
-            if not m: raise ValueError('Bad line: %r' % line)
+        for line in self.open("timitdic.txt"):
+            if not line.strip() or line[0] == ";":
+                continue
+            m = re.match(r"\s*(\S+)\s+/(.*)/\s*$", line)
+            if not m:
+                raise ValueError("Bad line: %r" % line)
             _transcriptions[m.group(1)] = m.group(2).split()
         return _transcriptions
 
     def spkrid(self, utterance):
-        return utterance.split('/')[0]
+        return utterance.split("/")[0]
 
     def sentid(self, utterance):
-        return utterance.split('/')[1]
+        return utterance.split("/")[1]
 
     def utterance(self, spkrid, sentid):
-        return '%s/%s' % (spkrid, sentid)
+        return "%s/%s" % (spkrid, sentid)
 
     def spkrutteranceids(self, speaker):
         """
         :return: A list of all utterances associated with a given
         speaker.
         """
-        return [utterance for utterance in self._utterances
-                if utterance.startswith(speaker+'/')]
+        return [
+            utterance
+            for utterance in self._utterances
+            if utterance.startswith(speaker + "/")
+        ]
 
     def spkrinfo(self, speaker):
         """
@@ -265,51 +272,73 @@ class TimitCorpusReader(CorpusReader):
 
         if self._speakerinfo is None:
             self._speakerinfo = {}
-            for line in self.open('spkrinfo.txt'):
-                if not line.strip() or line[0] == ';': continue
+            for line in self.open("spkrinfo.txt"):
+                if not line.strip() or line[0] == ";":
+                    continue
                 rec = line.strip().split(None, 9)
-                key = "dr%s-%s%s" % (rec[2],rec[1].lower(),rec[0].lower())
+                key = "dr%s-%s%s" % (rec[2], rec[1].lower(), rec[0].lower())
                 self._speakerinfo[key] = SpeakerInfo(*rec)
 
         return self._speakerinfo[speaker]
 
     def phones(self, utterances=None):
-        return [line.split()[-1]
-                for fileid in self._utterance_fileids(utterances, '.phn')
-                for line in self.open(fileid) if line.strip()]
+        return [
+            line.split()[-1]
+            for fileid in self._utterance_fileids(utterances, ".phn")
+            for line in self.open(fileid)
+            if line.strip()
+        ]
 
     def phone_times(self, utterances=None):
         """
         offset is represented as a number of 16kHz samples!
         """
-        return [(line.split()[2], int(line.split()[0]), int(line.split()[1]))
-                for fileid in self._utterance_fileids(utterances, '.phn')
-                for line in self.open(fileid) if line.strip()]
+        return [
+            (line.split()[2], int(line.split()[0]), int(line.split()[1]))
+            for fileid in self._utterance_fileids(utterances, ".phn")
+            for line in self.open(fileid)
+            if line.strip()
+        ]
 
     def words(self, utterances=None):
-        return [line.split()[-1]
-                for fileid in self._utterance_fileids(utterances, '.wrd')
-                for line in self.open(fileid) if line.strip()]
+        return [
+            line.split()[-1]
+            for fileid in self._utterance_fileids(utterances, ".wrd")
+            for line in self.open(fileid)
+            if line.strip()
+        ]
 
     def word_times(self, utterances=None):
-        return [(line.split()[2], int(line.split()[0]), int(line.split()[1]))
-                for fileid in self._utterance_fileids(utterances, '.wrd')
-                for line in self.open(fileid) if line.strip()]
+        return [
+            (line.split()[2], int(line.split()[0]), int(line.split()[1]))
+            for fileid in self._utterance_fileids(utterances, ".wrd")
+            for line in self.open(fileid)
+            if line.strip()
+        ]
 
     def sents(self, utterances=None):
-        return [[line.split()[-1]
-                 for line in self.open(fileid) if line.strip()]
-                for fileid in self._utterance_fileids(utterances, '.wrd')]
+        return [
+            [line.split()[-1] for line in self.open(fileid) if line.strip()]
+            for fileid in self._utterance_fileids(utterances, ".wrd")
+        ]
 
     def sent_times(self, utterances=None):
-        return [(line.split(None,2)[-1].strip(),
-                 int(line.split()[0]), int(line.split()[1]))
-                for fileid in self._utterance_fileids(utterances, '.txt')
-                for line in self.open(fileid) if line.strip()]
+        return [
+            (
+                line.split(None, 2)[-1].strip(),
+                int(line.split()[0]),
+                int(line.split()[1]),
+            )
+            for fileid in self._utterance_fileids(utterances, ".txt")
+            for line in self.open(fileid)
+            if line.strip()
+        ]
 
     def phone_trees(self, utterances=None):
-        if utterances is None: utterances = self._utterances
-        if isinstance(utterances, string_types): utterances = [utterances]
+        if utterances is None:
+            utterances = self._utterances
+        if isinstance(utterances, str):
+            utterances = [utterances]
 
         trees = []
         for utterance in utterances:
@@ -319,9 +348,10 @@ class TimitCorpusReader(CorpusReader):
 
             while sent_times:
                 (sent, sent_start, sent_end) = sent_times.pop(0)
-                trees.append(Tree('S', []))
-                while (word_times and phone_times and
-                       phone_times[0][2] <= word_times[0][1]):
+                trees.append(Tree("S", []))
+                while (
+                    word_times and phone_times and phone_times[0][2] <= word_times[0][1]
+                ):
                     trees[-1].append(phone_times.pop(0)[0])
                 while word_times and word_times[0][2] <= sent_end:
                     (word, word_start, word_end) = word_times.pop(0)
@@ -337,21 +367,21 @@ class TimitCorpusReader(CorpusReader):
     # fileids.
     def wav(self, utterance, start=0, end=None):
         # nltk.chunk conflicts with the stdlib module 'chunk'
-        wave = import_from_stdlib('wave')
+        wave = import_from_stdlib("wave")
 
-        w = wave.open(self.open(utterance+'.wav'), 'rb')
+        w = wave.open(self.open(utterance + ".wav"), "rb")
 
         if end is None:
             end = w.getnframes()
 
         # Skip past frames before start, then read the frames we want
         w.readframes(start)
-        frames = w.readframes(end-start)
+        frames = w.readframes(end - start)
 
         # Open a new temporary file -- the wave module requires
         # an actual file, and won't work w/ stringio. :(
         tf = tempfile.TemporaryFile()
-        out = wave.open(tf, 'w')
+        out = wave.open(tf, "w")
 
         # Write the parameters & data to the new file.
         out.setparams(w.getparams())
@@ -364,18 +394,20 @@ class TimitCorpusReader(CorpusReader):
         return tf.read()
 
     def audiodata(self, utterance, start=0, end=None):
-        assert(end is None or end > start)
+        assert end is None or end > start
         headersize = 44
         if end is None:
-            data = self.open(utterance+'.wav').read()
+            data = self.open(utterance + ".wav").read()
         else:
-            data = self.open(utterance+'.wav').read(headersize+end*2)
-        return data[headersize+start*2:]
+            data = self.open(utterance + ".wav").read(headersize + end * 2)
+        return data[headersize + start * 2 :]
 
     def _utterance_fileids(self, utterances, extension):
-        if utterances is None: utterances = self._utterances
-        if isinstance(utterances, string_types): utterances = [utterances]
-        return ['%s%s' % (u, extension) for u in utterances]
+        if utterances is None:
+            utterances = self._utterances
+        if isinstance(utterances, str):
+            utterances = [utterances]
+        return ["%s%s" % (u, extension) for u in utterances]
 
     def play(self, utterance, start=0, end=None):
         """
@@ -386,16 +418,22 @@ class TimitCorpusReader(CorpusReader):
         # Method 1: os audio dev.
         try:
             import ossaudiodev
+
             try:
-                dsp = ossaudiodev.open('w')
+                dsp = ossaudiodev.open("w")
                 dsp.setfmt(ossaudiodev.AFMT_S16_LE)
                 dsp.channels(1)
                 dsp.speed(16000)
                 dsp.write(self.audiodata(utterance, start, end))
                 dsp.close()
             except IOError as e:
-                print(("can't acquire the audio device; please "
-                                     "activate your audio device."), file=sys.stderr)
+                print(
+                    (
+                        "can't acquire the audio device; please "
+                        "activate your audio device."
+                    ),
+                    file=sys.stderr,
+                )
                 print("system error message:", str(e), file=sys.stderr)
             return
         except ImportError:
@@ -405,6 +443,7 @@ class TimitCorpusReader(CorpusReader):
         try:
             # FIXME: this won't work under python 3
             import pygame.mixer, StringIO
+
             pygame.mixer.init(16000)
             f = StringIO.StringIO(self.wav(utterance, start, end))
             pygame.mixer.Sound(f).play()
@@ -415,14 +454,16 @@ class TimitCorpusReader(CorpusReader):
             pass
 
         # Method 3: complain. :)
-        print(("you must install pygame or ossaudiodev "
-                             "for audio playback."), file=sys.stderr)
+        print(
+            ("you must install pygame or ossaudiodev " "for audio playback."),
+            file=sys.stderr,
+        )
 
 
-@compat.python_2_unicode_compatible
 class SpeakerInfo(object):
-    def __init__(self, id, sex, dr, use, recdate, birthdate,
-                 ht, race, edu, comments=None):
+    def __init__(
+        self, id, sex, dr, use, recdate, birthdate, ht, race, edu, comments=None
+    ):
         self.id = id
         self.sex = sex
         self.dr = dr
@@ -435,10 +476,9 @@ class SpeakerInfo(object):
         self.comments = comments
 
     def __repr__(self):
-        attribs = 'id sex dr use recdate birthdate ht race edu comments'
-        args = ['%s=%r' % (attr, getattr(self, attr))
-                for attr in attribs.split()]
-        return 'SpeakerInfo(%s)' % (', '.join(args))
+        attribs = "id sex dr use recdate birthdate ht race edu comments"
+        args = ["%s=%r" % (attr, getattr(self, attr)) for attr in attribs.split()]
+        return "SpeakerInfo(%s)" % (", ".join(args))
 
 
 def read_timit_block(stream):
@@ -447,6 +487,7 @@ def read_timit_block(stream):
     number that will be ignored.
     """
     line = stream.readline()
-    if not line: return []
-    n, sent = line.split(' ', 1)
+    if not line:
+        return []
+    n, sent = line.split(" ", 1)
     return [sent]
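
One loose end survives the port: the pygame fallback in play() still carries "FIXME: this won't work under python 3", because the StringIO module it imports was removed in Python 3 and WAV data is bytes. A hedged sketch of what a Python 3 version of that branch might look like, using io.BytesIO (untested against pygame; Sound accepting a file object is documented pygame behaviour, but treat the whole snippet as an assumption, not the commit's code):

    import io

    def play_wav_bytes(wav_bytes, freq=16000):
        """Play in-memory WAV data via pygame, if pygame is installed."""
        try:
            import pygame.mixer
        except ImportError:
            return False
        pygame.mixer.init(freq)
        # io.BytesIO replaces Python 2's StringIO.StringIO for binary data.
        pygame.mixer.Sound(file=io.BytesIO(wav_bytes)).play()
        return True
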
diff --git a/nlp_resource_data/nltk/corpus/reader/timit.pyc b/nlp_resource_data/nltk/corpus/reader/timit.pyc
deleted file mode 100755 (executable)
index fe95108..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/timit.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/toolbox.py b/nlp_resource_data/nltk/corpus/reader/toolbox.py
old mode 100755 (executable)
new mode 100644 (file)
index 169ed02..aead10b
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Toolbox Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Greg Aumann <greg_aumann@sil.org>
 #         Stuart Robinson <Stuart.Robinson@mpi.nl>
 #         Steven Bird <stevenbird1@gmail.com>
@@ -12,35 +12,47 @@ Module for reading, writing and manipulating
 Toolbox databases and settings fileids.
 """
 
-import os
-import re
-import codecs
-
-from six import string_types
-
 from nltk.toolbox import ToolboxData
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
+
 class ToolboxCorpusReader(CorpusReader):
     def xml(self, fileids, key=None):
-        return concat([ToolboxData(path, enc).parse(key=key)
-                       for (path, enc) in self.abspaths(fileids, True)])
+        return concat(
+            [
+                ToolboxData(path, enc).parse(key=key)
+                for (path, enc) in self.abspaths(fileids, True)
+            ]
+        )
 
-    def fields(self, fileids, strip=True, unwrap=True, encoding='utf8',
-               errors='strict', unicode_fields=None):
-        return concat([list(ToolboxData(fileid,enc).fields(
-                             strip, unwrap, encoding, errors, unicode_fields))
-                       for (fileid, enc)
-                       in self.abspaths(fileids, include_encoding=True)])
+    def fields(
+        self,
+        fileids,
+        strip=True,
+        unwrap=True,
+        encoding="utf8",
+        errors="strict",
+        unicode_fields=None,
+    ):
+        return concat(
+            [
+                list(
+                    ToolboxData(fileid, enc).fields(
+                        strip, unwrap, encoding, errors, unicode_fields
+                    )
+                )
+                for (fileid, enc) in self.abspaths(fileids, include_encoding=True)
+            ]
+        )
 
     # should probably be done lazily:
     def entries(self, fileids, **kwargs):
-        if 'key' in kwargs:
-            key = kwargs['key']
-            del kwargs['key']
+        if "key" in kwargs:
+            key = kwargs["key"]
+            del kwargs["key"]
         else:
-            key = 'lx'  # the default key in MDF
+            key = "lx"  # the default key in MDF
         entries = []
         for marker, contents in self.fields(fileids, **kwargs):
             if marker == key:
@@ -52,17 +64,20 @@ class ToolboxCorpusReader(CorpusReader):
                     pass
         return entries
 
-    def words(self, fileids, key='lx'):
+    def words(self, fileids, key="lx"):
         return [contents for marker, contents in self.fields(fileids) if marker == key]
 
     def raw(self, fileids):
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
 
 def demo():
     pass
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
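
A side note on entries() above: the membership-test-then-delete dance on kwargs is equivalent to a single dict.pop with a default, which a later cleanup could use. Illustration only, not part of this commit:

    def split_key(**kwargs):
        # Same effect as: if "key" in kwargs: key = kwargs["key"]; del kwargs["key"]
        key = kwargs.pop("key", "lx")  # "lx" is the default key in MDF
        return key, kwargs

    print(split_key(key="ps", strip=False))  # ('ps', {'strip': False})
    print(split_key())                       # ('lx', {})
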
diff --git a/nlp_resource_data/nltk/corpus/reader/toolbox.pyc b/nlp_resource_data/nltk/corpus/reader/toolbox.pyc
deleted file mode 100755 (executable)
index 582c6cd..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/toolbox.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/twitter.py b/nlp_resource_data/nltk/corpus/reader/twitter.py
old mode 100755 (executable)
new mode 100644 (file)
index 5b48dcf..7f9b7b7
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Twitter Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -13,8 +13,6 @@ have been serialised into line-delimited JSON.
 import json
 import os
 
-from six import string_types
-
 from nltk.tokenize import TweetTokenizer
 
 from nltk.corpus.reader.util import StreamBackedCorpusView, concat, ZipFilePathPointer
@@ -58,9 +56,9 @@ class TwitterCorpusReader(CorpusReader):
     The corpus view class used by this reader.
     """
 
-    def __init__(self, root, fileids=None,
-                 word_tokenizer=TweetTokenizer(),
-                 encoding='utf8'):
+    def __init__(
+        self, root, fileids=None, word_tokenizer=TweetTokenizer(), encoding="utf8"
+    ):
         """
 
         :param root: The root directory for this corpus.
@@ -82,8 +80,6 @@ class TwitterCorpusReader(CorpusReader):
 
         self._word_tokenizer = word_tokenizer
 
-
-
     def docs(self, fileids=None):
         """
         Returns the full Tweet objects, as specified by `Twitter
@@ -94,9 +90,12 @@ class TwitterCorpusReader(CorpusReader):
         from JSON.
         :rtype: list(dict)
         """
-        return concat([self.CorpusView(path, self._read_tweets, encoding=enc)
-                       for (path, enc, fileid) in self.abspaths(fileids, True, True)])
-
+        return concat(
+            [
+                self.CorpusView(path, self._read_tweets, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
 
     def strings(self, fileids=None):
         """
@@ -109,7 +108,7 @@ class TwitterCorpusReader(CorpusReader):
         tweets = []
         for jsono in fulltweets:
             try:
-                text = jsono['text']
+                text = jsono["text"]
                 if isinstance(text, bytes):
                     text = text.decode(self.encoding)
                 tweets.append(text)
@@ -117,7 +116,6 @@ class TwitterCorpusReader(CorpusReader):
                 pass
         return tweets
 
-
     def tokenized(self, fileids=None):
         """
         :return: the given file(s) as a list of the text content of Tweets as
@@ -129,18 +127,16 @@ class TwitterCorpusReader(CorpusReader):
         tokenizer = self._word_tokenizer
         return [tokenizer.tokenize(t) for t in tweets]
 
-
     def raw(self, fileids=None):
         """
         Return the corpora in their raw form.
         """
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, string_types):
+        elif isinstance(fileids, str):
             fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
-
     def _read_tweets(self, stream):
         """
         Assumes that each line in ``stream`` is a JSON-serialised object.
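
_read_tweets (truncated above) consumes line-delimited JSON, and strings() decodes any bytes payload before collecting text, which matters on Python 3 where str and bytes no longer mix. A minimal sketch of the same line-delimited JSON pattern, independent of the reader classes:

    import io
    import json

    def read_tweets(stream, limit=10):
        """Parse up to limit line-delimited JSON objects from a text stream."""
        tweets = []
        for i, line in enumerate(stream):
            if i >= limit:
                break
            line = line.strip()
            if line:
                tweets.append(json.loads(line))
        return tweets

    demo = io.StringIO('{"text": "hello"}\n{"text": "world"}\n')
    print([t["text"] for t in read_tweets(demo)])  # ['hello', 'world']
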
diff --git a/nlp_resource_data/nltk/corpus/reader/twitter.pyc b/nlp_resource_data/nltk/corpus/reader/twitter.pyc
deleted file mode 100755 (executable)
index 15a6534..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/twitter.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/udhr.py b/nlp_resource_data/nltk/corpus/reader/udhr.py
old mode 100755 (executable)
new mode 100644 (file)
index 523c521..4bfb551
@@ -2,79 +2,75 @@
 """
 UDHR corpus reader. It mostly deals with encodings.
 """
-from __future__ import absolute_import, unicode_literals
 
 from nltk.corpus.reader.util import find_corpus_fileids
 from nltk.corpus.reader.plaintext import PlaintextCorpusReader
 
+
 class UdhrCorpusReader(PlaintextCorpusReader):
 
     ENCODINGS = [
-        ('.*-Latin1$', 'latin-1'),
-        ('.*-Hebrew$', 'hebrew'),
-        ('.*-Arabic$', 'cp1256'),
-        ('Czech_Cesky-UTF8', 'cp1250'), # yeah
-        ('.*-Cyrillic$', 'cyrillic'),
-        ('.*-SJIS$', 'SJIS'),
-        ('.*-GB2312$', 'GB2312'),
-        ('.*-Latin2$', 'ISO-8859-2'),
-        ('.*-Greek$', 'greek'),
-        ('.*-UTF8$', 'utf-8'),
-
-        ('Hungarian_Magyar-Unicode', 'utf-16-le'),
-        ('Amahuaca', 'latin1'),
-        ('Turkish_Turkce-Turkish', 'latin5'),
-        ('Lithuanian_Lietuviskai-Baltic', 'latin4'),
-        ('Japanese_Nihongo-EUC', 'EUC-JP'),
-        ('Japanese_Nihongo-JIS', 'iso2022_jp'),
-        ('Chinese_Mandarin-HZ', 'hz'),
-        ('Abkhaz\-Cyrillic\+Abkh', 'cp1251'),
+        (".*-Latin1$", "latin-1"),
+        (".*-Hebrew$", "hebrew"),
+        (".*-Arabic$", "cp1256"),
+        ("Czech_Cesky-UTF8", "cp1250"),  # yeah
+        (".*-Cyrillic$", "cyrillic"),
+        (".*-SJIS$", "SJIS"),
+        (".*-GB2312$", "GB2312"),
+        (".*-Latin2$", "ISO-8859-2"),
+        (".*-Greek$", "greek"),
+        (".*-UTF8$", "utf-8"),
+        ("Hungarian_Magyar-Unicode", "utf-16-le"),
+        ("Amahuaca", "latin1"),
+        ("Turkish_Turkce-Turkish", "latin5"),
+        ("Lithuanian_Lietuviskai-Baltic", "latin4"),
+        ("Japanese_Nihongo-EUC", "EUC-JP"),
+        ("Japanese_Nihongo-JIS", "iso2022_jp"),
+        ("Chinese_Mandarin-HZ", "hz"),
+        ("Abkhaz\-Cyrillic\+Abkh", "cp1251"),
     ]
 
-    SKIP = set([
-        # The following files are not fully decodable because they
-        # were truncated at wrong bytes:
-        'Burmese_Myanmar-UTF8',
-        'Japanese_Nihongo-JIS',
-        'Chinese_Mandarin-HZ',
-        'Chinese_Mandarin-UTF8',
-        'Gujarati-UTF8',
-        'Hungarian_Magyar-Unicode',
-        'Lao-UTF8',
-        'Magahi-UTF8',
-        'Marathi-UTF8',
-        'Tamil-UTF8',
-
-        # Unfortunately, encodings required for reading
-        # the following files are not supported by Python:
-        'Vietnamese-VPS',
-        'Vietnamese-VIQR',
-        'Vietnamese-TCVN',
-        'Magahi-Agra',
-        'Bhojpuri-Agra',
-        'Esperanto-T61', # latin3 raises an exception
-
-        # The following files are encoded for specific fonts:
-        'Burmese_Myanmar-WinResearcher',
-        'Armenian-DallakHelv',
-        'Tigrinya_Tigrigna-VG2Main',
-        'Amharic-Afenegus6..60375', # ?
-        'Navaho_Dine-Navajo-Navaho-font',
-
-        # What are these?
-        'Azeri_Azerbaijani_Cyrillic-Az.Times.Cyr.Normal0117',
-        'Azeri_Azerbaijani_Latin-Az.Times.Lat0117',
-
-        # The following files are unintended:
-        'Czech-Latin2-err',
-        'Russian_Russky-UTF8~',
-    ])
-
+    SKIP = set(
+        [
+            # The following files are not fully decodable because they
+            # were truncated at wrong bytes:
+            "Burmese_Myanmar-UTF8",
+            "Japanese_Nihongo-JIS",
+            "Chinese_Mandarin-HZ",
+            "Chinese_Mandarin-UTF8",
+            "Gujarati-UTF8",
+            "Hungarian_Magyar-Unicode",
+            "Lao-UTF8",
+            "Magahi-UTF8",
+            "Marathi-UTF8",
+            "Tamil-UTF8",
+            # Unfortunately, encodings required for reading
+            # the following files are not supported by Python:
+            "Vietnamese-VPS",
+            "Vietnamese-VIQR",
+            "Vietnamese-TCVN",
+            "Magahi-Agra",
+            "Bhojpuri-Agra",
+            "Esperanto-T61",  # latin3 raises an exception
+            # The following files are encoded for specific fonts:
+            "Burmese_Myanmar-WinResearcher",
+            "Armenian-DallakHelv",
+            "Tigrinya_Tigrigna-VG2Main",
+            "Amharic-Afenegus6..60375",  # ?
+            "Navaho_Dine-Navajo-Navaho-font",
+            # What are these?
+            "Azeri_Azerbaijani_Cyrillic-Az.Times.Cyr.Normal0117",
+            "Azeri_Azerbaijani_Latin-Az.Times.Lat0117",
+            # The following files are unintended:
+            "Czech-Latin2-err",
+            "Russian_Russky-UTF8~",
+        ]
+    )
 
-    def __init__(self, root='udhr'):
-        fileids = find_corpus_fileids(root, r'(?!README|\.).*')
+    def __init__(self, root="udhr"):
+        fileids = find_corpus_fileids(root, r"(?!README|\.).*")
         super(UdhrCorpusReader, self).__init__(
             root,
             [fileid for fileid in fileids if fileid not in self.SKIP],
-            encoding=self.ENCODINGS
+            encoding=self.ENCODINGS,
         )
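
The reformatted ENCODINGS table is an ordered list of (regex, codec) pairs; CorpusReader accepts such a list and applies the first codec whose pattern matches a given fileid. A simplified sketch of that first-match resolution (the real lookup lives inside NLTK's CorpusReader):

    import re

    ENCODINGS = [
        (".*-Latin1$", "latin-1"),
        (".*-UTF8$", "utf-8"),
        ("Hungarian_Magyar-Unicode", "utf-16-le"),
    ]

    def encoding_for(fileid, table=ENCODINGS, default="utf-8"):
        """Return the codec of the first pattern matching fileid."""
        for pattern, codec in table:
            if re.match(pattern, fileid):
                return codec
        return default

    print(encoding_for("French_Francais-Latin1"))  # latin-1
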
diff --git a/nlp_resource_data/nltk/corpus/reader/udhr.pyc b/nlp_resource_data/nltk/corpus/reader/udhr.pyc
deleted file mode 100755 (executable)
index 026b536..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/udhr.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/util.py b/nlp_resource_data/nltk/corpus/reader/util.py
old mode 100755 (executable)
new mode 100644 (file)
index cf44eb9..b85c33b
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus Reader Utilities
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -10,16 +10,9 @@ import os
 import bisect
 import re
 import tempfile
-from six import string_types, text_type
+import pickle
 from functools import reduce
-try:
-    import cPickle as pickle
-except ImportError:
-    import pickle
-
-# Use the c version of ElementTree, which is faster, if possible:
-try: from xml.etree import cElementTree as ElementTree
-except ImportError: from xml.etree import ElementTree
+from xml.etree import ElementTree
 
 from nltk.tokenize import wordpunct_tokenize
 from nltk.internals import slice_bounds
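
This import hunk retires two Python 2 idioms at once: the cPickle fallback (on CPython 3, pickle transparently uses the C _pickle accelerator) and the cElementTree fallback (since Python 3.3, xml.etree.ElementTree loads its C implementation automatically, and cElementTree is deprecated). A quick check that the plain imports already behave like the old fast paths:

    import pickle
    from xml.etree import ElementTree

    # pickle delegates to the C _pickle module on CPython 3:
    data = pickle.loads(pickle.dumps({"tokens": [1, 2, 3]}))
    print(data)  # {'tokens': [1, 2, 3]}

    # ElementTree picks up its C accelerator automatically since Python 3.3:
    root = ElementTree.fromstring("<doc><w>hello</w></doc>")
    print(root.find("w").text)  # hello
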
@@ -28,9 +21,10 @@ from nltk.data import SeekableUnicodeStreamReader
 from nltk.util import AbstractLazySequence, LazySubsequence, LazyConcatenation, py25
 
 ######################################################################
-#{ Corpus View
+# { Corpus View
 ######################################################################
 
+
 class StreamBackedCorpusView(AbstractLazySequence):
     """
     A 'view' of a corpus file, which acts like a sequence of tokens:
@@ -123,8 +117,8 @@ class StreamBackedCorpusView(AbstractLazySequence):
        end_toknum is the token index of the first token not in the
        block; and tokens is a list of the tokens in the block.
     """
-    def __init__(self, fileid, block_reader=None, startpos=0,
-                 encoding='utf8'):
+
+    def __init__(self, fileid, block_reader=None, startpos=0, encoding="utf8"):
         """
         Create a new corpus view, based on the file ``fileid``, and
         read with ``block_reader``.  See the class documentation
@@ -176,17 +170,19 @@ class StreamBackedCorpusView(AbstractLazySequence):
             else:
                 self._eofpos = os.stat(self._fileid).st_size
         except Exception as exc:
-            raise ValueError('Unable to open or access %r -- %s' %
-                             (fileid, exc))
+            raise ValueError("Unable to open or access %r -- %s" % (fileid, exc))
 
         # Maintain a cache of the most recently read block, to
         # increase efficiency of random access.
         self._cache = (-1, -1, None)
 
-    fileid = property(lambda self: self._fileid, doc="""
+    fileid = property(
+        lambda self: self._fileid,
+        doc="""
         The fileid of the file that is accessed by this view.
 
-        :type: str or PathPointer""")
+        :type: str or PathPointer""",
+    )
 
     def read_block(self, stream):
         """
@@ -197,7 +193,7 @@ class StreamBackedCorpusView(AbstractLazySequence):
         :param stream: an input stream
         :type stream: stream
         """
-        raise NotImplementedError('Abstract Method')
+        raise NotImplementedError("Abstract Method")
 
     def _open(self):
         """
@@ -209,9 +205,10 @@ class StreamBackedCorpusView(AbstractLazySequence):
             self._stream = self._fileid.open(self._encoding)
         elif self._encoding:
             self._stream = SeekableUnicodeStreamReader(
-                open(self._fileid, 'rb'), self._encoding)
+                open(self._fileid, "rb"), self._encoding
+            )
         else:
-            self._stream = open(self._fileid, 'rb')
+            self._stream = open(self._fileid, "rb")
 
     def close(self):
         """
@@ -230,7 +227,8 @@ class StreamBackedCorpusView(AbstractLazySequence):
         if self._len is None:
             # iterate_from() sets self._len when it reaches the end
             # of the file:
-            for tok in self.iterate_from(self._toknum[-1]): pass
+            for tok in self.iterate_from(self._toknum[-1]):
+                pass
         return self._len
 
     def __getitem__(self, i):
@@ -239,29 +237,31 @@ class StreamBackedCorpusView(AbstractLazySequence):
             # Check if it's in the cache.
             offset = self._cache[0]
             if offset <= start and stop <= self._cache[1]:
-                return self._cache[2][start-offset:stop-offset]
+                return self._cache[2][start - offset : stop - offset]
             # Construct & return the result.
             return LazySubsequence(self, start, stop)
         else:
             # Handle negative indices
-            if i < 0: i += len(self)
-            if i < 0: raise IndexError('index out of range')
+            if i < 0:
+                i += len(self)
+            if i < 0:
+                raise IndexError("index out of range")
             # Check if it's in the cache.
             offset = self._cache[0]
             if offset <= i < self._cache[1]:
-                return self._cache[2][i-offset]
+                return self._cache[2][i - offset]
             # Use iterate_from to extract it.
             try:
                 return next(self.iterate_from(i))
             except StopIteration:
-                raise IndexError('index out of range')
+                raise IndexError("index out of range")
 
     # If we wanted to be thread-safe, then this method would need to
     # do some locking.
     def iterate_from(self, start_tok):
         # Start by feeding from the cache, if possible.
         if self._cache[0] <= start_tok < self._cache[1]:
-            for tok in self._cache[2][start_tok-self._cache[0]:]:
+            for tok in self._cache[2][start_tok - self._cache[0] :]:
                 yield tok
                 start_tok += 1
 
@@ -269,11 +269,11 @@ class StreamBackedCorpusView(AbstractLazySequence):
         # our mapping, then we can jump straight to the correct block;
         # otherwise, start at the last block we've processed.
         if start_tok < self._toknum[-1]:
-            block_index = bisect.bisect_right(self._toknum, start_tok)-1
+            block_index = bisect.bisect_right(self._toknum, start_tok) - 1
             toknum = self._toknum[block_index]
             filepos = self._filepos[block_index]
         else:
-            block_index = len(self._toknum)-1
+            block_index = len(self._toknum) - 1
             toknum = self._toknum[-1]
             filepos = self._filepos[-1]
 
@@ -295,31 +295,35 @@ class StreamBackedCorpusView(AbstractLazySequence):
             self._current_blocknum = block_index
             tokens = self.read_block(self._stream)
             assert isinstance(tokens, (tuple, list, AbstractLazySequence)), (
-                'block reader %s() should return list or tuple.' %
-                self.read_block.__name__)
+                "block reader %s() should return list or tuple."
+                % self.read_block.__name__
+            )
             num_toks = len(tokens)
             new_filepos = self._stream.tell()
             assert new_filepos > filepos, (
-                'block reader %s() should consume at least 1 byte (filepos=%d)' %
-                (self.read_block.__name__, filepos))
+                "block reader %s() should consume at least 1 byte (filepos=%d)"
+                % (self.read_block.__name__, filepos)
+            )
 
             # Update our cache.
-            self._cache = (toknum, toknum+num_toks, list(tokens))
+            self._cache = (toknum, toknum + num_toks, list(tokens))
 
             # Update our mapping.
             assert toknum <= self._toknum[-1]
             if num_toks > 0:
                 block_index += 1
                 if toknum == self._toknum[-1]:
-                    assert new_filepos > self._filepos[-1] # monotonic!
+                    assert new_filepos > self._filepos[-1]  # monotonic!
                     self._filepos.append(new_filepos)
-                    self._toknum.append(toknum+num_toks)
+                    self._toknum.append(toknum + num_toks)
                 else:
                     # Check for consistency:
-                    assert new_filepos == self._filepos[block_index], (
-                        'inconsistent block reader (num chars read)')
-                    assert toknum+num_toks == self._toknum[block_index], (
-                        'inconsistent block reader (num tokens returned)')
+                    assert (
+                        new_filepos == self._filepos[block_index]
+                    ), "inconsistent block reader (num chars read)"
+                    assert (
+                        toknum + num_toks == self._toknum[block_index]
+                    ), "inconsistent block reader (num tokens returned)"
 
             # If we reached the end of the file, then update self._len
             if new_filepos == self._eofpos:
@@ -327,7 +331,7 @@ class StreamBackedCorpusView(AbstractLazySequence):
             # Generate the tokens in this block (but skip any tokens
             # before start_tok).  Note that between yields, our state
             # may be modified.
-            for tok in tokens[max(0, start_tok-toknum):]:
+            for tok in tokens[max(0, start_tok - toknum) :]:
                 yield tok
             # If we're at the end of the file, then we're done.
             assert new_filepos <= self._eofpos
@@ -347,19 +351,24 @@ class StreamBackedCorpusView(AbstractLazySequence):
     # when possible.
     def __add__(self, other):
         return concat([self, other])
+
     def __radd__(self, other):
         return concat([other, self])
+
     def __mul__(self, count):
         return concat([self] * count)
+
     def __rmul__(self, count):
         return concat([self] * count)
 
+
 class ConcatenatedCorpusView(AbstractLazySequence):
     """
     A 'view' of a corpus file that joins together one or more
     ``StreamBackedCorpusViews<StreamBackedCorpusView>``.  At most
     one file handle is left open at any time.
     """
+
     def __init__(self, corpus_views):
         self._pieces = corpus_views
         """A list of the corpus subviews that make up this
@@ -377,7 +386,8 @@ class ConcatenatedCorpusView(AbstractLazySequence):
     def __len__(self):
         if len(self._offsets) <= len(self._pieces):
             # Iterate to the end of the corpus.
-            for tok in self.iterate_from(self._offsets[-1]): pass
+            for tok in self.iterate_from(self._offsets[-1]):
+                pass
 
         return self._offsets[-1]
 
@@ -386,7 +396,7 @@ class ConcatenatedCorpusView(AbstractLazySequence):
             piece.close()
 
     def iterate_from(self, start_tok):
-        piecenum = bisect.bisect_right(self._offsets, start_tok)-1
+        piecenum = bisect.bisect_right(self._offsets, start_tok) - 1
 
         while piecenum < len(self._pieces):
             offset = self._offsets[piecenum]
@@ -399,16 +409,17 @@ class ConcatenatedCorpusView(AbstractLazySequence):
                 self._open_piece = piece
 
             # Get everything we can from this piece.
-            for tok in piece.iterate_from(max(0, start_tok-offset)):
+            for tok in piece.iterate_from(max(0, start_tok - offset)):
                 yield tok
 
             # Update the offset table.
-            if piecenum+1 == len(self._offsets):
+            if piecenum + 1 == len(self._offsets):
                 self._offsets.append(self._offsets[-1] + len(piece))
 
             # Move on to the next piece.
             piecenum += 1
 
+
 def concat(docs):
     """
     Concatenate together the contents of multiple documents from a
@@ -419,18 +430,17 @@ def concat(docs):
     if len(docs) == 1:
         return docs[0]
     if len(docs) == 0:
-        raise ValueError('concat() expects at least one object!')
+        raise ValueError("concat() expects at least one object!")
 
     types = set(d.__class__ for d in docs)
 
     # If they're all strings, use string concatenation.
-    if all(isinstance(doc, string_types) for doc in docs):
-        return ''.join(docs)
+    if all(isinstance(doc, str) for doc in docs):
+        return "".join(docs)
 
     # If they're all corpus views, then use ConcatenatedCorpusView.
     for typ in types:
-        if not issubclass(typ, (StreamBackedCorpusView,
-                                ConcatenatedCorpusView)):
+        if not issubclass(typ, (StreamBackedCorpusView, ConcatenatedCorpusView)):
             break
     else:
         return ConcatenatedCorpusView(docs)
@@ -447,23 +457,26 @@ def concat(docs):
         typ = list(types)[0]
 
         if issubclass(typ, list):
-            return reduce((lambda a,b:a+b), docs, [])
+            return reduce((lambda a, b: a + b), docs, [])
 
         if issubclass(typ, tuple):
-            return reduce((lambda a,b:a+b), docs, ())
+            return reduce((lambda a, b: a + b), docs, ())
 
         if ElementTree.iselement(typ):
-            xmltree = ElementTree.Element('documents')
-            for doc in docs: xmltree.append(doc)
+            xmltree = ElementTree.Element("documents")
+            for doc in docs:
+                xmltree.append(doc)
             return xmltree
 
     # No method found!
     raise ValueError("Don't know how to concatenate types: %r" % types)
 
+
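A hedged illustration of the dispatch in concat() above, using plain Python values (no corpus needed):

    from nltk.corpus.reader.util import concat

    print(concat(['ab', 'cd']))    # all str   -> 'abcd'
    print(concat([[1], [2, 3]]))   # all list  -> [1, 2, 3]
    print(concat([(1,), (2,)]))    # all tuple -> (1, 2)
    # mixed or unsupported types fall through to the ValueError above
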
 ######################################################################
-#{ Corpus View for Pickled Sequences
+# { Corpus View for Pickled Sequences
 ######################################################################
 
+
 class PickleCorpusView(StreamBackedCorpusView):
     """
     A stream backed corpus view for corpus files that consist of
@@ -480,6 +493,7 @@ class PickleCorpusView(StreamBackedCorpusView):
         >>> PickleCorpusView.write(feature_corpus, some_fileid)  # doctest: +SKIP
         >>> pcv = PickleCorpusView(some_fileid) # doctest: +SKIP
     """
+
     BLOCK_SIZE = 100
     PROTOCOL = -1
 
@@ -497,8 +511,10 @@ class PickleCorpusView(StreamBackedCorpusView):
     def read_block(self, stream):
         result = []
         for i in range(self.BLOCK_SIZE):
-            try: result.append(pickle.load(stream))
-            except EOFError: break
+            try:
+                result.append(pickle.load(stream))
+            except EOFError:
+                break
         return result
 
     def __del__(self):
@@ -508,16 +524,18 @@ class PickleCorpusView(StreamBackedCorpusView):
         fileid.  (This method is called whenever a
         ``PickleCorpusView`` is garbage-collected.)
         """
-        if getattr(self, '_delete_on_gc'):
+        if getattr(self, "_delete_on_gc"):
             if os.path.exists(self._fileid):
-                try: os.remove(self._fileid)
-                except (OSError, IOError): pass
-        self.__dict__.clear() # make the garbage collector's job easier
+                try:
+                    os.remove(self._fileid)
+                except (OSError, IOError):
+                    pass
+        self.__dict__.clear()  # make the garbage collector's job easier
 
     @classmethod
     def write(cls, sequence, output_file):
-        if isinstance(output_file, string_types):
-            output_file = open(output_file, 'wb')
+        if isinstance(output_file, str):
+            output_file = open(output_file, "wb")
         for item in sequence:
             pickle.dump(item, output_file, cls.PROTOCOL)
 
@@ -532,71 +550,82 @@ class PickleCorpusView(StreamBackedCorpusView):
             deleted whenever this object gets garbage-collected.
         """
         try:
-            fd, output_file_name = tempfile.mkstemp('.pcv', 'nltk-')
-            output_file = os.fdopen(fd, 'wb')
+            fd, output_file_name = tempfile.mkstemp(".pcv", "nltk-")
+            output_file = os.fdopen(fd, "wb")
             cls.write(sequence, output_file)
             output_file.close()
             return PickleCorpusView(output_file_name, delete_on_gc)
         except (OSError, IOError) as e:
-            raise ValueError('Error while creating temp file: %s' % e)
-
+            raise ValueError("Error while creating temp file: %s" % e)
 
 
 ######################################################################
-#{ Block Readers
+# { Block Readers
 ######################################################################
 
+
 def read_whitespace_block(stream):
     toks = []
-    for i in range(20): # Read 20 lines at a time.
+    for i in range(20):  # Read 20 lines at a time.
         toks.extend(stream.readline().split())
     return toks
 
+
 def read_wordpunct_block(stream):
     toks = []
-    for i in range(20): # Read 20 lines at a time.
+    for i in range(20):  # Read 20 lines at a time.
         toks.extend(wordpunct_tokenize(stream.readline()))
     return toks
 
+
 def read_line_block(stream):
     toks = []
     for i in range(20):
         line = stream.readline()
-        if not line: return toks
-        toks.append(line.rstrip('\n'))
+        if not line:
+            return toks
+        toks.append(line.rstrip("\n"))
     return toks
 
+
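A quick check of read_line_block above on an in-memory stream (a sketch):

    import io
    from nltk.corpus.reader.util import read_line_block

    stream = io.StringIO('one\ntwo\nthree\n')
    print(read_line_block(stream))   # -> ['one', 'two', 'three']
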
 def read_blankline_block(stream):
-    s = ''
+    s = ""
     while True:
         line = stream.readline()
         # End of file:
         if not line:
-            if s: return [s]
-            else: return []
+            if s:
+                return [s]
+            else:
+                return []
         # Blank line:
         elif line and not line.strip():
-            if s: return [s]
+            if s:
+                return [s]
         # Other line:
         else:
             s += line
 
+
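And read_blankline_block above, which groups lines into blank-line-separated blocks:

    import io
    from nltk.corpus.reader.util import read_blankline_block

    s = io.StringIO('a b\nc d\n\ne f\n')
    print(read_blankline_block(s))   # -> ['a b\nc d\n']
    print(read_blankline_block(s))   # -> ['e f\n']
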
 def read_alignedsent_block(stream):
-    s = ''
+    s = ""
     while True:
         line = stream.readline()
-        if line[0] == '=' or line[0] == '\n' or line[:2] == '\r\n':
+        if line[0] == "=" or line[0] == "\n" or line[:2] == "\r\n":
             continue
         # End of file:
         if not line:
-            if s: return [s]
-            else: return []
+            if s:
+                return [s]
+            else:
+                return []
         # Other line:
         else:
             s += line
-            if re.match('^\d+-\d+', line) is not None:
+            if re.match("^\d+-\d+", line) is not None:
                 return [s]
 
+
 def read_regexp_block(stream, start_re, end_re=None):
     """
     Read a sequence of tokens from a stream, where tokens begin with
@@ -607,8 +636,10 @@ def read_regexp_block(stream, start_re, end_re=None):
     # Scan until we find a line matching the start regexp.
     while True:
         line = stream.readline()
-        if not line: return [] # end of file.
-        if re.match(start_re, line): break
+        if not line:
+            return []  # end of file.
+        if re.match(start_re, line):
+            break
 
     # Scan until we find another line matching the regexp, or EOF.
     lines = [line]
@@ -617,18 +648,19 @@ def read_regexp_block(stream, start_re, end_re=None):
         line = stream.readline()
         # End of file:
         if not line:
-            return [''.join(lines)]
+            return ["".join(lines)]
         # End of token:
         if end_re is not None and re.match(end_re, line):
-            return [''.join(lines)]
+            return ["".join(lines)]
         # Start of new token: backup to just before it starts, and
         # return the token we've already collected.
         if end_re is None and re.match(start_re, line):
             stream.seek(oldpos)
-            return [''.join(lines)]
+            return ["".join(lines)]
         # Anything else is part of the token.
         lines.append(line)
 
+
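A sketch of read_regexp_block above, grouping lines into tokens that start at lines matching start_re:

    import io
    from nltk.corpus.reader.util import read_regexp_block

    s = io.StringIO('[a]\nx=1\n[b]\ny=2\n')
    print(read_regexp_block(s, start_re=r'\['))   # -> ['[a]\nx=1\n']
    print(read_regexp_block(s, start_re=r'\['))   # -> ['[b]\ny=2\n']
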
 def read_sexpr_block(stream, block_size=16384, comment_char=None):
     """
     Read a sequence of s-expressions from the stream, and leave the
@@ -650,17 +682,20 @@ def read_sexpr_block(stream, block_size=16384, comment_char=None):
     """
     start = stream.tell()
     block = stream.read(block_size)
-    encoding = getattr(stream, 'encoding', None)
-    assert encoding is not None or isinstance(block, text_type)
-    if encoding not in (None, 'utf-8'):
+    encoding = getattr(stream, "encoding", None)
+    assert encoding is not None or isinstance(block, str)
+    if encoding not in (None, "utf-8"):
         import warnings
-        warnings.warn('Parsing may fail, depending on the properties '
-                      'of the %s encoding!' % encoding)
+
+        warnings.warn(
+            "Parsing may fail, depending on the properties "
+            "of the %s encoding!" % encoding
+        )
         # (e.g., the utf-16 encoding does not work because it insists
         # on adding BOMs to the beginning of encoded strings.)
 
     if comment_char:
-        COMMENT = re.compile('(?m)^%s.*$' % re.escape(comment_char))
+        COMMENT = re.compile("(?m)^%s.*$" % re.escape(comment_char))
     while True:
         try:
             # If we're stripping comments, then make sure our block ends
@@ -673,18 +708,18 @@ def read_sexpr_block(stream, block_size=16384, comment_char=None):
             # Read the block.
             tokens, offset = _parse_sexpr_block(block)
             # Skip whitespace
-            offset = re.compile(r'\s*').search(block, offset).end()
+            offset = re.compile(r"\s*").search(block, offset).end()
 
             # Move to the end position.
             if encoding is None:
-                stream.seek(start+offset)
+                stream.seek(start + offset)
             else:
-                stream.seek(start+len(block[:offset].encode(encoding)))
+                stream.seek(start + len(block[:offset].encode(encoding)))
 
             # Return the list of tokens we processed
             return tokens
         except ValueError as e:
-            if e.args[0] == 'Block too small':
+            if e.args[0] == "Block too small":
                 next_block = stream.read(block_size)
                 if next_block:
                     block += next_block
@@ -692,45 +727,52 @@ def read_sexpr_block(stream, block_size=16384, comment_char=None):
                 else:
                     # The file ended mid-sexpr -- return what we got.
                     return [block.strip()]
-            else: raise
+            else:
+                raise
+
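A sketch of read_sexpr_block on an in-memory stream (io.StringIO has no .encoding attribute, so the str branch above applies):

    import io
    from nltk.corpus.reader.util import read_sexpr_block

    s = io.StringIO('(a (b c)) (d) tail')
    print(read_sexpr_block(s))   # -> ['(a (b c))', '(d)']
    print(read_sexpr_block(s))   # -> ['tail']  (file ended mid-token)
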
 
 def _sub_space(m):
     """Helper function: given a regexp match, return a string of
     spaces that's the same length as the matched string."""
-    return ' '*(m.end()-m.start())
+    return " " * (m.end() - m.start())
+
 
 def _parse_sexpr_block(block):
     tokens = []
     start = end = 0
 
     while end < len(block):
-        m = re.compile(r'\S').search(block, end)
+        m = re.compile(r"\S").search(block, end)
         if not m:
             return tokens, end
 
         start = m.start()
 
         # Case 1: sexpr is not parenthesized.
-        if m.group() != '(':
-            m2 = re.compile(r'[\s(]').search(block, start)
+        if m.group() != "(":
+            m2 = re.compile(r"[\s(]").search(block, start)
             if m2:
                 end = m2.start()
             else:
-                if tokens: return tokens, end
-                raise ValueError('Block too small')
+                if tokens:
+                    return tokens, end
+                raise ValueError("Block too small")
 
         # Case 2: parenthesized sexpr.
         else:
             nesting = 0
-            for m in re.compile(r'[()]').finditer(block, start):
-                if m.group()=='(': nesting += 1
-                else: nesting -= 1
+            for m in re.compile(r"[()]").finditer(block, start):
+                if m.group() == "(":
+                    nesting += 1
+                else:
+                    nesting -= 1
                 if nesting == 0:
                     end = m.end()
                     break
             else:
-                if tokens: return tokens, end
-                raise ValueError('Block too small')
+                if tokens:
+                    return tokens, end
+                raise ValueError("Block too small")
 
         tokens.append(block[start:end])
 
@@ -738,19 +780,23 @@ def _parse_sexpr_block(block):
 
 
 ######################################################################
-#{ Finding Corpus Items
+# { Finding Corpus Items
 ######################################################################
 
+
 def find_corpus_fileids(root, regexp):
     if not isinstance(root, PathPointer):
-        raise TypeError('find_corpus_fileids: expected a PathPointer')
-    regexp += '$'
+        raise TypeError("find_corpus_fileids: expected a PathPointer")
+    regexp += "$"
 
     # Find fileids in a zipfile: scan the zipfile's namelist.  Filter
     # out entries that end in '/' -- they're directories.
     if isinstance(root, ZipFilePathPointer):
-        fileids = [name[len(root.entry):] for name in root.zipfile.namelist()
-                 if not name.endswith('/')]
+        fileids = [
+            name[len(root.entry) :]
+            for name in root.zipfile.namelist()
+            if not name.endswith("/")
+        ]
         items = [name for name in fileids if re.match(regexp, name)]
         return sorted(items)
 
@@ -761,20 +807,25 @@ def find_corpus_fileids(root, regexp):
         # workaround for py25 which doesn't support followlinks
         kwargs = {}
         if not py25():
-            kwargs = {'followlinks': True}
+            kwargs = {"followlinks": True}
         for dirname, subdirs, fileids in os.walk(root.path, **kwargs):
-            prefix = ''.join('%s/' % p for p in _path_from(root.path, dirname))
-            items += [prefix+fileid for fileid in fileids
-                      if re.match(regexp, prefix+fileid)]
+            prefix = "".join("%s/" % p for p in _path_from(root.path, dirname))
+            items += [
+                prefix + fileid
+                for fileid in fileids
+                if re.match(regexp, prefix + fileid)
+            ]
             # Don't visit svn directories:
-            if '.svn' in subdirs: subdirs.remove('.svn')
+            if ".svn" in subdirs:
+                subdirs.remove(".svn")
         return sorted(items)
 
     else:
         raise AssertionError("Don't know how to handle %r" % root)
 
+
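A hedged sketch of find_corpus_fileids(); FileSystemPathPointer comes from nltk.data, and the data directory shown is hypothetical:

    from nltk.data import FileSystemPathPointer
    from nltk.corpus.reader.util import find_corpus_fileids

    root = FileSystemPathPointer('/usr/share/nltk_data/corpora/abc')
    print(find_corpus_fileids(root, r'.*\.txt'))   # e.g. ['rural.txt', 'science.txt']
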
 def _path_from(parent, child):
-    if os.path.split(parent)[1] == '':
+    if os.path.split(parent)[1] == "":
         parent = os.path.split(parent)[0]
     path = []
     while parent != child:
@@ -783,22 +834,27 @@ def _path_from(parent, child):
         assert os.path.split(child)[0] != child
     return path
 
+
 ######################################################################
-#{ Paragraph structure in Treebank files
+# { Paragraph structure in Treebank files
 ######################################################################
 
+
 def tagged_treebank_para_block_reader(stream):
     # Read the next paragraph.
-    para = ''
+    para = ""
     while True:
         line = stream.readline()
         # End of paragraph:
-        if re.match('======+\s*$', line):
-            if para.strip(): return [para]
+        if re.match("======+\s*$", line):
+            if para.strip():
+                return [para]
         # End of file:
-        elif line == '':
-            if para.strip(): return [para]
-            else: return []
+        elif line == "":
+            if para.strip():
+                return [para]
+            else:
+                return []
         # Content line:
         else:
             para += line
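
A small sketch of the paragraph block reader above on an in-memory stream:

    import io
    from nltk.corpus.reader.util import tagged_treebank_para_block_reader

    s = io.StringIO('======\nThe/DT dog/NN barked/VBD ./.\n======\n')
    print(tagged_treebank_para_block_reader(s))   # -> ['The/DT dog/NN barked/VBD ./.\n']
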
diff --git a/nlp_resource_data/nltk/corpus/reader/util.pyc b/nlp_resource_data/nltk/corpus/reader/util.pyc
deleted file mode 100755 (executable)
index cb17813..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/util.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/verbnet.py b/nlp_resource_data/nltk/corpus/reader/verbnet.py
old mode 100755 (executable)
new mode 100644 (file)
index 641cff9..0ab5f59
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Verbnet Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -11,14 +11,11 @@ An NLTK interface to the VerbNet verb lexicon
 For details about VerbNet see:
 https://verbs.colorado.edu/~mpalmer/projects/verbnet.html
 """
-from __future__ import unicode_literals
 
 import re
 import textwrap
 from collections import defaultdict
 
-from six import string_types
-
 from nltk.corpus.reader.xmldocs import XMLCorpusReader
 
 
@@ -60,14 +57,15 @@ class VerbnetCorpusReader(XMLCorpusReader):
         # runs 2-30 times faster.
         self._quick_index()
 
-    _LONGID_RE = re.compile(r'([^\-\.]*)-([\d+.\-]+)$')
+    _LONGID_RE = re.compile(r"([^\-\.]*)-([\d+.\-]+)$")
     """Regular expression that matches (and decomposes) longids"""
 
-    _SHORTID_RE = re.compile(r'[\d+.\-]+$')
+    _SHORTID_RE = re.compile(r"[\d+.\-]+$")
     """Regular expression that matches shortids"""
 
-    _INDEX_RE = re.compile(r'<MEMBER name="\??([^"]+)" wn="([^"]*)"[^>]+>|'
-                           r'<VNSUBCLASS ID="([^"]+)"/?>')
+    _INDEX_RE = re.compile(
+        r'<MEMBER name="\??([^"]+)" wn="([^"]*)"[^>]+>|' r'<VNSUBCLASS ID="([^"]+)"/?>'
+    )
     """Regular expression used by ``_index()`` to quickly scan the corpus
        for basic information."""
 
@@ -80,10 +78,9 @@ class VerbnetCorpusReader(XMLCorpusReader):
             return sorted(self._lemma_to_class.keys())
         else:
             # [xx] should this include subclass members?
-            if isinstance(vnclass, string_types):
+            if isinstance(vnclass, str):
                 vnclass = self.vnclass(vnclass)
-            return [member.get('name') for member in
-                    vnclass.findall('MEMBERS/MEMBER')]
+            return [member.get("name") for member in vnclass.findall("MEMBERS/MEMBER")]
 
     def wordnetids(self, vnclass=None):
         """
@@ -94,10 +91,15 @@ class VerbnetCorpusReader(XMLCorpusReader):
             return sorted(self._wordnet_to_class.keys())
         else:
             # [xx] should this include subclass members?
-            if isinstance(vnclass, string_types):
+            if isinstance(vnclass, str):
                 vnclass = self.vnclass(vnclass)
-            return sum([member.get('wn', '').split() for member in
-                        vnclass.findall('MEMBERS/MEMBER')], [])
+            return sum(
+                [
+                    member.get("wn", "").split()
+                    for member in vnclass.findall("MEMBERS/MEMBER")
+                ],
+                [],
+            )
 
     def classids(self, lemma=None, wordnetid=None, fileid=None, classid=None):
         """
@@ -113,22 +115,23 @@ class VerbnetCorpusReader(XMLCorpusReader):
         If nothing is specified, return all classids within VerbNet
         """
         if fileid is not None:
-            return [c for (c, f) in self._class_to_fileid.items()
-                    if f == fileid]
+            return [c for (c, f) in self._class_to_fileid.items() if f == fileid]
         elif lemma is not None:
             return self._lemma_to_class[lemma]
         elif wordnetid is not None:
             return self._wordnet_to_class[wordnetid]
         elif classid is not None:
             xmltree = self.vnclass(classid)
-            return [subclass.get('ID') for subclass in
-                    xmltree.findall('SUBCLASSES/VNSUBCLASS')]
+            return [
+                subclass.get("ID")
+                for subclass in xmltree.findall("SUBCLASSES/VNSUBCLASS")
+            ]
         else:
             return sorted(self._class_to_fileid.keys())
 
     def vnclass(self, fileid_or_classid):
         """Returns VerbNet class ElementTree
-        
+
         Return an ElementTree containing the xml for the specified
         VerbNet class.
 
@@ -147,17 +150,17 @@ class VerbnetCorpusReader(XMLCorpusReader):
         if classid in self._class_to_fileid:
             fileid = self._class_to_fileid[self.longid(classid)]
             tree = self.xml(fileid)
-            if classid == tree.get('ID'):
+            if classid == tree.get("ID"):
                 return tree
             else:
-                for subclass in tree.findall('.//VNSUBCLASS'):
-                    if classid == subclass.get('ID'):
+                for subclass in tree.findall(".//VNSUBCLASS"):
+                    if classid == subclass.get("ID"):
                         return subclass
                 else:
                     assert False  # we saw it during _index()!
 
         else:
-            raise ValueError('Unknown identifier {}'.format(fileid_or_classid))
+            raise ValueError("Unknown identifier {}".format(fileid_or_classid))
 
     def fileids(self, vnclass_ids=None):
         """
@@ -167,76 +170,85 @@ class VerbnetCorpusReader(XMLCorpusReader):
         """
         if vnclass_ids is None:
             return self._fileids
-        elif isinstance(vnclass_ids, string_types):
+        elif isinstance(vnclass_ids, str):
             return [self._class_to_fileid[self.longid(vnclass_ids)]]
         else:
-            return [self._class_to_fileid[self.longid(vnclass_id)]
-                    for vnclass_id in vnclass_ids]
+            return [
+                self._class_to_fileid[self.longid(vnclass_id)]
+                for vnclass_id in vnclass_ids
+            ]
 
     def frames(self, vnclass):
         """Given a VerbNet class, this method returns VerbNet frames
-        
+
         The members returned are:
         1) Example
         2) Description
         3) Syntax
         4) Semantics
-        
+
         :param vnclass: A VerbNet class identifier; or an ElementTree
             containing the xml contents of a VerbNet class.
         :return: frames - a list of frame dictionaries
         """
-        if isinstance(vnclass, string_types):
+        if isinstance(vnclass, str):
             vnclass = self.vnclass(vnclass)
         frames = []
-        vnframes = vnclass.findall('FRAMES/FRAME')
+        vnframes = vnclass.findall("FRAMES/FRAME")
         for vnframe in vnframes:
-            frames.append({
-                'example': self._get_example_within_frame(vnframe),
-                'description': self._get_description_within_frame(vnframe),
-                'syntax': self._get_syntactic_list_within_frame(vnframe),
-                'semantics': self._get_semantics_within_frame(vnframe)
-            })
+            frames.append(
+                {
+                    "example": self._get_example_within_frame(vnframe),
+                    "description": self._get_description_within_frame(vnframe),
+                    "syntax": self._get_syntactic_list_within_frame(vnframe),
+                    "semantics": self._get_semantics_within_frame(vnframe),
+                }
+            )
         return frames
 
     def subclasses(self, vnclass):
-        """Returns subclass ids, if any exist 
-        
+        """Returns subclass ids, if any exist
+
         Given a VerbNet class, this method returns subclass ids (if they exist)
         in a list of strings.
-        
+
         :param vnclass: A VerbNet class identifier; or an ElementTree
             containing the xml contents of a VerbNet class.
         :return: list of subclasses
         """
-        if isinstance(vnclass, string_types):
+        if isinstance(vnclass, str):
             vnclass = self.vnclass(vnclass)
 
-        subclasses = [subclass.get('ID') for subclass in
-                      vnclass.findall('SUBCLASSES/VNSUBCLASS')]
+        subclasses = [
+            subclass.get("ID") for subclass in vnclass.findall("SUBCLASSES/VNSUBCLASS")
+        ]
         return subclasses
 
     def themroles(self, vnclass):
         """Returns thematic roles participating in a VerbNet class
-        
+
         Members returned as part of roles are-
         1) Type
         2) Modifiers
-        
+
         :param vnclass: A VerbNet class identifier; or an ElementTree
             containing the xml contents of a VerbNet class.
         :return: themroles: A list of thematic roles in the VerbNet class
         """
-        if isinstance(vnclass, string_types):
+        if isinstance(vnclass, str):
             vnclass = self.vnclass(vnclass)
 
         themroles = []
-        for trole in vnclass.findall('THEMROLES/THEMROLE'):
-            themroles.append({
-                'type': trole.get('type'),
-                'modifiers': [{'value': restr.get('Value'), 'type': restr.get('type')}
-                              for restr in trole.findall('SELRESTRS/SELRESTR')]
-            })
+        for trole in vnclass.findall("THEMROLES/THEMROLE"):
+            themroles.append(
+                {
+                    "type": trole.get("type"),
+                    "modifiers": [
+                        {"value": restr.get("Value"), "type": restr.get("type")}
+                        for restr in trole.findall("SELRESTRS/SELRESTR")
+                    ],
+                }
+            )
         return themroles
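
A hedged end-to-end sketch of the accessors above (assumes the 'verbnet' corpus has been fetched via nltk.download('verbnet'); printed values are illustrative):

    from nltk.corpus import verbnet

    vn = verbnet.vnclass('give-13.1')                  # ElementTree for the class
    print(verbnet.lemmas(vn)[:3])                      # member verbs
    print(verbnet.subclasses(vn))                      # e.g. ['give-13.1-1']
    print([t['type'] for t in verbnet.themroles(vn)])  # e.g. ['Agent', 'Theme', 'Recipient']
    print(verbnet.frames(vn)[0]['description']['primary'])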
 
     ######################################################################
@@ -247,23 +259,23 @@ class VerbnetCorpusReader(XMLCorpusReader):
         """
         Initialize the indexes ``_lemma_to_class``,
         ``_wordnet_to_class``, and ``_class_to_fileid`` by scanning
-        through the corpus fileids.  This is fast with cElementTree
-        (<0.1 secs), but quite slow (>10 secs) with the python
-        implementation of ElementTree.
+        through the corpus fileids.  This is fast if ElementTree
+        uses the C implementation (<0.1 secs), but quite slow (>10 secs)
+        if only the python implementation is available.
         """
         for fileid in self._fileids:
             self._index_helper(self.xml(fileid), fileid)
 
     def _index_helper(self, xmltree, fileid):
         """Helper for ``_index()``"""
-        vnclass = xmltree.get('ID')
+        vnclass = xmltree.get("ID")
         self._class_to_fileid[vnclass] = fileid
         self._shortid_to_longid[self.shortid(vnclass)] = vnclass
-        for member in xmltree.findall('MEMBERS/MEMBER'):
-            self._lemma_to_class[member.get('name')].append(vnclass)
-            for wn in member.get('wn', '').split():
+        for member in xmltree.findall("MEMBERS/MEMBER"):
+            self._lemma_to_class[member.get("name")].append(vnclass)
+            for wn in member.get("wn", "").split():
                 self._wordnet_to_class[wn].append(vnclass)
-        for subclass in xmltree.findall('SUBCLASSES/VNSUBCLASS'):
+        for subclass in xmltree.findall("SUBCLASSES/VNSUBCLASS"):
             self._index_helper(subclass, fileid)
 
     def _quick_index(self):
@@ -273,8 +285,8 @@ class VerbnetCorpusReader(XMLCorpusReader):
         through the corpus fileids.  This doesn't do proper xml parsing,
         but is good enough to find everything in the standard VerbNet
         corpus -- and it runs about 30 times faster than xml parsing
-        (with the python ElementTree; only 2-3 times faster with
-        cElementTree).
+        (with the python ElementTree; only 2-3 times faster
+        if ElementTree uses the C implementation).
         """
         # nb: if we got rid of wordnet_to_class, this would run 2-3
         # times faster.
@@ -293,7 +305,7 @@ class VerbnetCorpusReader(XMLCorpusReader):
                     vnclass = groups[2]  # for <MEMBER> elts.
                     self._shortid_to_longid[self.shortid(vnclass)] = vnclass
                 else:
-                    assert False, 'unexpected match condition'
+                    assert False, "unexpected match condition"
 
     ######################################################################
     # { Identifier conversion
@@ -301,22 +313,22 @@ class VerbnetCorpusReader(XMLCorpusReader):
 
     def longid(self, shortid):
         """Returns longid of a VerbNet class
-        
+
         Given a short VerbNet class identifier (eg '37.10'), map it
         to a long id (eg 'confess-37.10').  If ``shortid`` is already a
         long id, then return it as-is"""
         if self._LONGID_RE.match(shortid):
             return shortid  # it's already a longid.
         elif not self._SHORTID_RE.match(shortid):
-            raise ValueError('vnclass identifier %r not found' % shortid)
+            raise ValueError("vnclass identifier %r not found" % shortid)
         try:
             return self._shortid_to_longid[shortid]
         except KeyError:
-            raise ValueError('vnclass identifier %r not found' % shortid)
+            raise ValueError("vnclass identifier %r not found" % shortid)
 
     def shortid(self, longid):
         """Returns shortid of a VerbNet class
-        
+
         Given a long VerbNet class identifier (eg 'confess-37.10'),
         map it to a short id (eg '37.10').  If ``longid`` is already a
         short id, then return it as-is."""
@@ -326,7 +338,7 @@ class VerbnetCorpusReader(XMLCorpusReader):
         if m:
             return m.group(2)
         else:
-            raise ValueError('vnclass identifier %r not found' % longid)
+            raise ValueError("vnclass identifier %r not found" % longid)
 
     ######################################################################
     # { Frame access utility functions
@@ -334,36 +346,37 @@ class VerbnetCorpusReader(XMLCorpusReader):
 
     def _get_semantics_within_frame(self, vnframe):
         """Returns semantics within a single frame
-        
+
         A utility function to retrieve semantics within a frame in VerbNet
         Members of the semantics dictionary:
-        1) Predicate value 
+        1) Predicate value
         2) Arguments
-        
+
         :param vnframe: An ElementTree containing the xml contents of
             a VerbNet frame.
         :return: semantics: semantics dictionary
         """
         semantics_within_single_frame = []
-        for pred in vnframe.findall('SEMANTICS/PRED'):
-            arguments = [{'type': arg.get('type'), 'value': arg.get('value')}
-                         for arg in pred.findall('ARGS/ARG')]
-            semantics_within_single_frame.append({
-                'predicate_value': pred.get('value'),
-                'arguments': arguments
-            })
+        for pred in vnframe.findall("SEMANTICS/PRED"):
+            arguments = [
+                {"type": arg.get("type"), "value": arg.get("value")}
+                for arg in pred.findall("ARGS/ARG")
+            ]
+            semantics_within_single_frame.append(
+                {"predicate_value": pred.get("value"), "arguments": arguments}
+            )
         return semantics_within_single_frame
 
     def _get_example_within_frame(self, vnframe):
         """Returns example within a frame
-        
+
         A utility function to retrieve an example within a frame in VerbNet.
-        
+
         :param vnframe: An ElementTree containing the xml contents of
             a VerbNet frame.
         :return: example_text: The example sentence for this particular frame
         """
-        example_element = vnframe.find('EXAMPLES/EXAMPLE')
+        example_element = vnframe.find("EXAMPLES/EXAMPLE")
         if example_element is not None:
             example_text = example_element.text
         else:
@@ -372,45 +385,48 @@ class VerbnetCorpusReader(XMLCorpusReader):
 
     def _get_description_within_frame(self, vnframe):
         """Returns member description within frame
-         
+
         A utility function to retrieve a description of participating members
         within a frame in VerbNet.
-        
+
         :param vnframe: An ElementTree containing the xml contents of
             a VerbNet frame.
-        :return: description: a description dictionary with members - primary and secondary 
+        :return: description: a description dictionary with members - primary and secondary
         """
-        description_element = vnframe.find('DESCRIPTION')
+        description_element = vnframe.find("DESCRIPTION")
         return {
-            'primary': description_element.attrib['primary'],
-            'secondary': description_element.get('secondary', '')
+            "primary": description_element.attrib["primary"],
+            "secondary": description_element.get("secondary", ""),
         }
 
     def _get_syntactic_list_within_frame(self, vnframe):
         """Returns semantics within a frame
-        
+
         A utility function to retrieve syntax within a frame in VerbNet.
         Members of the syntactic dictionary:
         1) POS Tag
         2) Modifiers
-        
+
         :param vnframe: An ElementTree containing the xml contents of
             a VerbNet frame.
         :return: syntax_within_single_frame
         """
         syntax_within_single_frame = []
-        for elt in vnframe.find('SYNTAX'):
+        for elt in vnframe.find("SYNTAX"):
             pos_tag = elt.tag
             modifiers = dict()
-            modifiers['value'] = elt.get('value') if 'value' in elt.attrib else ""
-            modifiers['selrestrs'] = [{'value': restr.get('Value'), 'type': restr.get('type')}
-                                      for restr in elt.findall('SELRESTRS/SELRESTR')]
-            modifiers['synrestrs'] = [{'value': restr.get('Value'), 'type': restr.get('type')}
-                                      for restr in elt.findall('SYNRESTRS/SYNRESTR')]
-            syntax_within_single_frame.append({
-                'pos_tag': pos_tag,
-                'modifiers': modifiers
-            })
+            modifiers["value"] = elt.get("value") if "value" in elt.attrib else ""
+            modifiers["selrestrs"] = [
+                {"value": restr.get("Value"), "type": restr.get("type")}
+                for restr in elt.findall("SELRESTRS/SELRESTR")
+            ]
+            modifiers["synrestrs"] = [
+                {"value": restr.get("Value"), "type": restr.get("type")}
+                for restr in elt.findall("SYNRESTRS/SYNRESTR")
+            ]
+            syntax_within_single_frame.append(
+                {"pos_tag": pos_tag, "modifiers": modifiers}
+            )
         return syntax_within_single_frame
 
     ######################################################################
@@ -419,145 +435,152 @@ class VerbnetCorpusReader(XMLCorpusReader):
 
     def pprint(self, vnclass):
         """Returns pretty printed version of a VerbNet class
-        
+
         Return a string containing a pretty-printed representation of
         the given VerbNet class.
 
         :param vnclass: A VerbNet class identifier; or an ElementTree
         containing the xml contents of a VerbNet class.
         """
-        if isinstance(vnclass, string_types):
+        if isinstance(vnclass, str):
             vnclass = self.vnclass(vnclass)
 
-        s = vnclass.get('ID') + '\n'
-        s += self.pprint_subclasses(vnclass, indent='  ') + '\n'
-        s += self.pprint_members(vnclass, indent='  ') + '\n'
-        s += '  Thematic roles:\n'
-        s += self.pprint_themroles(vnclass, indent='    ') + '\n'
-        s += '  Frames:\n'
-        s += self.pprint_frames(vnclass, indent='    ')
+        s = vnclass.get("ID") + "\n"
+        s += self.pprint_subclasses(vnclass, indent="  ") + "\n"
+        s += self.pprint_members(vnclass, indent="  ") + "\n"
+        s += "  Thematic roles:\n"
+        s += self.pprint_themroles(vnclass, indent="    ") + "\n"
+        s += "  Frames:\n"
+        s += self.pprint_frames(vnclass, indent="    ")
         return s
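
The whole report can be rendered in one call; the output shape follows the code above, values are illustrative:

    from nltk.corpus import verbnet

    print(verbnet.pprint('give-13.1'))
    # give-13.1
    #   Subclasses: give-13.1-1
    #   Members: deal lend loan pass peddle refund ...
    #   Thematic roles:
    #     * Agent[+animate +organization] ...
    #   Frames:
    #     ...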
 
-    def pprint_subclasses(self, vnclass, indent=''):
+    def pprint_subclasses(self, vnclass, indent=""):
         """Returns pretty printed version of subclasses of VerbNet class
-        
+
         Return a string containing a pretty-printed representation of
         the given VerbNet class's subclasses.
 
         :param vnclass: A VerbNet class identifier; or an ElementTree
             containing the xml contents of a VerbNet class.
         """
-        if isinstance(vnclass, string_types):
+        if isinstance(vnclass, str):
             vnclass = self.vnclass(vnclass)
 
         subclasses = self.subclasses(vnclass)
-        if not subclasses: subclasses = ['(none)']
-        s = 'Subclasses: ' + ' '.join(subclasses)
-        return textwrap.fill(s, 70, initial_indent=indent,
-                             subsequent_indent=indent + '  ')
-
-    def pprint_members(self, vnclass, indent=''):
+        if not subclasses:
+            subclasses = ["(none)"]
+        s = "Subclasses: " + " ".join(subclasses)
+        return textwrap.fill(
+            s, 70, initial_indent=indent, subsequent_indent=indent + "  "
+        )
+
+    def pprint_members(self, vnclass, indent=""):
         """Returns pretty printed version of members in a VerbNet class
-        
+
         Return a string containing a pretty-printed representation of
         the given VerbNet class's member verbs.
 
         :param vnclass: A VerbNet class identifier; or an ElementTree
             containing the xml contents of a VerbNet class.
         """
-        if isinstance(vnclass, string_types):
+        if isinstance(vnclass, str):
             vnclass = self.vnclass(vnclass)
 
         members = self.lemmas(vnclass)
         if not members:
-            members = ['(none)']
-        s = 'Members: ' + ' '.join(members)
-        return textwrap.fill(s, 70, initial_indent=indent,
-                             subsequent_indent=indent + '  ')
+            members = ["(none)"]
+        s = "Members: " + " ".join(members)
+        return textwrap.fill(
+            s, 70, initial_indent=indent, subsequent_indent=indent + "  "
+        )
 
-    def pprint_themroles(self, vnclass, indent=''):
+    def pprint_themroles(self, vnclass, indent=""):
         """Returns pretty printed version of thematic roles in a VerbNet class
-        
+
         Return a string containing a pretty-printed representation of
         the given VerbNet class's thematic roles.
 
         :param vnclass: A VerbNet class identifier; or an ElementTree
             containing the xml contents of a VerbNet class.
         """
-        if isinstance(vnclass, string_types):
+        if isinstance(vnclass, str):
             vnclass = self.vnclass(vnclass)
 
         pieces = []
         for themrole in self.themroles(vnclass):
-            piece = indent + '* ' + themrole.get('type')
-            modifiers = [modifier['value'] + modifier['type']
-                         for modifier in themrole['modifiers']]
+            piece = indent + "* " + themrole.get("type")
+            modifiers = [
+                modifier["value"] + modifier["type"]
+                for modifier in themrole["modifiers"]
+            ]
             if modifiers:
-                piece += '[{}]'.format(' '.join(modifiers))
+                piece += "[{}]".format(" ".join(modifiers))
             pieces.append(piece)
-        return '\n'.join(pieces)
+        return "\n".join(pieces)
 
-    def pprint_frames(self, vnclass, indent=''):
+    def pprint_frames(self, vnclass, indent=""):
         """Returns pretty version of all frames in a VerbNet class
-        
+
         Return a string containing a pretty-printed representation of
         the list of frames within the VerbNet class.
 
         :param vnclass: A VerbNet class identifier; or an ElementTree
             containing the xml contents of a VerbNet class.
         """
-        if isinstance(vnclass, string_types):
+        if isinstance(vnclass, str):
             vnclass = self.vnclass(vnclass)
         pieces = []
         for vnframe in self.frames(vnclass):
             pieces.append(self._pprint_single_frame(vnframe, indent))
-        return '\n'.join(pieces)
+        return "\n".join(pieces)
 
-    def _pprint_single_frame(self, vnframe, indent=''):
+    def _pprint_single_frame(self, vnframe, indent=""):
         """Returns pretty printed version of a single frame in a VerbNet class
-        
+
         Returns a string containing a pretty-printed representation of
         the given frame.
-        
+
         :param vnframe: An ElementTree containing the xml contents of
             a VerbNet frame.
         """
-        frame_string = self._pprint_description_within_frame(vnframe, indent) + '\n'
-        frame_string += self._pprint_example_within_frame(vnframe, indent + ' ') + '\n'
-        frame_string += self._pprint_syntax_within_frame(vnframe, indent + '  Syntax: ') + '\n'
-        frame_string += indent + '  Semantics:\n'
-        frame_string += self._pprint_semantics_within_frame(vnframe, indent + '    ')
+        frame_string = self._pprint_description_within_frame(vnframe, indent) + "\n"
+        frame_string += self._pprint_example_within_frame(vnframe, indent + " ") + "\n"
+        frame_string += (
+            self._pprint_syntax_within_frame(vnframe, indent + "  Syntax: ") + "\n"
+        )
+        frame_string += indent + "  Semantics:\n"
+        frame_string += self._pprint_semantics_within_frame(vnframe, indent + "    ")
         return frame_string
 
-    def _pprint_example_within_frame(self, vnframe, indent=''):
+    def _pprint_example_within_frame(self, vnframe, indent=""):
         """Returns pretty printed version of example within frame in a VerbNet class
-        
+
         Return a string containing a pretty-printed representation of
         the given VerbNet frame example.
 
         :param vnframe: An ElementTree containing the xml contents of
             a Verbnet frame.
         """
-        if vnframe['example']:
-            return indent + ' Example: ' + vnframe['example']
+        if vnframe["example"]:
+            return indent + " Example: " + vnframe["example"]
 
-    def _pprint_description_within_frame(self, vnframe, indent=''):
+    def _pprint_description_within_frame(self, vnframe, indent=""):
         """Returns pretty printed version of a VerbNet frame description
-        
+
         Return a string containing a pretty-printed representation of
         the given VerbNet frame description.
 
         :param vnframe: An ElementTree containing the xml contents of
             a VerbNet frame.
         """
-        description = indent + vnframe['description']['primary']
-        if vnframe['description']['secondary']:
-            description += ' ({})'.format(vnframe['description']['secondary'])
+        description = indent + vnframe["description"]["primary"]
+        if vnframe["description"]["secondary"]:
+            description += " ({})".format(vnframe["description"]["secondary"])
         return description
 
-    def _pprint_syntax_within_frame(self, vnframe, indent=''):
-        """Returns pretty printed version of syntax within a frame in a VerbNet class 
-        
+    def _pprint_syntax_within_frame(self, vnframe, indent=""):
+        """Returns pretty printed version of syntax within a frame in a VerbNet class
+
         Return a string containing a pretty-printed representation of
         the given VerbNet frame syntax.
 
@@ -565,23 +588,27 @@ class VerbnetCorpusReader(XMLCorpusReader):
             a VerbNet frame.
         """
         pieces = []
-        for element in vnframe['syntax']:
-            piece = element['pos_tag']
+        for element in vnframe["syntax"]:
+            piece = element["pos_tag"]
             modifier_list = []
-            if 'value' in element['modifiers'] and element['modifiers']['value']:
-                modifier_list.append(element['modifiers']['value'])
-            modifier_list += ['{}{}'.format(restr['value'], restr['type'])
-                              for restr in (element['modifiers']['selrestrs'] +
-                                            element['modifiers']['synrestrs'])]
+            if "value" in element["modifiers"] and element["modifiers"]["value"]:
+                modifier_list.append(element["modifiers"]["value"])
+            modifier_list += [
+                "{}{}".format(restr["value"], restr["type"])
+                for restr in (
+                    element["modifiers"]["selrestrs"]
+                    + element["modifiers"]["synrestrs"]
+                )
+            ]
             if modifier_list:
-                piece += '[{}]'.format(' '.join(modifier_list))
+                piece += "[{}]".format(" ".join(modifier_list))
             pieces.append(piece)
 
-        return indent + ' '.join(pieces)
+        return indent + " ".join(pieces)
 
-    def _pprint_semantics_within_frame(self, vnframe, indent=''):
+    def _pprint_semantics_within_frame(self, vnframe, indent=""):
         """Returns a pretty printed version of semantics within frame in a VerbNet class
-        
+
         Return a string containing a pretty-printed representation of
         the given VerbNet frame semantics.
 
@@ -589,7 +616,9 @@ class VerbnetCorpusReader(XMLCorpusReader):
             a VerbNet frame.
         """
         pieces = []
-        for predicate in vnframe['semantics']:
-            arguments = [argument['value'] for argument in predicate['arguments']]
-            pieces.append('{}({})'.format(predicate['predicate_value'], ', '.join(arguments)))
-        return '\n'.join('{}* {}'.format(indent, piece) for piece in pieces)
+        for predicate in vnframe["semantics"]:
+            arguments = [argument["value"] for argument in predicate["arguments"]]
+            pieces.append(
+                "{}({})".format(predicate["predicate_value"], ", ".join(arguments))
+            )
+        return "\n".join("{}* {}".format(indent, piece) for piece in pieces)
diff --git a/nlp_resource_data/nltk/corpus/reader/verbnet.pyc b/nlp_resource_data/nltk/corpus/reader/verbnet.pyc
deleted file mode 100755 (executable)
index a5f172c..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/verbnet.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/wordlist.py b/nlp_resource_data/nltk/corpus/reader/wordlist.py
old mode 100755 (executable)
new mode 100644 (file)
index 24e06ae..0d0d214
@@ -1,13 +1,11 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Word List Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from six import string_types
-
 from nltk.tokenize import line_tokenize
 
 from nltk.corpus.reader.util import *
@@ -18,13 +16,19 @@ class WordListCorpusReader(CorpusReader):
     """
     List of words, one per line.  Blank lines are ignored.
     """
-    def words(self, fileids=None, ignore_lines_startswith='\n'):
-        return [line for line in line_tokenize(self.raw(fileids))
-                if not line.startswith(ignore_lines_startswith)]
+
+    def words(self, fileids=None, ignore_lines_startswith="\n"):
+        return [
+            line
+            for line in line_tokenize(self.raw(fileids))
+            if not line.startswith(ignore_lines_startswith)
+        ]
 
     def raw(self, fileids=None):
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
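
A sketch using the stock 'words' wordlist (assumes nltk.download('words')):

    from nltk.corpus import words

    print(words.words()[:5])   # e.g. ['A', 'a', 'aa', 'aal', 'aalii']
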
 
 
@@ -46,17 +50,34 @@ class NonbreakingPrefixesCorpusReader(WordListCorpusReader):
     Moses Machine Translation toolkit. These lists are used in the Python port
     of the Moses' word tokenizer.
     """
-    available_langs = {'catalan': 'ca', 'czech': 'cs', 'german': 'de',
-                        'greek': 'el', 'english': 'en', 'spanish': 'es',
-                        'finnish': 'fi',  'french': 'fr', 'hungarian': 'hu',
-                        'icelandic': 'is', 'italian': 'it', 'latvian': 'lv',
-                        'dutch': 'nl', 'polish': 'pl', 'portuguese': 'pt',
-                        'romanian': 'ro', 'russian': 'ru', 'slovak': 'sk',
-                        'slovenian': 'sl', 'swedish': 'sv',  'tamil': 'ta'}
+
+    available_langs = {
+        "catalan": "ca",
+        "czech": "cs",
+        "german": "de",
+        "greek": "el",
+        "english": "en",
+        "spanish": "es",
+        "finnish": "fi",
+        "french": "fr",
+        "hungarian": "hu",
+        "icelandic": "is",
+        "italian": "it",
+        "latvian": "lv",
+        "dutch": "nl",
+        "polish": "pl",
+        "portuguese": "pt",
+        "romanian": "ro",
+        "russian": "ru",
+        "slovak": "sk",
+        "slovenian": "sl",
+        "swedish": "sv",
+        "tamil": "ta",
+    }
     # Also, add the lang IDs as the keys.
-    available_langs.update({v:v for v in available_langs.values()})
+    available_langs.update({v: v for v in available_langs.values()})
 
-    def words(self, lang=None, fileids=None, ignore_lines_startswith='#'):
+    def words(self, lang=None, fileids=None, ignore_lines_startswith="#"):
         """
         This method returns a list of nonbreaking prefixes for the specified
         language(s).
@@ -74,9 +95,13 @@ class NonbreakingPrefixesCorpusReader(WordListCorpusReader):
         # all languages when fileids==None.
         if lang in self.available_langs:
             lang = self.available_langs[lang]
-            fileids = ['nonbreaking_prefix.'+lang]
-        return [line for line in line_tokenize(self.raw(fileids))
-                if not line.startswith(ignore_lines_startswith)]
+            fileids = ["nonbreaking_prefix." + lang]
+        return [
+            line
+            for line in line_tokenize(self.raw(fileids))
+            if not line.startswith(ignore_lines_startswith)
+        ]
+
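A sketch (assumes the 'nonbreaking_prefixes' data derived from the Moses toolkit is installed):

    from nltk.corpus import nonbreaking_prefixes as nbp

    print(nbp.words(lang='english')[:5])   # e.g. ['A', 'B', 'C', 'D', 'E']
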
 
 class UnicharsCorpusReader(WordListCorpusReader):
     """
@@ -85,12 +110,25 @@ class UnicharsCorpusReader(WordListCorpusReader):
     The files in the perluniprop.zip are extracted using the Unicode::Tussle
     module from http://search.cpan.org/~bdfoy/Unicode-Tussle-1.11/lib/Unicode/Tussle.pm
     """
+
     # These are categories similar to the Perl Unicode Properties
-    available_categories = ['Close_Punctuation', 'Currency_Symbol',
-                            'IsAlnum', 'IsAlpha', 'IsLower', 'IsN', 'IsSc',
-                            'IsSo', 'IsUpper', 'Line_Separator', 'Number',
-                            'Open_Punctuation', 'Punctuation', 'Separator',
-                            'Symbol']
+    available_categories = [
+        "Close_Punctuation",
+        "Currency_Symbol",
+        "IsAlnum",
+        "IsAlpha",
+        "IsLower",
+        "IsN",
+        "IsSc",
+        "IsSo",
+        "IsUpper",
+        "Line_Separator",
+        "Number",
+        "Open_Punctuation",
+        "Punctuation",
+        "Separator",
+        "Symbol",
+    ]
 
     def chars(self, category=None, fileids=None):
         """
@@ -108,7 +146,7 @@ class UnicharsCorpusReader(WordListCorpusReader):
         :return: a list of characters given the specific unicode character category
         """
         if category in self.available_categories:
-            fileids = [category+'.txt']
+            fileids = [category + ".txt"]
         return list(self.raw(fileids).strip())
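
A sketch (assumes the 'perluniprops' corpus):

    from nltk.corpus import perluniprops as pup

    print(pup.chars('Open_Punctuation')[:3])   # e.g. ['(', '[', '{']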
 
 
@@ -126,9 +164,11 @@ class MWAPPDBCorpusReader(WordListCorpusReader):
 
     :return: a list of tuples of similar lexical terms.
     """
-    mwa_ppdb_xxxl_file = 'ppdb-1.0-xxxl-lexical.extended.synonyms.uniquepairs'
+
+    mwa_ppdb_xxxl_file = "ppdb-1.0-xxxl-lexical.extended.synonyms.uniquepairs"
+
     def entries(self, fileids=mwa_ppdb_xxxl_file):
         """
         :return: a list of tuples of synonym word pairs.
         """
-        return [tuple(line.split('\t')) for line in line_tokenize(self.raw(fileids))]
+        return [tuple(line.split("\t")) for line in line_tokenize(self.raw(fileids))]
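
A sketch (assumes the 'mwa_ppdb' corpus is installed; the pair shown is illustrative):

    from nltk.corpus import mwa_ppdb

    print(mwa_ppdb.entries()[:1])   # e.g. [('abandoned', 'deserted')]
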
diff --git a/nlp_resource_data/nltk/corpus/reader/wordlist.pyc b/nlp_resource_data/nltk/corpus/reader/wordlist.pyc
deleted file mode 100755 (executable)
index 1df236f..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/wordlist.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/wordnet.py b/nlp_resource_data/nltk/corpus/reader/wordnet.py
old mode 100755 (executable)
new mode 100644 (file)
index 7063aed..3ced8a4
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: WordNet
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bethard <Steven.Bethard@colorado.edu>
 #         Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
@@ -29,8 +29,6 @@ http://compling.hss.ntu.edu.sg/omw/
 
 """
 
-from __future__ import print_function, unicode_literals
-
 import math
 import re
 from itertools import islice, chain
@@ -38,13 +36,9 @@ from functools import total_ordering
 from operator import itemgetter
 from collections import defaultdict, deque
 
-from six import iteritems
-from six.moves import range
-
 from nltk.corpus.reader import CorpusReader
 from nltk.util import binary_search_file as _binary_search_file
 from nltk.probability import FreqDist
-from nltk.compat import python_2_unicode_compatible
 from nltk.internals import deprecated
 
 ######################################################################
@@ -68,7 +62,7 @@ from nltk.internals import deprecated
 _INF = 1e300
 
 # { Part-of-speech constants
-ADJ, ADJ_SAT, ADV, NOUN, VERB = 'a', 's', 'r', 'n', 'v'
+ADJ, ADJ_SAT, ADV, NOUN, VERB = "a", "s", "r", "n", "v"
 # }
 
 POS_LIST = [NOUN, VERB, ADJ, ADV]
@@ -110,9 +104,10 @@ VERB_FRAME_STRINGS = (
     "Somebody %s INFINITIVE",
     "Somebody %s VERB-ing",
     "It %s that CLAUSE",
-    "Something %s INFINITIVE")
+    "Something %s INFINITIVE",
+)
 
-SENSENUM_RE = re.compile(r'\.[\d]+\.')
+SENSENUM_RE = re.compile(r"\.[\d]+\.")
 
 
 ######################################################################
@@ -129,67 +124,76 @@ class _WordNetObject(object):
     """A common base class for lemmas and synsets."""
 
     def hypernyms(self):
-        return self._related('@')
+        return self._related("@")
 
     def _hypernyms(self):
-        return self._related('@')
+        return self._related("@")
 
     def instance_hypernyms(self):
-        return self._related('@i')
+        return self._related("@i")
 
     def _instance_hypernyms(self):
-        return self._related('@i')
+        return self._related("@i")
 
     def hyponyms(self):
-        return self._related('~')
+        return self._related("~")
 
     def instance_hyponyms(self):
-        return self._related('~i')
+        return self._related("~i")
 
     def member_holonyms(self):
-        return self._related('#m')
+        return self._related("#m")
 
     def substance_holonyms(self):
-        return self._related('#s')
+        return self._related("#s")
 
     def part_holonyms(self):
-        return self._related('#p')
+        return self._related("#p")
 
     def member_meronyms(self):
-        return self._related('%m')
+        return self._related("%m")
 
     def substance_meronyms(self):
-        return self._related('%s')
+        return self._related("%s")
 
     def part_meronyms(self):
-        return self._related('%p')
+        return self._related("%p")
 
     def topic_domains(self):
-        return self._related(';c')
+        return self._related(";c")
+
+    def in_topic_domains(self):
+        return self._related("-c")
 
     def region_domains(self):
-        return self._related(';r')
+        return self._related(";r")
+
+    def in_region_domains(self):
+        return self._related("-r")
 
     def usage_domains(self):
-        return self._related(';u')
+        return self._related(";u")
+
+    def in_usage_domains(self):
+        return self._related("-u")
 
     def attributes(self):
-        return self._related('=')
+        return self._related("=")
 
     def entailments(self):
-        return self._related('*')
+        return self._related("*")
 
     def causes(self):
-        return self._related('>')
+        return self._related(">")
 
     def also_sees(self):
-        return self._related('^')
+        return self._related("^")
 
     def verb_groups(self):
-        return self._related('$')
+        return self._related("$")
 
     def similar_tos(self):
-        return self._related('&')
+        return self._related("&")
 
     def __hash__(self):
         return hash(self._name)
@@ -204,7 +208,6 @@ class _WordNetObject(object):
         return self._name < other._name
 
 
-@python_2_unicode_compatible
 class Lemma(_WordNetObject):
     """
     The lexical entry for a single morphological form of a
@@ -220,13 +223,13 @@ class Lemma(_WordNetObject):
     'salt.n.03' has the Lemmas 'salt.n.03.salt', 'salt.n.03.saltiness' and
     'salt.n.03.salinity'.
 
-    Lemma attributes, accessible via methods with the same name::
+    Lemma attributes, accessible via methods with the same name:
 
     - name: The canonical name of this lemma.
     - synset: The synset that this lemma belongs to.
     - syntactic_marker: For adjectives, the WordNet string identifying the
       syntactic position relative to the modified noun. See:
-      http://wordnet.princeton.edu/man/wninput.5WN.html#sect10
+      https://wordnet.princeton.edu/documentation/wninput5wn
       For all other parts of speech, this attribute is None.
     - count: The frequency of this lemma in wordnet.
 
@@ -234,7 +237,7 @@ class Lemma(_WordNetObject):
 
     Lemmas have the following methods for retrieving related Lemmas. They
     correspond to the names for the pointer symbols defined here:
-    http://wordnet.princeton.edu/man/wninput.5WN.html#sect3
+    https://wordnet.princeton.edu/documentation/wninput5wn
     These methods all return lists of Lemmas:
 
     - antonyms
@@ -253,12 +256,28 @@ class Lemma(_WordNetObject):
     - pertainyms
     """
 
-    __slots__ = ['_wordnet_corpus_reader', '_name', '_syntactic_marker',
-                 '_synset', '_frame_strings', '_frame_ids',
-                 '_lexname_index', '_lex_id', '_lang', '_key']
-
-    def __init__(self, wordnet_corpus_reader, synset, name,
-                 lexname_index, lex_id, syntactic_marker):
+    __slots__ = [
+        "_wordnet_corpus_reader",
+        "_name",
+        "_syntactic_marker",
+        "_synset",
+        "_frame_strings",
+        "_frame_ids",
+        "_lexname_index",
+        "_lex_id",
+        "_lang",
+        "_key",
+    ]
+
+    def __init__(
+        self,
+        wordnet_corpus_reader,
+        synset,
+        name,
+        lexname_index,
+        lex_id,
+        syntactic_marker,
+    ):
         self._wordnet_corpus_reader = wordnet_corpus_reader
         self._name = name
         self._syntactic_marker = syntactic_marker
@@ -267,7 +286,7 @@ class Lemma(_WordNetObject):
         self._frame_ids = []
         self._lexname_index = lexname_index
         self._lex_id = lex_id
-        self._lang = 'eng'
+        self._lang = "eng"
 
         self._key = None  # gets set later.
 
@@ -298,27 +317,29 @@ class Lemma(_WordNetObject):
 
     def _related(self, relation_symbol):
         get_synset = self._wordnet_corpus_reader.synset_from_pos_and_offset
-        return sorted([
+        if (self._name, relation_symbol) not in self._synset._lemma_pointers:
+            return []
+        return [
             get_synset(pos, offset)._lemmas[lemma_index]
-            for pos, offset, lemma_index
-            in self._synset._lemma_pointers[self._name, relation_symbol]
-        ])
+            for pos, offset, lemma_index in self._synset._lemma_pointers[
+                self._name, relation_symbol
+            ]
+        ]
 
     def count(self):
         """Return the frequency count for this Lemma"""
         return self._wordnet_corpus_reader.lemma_count(self)
 
     def antonyms(self):
-        return self._related('!')
+        return self._related("!")
 
     def derivationally_related_forms(self):
-        return self._related('+')
+        return self._related("+")
 
     def pertainyms(self):
-        return self._related('\\')
+        return self._related("\\")
 
 
-@python_2_unicode_compatible
 class Synset(_WordNetObject):
     """Create a Synset from a "<lemma>.<pos>.<number>" string where:
     <lemma> is the word's morphological stem
@@ -343,7 +364,7 @@ class Synset(_WordNetObject):
 
     Synsets have the following methods for retrieving related Synsets.
     They correspond to the names for the pointer symbols defined here:
-    http://wordnet.princeton.edu/man/wninput.5WN.html#sect3
+    https://wordnet.princeton.edu/documentation/wninput5wn
     These methods all return lists of Synsets.
 
     - hypernyms, instance_hypernyms
@@ -372,11 +393,21 @@ class Synset(_WordNetObject):
     - pertainyms
     """
 
-    __slots__ = ['_pos', '_offset', '_name', '_frame_ids',
-                 '_lemmas', '_lemma_names',
-                 '_definition', '_examples', '_lexname',
-                 '_pointers', '_lemma_pointers', '_max_depth',
-                 '_min_depth']
+    __slots__ = [
+        "_pos",
+        "_offset",
+        "_name",
+        "_frame_ids",
+        "_lemmas",
+        "_lemma_names",
+        "_definition",
+        "_examples",
+        "_lexname",
+        "_pointers",
+        "_lemma_pointers",
+        "_max_depth",
+        "_min_depth",
+    ]
 
     def __init__(self, wordnet_corpus_reader):
         self._wordnet_corpus_reader = wordnet_corpus_reader
@@ -395,7 +426,7 @@ class Synset(_WordNetObject):
         self._all_hypernyms = None
 
         self._pointers = defaultdict(set)
-        self._lemma_pointers = defaultdict(set)
+        self._lemma_pointers = defaultdict(list)
 
     def pos(self):
         return self._pos
@@ -420,29 +451,29 @@ class Synset(_WordNetObject):
 
     def _needs_root(self):
         if self._pos == NOUN:
-            if self._wordnet_corpus_reader.get_version() == '1.6':
+            if self._wordnet_corpus_reader.get_version() == "1.6":
                 return True
             else:
                 return False
         elif self._pos == VERB:
             return True
 
-    def lemma_names(self, lang='eng'):
-        '''Return all the lemma_names associated with the synset'''
-        if lang == 'eng':
+    def lemma_names(self, lang="eng"):
+        """Return all the lemma_names associated with the synset"""
+        if lang == "eng":
             return self._lemma_names
         else:
             self._wordnet_corpus_reader._load_lang_data(lang)
 
-            i = self._wordnet_corpus_reader.ss2of(self)
+            i = self._wordnet_corpus_reader.ss2of(self, lang)
             if i in self._wordnet_corpus_reader._lang_data[lang][0]:
                 return self._wordnet_corpus_reader._lang_data[lang][0][i]
             else:
                 return []
 
-    def lemmas(self, lang='eng'):
-        '''Return all the lemma objects associated with the synset'''
-        if lang == 'eng':
+    def lemmas(self, lang="eng"):
+        """Return all the lemma objects associated with the synset"""
+        if lang == "eng":
             return self._lemmas
         else:
             self._wordnet_corpus_reader._load_lang_data(lang)
@@ -453,11 +484,9 @@ class Synset(_WordNetObject):
                     self._wordnet_corpus_reader,
                     self,
                     lem,
-                    self._wordnet_corpus_reader._lexnames.index(
-                        self.lexname()
-                    ),
+                    self._wordnet_corpus_reader._lexnames.index(self.lexname()),
                     0,
-                    None
+                    None,
                 )
                 temp._lang = lang
                 lemmark.append(temp)
@@ -473,22 +502,23 @@ class Synset(_WordNetObject):
             next_synset = todo.pop()
             if next_synset not in seen:
                 seen.add(next_synset)
-                next_hypernyms = next_synset.hypernyms() + \
-                    next_synset.instance_hypernyms()
+                next_hypernyms = (
+                    next_synset.hypernyms() + next_synset.instance_hypernyms()
+                )
                 if not next_hypernyms:
                     result.append(next_synset)
                 else:
                     todo.extend(next_hypernyms)
         return result
 
-# Simpler implementation which makes incorrect assumption that
-# hypernym hierarchy is acyclic:
-#
-#        if not self.hypernyms():
-#            return [self]
-#        else:
-#            return list(set(root for h in self.hypernyms()
-#                            for root in h.root_hypernyms()))
+    # Simpler implementation which makes incorrect assumption that
+    # hypernym hierarchy is acyclic:
+    #
+    #        if not self.hypernyms():
+    #            return [self]
+    #        else:
+    #            return list(set(root for h in self.hypernyms()
+    #                            for root in h.root_hypernyms()))
     def max_depth(self):
         """
         :return: The length of the longest hypernym path from this
@@ -535,6 +565,7 @@ class Synset(_WordNetObject):
 
         """
         from nltk.util import breadth_first
+
         synset_offsets = []
         for synset in breadth_first(self, rel, depth):
             if synset._offset != self._offset:
@@ -585,9 +616,7 @@ class Synset(_WordNetObject):
             )
         return list(self._all_hypernyms.intersection(other._all_hypernyms))
 
-    def lowest_common_hypernyms(
-        self, other, simulate_root=False, use_min_depth=False
-    ):
+    def lowest_common_hypernyms(self, other, simulate_root=False, use_min_depth=False):
         """
         Get a list of lowest synset(s) that both synsets have as a hypernym.
         When `use_min_depth == False` this means that the synset which appears
@@ -630,7 +659,7 @@ class Synset(_WordNetObject):
         synsets = self.common_hypernyms(other)
         if simulate_root:
             fake_synset = Synset(None)
-            fake_synset._name = '*ROOT*'
+            fake_synset._name = "*ROOT*"
             fake_synset.hypernyms = lambda: []
             fake_synset.instance_hypernyms = lambda: []
             synsets.append(fake_synset)
@@ -638,14 +667,10 @@ class Synset(_WordNetObject):
         try:
             if use_min_depth:
                 max_depth = max(s.min_depth() for s in synsets)
-                unsorted_lch = [
-                    s for s in synsets if s.min_depth() == max_depth
-                ]
+                unsorted_lch = [s for s in synsets if s.min_depth() == max_depth]
             else:
                 max_depth = max(s.max_depth() for s in synsets)
-                unsorted_lch = [
-                    s for s in synsets if s.max_depth() == max_depth
-                ]
+                unsorted_lch = [s for s in synsets if s.max_depth() == max_depth]
             return sorted(unsorted_lch)
         except ValueError:
             return []
@@ -664,19 +689,16 @@ class Synset(_WordNetObject):
         """
         distances = set([(self, distance)])
         for hypernym in self._hypernyms() + self._instance_hypernyms():
-            distances |= hypernym.hypernym_distances(
-                distance+1,
-                simulate_root=False
-            )
+            distances |= hypernym.hypernym_distances(distance + 1, simulate_root=False)
         if simulate_root:
             fake_synset = Synset(None)
-            fake_synset._name = '*ROOT*'
+            fake_synset._name = "*ROOT*"
             fake_synset_distance = max(distances, key=itemgetter(1))[1]
-            distances.add((fake_synset, fake_synset_distance+1))
+            distances.add((fake_synset, fake_synset_distance + 1))
         return distances
 
     def _shortest_hypernym_paths(self, simulate_root):
-        if self._name == '*ROOT*':
+        if self._name == "*ROOT*":
             return {self: 0}
 
         queue = deque([(self, 0)])
@@ -694,7 +716,7 @@ class Synset(_WordNetObject):
 
         if simulate_root:
             fake_synset = Synset(None)
-            fake_synset._name = '*ROOT*'
+            fake_synset._name = "*ROOT*"
             path[fake_synset] = max(path.values()) + 1
 
         return path
@@ -723,9 +745,9 @@ class Synset(_WordNetObject):
         # For each ancestor synset common to both subject synsets, find the
         # connecting path length. Return the shortest of these.
 
-        inf = float('inf')
+        inf = float("inf")
         path_distance = inf
-        for synset, d1 in iteritems(dist_dict1):
+        for synset, d1 in dist_dict1.items():
             d2 = dist_dict2.get(synset, inf)
             path_distance = min(path_distance, d1 + d2)
 
@@ -763,7 +785,7 @@ class Synset(_WordNetObject):
 
         tree = [self]
         if depth != 0:
-            tree += [x.tree(rel, depth-1, cut_mark) for x in rel(self)]
+            tree += [x.tree(rel, depth - 1, cut_mark) for x in rel(self)]
         elif cut_mark:
             tree += [cut_mark]
         return tree
@@ -797,8 +819,7 @@ class Synset(_WordNetObject):
         """
 
         distance = self.shortest_path_distance(
-            other,
-            simulate_root=simulate_root and self._needs_root()
+            other, simulate_root=simulate_root and self._needs_root()
         )
         if distance is None or distance < 0:
             return None
@@ -833,23 +854,19 @@ class Synset(_WordNetObject):
 
         if self._pos != other._pos:
             raise WordNetError(
-                'Computing the lch similarity requires '
-                '%s and %s to have the same part of speech.' %
-                (self, other)
+                "Computing the lch similarity requires "
+                "%s and %s to have the same part of speech." % (self, other)
             )
 
         need_root = self._needs_root()
 
         if self._pos not in self._wordnet_corpus_reader._max_depth:
-            self._wordnet_corpus_reader._compute_max_depth(
-                self._pos, need_root
-            )
+            self._wordnet_corpus_reader._compute_max_depth(self._pos, need_root)
 
         depth = self._wordnet_corpus_reader._max_depth[self._pos]
 
         distance = self.shortest_path_distance(
-            other,
-            simulate_root=simulate_root and need_root
+            other, simulate_root=simulate_root and need_root
         )
 
         if distance is None or distance < 0 or depth == 0:
@@ -897,8 +914,7 @@ class Synset(_WordNetObject):
         # It is possible that more accurate results could be obtained by
         # removing this setting and it should be tested later on
         subsumers = self.lowest_common_hypernyms(
-            other,
-            simulate_root=simulate_root and need_root, use_min_depth=True
+            other, simulate_root=simulate_root and need_root, use_min_depth=True
         )
 
         # If no LCS was found return None
@@ -923,12 +939,10 @@ class Synset(_WordNetObject):
         # subsuming.  Add this to the LCS path length to get the path
         # length from each synset to the root.
         len1 = self.shortest_path_distance(
-            subsumer,
-            simulate_root=simulate_root and need_root
+            subsumer, simulate_root=simulate_root and need_root
         )
         len2 = other.shortest_path_distance(
-            subsumer,
-            simulate_root=simulate_root and need_root
+            subsumer, simulate_root=simulate_root and need_root
         )
         if len1 is None or len2 is None:
             return None
@@ -1021,18 +1035,20 @@ class Synset(_WordNetObject):
             for synset in todo:
                 seen.add(synset)
             yield todo
-            todo = [hypernym
-                    for synset in todo
-                    for hypernym in (
-                        synset.hypernyms() + synset.instance_hypernyms()
-                    )
-                    if hypernym not in seen]
+            todo = [
+                hypernym
+                for synset in todo
+                for hypernym in (synset.hypernyms() + synset.instance_hypernyms())
+                if hypernym not in seen
+            ]
 
     def __repr__(self):
         return "%s('%s')" % (type(self).__name__, self._name)
 
     def _related(self, relation_symbol, sort=True):
         get_synset = self._wordnet_corpus_reader.synset_from_pos_and_offset
+        if relation_symbol not in self._pointers:
+            return []
         pointer_tuples = self._pointers[relation_symbol]
         r = [get_synset(pos, offset) for pos, offset in pointer_tuples]
         if sort:
@@ -1044,19 +1060,20 @@ class Synset(_WordNetObject):
 # WordNet Corpus Reader
 ######################################################################
 
+
 class WordNetCorpusReader(CorpusReader):
     """
     A corpus reader used to access wordnet or its variants.
     """
 
-    _ENCODING = 'utf8'
+    _ENCODING = "utf8"
 
     # { Part-of-speech constants
-    ADJ, ADJ_SAT, ADV, NOUN, VERB = 'a', 's', 'r', 'n', 'v'
+    ADJ, ADJ_SAT, ADV, NOUN, VERB = "a", "s", "r", "n", "v"
     # }
 
     # { Filename constants
-    _FILEMAP = {ADJ: 'adj', ADV: 'adv', NOUN: 'noun', VERB: 'verb'}
+    _FILEMAP = {ADJ: "adj", ADV: "adv", NOUN: "noun", VERB: "verb"}
     # }
 
     # { Part of speech constants
@@ -1066,18 +1083,32 @@ class WordNetCorpusReader(CorpusReader):
 
     #: A list of file identifiers for all the fileids used by this
     #: corpus reader.
-    _FILES = ('cntlist.rev', 'lexnames', 'index.sense',
-              'index.adj', 'index.adv', 'index.noun', 'index.verb',
-              'data.adj', 'data.adv', 'data.noun', 'data.verb',
-              'adj.exc', 'adv.exc', 'noun.exc', 'verb.exc', )
+    _FILES = (
+        "cntlist.rev",
+        "lexnames",
+        "index.sense",
+        "index.adj",
+        "index.adv",
+        "index.noun",
+        "index.verb",
+        "data.adj",
+        "data.adv",
+        "data.noun",
+        "data.verb",
+        "adj.exc",
+        "adv.exc",
+        "noun.exc",
+        "verb.exc",
+    )
 
     def __init__(self, root, omw_reader):
         """
         Construct a new wordnet corpus reader, with the given root
         directory.
         """
-        super(WordNetCorpusReader, self).__init__(root, self._FILES,
-                                                  encoding=self._ENCODING)
+        super(WordNetCorpusReader, self).__init__(
+            root, self._FILES, encoding=self._ENCODING
+        )
 
         # An index that provides the file offset
         # Map from lemma -> pos -> synset_index -> offset
@@ -1104,7 +1135,7 @@ class WordNetCorpusReader(CorpusReader):
         self._key_synset_file = None
 
         # Load the lexnames
-        for i, line in enumerate(self.open('lexnames')):
+        for i, line in enumerate(self.open("lexnames")):
             index, lexname, _ = line.split()
             assert int(index) == i
             self._lexnames.append(lexname)
@@ -1115,20 +1146,24 @@ class WordNetCorpusReader(CorpusReader):
         # load the exception file data into memory
         self._load_exception_map()
 
-# Open Multilingual WordNet functions, contributed by
-# Nasruddin A’aidil Shari, Sim Wei Ying Geraldine, and Soe Lynn
+    # Open Multilingual WordNet functions, contributed by
+    # Nasruddin A’aidil Shari, Sim Wei Ying Geraldine, and Soe Lynn
 
     def of2ss(self, of):
-        ''' take an id and return the synsets '''
+        """ take an id and return the synsets """
         return self.synset_from_pos_and_offset(of[-1], int(of[:8]))
 
-    def ss2of(self, ss):
-        ''' return the ID of the synset '''
-        return ("{:08d}-{}".format(ss.offset(), ss.pos()))
+    def ss2of(self, ss, lang=None):
+        """ return the ID of the synset """
+        pos = ss.pos()
+        # Only these 3 WordNets retain the satellite pos tag
+        if lang not in ["nld", "lit", "slk"] and pos == "s":
+            pos = "a"
+        return "{:08d}-{}".format(ss.offset(), pos)
 
     def _load_lang_data(self, lang):
-        ''' load the wordnet data of the requested language from the file to
-        the cache, _lang_data '''
+        """ load the wordnet data of the requested language from the file to
+        the cache, _lang_data """
 
         if lang in self._lang_data.keys():
             return
@@ -1136,19 +1171,20 @@ class WordNetCorpusReader(CorpusReader):
         if lang not in self.langs():
             raise WordNetError("Language is not supported.")
 
-        f = self._omw_reader.open('{0:}/wn-data-{0:}.tab'.format(lang))
+        f = self._omw_reader.open("{0:}/wn-data-{0:}.tab".format(lang))
         self.custom_lemmas(f, lang)
         f.close()
 
     def langs(self):
-        ''' return a list of languages supported by Multilingual Wordnet '''
+        """ return a list of languages supported by Multilingual Wordnet """
         import os
-        langs = ['eng']
+
+        langs = ["eng"]
         fileids = self._omw_reader.fileids()
         for fileid in fileids:
             file_name, file_extension = os.path.splitext(fileid)
-            if file_extension == '.tab':
-                langs.append(file_name.split('-')[-1])
+            if file_extension == ".tab":
+                langs.append(file_name.split("-")[-1])
 
         return langs
 
@@ -1156,13 +1192,14 @@ class WordNetCorpusReader(CorpusReader):
         for suffix in self._FILEMAP.values():
 
             # parse each line of the file (ignoring comment lines)
-            for i, line in enumerate(self.open('index.%s' % suffix)):
-                if line.startswith(' '):
+            for i, line in enumerate(self.open("index.%s" % suffix)):
+                if line.startswith(" "):
                     continue
 
                 _iter = iter(line.split())
 
-                def _next_token(): return next(_iter)
+                def _next_token():
+                    return next(_iter)
 
                 try:
 
@@ -1188,14 +1225,12 @@ class WordNetCorpusReader(CorpusReader):
                     _next_token()
 
                     # get synset offsets
-                    synset_offsets = [
-                        int(_next_token()) for _ in range(n_synsets)
-                    ]
+                    synset_offsets = [int(_next_token()) for _ in range(n_synsets)]
 
                 # raise more informative error with file name and line number
                 except (AssertionError, ValueError) as e:
-                    tup = ('index.%s' % suffix), (i + 1), e
-                    raise WordNetError('file %s, line %i: %s' % tup)
+                    tup = ("index.%s" % suffix), (i + 1), e
+                    raise WordNetError("file %s, line %i: %s" % tup)
 
                 # map lemmas and parts of speech to synsets
                 self._lemma_pos_offset_map[lemma][pos] = synset_offsets
@@ -1206,7 +1241,7 @@ class WordNetCorpusReader(CorpusReader):
         # load the exception file data into memory
         for pos, suffix in self._FILEMAP.items():
             self._exception_map[pos] = {}
-            for line in self.open('%s.exc' % suffix):
+            for line in self.open("%s.exc" % suffix):
                 terms = line.split()
                 self._exception_map[pos][terms[0]] = terms[1:]
         self._exception_map[ADJ_SAT] = self._exception_map[ADJ]
@@ -1229,7 +1264,7 @@ class WordNetCorpusReader(CorpusReader):
     def get_version(self):
         fh = self._data_file(ADJ)
         for line in fh:
-            match = re.search(r'WordNet (\d+\.\d+) Copyright', line)
+            match = re.search(r"WordNet (\d+\.\d+) Copyright", line)
             if match is not None:
                 version = match.group(1)
                 fh.seek(0)
@@ -1239,35 +1274,31 @@ class WordNetCorpusReader(CorpusReader):
     # Loading Lemmas
     #############################################################
 
-    def lemma(self, name, lang='eng'):
-        '''Return lemma object that matches the name'''
+    def lemma(self, name, lang="eng"):
+        """Return lemma object that matches the name"""
         # cannot simply split on first '.',
         # e.g.: '.45_caliber.a.01..45_caliber'
-        separator = SENSENUM_RE.search(name).start()
+        separator = SENSENUM_RE.search(name).end()
+
+        synset_name, lemma_name = name[: separator - 1], name[separator:]
 
-        leadingZero = int(name[separator+1]) == 0
-        if (leadingZero):
-            synset_name, lemma_name = name[:separator+3], name[separator+4:]
-        else:
-            synset_name, lemma_name = name[:separator+2], name[separator+3:]
-        
         synset = self.synset(synset_name)
         for lemma in synset.lemmas(lang):
             if lemma._name == lemma_name:
                 return lemma
-        raise WordNetError('no lemma %r in %r' % (lemma_name, synset_name))
+        raise WordNetError("no lemma %r in %r" % (lemma_name, synset_name))
 
     def lemma_from_key(self, key):
         # Keys are case sensitive and always lower-case
         key = key.lower()
 
-        lemma_name, lex_sense = key.split('%')
-        pos_number, lexname_index, lex_id, _, _ = lex_sense.split(':')
+        lemma_name, lex_sense = key.split("%")
+        pos_number, lexname_index, lex_id, _, _ = lex_sense.split(":")
         pos = self._pos_names[int(pos_number)]
 
         # open the key -> synset file if necessary
         if self._key_synset_file is None:
-            self._key_synset_file = self.open('index.sense')
+            self._key_synset_file = self.open("index.sense")
 
         # Find the synset for the lemma.
         synset_line = _binary_search_file(self._key_synset_file, key)
@@ -1287,14 +1318,14 @@ class WordNetCorpusReader(CorpusReader):
     #############################################################
     def synset(self, name):
         # split name into lemma, part of speech and synset number
-        lemma, pos, synset_index_str = name.lower().rsplit('.', 2)
+        lemma, pos, synset_index_str = name.lower().rsplit(".", 2)
         synset_index = int(synset_index_str) - 1
 
         # get the offset for this synset
         try:
             offset = self._lemma_pos_offset_map[lemma][pos][synset_index]
         except KeyError:
-            message = 'no lemma %r with part of speech %r'
+            message = "no lemma %r with part of speech %r"
             raise WordNetError(message % (lemma, pos))
         except IndexError:
             n_senses = len(self._lemma_pos_offset_map[lemma][pos])
@@ -1309,11 +1340,13 @@ class WordNetCorpusReader(CorpusReader):
         synset = self.synset_from_pos_and_offset(pos, offset)
 
         # some basic sanity checks on loaded attributes
-        if pos == 's' and synset._pos == 'a':
-            message = ('adjective satellite requested but only plain '
-                       'adjective found for lemma %r')
+        if pos == "s" and synset._pos == "a":
+            message = (
+                "adjective satellite requested but only plain "
+                "adjective found for lemma %r"
+            )
             raise WordNetError(message % lemma)
-        assert synset._pos == pos or (pos == 'a' and synset._pos == 's')
+        assert synset._pos == pos or (pos == "a" and synset._pos == "s")
 
         # Return the synset object.
         return synset
@@ -1326,7 +1359,7 @@ class WordNetCorpusReader(CorpusReader):
         if pos == ADJ_SAT:
             pos = ADJ
         if self._data_file_map.get(pos) is None:
-            fileid = 'data.%s' % self._FILEMAP[pos]
+            fileid = "data.%s" % self._FILEMAP[pos]
             self._data_file_map[pos] = self.open(fileid)
         return self._data_file_map[pos]
 
@@ -1343,7 +1376,7 @@ class WordNetCorpusReader(CorpusReader):
         self._synset_offset_cache[pos][offset] = synset
         return synset
 
-    @deprecated('Use public method synset_from_pos_and_offset() instead')
+    @deprecated("Use public method synset_from_pos_and_offset() instead")
     def _synset_from_pos_and_offset(self, *args, **kwargs):
         """
         Hack to help people like the readers of
@@ -1360,21 +1393,19 @@ class WordNetCorpusReader(CorpusReader):
         try:
 
             # parse out the definitions and examples from the gloss
-            columns_str, gloss = data_file_line.split('|')
-            gloss = gloss.strip()
-            definitions = []
-            for gloss_part in gloss.split(';'):
-                gloss_part = gloss_part.strip()
-                if gloss_part.startswith('"'):
-                    synset._examples.append(gloss_part.strip('"'))
-                else:
-                    definitions.append(gloss_part)
-            synset._definition = '; '.join(definitions)
+            columns_str, gloss = data_file_line.strip().split("|")
+            definition = re.sub(r"[\"].*?[\"]", "", gloss).strip()
+            examples = re.findall(r'"([^"]*)"', gloss)
+            for example in examples:
+                synset._examples.append(example)
+
+            synset._definition = definition.strip("; ")
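
A standalone sketch of the rewritten gloss parsing above, on an illustrative gloss (not taken from the data files):

    import re

    gloss = 'a member of the genus Canis; "the dog barked all night"'
    definition = re.sub(r"[\"].*?[\"]", "", gloss).strip()  # drop quoted examples
    examples = re.findall(r'"([^"]*)"', gloss)              # collect the examples
    print(definition.strip("; "))  # -> a member of the genus Canis
    print(examples)                # -> ['the dog barked all night']
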
 
             # split the other info into fields
             _iter = iter(columns_str.split())
 
-            def _next_token(): return next(_iter)
+            def _next_token():
+                return next(_iter)
 
             # get the offset
             synset._offset = int(_next_token())
@@ -1394,11 +1425,10 @@ class WordNetCorpusReader(CorpusReader):
                 # get the lex_id (used for sense_keys)
                 lex_id = int(_next_token(), 16)
                 # If the lemma has a syntactic marker, extract it.
-                m = re.match(r'(.*?)(\(.*\))?$', lemma_name)
+                m = re.match(r"(.*?)(\(.*\))?$", lemma_name)
                 lemma_name, syn_mark = m.groups()
                 # create the lemma object
-                lemma = Lemma(self, synset, lemma_name, lexname_index,
-                              lex_id, syn_mark)
+                lemma = Lemma(self, synset, lemma_name, lexname_index, lex_id, syn_mark)
                 synset._lemmas.append(lemma)
                 synset._lemma_names.append(lemma._name)
 
@@ -1409,7 +1439,7 @@ class WordNetCorpusReader(CorpusReader):
                 offset = int(_next_token())
                 pos = _next_token()
                 lemma_ids_str = _next_token()
-                if lemma_ids_str == '0000':
+                if lemma_ids_str == "0000":
                     synset._pointers[symbol].add((pos, offset))
                 else:
                     source_index = int(lemma_ids_str[:2], 16) - 1
@@ -1417,7 +1447,7 @@ class WordNetCorpusReader(CorpusReader):
                     source_lemma_name = synset._lemmas[source_index]._name
                     lemma_pointers = synset._lemma_pointers
                     tups = lemma_pointers[source_lemma_name, symbol]
-                    tups.add((pos, offset, target_index))
+                    tups.append((pos, offset, target_index))
 
             # read the verb frames
             try:
@@ -1428,7 +1458,7 @@ class WordNetCorpusReader(CorpusReader):
                 for _ in range(frame_count):
                     # read the plus sign
                     plus = _next_token()
-                    assert plus == '+'
+                    assert plus == "+"
                     # read the frame and lemma number
                     frame_number = int(_next_token())
                     frame_string_fmt = VERB_FRAME_STRINGS[frame_number]
@@ -1438,20 +1468,16 @@ class WordNetCorpusReader(CorpusReader):
                         synset._frame_ids.append(frame_number)
                         for lemma in synset._lemmas:
                             lemma._frame_ids.append(frame_number)
-                            lemma._frame_strings.append(
-                                frame_string_fmt % lemma._name
-                            )
+                            lemma._frame_strings.append(frame_string_fmt % lemma._name)
                     # only a specific word in the synset
                     else:
                         lemma = synset._lemmas[lemma_number - 1]
                         lemma._frame_ids.append(frame_number)
-                        lemma._frame_strings.append(
-                            frame_string_fmt % lemma._name
-                        )
+                        lemma._frame_strings.append(frame_string_fmt % lemma._name)
 
         # raise a more informative error with line text
         except ValueError as e:
-            raise WordNetError('line %r: %s' % (data_file_line, e))
+            raise WordNetError("line %r: %s" % (data_file_line, e))
 
         # set sense keys for Lemma objects - note that this has to be
         # done afterwards so that the relations are available
@@ -1459,27 +1485,79 @@ class WordNetCorpusReader(CorpusReader):
             if synset._pos == ADJ_SAT:
                 head_lemma = synset.similar_tos()[0]._lemmas[0]
                 head_name = head_lemma._name
-                head_id = '%02d' % head_lemma._lex_id
+                head_id = "%02d" % head_lemma._lex_id
             else:
-                head_name = head_id = ''
-            tup = (lemma._name, WordNetCorpusReader._pos_numbers[synset._pos],
-                   lemma._lexname_index, lemma._lex_id, head_name, head_id)
-            lemma._key = ('%s%%%d:%02d:%02d:%s:%s' % tup).lower()
+                head_name = head_id = ""
+            tup = (
+                lemma._name,
+                WordNetCorpusReader._pos_numbers[synset._pos],
+                lemma._lexname_index,
+                lemma._lex_id,
+                head_name,
+                head_id,
+            )
+            lemma._key = ("%s%%%d:%02d:%02d:%s:%s" % tup).lower()
 
         # the canonical name is based on the first lemma
         lemma_name = synset._lemmas[0]._name.lower()
         offsets = self._lemma_pos_offset_map[lemma_name][synset._pos]
         sense_index = offsets.index(synset._offset)
         tup = lemma_name, synset._pos, sense_index + 1
-        synset._name = '%s.%s.%02i' % tup
+        synset._name = "%s.%s.%02i" % tup
 
         return synset
 
+    def synset_from_sense_key(self, sense_key):
+        """
+        Retrieves synset based on a given sense_key. Sense keys can be
+        obtained from lemma.key()
+
+        From https://wordnet.princeton.edu/documentation/senseidx5wn:
+        A sense_key is represented as:
+            lemma % lex_sense (e.g. 'dog%1:18:01::')
+        where lex_sense is encoded as:
+            ss_type:lex_filenum:lex_id:head_word:head_id
+
+        lemma:       ASCII text of word/collocation, in lower case
+        ss_type:     synset type for the sense (1 digit int)
+                     The synset type is encoded as follows:
+                     1    NOUN
+                     2    VERB
+                     3    ADJECTIVE
+                     4    ADVERB
+                     5    ADJECTIVE SATELLITE
+        lex_filenum: name of lexicographer file containing the synset for the sense (2 digit int)
+        lex_id:      when paired with lemma, uniquely identifies a sense in the lexicographer file (2 digit int)
+        head_word:   lemma of the first word in satellite's head synset
+                     Only used if sense is in an adjective satellite synset
+        head_id:     uniquely identifies sense in a lexicographer file when paired with head_word
+                     Only used if head_word is present (2 digit int)
+        """
+        sense_key_regex = re.compile(r"(.*)\%(.*):(.*):(.*):(.*):(.*)")
+        synset_types = {1: NOUN, 2: VERB, 3: ADJ, 4: ADV, 5: ADJ_SAT}
+        lemma, ss_type, _, lex_id, _, _ = sense_key_regex.match(sense_key).groups()
+
+        # check that information extracted from sense_key is valid
+        error = None
+        if not lemma:
+            error = "lemma"
+        elif int(ss_type) not in synset_types:
+            error = "ss_type"
+        elif int(lex_id) < 0 or int(lex_id) > 99:
+            error = "lex_id"
+        if error:
+            raise WordNetError(
+                "valid {} could not be extracted from the sense key".format(error)
+            )
+
+        synset_id = ".".join([lemma, synset_types[int(ss_type)], lex_id])
+        return self.synset(synset_id)
+
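
A small sketch of how the regex above splits a sense key into its fields, using the example key from the docstring:

    import re

    sense_key_regex = re.compile(r"(.*)\%(.*):(.*):(.*):(.*):(.*)")
    lemma, ss_type, lex_filenum, lex_id, head_word, head_id = sense_key_regex.match(
        "dog%1:18:01::"
    ).groups()
    print(lemma, ss_type, lex_filenum, lex_id)  # -> dog 1 18 01
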
     #############################################################
     # Retrieve synsets and lemmas.
     #############################################################
 
-    def synsets(self, lemma, pos=None, lang='eng', check_exceptions=True):
+    def synsets(self, lemma, pos=None, lang="eng", check_exceptions=True):
         """Load all synsets with a given lemma and part of speech tag.
         If no pos is specified, all synsets for all parts of speech
         will be loaded.
@@ -1488,36 +1566,41 @@ class WordNetCorpusReader(CorpusReader):
         """
         lemma = lemma.lower()
 
-        if lang == 'eng':
+        if lang == "eng":
             get_synset = self.synset_from_pos_and_offset
             index = self._lemma_pos_offset_map
             if pos is None:
                 pos = POS_LIST
-            return [get_synset(p, offset)
-                    for p in pos
-                    for form in self._morphy(lemma, p, check_exceptions)
-                    for offset in index[form].get(p, [])]
+            return [
+                get_synset(p, offset)
+                for p in pos
+                for form in self._morphy(lemma, p, check_exceptions)
+                for offset in index[form].get(p, [])
+            ]
 
         else:
             self._load_lang_data(lang)
             synset_list = []
-            for l in self._lang_data[lang][1][lemma]:
-                if pos is not None and l[-1] != pos:
-                    continue
-                synset_list.append(self.of2ss(l))
+            if lemma in self._lang_data[lang][1]:
+                for l in self._lang_data[lang][1][lemma]:
+                    if pos is not None and l[-1] != pos:
+                        continue
+                    synset_list.append(self.of2ss(l))
             return synset_list
 
-    def lemmas(self, lemma, pos=None, lang='eng'):
+    def lemmas(self, lemma, pos=None, lang="eng"):
         """Return all Lemma objects with a name matching the specified lemma
         name and part of speech tag. Matches any part of speech tag if none is
         specified."""
 
         lemma = lemma.lower()
-        if lang == 'eng':
-            return [lemma_obj
-                    for synset in self.synsets(lemma, pos)
-                    for lemma_obj in synset.lemmas()
-                    if lemma_obj.name().lower() == lemma]
+        if lang == "eng":
+            return [
+                lemma_obj
+                for synset in self.synsets(lemma, pos)
+                for lemma_obj in synset.lemmas()
+                if lemma_obj.name().lower() == lemma
+            ]
 
         else:
             self._load_lang_data(lang)
@@ -1531,18 +1614,19 @@ class WordNetCorpusReader(CorpusReader):
                         lemmas.append(lemma_obj)
             return lemmas
 
-    def all_lemma_names(self, pos=None, lang='eng'):
+    def all_lemma_names(self, pos=None, lang="eng"):
         """Return all lemma names for all synsets for the given
         part of speech tag and language or languages. If pos is
         not specified, all synsets for all parts of speech will
         be used."""
 
-        if lang == 'eng':
+        if lang == "eng":
             if pos is None:
                 return iter(self._lemma_pos_offset_map)
             else:
                 return (
-                    lemma for lemma in self._lemma_pos_offset_map
+                    lemma
+                    for lemma in self._lemma_pos_offset_map
                     if pos in self._lemma_pos_offset_map[lemma]
                 )
         else:
@@ -1553,7 +1637,7 @@ class WordNetCorpusReader(CorpusReader):
                     continue
                 lemma.extend(self._lang_data[lang][0][i])
 
-            lemma = list(set(lemma))
+            lemma = iter(set(lemma))
             return lemma
 
     def all_synsets(self, pos=None):
@@ -1577,7 +1661,7 @@ class WordNetCorpusReader(CorpusReader):
             # be moved while we're not looking.
             if pos_tag == ADJ_SAT:
                 pos_tag = ADJ
-            fileid = 'data.%s' % self._FILEMAP[pos_tag]
+            fileid = "data.%s" % self._FILEMAP[pos_tag]
             data_file = self.open(fileid)
 
             try:
@@ -1614,35 +1698,33 @@ class WordNetCorpusReader(CorpusReader):
             else:
                 data_file.close()
 
-    def words(self, lang='eng'):
+    def words(self, lang="eng"):
         """return lemmas of the given language as list of words"""
         return self.all_lemma_names(lang=lang)
 
-    def license(self, lang='eng'):
+    def license(self, lang="eng"):
         """Return the contents of LICENSE (for omw)
            use lang=lang to get the license for an individual language"""
-        if lang == 'eng':
+        if lang == "eng":
             return self.open("LICENSE").read()
         elif lang in self.langs():
             return self._omw_reader.open("{}/LICENSE".format(lang)).read()
-        elif lang == 'omw':
+        elif lang == "omw":
             # under the assumption you don't mean Omwunra-Toqura
             return self._omw_reader.open("LICENSE").read()
         elif lang in self._lang_data:
-            raise WordNetError(
-                "Cannot determine license for user-provided tab file"
-            )
+            raise WordNetError("Cannot determine license for user-provided tab file")
         else:
             raise WordNetError("Language is not supported.")
 
-    def readme(self, lang='omw'):
+    def readme(self, lang="omw"):
         """Return the contents of README (for omw)
            use lang=lang to get the readme for an individual language"""
-        if lang == 'eng':
+        if lang == "eng":
             return self.open("README").read()
         elif lang in self.langs():
             return self._omw_reader.open("{}/README".format(lang)).read()
-        elif lang == 'omw':
+        elif lang == "omw":
             # under the assumption you don't mean Omwunra-Toqura
             return self._omw_reader.open("README").read()
         elif lang in self._lang_data:
@@ -1650,14 +1732,14 @@ class WordNetCorpusReader(CorpusReader):
         else:
             raise WordNetError("Language is not supported.")
 
-    def citation(self, lang='omw'):
+    def citation(self, lang="omw"):
         """Return the contents of citation.bib file (for omw)
            use lang=lang to get the citation for an individual language"""
-        if lang == 'eng':
+        if lang == "eng":
             return self.open("citation.bib").read()
         elif lang in self.langs():
             return self._omw_reader.open("{}/citation.bib".format(lang)).read()
-        elif lang == 'omw':
+        elif lang == "omw":
             # under the assumption you don't mean Omwunra-Toqura
             return self._omw_reader.open("citation.bib").read()
         elif lang in self._lang_data:
@@ -1671,46 +1753,46 @@ class WordNetCorpusReader(CorpusReader):
     def lemma_count(self, lemma):
         """Return the frequency count for this Lemma"""
         # Currently, count only works for English
-        if lemma._lang != 'eng':
+        if lemma._lang != "eng":
             return 0
         # open the count file if we haven't already
         if self._key_count_file is None:
-            self._key_count_file = self.open('cntlist.rev')
+            self._key_count_file = self.open("cntlist.rev")
         # find the key in the counts file and return the count
         line = _binary_search_file(self._key_count_file, lemma._key)
         if line:
-            return int(line.rsplit(' ', 1)[-1])
+            return int(line.rsplit(" ", 1)[-1])
         else:
             return 0
 
-    def path_similarity(
-        self, synset1, synset2, verbose=False, simulate_root=True
-    ):
+    def path_similarity(self, synset1, synset2, verbose=False, simulate_root=True):
         return synset1.path_similarity(synset2, verbose, simulate_root)
+
     path_similarity.__doc__ = Synset.path_similarity.__doc__
 
-    def lch_similarity(
-        self, synset1, synset2, verbose=False, simulate_root=True
-    ):
+    def lch_similarity(self, synset1, synset2, verbose=False, simulate_root=True):
         return synset1.lch_similarity(synset2, verbose, simulate_root)
+
     lch_similarity.__doc__ = Synset.lch_similarity.__doc__
 
-    def wup_similarity(
-        self, synset1, synset2, verbose=False, simulate_root=True
-    ):
+    def wup_similarity(self, synset1, synset2, verbose=False, simulate_root=True):
         return synset1.wup_similarity(synset2, verbose, simulate_root)
+
     wup_similarity.__doc__ = Synset.wup_similarity.__doc__
 
     def res_similarity(self, synset1, synset2, ic, verbose=False):
         return synset1.res_similarity(synset2, ic, verbose)
+
     res_similarity.__doc__ = Synset.res_similarity.__doc__
 
     def jcn_similarity(self, synset1, synset2, ic, verbose=False):
         return synset1.jcn_similarity(synset2, ic, verbose)
+
     jcn_similarity.__doc__ = Synset.jcn_similarity.__doc__
 
     def lin_similarity(self, synset1, synset2, ic, verbose=False):
         return synset1.lin_similarity(synset2, ic, verbose)
+
     lin_similarity.__doc__ = Synset.lin_similarity.__doc__
 
     #############################################################
@@ -1753,13 +1835,30 @@ class WordNetCorpusReader(CorpusReader):
             return None
 
     MORPHOLOGICAL_SUBSTITUTIONS = {
-        NOUN: [('s', ''), ('ses', 's'), ('ves', 'f'), ('xes', 'x'),
-               ('zes', 'z'), ('ches', 'ch'), ('shes', 'sh'),
-               ('men', 'man'), ('ies', 'y')],
-        VERB: [('s', ''), ('ies', 'y'), ('es', 'e'), ('es', ''),
-               ('ed', 'e'), ('ed', ''), ('ing', 'e'), ('ing', '')],
-        ADJ: [('er', ''), ('est', ''), ('er', 'e'), ('est', 'e')],
-        ADV: []}
+        NOUN: [
+            ("s", ""),
+            ("ses", "s"),
+            ("ves", "f"),
+            ("xes", "x"),
+            ("zes", "z"),
+            ("ches", "ch"),
+            ("shes", "sh"),
+            ("men", "man"),
+            ("ies", "y"),
+        ],
+        VERB: [
+            ("s", ""),
+            ("ies", "y"),
+            ("es", "e"),
+            ("es", ""),
+            ("ed", "e"),
+            ("ed", ""),
+            ("ing", "e"),
+            ("ing", ""),
+        ],
+        ADJ: [("er", ""), ("est", ""), ("er", "e"), ("est", "e")],
+        ADV: [],
+    }
 
     MORPHOLOGICAL_SUBSTITUTIONS[ADJ_SAT] = MORPHOLOGICAL_SUBSTITUTIONS[ADJ]
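
A minimal sketch of how these suffix rules are applied (cf. apply_rules() below); the word is illustrative:

    substitutions = [("s", ""), ("ches", "ch"), ("ies", "y")]
    form = "churches"
    candidates = [
        form[: -len(old)] + new for old, new in substitutions if form.endswith(old)
    ]
    print(candidates)  # -> ['churche', 'church']; bogus forms are filtered later
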
 
@@ -1775,10 +1874,12 @@ class WordNetCorpusReader(CorpusReader):
         substitutions = self.MORPHOLOGICAL_SUBSTITUTIONS[pos]
 
         def apply_rules(forms):
-            return [form[:-len(old)] + new
-                    for form in forms
-                    for old, new in substitutions
-                    if form.endswith(old)]
+            return [
+                form[: -len(old)] + new
+                for form in forms
+                for old, new in substitutions
+                if form.endswith(old)
+            ]
 
         def filter_forms(forms):
             result = []
@@ -1885,32 +1986,35 @@ class WordNetCorpusReader(CorpusReader):
         :param lang: ISO 639-3 code of the language of the tab file
         """
         if len(lang) != 3:
-            raise ValueError('lang should be a (3 character) ISO 639-3 code')
+            raise ValueError("lang should be a (3 character) ISO 639-3 code")
         self._lang_data[lang] = [defaultdict(list), defaultdict(list)]
-        for l in tab_file.readlines():
-            if isinstance(l, bytes):
+        for line in tab_file.readlines():
+            if isinstance(line, bytes):
                 # Support byte-stream files (e.g. as returned by Python 2's
                 # open() function) as well as text-stream ones
-                l = l.decode('utf-8')
-            l = l.replace('\n', '')
-            l = l.replace(' ', '_')
-            if l[0] != '#':
-                word = l.split('\t')
-                self._lang_data[lang][0][word[0]].append(word[2])
-                self._lang_data[lang][1][word[2].lower()].append(word[0])
+                line = line.decode("utf-8")
+            if not line.startswith("#"):
+                offset_pos, lemma_type, lemma = line.strip().split("\t")
+                lemma = lemma.strip().replace(" ", "_")
+                self._lang_data[lang][0][offset_pos].append(lemma)
+                self._lang_data[lang][1][lemma.lower()].append(offset_pos)
+        # Make sure no more entries are accidentally added subsequently
+        self._lang_data[lang][0].default_factory = None
+        self._lang_data[lang][1].default_factory = None
 
 
 ######################################################################
 # WordNet Information Content Corpus Reader
 ######################################################################
 
+
 class WordNetICCorpusReader(CorpusReader):
     """
     A corpus reader for the WordNet information content corpus.
     """
 
     def __init__(self, root, fileids):
-        CorpusReader.__init__(self, root, fileids, encoding='utf8')
+        CorpusReader.__init__(self, root, fileids, encoding="utf8")
 
     # this load function would be more efficient if the data was pickled
     # Note that we can't use NLTK's frequency distributions because
@@ -1955,6 +2059,7 @@ class WordNetICCorpusReader(CorpusReader):
 # More information about the metrics is available at
 # http://marimba.d.umn.edu/similarity/measures.html
 
+
 def path_similarity(synset1, synset2, verbose=False, simulate_root=True):
     return synset1.path_similarity(synset2, verbose, simulate_root)
 
@@ -2006,9 +2111,8 @@ def _lcs_ic(synset1, synset2, ic, verbose=False):
     """
     if synset1._pos != synset2._pos:
         raise WordNetError(
-            'Computing the least common subsumer requires '
-            '%s and %s to have the same part of speech.' %
-            (synset1, synset2)
+            "Computing the least common subsumer requires "
+            "%s and %s to have the same part of speech." % (synset1, synset2)
         )
 
     ic1 = information_content(synset1, ic)
@@ -2027,11 +2131,12 @@ def _lcs_ic(synset1, synset2, ic, verbose=False):
 
 # Utility functions
 
+
 def information_content(synset, ic):
     try:
         icpos = ic[synset._pos]
     except KeyError:
-        msg = 'Information content file has no entries for part-of-speech: %s'
+        msg = "Information content file has no entries for part-of-speech: %s"
         raise WordNetError(msg % synset._pos)
 
     counts = icpos[synset._offset]
@@ -2044,10 +2149,11 @@ def information_content(synset, ic):
 # get the part of speech (NOUN or VERB) from the information content record
 # (each identifier has a 'n' or 'v' suffix)
 
+
 def _get_pos(field):
-    if field[-1] == 'n':
+    if field[-1] == "n":
         return NOUN
-    elif field[-1] == 'v':
+    elif field[-1] == "v":
         return VERB
     else:
         msg = (
@@ -2060,5 +2166,5 @@ def _get_pos(field):
 # unload corpus after tests
 def teardown_module(module=None):
     from nltk.corpus import wordnet
-    wordnet._unload()
 
+    wordnet._unload()
diff --git a/nlp_resource_data/nltk/corpus/reader/wordnet.pyc b/nlp_resource_data/nltk/corpus/reader/wordnet.pyc
deleted file mode 100755 (executable)
index 5351bbe..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/wordnet.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 295e91e..6f928b7
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: XML Corpus Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -10,15 +10,9 @@ Corpus reader for corpora whose documents are xml files.
 
 (note -- not named 'xml' to avoid conflicting w/ standard xml package)
 """
-from __future__ import print_function, unicode_literals
 
 import codecs
-
-# Use the c version of ElementTree, which is faster, if possible:
-try: from xml.etree import cElementTree as ElementTree
-except ImportError: from xml.etree import ElementTree
-
-from six import string_types
+from xml.etree import ElementTree
 
 from nltk.data import SeekableUnicodeStreamReader
 from nltk.tokenize import WordPunctTokenizer
@@ -27,6 +21,7 @@ from nltk.internals import ElementWrapper
 from nltk.corpus.reader.api import CorpusReader
 from nltk.corpus.reader.util import *
 
+
 class XMLCorpusReader(CorpusReader):
     """
     Corpus reader for corpora whose documents are xml files.
@@ -35,6 +30,7 @@ class XMLCorpusReader(CorpusReader):
     ``encoding`` argument, because the unicode encoding is specified by
     the XML files themselves.  See the XML specs for more info.
     """
+
     def __init__(self, root, fileids, wrap_etree=False):
         self._wrap_etree = wrap_etree
         CorpusReader.__init__(self, root, fileids)
@@ -43,8 +39,8 @@ class XMLCorpusReader(CorpusReader):
         # Make sure we have exactly one file -- no concatenating XML.
         if fileid is None and len(self._fileids) == 1:
             fileid = self._fileids[0]
-        if not isinstance(fileid, string_types):
-            raise TypeError('Expected a single file identifier string')
+        if not isinstance(fileid, str):
+            raise TypeError("Expected a single file identifier string")
         # Read the XML in using ElementTree.
         elt = ElementTree.parse(self.abspath(fileid).open()).getroot()
         # If requested, wrap it.
@@ -65,7 +61,7 @@ class XMLCorpusReader(CorpusReader):
 
         elt = self.xml(fileid)
         encoding = self.encoding(fileid)
-        word_tokenizer=WordPunctTokenizer()
+        word_tokenizer = WordPunctTokenizer()
         iterator = elt.getiterator()
         out = []
 
@@ -79,8 +75,10 @@ class XMLCorpusReader(CorpusReader):
         return out
 
     def raw(self, fileids=None):
-        if fileids is None: fileids = self._fileids
-        elif isinstance(fileids, string_types): fileids = [fileids]
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, str):
+            fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
 
@@ -141,9 +139,10 @@ class XMLCorpusView(StreamBackedCorpusView):
 
                 elt_handler(elt, tagspec) -> value
         """
-        if elt_handler: self.handle_elt = elt_handler
+        if elt_handler:
+            self.handle_elt = elt_handler
 
-        self._tagspec = re.compile(tagspec+r'\Z')
+        self._tagspec = re.compile(tagspec + r"\Z")
         """The tag specification for this corpus view."""
 
         self._tag_context = {0: ()}
@@ -163,18 +162,18 @@ class XMLCorpusView(StreamBackedCorpusView):
             finally:
                 infile.close()
         else:
-            with open(fileid, 'rb') as infile:
+            with open(fileid, "rb") as infile:
                 s = infile.readline()
         if s.startswith(codecs.BOM_UTF16_BE):
-            return 'utf-16-be'
+            return "utf-16-be"
         if s.startswith(codecs.BOM_UTF16_LE):
-            return 'utf-16-le'
+            return "utf-16-le"
         if s.startswith(codecs.BOM_UTF32_BE):
-            return 'utf-32-be'
+            return "utf-32-be"
         if s.startswith(codecs.BOM_UTF32_LE):
-            return 'utf-32-le'
+            return "utf-32-le"
         if s.startswith(codecs.BOM_UTF8):
-            return 'utf-8'
+            return "utf-8"
         m = re.match(br'\s*<\?xml\b.*\bencoding="([^"]+)"', s)
         if m:
             return m.group(1).decode()
@@ -182,7 +181,7 @@ class XMLCorpusView(StreamBackedCorpusView):
         if m:
             return m.group(1).decode()
         # No encoding found -- what should the default be?
-        return 'utf-8'
+        return "utf-8"
 
     def handle_elt(self, elt, context):
         """
@@ -208,7 +207,8 @@ class XMLCorpusView(StreamBackedCorpusView):
 
     #: A regular expression that matches XML fragments that do not
     #: contain any un-closed tags.
-    _VALID_XML_RE = re.compile(r"""
+    _VALID_XML_RE = re.compile(
+        r"""
         [^<]*
         (
           ((<!--.*?-->)                         |  # comment
@@ -217,17 +217,19 @@ class XMLCorpusView(StreamBackedCorpusView):
            (<[^!>][^>]*>))                         # tag or PI
           [^<]*)*
         \Z""",
-        re.DOTALL|re.VERBOSE)
+        re.DOTALL | re.VERBOSE,
+    )
 
     #: A regular expression used to extract the tag name from a start tag,
     #: end tag, or empty-elt tag string.
-    _XML_TAG_NAME = re.compile('<\s*/?\s*([^\s>]+)')
+    _XML_TAG_NAME = re.compile(r"<\s*/?\s*([^\s>]+)")
 
     #: A regular expression used to find all start-tags, end-tags, and
     #: empty-elt tags in an XML file.  This regexp is more lenient than
     #: the XML spec -- e.g., it allows spaces in some places where the
     #: spec does not.
-    _XML_PIECE = re.compile(r"""
+    _XML_PIECE = re.compile(
+        r"""
         # Include these so we can skip them:
         (?P<COMMENT>        <!--.*?-->                          )|
         (?P<CDATA>          <![CDATA[.*?]]>                     )|
@@ -237,7 +239,8 @@ class XMLCorpusView(StreamBackedCorpusView):
         (?P<EMPTY_ELT_TAG>  <\s*[^>/\?!\s][^>]*/\s*>            )|
         (?P<START_TAG>      <\s*[^>/\?!\s][^>]*>                )|
         (?P<END_TAG>        <\s*/[^>/\?!\s][^>]*>               )""",
-        re.DOTALL|re.VERBOSE)
+        re.DOTALL | re.VERBOSE,
+    )
 
     def _read_xml_fragment(self, stream):
         """
@@ -248,7 +251,7 @@ class XMLCorpusView(StreamBackedCorpusView):
         then this function either backtracks to the last '<', or reads
         another block.
         """
-        fragment = ''
+        fragment = ""
 
         if isinstance(stream, SeekableUnicodeStreamReader):
             startpos = stream.tell()
@@ -262,26 +265,27 @@ class XMLCorpusView(StreamBackedCorpusView):
                 return fragment
 
             # Do we have a fragment that will never be well-formed?
-            if re.search('[<>]', fragment).group(0) == '>':
+            if re.search("[<>]", fragment).group(0) == ">":
                 pos = stream.tell() - (
-                    len(fragment)-re.search('[<>]', fragment).end())
+                    len(fragment) - re.search("[<>]", fragment).end()
+                )
                 raise ValueError('Unexpected ">" near char %s' % pos)
 
             # End of file?
             if not xml_block:
-                raise ValueError('Unexpected end of file: tag not closed')
+                raise ValueError("Unexpected end of file: tag not closed")
 
             # If not, then we must be in the middle of a <..tag..>.
             # If appropriate, backtrack to the most recent '<'
             # character.
-            last_open_bracket = fragment.rfind('<')
+            last_open_bracket = fragment.rfind("<")
             if last_open_bracket > 0:
                 if self._VALID_XML_RE.match(fragment[:last_open_bracket]):
                     if isinstance(stream, SeekableUnicodeStreamReader):
                         stream.seek(startpos)
                         stream.char_seek_forward(last_open_bracket)
                     else:
-                        stream.seek(-(len(fragment)-last_open_bracket), 1)
+                        stream.seek(-(len(fragment) - last_open_bracket), 1)
                     return fragment[:last_open_bracket]
 
             # Otherwise, read another block. (i.e., return to the
@@ -293,67 +297,71 @@ class XMLCorpusView(StreamBackedCorpusView):
         matches ``tagspec``, and return the result of applying
         ``elt_handler`` to each element found.
         """
-        if tagspec is None: tagspec = self._tagspec
-        if elt_handler is None: elt_handler = self.handle_elt
+        if tagspec is None:
+            tagspec = self._tagspec
+        if elt_handler is None:
+            elt_handler = self.handle_elt
 
         # Use a stack of strings to keep track of our context:
         context = list(self._tag_context.get(stream.tell()))
-        assert context is not None # check this -- could it ever happen?
+        assert context is not None  # check this -- could it ever happen?
 
         elts = []
 
-        elt_start = None # where does the elt start
-        elt_depth = None # what context depth
-        elt_text = ''
+        elt_start = None  # where does the elt start
+        elt_depth = None  # what context depth
+        elt_text = ""
 
-        while elts==[] or elt_start is not None:
+        while elts == [] or elt_start is not None:
             if isinstance(stream, SeekableUnicodeStreamReader):
                 startpos = stream.tell()
             xml_fragment = self._read_xml_fragment(stream)
 
             # End of file.
             if not xml_fragment:
-                if elt_start is None: break
-                else: raise ValueError('Unexpected end of file')
+                if elt_start is None:
+                    break
+                else:
+                    raise ValueError("Unexpected end of file")
 
             # Process each <tag> in the xml fragment.
             for piece in self._XML_PIECE.finditer(xml_fragment):
                 if self._DEBUG:
-                    print('%25s %s' % ('/'.join(context)[-20:], piece.group()))
+                    print("%25s %s" % ("/".join(context)[-20:], piece.group()))
 
-                if piece.group('START_TAG'):
+                if piece.group("START_TAG"):
                     name = self._XML_TAG_NAME.match(piece.group()).group(1)
                     # Keep context up-to-date.
                     context.append(name)
                     # Is this one of the elts we're looking for?
                     if elt_start is None:
-                        if re.match(tagspec, '/'.join(context)):
+                        if re.match(tagspec, "/".join(context)):
                             elt_start = piece.start()
                             elt_depth = len(context)
 
-                elif piece.group('END_TAG'):
+                elif piece.group("END_TAG"):
                     name = self._XML_TAG_NAME.match(piece.group()).group(1)
                     # sanity checks:
                     if not context:
-                        raise ValueError('Unmatched tag </%s>' % name)
+                        raise ValueError("Unmatched tag </%s>" % name)
                     if name != context[-1]:
-                        raise ValueError('Unmatched tag <%s>...</%s>' %
-                                         (context[-1], name))
+                        raise ValueError(
+                            "Unmatched tag <%s>...</%s>" % (context[-1], name)
+                        )
                     # Is this the end of an element?
                     if elt_start is not None and elt_depth == len(context):
-                        elt_text += xml_fragment[elt_start:piece.end()]
-                        elts.append( (elt_text, '/'.join(context)) )
+                        elt_text += xml_fragment[elt_start : piece.end()]
+                        elts.append((elt_text, "/".join(context)))
                         elt_start = elt_depth = None
-                        elt_text = ''
+                        elt_text = ""
                     # Keep context up-to-date
                     context.pop()
 
-                elif piece.group('EMPTY_ELT_TAG'):
+                elif piece.group("EMPTY_ELT_TAG"):
                     name = self._XML_TAG_NAME.match(piece.group()).group(1)
                     if elt_start is None:
-                        if re.match(tagspec, '/'.join(context)+'/'+name):
-                            elts.append((piece.group(),
-                                         '/'.join(context)+'/'+name))
+                        if re.match(tagspec, "/".join(context) + "/" + name):
+                            elts.append((piece.group(), "/".join(context) + "/" + name))
 
             if elt_start is not None:
                 # If we haven't found any elements yet, then keep
@@ -369,15 +377,15 @@ class XMLCorpusView(StreamBackedCorpusView):
                     # take back the last start-tag, and return what
                     # we've gotten so far (elts is non-empty).
                     if self._DEBUG:
-                        print(' '*36+'(backtrack)')
+                        print(" " * 36 + "(backtrack)")
                     if isinstance(stream, SeekableUnicodeStreamReader):
                         stream.seek(startpos)
                         stream.char_seek_forward(elt_start)
                     else:
-                        stream.seek(-(len(xml_fragment)-elt_start), 1)
-                    context = context[:elt_depth-1]
+                        stream.seek(-(len(xml_fragment) - elt_start), 1)
+                    context = context[: elt_depth - 1]
                     elt_start = elt_depth = None
-                    elt_text = ''
+                    elt_text = ""
 
         # Update the _tag_context dict.
         pos = stream.tell()
@@ -386,7 +394,10 @@ class XMLCorpusView(StreamBackedCorpusView):
         else:
             self._tag_context[pos] = tuple(context)
 
-        return [elt_handler(ElementTree.fromstring(
-                                  elt.encode('ascii', 'xmlcharrefreplace')),
-                            context)
-                for (elt, context) in elts]
+        return [
+            elt_handler(
+                ElementTree.fromstring(elt.encode("ascii", "xmlcharrefreplace")),
+                context,
+            )
+            for (elt, context) in elts
+        ]
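
The reformatted return above encodes each collected fragment to ASCII with the xmlcharrefreplace error handler, so non-ASCII text survives as numeric character references before ElementTree parses it. A sketch with an illustrative fragment:

    from xml.etree import ElementTree

    fragment = "<w>Ælfric</w>"                            # illustrative only
    data = fragment.encode("ascii", "xmlcharrefreplace")  # b"<w>&#198;lfric</w>"
    elt = ElementTree.fromstring(data)
    print(elt.text)                                       # -> Ælfric
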
diff --git a/nlp_resource_data/nltk/corpus/reader/xmldocs.pyc b/nlp_resource_data/nltk/corpus/reader/xmldocs.pyc
deleted file mode 100755 (executable)
index 7ac3910..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/xmldocs.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/reader/ycoe.py b/nlp_resource_data/nltk/corpus/reader/ycoe.py
old mode 100755 (executable)
new mode 100644 (file)
index a8870b1..75ffda2
@@ -22,8 +22,6 @@ to the YCOE standard: http://www-users.york.ac.uk/~lang22/YCOE/YcoeHome.htm
 import os
 import re
 
-from six import string_types
-
 from nltk.tokenize import RegexpTokenizer
 from nltk.corpus.reader.bracket_parse import BracketParseCorpusReader
 from nltk.corpus.reader.tagged import TaggedCorpusReader
@@ -31,28 +29,31 @@ from nltk.corpus.reader.tagged import TaggedCorpusReader
 from nltk.corpus.reader.util import *
 from nltk.corpus.reader.api import *
 
+
 class YCOECorpusReader(CorpusReader):
     """
     Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old
     English Prose (YCOE), a 1.5 million word syntactically-annotated
     corpus of Old English prose texts.
     """
-    def __init__(self, root, encoding='utf8'):
+
+    def __init__(self, root, encoding="utf8"):
         CorpusReader.__init__(self, root, [], encoding)
 
         self._psd_reader = YCOEParseCorpusReader(
-            self.root.join('psd'), '.*', '.psd', encoding=encoding)
-        self._pos_reader = YCOETaggedCorpusReader(
-            self.root.join('pos'), '.*', '.pos')
+            self.root.join("psd"), ".*", ".psd", encoding=encoding
+        )
+        self._pos_reader = YCOETaggedCorpusReader(self.root.join("pos"), ".*", ".pos")
 
         # Make sure we have a consistent set of items:
         documents = set(f[:-4] for f in self._psd_reader.fileids())
         if set(f[:-4] for f in self._pos_reader.fileids()) != documents:
-            raise ValueError('Items in "psd" and "pos" '
-                             'subdirectories do not match.')
+            raise ValueError('Items in "psd" and "pos" ' "subdirectories do not match.")
 
-        fileids = sorted(['%s.psd' % doc for doc in documents] +
-                       ['%s.pos' % doc for doc in documents])
+        fileids = sorted(
+            ["%s.psd" % doc for doc in documents]
+            + ["%s.pos" % doc for doc in documents]
+        )
         CorpusReader.__init__(self, root, fileids, encoding)
         self._documents = sorted(documents)
 
@@ -64,11 +65,11 @@ class YCOECorpusReader(CorpusReader):
         """
         if fileids is None:
             return self._documents
-        if isinstance(fileids, string_types):
+        if isinstance(fileids, str):
             fileids = [fileids]
         for f in fileids:
             if f not in self._fileids:
-                raise KeyError('File id %s not found' % fileids)
+                raise KeyError("File id %s not found" % fileids)
         # Strip off the '.pos' and '.psd' extensions.
         return sorted(set(f[:-4] for f in fileids))
 
@@ -79,10 +80,14 @@ class YCOECorpusReader(CorpusReader):
         """
         if documents is None:
             return self._fileids
-        elif isinstance(documents, string_types):
+        elif isinstance(documents, str):
             documents = [documents]
-        return sorted(set(['%s.pos' % doc for doc in documents] +
-                          ['%s.psd' % doc for doc in documents]))
+        return sorted(
+            set(
+                ["%s.pos" % doc for doc in documents]
+                + ["%s.psd" % doc for doc in documents]
+            )
+        )
 
     def _getfileids(self, documents, subcorpus):
         """
@@ -92,152 +97,163 @@ class YCOECorpusReader(CorpusReader):
         if documents is None:
             documents = self._documents
         else:
-            if isinstance(documents, string_types):
+            if isinstance(documents, str):
                 documents = [documents]
             for document in documents:
                 if document not in self._documents:
-                    if document[-4:] in ('.pos', '.psd'):
+                    if document[-4:] in (".pos", ".psd"):
                         raise ValueError(
-                            'Expected a document identifier, not a file '
-                            'identifier.  (Use corpus.documents() to get '
-                            'a list of document identifiers.')
+                            "Expected a document identifier, not a file "
+                            "identifier.  (Use corpus.documents() to get "
+                            "a list of document identifiers."
+                        )
                     else:
-                        raise ValueError('Document identifier %s not found'
-                                         % document)
-        return ['%s.%s' % (d, subcorpus) for d in documents]
+                        raise ValueError("Document identifier %s not found" % document)
+        return ["%s.%s" % (d, subcorpus) for d in documents]
 
     # Delegate to one of our two sub-readers:
     def words(self, documents=None):
-        return self._pos_reader.words(self._getfileids(documents, 'pos'))
+        return self._pos_reader.words(self._getfileids(documents, "pos"))
+
     def sents(self, documents=None):
-        return self._pos_reader.sents(self._getfileids(documents, 'pos'))
+        return self._pos_reader.sents(self._getfileids(documents, "pos"))
+
     def paras(self, documents=None):
-        return self._pos_reader.paras(self._getfileids(documents, 'pos'))
+        return self._pos_reader.paras(self._getfileids(documents, "pos"))
+
     def tagged_words(self, documents=None):
-        return self._pos_reader.tagged_words(self._getfileids(documents, 'pos'))
+        return self._pos_reader.tagged_words(self._getfileids(documents, "pos"))
+
     def tagged_sents(self, documents=None):
-        return self._pos_reader.tagged_sents(self._getfileids(documents, 'pos'))
+        return self._pos_reader.tagged_sents(self._getfileids(documents, "pos"))
+
     def tagged_paras(self, documents=None):
-        return self._pos_reader.tagged_paras(self._getfileids(documents, 'pos'))
+        return self._pos_reader.tagged_paras(self._getfileids(documents, "pos"))
+
     def parsed_sents(self, documents=None):
-        return self._psd_reader.parsed_sents(self._getfileids(documents, 'psd'))
+        return self._psd_reader.parsed_sents(self._getfileids(documents, "psd"))
 
 
 class YCOEParseCorpusReader(BracketParseCorpusReader):
     """Specialized version of the standard bracket parse corpus reader
     that strips out (CODE ...) and (ID ...) nodes."""
+
     def _parse(self, t):
-        t = re.sub(r'(?u)\((CODE|ID)[^\)]*\)', '', t)
-        if re.match(r'\s*\(\s*\)\s*$', t): return None
+        t = re.sub(r"(?u)\((CODE|ID)[^\)]*\)", "", t)
+        if re.match(r"\s*\(\s*\)\s*$", t):
+            return None
         return BracketParseCorpusReader._parse(self, t)
 
+
 class YCOETaggedCorpusReader(TaggedCorpusReader):
-    def __init__(self, root, items, encoding='utf8'):
-        gaps_re = r'(?u)(?<=/\.)\s+|\s*\S*_CODE\s*|\s*\S*_ID\s*'
+    def __init__(self, root, items, encoding="utf8"):
+        gaps_re = r"(?u)(?<=/\.)\s+|\s*\S*_CODE\s*|\s*\S*_ID\s*"
         sent_tokenizer = RegexpTokenizer(gaps_re, gaps=True)
-        TaggedCorpusReader.__init__(self, root, items, sep='_',
-                                    sent_tokenizer=sent_tokenizer)
+        TaggedCorpusReader.__init__(
+            self, root, items, sep="_", sent_tokenizer=sent_tokenizer
+        )
+
 
 #: A list of all documents and their titles in ycoe.
 documents = {
-    'coadrian.o34': 'Adrian and Ritheus',
-    'coaelhom.o3': 'Ælfric, Supplemental Homilies',
-    'coaelive.o3': 'Ælfric\'s Lives of Saints',
-    'coalcuin': 'Alcuin De virtutibus et vitiis',
-    'coalex.o23': 'Alexander\'s Letter to Aristotle',
-    'coapollo.o3': 'Apollonius of Tyre',
-    'coaugust': 'Augustine',
-    'cobede.o2': 'Bede\'s History of the English Church',
-    'cobenrul.o3': 'Benedictine Rule',
-    'coblick.o23': 'Blickling Homilies',
-    'coboeth.o2': 'Boethius\' Consolation of Philosophy',
-    'cobyrhtf.o3': 'Byrhtferth\'s Manual',
-    'cocanedgD': 'Canons of Edgar (D)',
-    'cocanedgX': 'Canons of Edgar (X)',
-    'cocathom1.o3': 'Ælfric\'s Catholic Homilies I',
-    'cocathom2.o3': 'Ælfric\'s Catholic Homilies II',
-    'cochad.o24': 'Saint Chad',
-    'cochdrul': 'Chrodegang of Metz, Rule',
-    'cochristoph': 'Saint Christopher',
-    'cochronA.o23': 'Anglo-Saxon Chronicle A',
-    'cochronC': 'Anglo-Saxon Chronicle C',
-    'cochronD': 'Anglo-Saxon Chronicle D',
-    'cochronE.o34': 'Anglo-Saxon Chronicle E',
-    'cocura.o2': 'Cura Pastoralis',
-    'cocuraC': 'Cura Pastoralis (Cotton)',
-    'codicts.o34': 'Dicts of Cato',
-    'codocu1.o1': 'Documents 1 (O1)',
-    'codocu2.o12': 'Documents 2 (O1/O2)',
-    'codocu2.o2': 'Documents 2 (O2)',
-    'codocu3.o23': 'Documents 3 (O2/O3)',
-    'codocu3.o3': 'Documents 3 (O3)',
-    'codocu4.o24': 'Documents 4 (O2/O4)',
-    'coeluc1': 'Honorius of Autun, Elucidarium 1',
-    'coeluc2': 'Honorius of Autun, Elucidarium 1',
-    'coepigen.o3': 'Ælfric\'s Epilogue to Genesis',
-    'coeuphr': 'Saint Euphrosyne',
-    'coeust': 'Saint Eustace and his companions',
-    'coexodusP': 'Exodus (P)',
-    'cogenesiC': 'Genesis (C)',
-    'cogregdC.o24': 'Gregory\'s Dialogues (C)',
-    'cogregdH.o23': 'Gregory\'s Dialogues (H)',
-    'coherbar': 'Pseudo-Apuleius, Herbarium',
-    'coinspolD.o34': 'Wulfstan\'s Institute of Polity (D)',
-    'coinspolX': 'Wulfstan\'s Institute of Polity (X)',
-    'cojames': 'Saint James',
-    'colacnu.o23': 'Lacnunga',
-    'colaece.o2': 'Leechdoms',
-    'colaw1cn.o3': 'Laws, Cnut I',
-    'colaw2cn.o3': 'Laws, Cnut II',
-    'colaw5atr.o3': 'Laws, Æthelred V',
-    'colaw6atr.o3': 'Laws, Æthelred VI',
-    'colawaf.o2': 'Laws, Alfred',
-    'colawafint.o2': 'Alfred\'s Introduction to Laws',
-    'colawger.o34': 'Laws, Gerefa',
-    'colawine.ox2': 'Laws, Ine',
-    'colawnorthu.o3': 'Northumbra Preosta Lagu',
-    'colawwllad.o4': 'Laws, William I, Lad',
-    'coleofri.o4': 'Leofric',
-    'colsigef.o3': 'Ælfric\'s Letter to Sigefyrth',
-    'colsigewB': 'Ælfric\'s Letter to Sigeweard (B)',
-    'colsigewZ.o34': 'Ælfric\'s Letter to Sigeweard (Z)',
-    'colwgeat': 'Ælfric\'s Letter to Wulfgeat',
-    'colwsigeT': 'Ælfric\'s Letter to Wulfsige (T)',
-    'colwsigeXa.o34': 'Ælfric\'s Letter to Wulfsige (Xa)',
-    'colwstan1.o3': 'Ælfric\'s Letter to Wulfstan I',
-    'colwstan2.o3': 'Ælfric\'s Letter to Wulfstan II',
-    'comargaC.o34': 'Saint Margaret (C)',
-    'comargaT': 'Saint Margaret (T)',
-    'comart1': 'Martyrology, I',
-    'comart2': 'Martyrology, II',
-    'comart3.o23': 'Martyrology, III',
-    'comarvel.o23': 'Marvels of the East',
-    'comary': 'Mary of Egypt',
-    'coneot': 'Saint Neot',
-    'conicodA': 'Gospel of Nicodemus (A)',
-    'conicodC': 'Gospel of Nicodemus (C)',
-    'conicodD': 'Gospel of Nicodemus (D)',
-    'conicodE': 'Gospel of Nicodemus (E)',
-    'coorosiu.o2': 'Orosius',
-    'cootest.o3': 'Heptateuch',
-    'coprefcath1.o3': 'Ælfric\'s Preface to Catholic Homilies I',
-    'coprefcath2.o3': 'Ælfric\'s Preface to Catholic Homilies II',
-    'coprefcura.o2': 'Preface to the Cura Pastoralis',
-    'coprefgen.o3': 'Ælfric\'s Preface to Genesis',
-    'copreflives.o3': 'Ælfric\'s Preface to Lives of Saints',
-    'coprefsolilo': 'Preface to Augustine\'s Soliloquies',
-    'coquadru.o23': 'Pseudo-Apuleius, Medicina de quadrupedibus',
-    'corood': 'History of the Holy Rood-Tree',
-    'cosevensl': 'Seven Sleepers',
-    'cosolilo': 'St. Augustine\'s Soliloquies',
-    'cosolsat1.o4': 'Solomon and Saturn I',
-    'cosolsat2': 'Solomon and Saturn II',
-    'cotempo.o3': 'Ælfric\'s De Temporibus Anni',
-    'coverhom': 'Vercelli Homilies',
-    'coverhomE': 'Vercelli Homilies (E)',
-    'coverhomL': 'Vercelli Homilies (L)',
-    'covinceB': 'Saint Vincent (Bodley 343)',
-    'covinsal': 'Vindicta Salvatoris',
-    'cowsgosp.o3': 'West-Saxon Gospels',
-    'cowulf.o34': 'Wulfstan\'s Homilies'
-    }
+    "coadrian.o34": "Adrian and Ritheus",
+    "coaelhom.o3": "Ælfric, Supplemental Homilies",
+    "coaelive.o3": "Ælfric's Lives of Saints",
+    "coalcuin": "Alcuin De virtutibus et vitiis",
+    "coalex.o23": "Alexander's Letter to Aristotle",
+    "coapollo.o3": "Apollonius of Tyre",
+    "coaugust": "Augustine",
+    "cobede.o2": "Bede's History of the English Church",
+    "cobenrul.o3": "Benedictine Rule",
+    "coblick.o23": "Blickling Homilies",
+    "coboeth.o2": "Boethius' Consolation of Philosophy",
+    "cobyrhtf.o3": "Byrhtferth's Manual",
+    "cocanedgD": "Canons of Edgar (D)",
+    "cocanedgX": "Canons of Edgar (X)",
+    "cocathom1.o3": "Ælfric's Catholic Homilies I",
+    "cocathom2.o3": "Ælfric's Catholic Homilies II",
+    "cochad.o24": "Saint Chad",
+    "cochdrul": "Chrodegang of Metz, Rule",
+    "cochristoph": "Saint Christopher",
+    "cochronA.o23": "Anglo-Saxon Chronicle A",
+    "cochronC": "Anglo-Saxon Chronicle C",
+    "cochronD": "Anglo-Saxon Chronicle D",
+    "cochronE.o34": "Anglo-Saxon Chronicle E",
+    "cocura.o2": "Cura Pastoralis",
+    "cocuraC": "Cura Pastoralis (Cotton)",
+    "codicts.o34": "Dicts of Cato",
+    "codocu1.o1": "Documents 1 (O1)",
+    "codocu2.o12": "Documents 2 (O1/O2)",
+    "codocu2.o2": "Documents 2 (O2)",
+    "codocu3.o23": "Documents 3 (O2/O3)",
+    "codocu3.o3": "Documents 3 (O3)",
+    "codocu4.o24": "Documents 4 (O2/O4)",
+    "coeluc1": "Honorius of Autun, Elucidarium 1",
+    "coeluc2": "Honorius of Autun, Elucidarium 1",
+    "coepigen.o3": "Ælfric's Epilogue to Genesis",
+    "coeuphr": "Saint Euphrosyne",
+    "coeust": "Saint Eustace and his companions",
+    "coexodusP": "Exodus (P)",
+    "cogenesiC": "Genesis (C)",
+    "cogregdC.o24": "Gregory's Dialogues (C)",
+    "cogregdH.o23": "Gregory's Dialogues (H)",
+    "coherbar": "Pseudo-Apuleius, Herbarium",
+    "coinspolD.o34": "Wulfstan's Institute of Polity (D)",
+    "coinspolX": "Wulfstan's Institute of Polity (X)",
+    "cojames": "Saint James",
+    "colacnu.o23": "Lacnunga",
+    "colaece.o2": "Leechdoms",
+    "colaw1cn.o3": "Laws, Cnut I",
+    "colaw2cn.o3": "Laws, Cnut II",
+    "colaw5atr.o3": "Laws, Æthelred V",
+    "colaw6atr.o3": "Laws, Æthelred VI",
+    "colawaf.o2": "Laws, Alfred",
+    "colawafint.o2": "Alfred's Introduction to Laws",
+    "colawger.o34": "Laws, Gerefa",
+    "colawine.ox2": "Laws, Ine",
+    "colawnorthu.o3": "Northumbra Preosta Lagu",
+    "colawwllad.o4": "Laws, William I, Lad",
+    "coleofri.o4": "Leofric",
+    "colsigef.o3": "Ælfric's Letter to Sigefyrth",
+    "colsigewB": "Ælfric's Letter to Sigeweard (B)",
+    "colsigewZ.o34": "Ælfric's Letter to Sigeweard (Z)",
+    "colwgeat": "Ælfric's Letter to Wulfgeat",
+    "colwsigeT": "Ælfric's Letter to Wulfsige (T)",
+    "colwsigeXa.o34": "Ælfric's Letter to Wulfsige (Xa)",
+    "colwstan1.o3": "Ælfric's Letter to Wulfstan I",
+    "colwstan2.o3": "Ælfric's Letter to Wulfstan II",
+    "comargaC.o34": "Saint Margaret (C)",
+    "comargaT": "Saint Margaret (T)",
+    "comart1": "Martyrology, I",
+    "comart2": "Martyrology, II",
+    "comart3.o23": "Martyrology, III",
+    "comarvel.o23": "Marvels of the East",
+    "comary": "Mary of Egypt",
+    "coneot": "Saint Neot",
+    "conicodA": "Gospel of Nicodemus (A)",
+    "conicodC": "Gospel of Nicodemus (C)",
+    "conicodD": "Gospel of Nicodemus (D)",
+    "conicodE": "Gospel of Nicodemus (E)",
+    "coorosiu.o2": "Orosius",
+    "cootest.o3": "Heptateuch",
+    "coprefcath1.o3": "Ælfric's Preface to Catholic Homilies I",
+    "coprefcath2.o3": "Ælfric's Preface to Catholic Homilies II",
+    "coprefcura.o2": "Preface to the Cura Pastoralis",
+    "coprefgen.o3": "Ælfric's Preface to Genesis",
+    "copreflives.o3": "Ælfric's Preface to Lives of Saints",
+    "coprefsolilo": "Preface to Augustine's Soliloquies",
+    "coquadru.o23": "Pseudo-Apuleius, Medicina de quadrupedibus",
+    "corood": "History of the Holy Rood-Tree",
+    "cosevensl": "Seven Sleepers",
+    "cosolilo": "St. Augustine's Soliloquies",
+    "cosolsat1.o4": "Solomon and Saturn I",
+    "cosolsat2": "Solomon and Saturn II",
+    "cotempo.o3": "Ælfric's De Temporibus Anni",
+    "coverhom": "Vercelli Homilies",
+    "coverhomE": "Vercelli Homilies (E)",
+    "coverhomL": "Vercelli Homilies (L)",
+    "covinceB": "Saint Vincent (Bodley 343)",
+    "covinsal": "Vindicta Salvatoris",
+    "cowsgosp.o3": "West-Saxon Gospels",
+    "cowulf.o34": "Wulfstan's Homilies",
+}
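
The documents table above maps YCOE document identifiers to titles and is importable as-is; a brief usage sketch:

    from nltk.corpus.reader.ycoe import documents

    print(documents["cobede.o2"])   # -> Bede's History of the English Church
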
diff --git a/nlp_resource_data/nltk/corpus/reader/ycoe.pyc b/nlp_resource_data/nltk/corpus/reader/ycoe.pyc
deleted file mode 100755 (executable)
index 0572c2e..0000000
Binary files a/nlp_resource_data/nltk/corpus/reader/ycoe.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/corpus/util.py b/nlp_resource_data/nltk/corpus/util.py
old mode 100755 (executable)
new mode 100644 (file)
index d23c561..ecd147e
@@ -1,28 +1,26 @@
 # Natural Language Toolkit: Corpus Reader Utility Functions
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 ######################################################################
-#{ Lazy Corpus Loader
+# { Lazy Corpus Loader
 ######################################################################
 
-from __future__ import unicode_literals
 import re
 import gc
 import nltk
-from nltk.compat import python_2_unicode_compatible
 
 TRY_ZIPFILE_FIRST = False
 
-@python_2_unicode_compatible
+
 class LazyCorpusLoader(object):
     """
     To see the API documentation for this lazily loaded corpus, first
     run corpus.ensure_loaded(), and then run help(this_corpus).
-    
+
     LazyCorpusLoader is a proxy object which is used to stand in for a
     corpus object before the corpus is loaded.  This allows NLTK to
     create an object for each corpus, but defer the costs associated
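
Dropping @python_2_unicode_compatible above is safe because the decorator only rewired __str__/__unicode__ for Python 2; on Python 3 a plain __str__ already returns text. An illustrative class (not part of NLTK):

    class Demo:                # hypothetical, for illustration only
        def __str__(self):
            return "græs"      # unicode-safe on Python 3 with no decorator

    print(Demo())              # -> græs
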
@@ -38,7 +36,7 @@ class LazyCorpusLoader(object):
     NLTK data package.  Once they've properly installed the data
     package (or modified ``nltk.data.path`` to point to its location),
     they can then use the corpus object without restarting python.
-    
+
     :param name: The name of the corpus
     :type name: str
     :param reader_cls: The specific CorpusReader class, e.g. PlaintextCorpusReader, WordListCorpusReader
@@ -48,37 +46,43 @@ class LazyCorpusLoader(object):
     :param *args: Any other non-keywords arguments that `reader_cls` might need.
     :param *kargs: Any other keywords arguments that `reader_cls` might need.
     """
+
     def __init__(self, name, reader_cls, *args, **kwargs):
         from nltk.corpus.reader.api import CorpusReader
+
         assert issubclass(reader_cls, CorpusReader)
         self.__name = self.__name__ = name
         self.__reader_cls = reader_cls
-        # If nltk_data_subdir is set explicitly 
-        if 'nltk_data_subdir' in kwargs:
+        # If nltk_data_subdir is set explicitly
+        if "nltk_data_subdir" in kwargs:
             # Use the specified subdirectory path
-            self.subdir = kwargs['nltk_data_subdir']
+            self.subdir = kwargs["nltk_data_subdir"]
             # Pops the `nltk_data_subdir` argument, we don't need it anymore.
-            kwargs.pop('nltk_data_subdir', None)
-        else: # Otherwise use 'nltk_data/corpora'
-            self.subdir = 'corpora'
+            kwargs.pop("nltk_data_subdir", None)
+        else:  # Otherwise use 'nltk_data/corpora'
+            self.subdir = "corpora"
         self.__args = args
         self.__kwargs = kwargs
 
     def __load(self):
         # Find the corpus root directory.
-        zip_name = re.sub(r'(([^/]*)(/.*)?)', r'\2.zip/\1/', self.__name)
+        zip_name = re.sub(r"(([^/]+)(/.*)?)", r"\2.zip/\1/", self.__name)
         if TRY_ZIPFILE_FIRST:
             try:
-                root = nltk.data.find('{}/{}'.format(self.subdir, zip_name))
+                root = nltk.data.find("{}/{}".format(self.subdir, zip_name))
             except LookupError as e:
-                try: root = nltk.data.find('{}/{}'.format(self.subdir, self.__name))
-                except LookupError: raise e
+                try:
+                    root = nltk.data.find("{}/{}".format(self.subdir, self.__name))
+                except LookupError:
+                    raise e
         else:
             try:
-                root = nltk.data.find('{}/{}'.format(self.subdir, self.__name))
+                root = nltk.data.find("{}/{}".format(self.subdir, self.__name))
             except LookupError as e:
-                try: root = nltk.data.find('{}/{}'.format(self.subdir, zip_name))
-                except LookupError: raise e
+                try:
+                    root = nltk.data.find("{}/{}".format(self.subdir, zip_name))
+                except LookupError:
+                    raise e
 
         # Load the corpus.
         corpus = self.__reader_cls(root, *self.__args, **self.__kwargs)
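
Note the tightening of ([^/]*) to ([^/]+) in the zip_name substitution above: under the re.sub semantics introduced in Python 3.7, the old pattern also matched the empty string at the end of the name and appended a spurious ".zip//". With the new pattern:

    import re

    re.sub(r"(([^/]+)(/.*)?)", r"\2.zip/\1/", "wordnet")
    # -> 'wordnet.zip/wordnet/'
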
@@ -87,7 +91,7 @@ class LazyCorpusLoader(object):
         # the corpus by modifying our own __dict__ and __class__ to
         # match that of the corpus.
 
-        args, kwargs  = self.__args, self.__kwargs
+        args, kwargs = self.__args, self.__kwargs
         name, reader_cls = self.__name, self.__reader_cls
 
         self.__dict__ = corpus.__dict__
@@ -110,7 +114,7 @@ class LazyCorpusLoader(object):
         # (see http://bugs.python.org/issue1225107).
         # Without this fix tests may take extra 1.5GB RAM
         # because all corpora gets loaded during test collection.
-        if attr == '__bases__':
+        if attr == "__bases__":
             raise AttributeError("LazyCorpusLoader object has no attribute '__bases__'")
 
         self.__load()
@@ -119,8 +123,10 @@ class LazyCorpusLoader(object):
         return getattr(self, attr)
 
     def __repr__(self):
-        return '<%s in %r (not loaded yet)>' % (
-            self.__reader_cls.__name__, '.../corpora/'+self.__name)
+        return "<%s in %r (not loaded yet)>" % (
+            self.__reader_cls.__name__,
+            ".../corpora/" + self.__name,
+        )
 
     def _unload(self):
         # If an exception occures during corpus loading then
@@ -133,12 +139,15 @@ def _make_bound_method(func, self):
     """
     Magic for creating bound methods (used for _unload).
     """
+
     class Foo(object):
-        def meth(self): pass
+        def meth(self):
+            pass
+
     f = Foo()
     bound_method = type(f.meth)
 
     try:
         return bound_method(func, self, self.__class__)
-    except TypeError: # python3
+    except TypeError:  # python3
         return bound_method(func, self)
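
The retained TypeError fallback above exists because the bound-method constructor lost its third argument in Python 3: type(f.meth) is types.MethodType, which now takes just (func, instance). A self-contained sketch:

    import types

    class C:
        x = 41

    def f(self):
        return self.x + 1

    bound = types.MethodType(f, C())   # Python 3 signature: (func, instance)
    print(bound())                     # -> 42
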
diff --git a/nlp_resource_data/nltk/corpus/util.pyc b/nlp_resource_data/nltk/corpus/util.pyc
deleted file mode 100755 (executable)
index 7915a85..0000000
Binary files a/nlp_resource_data/nltk/corpus/util.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/data.py b/nlp_resource_data/nltk/data.py
old mode 100755 (executable)
new mode 100644 (file)
index 3295bb8..e1cc913
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Utility functions
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -30,48 +30,34 @@ resource file, given its URL: ``load()`` loads a given resource, and
 adds it to a resource cache; and ``retrieve()`` copies a given resource
 to a local file.
 """
-from __future__ import print_function, unicode_literals
-from __future__ import division
-from abc import ABCMeta, abstractmethod
-from six import add_metaclass
 
 import functools
 import textwrap
 import io
+from io import BytesIO
 import os
 import re
 import sys
 import zipfile
 import codecs
+import pickle
 
-from gzip import GzipFile, READ as GZ_READ, WRITE as GZ_WRITE
+from abc import ABCMeta, abstractmethod
+from gzip import GzipFile, WRITE as GZ_WRITE
 
-try: # Python 3.
-    textwrap_indent = functools.partial(textwrap.indent, prefix='  ')
-except AttributeError: # Python 2; indent() not available for Python2.
-    textwrap_fill = functools.partial(textwrap.fill,
-                                        initial_indent='  ',
-                                        subsequent_indent='  ',
-                                        replace_whitespace=False)
-    def textwrap_indent(text):
-        return '\n'.join(textwrap_fill(line) for line in text.splitlines())
+from urllib.request import urlopen, url2pathname
 
 try:
     from zlib import Z_SYNC_FLUSH as FLUSH
 except ImportError:
     from zlib import Z_FINISH as FLUSH
 
-try:
-    import cPickle as pickle
-except ImportError:
-    import pickle
-
-from six import string_types, text_type
-from six.moves.urllib.request import urlopen, url2pathname
-
 # this import should be more specific:
 import nltk
-from nltk.compat import py3_data, add_py3_data, BytesIO
+from nltk.compat import py3_data, add_py3_data
+from nltk.internals import deprecated
+
+textwrap_indent = functools.partial(textwrap.indent, prefix="  ")
 
 ######################################################################
 # Search Path
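
With Python 2 gone, the import block above simplifies: cPickle folds into pickle, six.moves.urllib.request becomes urllib.request, and textwrap.indent (absent from Python 2) can be used unconditionally. The helper defined above behaves like this:

    import functools
    import textwrap

    textwrap_indent = functools.partial(textwrap.indent, prefix="  ")
    print(textwrap_indent("first\nsecond"))
    # prints:
    #   first
    #   second
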
@@ -85,29 +71,32 @@ path = []
    (e.g., in their home directory under ~/nltk_data)."""
 
 # User-specified locations:
-_paths_from_env = os.environ.get('NLTK_DATA', str('')).split(os.pathsep)
+_paths_from_env = os.environ.get("NLTK_DATA", str("")).split(os.pathsep)
 path += [d for d in _paths_from_env if d]
-if 'APPENGINE_RUNTIME' not in os.environ and os.path.expanduser('~/') != '~/':
-    path.append(os.path.expanduser(str('~/nltk_data')))
+if "APPENGINE_RUNTIME" not in os.environ and os.path.expanduser("~/") != "~/":
+    path.append(os.path.expanduser(str("~/nltk_data")))
 
-if sys.platform.startswith('win'):
+if sys.platform.startswith("win"):
     # Common locations on Windows:
     path += [
-        str(r'C:\nltk_data'), str(r'D:\nltk_data'), str(r'E:\nltk_data'),
-        os.path.join(sys.prefix, str('nltk_data')),
-        os.path.join(sys.prefix, str('lib'), str('nltk_data')),
-        os.path.join(
-            os.environ.get(str('APPDATA'), str('C:\\')), str('nltk_data'))
+        os.path.join(sys.prefix, str("nltk_data")),
+        os.path.join(sys.prefix, str("share"), str("nltk_data")),
+        os.path.join(sys.prefix, str("lib"), str("nltk_data")),
+        os.path.join(os.environ.get(str("APPDATA"), str("C:\\")), str("nltk_data")),
+        str(r"C:\nltk_data"),
+        str(r"D:\nltk_data"),
+        str(r"E:\nltk_data"),
     ]
 else:
     # Common locations on UNIX & OS X:
     path += [
-        str('/usr/share/nltk_data'),
-        str('/usr/local/share/nltk_data'),
-        str('/usr/lib/nltk_data'),
-        str('/usr/local/lib/nltk_data'),
-        os.path.join(sys.prefix, str('nltk_data')),
-        os.path.join(sys.prefix, str('lib'), str('nltk_data'))
+        os.path.join(sys.prefix, str("nltk_data")),
+        os.path.join(sys.prefix, str("share"), str("nltk_data")),
+        os.path.join(sys.prefix, str("lib"), str("nltk_data")),
+        str("/usr/share/nltk_data"),
+        str("/usr/local/share/nltk_data"),
+        str("/usr/lib/nltk_data"),
+        str("/usr/local/lib/nltk_data"),
     ]
 
 
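The reordered search path above still honours NLTK_DATA first; the variable is split on os.pathsep and empty entries are dropped. A sketch under POSIX assumptions (directories are hypothetical):

    import os

    os.environ["NLTK_DATA"] = "/opt/nlp:/srv/nltk_data"   # hypothetical paths
    paths = [d for d in os.environ.get("NLTK_DATA", "").split(os.pathsep) if d]
    # -> ['/opt/nlp', '/srv/nltk_data'] on POSIX, where os.pathsep == ":"
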
@@ -115,8 +104,16 @@ else:
 # Util Functions
 ######################################################################
 
-def gzip_open_unicode(filename, mode="rb", compresslevel=9, encoding='utf-8',
-                      fileobj=None, errors=None, newline=None):
+
+def gzip_open_unicode(
+    filename,
+    mode="rb",
+    compresslevel=9,
+    encoding="utf-8",
+    fileobj=None,
+    errors=None,
+    newline=None,
+):
     if fileobj is None:
         fileobj = GzipFile(filename, mode, compresslevel, fileobj)
     return io.TextIOWrapper(fileobj, encoding, errors, newline)
@@ -138,14 +135,14 @@ def split_resource_url(resource_url):
     >>> split_resource_url('file:///C:/home/nltk')
     ('file', '/C:/home/nltk')
     """
-    protocol, path_ = resource_url.split(':', 1)
-    if protocol == 'nltk':
+    protocol, path_ = resource_url.split(":", 1)
+    if protocol == "nltk":
         pass
-    elif protocol == 'file':
-        if path_.startswith('/'):
-            path_ = '/' + path_.lstrip('/')
+    elif protocol == "file":
+        if path_.startswith("/"):
+            path_ = "/" + path_.lstrip("/")
     else:
-        path_ = re.sub(r'^/{0,2}', '', path_)
+        path_ = re.sub(r"^/{0,2}", "", path_)
     return protocol, path_
 
 
@@ -186,23 +183,23 @@ def normalize_resource_url(resource_url):
         protocol, name = split_resource_url(resource_url)
     except ValueError:
         # the resource url has no protocol, use the nltk protocol by default
-        protocol = 'nltk'
+        protocol = "nltk"
         name = resource_url
     # use file protocol if the path is an absolute path
-    if protocol == 'nltk' and os.path.isabs(name):
-        protocol = 'file://'
+    if protocol == "nltk" and os.path.isabs(name):
+        protocol = "file://"
         name = normalize_resource_name(name, False, None)
-    elif protocol == 'file':
-        protocol = 'file://'
+    elif protocol == "file":
+        protocol = "file://"
         # name is absolute
         name = normalize_resource_name(name, False, None)
-    elif protocol == 'nltk':
-        protocol = 'nltk:'
+    elif protocol == "nltk":
+        protocol = "nltk:"
         name = normalize_resource_name(name, True)
     else:
         # handled by urllib
-        protocol += '://'
-    return ''.join([protocol, name])
+        protocol += "://"
+    return "".join([protocol, name])
 
 
 def normalize_resource_name(resource_name, allow_relative=True, relative_path=None):
@@ -232,23 +229,24 @@ def normalize_resource_name(resource_name, allow_relative=True, relative_path=No
     >>> windows or normalize_resource_name('/dir/file', True, '/') == '/dir/file'
     True
     """
-    is_dir = bool(re.search(r'[\\/.]$', resource_name)) or resource_name.endswith(os.path.sep)
-    if sys.platform.startswith('win'):
-        resource_name = resource_name.lstrip('/')
+    is_dir = bool(re.search(r"[\\/.]$", resource_name)) or resource_name.endswith(
+        os.path.sep
+    )
+    if sys.platform.startswith("win"):
+        resource_name = resource_name.lstrip("/")
     else:
-        resource_name = re.sub(r'^/+', '/', resource_name)
+        resource_name = re.sub(r"^/+", "/", resource_name)
     if allow_relative:
         resource_name = os.path.normpath(resource_name)
     else:
         if relative_path is None:
             relative_path = os.curdir
-        resource_name = os.path.abspath(
-            os.path.join(relative_path, resource_name))
-    resource_name = resource_name.replace('\\', '/').replace(os.path.sep, '/')
-    if sys.platform.startswith('win') and os.path.isabs(resource_name):
-        resource_name = '/' + resource_name
-    if is_dir and not resource_name.endswith('/'):
-        resource_name += '/'
+        resource_name = os.path.abspath(os.path.join(relative_path, resource_name))
+    resource_name = resource_name.replace("\\", "/").replace(os.path.sep, "/")
+    if sys.platform.startswith("win") and os.path.isabs(resource_name):
+        resource_name = "/" + resource_name
+    if is_dir and not resource_name.endswith("/"):
+        resource_name += "/"
     return resource_name
 
 
@@ -256,8 +254,8 @@ def normalize_resource_name(resource_name, allow_relative=True, relative_path=No
 # Path Pointers
 ######################################################################
 
-@add_metaclass(ABCMeta)
-class PathPointer(object):
+
+class PathPointer(metaclass=ABCMeta):
     """
     An abstract base class for 'path pointers,' used by NLTK's data
     package to identify specific paths.  Two subclasses exist:
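
The class statement above shows the standard six.add_metaclass removal: on Python 3 the metaclass keyword in the class header is the native spelling. An illustrative abstract base (the name is a stand-in):

    from abc import ABCMeta, abstractmethod

    class PathPointerSketch(metaclass=ABCMeta):   # stand-in, not the real class
        @abstractmethod
        def open(self, encoding=None):
            ...
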
@@ -298,11 +296,12 @@ class PathPointer(object):
         """
 
 
-class FileSystemPathPointer(PathPointer, text_type):
+class FileSystemPathPointer(PathPointer, str):
     """
     A path pointer that identifies a file which can be accessed
     directly via a given absolute path.
     """
+
     @py3_data
     def __init__(self, _path):
         """
@@ -313,7 +312,7 @@ class FileSystemPathPointer(PathPointer, text_type):
 
         _path = os.path.abspath(_path)
         if not os.path.exists(_path):
-            raise IOError('No such file or directory: %r' % _path)
+            raise IOError("No such file or directory: %r" % _path)
         self._path = _path
 
         # There's no need to call str.__init__(), since it's a no-op;
@@ -325,7 +324,7 @@ class FileSystemPathPointer(PathPointer, text_type):
         return self._path
 
     def open(self, encoding=None):
-        stream = open(self._path, 'rb')
+        stream = open(self._path, "rb")
         if encoding is not None:
             stream = SeekableUnicodeStreamReader(stream, encoding)
         return stream
@@ -338,114 +337,30 @@ class FileSystemPathPointer(PathPointer, text_type):
         return FileSystemPathPointer(_path)
 
     def __repr__(self):
-        # This should be a byte string under Python 2.x;
-        # we don't want transliteration here so
-        # @python_2_unicode_compatible is not used.
-        return str('FileSystemPathPointer(%r)' % self._path)
+        return "FileSystemPathPointer(%r)" % self._path
 
     def __str__(self):
         return self._path
 
-
+@deprecated("Use gzip.GzipFile instead as it also uses a buffer.")
 class BufferedGzipFile(GzipFile):
-    """
-    A ``GzipFile`` subclass that buffers calls to ``read()`` and ``write()``.
-    This allows faster reads and writes of data to and from gzip-compressed
-    files at the cost of using more memory.
-
-    The default buffer size is 2MB.
+    """A ``GzipFile`` subclass for compatibility with older nltk releases.
 
-    ``BufferedGzipFile`` is useful for loading large gzipped pickle objects
-    as well as writing large encoded feature files for classifier training.
+    Use ``GzipFile`` directly as it also buffers in all supported
+    Python versions.
     """
-    MB = 2 ** 20
-    SIZE = 2 * MB
 
     @py3_data
-    def __init__(self, filename=None, mode=None, compresslevel=9,
-                 fileobj=None, **kwargs):
-        """
-        Return a buffered gzip file object.
-
-        :param filename: a filesystem path
-        :type filename: str
-        :param mode: a file mode which can be any of 'r', 'rb', 'a', 'ab',
-            'w', or 'wb'
-        :type mode: str
-        :param compresslevel: The compresslevel argument is an integer from 1
-            to 9 controlling the level of compression; 1 is fastest and
-            produces the least compression, and 9 is slowest and produces the
-            most compression. The default is 9.
-        :type compresslevel: int
-        :param fileobj: a BytesIO stream to read from instead of a file.
-        :type fileobj: BytesIO
-        :param size: number of bytes to buffer during calls to read() and write()
-        :type size: int
-        :rtype: BufferedGzipFile
-        """
+    def __init__(
+        self, filename=None, mode=None, compresslevel=9, fileobj=None, **kwargs
+    ):
+        """Return a buffered gzip file object."""
         GzipFile.__init__(self, filename, mode, compresslevel, fileobj)
-        self._size = kwargs.get('size', self.SIZE)
-        self._nltk_buffer = BytesIO()
-        # cStringIO does not support len.
-        self._len = 0
-
-    def _reset_buffer(self):
-        # For some reason calling BytesIO.truncate() here will lead to
-        # inconsistent writes so just set _buffer to a new BytesIO object.
-        self._nltk_buffer = BytesIO()
-        self._len = 0
-
-    def _write_buffer(self, data):
-        # Simply write to the buffer and increment the buffer size.
-        if data is not None:
-            self._nltk_buffer.write(data)
-            self._len += len(data)
-
-    def _write_gzip(self, data):
-        # Write the current buffer to the GzipFile.
-        GzipFile.write(self, self._nltk_buffer.getvalue())
-        # Then reset the buffer and write the new data to the buffer.
-        self._reset_buffer()
-        self._write_buffer(data)
-
-    def close(self):
-        # GzipFile.close() doesn't actuallly close anything.
-        if self.mode == GZ_WRITE:
-            self._write_gzip(None)
-            self._reset_buffer()
-        return GzipFile.close(self)
-
-    def flush(self, lib_mode=FLUSH):
-        self._nltk_buffer.flush()
-        GzipFile.flush(self, lib_mode)
 
-    def read(self, size=None):
-        if not size:
-            size = self._size
-            contents = BytesIO()
-            while True:
-                blocks = GzipFile.read(self, size)
-                if not blocks:
-                    contents.flush()
-                    break
-                contents.write(blocks)
-            return contents.getvalue()
-        else:
-            return GzipFile.read(self, size)
-
-    def write(self, data, size=-1):
-        """
-        :param data: bytes to write to file or buffer
-        :type data: bytes
-        :param size: buffer at least size bytes before writing to file
-        :type size: int
-        """
-        if not size:
-            size = self._size
-        if self._len + len(data) <= size:
-            self._write_buffer(data)
-        else:
-            self._write_gzip(data)
+    def write(self, data):
+        # This is identical to GzipFile.write but does not return
+        # the bytes written to retain compatibility.
+        super().write(data)
 
 
 class GzipFileSystemPathPointer(FileSystemPathPointer):
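
The rewrite above retires the custom buffering because gzip.GzipFile has done its own buffered I/O since Python 3.5 (see the nltk/nltk#1308 reference in the removed comment). Reading a compressed resource is now simply (file name hypothetical):

    from gzip import GzipFile

    with GzipFile("corpus.pickle.gz", "rb") as stream:   # hypothetical file
        data = stream.read()
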
@@ -456,13 +371,7 @@ class GzipFileSystemPathPointer(FileSystemPathPointer):
     """
 
     def open(self, encoding=None):
-        # Note: In >= Python3.5, GzipFile is already using a
-        # buffered reader in the backend which has a variable self._buffer
-        # See https://github.com/nltk/nltk/issues/1308
-        if sys.version.startswith('2.7') or sys.version.startswith('3.4'):
-            stream = BufferedGzipFile(self._path, 'rb')
-        else:
-            stream = GzipFile(self._path, 'rb')
+        stream = GzipFile(self._path, "rb")
         if encoding:
             stream = SeekableUnicodeStreamReader(stream, encoding)
         return stream
@@ -473,8 +382,9 @@ class ZipFilePathPointer(PathPointer):
     A path pointer that identifies a file contained within a zipfile,
     which can be accessed by reading that zipfile.
     """
+
     @py3_data
-    def __init__(self, zipfile, entry=''):
+    def __init__(self, zipfile, entry=""):
         """
         Create a new path pointer pointing at the specified entry
         in the given zipfile.
@@ -482,14 +392,15 @@ class ZipFilePathPointer(PathPointer):
         :raise IOError: If the given zipfile does not exist, or if it
         does not contain the specified entry.
         """
-        if isinstance(zipfile, string_types):
+        if isinstance(zipfile, str):
             zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))
 
-        # Normalize the entry string, it should be relative:
-        entry = normalize_resource_name(entry, True, '/').lstrip('/')
-
         # Check that the entry exists:
         if entry:
+
+            # Normalize the entry string, it should be relative:
+            entry = normalize_resource_name(entry, True, "/").lstrip("/")
+
             try:
                 zipfile.getinfo(entry)
             except Exception:
@@ -497,13 +408,15 @@ class ZipFilePathPointer(PathPointer):
                 # the zip file.  So if `entry` is a directory name,
                 # then check if the zipfile contains any files that
                 # are under the given directory.
-                if (entry.endswith('/') and
-                        [n for n in zipfile.namelist() if n.startswith(entry)]):
+                if entry.endswith("/") and [
+                    n for n in zipfile.namelist() if n.startswith(entry)
+                ]:
                     pass  # zipfile contains a file in that directory.
                 else:
                     # Otherwise, complain.
-                    raise IOError('Zipfile %r does not contain %r' %
-                                  (zipfile.filename, entry))
+                    raise IOError(
+                        "Zipfile %r does not contain %r" % (zipfile.filename, entry)
+                    )
         self._zipfile = zipfile
         self._entry = entry
 
@@ -526,14 +439,8 @@ class ZipFilePathPointer(PathPointer):
     def open(self, encoding=None):
         data = self._zipfile.read(self._entry)
         stream = BytesIO(data)
-        if self._entry.endswith('.gz'):
-            # Note: In >= Python3.5, GzipFile is already using a
-            # buffered reader in the backend which has a variable self._buffer
-            # See https://github.com/nltk/nltk/issues/1308
-            if sys.version.startswith('2.7') or sys.version.startswith('3.4'):
-                stream = BufferedGzipFile(self._entry, fileobj=stream)
-            else:
-                stream = GzipFile(self._entry, fileobj=stream)
+        if self._entry.endswith(".gz"):
+            stream = GzipFile(self._entry, fileobj=stream)
         elif encoding is not None:
             stream = SeekableUnicodeStreamReader(stream, encoding)
         return stream
@@ -542,16 +449,14 @@ class ZipFilePathPointer(PathPointer):
         return self._zipfile.getinfo(self._entry).file_size
 
     def join(self, fileid):
-        entry = '%s/%s' % (self._entry, fileid)
+        entry = "%s/%s" % (self._entry, fileid)
         return ZipFilePathPointer(self._zipfile, entry)
 
     def __repr__(self):
-        return str('ZipFilePathPointer(%r, %r)') % (
-            self._zipfile.filename, self._entry)
+        return str("ZipFilePathPointer(%r, %r)") % (self._zipfile.filename, self._entry)
 
     def __str__(self):
-        return os.path.normpath(os.path.join(self._zipfile.filename,
-                                             self._entry))
+        return os.path.normpath(os.path.join(self._zipfile.filename, self._entry))
 
 
 ######################################################################
@@ -610,13 +515,13 @@ def find(resource_name, paths=None):
         paths = path
 
     # Check if the resource name includes a zipfile name
-    m = re.match(r'(.*\.zip)/?(.*)$|', resource_name)
+    m = re.match(r"(.*\.zip)/?(.*)$|", resource_name)
     zipfile, zipentry = m.groups()
 
     # Check each item in our path
     for path_ in paths:
         # Is the path item a zipfile?
-        if path_ and (os.path.isfile(path_) and path_.endswith('.zip')):
+        if path_ and (os.path.isfile(path_) and path_.endswith(".zip")):
             try:
                 return ZipFilePathPointer(path_, resource_name)
             except IOError:
@@ -628,7 +533,7 @@ def find(resource_name, paths=None):
             if zipfile is None:
                 p = os.path.join(path_, url2pathname(resource_name))
                 if os.path.exists(p):
-                    if p.endswith('.gz'):
+                    if p.endswith(".gz"):
                         return GzipFileSystemPathPointer(p)
                     else:
                         return FileSystemPathPointer(p)
@@ -645,31 +550,38 @@ def find(resource_name, paths=None):
     # again, assuming that one of the path components is inside a
     # zipfile of the same name.
     if zipfile is None:
-        pieces = resource_name.split('/')
+        pieces = resource_name.split("/")
         for i in range(len(pieces)):
-            modified_name = '/'.join(pieces[:i] +
-                                     [pieces[i] + '.zip'] + pieces[i:])
+            modified_name = "/".join(pieces[:i] + [pieces[i] + ".zip"] + pieces[i:])
             try:
                 return find(modified_name, paths)
             except LookupError:
                 pass
 
     # Identify the package (i.e. the .zip file) to download.
-    resource_zipname = resource_name.split('/')[1]
-    if resource_zipname.endswith('.zip'):
-        resource_zipname = resource_zipname.rpartition('.')[0]
+    resource_zipname = resource_name.split("/")[1]
+    if resource_zipname.endswith(".zip"):
+        resource_zipname = resource_zipname.rpartition(".")[0]
     # Display a friendly error message if the resource wasn't found:
-    msg = str("Resource \33[93m{resource}\033[0m not found.\n"
-              "Please use the NLTK Downloader to obtain the resource:\n\n"
-              "\33[31m" # To display red text in terminal.
-              ">>> import nltk\n"
-              ">>> nltk.download(\'{resource}\')\n"
-              "\033[0m").format(resource=resource_zipname)
+    msg = str(
+        "Resource \33[93m{resource}\033[0m not found.\n"
+        "Please use the NLTK Downloader to obtain the resource:\n\n"
+        "\33[31m"  # To display red text in terminal.
+        ">>> import nltk\n"
+        ">>> nltk.download('{resource}')\n"
+        "\033[0m"
+    ).format(resource=resource_zipname)
     msg = textwrap_indent(msg)
 
-    msg += '\n  Searched in:' + ''.join('\n    - %r' % d for d in paths)
-    sep = '*' * 70
-    resource_not_found = '\n%s\n%s\n%s\n' % (sep, msg, sep)
+    msg += "\n  For more information see: https://www.nltk.org/data.html\n"
+
+    msg += "\n  Attempted to load \33[93m{resource_name}\033[0m\n".format(
+        resource_name=resource_name
+    )
+
+    msg += "\n  Searched in:" + "".join("\n    - %r" % d for d in paths)
+    sep = "*" * 70
+    resource_not_found = "\n%s\n%s\n%s\n" % (sep, msg, sep)
     raise LookupError(resource_not_found)
 
 
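The ".zip" retry in find() above re-derives candidate paths by splicing a zipfile name into each position of the resource path; for example:

    pieces = "corpora/wordnet/index.sense".split("/")   # illustrative resource
    i = 1
    "/".join(pieces[:i] + [pieces[i] + ".zip"] + pieces[i:])
    # -> 'corpora/wordnet.zip/wordnet/index.sense'
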
@@ -686,16 +598,16 @@ def retrieve(resource_url, filename=None, verbose=True):
     """
     resource_url = normalize_resource_url(resource_url)
     if filename is None:
-        if resource_url.startswith('file:'):
+        if resource_url.startswith("file:"):
             filename = os.path.split(resource_url)[-1]
         else:
-            filename = re.sub(r'(^\w+:)?.*/', '', resource_url)
+            filename = re.sub(r"(^\w+:)?.*/", "", resource_url)
     if os.path.exists(filename):
         filename = os.path.abspath(filename)
         raise ValueError("File %r already exists!" % filename)
 
     if verbose:
-        print('Retrieving %r, saving to %r' % (resource_url, filename))
+        print("Retrieving %r, saving to %r" % (resource_url, filename))
 
     # Open the input & output streams.
     infile = _open(resource_url)
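
A usage sketch of retrieve() (the resource and target filename are
illustrative; retrieve() raises ValueError if the target already exists):

    from nltk.data import retrieve

    # copy a grammar out of the data package into the current directory
    retrieve("grammars/sample_grammars/sem2.fcfg", filename="sem2.fcfg")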
@@ -715,42 +627,49 @@ def retrieve(resource_url, filename=None, verbose=True):
 #: load() method.  Keys are format names, and values are format
 #: descriptions.
 FORMATS = {
-    'pickle': "A serialized python object, stored using the pickle module.",
-    'json': "A serialized python object, stored using the json module.",
-    'yaml': "A serialized python object, stored using the yaml module.",
-    'cfg': "A context free grammar.",
-    'pcfg': "A probabilistic CFG.",
-    'fcfg': "A feature CFG.",
-    'fol': "A list of first order logic expressions, parsed with "
-            "nltk.sem.logic.Expression.fromstring.",
-    'logic': "A list of first order logic expressions, parsed with "
-            "nltk.sem.logic.LogicParser.  Requires an additional logic_parser "
-            "parameter",
-    'val': "A semantic valuation, parsed by nltk.sem.Valuation.fromstring.",
-    'raw': "The raw (byte string) contents of a file.",
-    'text': "The raw (unicode string) contents of a file. "
+    "pickle": "A serialized python object, stored using the pickle module.",
+    "json": "A serialized python object, stored using the json module.",
+    "yaml": "A serialized python object, stored using the yaml module.",
+    "cfg": "A context free grammar.",
+    "pcfg": "A probabilistic CFG.",
+    "fcfg": "A feature CFG.",
+    "fol": "A list of first order logic expressions, parsed with "
+    "nltk.sem.logic.Expression.fromstring.",
+    "logic": "A list of first order logic expressions, parsed with "
+    "nltk.sem.logic.LogicParser.  Requires an additional logic_parser "
+    "parameter",
+    "val": "A semantic valuation, parsed by nltk.sem.Valuation.fromstring.",
+    "raw": "The raw (byte string) contents of a file.",
+    "text": "The raw (unicode string) contents of a file. ",
 }
 
 #: A dictionary mapping from file extensions to format names, used
 #: by load() when format="auto" to decide the format for a
 #: given resource url.
 AUTO_FORMATS = {
-    'pickle': 'pickle',
-    'json': 'json',
-    'yaml': 'yaml',
-    'cfg': 'cfg',
-    'pcfg': 'pcfg',
-    'fcfg': 'fcfg',
-    'fol': 'fol',
-    'logic': 'logic',
-    'val': 'val',
-    'txt': 'text',
-    'text': 'text',
+    "pickle": "pickle",
+    "json": "json",
+    "yaml": "yaml",
+    "cfg": "cfg",
+    "pcfg": "pcfg",
+    "fcfg": "fcfg",
+    "fol": "fol",
+    "logic": "logic",
+    "val": "val",
+    "txt": "text",
+    "text": "text",
 }
 
 
-def load(resource_url, format='auto', cache=True, verbose=False,
-         logic_parser=None, fstruct_reader=None, encoding=None):
+def load(
+    resource_url,
+    format="auto",
+    cache=True,
+    verbose=False,
+    logic_parser=None,
+    fstruct_reader=None,
+    encoding=None,
+):
     """
     Load a given resource from the NLTK data package.  The following
     resource formats are currently supported:
@@ -783,9 +702,7 @@ def load(resource_url, format='auto', cache=True, verbose=False,
     :type cache: bool
     :param cache: If true, add this resource to a cache.  If load()
         finds a resource in its cache, then it will return it from the
-        cache rather than loading it.  The cache uses weak references,
-        so a resource wil automatically be expunged from the cache
-        when no more objects are using it.
+        cache rather than loading it.
     :type verbose: bool
     :param verbose: If true, print a message when loading a resource.
         Messages are not displayed when a resource is retrieved from
@@ -803,52 +720,55 @@ def load(resource_url, format='auto', cache=True, verbose=False,
     resource_url = add_py3_data(resource_url)
 
     # Determine the format of the resource.
-    if format == 'auto':
-        resource_url_parts = resource_url.split('.')
+    if format == "auto":
+        resource_url_parts = resource_url.split(".")
         ext = resource_url_parts[-1]
-        if ext == 'gz':
+        if ext == "gz":
             ext = resource_url_parts[-2]
         format = AUTO_FORMATS.get(ext)
         if format is None:
-            raise ValueError('Could not determine format for %s based '
-                             'on its file\nextension; use the "format" '
-                             'argument to specify the format explicitly.'
-                             % resource_url)
+            raise ValueError(
+                "Could not determine format for %s based "
+                'on its file\nextension; use the "format" '
+                "argument to specify the format explicitly." % resource_url
+            )
 
     if format not in FORMATS:
-        raise ValueError('Unknown format type: %s!' % (format,))
+        raise ValueError("Unknown format type: %s!" % (format,))
 
     # If we've cached the resource, then just return it.
     if cache:
         resource_val = _resource_cache.get((resource_url, format))
         if resource_val is not None:
             if verbose:
-                print('<<Using cached copy of %s>>' % (resource_url,))
+                print("<<Using cached copy of %s>>" % (resource_url,))
             return resource_val
 
     # Let the user know what's going on.
     if verbose:
-        print('<<Loading %s>>' % (resource_url,))
+        print("<<Loading %s>>" % (resource_url,))
 
     # Load the resource.
     opened_resource = _open(resource_url)
 
-    if format == 'raw':
+    if format == "raw":
         resource_val = opened_resource.read()
-    elif format == 'pickle':
+    elif format == "pickle":
         resource_val = pickle.load(opened_resource)
-    elif format == 'json':
+    elif format == "json":
         import json
         from nltk.jsontags import json_tags
+
         resource_val = json.load(opened_resource)
         tag = None
         if len(resource_val) != 1:
             tag = next(resource_val.keys())
         if tag not in json_tags:
-            raise ValueError('Unknown json tag.')
-    elif format == 'yaml':
+            raise ValueError("Unknown json tag.")
+    elif format == "yaml":
         import yaml
-        resource_val = yaml.load(opened_resource)
+
+        resource_val = yaml.safe_load(opened_resource)
     else:
         # The resource is a text format.
         binary_data = opened_resource.read()
@@ -856,34 +776,39 @@ def load(resource_url, format='auto', cache=True, verbose=False,
             string_data = binary_data.decode(encoding)
         else:
             try:
-                string_data = binary_data.decode('utf-8')
+                string_data = binary_data.decode("utf-8")
             except UnicodeDecodeError:
-                string_data = binary_data.decode('latin-1')
-        if format == 'text':
+                string_data = binary_data.decode("latin-1")
+        if format == "text":
             resource_val = string_data
-        elif format == 'cfg':
-            resource_val = nltk.grammar.CFG.fromstring(
-                string_data, encoding=encoding)
-        elif format == 'pcfg':
-            resource_val = nltk.grammar.PCFG.fromstring(
-                string_data, encoding=encoding)
-        elif format == 'fcfg':
+        elif format == "cfg":
+            resource_val = nltk.grammar.CFG.fromstring(string_data, encoding=encoding)
+        elif format == "pcfg":
+            resource_val = nltk.grammar.PCFG.fromstring(string_data, encoding=encoding)
+        elif format == "fcfg":
             resource_val = nltk.grammar.FeatureGrammar.fromstring(
-                string_data, logic_parser=logic_parser,
-                fstruct_reader=fstruct_reader, encoding=encoding)
-        elif format == 'fol':
+                string_data,
+                logic_parser=logic_parser,
+                fstruct_reader=fstruct_reader,
+                encoding=encoding,
+            )
+        elif format == "fol":
             resource_val = nltk.sem.read_logic(
-                string_data, logic_parser=nltk.sem.logic.LogicParser(),
-                encoding=encoding)
-        elif format == 'logic':
+                string_data,
+                logic_parser=nltk.sem.logic.LogicParser(),
+                encoding=encoding,
+            )
+        elif format == "logic":
             resource_val = nltk.sem.read_logic(
-                string_data, logic_parser=logic_parser, encoding=encoding)
-        elif format == 'val':
-            resource_val = nltk.sem.read_valuation(
-                string_data, encoding=encoding)
+                string_data, logic_parser=logic_parser, encoding=encoding
+            )
+        elif format == "val":
+            resource_val = nltk.sem.read_valuation(string_data, encoding=encoding)
         else:
-            raise AssertionError("Internal NLTK error: Format %s isn't "
-                                 "handled by nltk.data.load()" % (format,))
+            raise AssertionError(
+                "Internal NLTK error: Format %s isn't "
+                "handled by nltk.data.load()" % (format,)
+            )
 
     opened_resource.close()
 
@@ -901,7 +826,7 @@ def load(resource_url, format='auto', cache=True, verbose=False,
     return resource_val
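
A quick usage sketch of load() and its cache (assuming the punkt models
are installed; the second call returns the cached object):

    import nltk.data

    tok = nltk.data.load("tokenizers/punkt/english.pickle")
    tok = nltk.data.load("tokenizers/punkt/english.pickle", verbose=True)
    # prints: <<Using cached copy of tokenizers/punkt/english.pickle>>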
 
 
-def show_cfg(resource_url, escape='##'):
+def show_cfg(resource_url, escape="##"):
     """
     Write out a grammar file, ignoring escaped and empty lines.
 
@@ -913,12 +838,12 @@ def show_cfg(resource_url, escape='##'):
     :param escape: Prepended string that signals lines to be ignored
     """
     resource_url = normalize_resource_url(resource_url)
-    resource_val = load(resource_url, format='text', cache=False)
+    resource_val = load(resource_url, format="text", cache=False)
     lines = resource_val.splitlines()
     for l in lines:
         if l.startswith(escape):
             continue
-        if re.match('^$', l):
+        if re.match("^$", l):
             continue
         print(l)
 
@@ -948,24 +873,21 @@ def _open(resource_url):
     resource_url = normalize_resource_url(resource_url)
     protocol, path_ = split_resource_url(resource_url)
 
-    if protocol is None or protocol.lower() == 'nltk':
-        return find(path_, path + ['']).open()
-    elif protocol.lower() == 'file':
+    if protocol is None or protocol.lower() == "nltk":
+        return find(path_, path + [""]).open()
+    elif protocol.lower() == "file":
         # urllib might not use mode='rb', so handle this one ourselves:
-        return find(path_, ['']).open()
+        return find(path_, [""]).open()
     else:
         return urlopen(resource_url)
 
+
 ######################################################################
 # Lazy Resource Loader
 ######################################################################
 
-# We shouldn't apply @python_2_unicode_compatible
-# decorator to LazyLoader, this is resource.__class__ responsibility.
-
 
 class LazyLoader(object):
-
     @py3_data
     def __init__(self, _path):
         self._path = _path
@@ -990,6 +912,7 @@ class LazyLoader(object):
         # __class__ to something new:
         return repr(self)
 
+
 ######################################################################
 # Open-On-Demand ZipFile
 ######################################################################
@@ -1005,10 +928,11 @@ class OpenOnDemandZipFile(zipfile.ZipFile):
     file-like object (to allow re-opening).  ``OpenOnDemandZipFile`` is
     read-only (i.e. ``write()`` and ``writestr()`` are disabled).
     """
+
     @py3_data
     def __init__(self, filename):
-        if not isinstance(filename, string_types):
-            raise TypeError('ReopenableZipFile filename must be a string')
+        if not isinstance(filename, str):
+            raise TypeError("ReopenableZipFile filename must be a string")
         zipfile.ZipFile.__init__(self, filename)
         assert self.filename == filename
         self.close()
@@ -1018,7 +942,7 @@ class OpenOnDemandZipFile(zipfile.ZipFile):
 
     def read(self, name):
         assert self.fp is None
-        self.fp = open(self.filename, 'rb')
+        self.fp = open(self.filename, "rb")
         value = zipfile.ZipFile.read(self, name)
         # _fileRefCnt needs to be set for Python 2 and 3 compatible code.
         # Since we only opened one file here, we add 1.
@@ -1028,17 +952,18 @@ class OpenOnDemandZipFile(zipfile.ZipFile):
 
     def write(self, *args, **kwargs):
         """:raise NotImplementedError: OpenOnDemandZipfile is read-only"""
-        raise NotImplementedError('OpenOnDemandZipfile is read-only')
+        raise NotImplementedError("OpenOnDemandZipfile is read-only")
 
     def writestr(self, *args, **kwargs):
         """:raise NotImplementedError: OpenOnDemandZipfile is read-only"""
-        raise NotImplementedError('OpenOnDemandZipfile is read-only')
+        raise NotImplementedError("OpenOnDemandZipfile is read-only")
 
     def __repr__(self):
-        return repr(str('OpenOnDemandZipFile(%r)') % self.filename)
+        return repr(str("OpenOnDemandZipFile(%r)") % self.filename)
+
 
 ######################################################################
-#{ Seekable Unicode Stream Reader
+# { Seekable Unicode Stream Reader
 ######################################################################
 
 
@@ -1058,10 +983,11 @@ class SeekableUnicodeStreamReader(object):
     this shouldn't cause a problem with any of python's builtin
     unicode encodings.
     """
+
     DEBUG = True  # : If true, then perform extra sanity checks.
 
     @py3_data
-    def __init__(self, stream, encoding, errors='strict'):
+    def __init__(self, stream, encoding, errors="strict"):
         # Rewind the stream to its beginning.
         stream.seek(0)
 
@@ -1081,7 +1007,7 @@ class SeekableUnicodeStreamReader(object):
         """The function that is used to decode byte strings into
            unicode strings."""
 
-        self.bytebuffer = b''
+        self.bytebuffer = b""
         """A buffer to use bytes that have been read but have not yet
            been decoded.  This is only used when the final bytes from
            a read do not form a complete encoding for a character."""
@@ -1113,9 +1039,9 @@ class SeekableUnicodeStreamReader(object):
         """The length of the byte order marker at the beginning of
            the stream (or None for no byte order marker)."""
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Read methods
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def read(self, size=None):
         """
@@ -1131,12 +1057,19 @@ class SeekableUnicodeStreamReader(object):
 
         # If linebuffer is not empty, then include it in the result
         if self.linebuffer:
-            chars = ''.join(self.linebuffer) + chars
+            chars = "".join(self.linebuffer) + chars
             self.linebuffer = None
             self._rewind_numchars = None
 
         return chars
 
+    def discard_line(self):
+        if self.linebuffer and len(self.linebuffer) > 1:
+            line = self.linebuffer.pop(0)
+            self._rewind_numchars += len(line)
+        else:
+            self.stream.readline()
+
     def readline(self, size=None):
         """
         Read a line of text, decode it using this reader's encoding,
@@ -1156,7 +1089,7 @@ class SeekableUnicodeStreamReader(object):
             return line
 
         readsize = size or 72
-        chars = ''
+        chars = ""
 
         # If there's a remaining incomplete line in the buffer, add it.
         if self.linebuffer:
@@ -1169,7 +1102,7 @@ class SeekableUnicodeStreamReader(object):
 
             # If we're at a '\r', then read one extra character, since
             # it might be a '\n', to get the proper line ending.
-            if new_chars and new_chars.endswith('\r'):
+            if new_chars and new_chars.endswith("\r"):
                 new_chars += self._read(1)
 
             chars += new_chars
@@ -1177,8 +1110,7 @@ class SeekableUnicodeStreamReader(object):
             if len(lines) > 1:
                 line = lines[0]
                 self.linebuffer = lines[1:]
-                self._rewind_numchars = (len(new_chars) -
-                                         (len(chars) - len(line)))
+                self._rewind_numchars = len(new_chars) - (len(chars) - len(line))
                 self._rewind_checkpoint = startpos
                 break
             elif len(lines) == 1:
@@ -1224,13 +1156,18 @@ class SeekableUnicodeStreamReader(object):
         """Return self"""
         return self
 
+    def __del__(self):
+        # let garbage collector deal with still opened streams
+        if not self.closed:
+            self.close()
+
     def xreadlines(self):
         """Return self"""
         return self
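
The read/seek behaviour above can be exercised with an in-memory stream
(a sketch; the sample text is arbitrary):

    from io import BytesIO
    from nltk.data import SeekableUnicodeStreamReader

    stream = BytesIO("héllo\nwörld\n".encode("utf-8"))
    reader = SeekableUnicodeStreamReader(stream, "utf-8")
    print(reader.readline())  # 'héllo\n'
    pos = reader.tell()       # byte offset usable with seek()
    print(reader.read())      # 'wörld\n'
    reader.seek(pos)          # rewind to the start of the second line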
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Pass-through methods & properties
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     @property
     def closed(self):
@@ -1253,9 +1190,9 @@ class SeekableUnicodeStreamReader(object):
         """
         self.stream.close()
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Seek and tell
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def seek(self, offset, whence=0):
         """
@@ -1270,12 +1207,14 @@ class SeekableUnicodeStreamReader(object):
             typically be negative).
         """
         if whence == 1:
-            raise ValueError('Relative seek is not supported for '
-                             'SeekableUnicodeStreamReader -- consider '
-                             'using char_seek_forward() instead.')
+            raise ValueError(
+                "Relative seek is not supported for "
+                "SeekableUnicodeStreamReader -- consider "
+                "using char_seek_forward() instead."
+            )
         self.stream.seek(offset, whence)
         self.linebuffer = None
-        self.bytebuffer = b''
+        self.bytebuffer = b""
         self._rewind_numchars = None
         self._rewind_checkpoint = self.stream.tell()
 
@@ -1284,7 +1223,7 @@ class SeekableUnicodeStreamReader(object):
         Move the read pointer forward by ``offset`` characters.
         """
         if offset < 0:
-            raise ValueError('Negative offsets are not supported')
+            raise ValueError("Negative offsets are not supported")
         # Clear all buffers.
         self.seek(self.tell())
         # Perform the seek operation.
@@ -1301,7 +1240,7 @@ class SeekableUnicodeStreamReader(object):
         """
         if est_bytes is None:
             est_bytes = offset
-        bytes = b''
+        bytes = b""
 
         while True:
             # Read in a block of bytes.
@@ -1348,11 +1287,11 @@ class SeekableUnicodeStreamReader(object):
         orig_filepos = self.stream.tell()
 
         # Calculate an estimate of where we think the newline is.
-        bytes_read = ((orig_filepos - len(self.bytebuffer)) -
-                      self._rewind_checkpoint)
+        bytes_read = (orig_filepos - len(self.bytebuffer)) - self._rewind_checkpoint
         buf_size = sum(len(line) for line in self.linebuffer)
-        est_bytes = int((bytes_read * self._rewind_numchars /
-                         (self._rewind_numchars + buf_size)))
+        est_bytes = int(
+            (bytes_read * self._rewind_numchars / (self._rewind_numchars + buf_size))
+        )
 
         self.stream.seek(self._rewind_checkpoint)
         self._char_seek_forward(self._rewind_numchars, est_bytes)
@@ -1362,7 +1301,7 @@ class SeekableUnicodeStreamReader(object):
         if self.DEBUG:
             self.stream.seek(filepos)
             check1 = self._incr_decode(self.stream.read(50))[0]
-            check2 = ''.join(self.linebuffer)
+            check2 = "".join(self.linebuffer)
             assert check1.startswith(check2) or check2.startswith(check1)
 
         # Return to our original filepos (so we don't have to throw
@@ -1372,9 +1311,9 @@ class SeekableUnicodeStreamReader(object):
         # Return the calculated filepos
         return filepos
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Helper methods
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _read(self, size=None):
         """
@@ -1383,7 +1322,7 @@ class SeekableUnicodeStreamReader(object):
         unicode string.  ``linebuffer`` is not included in the result.
         """
         if size == 0:
-            return ''
+            return ""
 
         # Skip past the byte order marker, if present.
         if self._bom and self.stream.tell() == 0:
@@ -1428,15 +1367,15 @@ class SeekableUnicodeStreamReader(object):
         """
         while True:
             try:
-                return self.decode(bytes, 'strict')
+                return self.decode(bytes, "strict")
             except UnicodeDecodeError as exc:
                 # If the exception occurs at the end of the string,
                 # then assume that it's a truncation error.
                 if exc.end == len(bytes):
-                    return self.decode(bytes[:exc.start], self.errors)
+                    return self.decode(bytes[: exc.start], self.errors)
 
                 # Otherwise, if we're being strict, then raise it.
-                elif self.errors == 'strict':
+                elif self.errors == "strict":
                     raise
 
                 # If we're not strict, then re-process it with our
@@ -1445,20 +1384,18 @@ class SeekableUnicodeStreamReader(object):
                     return self.decode(bytes, self.errors)
 
     _BOM_TABLE = {
-        'utf8': [(codecs.BOM_UTF8, None)],
-        'utf16': [(codecs.BOM_UTF16_LE, 'utf16-le'),
-                  (codecs.BOM_UTF16_BE, 'utf16-be')],
-        'utf16le': [(codecs.BOM_UTF16_LE, None)],
-        'utf16be': [(codecs.BOM_UTF16_BE, None)],
-        'utf32': [(codecs.BOM_UTF32_LE, 'utf32-le'),
-                  (codecs.BOM_UTF32_BE, 'utf32-be')],
-        'utf32le': [(codecs.BOM_UTF32_LE, None)],
-        'utf32be': [(codecs.BOM_UTF32_BE, None)],
+        "utf8": [(codecs.BOM_UTF8, None)],
+        "utf16": [(codecs.BOM_UTF16_LE, "utf16-le"), (codecs.BOM_UTF16_BE, "utf16-be")],
+        "utf16le": [(codecs.BOM_UTF16_LE, None)],
+        "utf16be": [(codecs.BOM_UTF16_BE, None)],
+        "utf32": [(codecs.BOM_UTF32_LE, "utf32-le"), (codecs.BOM_UTF32_BE, "utf32-be")],
+        "utf32le": [(codecs.BOM_UTF32_LE, None)],
+        "utf32be": [(codecs.BOM_UTF32_BE, None)],
     }
 
     def _check_bom(self):
         # Normalize our encoding name
-        enc = re.sub('[ -]', '', self.encoding.lower())
+        enc = re.sub("[ -]", "", self.encoding.lower())
 
         # Look up our encoding in the BOM table.
         bom_info = self._BOM_TABLE.get(enc)
@@ -1478,8 +1415,22 @@ class SeekableUnicodeStreamReader(object):
         return None
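
Both helpers above lend themselves to standalone sketches: _incr_decode
treats a decode error at the very end of the byte string as truncation,
and _check_bom strips a recognised byte order mark (sample bytes made up):

    import codecs

    data = "café".encode("utf-8")[:-1]  # final character truncated
    try:
        data.decode("utf-8")
    except UnicodeDecodeError as exc:
        assert exc.end == len(data)  # truncation, not corruption
        print(data[: exc.start].decode("utf-8"))  # -> caf; tail bytes kept

    bom_data = codecs.BOM_UTF8 + b"hello"
    assert bom_data.startswith(codecs.BOM_UTF8)  # _check_bom would return 3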
 
 
-__all__ = ['path', 'PathPointer', 'FileSystemPathPointer', 'BufferedGzipFile',
-           'GzipFileSystemPathPointer', 'GzipFileSystemPathPointer',
-           'find', 'retrieve', 'FORMATS', 'AUTO_FORMATS', 'load',
-           'show_cfg', 'clear_cache', 'LazyLoader', 'OpenOnDemandZipFile',
-           'GzipFileSystemPathPointer', 'SeekableUnicodeStreamReader']
+__all__ = [
+    "path",
+    "PathPointer",
+    "FileSystemPathPointer",
+    "BufferedGzipFile",
+    "GzipFileSystemPathPointer",
+    "GzipFileSystemPathPointer",
+    "find",
+    "retrieve",
+    "FORMATS",
+    "AUTO_FORMATS",
+    "load",
+    "show_cfg",
+    "clear_cache",
+    "LazyLoader",
+    "OpenOnDemandZipFile",
+    "GzipFileSystemPathPointer",
+    "SeekableUnicodeStreamReader",
+]
diff --git a/nlp_resource_data/nltk/data.pyc b/nlp_resource_data/nltk/data.pyc
deleted file mode 100755 (executable)
index fe91094..0000000
Binary files a/nlp_resource_data/nltk/data.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/decorators.py b/nlp_resource_data/nltk/decorators.py
old mode 100755 (executable)
new mode 100644 (file)
index 6350eae..8ab4f7d
@@ -5,8 +5,8 @@ http://www.phyast.pitt.edu/~micheles/python/documentation.html
 
 Included in NLTK for its support of a nice memoization decorator.
 """
-from __future__ import print_function
-__docformat__ = 'restructuredtext en'
+
+__docformat__ = "restructuredtext en"
 
 ## The basic trick is to generate the source code for the decorated function
 ## with the right signature and to evaluate it.
@@ -19,15 +19,26 @@ import sys
 
 # Hack to keep NLTK's "tokenize" module from colliding with the "tokenize" in
 # the Python standard library.
-old_sys_path = sys.path[:]
-sys.path = [p for p in sys.path if "nltk" not in p]
+OLD_SYS_PATH = sys.path[:]
+sys.path = [p for p in sys.path if p and "nltk" not in p]
 import inspect
-sys.path = old_sys_path
 
-try:
-    set
-except NameError:
-    from sets import Set as set
+sys.path = OLD_SYS_PATH
+
+def __legacysignature(signature):
+    """
+    For retrocompatibility reasons, we don't use a standard Signature.
+    Instead, we use the string generated by this method.
+    Basically, from a Signature we create a string and remove the default values.
+    """
+    listsignature = str(signature)[1:-1].split(",")
+    for counter, param in enumerate(listsignature):
+        if param.count("=") > 0:
+            listsignature[counter] = param[0:param.index("=")].strip()
+        else:
+            listsignature[counter] = param.strip()
+    return ", ".join(listsignature)
+
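
(A standalone sketch of the conversion __legacysignature performs; the
sample function is hypothetical:)

    import inspect

    def sample(self, x=1, y=2, *args, **kw):
        pass

    sig = inspect.signature(sample)  # <Signature (self, x=1, y=2, *args, **kw)>
    parts = str(sig)[1:-1].split(",")
    print(", ".join(p.split("=")[0].strip() for p in parts))
    # -> self, x, y, *args, **kw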
 
 def getinfo(func):
     """
@@ -36,6 +47,7 @@ def getinfo(func):
     - argnames (the names of the arguments : list)
     - defaults (the values of the default arguments : tuple)
     - signature (the signature : str)
+    - fullsignature (the full signature : Signature)
     - doc (the docstring : str)
     - module (the module name : str)
     - dict (the function __dict__ : str)
@@ -54,45 +66,57 @@ def getinfo(func):
 
     >>> info["signature"]
     'self, x, y, *args, **kw'
+
+    >>> info["fullsignature"]
+    <Signature (self, x=1, y=2, *args, **kw)>
     """
     assert inspect.ismethod(func) or inspect.isfunction(func)
-    if sys.version_info[0] >= 3:
-        argspec = inspect.getfullargspec(func)
-    else:
-        argspec = inspect.getargspec(func)
-    regargs, varargs, varkwargs, defaults = argspec[:4]
+    argspec = inspect.getfullargspec(func)
+    regargs, varargs, varkwargs = argspec[:3]
     argnames = list(regargs)
     if varargs:
         argnames.append(varargs)
     if varkwargs:
         argnames.append(varkwargs)
-    signature = inspect.formatargspec(regargs, varargs, varkwargs, defaults,
-                                      formatvalue=lambda value: "")[1:-1]
+    fullsignature = inspect.signature(func)
+    # Convert Signature to str
+    signature = __legacysignature(fullsignature)
+
 
     # pypy compatibility
-    if hasattr(func, '__closure__'):
+    if hasattr(func, "__closure__"):
         _closure = func.__closure__
         _globals = func.__globals__
     else:
         _closure = func.func_closure
         _globals = func.func_globals
 
-    return dict(name=func.__name__, argnames=argnames, signature=signature,
-                defaults = func.__defaults__, doc=func.__doc__,
-                module=func.__module__, dict=func.__dict__,
-                globals=_globals, closure=_closure)
+    return dict(
+        name=func.__name__,
+        argnames=argnames,
+        signature=signature,
+        fullsignature=fullsignature,
+        defaults=func.__defaults__,
+        doc=func.__doc__,
+        module=func.__module__,
+        dict=func.__dict__,
+        globals=_globals,
+        closure=_closure,
+    )
+
 
-# akin to functools.update_wrapper
 def update_wrapper(wrapper, model, infodict=None):
+    " akin to functools.update_wrapper "
     infodict = infodict or getinfo(model)
-    wrapper.__name__ = infodict['name']
-    wrapper.__doc__ = infodict['doc']
-    wrapper.__module__ = infodict['module']
-    wrapper.__dict__.update(infodict['dict'])
-    wrapper.__defaults__ = infodict['defaults']
+    wrapper.__name__ = infodict["name"]
+    wrapper.__doc__ = infodict["doc"]
+    wrapper.__module__ = infodict["module"]
+    wrapper.__dict__.update(infodict["dict"])
+    wrapper.__defaults__ = infodict["defaults"]
     wrapper.undecorated = model
     return wrapper
 
+
 def new_wrapper(wrapper, model):
     """
     An improvement over functools.update_wrapper. The wrapper is a generic
@@ -103,17 +127,20 @@ def new_wrapper(wrapper, model):
     """
     if isinstance(model, dict):
         infodict = model
-    else: # assume model is a function
+    else:  # assume model is a function
         infodict = getinfo(model)
-    assert not '_wrapper_' in infodict["argnames"], (
-        '"_wrapper_" is a reserved argument name!')
+    assert (
+        not "_wrapper_" in infodict["argnames"]
+    ), '"_wrapper_" is a reserved argument name!'
     src = "lambda %(signature)s: _wrapper_(%(signature)s)" % infodict
     funcopy = eval(src, dict(_wrapper_=wrapper))
     return update_wrapper(funcopy, model, infodict)
 
+
 # helper used in decorator_factory
 def __call__(self, func):
-    return new_wrapper(lambda *a, **k : self.call(func, *a, **k), func)
+    return new_wrapper(lambda *a, **k: self.call(func, *a, **k), func)
+
 
 def decorator_factory(cls):
     """
@@ -123,15 +150,16 @@ def decorator_factory(cls):
     method.
     """
     attrs = set(dir(cls))
-    if '__call__' in attrs:
-        raise TypeError('You cannot decorate a class with a nontrivial '
-                        '__call__ method')
-    if 'call' not in attrs:
-        raise TypeError('You cannot decorate a class without a '
-                        '.call method')
+    if "__call__" in attrs:
+        raise TypeError(
+            "You cannot decorate a class with a nontrivial " "__call__ method"
+        )
+    if "call" not in attrs:
+        raise TypeError("You cannot decorate a class without a " ".call method")
     cls.__call__ = __call__
     return cls
 
+
 def decorator(caller):
     """
     General purpose decorator factory: takes a caller function as
@@ -164,17 +192,21 @@ def decorator(caller):
     """
     if inspect.isclass(caller):
         return decorator_factory(caller)
-    def _decorator(func): # the real meat is here
+
+    def _decorator(func):  # the real meat is here
         infodict = getinfo(func)
-        argnames = infodict['argnames']
-        assert not ('_call_' in argnames or '_func_' in argnames), (
-            'You cannot use _call_ or _func_ as argument names!')
+        argnames = infodict["argnames"]
+        assert not (
+            "_call_" in argnames or "_func_" in argnames
+        ), "You cannot use _call_ or _func_ as argument names!"
         src = "lambda %(signature)s: _call_(_func_, %(signature)s)" % infodict
         # import sys; print >> sys.stderr, src # for debugging purposes
         dec_func = eval(src, dict(_func_=func, _call_=caller))
         return update_wrapper(dec_func, func, infodict)
+
     return update_wrapper(_decorator, caller)
 
+
 def getattr_(obj, name, default_thunk):
     "Similar to .setdefault in dictionaries."
     try:
@@ -184,16 +216,16 @@ def getattr_(obj, name, default_thunk):
         setattr(obj, name, default)
         return default
 
+
 @decorator
 def memoize(func, *args):
     dic = getattr_(func, "memoize_dic", dict)
     # memoize_dic is created at the first call
     if args in dic:
         return dic[args]
-    else:
-        result = func(*args)
-        dic[args] = result
-        return result
+    result = func(*args)
+    dic[args] = result
+    return result
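
A usage sketch of the memoize decorator defined above (the function is
illustrative; repeated calls with the same arguments hit memoize_dic):

    @memoize
    def fib(n):
        return n if n < 2 else fib(n - 1) + fib(n - 2)

    print(fib(30))  # 832040; each subproblem is computed once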
 
 
 ##########################     LEGALESE    ###############################
diff --git a/nlp_resource_data/nltk/decorators.pyc b/nlp_resource_data/nltk/decorators.pyc
deleted file mode 100755 (executable)
index 5e3bd00..0000000
Binary files a/nlp_resource_data/nltk/decorators.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/downloader.py b/nlp_resource_data/nltk/downloader.py
old mode 100755 (executable)
new mode 100644 (file)
index 452fade..097e574
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus & Model Downloader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -66,8 +66,7 @@ or::
 
     python -m nltk.downloader [-d DATADIR] [-q] [-f] [-k] PACKAGE_IDS
 """
-#----------------------------------------------------------------------
-from __future__ import print_function, division, unicode_literals
+# ----------------------------------------------------------------------
 
 """
 
@@ -159,37 +158,43 @@ they didn't download that model.
 default: unzip or not?
 
 """
-import time, os, zipfile, sys, textwrap, threading, itertools, shutil
+import time, os, zipfile, sys, textwrap, threading, itertools, shutil, functools
+import subprocess
 from hashlib import md5
+from xml.etree import ElementTree
 
 try:
     TKINTER = True
-    from six.moves.tkinter import (Tk, Frame, Label, Entry, Button, Canvas,
-                                   Menu, IntVar, TclError)
-    from six.moves.tkinter_messagebox import showerror
+    from tkinter import (
+        Tk,
+        Frame,
+        Label,
+        Entry,
+        Button,
+        Canvas,
+        Menu,
+        IntVar,
+        TclError,
+    )
+    from tkinter.messagebox import showerror
     from nltk.draw.table import Table
     from nltk.draw.util import ShowText
-except:
+except ImportError:
     TKINTER = False
     TclError = ValueError
 
-from xml.etree import ElementTree
-
-from six import string_types, text_type
-from six.moves import input
-from six.moves.urllib.request import urlopen
-from six.moves.urllib.error import HTTPError, URLError
+from urllib.request import urlopen
+from urllib.error import HTTPError, URLError
 
 import nltk
-from nltk.compat import python_2_unicode_compatible
-#urllib2 = nltk.internals.import_from_stdlib('urllib2')
+
+# urllib2 = nltk.internals.import_from_stdlib('urllib2')
 
 
 ######################################################################
 # Directory entry objects (from the data server's index file)
 ######################################################################
 
-@python_2_unicode_compatible
 class Package(object):
     """
     A directory entry for a downloadable package.  These entries are
@@ -198,13 +203,24 @@ class Package(object):
     that file is a zip file, then it can be automatically decompressed
     when the package is installed.
     """
-    def __init__(self, id, url, name=None, subdir='',
-                 size=None, unzipped_size=None,
-                 checksum=None, svn_revision=None,
-                 copyright='Unknown', contact='Unknown',
-                 license='Unknown', author='Unknown',
-                 unzip=True,
-                 **kw):
+
+    def __init__(
+        self,
+        id,
+        url,
+        name=None,
+        subdir="",
+        size=None,
+        unzipped_size=None,
+        checksum=None,
+        svn_revision=None,
+        copyright="Unknown",
+        contact="Unknown",
+        license="Unknown",
+        author="Unknown",
+        unzip=True,
+        **kw
+    ):
         self.id = id
         """A unique identifier for this package."""
 
@@ -244,13 +260,13 @@ class Package(object):
         self.author = author
         """Author of this package."""
 
-        ext = os.path.splitext(url.split('/')[-1])[1]
-        self.filename = os.path.join(subdir, id+ext)
+        ext = os.path.splitext(url.split("/")[-1])[1]
+        self.filename = os.path.join(subdir, id + ext)
         """The filename that should be used for this package's file.  It
            is formed by joining ``self.subdir`` with ``self.id``, and
            using the same extension as ``url``."""
 
-        self.unzip = bool(int(unzip)) # '0' or '1'
+        self.unzip = bool(int(unzip))  # '0' or '1'
         """A flag indicating whether this corpus should be unzipped by
            default."""
 
@@ -259,25 +275,26 @@ class Package(object):
 
     @staticmethod
     def fromxml(xml):
-        if isinstance(xml, string_types):
+        if isinstance(xml, str):
             xml = ElementTree.parse(xml)
         for key in xml.attrib:
-            xml.attrib[key] = text_type(xml.attrib[key])
+            xml.attrib[key] = str(xml.attrib[key])
         return Package(**xml.attrib)
 
     def __lt__(self, other):
         return self.id < other.id
 
     def __repr__(self):
-        return '<Package %s>' % self.id
+        return "<Package %s>" % self.id
+
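
(A sketch of building a Package from an index entry; the XML attributes
below are entirely made up:)

    from xml.etree import ElementTree
    from nltk.downloader import Package

    xml = ElementTree.fromstring(
        '<package id="punkt" url="https://example.org/punkt.zip"'
        ' size="100" unzipped_size="200" checksum="0"'
        ' subdir="tokenizers" unzip="1"/>'
    )
    pkg = Package.fromxml(xml)
    print(pkg.filename)  # -> tokenizers/punkt.zip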
 
-@python_2_unicode_compatible
 class Collection(object):
     """
     A directory entry for a collection of downloadable packages.
     These entries are extracted from the XML index file that is
     downloaded by ``Downloader``.
     """
+
     def __init__(self, id, children, name=None, **kw):
         self.id = id
         """A unique identifier for this collection."""
@@ -298,58 +315,103 @@ class Collection(object):
 
     @staticmethod
     def fromxml(xml):
-        if isinstance(xml, string_types):
+        if isinstance(xml, str):
             xml = ElementTree.parse(xml)
         for key in xml.attrib:
-            xml.attrib[key] = text_type(xml.attrib[key])
-        children = [child.get('ref') for child in xml.findall('item')]
+            xml.attrib[key] = str(xml.attrib[key])
+        children = [child.get("ref") for child in xml.findall("item")]
         return Collection(children=children, **xml.attrib)
 
     def __lt__(self, other):
         return self.id < other.id
 
     def __repr__(self):
-        return '<Collection %s>' % self.id
+        return "<Collection %s>" % self.id
+
 
 ######################################################################
 # Message Passing Objects
 ######################################################################
 
+
 class DownloaderMessage(object):
     """A status message object, used by ``incr_download`` to
        communicate its progress."""
+
+
 class StartCollectionMessage(DownloaderMessage):
     """Data server has started working on a collection of packages."""
-    def __init__(self, collection): self.collection = collection
+
+    def __init__(self, collection):
+        self.collection = collection
+
+
 class FinishCollectionMessage(DownloaderMessage):
     """Data server has finished working on a collection of packages."""
-    def __init__(self, collection): self.collection = collection
+
+    def __init__(self, collection):
+        self.collection = collection
+
+
 class StartPackageMessage(DownloaderMessage):
     """Data server has started working on a package."""
-    def __init__(self, package): self.package = package
+
+    def __init__(self, package):
+        self.package = package
+
+
 class FinishPackageMessage(DownloaderMessage):
     """Data server has finished working on a package."""
-    def __init__(self, package): self.package = package
+
+    def __init__(self, package):
+        self.package = package
+
+
 class StartDownloadMessage(DownloaderMessage):
     """Data server has started downloading a package."""
-    def __init__(self, package): self.package = package
+
+    def __init__(self, package):
+        self.package = package
+
+
 class FinishDownloadMessage(DownloaderMessage):
     """Data server has finished downloading a package."""
-    def __init__(self, package): self.package = package
+
+    def __init__(self, package):
+        self.package = package
+
+
 class StartUnzipMessage(DownloaderMessage):
     """Data server has started unzipping a package."""
-    def __init__(self, package): self.package = package
+
+    def __init__(self, package):
+        self.package = package
+
+
 class FinishUnzipMessage(DownloaderMessage):
     """Data server has finished unzipping a package."""
-    def __init__(self, package): self.package = package
+
+    def __init__(self, package):
+        self.package = package
+
+
 class UpToDateMessage(DownloaderMessage):
     """The package download file is already up-to-date"""
-    def __init__(self, package): self.package = package
+
+    def __init__(self, package):
+        self.package = package
+
+
 class StaleMessage(DownloaderMessage):
     """The package download file is out-of-date or corrupt"""
-    def __init__(self, package): self.package = package
+
+    def __init__(self, package):
+        self.package = package
+
+
 class ErrorMessage(DownloaderMessage):
     """Data server encountered an error"""
+
     def __init__(self, package, message):
         self.package = package
         if isinstance(message, Exception):
@@ -357,57 +419,66 @@ class ErrorMessage(DownloaderMessage):
         else:
             self.message = message
 
+
 class ProgressMessage(DownloaderMessage):
     """Indicates how much progress the data server has made"""
-    def __init__(self, progress): self.progress = progress
+
+    def __init__(self, progress):
+        self.progress = progress
+
+
 class SelectDownloadDirMessage(DownloaderMessage):
     """Indicates what download directory the data server is using"""
-    def __init__(self, download_dir): self.download_dir = download_dir
+
+    def __init__(self, download_dir):
+        self.download_dir = download_dir
+
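
The messages above are consumed by iterating over incr_download() (a
sketch using the Downloader class defined below; needs network access):

    from nltk.downloader import Downloader, ErrorMessage, ProgressMessage

    d = Downloader()
    for msg in d.incr_download("punkt"):
        if isinstance(msg, ProgressMessage):
            print("%d%% done" % msg.progress)
        elif isinstance(msg, ErrorMessage):
            print(msg.message)
            break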
 
 ######################################################################
 # NLTK Data Server
 ######################################################################
 
+
 class Downloader(object):
     """
     A class used to access the NLTK data server, which can be used to
     download corpora and other data packages.
     """
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Configuration
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
-    INDEX_TIMEOUT = 60*60 # 1 hour
+    INDEX_TIMEOUT = 60 * 60  # 1 hour
     """The amount of time after which the cached copy of the data
        server index will be considered 'stale,' and will be
        re-downloaded."""
 
-    DEFAULT_URL = 'https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml'
+    DEFAULT_URL = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml"
     """The default URL for the NLTK data server's index.  An
        alternative URL can be specified when creating a new
        ``Downloader`` object."""
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Status Constants
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
-    INSTALLED = 'installed'
+    INSTALLED = "installed"
     """A status string indicating that a package or collection is
        installed and up-to-date."""
-    NOT_INSTALLED = 'not installed'
+    NOT_INSTALLED = "not installed"
     """A status string indicating that a package or collection is
        not installed."""
-    STALE = 'out of date'
+    STALE = "out of date"
     """A status string indicating that a package or collection is
        corrupt or out-of-date."""
-    PARTIAL = 'partial'
+    PARTIAL = "partial"
     """A status string indicating that a collection is partially
        installed (i.e., only some of its packages are installed.)"""
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Constructor
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def __init__(self, server_index_url=None, download_dir=None):
         self._url = server_index_url or self.DEFAULT_URL
@@ -442,50 +513,69 @@ class Downloader(object):
         if self._download_dir is None:
             self._download_dir = self.default_download_dir()
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Information
-    #/////////////////////////////////////////////////////////////////
-
-    def list(self, download_dir=None, show_packages=True,
-             show_collections=True, header=True, more_prompt=False,
-             skip_installed=False):
-        lines = 0 # for more_prompt
+    # /////////////////////////////////////////////////////////////////
+
+    def list(
+        self,
+        download_dir=None,
+        show_packages=True,
+        show_collections=True,
+        header=True,
+        more_prompt=False,
+        skip_installed=False,
+    ):
+        lines = 0  # for more_prompt
         if download_dir is None:
             download_dir = self._download_dir
-            print('Using default data directory (%s)' % download_dir)
+            print("Using default data directory (%s)" % download_dir)
         if header:
-            print('='*(26+len(self._url)))
-            print(' Data server index for <%s>' % self._url)
-            print('='*(26+len(self._url)))
-            lines += 3 # for more_prompt
+            print("=" * (26 + len(self._url)))
+            print(" Data server index for <%s>" % self._url)
+            print("=" * (26 + len(self._url)))
+            lines += 3  # for more_prompt
         stale = partial = False
 
         categories = []
-        if show_packages: categories.append('packages')
-        if show_collections: categories.append('collections')
+        if show_packages:
+            categories.append("packages")
+        if show_collections:
+            categories.append("collections")
         for category in categories:
-            print('%s:' % category.capitalize())
-            lines += 1 # for more_prompt
+            print("%s:" % category.capitalize())
+            lines += 1  # for more_prompt
             for info in sorted(getattr(self, category)(), key=str):
                 status = self.status(info, download_dir)
-                if status == self.INSTALLED and skip_installed: continue
-                if status == self.STALE: stale = True
-                if status == self.PARTIAL: partial = True
-                prefix = {self.INSTALLED:'*', self.STALE:'-',
-                          self.PARTIAL:'P', self.NOT_INSTALLED: ' '}[status]
-                name = textwrap.fill('-'*27 + (info.name or info.id),
-                                     75, subsequent_indent=27*' ')[27:]
-                print('  [%s] %s %s' % (prefix, info.id.ljust(20, '.'), name))
-                lines += len(name.split('\n')) # for more_prompt
+                if status == self.INSTALLED and skip_installed:
+                    continue
+                if status == self.STALE:
+                    stale = True
+                if status == self.PARTIAL:
+                    partial = True
+                prefix = {
+                    self.INSTALLED: "*",
+                    self.STALE: "-",
+                    self.PARTIAL: "P",
+                    self.NOT_INSTALLED: " ",
+                }[status]
+                name = textwrap.fill(
+                    "-" * 27 + (info.name or info.id), 75, subsequent_indent=27 * " "
+                )[27:]
+                print("  [%s] %s %s" % (prefix, info.id.ljust(20, "."), name))
+                lines += len(name.split("\n"))  # for more_prompt
                 if more_prompt and lines > 20:
                     user_input = input("Hit Enter to continue: ")
-                    if (user_input.lower() in ('x', 'q')): return
+                    if user_input.lower() in ("x", "q"):
+                        return
                     lines = 0
             print()
-        msg = '([*] marks installed packages'
-        if stale: msg += '; [-] marks out-of-date or corrupt packages'
-        if partial: msg += '; [P] marks partially installed collections'
-        print(textwrap.fill(msg+')', subsequent_indent=' ', width=76))
+        msg = "([*] marks installed packages"
+        if stale:
+            msg += "; [-] marks out-of-date or corrupt packages"
+        if partial:
+            msg += "; [P] marks partially installed collections"
+        print(textwrap.fill(msg + ")", subsequent_indent=" ", width=76))
 
     def packages(self):
         self._update_index()
@@ -493,24 +583,22 @@ class Downloader(object):
 
     def corpora(self):
         self._update_index()
-        return [pkg for (id,pkg) in self._packages.items()
-                if pkg.subdir == 'corpora']
+        return [pkg for (id, pkg) in self._packages.items() if pkg.subdir == "corpora"]
 
     def models(self):
         self._update_index()
-        return [pkg for (id,pkg) in self._packages.items()
-                if pkg.subdir != 'corpora']
+        return [pkg for (id, pkg) in self._packages.items() if pkg.subdir != "corpora"]
 
     def collections(self):
         self._update_index()
         return self._collections.values()
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Downloading
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _info_or_id(self, info_or_id):
-        if isinstance(info_or_id, string_types):
+        if isinstance(info_or_id, str):
             return self.info(info_or_id)
         else:
             return info_or_id
@@ -531,16 +619,16 @@ class Downloader(object):
             yield SelectDownloadDirMessage(download_dir)
 
         # If they gave us a list of ids, then download each one.
-        if isinstance(info_or_id, (list,tuple)):
+        if isinstance(info_or_id, (list, tuple)):
             for msg in self._download_list(info_or_id, download_dir, force):
                 yield msg
             return
 
         # Look up the requested collection or package.
-        try: info = self._info_or_id(info_or_id)
+        try:
+            info = self._info_or_id(info_or_id)
         except (IOError, ValueError) as e:
-            yield ErrorMessage(None, 'Error loading %s: %s' %
-                               (info_or_id, e))
+            yield ErrorMessage(None, "Error loading %s: %s" % (info_or_id, e))
             return
 
         # Handle collections.
@@ -556,13 +644,16 @@ class Downloader(object):
                 yield msg
 
     def _num_packages(self, item):
-        if isinstance(item, Package): return 1
-        else: return len(item.packages)
+        if isinstance(item, Package):
+            return 1
+        else:
+            return len(item.packages)
 
     def _download_list(self, items, download_dir, force):
         # Look up the requested items.
         for i in range(len(items)):
-            try: items[i] = self._info_or_id(items[i])
+            try:
+                items[i] = self._info_or_id(items[i])
             except (IOError, ValueError) as e:
                 yield ErrorMessage(items[i], e)
                 return
@@ -572,16 +663,16 @@ class Downloader(object):
         progress = 0
         for i, item in enumerate(items):
             if isinstance(item, Package):
-                delta = 1./num_packages
+                delta = 1.0 / num_packages
             else:
-                delta = len(item.packages)/num_packages
+                delta = len(item.packages) / num_packages
             for msg in self.incr_download(item, download_dir, force):
                 if isinstance(msg, ProgressMessage):
-                    yield ProgressMessage(progress + msg.progress*delta)
+                    yield ProgressMessage(progress + msg.progress * delta)
                 else:
                     yield msg
 
-            progress += 100*delta
+            progress += 100 * delta
 
     def _download_package(self, info, download_dir, force):
         yield StartPackageMessage(info)
@@ -617,25 +708,27 @@ class Downloader(object):
         yield ProgressMessage(5)
         try:
             infile = urlopen(info.url)
-            with open(filepath, 'wb') as outfile:
-                #print info.size
-                num_blocks = max(1, info.size/(1024*16))
+            with open(filepath, "wb") as outfile:
+                num_blocks = max(1, info.size / (1024 * 16))
                 for block in itertools.count():
-                    s = infile.read(1024*16) # 16k blocks.
+                    s = infile.read(1024 * 16)  # 16k blocks.
                     outfile.write(s)
-                    if not s: break
-                    if block % 2 == 0: # how often?
-                        yield ProgressMessage(min(80, 5+75*(block/num_blocks)))
+                    if not s:
+                        break
+                    if block % 2 == 0:  # how often?
+                        yield ProgressMessage(min(80, 5 + 75 * (block / num_blocks)))
             infile.close()
         except IOError as e:
-            yield ErrorMessage(info, 'Error downloading %r from <%s>:'
-                               '\n  %s' % (info.id, info.url, e))
+            yield ErrorMessage(
+                info,
+                "Error downloading %r from <%s>:" "\n  %s" % (info.id, info.url, e),
+            )
             return
         yield FinishDownloadMessage(info)
         yield ProgressMessage(80)
 
         # If it's a zipfile, uncompress it.
-        if info.filename.endswith('.zip'):
+        if info.filename.endswith(".zip"):
             zipdir = os.path.join(download_dir, info.subdir)
             # Unzip if we're unzipping by default; *or* if it's already
             # been unzipped (presumably a previous version).
@@ -649,23 +742,39 @@ class Downloader(object):
 
         yield FinishPackageMessage(info)
 
-    def download(self, info_or_id=None, download_dir=None, quiet=False,
-                 force=False, prefix='[nltk_data] ', halt_on_error=True,
-                 raise_on_error=False):
+    def download(
+        self,
+        info_or_id=None,
+        download_dir=None,
+        quiet=False,
+        force=False,
+        prefix="[nltk_data] ",
+        halt_on_error=True,
+        raise_on_error=False,
+        print_error_to=sys.stderr,
+    ):
+
+        print_to = functools.partial(print, file=print_error_to)
         # If no info or id is given, then use the interactive shell.
         if info_or_id is None:
             # [xx] hmm -- changing self._download_dir here seems like
             # the wrong thing to do.  Maybe the _interactive_download
             # function should make a new copy of self to use?
-            if download_dir is not None: self._download_dir = download_dir
+            if download_dir is not None:
+                self._download_dir = download_dir
             self._interactive_download()
             return True
 
         else:
             # Define a helper function for displaying output:
-            def show(s, prefix2=''):
-                print(textwrap.fill(s, initial_indent=prefix+prefix2,
-                                    subsequent_indent=prefix+prefix2+' '*4))
+            def show(s, prefix2=""):
+                print_to(
+                    textwrap.fill(
+                        s,
+                        initial_indent=prefix + prefix2,
+                        subsequent_indent=prefix + prefix2 + " " * 4,
+                    )
+                )
 
             for msg in self.incr_download(info_or_id, download_dir, force):
                 # Error messages
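Because every message is routed through `print_to`, the new `print_error_to` parameter makes the whole download transcript capturable; a small sketch:

    import io
    from nltk.downloader import Downloader

    buf = io.StringIO()
    # All "[nltk_data] ..." lines land in the buffer instead of sys.stderr.
    Downloader().download("punkt", print_error_to=buf)
    print(buf.getvalue())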
@@ -677,45 +786,53 @@ class Downloader(object):
                         return False
                     self._errors = True
                     if not quiet:
-                        print("Error installing package. Retry? [n/y/e]")
+                        print_to("Error installing package. Retry? [n/y/e]")
                         choice = input().strip()
-                        if choice in ['y', 'Y']:
-                            if not self.download(msg.package.id, download_dir,
-                                                 quiet, force, prefix,
-                                                 halt_on_error, raise_on_error):
+                        if choice in ["y", "Y"]:
+                            if not self.download(
+                                msg.package.id,
+                                download_dir,
+                                quiet,
+                                force,
+                                prefix,
+                                halt_on_error,
+                                raise_on_error,
+                            ):
                                 return False
-                        elif choice in ['e', 'E']:
+                        elif choice in ["e", "E"]:
                             return False
 
                 # All other messages
                 if not quiet:
                     # Collection downloading messages:
                     if isinstance(msg, StartCollectionMessage):
-                        show('Downloading collection %r' % msg.collection.id)
-                        prefix += '   | '
-                        print(prefix)
+                        show("Downloading collection %r" % msg.collection.id)
+                        prefix += "   | "
+                        print_to(prefix)
                     elif isinstance(msg, FinishCollectionMessage):
-                        print(prefix)
+                        print_to(prefix)
                         prefix = prefix[:-4]
                         if self._errors:
-                            show('Downloaded collection %r with errors' %
-                                 msg.collection.id)
+                            show(
+                                "Downloaded collection %r with errors"
+                                % msg.collection.id
+                            )
                         else:
-                            show('Done downloading collection %s' %
-                                 msg.collection.id)
+                            show("Done downloading collection %s" % msg.collection.id)
 
                     # Package downloading messages:
                     elif isinstance(msg, StartPackageMessage):
-                        show('Downloading package %s to %s...' %
-                             (msg.package.id, download_dir))
+                        show(
+                            "Downloading package %s to %s..."
+                            % (msg.package.id, download_dir)
+                        )
                     elif isinstance(msg, UpToDateMessage):
-                        show('Package %s is already up-to-date!' %
-                             msg.package.id, '  ')
-                    #elif isinstance(msg, StaleMessage):
+                        show("Package %s is already up-to-date!" % msg.package.id, "  ")
+                    # elif isinstance(msg, StaleMessage):
                     #    show('Package %s is out-of-date or corrupt' %
                     #         msg.package.id, '  ')
                     elif isinstance(msg, StartUnzipMessage):
-                        show('Unzipping %s.' % msg.package.filename, '  ')
+                        show("Unzipping %s." % msg.package.filename, "  ")
 
                     # Data directory message:
                     elif isinstance(msg, SelectDownloadDirMessage):
@@ -740,7 +857,8 @@ class Downloader(object):
         or collection.  Status can be one of ``INSTALLED``,
         ``NOT_INSTALLED``, ``STALE``, or ``PARTIAL``.
         """
-        if download_dir is None: download_dir = self._download_dir
+        if download_dir is None:
+            download_dir = self._download_dir
         info = self._info_or_id(info_or_id)
 
         # Handle collections:
@@ -750,8 +868,7 @@ class Downloader(object):
                 return self.STALE
             elif self.PARTIAL in pkg_status:
                 return self.PARTIAL
-            elif (self.INSTALLED in pkg_status and
-                  self.NOT_INSTALLED in pkg_status):
+            elif self.INSTALLED in pkg_status and self.NOT_INSTALLED in pkg_status:
                 return self.PARTIAL
             elif self.NOT_INSTALLED in pkg_status:
                 return self.NOT_INSTALLED
@@ -765,8 +882,7 @@ class Downloader(object):
                 return self._pkg_status(info, filepath)
             else:
                 if info.id not in self._status_cache:
-                    self._status_cache[info.id] = self._pkg_status(info,
-                                                                   filepath)
+                    self._status_cache[info.id] = self._pkg_status(info, filepath)
                 return self._status_cache[info.id]
 
     def _pkg_status(self, info, filepath):
@@ -774,8 +890,10 @@ class Downloader(object):
             return self.NOT_INSTALLED
 
         # Check if the file has the correct size.
-        try: filestat = os.stat(filepath)
-        except OSError: return self.NOT_INSTALLED
+        try:
+            filestat = os.stat(filepath)
+        except OSError:
+            return self.NOT_INSTALLED
         if filestat.st_size != int(info.size):
             return self.STALE
 
@@ -785,23 +903,25 @@ class Downloader(object):
 
         # If it's a zipfile, and it's been at least partially
         # unzipped, then check if it's been fully unzipped.
-        if filepath.endswith('.zip'):
+        if filepath.endswith(".zip"):
             unzipdir = filepath[:-4]
             if not os.path.exists(unzipdir):
-                return self.INSTALLED # but not unzipped -- ok!
+                return self.INSTALLED  # but not unzipped -- ok!
             if not os.path.isdir(unzipdir):
                 return self.STALE
 
-            unzipped_size = sum(os.stat(os.path.join(d, f)).st_size
-                                for d, _, files in os.walk(unzipdir)
-                                for f in files)
+            unzipped_size = sum(
+                os.stat(os.path.join(d, f)).st_size
+                for d, _, files in os.walk(unzipdir)
+                for f in files
+            )
             if unzipped_size != info.unzipped_size:
                 return self.STALE
 
         # Otherwise, everything looks good.
         return self.INSTALLED
 
-    def update(self, quiet=False, prefix='[nltk_data] '):
+    def update(self, quiet=False, prefix="[nltk_data] "):
         """
         Re-download any packages whose status is STALE.
         """
@@ -810,17 +930,20 @@ class Downloader(object):
             if self.status(pkg) == self.STALE:
                 self.download(pkg, quiet=quiet, prefix=prefix)
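Together, `status()`, `update()`, and `download()` give callers a simple keep-current idiom; a minimal sketch:

    from nltk.downloader import Downloader

    d = Downloader()
    # status() compares on-disk sizes (and unzipped sizes for .zip packages)
    # against the index, so a truncated download shows up as STALE.
    if d.status("punkt") in (d.STALE, d.NOT_INSTALLED):
        d.download("punkt")
    d.update()  # re-fetch anything else that has gone stale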
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Index
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _update_index(self, url=None):
         """A helper function that ensures that self._index is
         up-to-date.  If the index is older than self.INDEX_TIMEOUT,
         then download it again."""
         # Check if the index is already up-to-date.  If so, do nothing.
-        if not (self._index is None or url is not None or
-                time.time()-self._index_timestamp > self.INDEX_TIMEOUT):
+        if not (
+            self._index is None
+            or url is not None
+            or time.time() - self._index_timestamp > self.INDEX_TIMEOUT
+        ):
             return
 
         # If a URL was specified, then update our URL.
@@ -828,17 +951,18 @@ class Downloader(object):
 
         # Download the index file.
         self._index = nltk.internals.ElementWrapper(
-            ElementTree.parse(urlopen(self._url)).getroot())
+            ElementTree.parse(urlopen(self._url)).getroot()
+        )
         self._index_timestamp = time.time()
 
         # Build a dictionary of packages.
-        packages = [Package.fromxml(p) for p in
-                    self._index.findall('packages/package')]
+        packages = [Package.fromxml(p) for p in self._index.findall("packages/package")]
         self._packages = dict((p.id, p) for p in packages)
 
         # Build a dictionary of collections.
-        collections = [Collection.fromxml(c) for c in
-                       self._index.findall('collections/collection')]
+        collections = [
+            Collection.fromxml(c) for c in self._index.findall("collections/collection")
+        ]
         self._collections = dict((c.id, c) for c in collections)
 
         # Replace identifiers with actual children in collection.children.
@@ -849,7 +973,11 @@ class Downloader(object):
                 elif child_id in self._collections:
                     collection.children[i] = self._collections[child_id]
                 else:
-                    print('removing collection member with no package: {}'.format(child_id))
+                    print(
+                        "removing collection member with no package: {}".format(
+                            child_id
+                        )
+                    )
                     del collection.children[i]
 
         # Fill in collection.packages for each collection.
@@ -881,28 +1009,31 @@ class Downloader(object):
         """Return the ``Package`` or ``Collection`` record for the
            given item."""
         self._update_index()
-        if id in self._packages: return self._packages[id]
-        if id in self._collections: return self._collections[id]
-        raise ValueError('Package %r not found in index' % id)
+        if id in self._packages:
+            return self._packages[id]
+        if id in self._collections:
+            return self._collections[id]
+        raise ValueError("Package %r not found in index" % id)
 
     def xmlinfo(self, id):
         """Return the XML info record for the given item"""
         self._update_index()
-        for package in self._index.findall('packages/package'):
-            if package.get('id') == id:
+        for package in self._index.findall("packages/package"):
+            if package.get("id") == id:
                 return package
-        for collection in self._index.findall('collections/collection'):
-            if collection.get('id') == id:
+        for collection in self._index.findall("collections/collection"):
+            if collection.get("id") == id:
                 return collection
-        raise ValueError('Package %r not found in index' % id)
+        raise ValueError("Package %r not found in index" % id)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # URL & Data Directory
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _get_url(self):
         """The URL for the data server's index file."""
         return self._url
+
     def _set_url(self, url):
         """
         Set a new URL for the data server. If we're unable to contact
@@ -914,6 +1045,7 @@ class Downloader(object):
         except:
             self._url = original_url
             raise
+
     url = property(_get_url, _set_url)
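Since `_set_url` restores the previous URL and re-raises when the new index can't be fetched, a bad assignment leaves the downloader usable; roughly:

    from nltk.downloader import Downloader

    d = Downloader()
    old = d.url
    try:
        d.url = "http://does-not-exist.invalid/index.xml"  # unreachable host
    except Exception:
        # _set_url re-raised after rolling back to the original URL.
        assert d.url == old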
 
     def default_download_dir(self):
@@ -933,28 +1065,27 @@ class Downloader(object):
         ``/usr/lib/nltk_data``, ``/usr/local/lib/nltk_data``, ``~/nltk_data``.
         """
         # Check if we are on GAE where we cannot write into filesystem.
-        if 'APPENGINE_RUNTIME' in os.environ:
+        if "APPENGINE_RUNTIME" in os.environ:
             return
 
         # Check if we have sufficient permissions to install in a
         # variety of system-wide locations.
         for nltkdir in nltk.data.path:
-            if (os.path.exists(nltkdir) and
-                nltk.internals.is_writable(nltkdir)):
+            if os.path.exists(nltkdir) and nltk.internals.is_writable(nltkdir):
                 return nltkdir
 
         # On Windows, use %APPDATA%
-        if sys.platform == 'win32' and 'APPDATA' in os.environ:
-            homedir = os.environ['APPDATA']
+        if sys.platform == "win32" and "APPDATA" in os.environ:
+            homedir = os.environ["APPDATA"]
 
         # Otherwise, install in the user's home directory.
         else:
-            homedir = os.path.expanduser('~/')
-            if homedir == '~/':
+            homedir = os.path.expanduser("~/")
+            if homedir == "~/":
                 raise ValueError("Could not find a default download directory")
 
         # append "nltk_data" to the home directory
-        return os.path.join(homedir, 'nltk_data')
+        return os.path.join(homedir, "nltk_data")
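In practice the fallback chain above resolves to something like:

    from nltk.downloader import Downloader

    # First writable entry on nltk.data.path if one exists; otherwise
    # %APPDATA%\nltk_data on Windows or ~/nltk_data elsewhere.
    print(Downloader().default_download_dir())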
 
     def _get_download_dir(self):
         """
@@ -964,15 +1095,17 @@ class Downloader(object):
         ``download_dir`` argument when calling ``download()``.
         """
         return self._download_dir
+
     def _set_download_dir(self, download_dir):
         self._download_dir = download_dir
         # Clear the status cache.
         self._status_cache.clear()
+
     download_dir = property(_get_download_dir, _set_download_dir)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Interactive Shell
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _interactive_download(self):
         # Try the GUI first; if that doesn't work, try the simple
@@ -985,49 +1118,54 @@ class Downloader(object):
         else:
             DownloaderShell(self).run()
 
+
 class DownloaderShell(object):
     def __init__(self, dataserver):
         self._ds = dataserver
 
     def _simple_interactive_menu(self, *options):
-        print('-'*75)
-        spc = (68 - sum(len(o) for o in options))//(len(options)-1)*' '
-        print('    ' + spc.join(options))
-        #w = 76/len(options)
-        #fmt = '  ' + ('%-'+str(w)+'s')*(len(options)-1) + '%s'
-        #print fmt % options
-        print('-'*75)
+        print("-" * 75)
+        spc = (68 - sum(len(o) for o in options)) // (len(options) - 1) * " "
+        print("    " + spc.join(options))
+        print("-" * 75)
 
     def run(self):
-        print('NLTK Downloader')
+        print("NLTK Downloader")
         while True:
             self._simple_interactive_menu(
-                'd) Download', 'l) List', ' u) Update', 'c) Config', 'h) Help', 'q) Quit')
-            user_input = input('Downloader> ').strip()
-            if not user_input: print(); continue
+                "d) Download",
+                "l) List",
+                " u) Update",
+                "c) Config",
+                "h) Help",
+                "q) Quit",
+            )
+            user_input = input("Downloader> ").strip()
+            if not user_input:
+                print()
+                continue
             command = user_input.lower().split()[0]
             args = user_input.split()[1:]
             try:
-                if command == 'l':
+                if command == "l":
                     print()
-                    self._ds.list(self._ds.download_dir, header=False,
-                                  more_prompt=True)
-                elif command == 'h':
+                    self._ds.list(self._ds.download_dir, header=False, more_prompt=True)
+                elif command == "h":
                     self._simple_interactive_help()
-                elif command == 'c':
+                elif command == "c":
                     self._simple_interactive_config()
-                elif command in ('q', 'x'):
+                elif command in ("q", "x"):
                     return
-                elif command == 'd':
+                elif command == "d":
                     self._simple_interactive_download(args)
-                elif command == 'u':
+                elif command == "u":
                     self._simple_interactive_update()
                 else:
-                    print('Command %r unrecognized' % user_input)
+                    print("Command %r unrecognized" % user_input)
             except HTTPError as e:
-                print('Error reading from server: %s'%e)
+                print("Error reading from server: %s" % e)
             except URLError as e:
-                print('Error connecting to server: %s'%e.reason)
+                print("Error connecting to server: %s" % e.reason)
             # try checking if user_input is a package name, &
             # downloading it?
             print()
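This shell is the same loop that `download()` falls back to when the Tkinter GUI can't start; it can also be driven directly:

    from nltk.downloader import Downloader, DownloaderShell

    # Runs the d/l/u/c/h/q read-eval loop shown above.
    DownloaderShell(Downloader()).run()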
@@ -1035,132 +1173,159 @@ class DownloaderShell(object):
     def _simple_interactive_download(self, args):
         if args:
             for arg in args:
-                try: self._ds.download(arg, prefix='    ')
-                except (IOError, ValueError) as e: print(e)
+                try:
+                    self._ds.download(arg, prefix="    ")
+                except (IOError, ValueError) as e:
+                    print(e)
         else:
             while True:
                 print()
-                print('Download which package (l=list; x=cancel)?')
-                user_input = input('  Identifier> ')
-                if user_input.lower()=='l':
-                    self._ds.list(self._ds.download_dir, header=False,
-                                  more_prompt=True, skip_installed=True)
+                print("Download which package (l=list; x=cancel)?")
+                user_input = input("  Identifier> ")
+                if user_input.lower() == "l":
+                    self._ds.list(
+                        self._ds.download_dir,
+                        header=False,
+                        more_prompt=True,
+                        skip_installed=True,
+                    )
                     continue
-                elif user_input.lower() in ('x', 'q', ''):
+                elif user_input.lower() in ("x", "q", ""):
                     return
                 elif user_input:
                     for id in user_input.split():
-                        try: self._ds.download(id, prefix='    ')
-                        except (IOError, ValueError) as e: print(e)
+                        try:
+                            self._ds.download(id, prefix="    ")
+                        except (IOError, ValueError) as e:
+                            print(e)
                     break
 
     def _simple_interactive_update(self):
         while True:
             stale_packages = []
             stale = partial = False
-            for info in sorted(getattr(self._ds, 'packages')(), key=str):
+            for info in sorted(getattr(self._ds, "packages")(), key=str):
                 if self._ds.status(info) == self._ds.STALE:
                     stale_packages.append((info.id, info.name))
 
             print()
             if stale_packages:
-                print('Will update following packages (o=ok; x=cancel)')
+                print("Will update following packages (o=ok; x=cancel)")
                 for pid, pname in stale_packages:
-                    name = textwrap.fill('-'*27 + (pname),
-                                     75, subsequent_indent=27*' ')[27:]
-                    print('  [ ] %s %s' % (pid.ljust(20, '.'), name))
+                    name = textwrap.fill(
+                        "-" * 27 + (pname), 75, subsequent_indent=27 * " "
+                    )[27:]
+                    print("  [ ] %s %s" % (pid.ljust(20, "."), name))
                 print()
 
-                user_input = input('  Identifier> ')
-                if user_input.lower()=='o':
+                user_input = input("  Identifier> ")
+                if user_input.lower() == "o":
                     for pid, pname in stale_packages:
-                        try: self._ds.download(pid, prefix='    ')
-                        except (IOError, ValueError) as e: print(e)
+                        try:
+                            self._ds.download(pid, prefix="    ")
+                        except (IOError, ValueError) as e:
+                            print(e)
                     break
-                elif user_input.lower() in ('x', 'q', ''):
+                elif user_input.lower() in ("x", "q", ""):
                     return
             else:
-                print('Nothing to update.')
+                print("Nothing to update.")
                 return
 
     def _simple_interactive_help(self):
         print()
-        print('Commands:')
-        print('  d) Download a package or collection     u) Update out of date packages')
-        print('  l) List packages & collections          h) Help')
-        print('  c) View & Modify Configuration          q) Quit')
+        print("Commands:")
+        print(
+            "  d) Download a package or collection     u) Update out of date packages"
+        )
+        print("  l) List packages & collections          h) Help")
+        print("  c) View & Modify Configuration          q) Quit")
 
     def _show_config(self):
         print()
-        print('Data Server:')
-        print('  - URL: <%s>' % self._ds.url)
-        print(('  - %d Package Collections Available' %
-               len(self._ds.collections())))
-        print(('  - %d Individual Packages Available' %
-               len(self._ds.packages())))
+        print("Data Server:")
+        print("  - URL: <%s>" % self._ds.url)
+        print(("  - %d Package Collections Available" % len(self._ds.collections())))
+        print(("  - %d Individual Packages Available" % len(self._ds.packages())))
         print()
-        print('Local Machine:')
-        print('  - Data directory: %s' % self._ds.download_dir)
+        print("Local Machine:")
+        print("  - Data directory: %s" % self._ds.download_dir)
 
     def _simple_interactive_config(self):
         self._show_config()
         while True:
             print()
             self._simple_interactive_menu(
-                's) Show Config', 'u) Set Server URL',
-                'd) Set Data Dir', 'm) Main Menu')
-            user_input = input('Config> ').strip().lower()
-            if user_input == 's':
+                "s) Show Config", "u) Set Server URL", "d) Set Data Dir", "m) Main Menu"
+            )
+            user_input = input("Config> ").strip().lower()
+            if user_input == "s":
                 self._show_config()
-            elif user_input == 'd':
-                new_dl_dir = input('  New Directory> ').strip()
-                if new_dl_dir in ('', 'x', 'q', 'X', 'Q'):
-                    print('  Cancelled!')
+            elif user_input == "d":
+                new_dl_dir = input("  New Directory> ").strip()
+                if new_dl_dir in ("", "x", "q", "X", "Q"):
+                    print("  Cancelled!")
                 elif os.path.isdir(new_dl_dir):
                     self._ds.download_dir = new_dl_dir
                 else:
-                    print(('Directory %r not found!  Create it first.' %
-                           new_dl_dir))
-            elif user_input == 'u':
-                new_url = input('  New URL> ').strip()
-                if new_url in ('', 'x', 'q', 'X', 'Q'):
-                    print('  Cancelled!')
+                    print(("Directory %r not found!  Create it first." % new_dl_dir))
+            elif user_input == "u":
+                new_url = input("  New URL> ").strip()
+                if new_url in ("", "x", "q", "X", "Q"):
+                    print("  Cancelled!")
                 else:
-                    if not new_url.startswith(('http://', 'https://')):
-                        new_url = 'http://'+new_url
-                    try: self._ds.url = new_url
+                    if not new_url.startswith(("http://", "https://")):
+                        new_url = "http://" + new_url
+                    try:
+                        self._ds.url = new_url
                     except Exception as e:
-                        print('Error reading <%r>:\n  %s' % (new_url, e))
-            elif user_input == 'm':
+                        print("Error reading <%r>:\n  %s" % (new_url, e))
+            elif user_input == "m":
                 break
 
+
 class DownloaderGUI(object):
     """
     Graphical interface for downloading packages from the NLTK data
     server.
     """
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Column Configuration
-    #/////////////////////////////////////////////////////////////////
-
-    COLUMNS = ['', 'Identifier', 'Name', 'Size', 'Status',
-               'Unzipped Size',
-               'Copyright', 'Contact', 'License', 'Author',
-               'Subdir', 'Checksum']
+    # /////////////////////////////////////////////////////////////////
+
+    COLUMNS = [
+        "",
+        "Identifier",
+        "Name",
+        "Size",
+        "Status",
+        "Unzipped Size",
+        "Copyright",
+        "Contact",
+        "License",
+        "Author",
+        "Subdir",
+        "Checksum",
+    ]
     """A list of the names of columns.  This controls the order in
        which the columns will appear.  If this is edited, then
        ``_package_to_columns()`` may need to be edited to match."""
 
-    COLUMN_WEIGHTS = {'': 0, 'Name': 5, 'Size': 0, 'Status': 0}
+    COLUMN_WEIGHTS = {"": 0, "Name": 5, "Size": 0, "Status": 0}
     """A dictionary specifying how columns should be resized when the
        table is resized.  Columns with weight 0 will not be resized at
        all; and columns with high weight will be resized more.
        Default weight (for columns not explicitly listed) is 1."""
 
-    COLUMN_WIDTHS = {'':1, 'Identifier':20, 'Name':45,
-                     'Size': 10, 'Unzipped Size': 10,
-                     'Status': 12}
+    COLUMN_WIDTHS = {
+        "": 1,
+        "Identifier": 20,
+        "Name": 45,
+        "Size": 10,
+        "Unzipped Size": 10,
+        "Status": 12,
+    }
     """A dictionary specifying how wide each column should be, in
        characters.  The default width (for columns not explicitly
        listed) is specified by ``DEFAULT_COLUMN_WIDTH``."""
@@ -1169,40 +1334,45 @@ class DownloaderGUI(object):
     """The default width for columns that are not explicitly listed
        in ``COLUMN_WIDTHS``."""
 
-    INITIAL_COLUMNS = ['', 'Identifier', 'Name', 'Size', 'Status']
+    INITIAL_COLUMNS = ["", "Identifier", "Name", "Size", "Status"]
     """The set of columns that should be displayed by default."""
 
     # Perform a few import-time sanity checks to make sure that the
     # column configuration variables are defined consistently:
-    for c in COLUMN_WEIGHTS: assert c in COLUMNS
-    for c in COLUMN_WIDTHS: assert c in COLUMNS
-    for c in INITIAL_COLUMNS: assert c in COLUMNS
-
-    #/////////////////////////////////////////////////////////////////
+    for c in COLUMN_WEIGHTS:
+        assert c in COLUMNS
+    for c in COLUMN_WIDTHS:
+        assert c in COLUMNS
+    for c in INITIAL_COLUMNS:
+        assert c in COLUMNS
+
+    # /////////////////////////////////////////////////////////////////
     # Color Configuration
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
-    _BACKDROP_COLOR = ('#000', '#ccc')
+    _BACKDROP_COLOR = ("#000", "#ccc")
 
-    _ROW_COLOR = {Downloader.INSTALLED: ('#afa', '#080'),
-                  Downloader.PARTIAL: ('#ffa', '#880'),
-                  Downloader.STALE: ('#faa', '#800'),
-                  Downloader.NOT_INSTALLED: ('#fff', '#888')}
+    _ROW_COLOR = {
+        Downloader.INSTALLED: ("#afa", "#080"),
+        Downloader.PARTIAL: ("#ffa", "#880"),
+        Downloader.STALE: ("#faa", "#800"),
+        Downloader.NOT_INSTALLED: ("#fff", "#888"),
+    }
 
-    _MARK_COLOR = ('#000', '#ccc')
+    _MARK_COLOR = ("#000", "#ccc")
 
-    #_FRONT_TAB_COLOR = ('#ccf', '#008')
-    #_BACK_TAB_COLOR = ('#88a', '#448')
-    _FRONT_TAB_COLOR = ('#fff', '#45c')
-    _BACK_TAB_COLOR = ('#aaa', '#67a')
+    # _FRONT_TAB_COLOR = ('#ccf', '#008')
+    # _BACK_TAB_COLOR = ('#88a', '#448')
+    _FRONT_TAB_COLOR = ("#fff", "#45c")
+    _BACK_TAB_COLOR = ("#aaa", "#67a")
 
-    _PROGRESS_COLOR = ('#f00', '#aaa')
+    _PROGRESS_COLOR = ("#f00", "#aaa")
 
-    _TAB_FONT = 'helvetica -16 bold'
+    _TAB_FONT = "helvetica -16 bold"
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Constructor
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def __init__(self, dataserver, use_threads=True):
         self._ds = dataserver
@@ -1220,17 +1390,17 @@ class DownloaderGUI(object):
         # A message log.
         self._log_messages = []
         self._log_indent = 0
-        self._log('NLTK Downloader Started!')
+        self._log("NLTK Downloader Started!")
 
         # Create the main window.
         top = self.top = Tk()
-        top.geometry('+50+50')
-        top.title('NLTK Downloader')
+        top.geometry("+50+50")
+        top.title("NLTK Downloader")
         top.configure(background=self._BACKDROP_COLOR[1])
 
         # Set up some bindings now, in case anything goes wrong.
-        top.bind('<Control-q>', self.destroy)
-        top.bind('<Control-x>', self.destroy)
+        top.bind("<Control-q>", self.destroy)
+        top.bind("<Control-x>", self.destroy)
         self._destroyed = False
 
         self._column_vars = {}
@@ -1241,9 +1411,9 @@ class DownloaderGUI(object):
         try:
             self._fill_table()
         except HTTPError as e:
-            showerror('Error reading from server', e)
+            showerror("Error reading from server", e)
         except URLError as e:
-            showerror('Error connecting to server', e.reason)
+            showerror("Error connecting to server", e.reason)
 
         self._show_info()
         self._select_columns()
@@ -1251,126 +1421,149 @@ class DownloaderGUI(object):
 
         # Make sure we get notified when we're destroyed, so we can
         # cancel any download in progress.
-        self._table.bind('<Destroy>', self._destroy)
+        self._table.bind("<Destroy>", self._destroy)
 
     def _log(self, msg):
-        self._log_messages.append('%s %s%s' % (time.ctime(),
-                                     ' | '*self._log_indent, msg))
+        self._log_messages.append(
+            "%s %s%s" % (time.ctime(), " | " * self._log_indent, msg)
+        )
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Internals
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _init_widgets(self):
         # Create the top-level frame structures
-        f1 = Frame(self.top, relief='raised', border=2, padx=8, pady=0)
-        f1.pack(sid='top', expand=True, fill='both')
+        f1 = Frame(self.top, relief="raised", border=2, padx=8, pady=0)
+        f1.pack(sid="top", expand=True, fill="both")
         f1.grid_rowconfigure(2, weight=1)
         f1.grid_columnconfigure(0, weight=1)
-        Frame(f1, height=8).grid(column=0, row=0) # spacer
+        Frame(f1, height=8).grid(column=0, row=0)  # spacer
         tabframe = Frame(f1)
-        tabframe.grid(column=0, row=1, sticky='news')
+        tabframe.grid(column=0, row=1, sticky="news")
         tableframe = Frame(f1)
-        tableframe.grid(column=0, row=2, sticky='news')
+        tableframe.grid(column=0, row=2, sticky="news")
         buttonframe = Frame(f1)
-        buttonframe.grid(column=0, row=3, sticky='news')
-        Frame(f1, height=8).grid(column=0, row=4) # spacer
+        buttonframe.grid(column=0, row=3, sticky="news")
+        Frame(f1, height=8).grid(column=0, row=4)  # spacer
         infoframe = Frame(f1)
-        infoframe.grid(column=0, row=5, sticky='news')
-        Frame(f1, height=8).grid(column=0, row=6) # spacer
-        progressframe = Frame(self.top, padx=3, pady=3,
-                              background=self._BACKDROP_COLOR[1])
-        progressframe.pack(side='bottom', fill='x')
-        self.top['border'] = 0
-        self.top['highlightthickness'] = 0
+        infoframe.grid(column=0, row=5, sticky="news")
+        Frame(f1, height=8).grid(column=0, row=6)  # spacer
+        progressframe = Frame(
+            self.top, padx=3, pady=3, background=self._BACKDROP_COLOR[1]
+        )
+        progressframe.pack(side="bottom", fill="x")
+        self.top["border"] = 0
+        self.top["highlightthickness"] = 0
 
         # Create the tabs
-        self._tab_names = ['Collections', 'Corpora',
-                           'Models', 'All Packages',]
+        self._tab_names = ["Collections", "Corpora", "Models", "All Packages"]
         self._tabs = {}
         for i, tab in enumerate(self._tab_names):
             label = Label(tabframe, text=tab, font=self._TAB_FONT)
-            label.pack(side='left', padx=((i+1)%2)*10)
-            label.bind('<Button-1>', self._select_tab)
+            label.pack(side="left", padx=((i + 1) % 2) * 10)
+            label.bind("<Button-1>", self._select_tab)
             self._tabs[tab.lower()] = label
 
         # Create the table.
-        column_weights = [self.COLUMN_WEIGHTS.get(column, 1)
-                          for column in self.COLUMNS]
-        self._table = Table(tableframe, self.COLUMNS,
-                            column_weights=column_weights,
-                            highlightthickness=0, listbox_height=16,
-                            reprfunc=self._table_reprfunc)
-        self._table.columnconfig(0, foreground=self._MARK_COLOR[0]) # marked
+        column_weights = [self.COLUMN_WEIGHTS.get(column, 1) for column in self.COLUMNS]
+        self._table = Table(
+            tableframe,
+            self.COLUMNS,
+            column_weights=column_weights,
+            highlightthickness=0,
+            listbox_height=16,
+            reprfunc=self._table_reprfunc,
+        )
+        self._table.columnconfig(0, foreground=self._MARK_COLOR[0])  # marked
         for i, column in enumerate(self.COLUMNS):
             width = self.COLUMN_WIDTHS.get(column, self.DEFAULT_COLUMN_WIDTH)
             self._table.columnconfig(i, width=width)
-        self._table.pack(expand=True, fill='both')
+        self._table.pack(expand=True, fill="both")
         self._table.focus()
-        self._table.bind_to_listboxes('<Double-Button-1>',
-                                      self._download)
-        self._table.bind('<space>', self._table_mark)
-        self._table.bind('<Return>', self._download)
-        self._table.bind('<Left>', self._prev_tab)
-        self._table.bind('<Right>', self._next_tab)
-        self._table.bind('<Control-a>', self._mark_all)
+        self._table.bind_to_listboxes("<Double-Button-1>", self._download)
+        self._table.bind("<space>", self._table_mark)
+        self._table.bind("<Return>", self._download)
+        self._table.bind("<Left>", self._prev_tab)
+        self._table.bind("<Right>", self._next_tab)
+        self._table.bind("<Control-a>", self._mark_all)
 
         # Create entry boxes for URL & download_dir
         infoframe.grid_columnconfigure(1, weight=1)
 
-        info = [('url', 'Server Index:', self._set_url),
-                ('download_dir','Download Directory:',self._set_download_dir)]
+        info = [
+            ("url", "Server Index:", self._set_url),
+            ("download_dir", "Download Directory:", self._set_download_dir),
+        ]
         self._info = {}
         for (i, (key, label, callback)) in enumerate(info):
-            Label(infoframe, text=label).grid(column=0, row=i, sticky='e')
-            entry = Entry(infoframe, font='courier', relief='groove',
-                          disabledforeground='black')
+            Label(infoframe, text=label).grid(column=0, row=i, sticky="e")
+            entry = Entry(
+                infoframe, font="courier", relief="groove", disabledforeground="black"
+            )
             self._info[key] = (entry, callback)
-            entry.bind('<Return>', self._info_save)
-            entry.bind('<Button-1>', lambda e,key=key: self._info_edit(key))
-            entry.grid(column=1, row=i, sticky='ew')
+            entry.bind("<Return>", self._info_save)
+            entry.bind("<Button-1>", lambda e, key=key: self._info_edit(key))
+            entry.grid(column=1, row=i, sticky="ew")
 
         # If the user edits url or download_dir, and then clicks outside
         # the entry box, then save their results.
-        self.top.bind('<Button-1>', self._info_save)
+        self.top.bind("<Button-1>", self._info_save)
 
         # Create Download & Refresh buttons.
         self._download_button = Button(
-            buttonframe, text='Download', command=self._download, width=8)
-        self._download_button.pack(side='left')
+            buttonframe, text="Download", command=self._download, width=8
+        )
+        self._download_button.pack(side="left")
         self._refresh_button = Button(
-            buttonframe, text='Refresh', command=self._refresh, width=8)
-        self._refresh_button.pack(side='right')
+            buttonframe, text="Refresh", command=self._refresh, width=8
+        )
+        self._refresh_button.pack(side="right")
 
         # Create Progress bar
-        self._progresslabel = Label(progressframe, text='',
-                                    foreground=self._BACKDROP_COLOR[0],
-                                    background=self._BACKDROP_COLOR[1])
-        self._progressbar = Canvas(progressframe, width=200, height=16,
-                                   background=self._PROGRESS_COLOR[1],
-                                   relief='sunken', border=1)
+        self._progresslabel = Label(
+            progressframe,
+            text="",
+            foreground=self._BACKDROP_COLOR[0],
+            background=self._BACKDROP_COLOR[1],
+        )
+        self._progressbar = Canvas(
+            progressframe,
+            width=200,
+            height=16,
+            background=self._PROGRESS_COLOR[1],
+            relief="sunken",
+            border=1,
+        )
         self._init_progressbar()
-        self._progressbar.pack(side='right')
-        self._progresslabel.pack(side='left')
+        self._progressbar.pack(side="right")
+        self._progresslabel.pack(side="left")
 
     def _init_menu(self):
         menubar = Menu(self.top)
 
         filemenu = Menu(menubar, tearoff=0)
-        filemenu.add_command(label='Download', underline=0,
-                             command=self._download, accelerator='Return')
+        filemenu.add_command(
+            label="Download", underline=0, command=self._download, accelerator="Return"
+        )
         filemenu.add_separator()
-        filemenu.add_command(label='Change Server Index', underline=7,
-                             command=lambda: self._info_edit('url'))
-        filemenu.add_command(label='Change Download Directory', underline=0,
-                             command=lambda: self._info_edit('download_dir'))
+        filemenu.add_command(
+            label="Change Server Index",
+            underline=7,
+            command=lambda: self._info_edit("url"),
+        )
+        filemenu.add_command(
+            label="Change Download Directory",
+            underline=0,
+            command=lambda: self._info_edit("download_dir"),
+        )
         filemenu.add_separator()
-        filemenu.add_command(label='Show Log', underline=5,
-                             command=self._show_log)
+        filemenu.add_command(label="Show Log", underline=5, command=self._show_log)
         filemenu.add_separator()
-        filemenu.add_command(label='Exit', underline=1,
-                             command=self.destroy, accelerator='Ctrl-x')
-        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+        filemenu.add_command(
+            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"
+        )
+        menubar.add_cascade(label="File", underline=0, menu=filemenu)
 
         # Create a menu to control which columns of the table are
         # shown.  n.b.: we never hide the first two columns (mark and
@@ -1380,34 +1573,38 @@ class DownloaderGUI(object):
             var = IntVar(self.top)
             assert column not in self._column_vars
             self._column_vars[column] = var
-            if column in self.INITIAL_COLUMNS: var.set(1)
-            viewmenu.add_checkbutton(label=column, underline=0, variable=var,
-                                     command=self._select_columns)
-        menubar.add_cascade(label='View', underline=0, menu=viewmenu)
+            if column in self.INITIAL_COLUMNS:
+                var.set(1)
+            viewmenu.add_checkbutton(
+                label=column, underline=0, variable=var, command=self._select_columns
+            )
+        menubar.add_cascade(label="View", underline=0, menu=viewmenu)
 
         # Create a sort menu
         # [xx] this should be selectbuttons; and it should include
         # reversed sorts as options.
         sortmenu = Menu(menubar, tearoff=0)
         for column in self._table.column_names[1:]:
-            sortmenu.add_command(label='Sort by %s' % column,
-                      command=(lambda c=column:
-                               self._table.sort_by(c, 'ascending')))
+            sortmenu.add_command(
+                label="Sort by %s" % column,
+                command=(lambda c=column: self._table.sort_by(c, "ascending")),
+            )
         sortmenu.add_separator()
-        #sortmenu.add_command(label='Descending Sort:')
+        # sortmenu.add_command(label='Descending Sort:')
         for column in self._table.column_names[1:]:
-            sortmenu.add_command(label='Reverse sort by %s' % column,
-                      command=(lambda c=column:
-                               self._table.sort_by(c, 'descending')))
-        menubar.add_cascade(label='Sort', underline=0, menu=sortmenu)
+            sortmenu.add_command(
+                label="Reverse sort by %s" % column,
+                command=(lambda c=column: self._table.sort_by(c, "descending")),
+            )
+        menubar.add_cascade(label="Sort", underline=0, menu=sortmenu)
 
         helpmenu = Menu(menubar, tearoff=0)
-        helpmenu.add_command(label='About', underline=0,
-                             command=self.about)
-        helpmenu.add_command(label='Instructions', underline=0,
-                             command=self.help, accelerator='F1')
-        menubar.add_cascade(label='Help', underline=0, menu=helpmenu)
-        self.top.bind('<F1>', self.help)
+        helpmenu.add_command(label="About", underline=0, command=self.about)
+        helpmenu.add_command(
+            label="Instructions", underline=0, command=self.help, accelerator="F1"
+        )
+        menubar.add_cascade(label="Help", underline=0, menu=helpmenu)
+        self.top.bind("<F1>", self.help)
 
         self.top.config(menu=menubar)
 
@@ -1423,52 +1620,60 @@ class DownloaderGUI(object):
         try:
             self._fill_table()
         except HTTPError as e:
-            showerror('Error reading from server', e)
+            showerror("Error reading from server", e)
         except URLError as e:
-            showerror('Error connecting to server', e.reason)
+            showerror("Error connecting to server", e.reason)
         self._table.select(0)
 
     def _info_edit(self, info_key):
-        self._info_save() # just in case.
+        self._info_save()  # just in case.
         (entry, callback) = self._info[info_key]
-        entry['state'] = 'normal'
-        entry['relief'] = 'sunken'
+        entry["state"] = "normal"
+        entry["relief"] = "sunken"
         entry.focus()
 
     def _info_save(self, e=None):
         focus = self._table
         for entry, callback in self._info.values():
-            if entry['state'] == 'disabled': continue
-            if e is not None and e.widget is entry and e.keysym != 'Return':
+            if entry["state"] == "disabled":
+                continue
+            if e is not None and e.widget is entry and e.keysym != "Return":
                 focus = entry
             else:
-                entry['state'] = 'disabled'
-                entry['relief'] = 'groove'
+                entry["state"] = "disabled"
+                entry["relief"] = "groove"
                 callback(entry.get())
         focus.focus()
 
     def _table_reprfunc(self, row, col, val):
-        if self._table.column_names[col].endswith('Size'):
-            if isinstance(val, string_types): return '  %s' % val
-            elif val < 1024**2: return '  %.1f KB' % (val/1024.**1)
-            elif val < 1024**3: return '  %.1f MB' % (val/1024.**2)
-            else: return '  %.1f GB' % (val/1024.**3)
+        if self._table.column_names[col].endswith("Size"):
+            if isinstance(val, str):
+                return "  %s" % val
+            elif val < 1024 ** 2:
+                return "  %.1f KB" % (val / 1024.0 ** 1)
+            elif val < 1024 ** 3:
+                return "  %.1f MB" % (val / 1024.0 ** 2)
+            else:
+                return "  %.1f GB" % (val / 1024.0 ** 3)
 
-        if col in (0, ''): return str(val)
-        else: return '  %s' % val
+        if col in (0, ""):
+            return str(val)
+        else:
+            return "  %s" % val
 
     def _set_url(self, url):
-        if url == self._ds.url: return
+        if url == self._ds.url:
+            return
         try:
             self._ds.url = url
             self._fill_table()
         except IOError as e:
-            showerror('Error Setting Server Index', str(e))
+            showerror("Error Setting Server Index", str(e))
         self._show_info()
 
-
     def _set_download_dir(self, download_dir):
-        if self._ds.download_dir == download_dir: return
+        if self._ds.download_dir == download_dir:
+            return
         # check if the dir exists, and if not, ask if we should create it?
 
         # Clear our status cache, & re-check what's installed
@@ -1476,81 +1681,86 @@ class DownloaderGUI(object):
         try:
             self._fill_table()
         except HTTPError as e:
-            showerror('Error reading from server', e)
+            showerror("Error reading from server", e)
         except URLError as e:
-            showerror('Error connecting to server', e.reason)
+            showerror("Error connecting to server", e.reason)
         self._show_info()
 
     def _show_info(self):
-        print('showing info', self._ds.url)
-        for entry,cb in self._info.values():
-            entry['state'] = 'normal'
-            entry.delete(0, 'end')
-        self._info['url'][0].insert(0, self._ds.url)
-        self._info['download_dir'][0].insert(0, self._ds.download_dir)
-        for entry,cb in self._info.values():
-            entry['state'] = 'disabled'
+        print("showing info", self._ds.url)
+        for entry, cb in self._info.values():
+            entry["state"] = "normal"
+            entry.delete(0, "end")
+        self._info["url"][0].insert(0, self._ds.url)
+        self._info["download_dir"][0].insert(0, self._ds.download_dir)
+        for entry, cb in self._info.values():
+            entry["state"] = "disabled"
 
     def _prev_tab(self, *e):
         for i, tab in enumerate(self._tab_names):
             if tab.lower() == self._tab and i > 0:
-                self._tab = self._tab_names[i-1].lower()
+                self._tab = self._tab_names[i - 1].lower()
                 try:
                     return self._fill_table()
                 except HTTPError as e:
-                    showerror('Error reading from server', e)
+                    showerror("Error reading from server", e)
                 except URLError as e:
-                    showerror('Error connecting to server', e.reason)
+                    showerror("Error connecting to server", e.reason)
 
     def _next_tab(self, *e):
         for i, tab in enumerate(self._tab_names):
-            if tab.lower() == self._tab and i < (len(self._tabs)-1):
-                self._tab = self._tab_names[i+1].lower()
+            if tab.lower() == self._tab and i < (len(self._tabs) - 1):
+                self._tab = self._tab_names[i + 1].lower()
                 try:
                     return self._fill_table()
                 except HTTPError as e:
-                    showerror('Error reading from server', e)
+                    showerror("Error reading from server", e)
                 except URLError as e:
-                    showerror('Error connecting to server', e.reason)
+                    showerror("Error connecting to server", e.reason)
 
     def _select_tab(self, event):
-        self._tab = event.widget['text'].lower()
+        self._tab = event.widget["text"].lower()
         try:
             self._fill_table()
         except HTTPError as e:
-            showerror('Error reading from server', e)
+            showerror("Error reading from server", e)
         except URLError as e:
-            showerror('Error connecting to server', e.reason)
+            showerror("Error connecting to server", e.reason)
 
-    _tab = 'collections'
-    #_tab = 'corpora'
+    _tab = "collections"
+    # _tab = 'corpora'
     _rows = None
+
     def _fill_table(self):
         selected_row = self._table.selected_row()
         self._table.clear()
-        if self._tab == 'all packages':
+        if self._tab == "all packages":
             items = self._ds.packages()
-        elif self._tab == 'corpora':
+        elif self._tab == "corpora":
             items = self._ds.corpora()
-        elif self._tab == 'models':
+        elif self._tab == "models":
             items = self._ds.models()
-        elif self._tab == 'collections':
+        elif self._tab == "collections":
             items = self._ds.collections()
         else:
-            assert 0, 'bad tab value %r' % self._tab
+            assert 0, "bad tab value %r" % self._tab
         rows = [self._package_to_columns(item) for item in items]
         self._table.extend(rows)
 
         # Highlight the active tab.
         for tab, label in self._tabs.items():
             if tab == self._tab:
-                label.configure(foreground=self._FRONT_TAB_COLOR[0],
-                                background=self._FRONT_TAB_COLOR[1])
+                label.configure(
+                    foreground=self._FRONT_TAB_COLOR[0],
+                    background=self._FRONT_TAB_COLOR[1],
+                )
             else:
-                label.configure(foreground=self._BACK_TAB_COLOR[0],
-                                background=self._BACK_TAB_COLOR[1])
+                label.configure(
+                    foreground=self._BACK_TAB_COLOR[0],
+                    background=self._BACK_TAB_COLOR[1],
+                )
 
-        self._table.sort_by('Identifier', order='ascending')
+        self._table.sort_by("Identifier", order="ascending")
         self._color_table()
         self._table.select(selected_row)
 
@@ -1559,15 +1769,13 @@ class DownloaderGUI(object):
         # though.  (This is on OS X w/ python 2.5)  The length of
         # delay that's necessary seems to depend on how fast the
         # computer is. :-/
-        self.top.after(150, self._table._scrollbar.set,
-                       *self._table._mlb.yview())
-        self.top.after(300, self._table._scrollbar.set,
-                       *self._table._mlb.yview())
+        self.top.after(150, self._table._scrollbar.set, *self._table._mlb.yview())
+        self.top.after(300, self._table._scrollbar.set, *self._table._mlb.yview())
 
     def _update_table_status(self):
         for row_num in range(len(self._table)):
-            status = self._ds.status(self._table[row_num, 'Identifier'])
-            self._table[row_num, 'Status'] = status
+            status = self._ds.status(self._table[row_num, "Identifier"])
+            self._table[row_num, "Status"] = status
         self._color_table()
 
     def _download(self, *e):
@@ -1576,30 +1784,35 @@ class DownloaderGUI(object):
         if self._use_threads:
             return self._download_threaded(*e)
 
-        marked = [self._table[row, 'Identifier']
-                  for row in range(len(self._table))
-                  if self._table[row, 0] != '']
+        marked = [
+            self._table[row, "Identifier"]
+            for row in range(len(self._table))
+            if self._table[row, 0] != ""
+        ]
         selection = self._table.selected_row()
         if not marked and selection is not None:
-            marked = [self._table[selection, 'Identifier']]
+            marked = [self._table[selection, "Identifier"]]
 
         download_iter = self._ds.incr_download(marked, self._ds.download_dir)
         self._log_indent = 0
         self._download_cb(download_iter, marked)
 
-    _DL_DELAY=10
+    _DL_DELAY = 10
+
     def _download_cb(self, download_iter, ids):
-        try: msg = next(download_iter)
+        try:
+            msg = next(download_iter)
         except StopIteration:
-            #self._fill_table(sort=False)
+            # self._fill_table(sort=False)
             self._update_table_status()
             afterid = self.top.after(10, self._show_progress, 0)
-            self._afterid['_download_cb'] = afterid
+            self._afterid["_download_cb"] = afterid
             return
 
         def show(s):
-            self._progresslabel['text'] = s
+            self._progresslabel["text"] = s
             self._log(s)
+
         if isinstance(msg, ProgressMessage):
             self._show_progress(msg.progress)
         elif isinstance(msg, ErrorMessage):
@@ -1607,70 +1820,73 @@ class DownloaderGUI(object):
             if msg.package is not None:
                 self._select(msg.package.id)
             self._show_progress(None)
-            return # halt progress.
+            return  # halt progress.
         elif isinstance(msg, StartCollectionMessage):
-            show('Downloading collection %s' % msg.collection.id)
+            show("Downloading collection %s" % msg.collection.id)
             self._log_indent += 1
         elif isinstance(msg, StartPackageMessage):
-            show('Downloading package %s' % msg.package.id)
+            show("Downloading package %s" % msg.package.id)
         elif isinstance(msg, UpToDateMessage):
-            show('Package %s is up-to-date!' % msg.package.id)
-        #elif isinstance(msg, StaleMessage):
+            show("Package %s is up-to-date!" % msg.package.id)
+        # elif isinstance(msg, StaleMessage):
         #    show('Package %s is out-of-date or corrupt' % msg.package.id)
         elif isinstance(msg, FinishDownloadMessage):
-            show('Finished downloading %r.' % msg.package.id)
+            show("Finished downloading %r." % msg.package.id)
         elif isinstance(msg, StartUnzipMessage):
-            show('Unzipping %s' % msg.package.filename)
+            show("Unzipping %s" % msg.package.filename)
         elif isinstance(msg, FinishCollectionMessage):
             self._log_indent -= 1
-            show('Finished downloading collection %r.' % msg.collection.id)
+            show("Finished downloading collection %r." % msg.collection.id)
             self._clear_mark(msg.collection.id)
         elif isinstance(msg, FinishPackageMessage):
             self._clear_mark(msg.package.id)
-        afterid = self.top.after(self._DL_DELAY, self._download_cb,
-                                 download_iter, ids)
-        self._afterid['_download_cb'] = afterid
+        afterid = self.top.after(self._DL_DELAY, self._download_cb, download_iter, ids)
+        self._afterid["_download_cb"] = afterid
 
     def _select(self, id):
         for row in range(len(self._table)):
-            if self._table[row, 'Identifier'] == id:
+            if self._table[row, "Identifier"] == id:
                 self._table.select(row)
                 return
 
     def _color_table(self):
         # Color rows according to status.
         for row in range(len(self._table)):
-            bg, sbg = self._ROW_COLOR[self._table[row, 'Status']]
-            fg, sfg = ('black', 'white')
-            self._table.rowconfig(row, foreground=fg, selectforeground=sfg,
-                                  background=bg, selectbackground=sbg)
+            bg, sbg = self._ROW_COLOR[self._table[row, "Status"]]
+            fg, sfg = ("black", "white")
+            self._table.rowconfig(
+                row,
+                foreground=fg,
+                selectforeground=sfg,
+                background=bg,
+                selectbackground=sbg,
+            )
             # Color the marked column
-            self._table.itemconfigure(row, 0,
-                                      foreground=self._MARK_COLOR[0],
-                                      background=self._MARK_COLOR[1])
-
+            self._table.itemconfigure(
+                row, 0, foreground=self._MARK_COLOR[0], background=self._MARK_COLOR[1]
+            )
 
     def _clear_mark(self, id):
         for row in range(len(self._table)):
-            if self._table[row, 'Identifier'] == id:
-                self._table[row, 0] = ''
+            if self._table[row, "Identifier"] == id:
+                self._table[row, 0] = ""
 
     def _mark_all(self, *e):
         for row in range(len(self._table)):
-            self._table[row,0] = 'X'
+            self._table[row, 0] = "X"
 
     def _table_mark(self, *e):
         selection = self._table.selected_row()
         if selection >= 0:
-            if self._table[selection][0] != '':
-                self._table[selection,0] = ''
+            if self._table[selection][0] != "":
+                self._table[selection, 0] = ""
             else:
-                self._table[selection,0] = 'X'
+                self._table[selection, 0] = "X"
         self._table.select(delta=1)
 
     def _show_log(self):
-        text = '\n'.join(self._log_messages)
-        ShowText(self.top, 'NLTK Downloader Log', text)
+        text = "\n".join(self._log_messages)
+        ShowText(self.top, "NLTK Downloader Log", text)
 
     def _package_to_columns(self, pkg):
         """
@@ -1679,23 +1895,24 @@ class DownloaderGUI(object):
         """
         row = []
         for column_index, column_name in enumerate(self.COLUMNS):
-            if column_index == 0: # Mark:
-                row.append('')
-            elif column_name == 'Identifier':
+            if column_index == 0:  # Mark:
+                row.append("")
+            elif column_name == "Identifier":
                 row.append(pkg.id)
-            elif column_name == 'Status':
+            elif column_name == "Status":
                 row.append(self._ds.status(pkg))
             else:
-                attr = column_name.lower().replace(' ', '_')
-                row.append(getattr(pkg, attr, 'n/a'))
+                attr = column_name.lower().replace(" ", "_")
+                row.append(getattr(pkg, attr, "n/a"))
         return row
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # External Interface
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def destroy(self, *e):
-        if self._destroyed: return
+        if self._destroyed:
+            return
         self.top.destroy()
         self._destroyed = True
 
@@ -1716,11 +1933,12 @@ class DownloaderGUI(object):
     def mainloop(self, *args, **kwargs):
         self.top.mainloop(*args, **kwargs)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # HELP
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
-    HELP = textwrap.dedent("""\
+    HELP = textwrap.dedent(
+        """\
     This tool can be used to download a variety of corpora and models
     that can be used with NLTK.  Each corpus or model is distributed
     in a single zip file, known as a \"package file.\"  You can
@@ -1745,74 +1963,85 @@ class DownloaderGUI(object):
       [down]\t Select next package
       [left]\t Select previous tab
       [right]\t Select next tab
-    """)
+    """
+    )
 
     def help(self, *e):
         # The default font's not very legible; try using 'fixed' instead.
         try:
-            ShowText(self.top, 'Help: NLTK Dowloader',
-                     self.HELP.strip(), width=75, font='fixed')
+            ShowText(
+                self.top,
+                "Help: NLTK Dowloader",
+                self.HELP.strip(),
+                width=75,
+                font="fixed",
+            )
         except:
-            ShowText(self.top, 'Help: NLTK Downloader',
-                     self.HELP.strip(), width=75)
+            ShowText(self.top, "Help: NLTK Downloader", self.HELP.strip(), width=75)
 
     def about(self, *e):
-        ABOUT = ("NLTK Downloader\n"+
-                 "Written by Edward Loper")
-        TITLE = 'About: NLTK Downloader'
+        ABOUT = "NLTK Downloader\n" + "Written by Edward Loper"
+        TITLE = "About: NLTK Downloader"
         try:
-            from six.moves.tkinter_messagebox import Message
+            from tkinter.messagebox import Message
+
             Message(message=ABOUT, title=TITLE).show()
         except ImportError:
             ShowText(self.top, TITLE, ABOUT)
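
A note on the pattern above: throughout this commit, six.moves shims are
replaced by direct Python 3 stdlib imports. A dual-version sketch, for
comparison only (the committed code is Python 3-only):

    try:
        from tkinter.messagebox import Message            # Python 3
    except ImportError:
        from six.moves.tkinter_messagebox import Message  # Python 2 via six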
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Progress Bar
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     _gradient_width = 5
+
     def _init_progressbar(self):
         c = self._progressbar
-        width, height = int(c['width']), int(c['height'])
-        for i in range(0, (int(c['width'])*2)//self._gradient_width):
-            c.create_line(i*self._gradient_width+20, -20,
-                          i*self._gradient_width-height-20, height+20,
-                          width=self._gradient_width,
-                          fill='#%02x0000' % (80 + abs(i%6-3)*12))
-        c.addtag_all('gradient')
-        c.itemconfig('gradient', state='hidden')
+        width, height = int(c["width"]), int(c["height"])
+        for i in range(0, (int(c["width"]) * 2) // self._gradient_width):
+            c.create_line(
+                i * self._gradient_width + 20,
+                -20,
+                i * self._gradient_width - height - 20,
+                height + 20,
+                width=self._gradient_width,
+                fill="#%02x0000" % (80 + abs(i % 6 - 3) * 12),
+            )
+        c.addtag_all("gradient")
+        c.itemconfig("gradient", state="hidden")
 
         # This is used to display progress
-        c.addtag_withtag('redbox', c.create_rectangle(
-            0, 0, 0, 0, fill=self._PROGRESS_COLOR[0]))
+        c.addtag_withtag(
+            "redbox", c.create_rectangle(0, 0, 0, 0, fill=self._PROGRESS_COLOR[0])
+        )
 
     def _show_progress(self, percent):
         c = self._progressbar
         if percent is None:
-            c.coords('redbox', 0, 0, 0, 0)
-            c.itemconfig('gradient', state='hidden')
+            c.coords("redbox", 0, 0, 0, 0)
+            c.itemconfig("gradient", state="hidden")
         else:
-            width, height = int(c['width']), int(c['height'])
+            width, height = int(c["width"]), int(c["height"])
             x = percent * int(width) // 100 + 1
-            c.coords('redbox', 0, 0, x, height+1)
+            c.coords("redbox", 0, 0, x, height + 1)
 
     def _progress_alive(self):
         c = self._progressbar
         if not self._downloading:
-            c.itemconfig('gradient', state='hidden')
+            c.itemconfig("gradient", state="hidden")
         else:
-            c.itemconfig('gradient', state='normal')
-            x1, y1, x2, y2 = c.bbox('gradient')
+            c.itemconfig("gradient", state="normal")
+            x1, y1, x2, y2 = c.bbox("gradient")
             if x1 <= -100:
-                c.move('gradient', (self._gradient_width*6)-4, 0)
+                c.move("gradient", (self._gradient_width * 6) - 4, 0)
             else:
-                c.move('gradient', -4, 0)
+                c.move("gradient", -4, 0)
             afterid = self.top.after(200, self._progress_alive)
-            self._afterid['_progress_alive'] = afterid
+            self._afterid["_progress_alive"] = afterid
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Threaded downloader
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _download_threaded(self, *e):
         # If the user tries to start a new download while we're already
@@ -1822,14 +2051,16 @@ class DownloaderGUI(object):
             return
 
         # Change the 'download' button to an 'abort' button.
-        self._download_button['text'] = 'Cancel'
+        self._download_button["text"] = "Cancel"
 
-        marked = [self._table[row, 'Identifier']
-                  for row in range(len(self._table))
-                  if self._table[row, 0] != '']
+        marked = [
+            self._table[row, "Identifier"]
+            for row in range(len(self._table))
+            if self._table[row, 0] != ""
+        ]
         selection = self._table.selected_row()
         if not marked and selection is not None:
-            marked = [self._table[selection, 'Identifier']]
+            marked = [self._table[selection, "Identifier"]]
 
         # Create a new data server object for the download operation,
         # just in case the user modifies our data server during the
@@ -1839,9 +2070,13 @@ class DownloaderGUI(object):
         # Start downloading in a separate thread.
         assert self._download_msg_queue == []
         assert self._download_abort_queue == []
-        self._DownloadThread(ds, marked, self._download_lock,
-                             self._download_msg_queue,
-                             self._download_abort_queue).start()
+        self._DownloadThread(
+            ds,
+            marked,
+            self._download_lock,
+            self._download_msg_queue,
+            self._download_abort_queue,
+        ).start()
 
         # Monitor the download message queue & display its progress.
         self._log_indent = 0
@@ -1855,7 +2090,7 @@ class DownloaderGUI(object):
     def _abort_download(self):
         if self._downloading:
             self._download_lock.acquire()
-            self._download_abort_queue.append('abort')
+            self._download_abort_queue.append("abort")
             self._download_lock.release()
 
     class _DownloadThread(threading.Thread):
@@ -1867,24 +2102,25 @@ class DownloaderGUI(object):
             self.abort = abort
             threading.Thread.__init__(self)
 
-        def run (self):
+        def run(self):
             for msg in self.data_server.incr_download(self.items):
                 self.lock.acquire()
                 self.message_queue.append(msg)
                 # Check if we've been told to kill ourselves:
                 if self.abort:
-                    self.message_queue.append('aborted')
+                    self.message_queue.append("aborted")
                     self.lock.release()
                     return
                 self.lock.release()
             self.lock.acquire()
-            self.message_queue.append('finished')
+            self.message_queue.append("finished")
             self.lock.release()
 
-    _MONITOR_QUEUE_DELAY=100
+    _MONITOR_QUEUE_DELAY = 100
+
     def _monitor_message_queue(self):
         def show(s):
-            self._progresslabel['text'] = s
+            self._progresslabel["text"] = s
             self._log(s)
 
         # Try to acquire the lock; if it's busy, then just try again later.
@@ -1893,20 +2129,20 @@ class DownloaderGUI(object):
         for msg in self._download_msg_queue:
 
             # Done downloading?
-            if msg == 'finished' or msg == 'aborted':
-                #self._fill_table(sort=False)
+            if msg == "finished" or msg == "aborted":
+                # self._fill_table(sort=False)
                 self._update_table_status()
                 self._downloading = False
-                self._download_button['text'] = 'Download'
+                self._download_button["text"] = "Download"
                 del self._download_msg_queue[:]
                 del self._download_abort_queue[:]
                 self._download_lock.release()
-                if msg == 'aborted':
-                    show('Download aborted!')
+                if msg == "aborted":
+                    show("Download aborted!")
                     self._show_progress(None)
                 else:
                     afterid = self.top.after(100, self._show_progress, None)
-                    self._afterid['_monitor_message_queue'] = afterid
+                    self._afterid["_monitor_message_queue"] = afterid
                 return
 
             # All other messages
@@ -1918,27 +2154,27 @@ class DownloaderGUI(object):
                     self._select(msg.package.id)
                 self._show_progress(None)
                 self._downloading = False
-                return # halt progress.
+                return  # halt progress.
             elif isinstance(msg, StartCollectionMessage):
-                show('Downloading collection %r' % msg.collection.id)
+                show("Downloading collection %r" % msg.collection.id)
                 self._log_indent += 1
             elif isinstance(msg, StartPackageMessage):
                 self._ds.clear_status_cache(msg.package.id)
-                show('Downloading package %r' % msg.package.id)
+                show("Downloading package %r" % msg.package.id)
             elif isinstance(msg, UpToDateMessage):
-                show('Package %s is up-to-date!' % msg.package.id)
-            #elif isinstance(msg, StaleMessage):
+                show("Package %s is up-to-date!" % msg.package.id)
+            # elif isinstance(msg, StaleMessage):
             #    show('Package %s is out-of-date or corrupt; updating it' %
             #         msg.package.id)
             elif isinstance(msg, FinishDownloadMessage):
-                show('Finished downloading %r.' % msg.package.id)
+                show("Finished downloading %r." % msg.package.id)
             elif isinstance(msg, StartUnzipMessage):
-                show('Unzipping %s' % msg.package.filename)
+                show("Unzipping %s" % msg.package.filename)
             elif isinstance(msg, FinishUnzipMessage):
-                show('Finished installing %s' % msg.package.id)
+                show("Finished installing %s" % msg.package.id)
             elif isinstance(msg, FinishCollectionMessage):
                 self._log_indent -= 1
-                show('Finished downloading collection %r.' % msg.collection.id)
+                show("Finished downloading collection %r." % msg.collection.id)
                 self._clear_mark(msg.collection.id)
             elif isinstance(msg, FinishPackageMessage):
                 self._update_table_status()
@@ -1948,37 +2184,40 @@ class DownloaderGUI(object):
         # waiting for a good point to abort it, so we don't end up
         # with a partially unzipped package or anything like that).
         if self._download_abort_queue:
-            self._progresslabel['text'] = 'Aborting download...'
+            self._progresslabel["text"] = "Aborting download..."
 
         # Clear the message queue and then release the lock
         del self._download_msg_queue[:]
         self._download_lock.release()
 
         # Check the queue again after MONITOR_QUEUE_DELAY msec.
-        afterid = self.top.after(self._MONITOR_QUEUE_DELAY,
-                                 self._monitor_message_queue)
-        self._afterid['_monitor_message_queue'] = afterid
+        afterid = self.top.after(self._MONITOR_QUEUE_DELAY, self._monitor_message_queue)
+        self._afterid["_monitor_message_queue"] = afterid
+
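
The hunk above preserves the original producer/consumer handoff: the download
thread appends messages under a lock, and the GUI thread drains them on a Tk
after() timer. A stripped-down sketch of that shape (names here are
illustrative, not the class above):

    import threading

    lock, messages = threading.Lock(), []

    def producer():                        # stands in for _DownloadThread.run
        with lock:
            messages.append("finished")

    def poll():                            # stands in for _monitor_message_queue
        if lock.acquire(blocking=False):   # busy? just try again on the next tick
            drained, messages[:] = messages[:], []
            lock.release()
            return drained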
 
 ######################################################################
 # Helper Functions
 ######################################################################
 # [xx] It may make sense to move these to nltk.internals.
 
+
 def md5_hexdigest(file):
     """
     Calculate and return the MD5 checksum for a given file.
     ``file`` may either be a filename or an open stream.
     """
-    if isinstance(file, string_types):
-        with open(file, 'rb') as infile:
+    if isinstance(file, str):
+        with open(file, "rb") as infile:
             return _md5_hexdigest(infile)
     return _md5_hexdigest(file)
 
+
 def _md5_hexdigest(fp):
     md5_digest = md5()
     while True:
-        block = fp.read(1024*16)  # 16k blocks
-        if not block: break
+        block = fp.read(1024 * 16)  # 16k blocks
+        if not block:
+            break
         md5_digest.update(block)
     return md5_digest.hexdigest()
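
Usage sketch for the two helpers above ("model.zip" is a placeholder filename,
and md5_hexdigest is assumed to be in scope as defined here):

    digest = md5_hexdigest("model.zip")     # from a path
    with open("model.zip", "rb") as fp:
        assert md5_hexdigest(fp) == digest  # or from an open binary stream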
 
@@ -1995,63 +2234,32 @@ def unzip(filename, root, verbose=True):
         if isinstance(message, ErrorMessage):
             raise Exception(message)
 
+
 def _unzip_iter(filename, root, verbose=True):
     if verbose:
-        sys.stdout.write('Unzipping %s' % os.path.split(filename)[1])
+        sys.stdout.write("Unzipping %s" % os.path.split(filename)[1])
         sys.stdout.flush()
 
-    try: zf = zipfile.ZipFile(filename)
+    try:
+        zf = zipfile.ZipFile(filename)
     except zipfile.error as e:
-        yield ErrorMessage(filename, 'Error with downloaded zip file')
+        yield ErrorMessage(filename, "Error with downloaded zip file")
         return
     except Exception as e:
         yield ErrorMessage(filename, e)
         return
 
-    # Get lists of directories & files
-    namelist = zf.namelist()
-    dirlist = set()
-    for x in namelist:
-        if x.endswith('/'):
-            dirlist.add(x)
-        else:
-            dirlist.add(x.rsplit('/',1)[0] + '/')
-    filelist = [x for x in namelist if not x.endswith('/')]
-
-    # Create the target directory if it doesn't exist
-    if not os.path.exists(root):
-        os.mkdir(root)
-
-    # Create the directory structure
-    for dirname in sorted(dirlist):
-        pieces = dirname[:-1].split('/')
-        for i in range(len(pieces)):
-            dirpath = os.path.join(root, *pieces[:i+1])
-            if not os.path.exists(dirpath):
-                os.mkdir(dirpath)
+    zf.extractall(root)
 
-    # Extract files.
-    for i, filename in enumerate(filelist):
-        filepath = os.path.join(root, *filename.split('/'))
-
-        try:
-            with open(filepath, 'wb') as dstfile, zf.open(filename) as srcfile:
-                shutil.copyfileobj(srcfile, dstfile)
-        except Exception as e:
-            yield ErrorMessage(filename, e)
-            return
-
-        if verbose and (i*10/len(filelist) > (i-1)*10/len(filelist)):
-            sys.stdout.write('.')
-            sys.stdout.flush()
     if verbose:
         print()
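
The rewrite above trades the hand-rolled directory walk (and its per-file
progress dots) for the single stdlib call, which creates any intermediate
directories itself. The equivalent standalone operation, with placeholder
names:

    import zipfile

    with zipfile.ZipFile("pkg.zip") as zf:
        zf.extractall("data")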
 
+
 ######################################################################
 # Index Builder
 ######################################################################
 # This may move to a different file sometime.
-import subprocess, zipfile
+
 
 def build_index(root, base_url):
     """
@@ -2085,44 +2293,47 @@ def build_index(root, base_url):
     """
     # Find all packages.
     packages = []
-    for pkg_xml, zf, subdir in _find_packages(os.path.join(root, 'packages')):
+    for pkg_xml, zf, subdir in _find_packages(os.path.join(root, "packages")):
         zipstat = os.stat(zf.filename)
-        url = '%s/%s/%s' % (base_url, subdir, os.path.split(zf.filename)[1])
+        url = "%s/%s/%s" % (base_url, subdir, os.path.split(zf.filename)[1])
         unzipped_size = sum(zf_info.file_size for zf_info in zf.infolist())
 
         # Fill in several fields of the package xml with calculated values.
-        pkg_xml.set('unzipped_size', '%s' % unzipped_size)
-        pkg_xml.set('size', '%s' % zipstat.st_size)
-        pkg_xml.set('checksum', '%s' % md5_hexdigest(zf.filename))
-        pkg_xml.set('subdir', subdir)
-        #pkg_xml.set('svn_revision', _svn_revision(zf.filename))
-        if not pkg_xml.get('url'):
-            pkg_xml.set('url', url)
+        pkg_xml.set("unzipped_size", "%s" % unzipped_size)
+        pkg_xml.set("size", "%s" % zipstat.st_size)
+        pkg_xml.set("checksum", "%s" % md5_hexdigest(zf.filename))
+        pkg_xml.set("subdir", subdir)
+        # pkg_xml.set('svn_revision', _svn_revision(zf.filename))
+        if not pkg_xml.get("url"):
+            pkg_xml.set("url", url)
 
         # Record the package.
         packages.append(pkg_xml)
 
     # Find all collections
-    collections = list(_find_collections(os.path.join(root, 'collections')))
+    collections = list(_find_collections(os.path.join(root, "collections")))
 
     # Check that all UIDs are unique
     uids = set()
-    for item in packages+collections:
-        if item.get('id') in uids:
-            raise ValueError('Duplicate UID: %s' % item.get('id'))
-        uids.add(item.get('id'))
+    for item in packages + collections:
+        if item.get("id") in uids:
+            raise ValueError("Duplicate UID: %s" % item.get("id"))
+        uids.add(item.get("id"))
 
     # Put it all together
-    top_elt = ElementTree.Element('nltk_data')
-    top_elt.append(ElementTree.Element('packages'))
-    for package in packages: top_elt[0].append(package)
-    top_elt.append(ElementTree.Element('collections'))
-    for collection in collections: top_elt[1].append(collection)
+    top_elt = ElementTree.Element("nltk_data")
+    top_elt.append(ElementTree.Element("packages"))
+    for package in packages:
+        top_elt[0].append(package)
+    top_elt.append(ElementTree.Element("collections"))
+    for collection in collections:
+        top_elt[1].append(collection)
 
     _indent_xml(top_elt)
     return top_elt
 
-def _indent_xml(xml, prefix=''):
+
+def _indent_xml(xml, prefix=""):
     """
     Helper for ``build_index()``: Given an XML ``ElementTree``, modify it
     (and its descendents) ``text`` and ``tail`` attributes to generate
@@ -2130,12 +2341,13 @@ def _indent_xml(xml, prefix=''):
     spaces with respect to its parent.
     """
     if len(xml) > 0:
-        xml.text = (xml.text or '').strip() + '\n' + prefix + '  '
+        xml.text = (xml.text or "").strip() + "\n" + prefix + "  "
         for child in xml:
-            _indent_xml(child, prefix+'  ')
+            _indent_xml(child, prefix + "  ")
         for child in xml[:-1]:
-            child.tail = (child.tail or '').strip() + '\n' + prefix + '  '
-        xml[-1].tail = (xml[-1].tail or '').strip() + '\n' + prefix
+            child.tail = (child.tail or "").strip() + "\n" + prefix + "  "
+        xml[-1].tail = (xml[-1].tail or "").strip() + "\n" + prefix
+
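
In miniature, what _indent_xml produces (assuming the function above is in
scope):

    from xml.etree import ElementTree

    top = ElementTree.Element("nltk_data")
    ElementTree.SubElement(top, "packages")
    ElementTree.SubElement(top, "collections")
    _indent_xml(top)
    print(ElementTree.tostring(top).decode())
    # <nltk_data>
    #   <packages />
    #   <collections />
    # </nltk_data>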
 
 def _check_package(pkg_xml, zipfilename, zf):
     """
@@ -2144,15 +2356,18 @@ def _check_package(pkg_xml, zipfilename, zf):
     """
     # The filename must match the id given in the XML file.
     uid = os.path.splitext(os.path.split(zipfilename)[1])[0]
-    if pkg_xml.get('id') != uid:
-        raise ValueError('package identifier mismatch (%s vs %s)' %
-                         (pkg_xml.get('id'), uid))
+    if pkg_xml.get("id") != uid:
+        raise ValueError(
+            "package identifier mismatch (%s vs %s)" % (pkg_xml.get("id"), uid)
+        )
 
     # Zip file must expand to a subdir whose name matches uid.
-    if sum( (name!=uid and not name.startswith(uid+'/'))
-            for name in zf.namelist() ):
-        raise ValueError('Zipfile %s.zip does not expand to a single '
-                         'subdirectory %s/' % (uid, uid))
+    if sum((name != uid and not name.startswith(uid + "/")) for name in zf.namelist()):
+        raise ValueError(
+            "Zipfile %s.zip does not expand to a single "
+            "subdirectory %s/" % (uid, uid)
+        )
+
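
The sum() test above counts zip entries that escape the expected top-level
directory. On a toy namelist (uid "punkt" is illustrative):

    uid = "punkt"
    ok = ["punkt/", "punkt/english.pickle"]
    bad = ["punkt/", "README.txt"]
    stray = lambda names: sum(n != uid and not n.startswith(uid + "/") for n in names)
    assert stray(ok) == 0 and stray(bad) == 1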
 
 # update for git?
 def _svn_revision(filename):
@@ -2160,15 +2375,20 @@ def _svn_revision(filename):
     Helper for ``build_index()``: Calculate the subversion revision
     number for a given file (by using ``subprocess`` to run ``svn``).
     """
-    p = subprocess.Popen(['svn', 'status', '-v', filename],
-                         stdout=subprocess.PIPE,
-                         stderr=subprocess.PIPE)
+    p = subprocess.Popen(
+        ["svn", "status", "-v", filename],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
     (stdout, stderr) = p.communicate()
     if p.returncode != 0 or stderr or not stdout:
-        raise ValueError('Error determining svn_revision for %s: %s' %
-                         (os.path.split(filename)[1], textwrap.fill(stderr)))
+        raise ValueError(
+            "Error determining svn_revision for %s: %s"
+            % (os.path.split(filename)[1], textwrap.fill(stderr))
+        )
     return stdout.split()[2]
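
One Python 3 caveat worth flagging for the function above: communicate()
returns bytes unless text mode is requested, so stdout.split()[2] yields bytes
and textwrap.fill(stderr) would fail on a bytes object. A decoded variant of
the same call (a sketch; "somefile" is a placeholder):

    import subprocess

    p = subprocess.Popen(
        ["svn", "status", "-v", "somefile"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,  # decode to str (text=True on Python 3.7+)
    )
    stdout, stderr = p.communicate()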
 
+
 def _find_collections(root):
     """
     Helper for ``build_index()``: Yield a list of ElementTree.Element
@@ -2177,10 +2397,11 @@ def _find_collections(root):
     packages = []
     for dirname, subdirs, files in os.walk(root):
         for filename in files:
-            if filename.endswith('.xml'):
+            if filename.endswith(".xml"):
                 xmlfile = os.path.join(dirname, filename)
                 yield ElementTree.parse(xmlfile).getroot()
 
+
 def _find_packages(root):
     """
     Helper for ``build_index()``: Yield a list of tuples
@@ -2192,40 +2413,50 @@ def _find_packages(root):
         the package was found (e.g. 'corpora' or 'grammars').
     """
     from nltk.corpus.reader.util import _path_from
+
     # Find all packages.
     packages = []
     for dirname, subdirs, files in os.walk(root):
-        relpath = '/'.join(_path_from(root, dirname))
+        relpath = "/".join(_path_from(root, dirname))
         for filename in files:
-            if filename.endswith('.xml'):
+            if filename.endswith(".xml"):
                 xmlfilename = os.path.join(dirname, filename)
-                zipfilename = xmlfilename[:-4]+'.zip'
-                try: zf = zipfile.ZipFile(zipfilename)
+                zipfilename = xmlfilename[:-4] + ".zip"
+                try:
+                    zf = zipfile.ZipFile(zipfilename)
                 except Exception as e:
-                    raise ValueError('Error reading file %r!\n%s' %
-                                     (zipfilename, e))
-                try: pkg_xml = ElementTree.parse(xmlfilename).getroot()
+                    raise ValueError("Error reading file %r!\n%s" % (zipfilename, e))
+                try:
+                    pkg_xml = ElementTree.parse(xmlfilename).getroot()
                 except Exception as e:
-                    raise ValueError('Error reading file %r!\n%s' %
-                                     (xmlfilename, e))
+                    raise ValueError("Error reading file %r!\n%s" % (xmlfilename, e))
 
                 # Check that the UID matches the filename
                 uid = os.path.split(xmlfilename[:-4])[1]
-                if pkg_xml.get('id') != uid:
-                    raise ValueError('package identifier mismatch (%s '
-                                     'vs %s)' % (pkg_xml.get('id'), uid))
+                if pkg_xml.get("id") != uid:
+                    raise ValueError(
+                        "package identifier mismatch (%s "
+                        "vs %s)" % (pkg_xml.get("id"), uid)
+                    )
 
                 # Check that the zipfile expands to a subdir whose
                 # name matches the uid.
-                if sum( (name!=uid and not name.startswith(uid+'/'))
-                        for name in zf.namelist() ):
-                    raise ValueError('Zipfile %s.zip does not expand to a '
-                                     'single subdirectory %s/' % (uid, uid))
+                if sum(
+                    (name != uid and not name.startswith(uid + "/"))
+                    for name in zf.namelist()
+                ):
+                    raise ValueError(
+                        "Zipfile %s.zip does not expand to a "
+                        "single subdirectory %s/" % (uid, uid)
+                    )
 
                 yield pkg_xml, zf, relpath
         # Don't recurse into svn subdirectories:
-        try: subdirs.remove('.svn')
-        except ValueError: pass
+        try:
+            subdirs.remove(".svn")
+        except ValueError:
+            pass
+
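
The try/except at the end of the loop relies on os.walk's top-down contract:
mutating subdirs in place prevents descent into those directories. The same
effect, spelled out:

    import os

    for dirname, subdirs, files in os.walk("."):
        if ".svn" in subdirs:
            subdirs.remove(".svn")  # os.walk will not enter ".svn"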
 
 ######################################################################
 # Main:
@@ -2237,42 +2468,81 @@ def _find_packages(root):
 _downloader = Downloader()
 download = _downloader.download
 
+
 def download_shell():
     DownloaderShell(_downloader).run()
 
+
 def download_gui():
     DownloaderGUI(_downloader).mainloop()
 
+
 def update():
     _downloader.update()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     from optparse import OptionParser
+
     parser = OptionParser()
-    parser.add_option("-d", "--dir", dest="dir",
-        help="download package to directory DIR", metavar="DIR")
-    parser.add_option("-q", "--quiet", dest="quiet", action="store_true",
-        default=False, help="work quietly")
-    parser.add_option("-f", "--force", dest="force", action="store_true",
-        default=False, help="download even if already installed")
-    parser.add_option("-e", "--exit-on-error", dest="halt_on_error", action="store_true",
-        default=False, help="exit if an error occurs")
-    parser.add_option("-u", "--url", dest="server_index_url",
-        default=os.environ.get('NLTK_DOWNLOAD_URL'),
-        help="download server index url")
+    parser.add_option(
+        "-d",
+        "--dir",
+        dest="dir",
+        help="download package to directory DIR",
+        metavar="DIR",
+    )
+    parser.add_option(
+        "-q",
+        "--quiet",
+        dest="quiet",
+        action="store_true",
+        default=False,
+        help="work quietly",
+    )
+    parser.add_option(
+        "-f",
+        "--force",
+        dest="force",
+        action="store_true",
+        default=False,
+        help="download even if already installed",
+    )
+    parser.add_option(
+        "-e",
+        "--exit-on-error",
+        dest="halt_on_error",
+        action="store_true",
+        default=False,
+        help="exit if an error occurs",
+    )
+    parser.add_option(
+        "-u",
+        "--url",
+        dest="server_index_url",
+        default=os.environ.get("NLTK_DOWNLOAD_URL"),
+        help="download server index url",
+    )
 
     (options, args) = parser.parse_args()
 
-    downloader = Downloader(server_index_url = options.server_index_url)
+    downloader = Downloader(server_index_url=options.server_index_url)
 
     if args:
         for pkg_id in args:
-            rv = downloader.download(info_or_id=pkg_id, download_dir=options.dir,
-                quiet=options.quiet, force=options.force,
-                halt_on_error=options.halt_on_error)
-            if rv==False and options.halt_on_error:
+            rv = downloader.download(
+                info_or_id=pkg_id,
+                download_dir=options.dir,
+                quiet=options.quiet,
+                force=options.force,
+                halt_on_error=options.halt_on_error,
+            )
+            if rv is False and options.halt_on_error:
                 break
     else:
-        downloader.download(download_dir=options.dir,
-            quiet=options.quiet, force=options.force,
-            halt_on_error=options.halt_on_error)
+        downloader.download(
+            download_dir=options.dir,
+            quiet=options.quiet,
+            force=options.force,
+            halt_on_error=options.halt_on_error,
+        )
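
Assumed command-line invocations matching the options above (package
identifiers are examples only):

    python -m nltk.downloader punkt stopwords
    python -m nltk.downloader -d ~/nltk_data -q -f punkt
    python -m nltk.downloader -e -u https://example.org/nltk_data/index.xml all
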
diff --git a/nlp_resource_data/nltk/downloader.pyc b/nlp_resource_data/nltk/downloader.pyc
deleted file mode 100755 (executable)
index 9510b13..0000000
Binary files a/nlp_resource_data/nltk/downloader.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/draw/__init__.py b/nlp_resource_data/nltk/draw/__init__.py
old mode 100755 (executable)
new mode 100644 (file)
index fdc6678..8e90fd1
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: graphical representations package
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
@@ -8,15 +8,20 @@
 
 # Import Tkinter-based modules if Tkinter is installed
 try:
-    from six.moves import tkinter
+    import tkinter
 except ImportError:
     import warnings
-    warnings.warn("nltk.draw package not loaded "
-                  "(please install Tkinter library).")
+
+    warnings.warn("nltk.draw package not loaded " "(please install Tkinter library).")
 else:
     from nltk.draw.cfg import ProductionList, CFGEditor, CFGDemo
-    from nltk.draw.tree import (TreeSegmentWidget, tree_to_treesegment,
-                      TreeWidget, TreeView, draw_trees)
+    from nltk.draw.tree import (
+        TreeSegmentWidget,
+        tree_to_treesegment,
+        TreeWidget,
+        TreeView,
+        draw_trees,
+    )
     from nltk.draw.table import Table
 
 from nltk.draw.dispersion import dispersion_plot
@@ -24,4 +29,5 @@ from nltk.draw.dispersion import dispersion_plot
 # skip doctests from this package
 def setup_module(module):
     from nose import SkipTest
+
     raise SkipTest("nltk.draw examples are not doctests")
diff --git a/nlp_resource_data/nltk/draw/__init__.pyc b/nlp_resource_data/nltk/draw/__init__.pyc
deleted file mode 100755 (executable)
index 94f0ee4..0000000
Binary files a/nlp_resource_data/nltk/draw/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/draw/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/draw/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f679312
Binary files /dev/null and b/nlp_resource_data/nltk/draw/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/draw/__pycache__/cfg.cpython-37.pyc b/nlp_resource_data/nltk/draw/__pycache__/cfg.cpython-37.pyc
new file mode 100644 (file)
index 0000000..9a77630
Binary files /dev/null and b/nlp_resource_data/nltk/draw/__pycache__/cfg.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/draw/__pycache__/dispersion.cpython-37.pyc b/nlp_resource_data/nltk/draw/__pycache__/dispersion.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ee174f3
Binary files /dev/null and b/nlp_resource_data/nltk/draw/__pycache__/dispersion.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/draw/__pycache__/table.cpython-37.pyc b/nlp_resource_data/nltk/draw/__pycache__/table.cpython-37.pyc
new file mode 100644 (file)
index 0000000..57ffb51
Binary files /dev/null and b/nlp_resource_data/nltk/draw/__pycache__/table.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/draw/__pycache__/tree.cpython-37.pyc b/nlp_resource_data/nltk/draw/__pycache__/tree.cpython-37.pyc
new file mode 100644 (file)
index 0000000..d62d51d
Binary files /dev/null and b/nlp_resource_data/nltk/draw/__pycache__/tree.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/draw/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/draw/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..9aee591
Binary files /dev/null and b/nlp_resource_data/nltk/draw/__pycache__/util.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/draw/cfg.py b/nlp_resource_data/nltk/draw/cfg.py
old mode 100755 (executable)
new mode 100644 (file)
index 3038f9f..9cab511
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: CFG visualization
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -48,41 +48,57 @@ Visualization tools for CFGs.
 
 import re
 
-from six import string_types
-from six.moves.tkinter import (Button, Canvas, Entry, Frame, IntVar, Label,
-                               Scrollbar, Text, Tk, Toplevel)
-
-from nltk.grammar import (CFG, _read_cfg_production,
-                          Nonterminal, nonterminals)
+from tkinter import (
+    Button,
+    Canvas,
+    Entry,
+    Frame,
+    IntVar,
+    Label,
+    Scrollbar,
+    Text,
+    Tk,
+    Toplevel,
+)
+
+from nltk.grammar import CFG, _read_cfg_production, Nonterminal, nonterminals
 from nltk.tree import Tree
 from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment
-from nltk.draw.util import (CanvasFrame, ColorizedList, ShowText,
-                            SymbolWidget, TextWidget)
+from nltk.draw.util import (
+    CanvasFrame,
+    ColorizedList,
+    ShowText,
+    SymbolWidget,
+    TextWidget,
+)
 
 ######################################################################
 # Production List
 ######################################################################
 
+
 class ProductionList(ColorizedList):
-    ARROW = SymbolWidget.SYMBOLS['rightarrow']
+    ARROW = SymbolWidget.SYMBOLS["rightarrow"]
 
     def _init_colortags(self, textwidget, options):
-        textwidget.tag_config('terminal', foreground='#006000')
-        textwidget.tag_config('arrow', font='symbol', underline='0')
-        textwidget.tag_config('nonterminal', foreground='blue',
-                              font=('helvetica', -12, 'bold'))
+        textwidget.tag_config("terminal", foreground="#006000")
+        textwidget.tag_config("arrow", font="symbol", underline="0")
+        textwidget.tag_config(
+            "nonterminal", foreground="blue", font=("helvetica", -12, "bold")
+        )
 
     def _item_repr(self, item):
         contents = []
-        contents.append(('%s\t' % item.lhs(), 'nonterminal'))
-        contents.append((self.ARROW, 'arrow'))
+        contents.append(("%s\t" % item.lhs(), "nonterminal"))
+        contents.append((self.ARROW, "arrow"))
         for elt in item.rhs():
             if isinstance(elt, Nonterminal):
-                contents.append((' %s' % elt.symbol(), 'nonterminal'))
+                contents.append((" %s" % elt.symbol(), "nonterminal"))
             else:
-                contents.append((' %r' % elt, 'terminal'))
+                contents.append((" %r" % elt, "terminal"))
         return contents
 
+
 ######################################################################
 # CFG Editor
 ######################################################################
@@ -130,6 +146,7 @@ the CFG:
 
 """
 
+
 class CFGEditor(object):
     """
     A dialog window for creating and editing context free grammars.
@@ -140,21 +157,28 @@ class CFGEditor(object):
     - All terminals must be strings consisting of word characters
       and space characters.
     """
+
     # Regular expressions used by _analyze_line.  Precompile them, so
     # we can process the text faster.
-    ARROW = SymbolWidget.SYMBOLS['rightarrow']
-    _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|("+ARROW+"))")
-    _ARROW_RE = re.compile("\s*(->|("+ARROW+"))\s*")
-    _PRODUCTION_RE = re.compile(r"(^\s*\w+\s*)" +              # LHS
-                                "(->|("+ARROW+"))\s*" +        # arrow
-                                r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$") # RHS
-    _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|("+ARROW+")")
-    _BOLD = ('helvetica', -12, 'bold')
+    ARROW = SymbolWidget.SYMBOLS["rightarrow"]
+    _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + "))")
+    _ARROW_RE = re.compile("\s*(->|(" + ARROW + "))\s*")
+    _PRODUCTION_RE = re.compile(
+        r"(^\s*\w+\s*)"
+        + "(->|("  # LHS
+        + ARROW
+        + "))\s*"
+        + r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$"  # arrow
+    )  # RHS
+    _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|(" + ARROW + ")")
+    _BOLD = ("helvetica", -12, "bold")
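
A rough check of the production syntax those patterns accept, using the plain
ASCII arrow only (the symbol-font ARROW alternative is elided here):

    import re

    PRODUCTION = re.compile(
        r"(^\s*\w+\s*)(->)\s*((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$"
    )
    assert PRODUCTION.match("S -> NP VP | 'hello'")
    assert not PRODUCTION.match("-> NP")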
 
     def __init__(self, parent, cfg=None, set_cfg_callback=None):
         self._parent = parent
-        if cfg is not None: self._cfg = cfg
-        else: self._cfg = CFG(Nonterminal('S'), [])
+        if cfg is not None:
+            self._cfg = cfg
+        else:
+            self._cfg = CFG(Nonterminal("S"), [])
         self._set_cfg_callback = set_cfg_callback
 
         self._highlight_matching_nonterminals = 1
@@ -164,149 +188,159 @@ class CFGEditor(object):
         self._init_bindings()
 
         self._init_startframe()
-        self._startframe.pack(side='top', fill='x', expand=0)
+        self._startframe.pack(side="top", fill="x", expand=0)
         self._init_prodframe()
-        self._prodframe.pack(side='top', fill='both', expand=1)
+        self._prodframe.pack(side="top", fill="both", expand=1)
         self._init_buttons()
-        self._buttonframe.pack(side='bottom', fill='x', expand=0)
+        self._buttonframe.pack(side="bottom", fill="x", expand=0)
 
         self._textwidget.focus()
 
     def _init_startframe(self):
         frame = self._startframe = Frame(self._top)
         self._start = Entry(frame)
-        self._start.pack(side='right')
-        Label(frame, text='Start Symbol:').pack(side='right')
-        Label(frame, text='Productions:').pack(side='left')
+        self._start.pack(side="right")
+        Label(frame, text="Start Symbol:").pack(side="right")
+        Label(frame, text="Productions:").pack(side="left")
         self._start.insert(0, self._cfg.start().symbol())
 
     def _init_buttons(self):
         frame = self._buttonframe = Frame(self._top)
-        Button(frame, text='Ok', command=self._ok,
-               underline=0, takefocus=0).pack(side='left')
-        Button(frame, text='Apply', command=self._apply,
-               underline=0, takefocus=0).pack(side='left')
-        Button(frame, text='Reset', command=self._reset,
-               underline=0, takefocus=0,).pack(side='left')
-        Button(frame, text='Cancel', command=self._cancel,
-               underline=0, takefocus=0).pack(side='left')
-        Button(frame, text='Help', command=self._help,
-               underline=0, takefocus=0).pack(side='right')
+        Button(frame, text="Ok", command=self._ok, underline=0, takefocus=0).pack(
+            side="left"
+        )
+        Button(frame, text="Apply", command=self._apply, underline=0, takefocus=0).pack(
+            side="left"
+        )
+        Button(frame, text="Reset", command=self._reset, underline=0, takefocus=0).pack(
+            side="left"
+        )
+        Button(
+            frame, text="Cancel", command=self._cancel, underline=0, takefocus=0
+        ).pack(side="left")
+        Button(frame, text="Help", command=self._help, underline=0, takefocus=0).pack(
+            side="right"
+        )
 
     def _init_bindings(self):
-        self._top.title('CFG Editor')
-        self._top.bind('<Control-q>', self._cancel)
-        self._top.bind('<Alt-q>', self._cancel)
-        self._top.bind('<Control-d>', self._cancel)
-        #self._top.bind('<Control-x>', self._cancel)
-        self._top.bind('<Alt-x>', self._cancel)
-        self._top.bind('<Escape>', self._cancel)
-        #self._top.bind('<Control-c>', self._cancel)
-        self._top.bind('<Alt-c>', self._cancel)
-
-        self._top.bind('<Control-o>', self._ok)
-        self._top.bind('<Alt-o>', self._ok)
-        self._top.bind('<Control-a>', self._apply)
-        self._top.bind('<Alt-a>', self._apply)
-        self._top.bind('<Control-r>', self._reset)
-        self._top.bind('<Alt-r>', self._reset)
-        self._top.bind('<Control-h>', self._help)
-        self._top.bind('<Alt-h>', self._help)
-        self._top.bind('<F1>', self._help)
+        self._top.title("CFG Editor")
+        self._top.bind("<Control-q>", self._cancel)
+        self._top.bind("<Alt-q>", self._cancel)
+        self._top.bind("<Control-d>", self._cancel)
+        # self._top.bind('<Control-x>', self._cancel)
+        self._top.bind("<Alt-x>", self._cancel)
+        self._top.bind("<Escape>", self._cancel)
+        # self._top.bind('<Control-c>', self._cancel)
+        self._top.bind("<Alt-c>", self._cancel)
+
+        self._top.bind("<Control-o>", self._ok)
+        self._top.bind("<Alt-o>", self._ok)
+        self._top.bind("<Control-a>", self._apply)
+        self._top.bind("<Alt-a>", self._apply)
+        self._top.bind("<Control-r>", self._reset)
+        self._top.bind("<Alt-r>", self._reset)
+        self._top.bind("<Control-h>", self._help)
+        self._top.bind("<Alt-h>", self._help)
+        self._top.bind("<F1>", self._help)
 
     def _init_prodframe(self):
         self._prodframe = Frame(self._top)
 
         # Create the basic Text widget & scrollbar.
-        self._textwidget = Text(self._prodframe, background='#e0e0e0',
-                                exportselection=1)
-        self._textscroll = Scrollbar(self._prodframe, takefocus=0,
-                                     orient='vertical')
-        self._textwidget.config(yscrollcommand = self._textscroll.set)
+        self._textwidget = Text(
+            self._prodframe, background="#e0e0e0", exportselection=1
+        )
+        self._textscroll = Scrollbar(self._prodframe, takefocus=0, orient="vertical")
+        self._textwidget.config(yscrollcommand=self._textscroll.set)
         self._textscroll.config(command=self._textwidget.yview)
-        self._textscroll.pack(side='right', fill='y')
-        self._textwidget.pack(expand=1, fill='both', side='left')
+        self._textscroll.pack(side="right", fill="y")
+        self._textwidget.pack(expand=1, fill="both", side="left")
 
         # Initialize the colorization tags.  Each nonterminal gets its
         # own tag, so they aren't listed here.
-        self._textwidget.tag_config('terminal', foreground='#006000')
-        self._textwidget.tag_config('arrow', font='symbol')
-        self._textwidget.tag_config('error', background='red')
+        self._textwidget.tag_config("terminal", foreground="#006000")
+        self._textwidget.tag_config("arrow", font="symbol")
+        self._textwidget.tag_config("error", background="red")
 
         # Keep track of what line they're on.  We use that to remember
         # to re-analyze a line whenever they leave it.
         self._linenum = 0
 
         # Expand "->" to an arrow.
-        self._top.bind('>', self._replace_arrows)
+        self._top.bind(">", self._replace_arrows)
 
         # Re-colorize lines when appropriate.
-        self._top.bind('<<Paste>>', self._analyze)
-        self._top.bind('<KeyPress>', self._check_analyze)
-        self._top.bind('<ButtonPress>', self._check_analyze)
+        self._top.bind("<<Paste>>", self._analyze)
+        self._top.bind("<KeyPress>", self._check_analyze)
+        self._top.bind("<ButtonPress>", self._check_analyze)
 
         # Tab cycles focus. (why doesn't this work??)
         def cycle(e, textwidget=self._textwidget):
             textwidget.tk_focusNext().focus()
-        self._textwidget.bind('<Tab>', cycle)
-
-        prod_tuples = [(p.lhs(),[p.rhs()]) for p in self._cfg.productions()]
-        for i in range(len(prod_tuples)-1,0,-1):
-            if (prod_tuples[i][0] == prod_tuples[i-1][0]):
-                if () in prod_tuples[i][1]: continue
-                if () in prod_tuples[i-1][1]: continue
-                print(prod_tuples[i-1][1])
+
+        self._textwidget.bind("<Tab>", cycle)
+
+        prod_tuples = [(p.lhs(), [p.rhs()]) for p in self._cfg.productions()]
+        for i in range(len(prod_tuples) - 1, 0, -1):
+            if prod_tuples[i][0] == prod_tuples[i - 1][0]:
+                if () in prod_tuples[i][1]:
+                    continue
+                if () in prod_tuples[i - 1][1]:
+                    continue
+                print(prod_tuples[i - 1][1])
                 print(prod_tuples[i][1])
-                prod_tuples[i-1][1].extend(prod_tuples[i][1])
+                prod_tuples[i - 1][1].extend(prod_tuples[i][1])
                 del prod_tuples[i]
 
         for lhs, rhss in prod_tuples:
             print(lhs, rhss)
-            s = '%s ->' % lhs
+            s = "%s ->" % lhs
             for rhs in rhss:
                 for elt in rhs:
-                    if isinstance(elt, Nonterminal): s += ' %s' % elt
-                    else: s += ' %r' % elt
-                s += ' |'
-            s = s[:-2] + '\n'
-            self._textwidget.insert('end', s)
+                    if isinstance(elt, Nonterminal):
+                        s += " %s" % elt
+                    else:
+                        s += " %r" % elt
+                s += " |"
+            s = s[:-2] + "\n"
+            self._textwidget.insert("end", s)
 
         self._analyze()
 
-#         # Add the producitons to the text widget, and colorize them.
-#         prod_by_lhs = {}
-#         for prod in self._cfg.productions():
-#             if len(prod.rhs()) > 0:
-#                 prod_by_lhs.setdefault(prod.lhs(),[]).append(prod)
-#         for (lhs, prods) in prod_by_lhs.items():
-#             self._textwidget.insert('end', '%s ->' % lhs)
-#             self._textwidget.insert('end', self._rhs(prods[0]))
-#             for prod in prods[1:]:
-#                 print '\t|'+self._rhs(prod),
-#                 self._textwidget.insert('end', '\t|'+self._rhs(prod))
-#             print
-#             self._textwidget.insert('end', '\n')
-#         for prod in self._cfg.productions():
-#             if len(prod.rhs()) == 0:
-#                 self._textwidget.insert('end', '%s' % prod)
-#         self._analyze()
-
-#     def _rhs(self, prod):
-#         s = ''
-#         for elt in prod.rhs():
-#             if isinstance(elt, Nonterminal): s += ' %s' % elt.symbol()
-#             else: s += ' %r' % elt
-#         return s
+    #         # Add the productions to the text widget, and colorize them.
+    #         prod_by_lhs = {}
+    #         for prod in self._cfg.productions():
+    #             if len(prod.rhs()) > 0:
+    #                 prod_by_lhs.setdefault(prod.lhs(),[]).append(prod)
+    #         for (lhs, prods) in prod_by_lhs.items():
+    #             self._textwidget.insert('end', '%s ->' % lhs)
+    #             self._textwidget.insert('end', self._rhs(prods[0]))
+    #             for prod in prods[1:]:
+    #                 print '\t|'+self._rhs(prod),
+    #                 self._textwidget.insert('end', '\t|'+self._rhs(prod))
+    #             print
+    #             self._textwidget.insert('end', '\n')
+    #         for prod in self._cfg.productions():
+    #             if len(prod.rhs()) == 0:
+    #                 self._textwidget.insert('end', '%s' % prod)
+    #         self._analyze()
+
+    #     def _rhs(self, prod):
+    #         s = ''
+    #         for elt in prod.rhs():
+    #             if isinstance(elt, Nonterminal): s += ' %s' % elt.symbol()
+    #             else: s += ' %r' % elt
+    #         return s
 
     def _clear_tags(self, linenum):
         """
         Remove all tags (except ``arrow`` and ``sel``) from the given
         line of the text widget used for editing the productions.
         """
-        start = '%d.0'%linenum
-        end = '%d.end'%linenum
+        start = "%d.0" % linenum
+        end = "%d.end" % linenum
         for tag in self._textwidget.tag_names():
-            if tag not in ('arrow', 'sel'):
+            if tag not in ("arrow", "sel"):
                 self._textwidget.tag_remove(tag, start, end)
 
     def _check_analyze(self, *e):
@@ -315,7 +349,7 @@ class CFGEditor(object):
         all colorization from the line we moved to, and re-colorize
         the line that we moved from.
         """
-        linenum = int(self._textwidget.index('insert').split('.')[0])
+        linenum = int(self._textwidget.index("insert").split(".")[0])
         if linenum != self._linenum:
             self._clear_tags(linenum)
             self._analyze_line(self._linenum)
@@ -327,20 +361,21 @@ class CFGEditor(object):
         symbol font).  This searches the whole buffer, but is fast
         enough to be done anytime they press '>'.
         """
-        arrow = '1.0'
+        arrow = "1.0"
         while True:
-            arrow = self._textwidget.search('->', arrow, 'end+1char')
-            if arrow == '': break
-            self._textwidget.delete(arrow, arrow+'+2char')
-            self._textwidget.insert(arrow, self.ARROW, 'arrow')
-            self._textwidget.insert(arrow, '\t')
-
-        arrow = '1.0'
+            arrow = self._textwidget.search("->", arrow, "end+1char")
+            if arrow == "":
+                break
+            self._textwidget.delete(arrow, arrow + "+2char")
+            self._textwidget.insert(arrow, self.ARROW, "arrow")
+            self._textwidget.insert(arrow, "\t")
+
+        arrow = "1.0"
         while True:
-            arrow = self._textwidget.search(self.ARROW, arrow+'+1char',
-                                            'end+1char')
-            if arrow == '': break
-            self._textwidget.tag_add('arrow', arrow, arrow+'+1char')
+            arrow = self._textwidget.search(self.ARROW, arrow + "+1char", "end+1char")
+            if arrow == "":
+                break
+            self._textwidget.tag_add("arrow", arrow, arrow + "+1char")
 
     def _analyze_token(self, match, linenum):
         """
@@ -350,31 +385,35 @@ class CFGEditor(object):
         the line).
         """
         # What type of token is it?
-        if match.group()[0] in "'\"": tag = 'terminal'
-        elif match.group() in ('->', self.ARROW): tag = 'arrow'
+        if match.group()[0] in "'\"":
+            tag = "terminal"
+        elif match.group() in ("->", self.ARROW):
+            tag = "arrow"
         else:
             # If it's a nonterminal, then set up new bindings, so we
             # can highlight all instances of that nonterminal when we
             # put the mouse over it.
-            tag = 'nonterminal_'+match.group()
+            tag = "nonterminal_" + match.group()
             if tag not in self._textwidget.tag_names():
                 self._init_nonterminal_tag(tag)
 
-        start = '%d.%d' % (linenum, match.start())
-        end = '%d.%d' % (linenum, match.end())
+        start = "%d.%d" % (linenum, match.start())
+        end = "%d.%d" % (linenum, match.end())
         self._textwidget.tag_add(tag, start, end)
 
-    def _init_nonterminal_tag(self, tag, foreground='blue'):
-        self._textwidget.tag_config(tag, foreground=foreground,
-                                    font=CFGEditor._BOLD)
+    def _init_nonterminal_tag(self, tag, foreground="blue"):
+        self._textwidget.tag_config(tag, foreground=foreground, font=CFGEditor._BOLD)
         if not self._highlight_matching_nonterminals:
             return
+
         def enter(e, textwidget=self._textwidget, tag=tag):
-            textwidget.tag_config(tag, background='#80ff80')
+            textwidget.tag_config(tag, background="#80ff80")
+
         def leave(e, textwidget=self._textwidget, tag=tag):
-            textwidget.tag_config(tag, background='')
-        self._textwidget.tag_bind(tag, '<Enter>', enter)
-        self._textwidget.tag_bind(tag, '<Leave>', leave)
+            textwidget.tag_config(tag, background="")
+
+        self._textwidget.tag_bind(tag, "<Enter>", enter)
+        self._textwidget.tag_bind(tag, "<Leave>", leave)
 
     def _analyze_line(self, linenum):
         """
@@ -384,7 +423,7 @@ class CFGEditor(object):
         self._clear_tags(linenum)
 
         # Get the line line's text string.
-        line = self._textwidget.get(repr(linenum)+'.0', repr(linenum)+'.end')
+        line = self._textwidget.get(repr(linenum) + ".0", repr(linenum) + ".end")
 
         # If it's a valid production, then colorize each token.
         if CFGEditor._PRODUCTION_RE.match(line):
@@ -392,9 +431,10 @@ class CFGEditor(object):
             # and call analyze_token on each token.
             def analyze_token(match, self=self, linenum=linenum):
                 self._analyze_token(match, linenum)
-                return ''
+                return ""
+
             CFGEditor._TOKEN_RE.sub(analyze_token, line)
-        elif line.strip() != '':
+        elif line.strip() != "":
             # It's invalid; show the user where the error is.
             self._mark_error(linenum, line)
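
The index arithmetic running through these hunks ("1.0", "end", "%d.%d" % (linenum, col), "+1char", "+2char") follows the standard Tkinter Text convention: a position is a "line.column" string with 1-based lines and 0-based columns, optionally adjusted with modifiers like "+1char". A minimal sketch of the convention, assuming a Tk display is available (the inserted grammar line is illustrative):

    from tkinter import Tk, Text

    t = Text(Tk())
    t.insert("1.0", "S -> NP VP\n")
    print(t.get("1.0", "1.end"))   # "S -> NP VP" (the whole first line)
    print(t.search("->", "1.0"))   # "1.2" -- the arrow starts at column 2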
 
@@ -405,30 +445,30 @@ class CFGEditor(object):
         arrowmatch = CFGEditor._ARROW_RE.search(line)
         if not arrowmatch:
             # If there's no arrow at all, highlight the whole line.
-            start = '%d.0' % linenum
-            end = '%d.end' % linenum
+            start = "%d.0" % linenum
+            end = "%d.end" % linenum
         elif not CFGEditor._LHS_RE.match(line):
             # Otherwise, if the LHS is bad, highlight it.
-            start = '%d.0' % linenum
-            end = '%d.%d' % (linenum, arrowmatch.start())
+            start = "%d.0" % linenum
+            end = "%d.%d" % (linenum, arrowmatch.start())
         else:
             # Otherwise, highlight the RHS.
-            start = '%d.%d' % (linenum, arrowmatch.end())
-            end = '%d.end' % linenum
+            start = "%d.%d" % (linenum, arrowmatch.end())
+            end = "%d.end" % linenum
 
         # If we're highlighting 0 chars, highlight the whole line.
-        if self._textwidget.compare(start, '==', end):
-            start = '%d.0' % linenum
-            end = '%d.end' % linenum
-        self._textwidget.tag_add('error', start, end)
+        if self._textwidget.compare(start, "==", end):
+            start = "%d.0" % linenum
+            end = "%d.end" % linenum
+        self._textwidget.tag_add("error", start, end)
 
     def _analyze(self, *e):
         """
         Replace ``->`` with arrows, and colorize the entire buffer.
         """
         self._replace_arrows()
-        numlines = int(self._textwidget.index('end').split('.')[0])
-        for linenum in range(1, numlines+1):  # line numbers start at 1.
+        numlines = int(self._textwidget.index("end").split(".")[0])
+        for linenum in range(1, numlines + 1):  # line numbers start at 1.
             self._analyze_line(linenum)
 
     def _parse_productions(self):
@@ -439,36 +479,38 @@ class CFGEditor(object):
         productions = []
 
         # Get the text, normalize it, and split it into lines.
-        text = self._textwidget.get('1.0', 'end')
-        text = re.sub(self.ARROW, '->', text)
-        text = re.sub('\t', ' ', text)
-        lines = text.split('\n')
+        text = self._textwidget.get("1.0", "end")
+        text = re.sub(self.ARROW, "->", text)
+        text = re.sub("\t", " ", text)
+        lines = text.split("\n")
 
         # Convert each line to a CFG production
         for line in lines:
             line = line.strip()
-            if line=='': continue
+            if line == "":
+                continue
             productions += _read_cfg_production(line)
-            #if line.strip() == '': continue
-            #if not CFGEditor._PRODUCTION_RE.match(line):
+            # if line.strip() == '': continue
+            # if not CFGEditor._PRODUCTION_RE.match(line):
             #    raise ValueError('Bad production string %r' % line)
             #
-            #(lhs_str, rhs_str) = line.split('->')
-            #lhs = Nonterminal(lhs_str.strip())
-            #rhs = []
-            #def parse_token(match, rhs=rhs):
+            # (lhs_str, rhs_str) = line.split('->')
+            # lhs = Nonterminal(lhs_str.strip())
+            # rhs = []
+            # def parse_token(match, rhs=rhs):
             #    token = match.group()
             #    if token[0] in "'\"": rhs.append(token[1:-1])
             #    else: rhs.append(Nonterminal(token))
             #    return ''
-            #CFGEditor._TOKEN_RE.sub(parse_token, rhs_str)
+            # CFGEditor._TOKEN_RE.sub(parse_token, rhs_str)
             #
-            #productions.append(Production(lhs, *rhs))
+            # productions.append(Production(lhs, *rhs))
 
         return productions
 
     def _destroy(self, *e):
-        if self._top is None: return
+        if self._top is None:
+            return
         self._top.destroy()
         self._top = None
 
@@ -484,31 +526,44 @@ class CFGEditor(object):
             self._set_cfg_callback(cfg)
 
     def _reset(self, *e):
-        self._textwidget.delete('1.0', 'end')
+        self._textwidget.delete("1.0", "end")
         for production in self._cfg.productions():
-            self._textwidget.insert('end', '%s\n' % production)
+            self._textwidget.insert("end", "%s\n" % production)
         self._analyze()
         if self._set_cfg_callback is not None:
             self._set_cfg_callback(self._cfg)
 
     def _cancel(self, *e):
-        try: self._reset()
-        except: pass
+        try:
+            self._reset()
+        except:
+            pass
         self._destroy()
 
     def _help(self, *e):
         # The default font's not very legible; try using 'fixed' instead.
         try:
-            ShowText(self._parent, 'Help: Chart Parser Demo',
-                     (_CFGEditor_HELP).strip(), width=75, font='fixed')
+            ShowText(
+                self._parent,
+                "Help: Chart Parser Demo",
+                (_CFGEditor_HELP).strip(),
+                width=75,
+                font="fixed",
+            )
         except:
-            ShowText(self._parent, 'Help: Chart Parser Demo',
-                     (_CFGEditor_HELP).strip(), width=75)
+            ShowText(
+                self._parent,
+                "Help: Chart Parser Demo",
+                (_CFGEditor_HELP).strip(),
+                width=75,
+            )
+
 
 ######################################################################
 # New Demo (built tree based on cfg)
 ######################################################################
 
+
 class CFGDemo(object):
     def __init__(self, grammar, text):
         self._grammar = grammar
@@ -516,62 +571,64 @@ class CFGDemo(object):
 
         # Set up the main window.
         self._top = Tk()
-        self._top.title('Context Free Grammar Demo')
+        self._top.title("Context Free Grammar Demo")
 
         # Base font size
         self._size = IntVar(self._top)
-        self._size.set(12) # = medium
+        self._size.set(12)  # = medium
 
         # Set up the key bindings
         self._init_bindings(self._top)
 
         # Create the basic frames
         frame1 = Frame(self._top)
-        frame1.pack(side='left', fill='y', expand=0)
+        frame1.pack(side="left", fill="y", expand=0)
         self._init_menubar(self._top)
         self._init_buttons(self._top)
         self._init_grammar(frame1)
         self._init_treelet(frame1)
         self._init_workspace(self._top)
 
-    #//////////////////////////////////////////////////
+    # //////////////////////////////////////////////////
     # Initialization
-    #//////////////////////////////////////////////////
+    # //////////////////////////////////////////////////
 
     def _init_bindings(self, top):
-        top.bind('<Control-q>', self.destroy)
+        top.bind("<Control-q>", self.destroy)
 
-    def _init_menubar(self, parent): pass
+    def _init_menubar(self, parent):
+        pass
 
-    def _init_buttons(self, parent): pass
+    def _init_buttons(self, parent):
+        pass
 
     def _init_grammar(self, parent):
         self._prodlist = ProductionList(parent, self._grammar, width=20)
-        self._prodlist.pack(side='top', fill='both', expand=1)
+        self._prodlist.pack(side="top", fill="both", expand=1)
         self._prodlist.focus()
-        self._prodlist.add_callback('select', self._selectprod_cb)
-        self._prodlist.add_callback('move', self._selectprod_cb)
+        self._prodlist.add_callback("select", self._selectprod_cb)
+        self._prodlist.add_callback("move", self._selectprod_cb)
 
     def _init_treelet(self, parent):
-        self._treelet_canvas = Canvas(parent, background='white')
-        self._treelet_canvas.pack(side='bottom', fill='x')
+        self._treelet_canvas = Canvas(parent, background="white")
+        self._treelet_canvas.pack(side="bottom", fill="x")
         self._treelet = None
 
     def _init_workspace(self, parent):
-        self._workspace = CanvasFrame(parent, background='white')
-        self._workspace.pack(side='right', fill='both', expand=1)
+        self._workspace = CanvasFrame(parent, background="white")
+        self._workspace.pack(side="right", fill="both", expand=1)
         self._tree = None
         self.reset_workspace()
 
-    #//////////////////////////////////////////////////
+    # //////////////////////////////////////////////////
     # Workspace
-    #//////////////////////////////////////////////////
+    # //////////////////////////////////////////////////
 
     def reset_workspace(self):
         c = self._workspace.canvas()
         fontsize = int(self._size.get())
-        node_font = ('helvetica', -(fontsize+4), 'bold')
-        leaf_font = ('helvetica', -(fontsize+2))
+        node_font = ("helvetica", -(fontsize + 4), "bold")
+        leaf_font = ("helvetica", -(fontsize + 2))
 
         # Remove the old tree
         if self._tree is not None:
@@ -587,71 +644,79 @@ class CFGDemo(object):
             leaves.append(TextWidget(c, word, font=leaf_font, draggable=1))
 
         # Put it all together into one tree
-        self._tree = TreeSegmentWidget(c, rootnode, leaves,
-                                       color='white')
+        self._tree = TreeSegmentWidget(c, rootnode, leaves, color="white")
 
         # Add it to the workspace.
         self._workspace.add_widget(self._tree)
 
         # Move the leaves to the bottom of the workspace.
-        for leaf in leaves: leaf.move(0,100)
+        for leaf in leaves:
+            leaf.move(0, 100)
 
-        #self._nodes = {start:1}
-        #self._leaves = dict([(l,1) for l in leaves])
+        # self._nodes = {start:1}
+        # self._leaves = dict([(l,1) for l in leaves])
 
     def workspace_markprod(self, production):
         pass
 
     def _markproduction(self, prod, tree=None):
-        if tree is None: tree = self._tree
-        for i in range(len(tree.subtrees())-len(prod.rhs())):
-            if tree['color', i] == 'white':
-                self._markproduction
+        if tree is None:
+            tree = self._tree
+        for i in range(len(tree.subtrees()) - len(prod.rhs())):
+            if tree["color", i] == "white":
+                self._markproduction  # FIXME: Is this necessary at all?
 
             for j, node in enumerate(prod.rhs()):
-                widget = tree.subtrees()[i+j]
-                if (isinstance(node, Nonterminal) and
-                    isinstance(widget, TreeSegmentWidget) and
-                    node.symbol == widget.label().text()):
-                    pass # matching nonterminal
-                elif (isinstance(node, string_types) and
-                      isinstance(widget, TextWidget) and
-                      node == widget.text()):
-                    pass # matching nonterminal
-                else: break
+                widget = tree.subtrees()[i + j]
+                if (
+                    isinstance(node, Nonterminal)
+                    and isinstance(widget, TreeSegmentWidget)
+                    and node.symbol == widget.label().text()
+                ):
+                    pass  # matching nonterminal
+                elif (
+                    isinstance(node, str)
+                    and isinstance(widget, TextWidget)
+                    and node == widget.text()
+                ):
+                    pass  # matching terminal
+                else:
+                    break
             else:
                 # Everything matched!
-                print('MATCH AT', i)
+                print("MATCH AT", i)
 
-    #//////////////////////////////////////////////////
+    # //////////////////////////////////////////////////
     # Grammar
-    #//////////////////////////////////////////////////
+    # //////////////////////////////////////////////////
 
     def _selectprod_cb(self, production):
         canvas = self._treelet_canvas
 
         self._prodlist.highlight(production)
-        if self._treelet is not None: self._treelet.destroy()
+        if self._treelet is not None:
+            self._treelet.destroy()
 
         # Convert the production to a tree.
         rhs = production.rhs()
         for (i, elt) in enumerate(rhs):
-            if isinstance(elt, Nonterminal): elt = Tree(elt)
+            if isinstance(elt, Nonterminal):
+                elt = Tree(elt)  # FIXME: no-op; rebinding elt does not modify rhs
         tree = Tree(production.lhs().symbol(), *rhs)
 
         # Draw the tree in the treelet area.
         fontsize = int(self._size.get())
-        node_font = ('helvetica', -(fontsize+4), 'bold')
-        leaf_font = ('helvetica', -(fontsize+2))
-        self._treelet = tree_to_treesegment(canvas, tree,
-                                            node_font=node_font,
-                                            leaf_font=leaf_font)
-        self._treelet['draggable'] = 1
+        node_font = ("helvetica", -(fontsize + 4), "bold")
+        leaf_font = ("helvetica", -(fontsize + 2))
+        self._treelet = tree_to_treesegment(
+            canvas, tree, node_font=node_font, leaf_font=leaf_font
+        )
+        self._treelet["draggable"] = 1
 
         # Center the treelet.
         (x1, y1, x2, y2) = self._treelet.bbox()
-        w, h = int(canvas['width']), int(canvas['height'])
-        self._treelet.move((w-x1-x2)/2, (h-y1-y2)/2)
+        w, h = int(canvas["width"]), int(canvas["height"])
+        self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2)
 
         # Mark the places where we can add it to the workspace.
         self._markproduction(production)
@@ -662,11 +727,12 @@ class CFGDemo(object):
     def mainloop(self, *args, **kwargs):
         self._top.mainloop(*args, **kwargs)
 
+
 def demo2():
     from nltk import Nonterminal, Production, CFG
-    nonterminals = 'S VP NP PP P N Name V Det'
-    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s)
-                                           for s in nonterminals.split()]
+
+    nonterminals = "S VP NP PP P N Name V Det"
+    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()]
     productions = (
         # Syntactic Productions
         Production(S, [NP, VP]),
@@ -677,34 +743,40 @@ def demo2():
         Production(VP, [V, NP]),
         Production(PP, [P, NP]),
         Production(PP, []),
-
-        Production(PP, ['up', 'over', NP]),
-
+        Production(PP, ["up", "over", NP]),
         # Lexical Productions
-        Production(NP, ['I']),   Production(Det, ['the']),
-        Production(Det, ['a']),  Production(N, ['man']),
-        Production(V, ['saw']),  Production(P, ['in']),
-        Production(P, ['with']), Production(N, ['park']),
-        Production(N, ['dog']),  Production(N, ['statue']),
-        Production(Det, ['my']),
-        )
+        Production(NP, ["I"]),
+        Production(Det, ["the"]),
+        Production(Det, ["a"]),
+        Production(N, ["man"]),
+        Production(V, ["saw"]),
+        Production(P, ["in"]),
+        Production(P, ["with"]),
+        Production(N, ["park"]),
+        Production(N, ["dog"]),
+        Production(N, ["statue"]),
+        Production(Det, ["my"]),
+    )
     grammar = CFG(S, productions)
 
-    text = 'I saw a man in the park'.split()
-    d=CFGDemo(grammar, text)
+    text = "I saw a man in the park".split()
+    d = CFGDemo(grammar, text)
     d.mainloop()
 
+
 ######################################################################
 # Old Demo
 ######################################################################
 
+
 def demo():
     from nltk import Nonterminal, CFG
-    nonterminals = 'S VP NP PP P N Name V Det'
-    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s)
-                                           for s in nonterminals.split()]
 
-    grammar = CFG.fromstring("""
+    nonterminals = "S VP NP PP P N Name V Det"
+    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()]
+
+    grammar = CFG.fromstring(
+        """
     S -> NP VP
     PP -> P NP
     NP -> Det N
@@ -724,19 +796,25 @@ def demo():
     P -> 'up'
     P -> 'over'
     P -> 'with'
-    """)
+    """
+    )
+
+    def cb(grammar):
+        print(grammar)
 
-    def cb(grammar): print(grammar)
     top = Tk()
     editor = CFGEditor(top, grammar, cb)
-    Label(top, text='\nTesting CFG Editor\n').pack()
-    Button(top, text='Quit', command=top.destroy).pack()
+    Label(top, text="\nTesting CFG Editor\n").pack()
+    Button(top, text="Quit", command=top.destroy).pack()
     top.mainloop()
 
+
 def demo3():
     from nltk import Production
-    (S, VP, NP, PP, P, N, Name, V, Det) = \
-        nonterminals('S, VP, NP, PP, P, N, Name, V, Det')
+
+    (S, VP, NP, PP, P, N, Name, V, Det) = nonterminals(
+        "S, VP, NP, PP, P, N, Name, V, Det"
+    )
 
     productions = (
         # Syntactic Productions
@@ -748,27 +826,35 @@ def demo3():
         Production(VP, [V, NP]),
         Production(PP, [P, NP]),
         Production(PP, []),
-
-        Production(PP, ['up', 'over', NP]),
-
+        Production(PP, ["up", "over", NP]),
         # Lexical Productions
-        Production(NP, ['I']),   Production(Det, ['the']),
-        Production(Det, ['a']),  Production(N, ['man']),
-        Production(V, ['saw']),  Production(P, ['in']),
-        Production(P, ['with']), Production(N, ['park']),
-        Production(N, ['dog']),  Production(N, ['statue']),
-        Production(Det, ['my']),
-        )
+        Production(NP, ["I"]),
+        Production(Det, ["the"]),
+        Production(Det, ["a"]),
+        Production(N, ["man"]),
+        Production(V, ["saw"]),
+        Production(P, ["in"]),
+        Production(P, ["with"]),
+        Production(N, ["park"]),
+        Production(N, ["dog"]),
+        Production(N, ["statue"]),
+        Production(Det, ["my"]),
+    )
 
     t = Tk()
-    def destroy(e, t=t): t.destroy()
-    t.bind('q', destroy)
+
+    def destroy(e, t=t):
+        t.destroy()
+
+    t.bind("q", destroy)
     p = ProductionList(t, productions)
-    p.pack(expand=1, fill='both')
-    p.add_callback('select', p.markonly)
-    p.add_callback('move', p.markonly)
+    p.pack(expand=1, fill="both")
+    p.add_callback("select", p.markonly)
+    p.add_callback("move", p.markonly)
     p.focus()
     p.mark(productions[2])
     p.mark(productions[8])
 
-if __name__ == '__main__': demo()
+
+if __name__ == "__main__":
+    demo()
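
Among the py2-to-py3 changes in this file, the six-style text check in _markproduction (isinstance(node, string_types)) becomes the plain Python 3 form isinstance(node, str). A minimal before/after sketch of the pattern (the helper name is hypothetical, not part of the module):

    # Python 2, via six: text could be either str or unicode.
    #     from six import string_types
    #     if isinstance(node, string_types): ...

    # Python 3: str is the only text type, so the check collapses to:
    def is_terminal(node):
        # Terminals on a production RHS are plain strings; nonterminals
        # are Nonterminal objects.
        return isinstance(node, str)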
diff --git a/nlp_resource_data/nltk/draw/cfg.pyc b/nlp_resource_data/nltk/draw/cfg.pyc
deleted file mode 100755 (executable)
index 15f6bd5..0000000
Binary files a/nlp_resource_data/nltk/draw/cfg.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/draw/dispersion.py b/nlp_resource_data/nltk/draw/dispersion.py
old mode 100755 (executable)
new mode 100644 (file)
index 5f3a568..d0717af
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dispersion Plots
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -9,6 +9,7 @@
 A utility for displaying lexical dispersion.
 """
 
+
 def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
     """
     Generate a lexical dispersion plot.
@@ -24,8 +25,10 @@ def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Pl
     try:
         from matplotlib import pylab
     except ImportError:
-        raise ValueError('The plot function requires matplotlib to be installed.'
-                     'See http://matplotlib.org/')
+        raise ValueError(
+            "The plot function requires matplotlib to be installed."
+            "See http://matplotlib.org/"
+        )
 
     text = list(text)
     words.reverse()
@@ -37,22 +40,26 @@ def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Pl
         words_to_comp = words
         text_to_comp = text
 
-    points = [(x,y) for x in range(len(text_to_comp))
-                    for y in range(len(words_to_comp))
-                    if text_to_comp[x] == words_to_comp[y]]
+    points = [
+        (x, y)
+        for x in range(len(text_to_comp))
+        for y in range(len(words_to_comp))
+        if text_to_comp[x] == words_to_comp[y]
+    ]
     if points:
         x, y = list(zip(*points))
     else:
         x = y = ()
-    pylab.plot(x, y, "b|", scalex=.1)
+    pylab.plot(x, y, "b|", scalex=0.1)
     pylab.yticks(list(range(len(words))), words, color="b")
     pylab.ylim(-1, len(words))
     pylab.title(title)
     pylab.xlabel("Word Offset")
     pylab.show()
 
-if __name__ == '__main__':
-    import nltk.compat
+
+if __name__ == "__main__":
     from nltk.corpus import gutenberg
-    words = ['Elinor', 'Marianne', 'Edward', 'Willoughby']
-    dispersion_plot(gutenberg.words('austen-sense.txt'), words)
+
+    words = ["Elinor", "Marianne", "Edward", "Willoughby"]
+    dispersion_plot(gutenberg.words("austen-sense.txt"), words)
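The converted module keeps the same public entry point, so it can be exercised exactly as the new __main__ block does. A minimal sketch, assuming matplotlib and the NLTK gutenberg corpus data are installed:

    from nltk.corpus import gutenberg
    from nltk.draw.dispersion import dispersion_plot

    # One row per word, with a mark at every text offset where it occurs.
    words = ["Elinor", "Marianne", "Edward", "Willoughby"]
    dispersion_plot(gutenberg.words("austen-sense.txt"), words)
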
diff --git a/nlp_resource_data/nltk/draw/dispersion.pyc b/nlp_resource_data/nltk/draw/dispersion.pyc
deleted file mode 100755 (executable)
index 9e7801c..0000000
Binary files a/nlp_resource_data/nltk/draw/dispersion.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/draw/table.py b/nlp_resource_data/nltk/draw/table.py
old mode 100755 (executable)
new mode 100644 (file)
index 7894f8e..7ca4a2d
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Table widget
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -9,18 +9,16 @@
 Tkinter widgets for displaying multi-column listboxes and tables.
 """
 
-from __future__ import division
-
-
 import operator
 
-from six.moves.tkinter import (Frame, Label, Listbox, Scrollbar, Tk)
+from tkinter import Frame, Label, Listbox, Scrollbar, Tk
 
 
 ######################################################################
 # Multi-Column Listbox
 ######################################################################
 
+
 class MultiListbox(Frame):
     """
     A multi-column listbox, where the current selection applies to an
@@ -31,32 +29,37 @@ class MultiListbox(Frame):
     contained listboxes.  For any methods that do not have docstrings,
     see ``tkinter.Listbox`` for a description of what that method does.
     """
-    #/////////////////////////////////////////////////////////////////
+
+    # /////////////////////////////////////////////////////////////////
     # Configuration
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     #: Default configuration values for the frame.
-    FRAME_CONFIG = dict(background='#888',
-                        takefocus=True,
-                        highlightthickness=1)
+    FRAME_CONFIG = dict(background="#888", takefocus=True, highlightthickness=1)
 
     #: Default configurations for the column labels.
-    LABEL_CONFIG = dict(borderwidth=1, relief='raised',
-                        font='helvetica -16 bold',
-                      background='#444', foreground='white')
+    LABEL_CONFIG = dict(
+        borderwidth=1,
+        relief="raised",
+        font="helvetica -16 bold",
+        background="#444",
+        foreground="white",
+    )
 
     #: Default configuration for the column listboxes.
-    LISTBOX_CONFIG = dict(borderwidth=1,
-                          selectborderwidth=0,
-                          highlightthickness=0,
-                          exportselection=False,
-                          selectbackground='#888',
-                          activestyle='none',
-                          takefocus=False)
-
-    #/////////////////////////////////////////////////////////////////
+    LISTBOX_CONFIG = dict(
+        borderwidth=1,
+        selectborderwidth=0,
+        highlightthickness=0,
+        exportselection=False,
+        selectbackground="#888",
+        activestyle="none",
+        takefocus=False,
+    )
+
+    # /////////////////////////////////////////////////////////////////
     # Constructor
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def __init__(self, master, columns, column_weights=None, cnf={}, **kw):
         """
@@ -97,7 +100,7 @@ class MultiListbox(Frame):
         if column_weights is None:
             column_weights = [1] * len(columns)
         elif len(column_weights) != len(columns):
-            raise ValueError('Expected one column_weight for each column')
+            raise ValueError("Expected one column_weight for each column")
         self._column_weights = column_weights
 
         # Configure our widgets.
@@ -110,47 +113,47 @@ class MultiListbox(Frame):
             if include_labels:
                 l = Label(self, text=label, **self.LABEL_CONFIG)
                 self._labels.append(l)
-                l.grid(column=i, row=0, sticky='news', padx=0, pady=0)
+                l.grid(column=i, row=0, sticky="news", padx=0, pady=0)
                 l.column_index = i
 
             # Create a listbox for the column
             lb = Listbox(self, **self.LISTBOX_CONFIG)
             self._listboxes.append(lb)
-            lb.grid(column=i, row=1, sticky='news', padx=0, pady=0)
+            lb.grid(column=i, row=1, sticky="news", padx=0, pady=0)
             lb.column_index = i
 
             # Clicking or dragging selects:
-            lb.bind('<Button-1>', self._select)
-            lb.bind('<B1-Motion>', self._select)
+            lb.bind("<Button-1>", self._select)
+            lb.bind("<B1-Motion>", self._select)
             # Scroll wheel scrolls:
-            lb.bind('<Button-4>', lambda e: self._scroll(-1))
-            lb.bind('<Button-5>', lambda e: self._scroll(+1))
-            lb.bind('<MouseWheel>', lambda e: self._scroll(e.delta))
+            lb.bind("<Button-4>", lambda e: self._scroll(-1))
+            lb.bind("<Button-5>", lambda e: self._scroll(+1))
+            lb.bind("<MouseWheel>", lambda e: self._scroll(e.delta))
             # Button 2 can be used to scan:
-            lb.bind('<Button-2>', lambda e: self.scan_mark(e.x, e.y))
-            lb.bind('<B2-Motion>', lambda e: self.scan_dragto(e.x, e.y))
+            lb.bind("<Button-2>", lambda e: self.scan_mark(e.x, e.y))
+            lb.bind("<B2-Motion>", lambda e: self.scan_dragto(e.x, e.y))
             # Dragging outside the window has no effect (disable
             # the default listbox behavior, which scrolls):
-            lb.bind('<B1-Leave>', lambda e: 'break')
+            lb.bind("<B1-Leave>", lambda e: "break")
             # Columns can be resized by dragging them:
-            l.bind('<Button-1>', self._resize_column)
+            l.bind("<Button-1>", self._resize_column)
 
         # Columns can be resized by dragging them.  (This binding is
         # used if they click on the grid between columns:)
-        self.bind('<Button-1>', self._resize_column)
+        self.bind("<Button-1>", self._resize_column)
 
         # Set up key bindings for the widget:
-        self.bind('<Up>', lambda e: self.select(delta=-1))
-        self.bind('<Down>', lambda e: self.select(delta=1))
-        self.bind('<Prior>', lambda e: self.select(delta=-self._pagesize()))
-        self.bind('<Next>', lambda e: self.select(delta=self._pagesize()))
+        self.bind("<Up>", lambda e: self.select(delta=-1))
+        self.bind("<Down>", lambda e: self.select(delta=1))
+        self.bind("<Prior>", lambda e: self.select(delta=-self._pagesize()))
+        self.bind("<Next>", lambda e: self.select(delta=self._pagesize()))
 
         # Configuration customizations
         self.configure(cnf, **kw)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Column Resizing
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _resize_column(self, event):
         """
@@ -161,45 +164,46 @@ class MultiListbox(Frame):
         """
         # If we're already waiting for a button release, then ignore
         # the new button press.
-        if event.widget.bind('<ButtonRelease>'):
+        if event.widget.bind("<ButtonRelease>"):
             return False
 
         # Decide which column (if any) to resize.
         self._resize_column_index = None
         if event.widget is self:
             for i, lb in enumerate(self._listboxes):
-                if abs(event.x-(lb.winfo_x()+lb.winfo_width())) < 10:
+                if abs(event.x - (lb.winfo_x() + lb.winfo_width())) < 10:
                     self._resize_column_index = i
-        elif event.x > (event.widget.winfo_width()-5):
+        elif event.x > (event.widget.winfo_width() - 5):
             self._resize_column_index = event.widget.column_index
         elif event.x < 5 and event.widget.column_index != 0:
-            self._resize_column_index = event.widget.column_index-1
+            self._resize_column_index = event.widget.column_index - 1
 
         # Bind callbacks that are used to resize it.
         if self._resize_column_index is not None:
-            event.widget.bind('<Motion>', self._resize_column_motion_cb)
-            event.widget.bind('<ButtonRelease-%d>' % event.num,
-                              self._resize_column_buttonrelease_cb)
+            event.widget.bind("<Motion>", self._resize_column_motion_cb)
+            event.widget.bind(
+                "<ButtonRelease-%d>" % event.num, self._resize_column_buttonrelease_cb
+            )
             return True
         else:
             return False
 
     def _resize_column_motion_cb(self, event):
         lb = self._listboxes[self._resize_column_index]
-        charwidth = lb.winfo_width() / lb['width']
+        charwidth = lb.winfo_width() / lb["width"]
 
         x1 = event.x + event.widget.winfo_x()
         x2 = lb.winfo_x() + lb.winfo_width()
 
-        lb['width'] = max(3, lb['width'] + (x1-x2) // charwidth)
+        lb["width"] = max(3, lb["width"] + (x1 - x2) // charwidth)
 
     def _resize_column_buttonrelease_cb(self, event):
-        event.widget.unbind('<ButtonRelease-%d>' % event.num)
-        event.widget.unbind('<Motion>')
+        event.widget.unbind("<ButtonRelease-%d>" % event.num)
+        event.widget.unbind("<Motion>")
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Properties
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     @property
     def column_names(self):
@@ -233,29 +237,29 @@ class MultiListbox(Frame):
         """
         return tuple(self._listboxes)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Mouse & Keyboard Callback Functions
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _select(self, e):
         i = e.widget.nearest(e.y)
-        self.selection_clear(0, 'end')
+        self.selection_clear(0, "end")
         self.selection_set(i)
         self.activate(i)
         self.focus()
 
     def _scroll(self, delta):
         for lb in self._listboxes:
-            lb.yview_scroll(delta, 'unit')
-        return 'break'
+            lb.yview_scroll(delta, "unit")
+        return "break"
 
     def _pagesize(self):
         """:return: The number of rows that makes up one page"""
-        return int(self.index('@0,1000000')) - int(self.index('@0,0'))
+        return int(self.index("@0,1000000")) - int(self.index("@0,0"))
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Row selection
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def select(self, index=None, delta=None, see=True):
         """
@@ -269,7 +273,7 @@ class MultiListbox(Frame):
             selected index, to ensure that it is visible.
         """
         if (index is not None) and (delta is not None):
-            raise ValueError('specify index or delta, but not both')
+            raise ValueError("specify index or delta, but not both")
 
         # If delta was given, then calculate index.
         if delta is not None:
@@ -279,18 +283,19 @@ class MultiListbox(Frame):
                 index = int(self.curselection()[0]) + delta
 
         # Clear all selected rows.
-        self.selection_clear(0, 'end')
+        self.selection_clear(0, "end")
 
         # Select the specified index
         if index is not None:
-            index = min(max(index, 0), self.size()-1)
-            #self.activate(index)
+            index = min(max(index, 0), self.size() - 1)
+            # self.activate(index)
             self.selection_set(index)
-            if see: self.see(index)
+            if see:
+                self.see(index)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Configuration
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def configure(self, cnf={}, **kw):
         """
@@ -303,21 +308,21 @@ class MultiListbox(Frame):
         """
         cnf = dict(list(cnf.items()) + list(kw.items()))
         for (key, val) in list(cnf.items()):
-            if key.startswith('label_') or key.startswith('label-'):
+            if key.startswith("label_") or key.startswith("label-"):
                 for label in self._labels:
                     label.configure({key[6:]: val})
-            elif key.startswith('listbox_') or key.startswith('listbox-'):
+            elif key.startswith("listbox_") or key.startswith("listbox-"):
                 for listbox in self._listboxes:
                     listbox.configure({key[8:]: val})
             else:
-                Frame.configure(self, {key:val})
+                Frame.configure(self, {key: val})
 
     def __setitem__(self, key, val):
         """
         Configure this widget.  This is equivalent to
         ``self.configure({key: val})``.  See ``configure()``.
         """
-        self.configure({key:val})
+        self.configure({key: val})
 
     def rowconfigure(self, row_index, cnf={}, **kw):
         """
@@ -325,7 +330,8 @@ class MultiListbox(Frame):
         arguments are: ``background``, ``bg``, ``foreground``, ``fg``,
         ``selectbackground``, ``selectforeground``.
         """
-        for lb in self._listboxes: lb.itemconfigure(row_index, cnf, **kw)
+        for lb in self._listboxes:
+            lb.itemconfigure(row_index, cnf, **kw)
 
     def columnconfigure(self, col_index, cnf={}, **kw):
         """
@@ -337,11 +343,18 @@ class MultiListbox(Frame):
 
         cnf = dict(list(cnf.items()) + list(kw.items()))
         for (key, val) in list(cnf.items()):
-            if key in ('background', 'bg', 'foreground', 'fg',
-                       'selectbackground', 'selectforeground'):
-                for i in range(lb.size()): lb.itemconfigure(i, {key:val})
+            if key in (
+                "background",
+                "bg",
+                "foreground",
+                "fg",
+                "selectbackground",
+                "selectforeground",
+            ):
+                for i in range(lb.size()):
+                    lb.itemconfigure(i, {key: val})
             else:
-                lb.configure({key:val})
+                lb.configure({key: val})
 
     def itemconfigure(self, row_index, col_index, cnf=None, **kw):
         """
@@ -352,9 +365,9 @@ class MultiListbox(Frame):
         lb = self._listboxes[col_index]
         return lb.itemconfigure(row_index, cnf, **kw)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Value Access
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def insert(self, index, *rows):
         """
@@ -366,9 +379,11 @@ class MultiListbox(Frame):
         """
         for elt in rows:
             if len(elt) != len(self._column_names):
-                raise ValueError('rows should be tuples whose length '
-                                 'is equal to the number of columns')
-        for (lb,elts) in zip(self._listboxes, list(zip(*rows))):
+                raise ValueError(
+                    "rows should be tuples whose length "
+                    "is equal to the number of columns"
+                )
+        for (lb, elts) in zip(self._listboxes, list(zip(*rows))):
             lb.insert(index, *elts)
 
     def get(self, first, last=None):
@@ -392,11 +407,11 @@ class MultiListbox(Frame):
         """
         dx, dy, _, _ = self.grid_bbox(row=0, column=col)
         x, y, w, h = self._listboxes[col].bbox(row)
-        return int(x)+int(dx), int(y)+int(dy), int(w), int(h)
+        return int(x) + int(dx), int(y) + int(dy), int(w), int(h)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Hide/Show Columns
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def hide_column(self, col_index):
         """
@@ -419,15 +434,17 @@ class MultiListbox(Frame):
         """
         weight = self._column_weights[col_index]
         if self._labels:
-            self._labels[col_index].grid(column=col_index, row=0,
-                                         sticky='news', padx=0, pady=0)
-        self._listboxes[col_index].grid(column=col_index, row=1,
-                                        sticky='news', padx=0, pady=0)
+            self._labels[col_index].grid(
+                column=col_index, row=0, sticky="news", padx=0, pady=0
+            )
+        self._listboxes[col_index].grid(
+            column=col_index, row=1, sticky="news", padx=0, pady=0
+        )
         self.grid_columnconfigure(col_index, weight=weight)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Binding Methods
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def bind_to_labels(self, sequence=None, func=None, add=None):
         """
@@ -439,8 +456,7 @@ class MultiListbox(Frame):
             functions (if any), allowing for their deletion (to
             prevent a memory leak).
         """
-        return [label.bind(sequence, func, add)
-                for label in self.column_labels]
+        return [label.bind(sequence, func, add) for label in self.column_labels]
 
     def bind_to_listboxes(self, sequence=None, func=None, add=None):
         """
@@ -465,55 +481,82 @@ class MultiListbox(Frame):
             functions (if any), allowing for their deletion (to
             prevent a memory leak).
         """
-        return (self.bind_to_labels(sequence, func, add) +
-                self.bind_to_listboxes(sequence, func, add))
+        return self.bind_to_labels(sequence, func, add) + self.bind_to_listboxes(
+            sequence, func, add
+        )
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Simple Delegation
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     # These methods delegate to the first listbox:
     def curselection(self, *args, **kwargs):
         return self._listboxes[0].curselection(*args, **kwargs)
+
     def selection_includes(self, *args, **kwargs):
         return self._listboxes[0].selection_includes(*args, **kwargs)
+
     def itemcget(self, *args, **kwargs):
         return self._listboxes[0].itemcget(*args, **kwargs)
+
     def size(self, *args, **kwargs):
         return self._listboxes[0].size(*args, **kwargs)
+
     def index(self, *args, **kwargs):
         return self._listboxes[0].index(*args, **kwargs)
+
     def nearest(self, *args, **kwargs):
         return self._listboxes[0].nearest(*args, **kwargs)
 
     # These methods delegate to each listbox (and return None):
     def activate(self, *args, **kwargs):
-        for lb in self._listboxes: lb.activate(*args, **kwargs)
+        for lb in self._listboxes:
+            lb.activate(*args, **kwargs)
+
     def delete(self, *args, **kwargs):
-        for lb in self._listboxes: lb.delete(*args, **kwargs)
+        for lb in self._listboxes:
+            lb.delete(*args, **kwargs)
+
     def scan_mark(self, *args, **kwargs):
-        for lb in self._listboxes: lb.scan_mark(*args, **kwargs)
+        for lb in self._listboxes:
+            lb.scan_mark(*args, **kwargs)
+
     def scan_dragto(self, *args, **kwargs):
-        for lb in self._listboxes: lb.scan_dragto(*args, **kwargs)
+        for lb in self._listboxes:
+            lb.scan_dragto(*args, **kwargs)
+
     def see(self, *args, **kwargs):
-        for lb in self._listboxes: lb.see(*args, **kwargs)
+        for lb in self._listboxes:
+            lb.see(*args, **kwargs)
+
     def selection_anchor(self, *args, **kwargs):
-        for lb in self._listboxes: lb.selection_anchor(*args, **kwargs)
+        for lb in self._listboxes:
+            lb.selection_anchor(*args, **kwargs)
+
     def selection_clear(self, *args, **kwargs):
-        for lb in self._listboxes: lb.selection_clear(*args, **kwargs)
+        for lb in self._listboxes:
+            lb.selection_clear(*args, **kwargs)
+
     def selection_set(self, *args, **kwargs):
-        for lb in self._listboxes: lb.selection_set(*args, **kwargs)
+        for lb in self._listboxes:
+            lb.selection_set(*args, **kwargs)
+
     def yview(self, *args, **kwargs):
-        for lb in self._listboxes: v = lb.yview(*args, **kwargs)
-        return v # if called with no arguments
+        for lb in self._listboxes:
+            v = lb.yview(*args, **kwargs)
+        return v  # if called with no arguments
+
     def yview_moveto(self, *args, **kwargs):
-        for lb in self._listboxes: lb.yview_moveto(*args, **kwargs)
+        for lb in self._listboxes:
+            lb.yview_moveto(*args, **kwargs)
+
     def yview_scroll(self, *args, **kwargs):
-        for lb in self._listboxes: lb.yview_scroll(*args, **kwargs)
+        for lb in self._listboxes:
+            lb.yview_scroll(*args, **kwargs)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Aliases
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     itemconfig = itemconfigure
     rowconfig = rowconfigure
@@ -523,17 +566,19 @@ class MultiListbox(Frame):
     select_includes = selection_includes
     select_set = selection_set
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # These listbox methods are not defined for multi-listbox
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # def xview(self, *what): pass
     # def xview_moveto(self, fraction): pass
     # def xview_scroll(self, number, what): pass
 
+
 ######################################################################
 # Table
 ######################################################################
 
+
 class Table(object):
     """
     A display widget for a table of values, based on a ``MultiListbox``
@@ -572,10 +617,19 @@ class Table(object):
         table.  Each element of _rows is a row value, i.e., a list of
         cell values, one for each column in the row.
     """
-    def __init__(self, master, column_names, rows=None,
-                 column_weights=None,
-                 scrollbar=True, click_to_sort=True,
-                 reprfunc=None, cnf={}, **kw):
+
+    def __init__(
+        self,
+        master,
+        column_names,
+        rows=None,
+        column_weights=None,
+        scrollbar=True,
+        click_to_sort=True,
+        reprfunc=None,
+        cnf={},
+        **kw
+    ):
         """
         Construct a new Table widget.
 
@@ -611,41 +665,41 @@ class Table(object):
         self._reprfunc = reprfunc
         self._frame = Frame(master)
 
-        self._column_name_to_index = dict((c,i) for (i,c) in
-                                          enumerate(column_names))
+        self._column_name_to_index = dict((c, i) for (i, c) in enumerate(column_names))
 
         # Make a copy of the rows & check that it's valid.
-        if rows is None: self._rows = []
-        else: self._rows = [[v for v in row] for row in rows]
-        for row in self._rows: self._checkrow(row)
+        if rows is None:
+            self._rows = []
+        else:
+            self._rows = [[v for v in row] for row in rows]
+        for row in self._rows:
+            self._checkrow(row)
 
         # Create our multi-list box.
-        self._mlb = MultiListbox(self._frame, column_names,
-                                 column_weights, cnf, **kw)
-        self._mlb.pack(side='left', expand=True, fill='both')
+        self._mlb = MultiListbox(self._frame, column_names, column_weights, cnf, **kw)
+        self._mlb.pack(side="left", expand=True, fill="both")
 
         # Optional scrollbar
         if scrollbar:
-            sb = Scrollbar(self._frame, orient='vertical',
-                           command=self._mlb.yview)
-            self._mlb.listboxes[0]['yscrollcommand'] = sb.set
-            #for listbox in self._mlb.listboxes:
+            sb = Scrollbar(self._frame, orient="vertical", command=self._mlb.yview)
+            self._mlb.listboxes[0]["yscrollcommand"] = sb.set
+            # for listbox in self._mlb.listboxes:
             #    listbox['yscrollcommand'] = sb.set
-            sb.pack(side='right', fill='y')
+            sb.pack(side="right", fill="y")
             self._scrollbar = sb
 
         # Set up sorting
         self._sortkey = None
         if click_to_sort:
             for i, l in enumerate(self._mlb.column_labels):
-                l.bind('<Button-1>', self._sort)
+                l.bind("<Button-1>", self._sort)
 
         # Fill in our multi-list box.
         self._fill_table()
 
-    #/////////////////////////////////////////////////////////////////
-    #{ Widget-like Methods
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
+    # { Widget-like Methods
+    # /////////////////////////////////////////////////////////////////
     # These all just delegate to either our frame or our MLB.
 
     def pack(self, *args, **kwargs):
@@ -697,9 +751,9 @@ class Table(object):
     columnconfig = columnconfigure
     itemconfig = itemconfigure
 
-    #/////////////////////////////////////////////////////////////////
-    #{ Table as list-of-lists
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
+    # { Table as list-of-lists
+    # /////////////////////////////////////////////////////////////////
 
     def insert(self, row_index, rowvalue):
         """
@@ -714,10 +768,12 @@ class Table(object):
         self._checkrow(rowvalue)
         self._rows.insert(row_index, rowvalue)
         if self._reprfunc is not None:
-            rowvalue = [self._reprfunc(row_index,j,v)
-                        for (j,v) in enumerate(rowvalue)]
+            rowvalue = [
+                self._reprfunc(row_index, j, v) for (j, v) in enumerate(rowvalue)
+            ]
         self._mlb.insert(row_index, rowvalue)
-        if self._DEBUG: self._check_table_vs_mlb()
+        if self._DEBUG:
+            self._check_table_vs_mlb()
 
     def extend(self, rowvalues):
         """
@@ -727,8 +783,10 @@ class Table(object):
             table.  Each row value should be a tuple of cell values,
             one for each column in the row.
         """
-        for rowvalue in rowvalues: self.append(rowvalue)
-        if self._DEBUG: self._check_table_vs_mlb()
+        for rowvalue in rowvalues:
+            self.append(rowvalue)
+        if self._DEBUG:
+            self._check_table_vs_mlb()
 
     def append(self, rowvalue):
         """
@@ -738,15 +796,17 @@ class Table(object):
             in the new row.
         """
         self.insert(len(self._rows), rowvalue)
-        if self._DEBUG: self._check_table_vs_mlb()
+        if self._DEBUG:
+            self._check_table_vs_mlb()
 
     def clear(self):
         """
         Delete all rows in this table.
         """
         self._rows = []
-        self._mlb.delete(0, 'end')
-        if self._DEBUG: self._check_table_vs_mlb()
+        self._mlb.delete(0, "end")
+        if self._DEBUG:
+            self._check_table_vs_mlb()
 
     def __getitem__(self, index):
         """
@@ -758,8 +818,8 @@ class Table(object):
         ``i``th row and the ``j``th column.
         """
         if isinstance(index, slice):
-            raise ValueError('Slicing not supported')
-        elif isinstance(index, tuple) and len(index)==2:
+            raise ValueError("Slicing not supported")
+        elif isinstance(index, tuple) and len(index) == 2:
             return self._rows[index[0]][self.column_index(index[1])]
         else:
             return tuple(self._rows[index])
@@ -779,18 +839,17 @@ class Table(object):
         ``val``.
         """
         if isinstance(index, slice):
-            raise ValueError('Slicing not supported')
-
+            raise ValueError("Slicing not supported")
 
         # table[i,j] = val
-        elif isinstance(index, tuple) and len(index)==2:
+        elif isinstance(index, tuple) and len(index) == 2:
             i, j = index[0], self.column_index(index[1])
             config_cookie = self._save_config_info([i])
             self._rows[i][j] = val
             if self._reprfunc is not None:
                 val = self._reprfunc(i, j, val)
             self._mlb.listboxes[j].insert(i, val)
-            self._mlb.listboxes[j].delete(i+1)
+            self._mlb.listboxes[j].delete(i + 1)
             self._restore_config_info(config_cookie)
 
         # table[i] = val
@@ -799,9 +858,9 @@ class Table(object):
             self._checkrow(val)
             self._rows[index] = list(val)
             if self._reprfunc is not None:
-                val = [self._reprfunc(index,j,v) for (j,v) in enumerate(val)]
+                val = [self._reprfunc(index, j, v) for (j, v) in enumerate(val)]
             self._mlb.insert(index, val)
-            self._mlb.delete(index+1)
+            self._mlb.delete(index + 1)
             self._restore_config_info(config_cookie)
 
     def __delitem__(self, row_index):
@@ -809,12 +868,13 @@ class Table(object):
         Delete the ``row_index``th row from this table.
         """
         if isinstance(row_index, slice):
-            raise ValueError('Slicing not supported')
-        if isinstance(row_index, tuple) and len(row_index)==2:
-            raise ValueError('Cannot delete a single cell!')
+            raise ValueError("Slicing not supported")
+        if isinstance(row_index, tuple) and len(row_index) == 2:
+            raise ValueError("Cannot delete a single cell!")
         del self._rows[row_index]
         self._mlb.delete(row_index)
-        if self._DEBUG: self._check_table_vs_mlb()
+        if self._DEBUG:
+            self._check_table_vs_mlb()
 
     def __len__(self):
         """
@@ -828,12 +888,14 @@ class Table(object):
         number of elements; and if not, raise an exception.
         """
         if len(rowvalue) != self._num_columns:
-            raise ValueError('Row %r has %d columns; expected %d' %
-                             (rowvalue, len(rowvalue), self._num_columns))
+            raise ValueError(
+                "Row %r has %d columns; expected %d"
+                % (rowvalue, len(rowvalue), self._num_columns)
+            )
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Columns
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     @property
     def column_names(self):
@@ -861,9 +923,9 @@ class Table(object):
         """:see: ``MultiListbox.show_column()``"""
         self._mlb.show_column(self.column_index(column_index))
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Selection
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def selected_row(self):
         """
@@ -872,18 +934,20 @@ class Table(object):
         ``table[table.selected_row()]``.
         """
         sel = self._mlb.curselection()
-        if sel: return int(sel[0])
-        else: return None
+        if sel:
+            return int(sel[0])
+        else:
+            return None
 
     def select(self, index=None, delta=None, see=True):
         """:see: ``MultiListbox.select()``"""
         self._mlb.select(index, delta, see)
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Sorting
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
-    def sort_by(self, column_index, order='toggle'):
+    def sort_by(self, column_index, order="toggle"):
         """
         Sort the rows in this table, using the specified column's
         values as a sort key.
@@ -902,24 +966,27 @@ class Table(object):
                 then reverse the rows; otherwise sort in ascending
                 order.
         """
-        if order not in ('ascending', 'descending', 'toggle'):
-            raise ValueError('sort_by(): order should be "ascending", '
-                             '"descending", or "toggle".')
+        if order not in ("ascending", "descending", "toggle"):
+            raise ValueError(
+                'sort_by(): order should be "ascending", ' '"descending", or "toggle".'
+            )
         column_index = self.column_index(column_index)
         config_cookie = self._save_config_info(index_by_id=True)
 
         # Sort the rows.
-        if order == 'toggle' and column_index == self._sortkey:
+        if order == "toggle" and column_index == self._sortkey:
             self._rows.reverse()
         else:
-            self._rows.sort(key=operator.itemgetter(column_index),
-                            reverse=(order=='descending'))
+            self._rows.sort(
+                key=operator.itemgetter(column_index), reverse=(order == "descending")
+            )
             self._sortkey = column_index
 
         # Redraw the table.
         self._fill_table()
         self._restore_config_info(config_cookie, index_by_id=True, see=True)
-        if self._DEBUG: self._check_table_vs_mlb()
+        if self._DEBUG:
+            self._check_table_vs_mlb()
 
     def _sort(self, event):
         """Event handler for clicking on a column label -- sort by
@@ -929,16 +996,16 @@ class Table(object):
         # If they click on the far-left or far-right of a column's
         # label, then resize rather than sorting.
         if self._mlb._resize_column(event):
-            return 'continue'
+            return "continue"
 
         # Otherwise, sort.
         else:
             self.sort_by(column_index)
-            return 'continue'
+            return "continue"
 
-    #/////////////////////////////////////////////////////////////////
-    #{ Table Drawing Helpers
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
+    # { Table Drawing Helpers
+    # /////////////////////////////////////////////////////////////////
 
     def _fill_table(self, save_config=True):
         """
@@ -949,16 +1016,22 @@ class Table(object):
         selection will also be lost -- i.e., no row will be selected
         after this call completes.
         """
-        self._mlb.delete(0, 'end')
+        self._mlb.delete(0, "end")
         for i, row in enumerate(self._rows):
             if self._reprfunc is not None:
-                row = [self._reprfunc(i,j,v) for (j,v) in enumerate(row)]
-            self._mlb.insert('end', row)
+                row = [self._reprfunc(i, j, v) for (j, v) in enumerate(row)]
+            self._mlb.insert("end", row)
 
     def _get_itemconfig(self, r, c):
-        return dict( (k, self._mlb.itemconfig(r, c, k)[-1])
-                     for k in ('foreground', 'selectforeground',
-                               'background', 'selectbackground') )
+        return dict(
+            (k, self._mlb.itemconfig(r, c, k)[-1])
+            for k in (
+                "foreground",
+                "selectforeground",
+                "background",
+                "selectbackground",
+            )
+        )
 
     def _save_config_info(self, row_indices=None, index_by_id=False):
         """
@@ -986,14 +1059,18 @@ class Table(object):
 
         # Look up the color configuration info for each row.
         if index_by_id:
-            config = dict((id(self._rows[r]), [self._get_itemconfig(r, c)
-                                        for c in range(self._num_columns)])
-                          for r in row_indices)
+            config = dict(
+                (
+                    id(self._rows[r]),
+                    [self._get_itemconfig(r, c) for c in range(self._num_columns)],
+                )
+                for r in row_indices
+            )
         else:
-            config = dict((r, [self._get_itemconfig(r, c)
-                               for c in range(self._num_columns)])
-                          for r in row_indices)
-
+            config = dict(
+                (r, [self._get_itemconfig(r, c) for c in range(self._num_columns)])
+                for r in row_indices
+            )
 
         return selection, config
 
@@ -1006,7 +1083,7 @@ class Table(object):
 
         # Clear the selection.
         if selection is None:
-            self._mlb.selection_clear(0, 'end')
+            self._mlb.selection_clear(0, "end")
 
         # Restore selection & color config
         if index_by_id:
@@ -1023,9 +1100,9 @@ class Table(object):
                 for c in range(self._num_columns):
                     self._mlb.itemconfigure(r, c, config[r][c])
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Debugging (Invariant Checker)
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     _DEBUG = False
     """If true, then run ``_check_table_vs_mlb()`` after any operation
@@ -1043,13 +1120,14 @@ class Table(object):
         for row in self:
             assert len(row) == self._num_columns
         assert self._num_columns == len(self._mlb.column_names)
-        #assert self._column_names == self._mlb.column_names
+        # assert self._column_names == self._mlb.column_names
         for i, row in enumerate(self):
             for j, cell in enumerate(row):
                 if self._reprfunc is not None:
                     cell = self._reprfunc(i, j, cell)
                 assert self._mlb.get(i)[j] == cell
 
+
 ######################################################################
 # Demo/Test Function
 ######################################################################
@@ -1057,42 +1135,46 @@ class Table(object):
 # update this to use new WordNet API
 def demo():
     root = Tk()
-    root.bind('<Control-q>', lambda e: root.destroy())
+    root.bind("<Control-q>", lambda e: root.destroy())
 
-    table = Table(root, 'Word Synset Hypernym Hyponym'.split(),
-                  column_weights=[0, 1, 1, 1],
-                  reprfunc=(lambda i,j,s: '  %s' % s))
-    table.pack(expand=True, fill='both')
+    table = Table(
+        root,
+        "Word Synset Hypernym Hyponym".split(),
+        column_weights=[0, 1, 1, 1],
+        reprfunc=(lambda i, j, s: "  %s" % s),
+    )
+    table.pack(expand=True, fill="both")
 
     from nltk.corpus import wordnet
     from nltk.corpus import brown
+
     for word, pos in sorted(set(brown.tagged_words()[:500])):
-        if pos[0] != 'N': continue
+        if pos[0] != "N":
+            continue
         word = word.lower()
         for synset in wordnet.synsets(word):
             try:
                 hyper_def = synset.hypernyms()[0].definition()
             except:
-                hyper_def = '*none*'
+                hyper_def = "*none*"
             try:
                 hypo_def = synset.hyponyms()[0].definition()
             except:
-                hypo_def = '*none*'
-            table.append([word,
-                          synset.definition(),
-                          hyper_def,
-                          hypo_def])
-
-    table.columnconfig('Word', background='#afa')
-    table.columnconfig('Synset', background='#efe')
-    table.columnconfig('Hypernym', background='#fee')
-    table.columnconfig('Hyponym', background='#ffe')
+                hypo_def = "*none*"
+            table.append([word, synset.definition(), hyper_def, hypo_def])
+
+    table.columnconfig("Word", background="#afa")
+    table.columnconfig("Synset", background="#efe")
+    table.columnconfig("Hypernym", background="#fee")
+    table.columnconfig("Hyponym", background="#ffe")
     for row in range(len(table)):
-        for column in ('Hypernym', 'Hyponym'):
-            if table[row, column] == '*none*':
-                table.itemconfig(row, column, foreground='#666',
-                                 selectforeground='#666')
+        for column in ("Hypernym", "Hyponym"):
+            if table[row, column] == "*none*":
+                table.itemconfig(
+                    row, column, foreground="#666", selectforeground="#666"
+                )
     root.mainloop()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
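
For orientation, a minimal sketch of the reformatted Table API in use. The column names and rows are hypothetical and a Tk display is required, so treat this as illustrative rather than part of the commit:

from tkinter import Tk
from nltk.draw.table import Table

root = Tk()
# Hypothetical two-column table; Table accepts any list of column names.
table = Table(root, ["Word", "Count"], column_weights=[1, 1])
table.pack(expand=True, fill="both")
table.append(["cat", 3])
table.append(["dog", 5])
# order="toggle" (the default) reverses the rows when the same column is
# sorted twice in a row; "ascending"/"descending" force a direction.
table.sort_by("Count", order="descending")
table.select(0)
print(table.selected_row())  # -> 0
root.mainloop()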
diff --git a/nlp_resource_data/nltk/draw/table.pyc b/nlp_resource_data/nltk/draw/table.pyc
deleted file mode 100755 (executable)
index f70e348..0000000
Binary files a/nlp_resource_data/nltk/draw/table.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/draw/tree.py b/nlp_resource_data/nltk/draw/tree.py
old mode 100755 (executable)
new mode 100644 (file)
index f421d13..33bfb9a
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Graphical Representations for Trees
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -9,17 +9,24 @@
 Graphically display a Tree.
 """
 
-from six.moves.tkinter import IntVar, Menu, Tk
+from tkinter import IntVar, Menu, Tk
 
 from nltk.util import in_idle
 from nltk.tree import Tree
-from nltk.draw.util import (CanvasFrame, CanvasWidget, BoxWidget,
-                            TextWidget, ParenWidget, OvalWidget)
+from nltk.draw.util import (
+    CanvasFrame,
+    CanvasWidget,
+    BoxWidget,
+    TextWidget,
+    ParenWidget,
+    OvalWidget,
+)
 
 ##//////////////////////////////////////////////////////
 ##  Tree Segment
 ##//////////////////////////////////////////////////////
 
+
 class TreeSegmentWidget(CanvasWidget):
     """
     A canvas widget that displays a single segment of a hierarchical
@@ -53,6 +60,7 @@ class TreeSegmentWidget(CanvasWidget):
         branch downwards).
       - ``draggable``: whether the widget can be dragged by the user.
     """
+
     def __init__(self, canvas, label, subtrees, **attribs):
         """
         :type node:
@@ -69,10 +77,10 @@ class TreeSegmentWidget(CanvasWidget):
         self._ordered = False
 
         # Create canvas objects.
-        self._lines = [canvas.create_line(0,0,0,0, fill='#006060')
-                       for c in subtrees]
-        self._polygon = canvas.create_polygon(0,0, fill='', state='hidden',
-                                              outline='#006060')
+        self._lines = [canvas.create_line(0, 0, 0, 0, fill="#006060") for c in subtrees]
+        self._polygon = canvas.create_polygon(
+            0, 0, fill="", state="hidden", outline="#006060"
+        )
 
         # Register child widgets (label + subtrees)
         self._add_child_widget(label)
@@ -86,55 +94,68 @@ class TreeSegmentWidget(CanvasWidget):
 
     def __setitem__(self, attr, value):
         canvas = self.canvas()
-        if attr == 'roof':
+        if attr == "roof":
             self._roof = value
             if self._roof:
-                for l in self._lines: canvas.itemconfig(l, state='hidden')
-                canvas.itemconfig(self._polygon, state='normal')
+                for l in self._lines:
+                    canvas.itemconfig(l, state="hidden")
+                canvas.itemconfig(self._polygon, state="normal")
             else:
-                for l in self._lines: canvas.itemconfig(l, state='normal')
-                canvas.itemconfig(self._polygon, state='hidden')
-        elif attr == 'orientation':
-            if value == 'horizontal': self._horizontal = 1
-            elif value == 'vertical': self._horizontal = 0
+                for l in self._lines:
+                    canvas.itemconfig(l, state="normal")
+                canvas.itemconfig(self._polygon, state="hidden")
+        elif attr == "orientation":
+            if value == "horizontal":
+                self._horizontal = 1
+            elif value == "vertical":
+                self._horizontal = 0
             else:
-                raise ValueError('orientation must be horizontal or vertical')
-        elif attr == 'color':
-            for l in self._lines: canvas.itemconfig(l, fill=value)
+                raise ValueError("orientation must be horizontal or vertical")
+        elif attr == "color":
+            for l in self._lines:
+                canvas.itemconfig(l, fill=value)
             canvas.itemconfig(self._polygon, outline=value)
-        elif isinstance(attr, tuple) and attr[0] == 'color':
+        elif isinstance(attr, tuple) and attr[0] == "color":
             # Set the color of an individual line.
             l = self._lines[int(attr[1])]
             canvas.itemconfig(l, fill=value)
-        elif attr == 'fill':
+        elif attr == "fill":
             canvas.itemconfig(self._polygon, fill=value)
-        elif attr == 'width':
-            canvas.itemconfig(self._polygon, {attr:value})
-            for l in self._lines: canvas.itemconfig(l, {attr:value})
-        elif attr in ('xspace', 'yspace'):
-            if attr == 'xspace': self._xspace = value
-            elif attr == 'yspace': self._yspace = value
+        elif attr == "width":
+            canvas.itemconfig(self._polygon, {attr: value})
+            for l in self._lines:
+                canvas.itemconfig(l, {attr: value})
+        elif attr in ("xspace", "yspace"):
+            if attr == "xspace":
+                self._xspace = value
+            elif attr == "yspace":
+                self._yspace = value
             self.update(self._label)
-        elif attr == 'ordered':
+        elif attr == "ordered":
             self._ordered = value
         else:
             CanvasWidget.__setitem__(self, attr, value)
 
     def __getitem__(self, attr):
-        if attr == 'roof': return self._roof
-        elif attr == 'width':
+        if attr == "roof":
+            return self._roof
+        elif attr == "width":
             return self.canvas().itemcget(self._polygon, attr)
-        elif attr == 'color':
-            return self.canvas().itemcget(self._polygon, 'outline')
-        elif isinstance(attr, tuple) and attr[0] == 'color':
+        elif attr == "color":
+            return self.canvas().itemcget(self._polygon, "outline")
+        elif isinstance(attr, tuple) and attr[0] == "color":
             l = self._lines[int(attr[1])]
-            return self.canvas().itemcget(l, 'fill')
-        elif attr == 'xspace': return self._xspace
-        elif attr == 'yspace': return self._yspace
-        elif attr == 'orientation':
-            if self._horizontal: return 'horizontal'
-            else: return 'vertical'
-        elif attr == 'ordered':
+            return self.canvas().itemcget(l, "fill")
+        elif attr == "xspace":
+            return self._xspace
+        elif attr == "yspace":
+            return self._yspace
+        elif attr == "orientation":
+            if self._horizontal:
+                return "horizontal"
+            else:
+                return "vertical"
+        elif attr == "ordered":
             return self._ordered
         else:
             return CanvasWidget.__getitem__(self, attr)
@@ -175,7 +196,7 @@ class TreeSegmentWidget(CanvasWidget):
         canvas = self.canvas()
         self._subtrees.insert(index, child)
         self._add_child_widget(child)
-        self._lines.append(canvas.create_line(0,0,0,0, fill='#006060'))
+        self._lines.append(canvas.create_line(0, 0, 0, 0, fill="#006060"))
         self.update(self._label)
 
     # but.. lines???
@@ -192,24 +213,28 @@ class TreeSegmentWidget(CanvasWidget):
         else:
             bbox = child.bbox()
         if self._horizontal:
-            return (bbox[0], (bbox[1]+bbox[3])/2.0)
+            return (bbox[0], (bbox[1] + bbox[3]) / 2.0)
         else:
-            return ((bbox[0]+bbox[2])/2.0, bbox[1])
+            return ((bbox[0] + bbox[2]) / 2.0, bbox[1])
 
     def _node_bottom(self):
         bbox = self._label.bbox()
         if self._horizontal:
-            return (bbox[2], (bbox[1]+bbox[3])/2.0)
+            return (bbox[2], (bbox[1] + bbox[3]) / 2.0)
         else:
-            return ((bbox[0]+bbox[2])/2.0, bbox[3])
+            return ((bbox[0] + bbox[2]) / 2.0, bbox[3])
 
     def _update(self, child):
-        if len(self._subtrees) == 0: return
-        if self._label.bbox() is None: return # [XX] ???
+        if len(self._subtrees) == 0:
+            return
+        if self._label.bbox() is None:
+            return  # [XX] ???
 
         # Which lines need to be redrawn?
-        if child is self._label: need_update = self._subtrees
-        else: need_update = [child]
+        if child is self._label:
+            need_update = self._subtrees
+        else:
+            need_update = [child]
 
         if self._ordered and not self._managing:
             need_update = self._maintain_order(child)
@@ -225,11 +250,13 @@ class TreeSegmentWidget(CanvasWidget):
             ymax = max(ymax, bbox[3])
 
         if self._horizontal:
-            self.canvas().coords(self._polygon, nodex, nodey, xmin,
-                                 ymin, xmin, ymax, nodex, nodey)
+            self.canvas().coords(
+                self._polygon, nodex, nodey, xmin, ymin, xmin, ymax, nodex, nodey
+            )
         else:
-            self.canvas().coords(self._polygon, nodex, nodey, xmin,
-                                 ymin, xmax, ymin, nodex, nodey)
+            self.canvas().coords(
+                self._polygon, nodex, nodey, xmin, ymin, xmax, ymin, nodex, nodey
+            )
 
         # Redraw all lines that need it.
         for subtree in need_update:
@@ -251,8 +278,8 @@ class TreeSegmentWidget(CanvasWidget):
             # Check all the leaves
             for subtree in self._subtrees:
                 (x1, y1, x2, y2) = subtree.bbox()
-                if bot+self._yspace > y1:
-                    subtree.move(0,bot+self._yspace-y1)
+                if bot + self._yspace > y1:
+                    subtree.move(0, bot + self._yspace - y1)
 
             return self._subtrees
         else:
@@ -261,26 +288,26 @@ class TreeSegmentWidget(CanvasWidget):
 
             # Check leaves to our right.
             x = right + self._xspace
-            for i in range(index+1, len(self._subtrees)):
+            for i in range(index + 1, len(self._subtrees)):
                 (x1, y1, x2, y2) = self._subtrees[i].bbox()
                 if x > x1:
-                    self._subtrees[i].move(x-x1, 0)
-                    x += x2-x1 + self._xspace
+                    self._subtrees[i].move(x - x1, 0)
+                    x += x2 - x1 + self._xspace
                     moved.append(self._subtrees[i])
 
             # Check leaves to our left.
             x = left - self._xspace
-            for i in range(index-1, -1, -1):
+            for i in range(index - 1, -1, -1):
                 (x1, y1, x2, y2) = self._subtrees[i].bbox()
                 if x < x2:
-                    self._subtrees[i].move(x-x2, 0)
-                    x -= x2-x1 + self._xspace
+                    self._subtrees[i].move(x - x2, 0)
+                    x -= x2 - x1 + self._xspace
                     moved.append(self._subtrees[i])
 
             # Check the node
             (x1, y1, x2, y2) = self._label.bbox()
-            if y2 > top-self._yspace:
-                self._label.move(0, top-self._yspace-y2)
+            if y2 > top - self._yspace:
+                self._label.move(0, top - self._yspace - y2)
                 moved = self._subtrees
 
         # Return a list of the nodes we moved
@@ -293,8 +320,8 @@ class TreeSegmentWidget(CanvasWidget):
             # Check all the leaves
             for subtree in self._subtrees:
                 (x1, y1, x2, y2) = subtree.bbox()
-                if right+self._xspace > x1:
-                    subtree.move(right+self._xspace-x1)
+                if right + self._xspace > x1:
+                    subtree.move(right + self._xspace - x1)
 
             return self._subtrees
         else:
@@ -303,26 +330,26 @@ class TreeSegmentWidget(CanvasWidget):
 
             # Check leaves below us.
             y = bot + self._yspace
-            for i in range(index+1, len(self._subtrees)):
+            for i in range(index + 1, len(self._subtrees)):
                 (x1, y1, x2, y2) = self._subtrees[i].bbox()
                 if y > y1:
-                    self._subtrees[i].move(0, y-y1)
-                    y += y2-y1 + self._yspace
+                    self._subtrees[i].move(0, y - y1)
+                    y += y2 - y1 + self._yspace
                     moved.append(self._subtrees[i])
 
             # Check leaves above us
             y = top - self._yspace
-            for i in range(index-1, -1, -1):
+            for i in range(index - 1, -1, -1):
                 (x1, y1, x2, y2) = self._subtrees[i].bbox()
                 if y < y2:
-                    self._subtrees[i].move(0, y-y2)
-                    y -= y2-y1 + self._yspace
+                    self._subtrees[i].move(0, y - y2)
+                    y -= y2 - y1 + self._yspace
                     moved.append(self._subtrees[i])
 
             # Check the node
             (x1, y1, x2, y2) = self._label.bbox()
-            if x2 > left-self._xspace:
-                self._label.move(left-self._xspace-x2, 0)
+            if x2 > left - self._xspace:
+                self._label.move(left - self._xspace - x2, 0)
                 moved = self._subtrees
 
         # Return a list of the nodes we moved
@@ -348,7 +375,7 @@ class TreeSegmentWidget(CanvasWidget):
 
         # Center the subtrees with the node.
         for subtree in self._subtrees:
-            subtree.move(0, nodey-center)
+            subtree.move(0, nodey - center)
 
     def _manage_vertical(self):
         (nodex, nodey) = self._node_bottom()
@@ -365,19 +392,22 @@ class TreeSegmentWidget(CanvasWidget):
         # Find the center of their tops.
         center = 0.0
         for subtree in self._subtrees:
-            center += self._subtree_top(subtree)[0]/len(self._subtrees)
+            center += self._subtree_top(subtree)[0] / len(self._subtrees)
 
         # Center the subtrees with the node.
         for subtree in self._subtrees:
-            subtree.move(nodex-center, 0)
+            subtree.move(nodex - center, 0)
 
     def _manage(self):
         self._managing = True
         (nodex, nodey) = self._node_bottom()
-        if len(self._subtrees) == 0: return
+        if len(self._subtrees) == 0:
+            return
 
-        if self._horizontal: self._manage_horizontal()
-        else: self._manage_vertical()
+        if self._horizontal:
+            self._manage_horizontal()
+        else:
+            self._manage_vertical()
 
         # Update lines to subtrees.
         for subtree in self._subtrees:
@@ -386,23 +416,42 @@ class TreeSegmentWidget(CanvasWidget):
         self._managing = False
 
     def __repr__(self):
-        return '[TreeSeg %s: %s]' % (self._label, self._subtrees)
-
-def _tree_to_treeseg(canvas, t, make_node, make_leaf,
-                     tree_attribs, node_attribs,
-                     leaf_attribs, loc_attribs):
+        return "[TreeSeg %s: %s]" % (self._label, self._subtrees)
+
+
+def _tree_to_treeseg(
+    canvas,
+    t,
+    make_node,
+    make_leaf,
+    tree_attribs,
+    node_attribs,
+    leaf_attribs,
+    loc_attribs,
+):
     if isinstance(t, Tree):
         label = make_node(canvas, t.label(), **node_attribs)
-        subtrees = [_tree_to_treeseg(canvas, child, make_node, make_leaf,
-                                     tree_attribs, node_attribs,
-                                     leaf_attribs, loc_attribs)
-                    for child in t]
+        subtrees = [
+            _tree_to_treeseg(
+                canvas,
+                child,
+                make_node,
+                make_leaf,
+                tree_attribs,
+                node_attribs,
+                leaf_attribs,
+                loc_attribs,
+            )
+            for child in t
+        ]
         return TreeSegmentWidget(canvas, label, subtrees, **tree_attribs)
     else:
         return make_leaf(canvas, t, **leaf_attribs)
 
-def tree_to_treesegment(canvas, t, make_node=TextWidget,
-                        make_leaf=TextWidget, **attribs):
+
+def tree_to_treesegment(
+    canvas, t, make_node=TextWidget, make_leaf=TextWidget, **attribs
+):
     """
     Convert a Tree into a ``TreeSegmentWidget``.
 
@@ -430,19 +479,33 @@ def tree_to_treesegment(canvas, t, make_node=TextWidget,
     loc_attribs = {}
 
     for (key, value) in list(attribs.items()):
-        if key[:5] == 'tree_': tree_attribs[key[5:]] = value
-        elif key[:5] == 'node_': node_attribs[key[5:]] = value
-        elif key[:5] == 'leaf_': leaf_attribs[key[5:]] = value
-        elif key[:4] == 'loc_': loc_attribs[key[4:]] = value
-        else: raise ValueError('Bad attribute: %s' % key)
-    return _tree_to_treeseg(canvas, t, make_node, make_leaf,
-                                tree_attribs, node_attribs,
-                                leaf_attribs, loc_attribs)
+        if key[:5] == "tree_":
+            tree_attribs[key[5:]] = value
+        elif key[:5] == "node_":
+            node_attribs[key[5:]] = value
+        elif key[:5] == "leaf_":
+            leaf_attribs[key[5:]] = value
+        elif key[:4] == "loc_":
+            loc_attribs[key[4:]] = value
+        else:
+            raise ValueError("Bad attribute: %s" % key)
+    return _tree_to_treeseg(
+        canvas,
+        t,
+        make_node,
+        make_leaf,
+        tree_attribs,
+        node_attribs,
+        leaf_attribs,
+        loc_attribs,
+    )
+
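
A quick sketch of the prefix routing above: one keyword set configures segment, node, and leaf widgets at once, and an unprefixed key raises ValueError. The tree string here is hypothetical and a display is assumed:

from nltk.draw.util import CanvasFrame
from nltk.draw.tree import tree_to_treesegment
from nltk.tree import Tree

cf = CanvasFrame(width=400, height=200)
t = Tree.fromstring("(S (NP a demo) (VP works))")
# tree_* keys reach the TreeSegmentWidgets; node_*/leaf_* keys would be
# passed to the TextWidget constructors used for labels and leaves.
seg = tree_to_treesegment(cf.canvas(), t, tree_color="green4", tree_width=2)
cf.add_widget(seg, 10, 10)
cf.mainloop()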
 
 ##//////////////////////////////////////////////////////
 ##  Tree Widget
 ##//////////////////////////////////////////////////////
 
+
 class TreeWidget(CanvasWidget):
     """
     A canvas widget that displays a single Tree.
@@ -483,8 +546,10 @@ class TreeWidget(CanvasWidget):
         segments.
       - ``draggable``: whether the widget can be dragged by the user.
     """
-    def __init__(self, canvas, t, make_node=TextWidget,
-                 make_leaf=TextWidget, **attribs):
+
+    def __init__(
+        self, canvas, t, make_node=TextWidget, make_leaf=TextWidget, **attribs
+    ):
         # Node & leaf canvas widget constructors
         self._make_node = make_node
         self._make_leaf = make_leaf
@@ -493,24 +558,24 @@ class TreeWidget(CanvasWidget):
         # Attributes.
         self._nodeattribs = {}
         self._leafattribs = {}
-        self._locattribs = {'color': '#008000'}
-        self._line_color = '#008080'
+        self._locattribs = {"color": "#008000"}
+        self._line_color = "#008080"
         self._line_width = 1
-        self._roof_color = '#008080'
-        self._roof_fill = '#c0c0c0'
+        self._roof_color = "#008080"
+        self._roof_fill = "#c0c0c0"
         self._shapeable = False
         self._xspace = 10
         self._yspace = 10
-        self._orientation = 'vertical'
+        self._orientation = "vertical"
         self._ordered = False
 
         # Build trees.
-        self._keys = {} # treeseg -> key
+        self._keys = {}  # treeseg -> key
         self._expanded_trees = {}
         self._collapsed_trees = {}
         self._nodes = []
         self._leaves = []
-        #self._locs = []
+        # self._locs = []
         self._make_collapsed_trees(canvas, t, ())
         self._treeseg = self._make_expanded_tree(canvas, t, ())
         self._add_child_widget(self._treeseg)
@@ -561,48 +626,61 @@ class TreeWidget(CanvasWidget):
         """
         Add a binding to all leaves.
         """
-        for leaf in self._leaves: leaf.bind_click(callback, button)
-        for leaf in self._leaves: leaf.bind_click(callback, button)
+        for leaf in self._leaves:
+            leaf.bind_click(callback, button)
+        for leaf in self._leaves:
+            leaf.bind_click(callback, button)
 
     def bind_drag_leaves(self, callback, button=1):
         """
         Add a binding to all leaves.
         """
-        for leaf in self._leaves: leaf.bind_drag(callback, button)
-        for leaf in self._leaves: leaf.bind_drag(callback, button)
+        for leaf in self._leaves:
+            leaf.bind_drag(callback, button)
+        for leaf in self._leaves:
+            leaf.bind_drag(callback, button)
 
     def bind_click_nodes(self, callback, button=1):
         """
         Add a binding to all nodes.
         """
-        for node in self._nodes: node.bind_click(callback, button)
-        for node in self._nodes: node.bind_click(callback, button)
+        for node in self._nodes:
+            node.bind_click(callback, button)
+        for node in self._nodes:
+            node.bind_click(callback, button)
 
     def bind_drag_nodes(self, callback, button=1):
         """
         Add a binding to all nodes.
         """
-        for node in self._nodes: node.bind_drag(callback, button)
-        for node in self._nodes: node.bind_drag(callback, button)
+        for node in self._nodes:
+            node.bind_drag(callback, button)
+        for node in self._nodes:
+            node.bind_drag(callback, button)
 
     def _make_collapsed_trees(self, canvas, t, key):
-        if not isinstance(t, Tree): return
+        if not isinstance(t, Tree):
+            return
         make_node = self._make_node
         make_leaf = self._make_leaf
 
         node = make_node(canvas, t.label(), **self._nodeattribs)
         self._nodes.append(node)
-        leaves = [make_leaf(canvas, l, **self._leafattribs)
-                  for l in t.leaves()]
+        leaves = [make_leaf(canvas, l, **self._leafattribs) for l in t.leaves()]
         self._leaves += leaves
-        treeseg = TreeSegmentWidget(canvas, node, leaves, roof=1,
-                                    color=self._roof_color,
-                                    fill=self._roof_fill,
-                                    width=self._line_width)
+        treeseg = TreeSegmentWidget(
+            canvas,
+            node,
+            leaves,
+            roof=1,
+            color=self._roof_color,
+            fill=self._roof_fill,
+            width=self._line_width,
+        )
 
         self._collapsed_trees[key] = treeseg
         self._keys[treeseg] = key
-        #self._add_child_widget(treeseg)
+        # self._add_child_widget(treeseg)
         treeseg.hide()
 
         # Build trees for children.
@@ -618,11 +696,13 @@ class TreeWidget(CanvasWidget):
             node = make_node(canvas, t.label(), **self._nodeattribs)
             self._nodes.append(node)
             children = t
-            subtrees = [self._make_expanded_tree(canvas, children[i], key+(i,))
-                        for i in range(len(children))]
-            treeseg = TreeSegmentWidget(canvas, node, subtrees,
-                                        color=self._line_color,
-                                        width=self._line_width)
+            subtrees = [
+                self._make_expanded_tree(canvas, children[i], key + (i,))
+                for i in range(len(children))
+            ]
+            treeseg = TreeSegmentWidget(
+                canvas, node, subtrees, color=self._line_color, width=self._line_width
+            )
             self._expanded_trees[key] = treeseg
             self._keys[treeseg] = key
             return treeseg
@@ -632,80 +712,101 @@ class TreeWidget(CanvasWidget):
             return leaf
 
     def __setitem__(self, attr, value):
-        if attr[:5] == 'node_':
-            for node in self._nodes: node[attr[5:]] = value
-        elif attr[:5] == 'leaf_':
-            for leaf in self._leaves: leaf[attr[5:]] = value
-        elif attr == 'line_color':
+        if attr[:5] == "node_":
+            for node in self._nodes:
+                node[attr[5:]] = value
+        elif attr[:5] == "leaf_":
+            for leaf in self._leaves:
+                leaf[attr[5:]] = value
+        elif attr == "line_color":
             self._line_color = value
-            for tseg in list(self._expanded_trees.values()): tseg['color'] = value
-        elif attr == 'line_width':
+            for tseg in list(self._expanded_trees.values()):
+                tseg["color"] = value
+        elif attr == "line_width":
             self._line_width = value
-            for tseg in list(self._expanded_trees.values()): tseg['width'] = value
-            for tseg in list(self._collapsed_trees.values()): tseg['width'] = value
-        elif attr == 'roof_color':
+            for tseg in list(self._expanded_trees.values()):
+                tseg["width"] = value
+            for tseg in list(self._collapsed_trees.values()):
+                tseg["width"] = value
+        elif attr == "roof_color":
             self._roof_color = value
-            for tseg in list(self._collapsed_trees.values()): tseg['color'] = value
-        elif attr == 'roof_fill':
+            for tseg in list(self._collapsed_trees.values()):
+                tseg["color"] = value
+        elif attr == "roof_fill":
             self._roof_fill = value
-            for tseg in list(self._collapsed_trees.values()): tseg['fill'] = value
-        elif attr == 'shapeable':
+            for tseg in list(self._collapsed_trees.values()):
+                tseg["fill"] = value
+        elif attr == "shapeable":
             self._shapeable = value
             for tseg in list(self._expanded_trees.values()):
-                tseg['draggable'] = value
+                tseg["draggable"] = value
             for tseg in list(self._collapsed_trees.values()):
-                tseg['draggable'] = value
-            for leaf in self._leaves: leaf['draggable'] = value
-        elif attr == 'xspace':
+                tseg["draggable"] = value
+            for leaf in self._leaves:
+                leaf["draggable"] = value
+        elif attr == "xspace":
             self._xspace = value
             for tseg in list(self._expanded_trees.values()):
-                tseg['xspace'] = value
+                tseg["xspace"] = value
             for tseg in list(self._collapsed_trees.values()):
-                tseg['xspace'] = value
+                tseg["xspace"] = value
             self.manage()
-        elif attr == 'yspace':
+        elif attr == "yspace":
             self._yspace = value
             for tseg in list(self._expanded_trees.values()):
-                tseg['yspace'] = value
+                tseg["yspace"] = value
             for tseg in list(self._collapsed_trees.values()):
-                tseg['yspace'] = value
+                tseg["yspace"] = value
             self.manage()
-        elif attr == 'orientation':
+        elif attr == "orientation":
             self._orientation = value
             for tseg in list(self._expanded_trees.values()):
-                tseg['orientation'] = value
+                tseg["orientation"] = value
             for tseg in list(self._collapsed_trees.values()):
-                tseg['orientation'] = value
+                tseg["orientation"] = value
             self.manage()
-        elif attr == 'ordered':
+        elif attr == "ordered":
             self._ordered = value
             for tseg in list(self._expanded_trees.values()):
-                tseg['ordered'] = value
+                tseg["ordered"] = value
             for tseg in list(self._collapsed_trees.values()):
-                tseg['ordered'] = value
-        else: CanvasWidget.__setitem__(self, attr, value)
+                tseg["ordered"] = value
+        else:
+            CanvasWidget.__setitem__(self, attr, value)
 
     def __getitem__(self, attr):
-        if attr[:5] == 'node_':
+        if attr[:5] == "node_":
             return self._nodeattribs.get(attr[5:], None)
-        elif attr[:5] == 'leaf_':
+        elif attr[:5] == "leaf_":
             return self._leafattribs.get(attr[5:], None)
-        elif attr[:4] == 'loc_':
+        elif attr[:4] == "loc_":
             return self._locattribs.get(attr[4:], None)
-        elif attr == 'line_color': return self._line_color
-        elif attr == 'line_width': return self._line_width
-        elif attr == 'roof_color': return self._roof_color
-        elif attr == 'roof_fill': return self._roof_fill
-        elif attr == 'shapeable': return self._shapeable
-        elif attr == 'xspace': return self._xspace
-        elif attr == 'yspace': return self._yspace
-        elif attr == 'orientation': return self._orientation
-        else: return CanvasWidget.__getitem__(self, attr)
-
-    def _tags(self): return []
+        elif attr == "line_color":
+            return self._line_color
+        elif attr == "line_width":
+            return self._line_width
+        elif attr == "roof_color":
+            return self._roof_color
+        elif attr == "roof_fill":
+            return self._roof_fill
+        elif attr == "shapeable":
+            return self._shapeable
+        elif attr == "xspace":
+            return self._xspace
+        elif attr == "yspace":
+            return self._yspace
+        elif attr == "orientation":
+            return self._orientation
+        else:
+            return CanvasWidget.__getitem__(self, attr)
+
+    def _tags(self):
+        return []
 
     def _manage(self):
-        segs = list(self._expanded_trees.values()) + list(self._collapsed_trees.values())
+        segs = list(self._expanded_trees.values()) + list(
+            self._collapsed_trees.values()
+        )
         for tseg in segs:
             if tseg.hidden():
                 tseg.show()
@@ -717,7 +818,7 @@ class TreeWidget(CanvasWidget):
         Collapse/expand a tree.
         """
         old_treeseg = treeseg
-        if old_treeseg['roof']:
+        if old_treeseg["roof"]:
             new_treeseg = self._expanded_trees[self._keys[old_treeseg]]
         else:
             new_treeseg = self._collapsed_trees[self._keys[old_treeseg]]
@@ -735,7 +836,7 @@ class TreeWidget(CanvasWidget):
         new_treeseg.show()
         (newx, newy) = new_treeseg.label().bbox()[:2]
         (oldx, oldy) = old_treeseg.label().bbox()[:2]
-        new_treeseg.move(oldx-newx, oldy-newy)
+        new_treeseg.move(oldx - newx, oldy - newy)
 
         # Hide the old tree
         old_treeseg.hide()
@@ -743,10 +844,12 @@ class TreeWidget(CanvasWidget):
         # We could do parent.manage() here instead, if we wanted.
         new_treeseg.parent().update(new_treeseg)
 
+
 ##//////////////////////////////////////////////////////
 ##  draw_trees
 ##//////////////////////////////////////////////////////
 
+
 class TreeView(object):
     def __init__(self, *trees):
         from math import sqrt, ceil
@@ -754,34 +857,41 @@ class TreeView(object):
         self._trees = trees
 
         self._top = Tk()
-        self._top.title('NLTK')
-        self._top.bind('<Control-x>', self.destroy)
-        self._top.bind('<Control-q>', self.destroy)
+        self._top.title("NLTK")
+        self._top.bind("<Control-x>", self.destroy)
+        self._top.bind("<Control-q>", self.destroy)
 
         cf = self._cframe = CanvasFrame(self._top)
-        self._top.bind('<Control-p>', self._cframe.print_to_file)
+        self._top.bind("<Control-p>", self._cframe.print_to_file)
 
         # Size is variable.
         self._size = IntVar(self._top)
         self._size.set(12)
-        bold = ('helvetica', -self._size.get(), 'bold')
-        helv = ('helvetica', -self._size.get())
+        bold = ("helvetica", -self._size.get(), "bold")
+        helv = ("helvetica", -self._size.get())
 
         # Lay the trees out in a square.
         self._width = int(ceil(sqrt(len(trees))))
         self._widgets = []
         for i in range(len(trees)):
-            widget = TreeWidget(cf.canvas(), trees[i], node_font=bold,
-                                leaf_color='#008040', node_color='#004080',
-                                roof_color='#004040', roof_fill='white',
-                                line_color='#004040', draggable=1,
-                                leaf_font=helv)
+            widget = TreeWidget(
+                cf.canvas(),
+                trees[i],
+                node_font=bold,
+                leaf_color="#008040",
+                node_color="#004080",
+                roof_color="#004040",
+                roof_fill="white",
+                line_color="#004040",
+                draggable=1,
+                leaf_font=helv,
+            )
             widget.bind_click_trees(widget.toggle_collapsed)
             self._widgets.append(widget)
             cf.add_widget(widget, 0, 0)
 
         self._layout()
-        self._cframe.pack(expand=1, fill='both')
+        self._cframe.pack(expand=1, fill="both")
         self._init_menubar()
 
     def _layout(self):
@@ -793,7 +903,7 @@ class TreeView(object):
             if i % width == 0:
                 y = ymax
                 x = 0
-            widget.move(x-oldx, y-oldy)
+            widget.move(x - oldx, y - oldy)
             x = widget.bbox()[2] + 10
             ymax = max(ymax, widget.bbox()[3] + 10)
 
@@ -801,45 +911,78 @@ class TreeView(object):
         menubar = Menu(self._top)
 
         filemenu = Menu(menubar, tearoff=0)
-        filemenu.add_command(label='Print to Postscript', underline=0,
-                             command=self._cframe.print_to_file,
-                             accelerator='Ctrl-p')
-        filemenu.add_command(label='Exit', underline=1,
-                             command=self.destroy, accelerator='Ctrl-x')
-        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+        filemenu.add_command(
+            label="Print to Postscript",
+            underline=0,
+            command=self._cframe.print_to_file,
+            accelerator="Ctrl-p",
+        )
+        filemenu.add_command(
+            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"
+        )
+        menubar.add_cascade(label="File", underline=0, menu=filemenu)
 
         zoommenu = Menu(menubar, tearoff=0)
-        zoommenu.add_radiobutton(label='Tiny', variable=self._size,
-                                 underline=0, value=10, command=self.resize)
-        zoommenu.add_radiobutton(label='Small', variable=self._size,
-                                 underline=0, value=12, command=self.resize)
-        zoommenu.add_radiobutton(label='Medium', variable=self._size,
-                                 underline=0, value=14, command=self.resize)
-        zoommenu.add_radiobutton(label='Large', variable=self._size,
-                                 underline=0, value=28, command=self.resize)
-        zoommenu.add_radiobutton(label='Huge', variable=self._size,
-                                 underline=0, value=50, command=self.resize)
-        menubar.add_cascade(label='Zoom', underline=0, menu=zoommenu)
+        zoommenu.add_radiobutton(
+            label="Tiny",
+            variable=self._size,
+            underline=0,
+            value=10,
+            command=self.resize,
+        )
+        zoommenu.add_radiobutton(
+            label="Small",
+            variable=self._size,
+            underline=0,
+            value=12,
+            command=self.resize,
+        )
+        zoommenu.add_radiobutton(
+            label="Medium",
+            variable=self._size,
+            underline=0,
+            value=14,
+            command=self.resize,
+        )
+        zoommenu.add_radiobutton(
+            label="Large",
+            variable=self._size,
+            underline=0,
+            value=28,
+            command=self.resize,
+        )
+        zoommenu.add_radiobutton(
+            label="Huge",
+            variable=self._size,
+            underline=0,
+            value=50,
+            command=self.resize,
+        )
+        menubar.add_cascade(label="Zoom", underline=0, menu=zoommenu)
 
         self._top.config(menu=menubar)
 
     def resize(self, *e):
-        bold = ('helvetica', -self._size.get(), 'bold')
-        helv = ('helvetica', -self._size.get())
+        bold = ("helvetica", -self._size.get(), "bold")
+        helv = ("helvetica", -self._size.get())
         xspace = self._size.get()
         yspace = self._size.get()
         for widget in self._widgets:
-            widget['node_font'] = bold
-            widget['leaf_font'] = helv
-            widget['xspace'] = xspace
-            widget['yspace'] = yspace
-            if self._size.get() < 20: widget['line_width'] = 1
-            elif self._size.get() < 30: widget['line_width'] = 2
-            else: widget['line_width'] = 3
+            widget["node_font"] = bold
+            widget["leaf_font"] = helv
+            widget["xspace"] = xspace
+            widget["yspace"] = yspace
+            if self._size.get() < 20:
+                widget["line_width"] = 1
+            elif self._size.get() < 30:
+                widget["line_width"] = 2
+            else:
+                widget["line_width"] = 3
         self._layout()
 
     def destroy(self, *e):
-        if self._top is None: return
+        if self._top is None:
+            return
         self._top.destroy()
         self._top = None
 
@@ -850,9 +993,11 @@ class TreeView(object):
         from a script); otherwise, the demo will close as soon as
         the script completes.
         """
-        if in_idle(): return
+        if in_idle():
+            return
         self._top.mainloop(*args, **kwargs)
 
+
 def draw_trees(*trees):
     """
     Open a new window containing a graphical diagram of the given
@@ -863,44 +1008,55 @@ def draw_trees(*trees):
     TreeView(*trees).mainloop()
     return
 
+
 ##//////////////////////////////////////////////////////
 ##  Demo Code
 ##//////////////////////////////////////////////////////
 
+
 def demo():
     import random
+
     def fill(cw):
-        cw['fill'] = '#%06d' % random.randint(0,999999)
+        cw["fill"] = "#%06d" % random.randint(0, 999999)
 
     cf = CanvasFrame(width=550, height=450, closeenough=2)
 
-    t = Tree.fromstring('''
+    t = Tree.fromstring(
+        """
     (S (NP the very big cat)
-       (VP (Adv sorta) (V saw) (NP (Det the) (N dog))))''')
-
-    tc = TreeWidget(cf.canvas(), t, draggable=1,
-                    node_font=('helvetica', -14, 'bold'),
-                    leaf_font=('helvetica', -12, 'italic'),
-                    roof_fill='white', roof_color='black',
-                    leaf_color='green4', node_color='blue2')
-    cf.add_widget(tc,10,10)
+       (VP (Adv sorta) (V saw) (NP (Det the) (N dog))))"""
+    )
+
+    tc = TreeWidget(
+        cf.canvas(),
+        t,
+        draggable=1,
+        node_font=("helvetica", -14, "bold"),
+        leaf_font=("helvetica", -12, "italic"),
+        roof_fill="white",
+        roof_color="black",
+        leaf_color="green4",
+        node_color="blue2",
+    )
+    cf.add_widget(tc, 10, 10)
 
     def boxit(canvas, text):
-        big = ('helvetica', -16, 'bold')
-        return BoxWidget(canvas, TextWidget(canvas, text,
-                                            font=big), fill='green')
+        big = ("helvetica", -16, "bold")
+        return BoxWidget(canvas, TextWidget(canvas, text, font=big), fill="green")
+
     def ovalit(canvas, text):
-        return OvalWidget(canvas, TextWidget(canvas, text),
-                          fill='cyan')
+        return OvalWidget(canvas, TextWidget(canvas, text), fill="cyan")
 
-    treetok = Tree.fromstring('(S (NP this tree) (VP (V is) (AdjP shapeable)))')
+    treetok = Tree.fromstring("(S (NP this tree) (VP (V is) (AdjP shapeable)))")
     tc2 = TreeWidget(cf.canvas(), treetok, boxit, ovalit, shapeable=1)
 
     def color(node):
-        node['color'] = '#%04d00' % random.randint(0,9999)
+        node["color"] = "#%04d00" % random.randint(0, 9999)
+
     def color2(treeseg):
-        treeseg.label()['fill'] = '#%06d' % random.randint(0,9999)
-        treeseg.label().child()['color'] = 'white'
+        treeseg.label()["fill"] = "#%06d" % random.randint(0, 9999)
+        treeseg.label().child()["color"] = "white"
 
     tc.bind_click_trees(tc.toggle_collapsed)
     tc2.bind_click_trees(tc2.toggle_collapsed)
@@ -909,29 +1065,32 @@ def demo():
     tc2.expanded_tree().bind_click(color2, 3)
 
     paren = ParenWidget(cf.canvas(), tc2)
-    cf.add_widget(paren, tc.bbox()[2]+10, 10)
+    cf.add_widget(paren, tc.bbox()[2] + 10, 10)
 
-    tree3 = Tree.fromstring('''
+    tree3 = Tree.fromstring(
+        """
     (S (NP this tree) (AUX was)
-       (VP (V built) (PP (P with) (NP (N tree_to_treesegment)))))''')
-    tc3 = tree_to_treesegment(cf.canvas(), tree3, tree_color='green4',
-                              tree_xspace=2, tree_width=2)
-    tc3['draggable'] = 1
-    cf.add_widget(tc3, 10, tc.bbox()[3]+10)
+       (VP (V built) (PP (P with) (NP (N tree_to_treesegment)))))"""
+    )
+    tc3 = tree_to_treesegment(
+        cf.canvas(), tree3, tree_color="green4", tree_xspace=2, tree_width=2
+    )
+    tc3["draggable"] = 1
+    cf.add_widget(tc3, 10, tc.bbox()[3] + 10)
 
     def orientswitch(treewidget):
-        if treewidget['orientation'] == 'horizontal':
-            treewidget.expanded_tree(1,1).subtrees()[0].set_text('vertical')
-            treewidget.collapsed_tree(1,1).subtrees()[0].set_text('vertical')
-            treewidget.collapsed_tree(1).subtrees()[1].set_text('vertical')
-            treewidget.collapsed_tree().subtrees()[3].set_text('vertical')
-            treewidget['orientation'] = 'vertical'
+        if treewidget["orientation"] == "horizontal":
+            treewidget.expanded_tree(1, 1).subtrees()[0].set_text("vertical")
+            treewidget.collapsed_tree(1, 1).subtrees()[0].set_text("vertical")
+            treewidget.collapsed_tree(1).subtrees()[1].set_text("vertical")
+            treewidget.collapsed_tree().subtrees()[3].set_text("vertical")
+            treewidget["orientation"] = "vertical"
         else:
-            treewidget.expanded_tree(1,1).subtrees()[0].set_text('horizontal')
-            treewidget.collapsed_tree(1,1).subtrees()[0].set_text('horizontal')
-            treewidget.collapsed_tree(1).subtrees()[1].set_text('horizontal')
-            treewidget.collapsed_tree().subtrees()[3].set_text('horizontal')
-            treewidget['orientation'] = 'horizontal'
+            treewidget.expanded_tree(1, 1).subtrees()[0].set_text("horizontal")
+            treewidget.collapsed_tree(1, 1).subtrees()[0].set_text("horizontal")
+            treewidget.collapsed_tree(1).subtrees()[1].set_text("horizontal")
+            treewidget.collapsed_tree().subtrees()[3].set_text("horizontal")
+            treewidget["orientation"] = "horizontal"
 
     text = """
 Try clicking, right clicking, and dragging
@@ -943,21 +1102,28 @@ constructors for the nodes & leaves (BoxWidget
 and OvalWidget).  The bottom-left tree is
 built from tree_to_treesegment."""
     twidget = TextWidget(cf.canvas(), text.strip())
-    textbox = BoxWidget(cf.canvas(), twidget, fill='white', draggable=1)
-    cf.add_widget(textbox, tc3.bbox()[2]+10, tc2.bbox()[3]+10)
-
-    tree4 = Tree.fromstring('(S (NP this tree) (VP (V is) (Adj horizontal)))')
-    tc4 = TreeWidget(cf.canvas(), tree4, draggable=1,
-                     line_color='brown2', roof_color='brown2',
-                     node_font=('helvetica', -12, 'bold'),
-                     node_color='brown4', orientation='horizontal')
+    textbox = BoxWidget(cf.canvas(), twidget, fill="white", draggable=1)
+    cf.add_widget(textbox, tc3.bbox()[2] + 10, tc2.bbox()[3] + 10)
+
+    tree4 = Tree.fromstring("(S (NP this tree) (VP (V is) (Adj horizontal)))")
+    tc4 = TreeWidget(
+        cf.canvas(),
+        tree4,
+        draggable=1,
+        line_color="brown2",
+        roof_color="brown2",
+        node_font=("helvetica", -12, "bold"),
+        node_color="brown4",
+        orientation="horizontal",
+    )
     tc4.manage()
-    cf.add_widget(tc4, tc3.bbox()[2]+10, textbox.bbox()[3]+10)
+    cf.add_widget(tc4, tc3.bbox()[2] + 10, textbox.bbox()[3] + 10)
     tc4.bind_click(orientswitch)
     tc4.bind_click_trees(tc4.toggle_collapsed, 3)
 
     # Run mainloop
     cf.mainloop()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
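
For casual use the module reduces to one entry point; a minimal sketch with a hypothetical bracketed tree (needs a display):

from nltk.tree import Tree
from nltk.draw.tree import draw_trees

t = Tree.fromstring("(S (NP the cat) (VP sat))")
# Opens a TreeView window; Ctrl-p prints to Postscript, Ctrl-x closes it.
draw_trees(t)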
diff --git a/nlp_resource_data/nltk/draw/tree.pyc b/nlp_resource_data/nltk/draw/tree.pyc
deleted file mode 100755 (executable)
index 9be2391..0000000
Binary files a/nlp_resource_data/nltk/draw/tree.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/draw/util.py b/nlp_resource_data/nltk/draw/util.py
old mode 100755 (executable)
new mode 100644 (file)
index e4006bc..be2db58
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Drawing utilities
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -34,11 +34,23 @@ homepage (http://www.ags.uni-sb.de/~konrad/clig.html).
 
 """
 from abc import ABCMeta, abstractmethod
-from six import add_metaclass
-from six.moves.tkinter import (Button, Canvas, Entry, Frame, Label, Menu,
-                               Menubutton, Scrollbar, StringVar, Text, Tk,
-                               Toplevel, Widget, RAISED)
-from six.moves.tkinter_tkfiledialog import asksaveasfilename
+from tkinter import (
+    Button,
+    Canvas,
+    Entry,
+    Frame,
+    Label,
+    Menu,
+    Menubutton,
+    Scrollbar,
+    StringVar,
+    Text,
+    Tk,
+    Toplevel,
+    Widget,
+    RAISED,
+)
+from tkinter.filedialog import asksaveasfilename
 
 from nltk.util import in_idle
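
This import hunk is the heart of the 2-to-3 conversion for the draw package: the six.moves indirection is dropped outright in favour of the Python 3 names. For contrast, a dual-version shim, which this commit deliberately does not keep, would look roughly like:

try:                      # Python 3
    from tkinter import Tk
    from tkinter.filedialog import asksaveasfilename
except ImportError:       # Python 2
    from Tkinter import Tk
    from tkFileDialog import asksaveasfilename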
 
@@ -47,8 +59,7 @@ from nltk.util import in_idle
 ##//////////////////////////////////////////////////////
 
 
-@add_metaclass(ABCMeta)
-class CanvasWidget(object):
+class CanvasWidget(metaclass=ABCMeta):
     """
     A collection of graphical elements and bindings used to display a
     complex object on a Tkinter ``Canvas``.  A canvas widget is
@@ -162,6 +173,7 @@ class CanvasWidget(object):
         argument, which is the ``CanvasWidget`` that triggered the
         callback.
     """
+
     def __init__(self, canvas, parent=None, **attribs):
         """
         Create a new canvas widget.  This constructor should only be
@@ -176,17 +188,18 @@ class CanvasWidget(object):
         :param attribs: The new canvas widget's attributes.
         """
         if self.__class__ == CanvasWidget:
-            raise TypeError('CanvasWidget is an abstract base class')
+            raise TypeError("CanvasWidget is an abstract base class")
 
         if not isinstance(canvas, Canvas):
-            raise TypeError('Expected a canvas!')
+            raise TypeError("Expected a canvas!")
 
         self.__canvas = canvas
         self.__parent = parent
 
         # If the subclass constructor called _add_child_widget, then
         # self.__children will already exist.
-        if not hasattr(self, '_CanvasWidget__children'): self.__children = []
+        if not hasattr(self, "_CanvasWidget__children"):
+            self.__children = []
 
         # Is this widget hidden?
         self.__hidden = 0
@@ -201,19 +214,17 @@ class CanvasWidget(object):
         self.__draggable = 0
 
         # Set up attributes.
-        for (attr, value) in list(attribs.items()): self[attr] = value
+        for (attr, value) in list(attribs.items()):
+            self[attr] = value
 
         # Manage this canvas widget
         self._manage()
 
         # Register any new bindings
         for tag in self._tags():
-            self.__canvas.tag_bind(tag, '<ButtonPress-1>',
-                                   self.__press_cb)
-            self.__canvas.tag_bind(tag, '<ButtonPress-2>',
-                                   self.__press_cb)
-            self.__canvas.tag_bind(tag, '<ButtonPress-3>',
-                                   self.__press_cb)
+            self.__canvas.tag_bind(tag, "<ButtonPress-1>", self.__press_cb)
+            self.__canvas.tag_bind(tag, "<ButtonPress-2>", self.__press_cb)
+            self.__canvas.tag_bind(tag, "<ButtonPress-3>", self.__press_cb)
 
     ##//////////////////////////////////////////////////////
     ##  Inherited methods.
@@ -228,8 +239,10 @@ class CanvasWidget(object):
             specified with respect to the coordinate space of the ``Canvas``.
         :rtype: tuple(int, int, int, int)
         """
-        if self.__hidden: return (0,0,0,0)
-        if len(self.tags()) == 0: raise ValueError('No tags')
+        if self.__hidden:
+            return (0, 0, 0, 0)
+        if len(self.tags()) == 0:
+            raise ValueError("No tags")
         return self.__canvas.bbox(*self.tags())
 
     def width(self):
@@ -238,9 +251,10 @@ class CanvasWidget(object):
             its ``Canvas``'s coordinate space.
         :rtype: int
         """
-        if len(self.tags()) == 0: raise ValueError('No tags')
+        if len(self.tags()) == 0:
+            raise ValueError("No tags")
         bbox = self.__canvas.bbox(*self.tags())
-        return bbox[2]-bbox[0]
+        return bbox[2] - bbox[0]
 
     def height(self):
         """
@@ -248,9 +262,10 @@ class CanvasWidget(object):
             its ``Canvas``'s coordinate space.
         :rtype: int
         """
-        if len(self.tags()) == 0: raise ValueError('No tags')
+        if len(self.tags()) == 0:
+            raise ValueError("No tags")
         bbox = self.__canvas.bbox(*self.tags())
-        return bbox[3]-bbox[1]
+        return bbox[3] - bbox[1]
 
     def parent(self):
         """
@@ -292,12 +307,14 @@ class CanvasWidget(object):
             downwards.
         :rtype: None
         """
-        if dx == dy == 0: return
+        if dx == dy == 0:
+            return
         for tag in self.tags():
             self.__canvas.move(tag, dx, dy)
-        if self.__parent: self.__parent.update(self)
+        if self.__parent:
+            self.__parent.update(self)
 
-    def moveto(self, x, y, anchor='NW'):
+    def moveto(self, x, y, anchor="NW"):
         """
         Move this canvas widget to the given location.  In particular,
         shift the canvas widget such that the corner or side of the
@@ -311,15 +328,23 @@ class CanvasWidget(object):
             specifies the top center; ``'NE'`` specifies the top right
             corner; etc.
         """
-        x1,y1,x2,y2 = self.bbox()
-        if anchor == 'NW': self.move(x-x1,        y-y1)
-        if anchor == 'N':  self.move(x-x1/2-x2/2, y-y1)
-        if anchor == 'NE': self.move(x-x2,        y-y1)
-        if anchor == 'E':  self.move(x-x2,        y-y1/2-y2/2)
-        if anchor == 'SE': self.move(x-x2,        y-y2)
-        if anchor == 'S':  self.move(x-x1/2-x2/2, y-y2)
-        if anchor == 'SW': self.move(x-x1,        y-y2)
-        if anchor == 'W':  self.move(x-x1,        y-y1/2-y2/2)
+        x1, y1, x2, y2 = self.bbox()
+        if anchor == "NW":
+            self.move(x - x1, y - y1)
+        if anchor == "N":
+            self.move(x - x1 / 2 - x2 / 2, y - y1)
+        if anchor == "NE":
+            self.move(x - x2, y - y1)
+        if anchor == "E":
+            self.move(x - x2, y - y1 / 2 - y2 / 2)
+        if anchor == "SE":
+            self.move(x - x2, y - y2)
+        if anchor == "S":
+            self.move(x - x1 / 2 - x2 / 2, y - y2)
+        if anchor == "SW":
+            self.move(x - x1, y - y2)
+        if anchor == "W":
+            self.move(x - x1, y - y1 / 2 - y2 / 2)
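
A short sketch of the anchor semantics (same assumptions as the geometry sketch above):

    from nltk.draw.util import CanvasFrame, TextWidget

    cf = CanvasFrame(width=200, height=120)
    tw = TextWidget(cf.canvas(), "hello")
    cf.add_widget(tw, 10, 10)
    tw.moveto(100, 60, anchor="N")  # the top-center of the text lands at (100, 60)
    cf.destroy()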
 
     def destroy(self):
         """
@@ -340,9 +365,9 @@ class CanvasWidget(object):
             return
 
         for tag in self.tags():
-            self.__canvas.tag_unbind(tag, '<ButtonPress-1>')
-            self.__canvas.tag_unbind(tag, '<ButtonPress-2>')
-            self.__canvas.tag_unbind(tag, '<ButtonPress-3>')
+            self.__canvas.tag_unbind(tag, "<ButtonPress-1>")
+            self.__canvas.tag_unbind(tag, "<ButtonPress-2>")
+            self.__canvas.tag_unbind(tag, "<ButtonPress-3>")
         self.__canvas.delete(*self.tags())
         self.__canvas = None
 
@@ -355,17 +380,20 @@ class CanvasWidget(object):
         :param child: The child widget that changed.
         :type child: CanvasWidget
         """
-        if self.__hidden or child.__hidden: return
+        if self.__hidden or child.__hidden:
+            return
         # If we're already updating, then do nothing.  This prevents
         # infinite loops when _update modifies its children.
-        if self.__updating: return
+        if self.__updating:
+            return
         self.__updating = 1
 
         # Update this CanvasWidget.
         self._update(child)
 
         # Propagate update request to the parent.
-        if self.__parent: self.__parent.update(self)
+        if self.__parent:
+            self.__parent.update(self)
 
         # We're done updating.
         self.__updating = 0
@@ -376,8 +404,10 @@ class CanvasWidget(object):
 
         :rtype: None
         """
-        if self.__hidden: return
-        for child in self.__children: child.manage()
+        if self.__hidden:
+            return
+        for child in self.__children:
+            child.manage()
         self._manage()
 
     def tags(self):
@@ -388,7 +418,7 @@ class CanvasWidget(object):
         :rtype: list of int
         """
         if self.__canvas is None:
-            raise ValueError('Attempt to access a destroyed canvas widget')
+            raise ValueError("Attempt to access a destroyed canvas widget")
         tags = []
         tags += self._tags()
         for child in self.__children:
@@ -403,10 +433,10 @@ class CanvasWidget(object):
 
         :rtype: None
         """
-        if attr == 'draggable':
+        if attr == "draggable":
             self.__draggable = value
         else:
-            raise ValueError('Unknown attribute %r' % attr)
+            raise ValueError("Unknown attribute %r" % attr)
 
     def __getitem__(self, attr):
         """
@@ -415,17 +445,17 @@ class CanvasWidget(object):
             canvas widget.
         :rtype: (any)
         """
-        if attr == 'draggable':
+        if attr == "draggable":
             return self.__draggable
         else:
-            raise ValueError('Unknown attribute %r' % attr)
+            raise ValueError("Unknown attribute %r" % attr)
 
     def __repr__(self):
         """
         :return: a string representation of this canvas widget.
         :rtype: str
         """
-        return '<%s>' % self.__class__.__name__
+        return "<%s>" % self.__class__.__name__
 
     def hide(self):
         """
@@ -435,7 +465,7 @@ class CanvasWidget(object):
         """
         self.__hidden = 1
         for tag in self.tags():
-            self.__canvas.itemconfig(tag, state='hidden')
+            self.__canvas.itemconfig(tag, state="hidden")
 
     def show(self):
         """
@@ -445,7 +475,7 @@ class CanvasWidget(object):
         """
         self.__hidden = 0
         for tag in self.tags():
-            self.__canvas.itemconfig(tag, state='normal')
+            self.__canvas.itemconfig(tag, state="normal")
 
     def hidden(self):
         """
@@ -486,7 +516,7 @@ class CanvasWidget(object):
             will be called with this ``CanvasWidget`` as its argument.
         """
         self.__draggable = 1
-        self.__callbacks['drag'] = callback
+        self.__callbacks["drag"] = callback
 
     def unbind_click(self, button=1):
         """
@@ -497,15 +527,19 @@ class CanvasWidget(object):
             this ``CanvasWidget``.  Typically, this should be 1 (left
             button), 3 (right button), or 2 (middle button).
         """
-        try: del self.__callbacks[button]
-        except: pass
+        try:
+            del self.__callbacks[button]
+        except KeyError:  # del can only raise KeyError here
+            pass
 
     def unbind_drag(self):
         """
         Remove a callback that was registered with ``bind_drag``.
         """
-        try: del self.__callbacks['drag']
-        except: pass
+        try:
+            del self.__callbacks["drag"]
+        except KeyError:  # del can only raise KeyError here
+            pass
 
     ##//////////////////////////////////////////////////////
     ##  Callback internals
@@ -521,13 +555,15 @@ class CanvasWidget(object):
         """
         # If we're already waiting for a button release, then ignore
         # this new button press.
-        if (self.__canvas.bind('<ButtonRelease-1>') or
-            self.__canvas.bind('<ButtonRelease-2>') or
-            self.__canvas.bind('<ButtonRelease-3>')):
+        if (
+            self.__canvas.bind("<ButtonRelease-1>")
+            or self.__canvas.bind("<ButtonRelease-2>")
+            or self.__canvas.bind("<ButtonRelease-3>")
+        ):
             return
 
         # Unbind motion (just in case; this shouldn't be necessary)
-        self.__canvas.unbind('<Motion>')
+        self.__canvas.unbind("<Motion>")
 
         # Record the button press event.
         self.__press = event
@@ -537,14 +573,13 @@ class CanvasWidget(object):
         if event.num == 1:
             widget = self
             while widget is not None:
-                if widget['draggable']:
+                if widget["draggable"]:
                     widget.__start_drag(event)
                     break
                 widget = widget.parent()
 
         # Set up the button release callback.
-        self.__canvas.bind('<ButtonRelease-%d>' % event.num,
-                          self.__release_cb)
+        self.__canvas.bind("<ButtonRelease-%d>" % event.num, self.__release_cb)
 
     def __start_drag(self, event):
         """
@@ -552,7 +587,7 @@ class CanvasWidget(object):
           - register a motion callback
           - record the drag coordinates
         """
-        self.__canvas.bind('<Motion>', self.__motion_cb)
+        self.__canvas.bind("<Motion>", self.__motion_cb)
         self.__drag_x = event.x
         self.__drag_y = event.y
 
@@ -562,7 +597,7 @@ class CanvasWidget(object):
           - move this object to the new location
           - record the new drag coordinates
         """
-        self.move(event.x-self.__drag_x, event.y-self.__drag_y)
+        self.move(event.x - self.__drag_x, event.y - self.__drag_y)
         self.__drag_x = event.x
         self.__drag_y = event.y
 
@@ -574,16 +609,19 @@ class CanvasWidget(object):
           - call the appropriate handler.
         """
         # Unbind the button release & motion callbacks.
-        self.__canvas.unbind('<ButtonRelease-%d>' % event.num)
-        self.__canvas.unbind('<Motion>')
+        self.__canvas.unbind("<ButtonRelease-%d>" % event.num)
+        self.__canvas.unbind("<Motion>")
 
         # Is it a click or a drag?
-        if (event.time - self.__press.time < 100 and
-            abs(event.x-self.__press.x) + abs(event.y-self.__press.y) < 5):
+        if (
+            event.time - self.__press.time < 100
+            and abs(event.x - self.__press.x) + abs(event.y - self.__press.y) < 5
+        ):
             # Move it back, if we were dragging.
             if self.__draggable and event.num == 1:
-                self.move(self.__press.x - self.__drag_x,
-                          self.__press.y - self.__drag_y)
+                self.move(
+                    self.__press.x - self.__drag_x, self.__press.y - self.__drag_y
+                )
             self.__click(event.num)
         elif event.num == 1:
             self.__drag()
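
The click-versus-drag rule used above can be stated on its own; a sketch with hypothetical stand-ins for the Tk event objects:

    from collections import namedtuple

    Event = namedtuple("Event", "x y time")  # hypothetical stand-in for a Tk event

    def is_click(press, release, max_ms=100, max_px=5):
        # Same thresholds as __release_cb: quick, and nearly motionless.
        moved = abs(release.x - press.x) + abs(release.y - press.y)
        return (release.time - press.time) < max_ms and moved < max_px

    print(is_click(Event(10, 10, 0), Event(12, 11, 60)))  # True  -> click
    print(is_click(Event(10, 10, 0), Event(40, 10, 60)))  # False -> drag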
@@ -597,12 +635,12 @@ class CanvasWidget(object):
         call it.  If no ancestors have a drag callback, do nothing.
         """
         if self.__draggable:
-            if 'drag' in self.__callbacks:
-                cb = self.__callbacks['drag']
+            if "drag" in self.__callbacks:
+                cb = self.__callbacks["drag"]
                 try:
                     cb(self)
                 except Exception:
-                    print('Error in drag callback for %r' % self)
+                    print("Error in drag callback for %r" % self)
         elif self.__parent is not None:
             self.__parent.__drag()
 
@@ -614,10 +652,10 @@ class CanvasWidget(object):
         """
         if button in self.__callbacks:
             cb = self.__callbacks[button]
-            #try:
+            # try:
             cb(self)
-            #except:
-            #    print 'Error in click callback for %r' % self
+            # except:
+            #    print('Error in click callback for %r' % self)
             #    raise
         elif self.__parent is not None:
             self.__parent.__click(button)
@@ -639,9 +677,10 @@ class CanvasWidget(object):
             have a parent.
         :type child: CanvasWidget
         """
-        if not hasattr(self, '_CanvasWidget__children'): self.__children = []
+        if not hasattr(self, "_CanvasWidget__children"):
+            self.__children = []
         if child.__parent is not None:
-            raise ValueError('%s already has a parent', child)
+            raise ValueError("{} already has a parent".format(child))
         child.__parent = self
         self.__children.append(child)
 
@@ -694,10 +733,12 @@ class CanvasWidget(object):
         :rtype: None
         """
 
+
 ##//////////////////////////////////////////////////////
 ##  Basic widgets.
 ##//////////////////////////////////////////////////////
 
+
 class TextWidget(CanvasWidget):
     """
     A canvas widget that displays a single string of text.
@@ -711,6 +752,7 @@ class TextWidget(CanvasWidget):
         this width, it will be line-wrapped at whitespace.
       - ``draggable``: whether the text can be dragged by the user.
     """
+
     def __init__(self, canvas, text, **attribs):
         """
         Create a new text widget.
@@ -726,29 +768,32 @@ class TextWidget(CanvasWidget):
         CanvasWidget.__init__(self, canvas, **attribs)
 
     def __setitem__(self, attr, value):
-        if attr in ('color', 'font', 'justify', 'width'):
-            if attr == 'color': attr = 'fill'
-            self.canvas().itemconfig(self._tag, {attr:value})
+        if attr in ("color", "font", "justify", "width"):
+            if attr == "color":
+                attr = "fill"
+            self.canvas().itemconfig(self._tag, {attr: value})
         else:
             CanvasWidget.__setitem__(self, attr, value)
 
     def __getitem__(self, attr):
-        if attr == 'width':
+        if attr == "width":
             return int(self.canvas().itemcget(self._tag, attr))
-        elif attr in ('color', 'font', 'justify'):
-            if attr == 'color': attr = 'fill'
+        elif attr in ("color", "font", "justify"):
+            if attr == "color":
+                attr = "fill"
             return self.canvas().itemcget(self._tag, attr)
         else:
             return CanvasWidget.__getitem__(self, attr)
 
-    def _tags(self): return [self._tag]
+    def _tags(self):
+        return [self._tag]
 
     def text(self):
         """
         :return: The text displayed by this text widget.
         :rtype: str
         """
-        return self.canvas().itemcget(self._tag, 'TEXT')
+        return self.canvas().itemcget(self._tag, "TEXT")
 
     def set_text(self, text):
         """
@@ -763,7 +808,8 @@ class TextWidget(CanvasWidget):
             self.parent().update(self)
 
     def __repr__(self):
-        return '[Text: %r]' % self._text
+        return "[Text: %r]" % self._text
+
 
 class SymbolWidget(TextWidget):
     """
@@ -782,16 +828,27 @@ class SymbolWidget(TextWidget):
     :cvar SYMBOLS: A dictionary mapping from symbols to the character
         in the ``symbol`` font used to render them.
     """
-    SYMBOLS = {'neg':'\330', 'disj':'\332', 'conj': '\331',
-               'lambda': '\154', 'merge': '\304',
-               'forall': '\042', 'exists': '\044',
-               'subseteq': '\315', 'subset': '\314',
-               'notsubset': '\313', 'emptyset': '\306',
-               'imp': '\336', 'rightarrow': chr(222), #'\256',
-               'equal': '\75', 'notequal': '\271',
-               'intersection': '\307', 'union': '\310',
-               'epsilon': 'e',
-               }
+
+    SYMBOLS = {
+        "neg": "\330",
+        "disj": "\332",
+        "conj": "\331",
+        "lambda": "\154",
+        "merge": "\304",
+        "forall": "\042",
+        "exists": "\044",
+        "subseteq": "\315",
+        "subset": "\314",
+        "notsubset": "\313",
+        "emptyset": "\306",
+        "imp": "\336",
+        "rightarrow": chr(222),  #'\256',
+        "equal": "\75",
+        "notequal": "\271",
+        "intersection": "\307",
+        "union": "\310",
+        "epsilon": "e",
+    }
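
A usage sketch for the table above (display and nltk.draw.util import assumed):

    from nltk.draw.util import CanvasFrame, SymbolWidget

    cf = CanvasFrame(width=100, height=60)
    neg = SymbolWidget(cf.canvas(), "neg")  # rendered with the Tk "symbol" font
    cf.add_widget(neg, 40, 20)
    print(neg.symbol())                     # -> 'neg'
    cf.destroy()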
 
     def __init__(self, canvas, symbol, **attribs):
         """
@@ -803,8 +860,8 @@ class SymbolWidget(TextWidget):
         :param symbol: The name of the symbol to display.
         :param attribs: The new canvas widget's attributes.
         """
-        attribs['font'] = 'symbol'
-        TextWidget.__init__(self, canvas, '', **attribs)
+        attribs["font"] = "symbol"
+        TextWidget.__init__(self, canvas, "", **attribs)
         self.set_symbol(symbol)
 
     def symbol(self):
@@ -823,12 +880,12 @@ class SymbolWidget(TextWidget):
         :param symbol: The name of the symbol to display.
         """
         if symbol not in SymbolWidget.SYMBOLS:
-            raise ValueError('Unknown symbol: %s' % symbol)
+            raise ValueError("Unknown symbol: %s" % symbol)
         self._symbol = symbol
         self.set_text(SymbolWidget.SYMBOLS[symbol])
 
     def __repr__(self):
-        return '[Symbol: %r]' % self._symbol
+        return "[Symbol: %r]" % self._symbol
 
     @staticmethod
     def symbolsheet(size=20):
@@ -838,24 +895,28 @@ class SymbolWidget(TextWidget):
         ``SymbolWidget.SYMBOLS`` dictionary.
         """
         top = Tk()
-        def destroy(e, top=top): top.destroy()
-        top.bind('q', destroy)
-        Button(top, text='Quit', command=top.destroy).pack(side='bottom')
-        text = Text(top, font=('helvetica', -size), width=20, height=30)
-        text.pack(side='left')
-        sb=Scrollbar(top, command=text.yview)
-        text['yscrollcommand']=sb.set
-        sb.pack(side='right', fill='y')
-        text.tag_config('symbol', font=('symbol', -size))
+
+        def destroy(e, top=top):
+            top.destroy()
+
+        top.bind("q", destroy)
+        Button(top, text="Quit", command=top.destroy).pack(side="bottom")
+        text = Text(top, font=("helvetica", -size), width=20, height=30)
+        text.pack(side="left")
+        sb = Scrollbar(top, command=text.yview)
+        text["yscrollcommand"] = sb.set
+        sb.pack(side="right", fill="y")
+        text.tag_config("symbol", font=("symbol", -size))
         for i in range(256):
-            if i in (0,10): continue # null and newline
-            for k,v in list(SymbolWidget.SYMBOLS.items()):
+            if i in (0, 10):
+                continue  # null and newline
+            for k, v in list(SymbolWidget.SYMBOLS.items()):
                 if v == chr(i):
-                    text.insert('end', '%-10s\t' % k)
+                    text.insert("end", "%-10s\t" % k)
                     break
             else:
-                text.insert('end', '%-10d  \t' % i)
-            text.insert('end', '[%s]\n' % chr(i), 'symbol')
+                text.insert("end", "%-10d  \t" % i)
+            text.insert("end", "[%s]\n" % chr(i), "symbol")
         top.mainloop()
 
 
@@ -869,6 +930,7 @@ class AbstractContainerWidget(CanvasWidget):
     and any subclasses that define attributes should define
     ``__setitem__`` and ``__getitem__``.
     """
+
     def __init__(self, canvas, child, **attribs):
         """
         Create a new container widget.  This constructor should only
@@ -911,8 +973,10 @@ class AbstractContainerWidget(CanvasWidget):
 
     def __repr__(self):
         name = self.__class__.__name__
-        if name[-6:] == 'Widget': name = name[:-6]
-        return '[%s: %r]' % (name, self._child)
+        if name[-6:] == "Widget":
+            name = name[:-6]
+        return "[%s: %r]" % (name, self._child)
+
 
 class BoxWidget(AbstractContainerWidget):
     """
@@ -926,6 +990,7 @@ class BoxWidget(AbstractContainerWidget):
         and the box.
       - ``draggable``: whether the text can be dragged by the user.
     """
+
     def __init__(self, canvas, child, **attribs):
         """
         Create a new box widget.
@@ -939,33 +1004,38 @@ class BoxWidget(AbstractContainerWidget):
         """
         self._child = child
         self._margin = 1
-        self._box = canvas.create_rectangle(1,1,1,1)
+        self._box = canvas.create_rectangle(1, 1, 1, 1)
         canvas.tag_lower(self._box)
         AbstractContainerWidget.__init__(self, canvas, child, **attribs)
 
     def __setitem__(self, attr, value):
-        if attr == 'margin': self._margin = value
-        elif attr in ('outline', 'fill', 'width'):
-            self.canvas().itemconfig(self._box, {attr:value})
+        if attr == "margin":
+            self._margin = value
+        elif attr in ("outline", "fill", "width"):
+            self.canvas().itemconfig(self._box, {attr: value})
         else:
             CanvasWidget.__setitem__(self, attr, value)
 
     def __getitem__(self, attr):
-        if attr == 'margin': return self._margin
-        elif attr == 'width':
+        if attr == "margin":
+            return self._margin
+        elif attr == "width":
             return float(self.canvas().itemcget(self._box, attr))
-        elif attr in ('outline', 'fill', 'width'):
+        elif attr in ("outline", "fill", "width"):
             return self.canvas().itemcget(self._box, attr)
         else:
             return CanvasWidget.__getitem__(self, attr)
 
     def _update(self, child):
         (x1, y1, x2, y2) = child.bbox()
-        margin = self._margin + self['width']/2
-        self.canvas().coords(self._box, x1-margin, y1-margin,
-                             x2+margin, y2+margin)
+        margin = self._margin + self["width"] / 2
+        self.canvas().coords(
+            self._box, x1 - margin, y1 - margin, x2 + margin, y2 + margin
+        )
+
+    def _tags(self):
+        return [self._box]
 
-    def _tags(self): return [self._box]
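
A minimal BoxWidget sketch (display assumed); per _update above, the rectangle is kept margin + width/2 pixels outside the child's bounding box:

    from nltk.draw.util import BoxWidget, CanvasFrame, TextWidget

    cf = CanvasFrame(width=200, height=100)
    canvas = cf.canvas()
    box = BoxWidget(canvas, TextWidget(canvas, "boxed"), margin=10, outline="blue")
    cf.add_widget(box, 20, 20)
    cf.destroy()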
 
 class OvalWidget(AbstractContainerWidget):
     """
@@ -980,6 +1050,7 @@ class OvalWidget(AbstractContainerWidget):
       - ``draggable``: whether the text can be dragged by the user.
       - ``double``: If true, then a double-oval is drawn.
     """
+
     def __init__(self, canvas, child, **attribs):
         """
         Create a new oval widget.
@@ -993,11 +1064,11 @@ class OvalWidget(AbstractContainerWidget):
         """
         self._child = child
         self._margin = 1
-        self._oval = canvas.create_oval(1,1,1,1)
-        self._circle = attribs.pop('circle', False)
-        self._double = attribs.pop('double', False)
+        self._oval = canvas.create_oval(1, 1, 1, 1)
+        self._circle = attribs.pop("circle", False)
+        self._double = attribs.pop("double", False)
         if self._double:
-            self._oval2 = canvas.create_oval(1,1,1,1)
+            self._oval2 = canvas.create_oval(1, 1, 1, 1)
         else:
             self._oval2 = None
         canvas.tag_lower(self._oval)
@@ -1005,34 +1076,42 @@ class OvalWidget(AbstractContainerWidget):
 
     def __setitem__(self, attr, value):
         c = self.canvas()
-        if attr == 'margin': self._margin = value
-        elif attr == 'double':
-            if value==True and self._oval2 is None:
+        if attr == "margin":
+            self._margin = value
+        elif attr == "double":
+            if value and self._oval2 is None:
                 # Copy attributes & position from self._oval.
                 x1, y1, x2, y2 = c.bbox(self._oval)
-                w = self['width']*2
-                self._oval2 = c.create_oval(x1-w, y1-w, x2+w, y2+w,
-                                outline=c.itemcget(self._oval, 'outline'),
-                                width=c.itemcget(self._oval, 'width'))
+                w = self["width"] * 2
+                self._oval2 = c.create_oval(
+                    x1 - w,
+                    y1 - w,
+                    x2 + w,
+                    y2 + w,
+                    outline=c.itemcget(self._oval, "outline"),
+                    width=c.itemcget(self._oval, "width"),
+                )
                 c.tag_lower(self._oval2)
-            if value==False and self._oval2 is not None:
+            if not value and self._oval2 is not None:
                 c.delete(self._oval2)
                 self._oval2 = None
-        elif attr in ('outline', 'fill', 'width'):
-            c.itemconfig(self._oval, {attr:value})
-            if self._oval2 is not None and attr!='fill':
-                c.itemconfig(self._oval2, {attr:value})
-            if self._oval2 is not None and attr!='fill':
-                self.canvas().itemconfig(self._oval2, {attr:value})
+        elif attr in ("outline", "fill", "width"):
+            c.itemconfig(self._oval, {attr: value})
+            if self._oval2 is not None and attr != "fill":
+                c.itemconfig(self._oval2, {attr: value})
+            if self._oval2 is not None and attr != "fill":
+                self.canvas().itemconfig(self._oval2, {attr: value})
         else:
             CanvasWidget.__setitem__(self, attr, value)
 
     def __getitem__(self, attr):
-        if attr == 'margin': return self._margin
-        elif attr == 'double': return self._double is not None
-        elif attr == 'width':
+        if attr == "margin":
+            return self._margin
+        elif attr == "double":
+            return self._oval2 is not None  # _double may be stale; the live oval is authoritative
+        elif attr == "width":
             return float(self.canvas().itemcget(self._oval, attr))
-        elif attr in ('outline', 'fill', 'width'):
+        elif attr in ("outline", "fill", "width"):
             return self.canvas().itemcget(self._oval, attr)
         else:
             return CanvasWidget.__getitem__(self, attr)
@@ -1047,24 +1126,30 @@ class OvalWidget(AbstractContainerWidget):
 
         # If we're a circle, pretend our contents are square.
         if self._circle:
-            dx, dy = abs(x1-x2), abs(y1-y2)
+            dx, dy = abs(x1 - x2), abs(y1 - y2)
             if dx > dy:
-                y = (y1+y2)/2
-                y1, y2 = y-dx/2, y+dx/2
+                y = (y1 + y2) / 2
+                y1, y2 = y - dx / 2, y + dx / 2
             elif dy > dx:
-                x = (x1+x2)/2
-                x1, x2 = x-dy/2, x+dy/2
+                x = (x1 + x2) / 2
+                x1, x2 = x - dy / 2, x + dy / 2
 
         # Find the four corners.
-        left = int(( x1*(1+R) + x2*(1-R) ) / 2)
-        right = left + int((x2-x1)*R)
-        top = int(( y1*(1+R) + y2*(1-R) ) / 2)
-        bot = top + int((y2-y1)*R)
-        self.canvas().coords(self._oval, left-margin, top-margin,
-                             right+margin, bot+margin)
+        left = int((x1 * (1 + R) + x2 * (1 - R)) / 2)
+        right = left + int((x2 - x1) * R)
+        top = int((y1 * (1 + R) + y2 * (1 - R)) / 2)
+        bot = top + int((y2 - y1) * R)
+        self.canvas().coords(
+            self._oval, left - margin, top - margin, right + margin, bot + margin
+        )
         if self._oval2 is not None:
-            self.canvas().coords(self._oval2, left-margin+2, top-margin+2,
-                                 right+margin-2, bot+margin-2)
+            self.canvas().coords(
+                self._oval2,
+                left - margin + 2,
+                top - margin + 2,
+                right + margin - 2,
+                bot + margin - 2,
+            )
 
     def _tags(self):
         if self._oval2 is None:
@@ -1072,6 +1157,7 @@ class OvalWidget(AbstractContainerWidget):
         else:
             return [self._oval, self._oval2]
 
+
 class ParenWidget(AbstractContainerWidget):
     """
     A canvas widget that places a pair of parentheses around a child
@@ -1082,6 +1168,7 @@ class ParenWidget(AbstractContainerWidget):
       - ``width``: The width of the parentheses.
       - ``draggable``: whether the text can be dragged by the user.
     """
+
     def __init__(self, canvas, child, **attribs):
         """
         Create a new parenthesis widget.
@@ -1094,37 +1181,37 @@ class ParenWidget(AbstractContainerWidget):
         :param attribs: The new canvas widget's attributes.
         """
         self._child = child
-        self._oparen = canvas.create_arc(1,1,1,1, style='arc',
-                                         start=90, extent=180)
-        self._cparen = canvas.create_arc(1,1,1,1, style='arc',
-                                         start=-90, extent=180)
+        self._oparen = canvas.create_arc(1, 1, 1, 1, style="arc", start=90, extent=180)
+        self._cparen = canvas.create_arc(1, 1, 1, 1, style="arc", start=-90, extent=180)
         AbstractContainerWidget.__init__(self, canvas, child, **attribs)
 
     def __setitem__(self, attr, value):
-        if attr == 'color':
+        if attr == "color":
             self.canvas().itemconfig(self._oparen, outline=value)
             self.canvas().itemconfig(self._cparen, outline=value)
-        elif attr == 'width':
+        elif attr == "width":
             self.canvas().itemconfig(self._oparen, width=value)
             self.canvas().itemconfig(self._cparen, width=value)
         else:
             CanvasWidget.__setitem__(self, attr, value)
 
     def __getitem__(self, attr):
-        if attr == 'color':
-            return self.canvas().itemcget(self._oparen, 'outline')
-        elif attr == 'width':
-            return self.canvas().itemcget(self._oparen, 'width')
+        if attr == "color":
+            return self.canvas().itemcget(self._oparen, "outline")
+        elif attr == "width":
+            return self.canvas().itemcget(self._oparen, "width")
         else:
             return CanvasWidget.__getitem__(self, attr)
 
     def _update(self, child):
         (x1, y1, x2, y2) = child.bbox()
-        width = max((y2-y1)/6, 4)
-        self.canvas().coords(self._oparen, x1-width, y1, x1+width, y2)
-        self.canvas().coords(self._cparen, x2-width, y1, x2+width, y2)
+        width = max((y2 - y1) / 6, 4)
+        self.canvas().coords(self._oparen, x1 - width, y1, x1 + width, y2)
+        self.canvas().coords(self._cparen, x2 - width, y1, x2 + width, y2)
+
+    def _tags(self):
+        return [self._oparen, self._cparen]
 
-    def _tags(self): return [self._oparen, self._cparen]
 
 class BracketWidget(AbstractContainerWidget):
     """
@@ -1136,6 +1223,7 @@ class BracketWidget(AbstractContainerWidget):
       - ``width``: The width of the brackets.
       - ``draggable``: whether the text can be dragged by the user.
     """
+
     def __init__(self, canvas, child, **attribs):
         """
         Create a new bracket widget.
@@ -1148,37 +1236,41 @@ class BracketWidget(AbstractContainerWidget):
         :param attribs: The new canvas widget's attributes.
         """
         self._child = child
-        self._obrack = canvas.create_line(1,1,1,1,1,1,1,1)
-        self._cbrack = canvas.create_line(1,1,1,1,1,1,1,1)
+        self._obrack = canvas.create_line(1, 1, 1, 1, 1, 1, 1, 1)
+        self._cbrack = canvas.create_line(1, 1, 1, 1, 1, 1, 1, 1)
         AbstractContainerWidget.__init__(self, canvas, child, **attribs)
 
     def __setitem__(self, attr, value):
-        if attr == 'color':
+        if attr == "color":
             self.canvas().itemconfig(self._obrack, fill=value)
             self.canvas().itemconfig(self._cbrack, fill=value)
-        elif attr == 'width':
+        elif attr == "width":
             self.canvas().itemconfig(self._obrack, width=value)
             self.canvas().itemconfig(self._cbrack, width=value)
         else:
             CanvasWidget.__setitem__(self, attr, value)
 
     def __getitem__(self, attr):
-        if attr == 'color':
-            return self.canvas().itemcget(self._obrack, 'outline')
-        elif attr == 'width':
-            return self.canvas().itemcget(self._obrack, 'width')
+        if attr == "color":
+            return self.canvas().itemcget(self._obrack, "outline")
+        elif attr == "width":
+            return self.canvas().itemcget(self._obrack, "width")
         else:
             return CanvasWidget.__getitem__(self, attr)
 
     def _update(self, child):
         (x1, y1, x2, y2) = child.bbox()
-        width = max((y2-y1)/8, 2)
-        self.canvas().coords(self._obrack, x1, y1, x1-width, y1,
-                             x1-width, y2, x1, y2)
-        self.canvas().coords(self._cbrack, x2, y1, x2+width, y1,
-                             x2+width, y2, x2, y2)
+        width = max((y2 - y1) / 8, 2)
+        self.canvas().coords(
+            self._obrack, x1, y1, x1 - width, y1, x1 - width, y2, x1, y2
+        )
+        self.canvas().coords(
+            self._cbrack, x2, y1, x2 + width, y1, x2 + width, y2, x2, y2
+        )
+
+    def _tags(self):
+        return [self._obrack, self._cbrack]
 
-    def _tags(self): return [self._obrack, self._cbrack]
 
 class SequenceWidget(CanvasWidget):
     """
@@ -1194,6 +1286,7 @@ class SequenceWidget(CanvasWidget):
       - ``ordered``: If true, then keep the children in their
         original order.
     """
+
     def __init__(self, canvas, *children, **attribs):
         """
         Create a new sequence widget.
@@ -1205,34 +1298,46 @@ class SequenceWidget(CanvasWidget):
         :type children: list(CanvasWidget)
         :param attribs: The new canvas widget's attributes.
         """
-        self._align = 'center'
+        self._align = "center"
         self._space = 1
         self._ordered = False
         self._children = list(children)
-        for child in children: self._add_child_widget(child)
+        for child in children:
+            self._add_child_widget(child)
         CanvasWidget.__init__(self, canvas, **attribs)
 
     def __setitem__(self, attr, value):
-        if attr == 'align':
-            if value not in ('top', 'bottom', 'center'):
-                raise ValueError('Bad alignment: %r' % value)
+        if attr == "align":
+            if value not in ("top", "bottom", "center"):
+                raise ValueError("Bad alignment: %r" % value)
             self._align = value
-        elif attr == 'space': self._space = value
-        elif attr == 'ordered': self._ordered = value
-        else: CanvasWidget.__setitem__(self, attr, value)
+        elif attr == "space":
+            self._space = value
+        elif attr == "ordered":
+            self._ordered = value
+        else:
+            CanvasWidget.__setitem__(self, attr, value)
 
     def __getitem__(self, attr):
-        if attr == 'align': return self._align
-        elif attr == 'space': return self._space
-        elif attr == 'ordered': return self._ordered
-        else: return CanvasWidget.__getitem__(self, attr)
+        if attr == "align":
+            return self._align
+        elif attr == "space":
+            return self._space
+        elif attr == "ordered":
+            return self._ordered
+        else:
+            return CanvasWidget.__getitem__(self, attr)
 
-    def _tags(self): return []
+    def _tags(self):
+        return []
 
     def _yalign(self, top, bot):
-        if self._align == 'top': return top
-        if self._align == 'bottom': return bot
-        if self._align == 'center': return (top+bot)/2
+        if self._align == "top":
+            return top
+        if self._align == "bottom":
+            return bot
+        if self._align == "center":
+            return (top + bot) / 2
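
A sketch of the align attribute that _yalign implements (display assumed):

    from nltk.draw.util import CanvasFrame, SequenceWidget, TextWidget

    cf = CanvasFrame(width=220, height=80)
    canvas = cf.canvas()
    seq = SequenceWidget(
        canvas,
        TextWidget(canvas, "big", font=("helvetica", 24)),
        TextWidget(canvas, "small", font=("helvetica", 10)),
        align="bottom",  # the bottoms of mixed-size children sit on one line
        space=6,
    )
    cf.add_widget(seq, 10, 10)
    cf.destroy()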
 
     def _update(self, child):
         # Align all children with child.
@@ -1240,27 +1345,28 @@ class SequenceWidget(CanvasWidget):
         y = self._yalign(top, bot)
         for c in self._children:
             (x1, y1, x2, y2) = c.bbox()
-            c.move(0, y-self._yalign(y1,y2))
+            c.move(0, y - self._yalign(y1, y2))
 
         if self._ordered and len(self._children) > 1:
             index = self._children.index(child)
 
             x = right + self._space
-            for i in range(index+1, len(self._children)):
+            for i in range(index + 1, len(self._children)):
                 (x1, y1, x2, y2) = self._children[i].bbox()
                 if x > x1:
-                    self._children[i].move(x-x1, 0)
-                    x += x2-x1 + self._space
+                    self._children[i].move(x - x1, 0)
+                    x += x2 - x1 + self._space
 
             x = left - self._space
-            for i in range(index-1, -1, -1):
+            for i in range(index - 1, -1, -1):
                 (x1, y1, x2, y2) = self._children[i].bbox()
                 if x < x2:
-                    self._children[i].move(x-x2, 0)
-                    x -= x2-x1 + self._space
+                    self._children[i].move(x - x2, 0)
+                    x -= x2 - x1 + self._space
 
     def _manage(self):
-        if len(self._children) == 0: return
+        if len(self._children) == 0:
+            return
         child = self._children[0]
 
         # Align all children with child.
@@ -1271,20 +1377,20 @@ class SequenceWidget(CanvasWidget):
 
         # Line up children to the right of child.
         x = right + self._space
-        for i in range(index+1, len(self._children)):
+        for i in range(index + 1, len(self._children)):
             (x1, y1, x2, y2) = self._children[i].bbox()
-            self._children[i].move(x-x1, y-self._yalign(y1,y2))
-            x += x2-x1 + self._space
+            self._children[i].move(x - x1, y - self._yalign(y1, y2))
+            x += x2 - x1 + self._space
 
         # Line up children to the left of child.
         x = left - self._space
-        for i in range(index-1, -1, -1):
+        for i in range(index - 1, -1, -1):
             (x1, y1, x2, y2) = self._children[i].bbox()
-            self._children[i].move(x-x2, y-self._yalign(y1,y2))
-            x -= x2-x1 + self._space
+            self._children[i].move(x - x2, y - self._yalign(y1, y2))
+            x -= x2 - x1 + self._space
 
     def __repr__(self):
-        return '[Sequence: ' + repr(self._children)[1:-1]+']'
+        return "[Sequence: " + repr(self._children)[1:-1] + "]"
 
     # Provide an alias for the child_widgets() member.
     children = CanvasWidget.child_widgets
@@ -1337,6 +1443,7 @@ class SequenceWidget(CanvasWidget):
         self._children.insert(index, child)
         self._add_child_widget(child)
 
+
 class StackWidget(CanvasWidget):
     """
     A canvas widget that keeps a list of canvas widgets in a vertical
@@ -1351,6 +1458,7 @@ class StackWidget(CanvasWidget):
       - ``ordered``: If true, then keep the children in their
         original order.
     """
+
     def __init__(self, canvas, *children, **attribs):
         """
         Create a new stack widget.
@@ -1362,34 +1470,46 @@ class StackWidget(CanvasWidget):
         :type children: list(CanvasWidget)
         :param attribs: The new canvas widget's attributes.
         """
-        self._align = 'center'
+        self._align = "center"
         self._space = 1
         self._ordered = False
         self._children = list(children)
-        for child in children: self._add_child_widget(child)
+        for child in children:
+            self._add_child_widget(child)
         CanvasWidget.__init__(self, canvas, **attribs)
 
     def __setitem__(self, attr, value):
-        if attr == 'align':
-            if value not in ('left', 'right', 'center'):
-                raise ValueError('Bad alignment: %r' % value)
+        if attr == "align":
+            if value not in ("left", "right", "center"):
+                raise ValueError("Bad alignment: %r" % value)
             self._align = value
-        elif attr == 'space': self._space = value
-        elif attr == 'ordered': self._ordered = value
-        else: CanvasWidget.__setitem__(self, attr, value)
+        elif attr == "space":
+            self._space = value
+        elif attr == "ordered":
+            self._ordered = value
+        else:
+            CanvasWidget.__setitem__(self, attr, value)
 
     def __getitem__(self, attr):
-        if attr == 'align': return self._align
-        elif attr == 'space': return self._space
-        elif attr == 'ordered': return self._ordered
-        else: return CanvasWidget.__getitem__(self, attr)
+        if attr == "align":
+            return self._align
+        elif attr == "space":
+            return self._space
+        elif attr == "ordered":
+            return self._ordered
+        else:
+            return CanvasWidget.__getitem__(self, attr)
 
-    def _tags(self): return []
+    def _tags(self):
+        return []
 
     def _xalign(self, left, right):
-        if self._align == 'left': return left
-        if self._align == 'right': return right
-        if self._align == 'center': return (left+right)/2
+        if self._align == "left":
+            return left
+        if self._align == "right":
+            return right
+        if self._align == "center":
+            return (left + right) / 2
 
     def _update(self, child):
         # Align all children with child.
@@ -1397,27 +1517,28 @@ class StackWidget(CanvasWidget):
         x = self._xalign(left, right)
         for c in self._children:
             (x1, y1, x2, y2) = c.bbox()
-            c.move(x-self._xalign(x1,x2), 0)
+            c.move(x - self._xalign(x1, x2), 0)
 
         if self._ordered and len(self._children) > 1:
             index = self._children.index(child)
 
             y = bot + self._space
-            for i in range(index+1, len(self._children)):
+            for i in range(index + 1, len(self._children)):
                 (x1, y1, x2, y2) = self._children[i].bbox()
                 if y > y1:
-                    self._children[i].move(0, y-y1)
-                    y += y2-y1 + self._space
+                    self._children[i].move(0, y - y1)
+                    y += y2 - y1 + self._space
 
             y = top - self._space
-            for i in range(index-1, -1, -1):
+            for i in range(index - 1, -1, -1):
                 (x1, y1, x2, y2) = self._children[i].bbox()
                 if y < y2:
-                    self._children[i].move(0, y-y2)
-                    y -= y2-y1 + self._space
+                    self._children[i].move(0, y - y2)
+                    y -= y2 - y1 + self._space
 
     def _manage(self):
-        if len(self._children) == 0: return
+        if len(self._children) == 0:
+            return
         child = self._children[0]
 
         # Align all children with child.
@@ -1428,20 +1549,20 @@ class StackWidget(CanvasWidget):
 
         # Line up children below the child.
         y = bot + self._space
-        for i in range(index+1, len(self._children)):
+        for i in range(index + 1, len(self._children)):
             (x1, y1, x2, y2) = self._children[i].bbox()
-            self._children[i].move(x-self._xalign(x1,x2), y-y1)
-            y += y2-y1 + self._space
+            self._children[i].move(x - self._xalign(x1, x2), y - y1)
+            y += y2 - y1 + self._space
 
         # Line up children above the child.
         y = top - self._space
-        for i in range(index-1, -1, -1):
+        for i in range(index - 1, -1, -1):
             (x1, y1, x2, y2) = self._children[i].bbox()
-            self._children[i].move(x-self._xalign(x1,x2), y-y2)
-            y -= y2-y1 + self._space
+            self._children[i].move(x - self._xalign(x1, x2), y - y2)
+            y -= y2 - y1 + self._space
 
     def __repr__(self):
-        return '[Stack: ' + repr(self._children)[1:-1]+']'
+        return "[Stack: " + repr(self._children)[1:-1] + "]"
 
     # Provide an alias for the child_widgets() member.
     children = CanvasWidget.child_widgets
@@ -1494,6 +1615,7 @@ class StackWidget(CanvasWidget):
         self._children.insert(index, child)
         self._add_child_widget(child)
 
+
 class SpaceWidget(CanvasWidget):
     """
     A canvas widget that takes up space but does not display
@@ -1503,6 +1625,7 @@ class SpaceWidget(CanvasWidget):
     height of zero; and if you wish to only create vertical space, use
     a width of zero.
     """
+
     def __init__(self, canvas, width, height, **attribs):
         """
         Create a new space widget.
@@ -1516,9 +1639,11 @@ class SpaceWidget(CanvasWidget):
         :param attribs: The new canvas widget's attributes.
         """
         # For some reason, the created line takes up 4 extra pixels; compensate.
-        if width > 4: width -= 4
-        if height > 4: height -= 4
-        self._tag = canvas.create_line(1, 1, width, height, fill='')
+        if width > 4:
+            width -= 4
+        if height > 4:
+            height -= 4
+        self._tag = canvas.create_line(1, 1, width, height, fill="")
         CanvasWidget.__init__(self, canvas, **attribs)
 
     # note: width() and height() are already defined by CanvasWidget.
@@ -1531,7 +1656,7 @@ class SpaceWidget(CanvasWidget):
         :rtype: None
         """
         [x1, y1, x2, y2] = self.bbox()
-        self.canvas().coords(self._tag, x1, y1, x1+width, y2)
+        self.canvas().coords(self._tag, x1, y1, x1 + width, y2)
 
     def set_height(self, height):
         """
@@ -1542,11 +1667,14 @@ class SpaceWidget(CanvasWidget):
         :rtype: None
         """
         [x1, y1, x2, y2] = self.bbox()
-        self.canvas().coords(self._tag, x1, y1, x2, y1+height)
+        self.canvas().coords(self._tag, x1, y1, x2, y1 + height)
+
+    def _tags(self):
+        return [self._tag]
 
-    def _tags(self): return [self._tag]
+    def __repr__(self):
+        return "[Space]"
 
-    def __repr__(self): return '[Space]'
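
A sketch of SpaceWidget as invisible padding inside a SequenceWidget (display assumed):

    from nltk.draw.util import CanvasFrame, SequenceWidget, SpaceWidget, TextWidget

    cf = CanvasFrame(width=240, height=60)
    canvas = cf.canvas()
    seq = SequenceWidget(
        canvas,
        TextWidget(canvas, "left"),
        SpaceWidget(canvas, 40, 1),  # roughly 40px of invisible horizontal padding
        TextWidget(canvas, "right"),
    )
    cf.add_widget(seq, 10, 20)
    cf.destroy()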
 
 class ScrollWatcherWidget(CanvasWidget):
     """
@@ -1555,6 +1683,7 @@ class ScrollWatcherWidget(CanvasWidget):
     scroll-watcher widget will only increase the size of the
     ``Canvas``'s scrollregion; it will never decrease it.
     """
+
     def __init__(self, canvas, *children, **attribs):
         """
         Create a new scroll-watcher widget.
@@ -1568,7 +1697,8 @@ class ScrollWatcherWidget(CanvasWidget):
             scrollregion.
         :param attribs: The new canvas widget's attributes.
         """
-        for child in children: self._add_child_widget(child)
+        for child in children:
+            self._add_child_widget(child)
         CanvasWidget.__init__(self, canvas, **attribs)
 
     def add_child(self, canvaswidget):
@@ -1596,7 +1726,8 @@ class ScrollWatcherWidget(CanvasWidget):
         """
         self._remove_child_widget(canvaswidget)
 
-    def _tags(self): return []
+    def _tags(self):
+        return []
 
     def _update(self, child):
         self._adjust_scrollregion()
@@ -1608,21 +1739,29 @@ class ScrollWatcherWidget(CanvasWidget):
         """
         bbox = self.bbox()
         canvas = self.canvas()
-        scrollregion = [int(n) for n in canvas['scrollregion'].split()]
-        if len(scrollregion) != 4: return
-        if (bbox[0] < scrollregion[0] or bbox[1] < scrollregion[1] or
-            bbox[2] > scrollregion[2] or bbox[3] > scrollregion[3]):
-            scrollregion = ('%d %d %d %d' %
-                            (min(bbox[0], scrollregion[0]),
-                             min(bbox[1], scrollregion[1]),
-                             max(bbox[2], scrollregion[2]),
-                         max(bbox[3], scrollregion[3])))
-            canvas['scrollregion'] = scrollregion
+        scrollregion = [int(n) for n in canvas["scrollregion"].split()]
+        if len(scrollregion) != 4:
+            return
+        if (
+            bbox[0] < scrollregion[0]
+            or bbox[1] < scrollregion[1]
+            or bbox[2] > scrollregion[2]
+            or bbox[3] > scrollregion[3]
+        ):
+            scrollregion = "%d %d %d %d" % (
+                min(bbox[0], scrollregion[0]),
+                min(bbox[1], scrollregion[1]),
+                max(bbox[2], scrollregion[2]),
+                max(bbox[3], scrollregion[3]),
+            )
+            canvas["scrollregion"] = scrollregion
+
 
 ##//////////////////////////////////////////////////////
 ##  Canvas Frame
 ##//////////////////////////////////////////////////////
 
+
 class CanvasFrame(object):
     """
     A ``Tkinter`` frame containing a canvas and scrollbars.
@@ -1637,6 +1776,7 @@ class CanvasFrame(object):
     its own main window, including a "Done" button and a "Print"
     button.
     """
+
     def __init__(self, parent=None, **kw):
         """
         Create a new ``CanvasFrame``.
@@ -1651,46 +1791,51 @@ class CanvasFrame(object):
         # If no parent was given, set up a top-level window.
         if parent is None:
             self._parent = Tk()
-            self._parent.title('NLTK')
-            self._parent.bind('<Control-p>', lambda e: self.print_to_file())
-            self._parent.bind('<Control-x>', self.destroy)
-            self._parent.bind('<Control-q>', self.destroy)
+            self._parent.title("NLTK")
+            self._parent.bind("<Control-p>", lambda e: self.print_to_file())
+            self._parent.bind("<Control-x>", self.destroy)
+            self._parent.bind("<Control-q>", self.destroy)
         else:
             self._parent = parent
 
         # Create a frame for the canvas & scrollbars
         self._frame = frame = Frame(self._parent)
         self._canvas = canvas = Canvas(frame, **kw)
-        xscrollbar = Scrollbar(self._frame, orient='horizontal')
-        yscrollbar = Scrollbar(self._frame, orient='vertical')
-        xscrollbar['command'] = canvas.xview
-        yscrollbar['command'] = canvas.yview
-        canvas['xscrollcommand'] = xscrollbar.set
-        canvas['yscrollcommand'] = yscrollbar.set
-        yscrollbar.pack(fill='y', side='right')
-        xscrollbar.pack(fill='x', side='bottom')
-        canvas.pack(expand=1, fill='both', side='left')
+        xscrollbar = Scrollbar(self._frame, orient="horizontal")
+        yscrollbar = Scrollbar(self._frame, orient="vertical")
+        xscrollbar["command"] = canvas.xview
+        yscrollbar["command"] = canvas.yview
+        canvas["xscrollcommand"] = xscrollbar.set
+        canvas["yscrollcommand"] = yscrollbar.set
+        yscrollbar.pack(fill="y", side="right")
+        xscrollbar.pack(fill="x", side="bottom")
+        canvas.pack(expand=1, fill="both", side="left")
 
         # Set initial scroll region.
-        scrollregion = '0 0 %s %s' % (canvas['width'], canvas['height'])
-        canvas['scrollregion'] = scrollregion
+        scrollregion = "0 0 %s %s" % (canvas["width"], canvas["height"])
+        canvas["scrollregion"] = scrollregion
 
         self._scrollwatcher = ScrollWatcherWidget(canvas)
 
         # If no parent was given, pack the frame, and add a menu.
         if parent is None:
-            self.pack(expand=1, fill='both')
+            self.pack(expand=1, fill="both")
             self._init_menubar()
 
     def _init_menubar(self):
         menubar = Menu(self._parent)
 
         filemenu = Menu(menubar, tearoff=0)
-        filemenu.add_command(label='Print to Postscript', underline=0,
-                             command=self.print_to_file, accelerator='Ctrl-p')
-        filemenu.add_command(label='Exit', underline=1,
-                             command=self.destroy, accelerator='Ctrl-x')
-        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+        filemenu.add_command(
+            label="Print to Postscript",
+            underline=0,
+            command=self.print_to_file,
+            accelerator="Ctrl-p",
+        )
+        filemenu.add_command(
+            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"
+        )
+        menubar.add_cascade(label="File", underline=0, menu=filemenu)
 
         self._parent.config(menu=menubar)
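
An end-to-end sketch of a stand-alone CanvasFrame (display assumed; bind_click comes from CanvasWidget in this same file):

    from nltk.draw.util import CanvasFrame, TextWidget

    cf = CanvasFrame(width=300, height=200)  # no parent: own Tk window plus this menu
    hello = TextWidget(cf.canvas(), "hello", draggable=1)
    hello.bind_click(lambda w: print("clicked", w))
    cf.add_widget(hello, 120, 90)
    cf.mainloop()  # returns immediately when run inside IDLE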
 
@@ -1704,21 +1849,25 @@ class CanvasFrame(object):
         :rtype: None
         """
         if filename is None:
-            ftypes = [('Postscript files', '.ps'),
-                      ('All files', '*')]
-            filename = asksaveasfilename(filetypes=ftypes,
-                                         defaultextension='.ps')
-            if not filename: return
+            ftypes = [("Postscript files", ".ps"), ("All files", "*")]
+            filename = asksaveasfilename(filetypes=ftypes, defaultextension=".ps")
+            if not filename:
+                return
         (x0, y0, w, h) = self.scrollregion()
-        postscript = self._canvas.postscript(x=x0, y=y0,
-                                width=w+2, height=h+2,
-                                pagewidth=w+2, # points = 1/72 inch
-                                pageheight=h+2, # points = 1/72 inch
-                                pagex=0, pagey=0)
+        postscript = self._canvas.postscript(
+            x=x0,
+            y=y0,
+            width=w + 2,
+            height=h + 2,
+            pagewidth=w + 2,  # points = 1/72 inch
+            pageheight=h + 2,  # points = 1/72 inch
+            pagex=0,
+            pagey=0,
+        )
         # workaround for bug in Tk font handling
-        postscript = postscript.replace(' 0 scalefont ', ' 9 scalefont ')
-        with open(filename, 'wb') as f:
-            f.write(postscript.encode('utf8'))
+        postscript = postscript.replace(" 0 scalefont ", " 9 scalefont ")
+        with open(filename, "wb") as f:
+            f.write(postscript.encode("utf8"))
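
Passing a filename skips the save dialog; a minimal sketch (the output path is illustrative):

    from nltk.draw.util import CanvasFrame, TextWidget

    cf = CanvasFrame(width=120, height=60)
    cf.add_widget(TextWidget(cf.canvas(), "to ps"), 20, 20)
    cf.print_to_file(filename="canvas.ps")  # writes Postscript with no dialog
    cf.destroy()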
 
     def scrollregion(self):
         """
@@ -1726,7 +1875,7 @@ class CanvasFrame(object):
             this ``CanvasFrame``.
         :rtype: 4-tuple of int
         """
-        (x1, y1, x2, y2) = self._canvas['scrollregion'].split()
+        (x1, y1, x2, y2) = self._canvas["scrollregion"].split()
         return (int(x1), int(y1), int(x2), int(y2))
 
     def canvas(self):
@@ -1760,8 +1909,8 @@ class CanvasFrame(object):
             (x, y) = self._find_room(canvaswidget, x, y)
 
         # Move to (x,y)
-        (x1,y1,x2,y2) = canvaswidget.bbox()
-        canvaswidget.move(x-x1,y-y1)
+        (x1, y1, x2, y2) = canvaswidget.bbox()
+        canvaswidget.move(x - x1, y - y1)
 
         # Register with scrollwatcher.
         self._scrollwatcher.add_child(canvaswidget)
@@ -1774,30 +1923,38 @@ class CanvasFrame(object):
         w = widget.width()
         h = widget.height()
 
-        if w >= (right-left): return (0,0)
-        if h >= (bot-top): return (0,0)
+        if w >= (right - left):
+            return (0, 0)
+        if h >= (bot - top):
+            return (0, 0)
 
         # Move the widget out of the way, for now.
-        (x1,y1,x2,y2) = widget.bbox()
-        widget.move(left-x2-50, top-y2-50)
+        (x1, y1, x2, y2) = widget.bbox()
+        widget.move(left - x2 - 50, top - y2 - 50)
 
         if desired_x is not None:
             x = desired_x
-            for y in range(top, bot-h, int((bot-top-h)/10)):
-                if not self._canvas.find_overlapping(x-5, y-5, x+w+5, y+h+5):
-                    return (x,y)
+            for y in range(top, bot - h, int((bot - top - h) / 10)):
+                if not self._canvas.find_overlapping(
+                    x - 5, y - 5, x + w + 5, y + h + 5
+                ):
+                    return (x, y)
 
         if desired_y is not None:
             y = desired_y
-            for x in range(left, right-w, int((right-left-w)/10)):
-                if not self._canvas.find_overlapping(x-5, y-5, x+w+5, y+h+5):
-                    return (x,y)
-
-        for y in range(top, bot-h, int((bot-top-h)/10)):
-            for x in range(left, right-w, int((right-left-w)/10)):
-                if not self._canvas.find_overlapping(x-5, y-5, x+w+5, y+h+5):
-                    return (x,y)
-        return (0,0)
+            for x in range(left, right - w, int((right - left - w) / 10)):
+                if not self._canvas.find_overlapping(
+                    x - 5, y - 5, x + w + 5, y + h + 5
+                ):
+                    return (x, y)
+
+        for y in range(top, bot - h, int((bot - top - h) / 10)):
+            for x in range(left, right - w, int((right - left - w) / 10)):
+                if not self._canvas.find_overlapping(
+                    x - 5, y - 5, x + w + 5, y + h + 5
+                ):
+                    return (x, y)
+        return (0, 0)
 
     def destroy_widget(self, canvaswidget):
         """
@@ -1824,7 +1981,8 @@ class CanvasFrame(object):
         Destroy this ``CanvasFrame``.  If this ``CanvasFrame`` created a
         top-level window, then this will close that window.
         """
-        if self._parent is None: return
+        if self._parent is None:
+            return
         self._parent.destroy()
         self._parent = None
 
@@ -1835,21 +1993,24 @@ class CanvasFrame(object):
         from a script); otherwise, the frame will close as soon as
         the script completes.
         """
-        if in_idle(): return
+        if in_idle():
+            return
         self._parent.mainloop(*args, **kwargs)
 
+
 ##//////////////////////////////////////////////////////
 ##  Text display
 ##//////////////////////////////////////////////////////
 
+
 class ShowText(object):
     """
     A ``Tkinter`` window used to display a text.  ``ShowText`` is
     typically used by graphical tools to display help text, or similar
     information.
     """
-    def __init__(self, root, title, text, width=None, height=None,
-                 **textbox_options):
+
+    def __init__(self, root, title, text, width=None, height=None, **textbox_options):
         if width is None or height is None:
             (width, height) = self.find_dimentions(text, width, height)
 
@@ -1860,33 +2021,32 @@ class ShowText(object):
             self._top = top = Toplevel(root)
         top.title(title)
 
-        b = Button(top, text='Ok', command=self.destroy)
-        b.pack(side='bottom')
+        b = Button(top, text="Ok", command=self.destroy)
+        b.pack(side="bottom")
 
         tbf = Frame(top)
-        tbf.pack(expand=1, fill='both')
-        scrollbar = Scrollbar(tbf, orient='vertical')
-        scrollbar.pack(side='right', fill='y')
-        textbox = Text(tbf, wrap='word', width=width,
-                       height=height, **textbox_options)
-        textbox.insert('end', text)
-        textbox['state'] = 'disabled'
-        textbox.pack(side='left', expand=1, fill='both')
-        scrollbar['command'] = textbox.yview
-        textbox['yscrollcommand'] = scrollbar.set
+        tbf.pack(expand=1, fill="both")
+        scrollbar = Scrollbar(tbf, orient="vertical")
+        scrollbar.pack(side="right", fill="y")
+        textbox = Text(tbf, wrap="word", width=width, height=height, **textbox_options)
+        textbox.insert("end", text)
+        textbox["state"] = "disabled"
+        textbox.pack(side="left", expand=1, fill="both")
+        scrollbar["command"] = textbox.yview
+        textbox["yscrollcommand"] = scrollbar.set
 
         # Make it easy to close the window.
-        top.bind('q', self.destroy)
-        top.bind('x', self.destroy)
-        top.bind('c', self.destroy)
-        top.bind('<Return>', self.destroy)
-        top.bind('<Escape>', self.destroy)
+        top.bind("q", self.destroy)
+        top.bind("x", self.destroy)
+        top.bind("c", self.destroy)
+        top.bind("<Return>", self.destroy)
+        top.bind("<Escape>", self.destroy)
 
         # Focus the scrollbar, so they can use up/down, etc.
         scrollbar.focus()
 
     def find_dimentions(self, text, width, height):
-        lines = text.split('\n')
+        lines = text.split("\n")
         if width is None:
             maxwidth = max(len(line) for line in lines)
             width = min(maxwidth, 80)
@@ -1895,7 +2055,7 @@ class ShowText(object):
         height = 0
         for line in lines:
             while len(line) > width:
-                brk = line[:width].rfind(' ')
+                brk = line[:width].rfind(" ")
                 line = line[brk:]
                 height += 1
             height += 1
@@ -1904,7 +2064,8 @@ class ShowText(object):
         return (width, height)
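
The sizing heuristic above caps the width at 80 columns, then counts how
many screen rows each logical line needs once it wraps at the last space
before the limit.  The same logic in isolation (a hedged sketch: the
80-column cap comes from the code above, and a line containing no space
is cut down to its final character, as in the original):

    def estimate_dimensions(text, width=None, max_width=80):
        lines = text.split("\n")
        if width is None:
            width = min(max(len(line) for line in lines), max_width)
        height = 0
        for line in lines:
            while len(line) > width:
                brk = line[:width].rfind(" ")
                line = line[brk:]  # keep the tail; rfind() == -1 keeps one char
                height += 1
            height += 1
        return (width, height)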
 
     def destroy(self, *e):
-        if self._top is None: return
+        if self._top is None:
+            return
         self._top.destroy()
         self._top = None
 
@@ -1915,68 +2076,75 @@ class ShowText(object):
         from a script); otherwise, the window will close as soon as
         the script completes.
         """
-        if in_idle(): return
+        if in_idle():
+            return
         self._top.mainloop(*args, **kwargs)
 
+
 ##//////////////////////////////////////////////////////
 ##  Entry dialog
 ##//////////////////////////////////////////////////////
 
+
 class EntryDialog(object):
     """
     A dialog box for entering text.
     """
-    def __init__(self, parent, original_text='', instructions='',
-                 set_callback=None, title=None):
+
+    def __init__(
+        self, parent, original_text="", instructions="", set_callback=None, title=None
+    ):
         self._parent = parent
         self._original_text = original_text
         self._set_callback = set_callback
 
-        width = int(max(30, len(original_text)*3/2))
+        width = int(max(30, len(original_text) * 3 / 2))
         self._top = Toplevel(parent)
 
-        if title: self._top.title(title)
+        if title:
+            self._top.title(title)
 
         # The text entry box.
         entryframe = Frame(self._top)
-        entryframe.pack(expand=1, fill='both', padx=5, pady=5,ipady=10)
+        entryframe.pack(expand=1, fill="both", padx=5, pady=5, ipady=10)
         if instructions:
-            l=Label(entryframe, text=instructions)
-            l.pack(side='top', anchor='w', padx=30)
+            l = Label(entryframe, text=instructions)
+            l.pack(side="top", anchor="w", padx=30)
         self._entry = Entry(entryframe, width=width)
-        self._entry.pack(expand=1, fill='x', padx=30)
+        self._entry.pack(expand=1, fill="x", padx=30)
         self._entry.insert(0, original_text)
 
         # A divider
-        divider = Frame(self._top, borderwidth=1, relief='sunken')
-        divider.pack(fill='x', ipady=1, padx=10)
+        divider = Frame(self._top, borderwidth=1, relief="sunken")
+        divider.pack(fill="x", ipady=1, padx=10)
 
         # The buttons.
         buttons = Frame(self._top)
-        buttons.pack(expand=0, fill='x', padx=5, pady=5)
-        b = Button(buttons, text='Cancel', command=self._cancel, width=8)
-        b.pack(side='right', padx=5)
-        b = Button(buttons, text='Ok', command=self._ok,
-                   width=8, default='active')
-        b.pack(side='left', padx=5)
-        b = Button(buttons, text='Apply', command=self._apply, width=8)
-        b.pack(side='left')
-
-        self._top.bind('<Return>', self._ok)
-        self._top.bind('<Control-q>', self._cancel)
-        self._top.bind('<Escape>', self._cancel)
+        buttons.pack(expand=0, fill="x", padx=5, pady=5)
+        b = Button(buttons, text="Cancel", command=self._cancel, width=8)
+        b.pack(side="right", padx=5)
+        b = Button(buttons, text="Ok", command=self._ok, width=8, default="active")
+        b.pack(side="left", padx=5)
+        b = Button(buttons, text="Apply", command=self._apply, width=8)
+        b.pack(side="left")
+
+        self._top.bind("<Return>", self._ok)
+        self._top.bind("<Control-q>", self._cancel)
+        self._top.bind("<Escape>", self._cancel)
 
         self._entry.focus()
 
     def _reset(self, *e):
-        self._entry.delete(0,'end')
+        self._entry.delete(0, "end")
         self._entry.insert(0, self._original_text)
         if self._set_callback:
             self._set_callback(self._original_text)
 
     def _cancel(self, *e):
-        try: self._reset()
-        except: pass
+        try:
+            self._reset()
+        except:
+            pass
         self._destroy()
 
     def _ok(self, *e):
@@ -1988,14 +2156,17 @@ class EntryDialog(object):
             self._set_callback(self._entry.get())
 
     def _destroy(self, *e):
-        if self._top is None: return
+        if self._top is None:
+            return
         self._top.destroy()
         self._top = None
 
+
 ##//////////////////////////////////////////////////////
 ##  Colorized List
 ##//////////////////////////////////////////////////////
 
+
 class ColorizedList(object):
     """
     An abstract base class for displaying a colorized list of items.
@@ -2008,6 +2179,7 @@ class ColorizedList(object):
     :note: Typically, you will want to register a callback for
         ``'select'`` that calls ``mark`` on the given item.
     """
+
     def __init__(self, parent, items=[], **options):
         """
         Construct a new list.
@@ -2026,16 +2198,16 @@ class ColorizedList(object):
         self._init_itemframe(options.copy())
 
         # Set up key & mouse bindings.
-        self._textwidget.bind('<KeyPress>', self._keypress)
-        self._textwidget.bind('<ButtonPress>', self._buttonpress)
+        self._textwidget.bind("<KeyPress>", self._keypress)
+        self._textwidget.bind("<ButtonPress>", self._buttonpress)
 
         # Fill in the given CFG's items.
         self._items = None
         self.set(items)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Abstract methods
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     @abstractmethod
     def _init_colortags(self, textwidget, options):
         """
@@ -2053,9 +2225,9 @@ class ColorizedList(object):
         strings returned may not contain newline characters.
         """
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Item Access
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def get(self, index=None):
         """
@@ -2071,20 +2243,21 @@ class ColorizedList(object):
         Modify the list of items contained by this list.
         """
         items = list(items)
-        if self._items == items: return
+        if self._items == items:
+            return
         self._items = list(items)
 
-        self._textwidget['state'] = 'normal'
-        self._textwidget.delete('1.0', 'end')
+        self._textwidget["state"] = "normal"
+        self._textwidget.delete("1.0", "end")
         for item in items:
             for (text, colortag) in self._item_repr(item):
-                assert '\n' not in text, 'item repr may not contain newline'
-                self._textwidget.insert('end', text, colortag)
-            self._textwidget.insert('end', '\n')
+                assert "\n" not in text, "item repr may not contain newline"
+                self._textwidget.insert("end", text, colortag)
+            self._textwidget.insert("end", "\n")
         # Remove the final newline
-        self._textwidget.delete('end-1char', 'end')
-        self._textwidget.mark_set('insert', '1.0')
-        self._textwidget['state'] = 'disabled'
+        self._textwidget.delete("end-1char", "end")
+        self._textwidget.mark_set("insert", "1.0")
+        self._textwidget["state"] = "disabled"
         # Clear all marks
         self._marks.clear()
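
set() uses the standard read-only Text idiom: a disabled Text widget
rejects programmatic insert/delete too, so the state is flipped to
"normal" for the rewrite and back to "disabled" afterwards.  The idiom
in isolation, assuming widget is a plain tkinter.Text:

    def set_readonly_lines(widget, lines):
        widget["state"] = "normal"
        widget.delete("1.0", "end")
        for line in lines:
            widget.insert("end", line + "\n")
        widget.delete("end-1char", "end")  # drop the trailing newline
        widget["state"] = "disabled"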
 
@@ -2097,12 +2270,12 @@ class ColorizedList(object):
         """
         if item is None:
             self._marks.clear()
-            self._textwidget.tag_remove('highlight', '1.0', 'end+1char')
+            self._textwidget.tag_remove("highlight", "1.0", "end+1char")
         else:
             index = self._items.index(item)
             del self._marks[item]
-            (start, end) = ('%d.0' % (index+1), '%d.0' % (index+2))
-            self._textwidget.tag_remove('highlight', start, end)
+            (start, end) = ("%d.0" % (index + 1), "%d.0" % (index + 2))
+            self._textwidget.tag_remove("highlight", start, end)
 
     def mark(self, item):
         """
@@ -2111,8 +2284,8 @@ class ColorizedList(object):
         """
         self._marks[item] = 1
         index = self._items.index(item)
-        (start, end) = ('%d.0' % (index+1), '%d.0' % (index+2))
-        self._textwidget.tag_add('highlight', start, end)
+        (start, end) = ("%d.0" % (index + 1), "%d.0" % (index + 2))
+        self._textwidget.tag_add("highlight", start, end)
 
     def markonly(self, item):
         """
@@ -2128,11 +2301,11 @@ class ColorizedList(object):
         the item is already visible, then do nothing.
         """
         index = self._items.index(item)
-        self._textwidget.see('%d.0' % (index+1))
+        self._textwidget.see("%d.0" % (index + 1))
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Callbacks
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def add_callback(self, event, func):
         """
@@ -2147,75 +2320,87 @@ class ColorizedList(object):
             single item as its argument.  (The item selected
             or the item moved to).
         """
-        if event == 'select': events = ['click1', 'space', 'return']
-        elif event == 'move': events = ['up', 'down', 'next', 'prior']
-        else: events = [event]
+        if event == "select":
+            events = ["click1", "space", "return"]
+        elif event == "move":
+            events = ["up", "down", "next", "prior"]
+        else:
+            events = [event]
 
         for e in events:
-            self._callbacks.setdefault(e,{})[func] = 1
+            self._callbacks.setdefault(e, {})[func] = 1
 
     def remove_callback(self, event, func=None):
         """
         Deregister a callback function.  If ``func`` is none, then
         all callbacks are removed for the given event.
         """
-        if event is None: events = list(self._callbacks.keys())
-        elif event == 'select': events = ['click1', 'space', 'return']
-        elif event == 'move': events = ['up', 'down', 'next', 'prior']
-        else: events = [event]
+        if event is None:
+            events = list(self._callbacks.keys())
+        elif event == "select":
+            events = ["click1", "space", "return"]
+        elif event == "move":
+            events = ["up", "down", "next", "prior"]
+        else:
+            events = [event]
 
         for e in events:
-            if func is None: del self._callbacks[e]
+            if func is None:
+                del self._callbacks[e]
             else:
-                try: del self._callbacks[e][func]
-                except: pass
+                try:
+                    del self._callbacks[e][func]
+                except:
+                    pass
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Tkinter Methods
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def pack(self, cnf={}, **kw):
-#        "@include: Tkinter.Pack.pack"
+        #        "@include: Tkinter.Pack.pack"
         self._itemframe.pack(cnf, **kw)
 
     def grid(self, cnf={}, **kw):
-#        "@include: Tkinter.Grid.grid"
+        #        "@include: Tkinter.Grid.grid"
         self._itemframe.grid(cnf, **kw)
 
     def focus(self):
-#        "@include: Tkinter.Widget.focus"
+        #        "@include: Tkinter.Widget.focus"
         self._textwidget.focus()
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Internal Methods
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def _init_itemframe(self, options):
         self._itemframe = Frame(self._parent)
 
         # Create the basic Text widget & scrollbar.
-        options.setdefault('background', '#e0e0e0')
+        options.setdefault("background", "#e0e0e0")
         self._textwidget = Text(self._itemframe, **options)
-        self._textscroll = Scrollbar(self._itemframe, takefocus=0,
-                                     orient='vertical')
-        self._textwidget.config(yscrollcommand = self._textscroll.set)
+        self._textscroll = Scrollbar(self._itemframe, takefocus=0, orient="vertical")
+        self._textwidget.config(yscrollcommand=self._textscroll.set)
         self._textscroll.config(command=self._textwidget.yview)
-        self._textscroll.pack(side='right', fill='y')
-        self._textwidget.pack(expand=1, fill='both', side='left')
+        self._textscroll.pack(side="right", fill="y")
+        self._textwidget.pack(expand=1, fill="both", side="left")
 
         # Initialize the colorization tags
-        self._textwidget.tag_config('highlight', background='#e0ffff',
-                                    border='1', relief='raised')
+        self._textwidget.tag_config(
+            "highlight", background="#e0ffff", border="1", relief="raised"
+        )
         self._init_colortags(self._textwidget, options)
 
         # How do I want to mark keyboard selection?
-        self._textwidget.tag_config('sel', foreground='')
-        self._textwidget.tag_config('sel', foreground='', background='',
-                                    border='', underline=1)
-        self._textwidget.tag_lower('highlight', 'sel')
+        self._textwidget.tag_config("sel", foreground="")
+        self._textwidget.tag_config(
+            "sel", foreground="", background="", border="", underline=1
+        )
+        self._textwidget.tag_lower("highlight", "sel")
 
     def _fire_callback(self, event, itemnum):
-        if event not in self._callbacks: return
+        if event not in self._callbacks:
+            return
         if 0 <= itemnum < len(self._items):
             item = self._items[itemnum]
         else:
@@ -2224,65 +2409,83 @@ class ColorizedList(object):
             cb_func(item)
 
     def _buttonpress(self, event):
-        clickloc = '@%d,%d' % (event.x,event.y)
+        clickloc = "@%d,%d" % (event.x, event.y)
         insert_point = self._textwidget.index(clickloc)
-        itemnum = int(insert_point.split('.')[0])-1
-        self._fire_callback('click%d' % event.num, itemnum)
+        itemnum = int(insert_point.split(".")[0]) - 1
+        self._fire_callback("click%d" % event.num, itemnum)
 
     def _keypress(self, event):
-        if event.keysym == 'Return' or event.keysym == 'space':
-            insert_point = self._textwidget.index('insert')
-            itemnum = int(insert_point.split('.')[0])-1
+        if event.keysym == "Return" or event.keysym == "space":
+            insert_point = self._textwidget.index("insert")
+            itemnum = int(insert_point.split(".")[0]) - 1
             self._fire_callback(event.keysym.lower(), itemnum)
             return
-        elif event.keysym == 'Down': delta='+1line'
-        elif event.keysym == 'Up': delta='-1line'
-        elif event.keysym == 'Next': delta='+10lines'
-        elif event.keysym == 'Prior': delta='-10lines'
-        else: return 'continue'
-
-        self._textwidget.mark_set('insert', 'insert'+delta)
-        self._textwidget.see('insert')
-        self._textwidget.tag_remove('sel', '1.0', 'end+1char')
-        self._textwidget.tag_add('sel', 'insert linestart', 'insert lineend')
-
-        insert_point = self._textwidget.index('insert')
-        itemnum = int(insert_point.split('.')[0])-1
+        elif event.keysym == "Down":
+            delta = "+1line"
+        elif event.keysym == "Up":
+            delta = "-1line"
+        elif event.keysym == "Next":
+            delta = "+10lines"
+        elif event.keysym == "Prior":
+            delta = "-10lines"
+        else:
+            return "continue"
+
+        self._textwidget.mark_set("insert", "insert" + delta)
+        self._textwidget.see("insert")
+        self._textwidget.tag_remove("sel", "1.0", "end+1char")
+        self._textwidget.tag_add("sel", "insert linestart", "insert lineend")
+
+        insert_point = self._textwidget.index("insert")
+        itemnum = int(insert_point.split(".")[0]) - 1
         self._fire_callback(event.keysym.lower(), itemnum)
 
-        return 'break'
+        return "break"
+
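
Returning the string "break" from a handler, as _keypress does above,
tells Tk to stop processing the event, so the Text widget's default key
bindings never fire; any other return value (including the "continue"
returned for unhandled keys) lets the event propagate normally.  A
runnable sketch with a throwaway handler:

    import tkinter as tk

    root = tk.Tk()
    text = tk.Text(root)
    text.pack()

    def swallow_keys(event):
        print("saw", event.keysym)
        return "break"  # stop here: the default insert binding never runs

    text.bind("<KeyPress>", swallow_keys)
    root.mainloop()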
 
 ##//////////////////////////////////////////////////////
 ##  Improved OptionMenu
 ##//////////////////////////////////////////////////////
 
+
 class MutableOptionMenu(Menubutton):
     def __init__(self, master, values, **options):
-        self._callback = options.get('command')
-        if 'command' in options: del options['command']
+        self._callback = options.get("command")
+        if "command" in options:
+            del options["command"]
 
         # Create a variable
         self._variable = variable = StringVar()
         if len(values) > 0:
             variable.set(values[0])
 
-        kw = {"borderwidth": 2, "textvariable": variable,
-              "indicatoron": 1, "relief": RAISED, "anchor": "c",
-              "highlightthickness": 2}
+        kw = {
+            "borderwidth": 2,
+            "textvariable": variable,
+            "indicatoron": 1,
+            "relief": RAISED,
+            "anchor": "c",
+            "highlightthickness": 2,
+        }
         kw.update(options)
         Widget.__init__(self, master, "menubutton", kw)
-        self.widgetName = 'tk_optionMenu'
-        self._menu = Menu(self, name="menu", tearoff=0,)
+        self.widgetName = "tk_optionMenu"
+        self._menu = Menu(self, name="menu", tearoff=0)
         self.menuname = self._menu._w
 
         self._values = []
-        for value in values: self.add(value)
+        for value in values:
+            self.add(value)
 
         self["menu"] = self._menu
 
     def add(self, value):
-        if value in self._values: return
-        def set(value=value): self.set(value)
+        if value in self._values:
+            return
+
+        def set(value=value):
+            self.set(value)
+
         self._menu.add_command(label=value, command=set)
         self._values.append(value)
 
@@ -2298,7 +2501,7 @@ class MutableOptionMenu(Menubutton):
         self._menu.delete(i, i)
 
     def __getitem__(self, name):
-        if name == 'menu':
+        if name == "menu":
             return self._menu
         return Widget.__getitem__(self, name)
 
@@ -2307,40 +2510,52 @@ class MutableOptionMenu(Menubutton):
         Menubutton.destroy(self)
         self._menu = None
 
+
 ##//////////////////////////////////////////////////////
 ##  Test code.
 ##//////////////////////////////////////////////////////
 
+
 def demo():
     """
     A simple demonstration showing how to use canvas widgets.
     """
+
     def fill(cw):
         from random import randint
-        cw['fill'] = '#00%04d' % randint(0,9999)
+
+        cw["fill"] = "#00%04d" % randint(0, 9999)
+
     def color(cw):
         from random import randint
-        cw['color'] = '#ff%04d' % randint(0,9999)
+
+        cw["color"] = "#ff%04d" % randint(0, 9999)
 
     cf = CanvasFrame(closeenough=10, width=300, height=300)
     c = cf.canvas()
-    ct3 = TextWidget(c, 'hiya there', draggable=1)
-    ct2 = TextWidget(c, 'o  o\n||\n___\n  U', draggable=1, justify='center')
-    co = OvalWidget(c, ct2, outline='red')
-    ct = TextWidget(c, 'o  o\n||\n\\___/', draggable=1, justify='center')
-    cp = ParenWidget(c, ct, color='red')
-    cb = BoxWidget(c, cp, fill='cyan', draggable=1, width=3, margin=10)
-    equation = SequenceWidget(c,
-                              SymbolWidget(c, 'forall'), TextWidget(c, 'x'),
-                              SymbolWidget(c, 'exists'), TextWidget(c, 'y: '),
-                              TextWidget(c, 'x'), SymbolWidget(c, 'notequal'),
-                              TextWidget(c, 'y'))
+    ct3 = TextWidget(c, "hiya there", draggable=1)
+    ct2 = TextWidget(c, "o  o\n||\n___\n  U", draggable=1, justify="center")
+    co = OvalWidget(c, ct2, outline="red")
+    ct = TextWidget(c, "o  o\n||\n\\___/", draggable=1, justify="center")
+    cp = ParenWidget(c, ct, color="red")
+    cb = BoxWidget(c, cp, fill="cyan", draggable=1, width=3, margin=10)
+    equation = SequenceWidget(
+        c,
+        SymbolWidget(c, "forall"),
+        TextWidget(c, "x"),
+        SymbolWidget(c, "exists"),
+        TextWidget(c, "y: "),
+        TextWidget(c, "x"),
+        SymbolWidget(c, "notequal"),
+        TextWidget(c, "y"),
+    )
     space = SpaceWidget(c, 0, 30)
-    cstack = StackWidget(c, cb, ct3, space, co, equation, align='center')
-    foo = TextWidget(c, 'try clicking\nand dragging',
-                     draggable=1, justify='center')
-    cs = SequenceWidget(c, cstack, foo)
-    zz = BracketWidget(c, cs, color='green4', width=3)
+    cstack = StackWidget(c, cb, ct3, space, co, equation, align="center")
+    prompt_msg = TextWidget(
+        c, "try clicking\nand dragging", draggable=1, justify="center"
+    )
+    cs = SequenceWidget(c, cstack, prompt_msg)
+    zz = BracketWidget(c, cs, color="green4", width=3)
     cf.add_widget(zz, 60, 30)
 
     cb.bind_click(fill)
@@ -2350,7 +2565,8 @@ def demo():
     ct3.bind_click(color)
 
     cf.mainloop()
-    #ShowText(None, 'title', ((('this is text'*150)+'\n')*5))
+    # ShowText(None, 'title', ((('this is text'*150)+'\n')*5))
+
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
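
A trimmed variant of demo() showing just the core pattern: build a
CanvasFrame, nest widgets inside one another, bind a click callback, and
place the result.  A sketch only; the callback body is illustrative:

    from nltk.draw.util import BoxWidget, CanvasFrame, TextWidget

    cf = CanvasFrame(width=200, height=100)
    text = TextWidget(cf.canvas(), "hello", draggable=1)
    box = BoxWidget(cf.canvas(), text, fill="cyan", margin=5)
    box.bind_click(lambda widget: print("clicked", widget))
    cf.add_widget(box, 20, 20)
    cf.mainloop()
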
diff --git a/nlp_resource_data/nltk/draw/util.pyc b/nlp_resource_data/nltk/draw/util.pyc
deleted file mode 100755 (executable)
index 18f1ae9..0000000
Binary files a/nlp_resource_data/nltk/draw/util.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/featstruct.py b/nlp_resource_data/nltk/featstruct.py
old mode 100755 (executable)
new mode 100644 (file)
index 7795286..f90b581
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Feature Structures
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>,
 #         Rob Speer,
 #         Steven Bird <stevenbird1@gmail.com>
@@ -88,23 +88,25 @@ In general, if your feature structures will contain any reentrances,
 or if you plan to use them as dictionary keys, it is strongly
 recommended that you use full-fledged ``FeatStruct`` objects.
 """
-from __future__ import print_function, unicode_literals, division
 
 import re
 import copy
 from functools import total_ordering
 
-from six import integer_types, string_types
-
 from nltk.internals import read_str, raise_unorderable_types
-from nltk.sem.logic import (Variable, Expression, SubstituteBindingsI,
-                            LogicParser, LogicalExpressionException)
-from nltk.compat import python_2_unicode_compatible, unicode_repr
+from nltk.sem.logic import (
+    Variable,
+    Expression,
+    SubstituteBindingsI,
+    LogicParser,
+    LogicalExpressionException,
+)
 
 ######################################################################
 # Feature Structure
 ######################################################################
 
+
 @total_ordering
 class FeatStruct(SubstituteBindingsI):
     """
@@ -146,7 +148,7 @@ class FeatStruct(SubstituteBindingsI):
        feature structure."""
 
     ##////////////////////////////////////////////////////////////
-    #{ Constructor
+    # { Constructor
     ##////////////////////////////////////////////////////////////
 
     def __new__(cls, features=None, **morefeatures):
@@ -175,9 +177,11 @@ class FeatStruct(SubstituteBindingsI):
             elif _is_mapping(features):
                 return FeatDict.__new__(FeatDict, features, **morefeatures)
             elif morefeatures:
-                raise TypeError('Keyword arguments may only be specified '
-                                'if features is None or is a mapping.')
-            if isinstance(features, string_types):
+                raise TypeError(
+                    "Keyword arguments may only be specified "
+                    "if features is None or is a mapping."
+                )
+            if isinstance(features, str):
                 if FeatStructReader._START_FDICT_RE.match(features):
                     return FeatDict.__new__(FeatDict, features, **morefeatures)
                 else:
@@ -185,15 +189,14 @@ class FeatStruct(SubstituteBindingsI):
             elif _is_sequence(features):
                 return FeatList.__new__(FeatList, features)
             else:
-                raise TypeError('Expected string or mapping or sequence')
+                raise TypeError("Expected string or mapping or sequence")
 
         # Otherwise, construct the object as normal.
         else:
-            return super(FeatStruct, cls).__new__(cls, features,
-                                                  **morefeatures)
+            return super(FeatStruct, cls).__new__(cls, features, **morefeatures)
 
     ##////////////////////////////////////////////////////////////
-    #{ Uniform Accessor Methods
+    # { Uniform Accessor Methods
     ##////////////////////////////////////////////////////////////
     # These helper functions allow the methods defined by FeatStruct
     # to treat all feature structures as mappings, even if they're
@@ -202,21 +205,21 @@ class FeatStruct(SubstituteBindingsI):
     def _keys(self):
         """Return an iterable of the feature identifiers used by this
         FeatStruct."""
-        raise NotImplementedError() # Implemented by subclasses.
+        raise NotImplementedError()  # Implemented by subclasses.
 
     def _values(self):
         """Return an iterable of the feature values directly defined
         by this FeatStruct."""
-        raise NotImplementedError() # Implemented by subclasses.
+        raise NotImplementedError()  # Implemented by subclasses.
 
     def _items(self):
         """Return an iterable of (fid,fval) pairs, where fid is a
         feature identifier and fval is the corresponding feature
         value, for all features defined by this FeatStruct."""
-        raise NotImplementedError() # Implemented by subclasses.
+        raise NotImplementedError()  # Implemented by subclasses.
 
     ##////////////////////////////////////////////////////////////
-    #{ Equality & Hashing
+    # { Equality & Hashing
     ##////////////////////////////////////////////////////////////
 
     def equal_values(self, other, check_reentrance=False):
@@ -264,15 +267,16 @@ class FeatStruct(SubstituteBindingsI):
         otherwise, raise ``TypeError``.
         """
         if not self._frozen:
-            raise TypeError('FeatStructs must be frozen before they '
-                            'can be hashed.')
-        try: return self._hash
+            raise TypeError("FeatStructs must be frozen before they can be hashed.")
+        try:
+            return self._hash
         except AttributeError:
             self._hash = self._calculate_hashvalue(set())
             return self._hash
 
-    def _equal(self, other, check_reentrance, visited_self,
-               visited_other, visited_pairs):
+    def _equal(
+        self, other, check_reentrance, visited_self, visited_other, visited_pairs
+    ):
         """
         Return True iff self and other have equal values.
 
@@ -284,16 +288,20 @@ class FeatStruct(SubstituteBindingsI):
             for all pairs of feature structures we've already visited.
         """
         # If we're the same object, then we're equal.
-        if self is other: return True
+        if self is other:
+            return True
 
         # If we have different classes, we're definitely not equal.
-        if self.__class__ != other.__class__: return False
+        if self.__class__ != other.__class__:
+            return False
 
         # If we define different features, we're definitely not equal.
         # (Perform len test first because it's faster -- we should
         # do profiling to see if this actually helps)
-        if len(self) != len(other): return False
-        if set(self._keys()) != set(other._keys()): return False
+        if len(self) != len(other):
+            return False
+        if set(self._keys()) != set(other._keys()):
+            return False
 
         # If we're checking reentrance, then any time we revisit a
         # structure, make sure that it was paired with the same
@@ -315,19 +323,24 @@ class FeatStruct(SubstituteBindingsI):
         # Keep track of which nodes we've visited.
         visited_self.add(id(self))
         visited_other.add(id(other))
-        visited_pairs.add( (id(self), id(other)) )
+        visited_pairs.add((id(self), id(other)))
 
         # Now we have to check all values.  If any of them don't match,
         # then return false.
         for (fname, self_fval) in self._items():
             other_fval = other[fname]
             if isinstance(self_fval, FeatStruct):
-                if not self_fval._equal(other_fval, check_reentrance,
-                                        visited_self, visited_other,
-                                        visited_pairs):
+                if not self_fval._equal(
+                    other_fval,
+                    check_reentrance,
+                    visited_self,
+                    visited_other,
+                    visited_pairs,
+                ):
                     return False
             else:
-                if self_fval != other_fval: return False
+                if self_fval != other_fval:
+                    return False
 
         # Everything matched up; return true.
         return True
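
What the reentrance bookkeeping in _equal() buys: two structures can be
equal value-for-value yet differ in which substructures are shared.  A
sketch using the (N)/->(N) reentrance notation accepted by
FeatStructReader:

    from nltk.featstruct import FeatStruct

    shared = FeatStruct("[A=(1)[B=b], C->(1)]")  # A and C are one object
    copied = FeatStruct("[A=[B=b], C=[B=b]]")    # equal values, no sharing
    print(shared.equal_values(copied))                         # True
    print(shared.equal_values(copied, check_reentrance=True))  # False
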
@@ -340,7 +353,8 @@ class FeatStruct(SubstituteBindingsI):
         :param visited: A set containing the ids of all feature
             structures we've already visited while hashing.
         """
-        if id(self) in visited: return 1
+        if id(self) in visited:
+            return 1
         visited.add(id(self))
 
         hashval = 5831
@@ -353,11 +367,11 @@ class FeatStruct(SubstituteBindingsI):
             else:
                 hashval += hash(fval)
             # Convert to a 32 bit int.
-            hashval = int(hashval & 0x7fffffff)
+            hashval = int(hashval & 0x7FFFFFFF)
         return hashval
 
     ##////////////////////////////////////////////////////////////
-    #{ Freezing
+    # { Freezing
     ##////////////////////////////////////////////////////////////
 
     #: Error message used by mutating methods when called on a frozen
@@ -371,7 +385,8 @@ class FeatStruct(SubstituteBindingsI):
         'freeze' any feature value that is not a ``FeatStruct``; it
         is recommended that you use only immutable feature values.
         """
-        if self._frozen: return
+        if self._frozen:
+            return
         self._freeze(set())
 
     def frozen(self):
@@ -391,7 +406,8 @@ class FeatStruct(SubstituteBindingsI):
         :param visited: A set containing the ids of all feature
             structures we've already visited while freezing.
         """
-        if id(self) in visited: return
+        if id(self) in visited:
+            return
         visited.add(id(self))
         self._frozen = True
         for (fname, fval) in sorted(self._items()):
@@ -399,7 +415,7 @@ class FeatStruct(SubstituteBindingsI):
                 fval._freeze(visited)
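
The freezing contract in practice: __hash__ above raises TypeError on an
unfrozen structure, and every mutating method raises ValueError once
freeze() has run.  A brief sketch:

    from nltk.featstruct import FeatStruct

    fs = FeatStruct("[A=b]")
    # hash(fs)       # TypeError: not frozen yet
    fs.freeze()
    index = {fs: "ok"}  # frozen structures work as dict keys
    # fs["A"] = "c"  # ValueError: Frozen FeatStructs may not be modified.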
 
     ##////////////////////////////////////////////////////////////
-    #{ Copying
+    # { Copying
     ##////////////////////////////////////////////////////////////
 
     def copy(self, deep=True):
@@ -417,10 +433,10 @@ class FeatStruct(SubstituteBindingsI):
     # Subclasses should define __deepcopy__ to ensure that the new
     # copy will not be frozen.
     def __deepcopy__(self, memo):
-        raise NotImplementedError() # Implemented by subclasses.
+        raise NotImplementedError()  # Implemented by subclasses.
 
     ##////////////////////////////////////////////////////////////
-    #{ Structural Information
+    # { Structural Information
     ##////////////////////////////////////////////////////////////
 
     def cyclic(self):
@@ -445,10 +461,11 @@ class FeatStruct(SubstituteBindingsI):
         :param visited: A set containing the ids of all feature
             structures we've already visited.
         """
-        raise NotImplementedError() # Implemented by subclasses.
+        raise NotImplementedError()  # Implemented by subclasses.
 
     def _walk(self, visited):
-        if id(self) in visited: return
+        if id(self) in visited:
+            return
         visited.add(id(self))
         yield self
         for fval in self._values():
@@ -480,7 +497,7 @@ class FeatStruct(SubstituteBindingsI):
         return reentrances
 
     ##////////////////////////////////////////////////////////////
-    #{ Variables & Bindings
+    # { Variables & Bindings
     ##////////////////////////////////////////////////////////////
 
     def substitute_bindings(self, bindings):
@@ -509,11 +526,10 @@ class FeatStruct(SubstituteBindingsI):
         return remove_variables(self)
 
     ##////////////////////////////////////////////////////////////
-    #{ Unification
+    # { Unification
     ##////////////////////////////////////////////////////////////
 
-    def unify(self, other, bindings=None, trace=False,
-              fail=None, rename_vars=True):
+    def unify(self, other, bindings=None, trace=False, fail=None, rename_vars=True):
         return unify(self, other, bindings, trace, fail, rename_vars)
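
unify() merges two structures, binding variables along the way; by
default the resulting bindings are applied to the result.  A hedged
sketch (the printed layout is indicative, since str() renders a
multi-line matrix):

    from nltk.featstruct import FeatStruct

    fs1 = FeatStruct("[A=?x, B=[C=?x]]")
    fs2 = FeatStruct("[B=[C=2]]")
    print(fs1.unify(fs2))  # ?x is bound to 2, giving roughly [A=2, B=[C=2]]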
 
     def subsumes(self, other):
@@ -525,7 +541,7 @@ class FeatStruct(SubstituteBindingsI):
         return subsumes(self, other)
 
     ##////////////////////////////////////////////////////////////
-    #{ String Representations
+    # { String Representations
     ##////////////////////////////////////////////////////////////
 
     def __repr__(self):
@@ -549,21 +565,28 @@ class FeatStruct(SubstituteBindingsI):
         """
         raise NotImplementedError()
 
+
 # Mutation: disable if frozen.
 _FROZEN_ERROR = "Frozen FeatStructs may not be modified."
 _FROZEN_NOTICE = "\n%sIf self is frozen, raise ValueError."
-def _check_frozen(method, indent=''):
+
+
+def _check_frozen(method, indent=""):
     """
     Given a method function, return a new method function that first
     checks if ``self._frozen`` is true; and if so, raises ``ValueError``
     with an appropriate message.  Otherwise, call the method and return
     its result.
     """
+
     def wrapped(self, *args, **kwargs):
-        if self._frozen: raise ValueError(_FROZEN_ERROR)
-        else: return method(self, *args, **kwargs)
+        if self._frozen:
+            raise ValueError(_FROZEN_ERROR)
+        else:
+            return method(self, *args, **kwargs)
+
     wrapped.__name__ = method.__name__
-    wrapped.__doc__ = (method.__doc__ or '') + (_FROZEN_NOTICE % indent)
+    wrapped.__doc__ = (method.__doc__ or "") + (_FROZEN_NOTICE % indent)
     return wrapped
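
_check_frozen is the classic guard-decorator pattern: each inherited
mutating method is rewrapped so the frozen flag is consulted first.  The
same pattern in miniature, on a hypothetical class:

    def check_frozen(method):
        def wrapped(self, *args, **kwargs):
            if self._frozen:
                raise ValueError("Frozen objects may not be modified.")
            return method(self, *args, **kwargs)
        wrapped.__name__ = method.__name__
        return wrapped

    class Box:
        def __init__(self):
            self._frozen = False
            self.items = []

        @check_frozen
        def add(self, x):
            self.items.append(x)

    b = Box()
    b.add(1)        # fine
    b._frozen = True
    # b.add(2)      # ValueError: Frozen objects may not be modified.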
 
 
@@ -571,7 +594,8 @@ def _check_frozen(method, indent=''):
 # Feature Dictionary
 ######################################################################
 
-@python_2_unicode_compatible
+
+
 class FeatDict(FeatStruct, dict):
     """
     A feature structure that acts like a Python dictionary.  I.e., a
@@ -587,6 +611,7 @@ class FeatDict(FeatStruct, dict):
     :see: ``FeatStruct`` for information about feature paths, reentrance,
         cyclic feature structures, mutability, freezing, and hashing.
     """
+
     def __init__(self, features=None, **morefeatures):
         """
         Create a new feature dictionary, with the specified features.
@@ -603,29 +628,29 @@ class FeatDict(FeatStruct, dict):
             ``morefeatures``, then the value from ``morefeatures`` will be
             used.
         """
-        if isinstance(features, string_types):
+        if isinstance(features, str):
             FeatStructReader().fromstring(features, self)
             self.update(**morefeatures)
         else:
             # update() checks the types of features.
             self.update(features, **morefeatures)
 
-    #////////////////////////////////////////////////////////////
-    #{ Dict methods
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Dict methods
+    # ////////////////////////////////////////////////////////////
     _INDEX_ERROR = "Expected feature name or path.  Got %r."
 
     def __getitem__(self, name_or_path):
         """If the feature with the given name or path exists, return
         its value; otherwise, raise ``KeyError``."""
-        if isinstance(name_or_path, (string_types, Feature)):
+        if isinstance(name_or_path, (str, Feature)):
             return dict.__getitem__(self, name_or_path)
         elif isinstance(name_or_path, tuple):
             try:
                 val = self
                 for fid in name_or_path:
                     if not isinstance(val, FeatStruct):
-                        raise KeyError # path contains base value
+                        raise KeyError  # path contains base value
                     val = val[fid]
                 return val
             except (KeyError, IndexError):
@@ -636,13 +661,18 @@ class FeatDict(FeatStruct, dict):
     def get(self, name_or_path, default=None):
         """If the feature with the given name or path exists, return its
         value; otherwise, return ``default``."""
-        try: return self[name_or_path]
-        except KeyError: return default
+        try:
+            return self[name_or_path]
+        except KeyError:
+            return default
 
     def __contains__(self, name_or_path):
         """Return true if a feature with the given name or path exists."""
-        try: self[name_or_path]; return True
-        except KeyError: return False
+        try:
+            self[name_or_path]
+            return True
+        except KeyError:
+            return False
 
     def has_key(self, name_or_path):
         """Return true if a feature with the given name or path exists."""
@@ -651,8 +681,9 @@ class FeatDict(FeatStruct, dict):
     def __delitem__(self, name_or_path):
         """If the feature with the given name or path exists, delete
         its value; otherwise, raise ``KeyError``."""
-        if self._frozen: raise ValueError(_FROZEN_ERROR)
-        if isinstance(name_or_path, (string_types, Feature)):
+        if self._frozen:
+            raise ValueError(_FROZEN_ERROR)
+        if isinstance(name_or_path, (str, Feature)):
             return dict.__delitem__(self, name_or_path)
         elif isinstance(name_or_path, tuple):
             if len(name_or_path) == 0:
@@ -660,7 +691,7 @@ class FeatDict(FeatStruct, dict):
             else:
                 parent = self[name_or_path[:-1]]
                 if not isinstance(parent, FeatStruct):
-                    raise KeyError(name_or_path) # path contains base value
+                    raise KeyError(name_or_path)  # path contains base value
                 del parent[name_or_path[-1]]
         else:
             raise TypeError(self._INDEX_ERROR % name_or_path)
@@ -669,8 +700,9 @@ class FeatDict(FeatStruct, dict):
         """Set the value for the feature with the given name or path
         to ``value``.  If ``name_or_path`` is an invalid path, raise
         ``KeyError``."""
-        if self._frozen: raise ValueError(_FROZEN_ERROR)
-        if isinstance(name_or_path, (string_types, Feature)):
+        if self._frozen:
+            raise ValueError(_FROZEN_ERROR)
+        if isinstance(name_or_path, (str, Feature)):
             return dict.__setitem__(self, name_or_path, value)
         elif isinstance(name_or_path, tuple):
             if len(name_or_path) == 0:
@@ -678,7 +710,7 @@ class FeatDict(FeatStruct, dict):
             else:
                 parent = self[name_or_path[:-1]]
                 if not isinstance(parent, FeatStruct):
-                    raise KeyError(name_or_path) # path contains base value
+                    raise KeyError(name_or_path)  # path contains base value
                 parent[name_or_path[-1]] = value
         else:
             raise TypeError(self._INDEX_ERROR % name_or_path)
@@ -689,45 +721,51 @@ class FeatDict(FeatStruct, dict):
     setdefault = _check_frozen(dict.setdefault)
 
     def update(self, features=None, **morefeatures):
-        if self._frozen: raise ValueError(_FROZEN_ERROR)
+        if self._frozen:
+            raise ValueError(_FROZEN_ERROR)
         if features is None:
             items = ()
-        elif hasattr(features, 'items') and callable(features.items):
+        elif hasattr(features, "items") and callable(features.items):
             items = features.items()
-        elif hasattr(features, '__iter__'):
+        elif hasattr(features, "__iter__"):
             items = features
         else:
-            raise ValueError('Expected mapping or list of tuples')
+            raise ValueError("Expected mapping or list of tuples")
 
         for key, val in items:
-            if not isinstance(key, (string_types, Feature)):
-                raise TypeError('Feature names must be strings')
+            if not isinstance(key, (str, Feature)):
+                raise TypeError("Feature names must be strings")
             self[key] = val
         for key, val in morefeatures.items():
-            if not isinstance(key, (string_types, Feature)):
-                raise TypeError('Feature names must be strings')
+            if not isinstance(key, (str, Feature)):
+                raise TypeError("Feature names must be strings")
             self[key] = val
 
     ##////////////////////////////////////////////////////////////
-    #{ Copying
+    # { Copying
     ##////////////////////////////////////////////////////////////
 
     def __deepcopy__(self, memo):
         memo[id(self)] = selfcopy = self.__class__()
         for (key, val) in self._items():
-            selfcopy[copy.deepcopy(key,memo)] = copy.deepcopy(val,memo)
+            selfcopy[copy.deepcopy(key, memo)] = copy.deepcopy(val, memo)
         return selfcopy
 
     ##////////////////////////////////////////////////////////////
-    #{ Uniform Accessor Methods
+    # { Uniform Accessor Methods
     ##////////////////////////////////////////////////////////////
 
-    def _keys(self): return self.keys()
-    def _values(self): return self.values()
-    def _items(self): return self.items()
+    def _keys(self):
+        return self.keys()
+
+    def _values(self):
+        return self.values()
+
+    def _items(self):
+        return self.items()
 
     ##////////////////////////////////////////////////////////////
-    #{ String Representations
+    # { String Representations
     ##////////////////////////////////////////////////////////////
 
     def __str__(self):
@@ -735,51 +773,53 @@ class FeatDict(FeatStruct, dict):
         Display a multi-line representation of this feature dictionary
         as an FVM (feature value matrix).
         """
-        return '\n'.join(self._str(self._find_reentrances({}), {}))
+        return "\n".join(self._str(self._find_reentrances({}), {}))
 
     def _repr(self, reentrances, reentrance_ids):
         segments = []
-        prefix = ''
-        suffix = ''
+        prefix = ""
+        suffix = ""
 
         # If this is the first time we've seen a reentrant structure,
         # then assign it a unique identifier.
         if reentrances[id(self)]:
             assert id(self) not in reentrance_ids
-            reentrance_ids[id(self)] = repr(len(reentrance_ids)+1)
+            reentrance_ids[id(self)] = repr(len(reentrance_ids) + 1)
 
         # sorting note: keys are unique strings, so we'll never fall
         # through to comparing values.
         for (fname, fval) in sorted(self.items()):
-            display = getattr(fname, 'display', None)
+            display = getattr(fname, "display", None)
             if id(fval) in reentrance_ids:
-                segments.append('%s->(%s)' %
-                                (fname, reentrance_ids[id(fval)]))
-            elif (display == 'prefix' and not prefix and
-                  isinstance(fval, (Variable, string_types))):
-                    prefix = '%s' % fval
-            elif display == 'slash' and not suffix:
+                segments.append("%s->(%s)" % (fname, reentrance_ids[id(fval)]))
+            elif (
+                display == "prefix"
+                and not prefix
+                and isinstance(fval, (Variable, str))
+            ):
+                prefix = "%s" % fval
+            elif display == "slash" and not suffix:
                 if isinstance(fval, Variable):
-                    suffix = '/%s' % fval.name
+                    suffix = "/%s" % fval.name
                 else:
-                    suffix = '/%s' % unicode_repr(fval)
+                    suffix = "/%s" % repr(fval)
             elif isinstance(fval, Variable):
-                segments.append('%s=%s' % (fname, fval.name))
+                segments.append("%s=%s" % (fname, fval.name))
             elif fval is True:
-                segments.append('+%s' % fname)
+                segments.append("+%s" % fname)
             elif fval is False:
-                segments.append('-%s' % fname)
+                segments.append("-%s" % fname)
             elif isinstance(fval, Expression):
-                segments.append('%s=<%s>' % (fname, fval))
+                segments.append("%s=<%s>" % (fname, fval))
             elif not isinstance(fval, FeatStruct):
-                segments.append('%s=%s' % (fname, unicode_repr(fval)))
+                segments.append("%s=%s" % (fname, repr(fval)))
             else:
                 fval_repr = fval._repr(reentrances, reentrance_ids)
-                segments.append('%s=%s' % (fname, fval_repr))
+                segments.append("%s=%s" % (fname, fval_repr))
         # If it's reentrant, then add on an identifier tag.
         if reentrances[id(self)]:
-            prefix = '(%s)%s' % (reentrance_ids[id(self)], prefix)
-        return '%s[%s]%s' % (prefix, ', '.join(segments), suffix)
+            prefix = "(%s)%s" % (reentrance_ids[id(self)], prefix)
+        return "%s[%s]%s" % (prefix, ", ".join(segments), suffix)
 
     def _str(self, reentrances, reentrance_ids):
         """
@@ -798,14 +838,14 @@ class FeatDict(FeatStruct, dict):
         # then tack on an id string.
         if reentrances[id(self)]:
             assert id(self) not in reentrance_ids
-            reentrance_ids[id(self)] = repr(len(reentrance_ids)+1)
+            reentrance_ids[id(self)] = repr(len(reentrance_ids) + 1)
 
         # Special case: empty feature dict.
         if len(self) == 0:
             if reentrances[id(self)]:
-                return ['(%s) []' % reentrance_ids[id(self)]]
+                return ["(%s) []" % reentrance_ids[id(self)]]
             else:
-                return ['[]']
+                return ["[]"]
 
         # What's the longest feature name?  Use this to align names.
         maxfnamelen = max(len("%s" % k) for k in self.keys())
@@ -816,59 +856,62 @@ class FeatDict(FeatStruct, dict):
         for (fname, fval) in sorted(self.items()):
             fname = ("%s" % fname).ljust(maxfnamelen)
             if isinstance(fval, Variable):
-                lines.append('%s = %s' % (fname,fval.name))
+                lines.append("%s = %s" % (fname, fval.name))
 
             elif isinstance(fval, Expression):
-                lines.append('%s = <%s>' % (fname, fval))
+                lines.append("%s = <%s>" % (fname, fval))
 
             elif isinstance(fval, FeatList):
                 fval_repr = fval._repr(reentrances, reentrance_ids)
-                lines.append('%s = %s' % (fname, unicode_repr(fval_repr)))
+                lines.append("%s = %s" % (fname, repr(fval_repr)))
 
             elif not isinstance(fval, FeatDict):
                 # It's not a nested feature structure -- just print it.
-                lines.append('%s = %s' % (fname, unicode_repr(fval)))
+                lines.append("%s = %s" % (fname, repr(fval)))
 
             elif id(fval) in reentrance_ids:
                 # It's a feature structure we've seen before -- print
                 # the reentrance id.
-                lines.append('%s -> (%s)' % (fname, reentrance_ids[id(fval)]))
+                lines.append("%s -> (%s)" % (fname, reentrance_ids[id(fval)]))
 
             else:
                 # It's a new feature structure.  Separate it from
                 # other values by a blank line.
-                if lines and lines[-1] != '': lines.append('')
+                if lines and lines[-1] != "":
+                    lines.append("")
 
                 # Recursively print the feature's value (fval).
                 fval_lines = fval._str(reentrances, reentrance_ids)
 
                 # Indent each line to make room for fname.
-                fval_lines = [(' '*(maxfnamelen+3))+l for l in fval_lines]
+                fval_lines = [(" " * (maxfnamelen + 3)) + l for l in fval_lines]
 
                 # Pick which line we'll display fname on, & splice it in.
-                nameline = (len(fval_lines)-1) // 2
+                nameline = (len(fval_lines) - 1) // 2
                 fval_lines[nameline] = (
-                        fname+' ='+fval_lines[nameline][maxfnamelen+2:])
+                    fname + " =" + fval_lines[nameline][maxfnamelen + 2 :]
+                )
 
                 # Add the feature structure to the output.
                 lines += fval_lines
 
                 # Separate FeatStructs by a blank line.
-                lines.append('')
+                lines.append("")
 
         # Get rid of any excess blank lines.
-        if lines[-1] == '': lines.pop()
+        if lines[-1] == "":
+            lines.pop()
 
         # Add brackets around everything.
         maxlen = max(len(line) for line in lines)
-        lines = ['[ %s%s ]' % (line, ' '*(maxlen-len(line))) for line in lines]
+        lines = ["[ %s%s ]" % (line, " " * (maxlen - len(line))) for line in lines]
 
         # If it's reentrant, then add on an identifier tag.
         if reentrances[id(self)]:
-            idstr = '(%s) ' % reentrance_ids[id(self)]
-            lines = [(' '*len(idstr))+l for l in lines]
-            idline = (len(lines)-1) // 2
-            lines[idline] = idstr + lines[idline][len(idstr):]
+            idstr = "(%s) " % reentrance_ids[id(self)]
+            lines = [(" " * len(idstr)) + l for l in lines]
+            idline = (len(lines) - 1) // 2
+            lines[idline] = idstr + lines[idline][len(idstr) :]
 
         return lines
 
@@ -877,6 +920,7 @@ class FeatDict(FeatStruct, dict):
 # Feature List
 ######################################################################
 
+
 class FeatList(FeatStruct, list):
     """
     A list of feature values, where each feature value is either a
@@ -893,6 +937,7 @@ class FeatList(FeatStruct, list):
     :see: ``FeatStruct`` for information about feature paths, reentrance,
         cyclic feature structures, mutability, freezing, and hashing.
     """
+
     def __init__(self, features=()):
         """
         Create a new feature list, with the specified features.
@@ -902,25 +947,25 @@ class FeatList(FeatStruct, list):
             ``FeatStructReader``.  Otherwise, it should be a sequence
             of basic values and nested feature structures.
         """
-        if isinstance(features, string_types):
+        if isinstance(features, str):
             FeatStructReader().fromstring(features, self)
         else:
             list.__init__(self, features)
 
-    #////////////////////////////////////////////////////////////
-    #{ List methods
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { List methods
+    # ////////////////////////////////////////////////////////////
     _INDEX_ERROR = "Expected int or feature path.  Got %r."
 
     def __getitem__(self, name_or_path):
-        if isinstance(name_or_path, integer_types):
+        if isinstance(name_or_path, int):
             return list.__getitem__(self, name_or_path)
         elif isinstance(name_or_path, tuple):
             try:
                 val = self
                 for fid in name_or_path:
                     if not isinstance(val, FeatStruct):
-                        raise KeyError # path contains base value
+                        raise KeyError  # path contains base value
                     val = val[fid]
                 return val
             except (KeyError, IndexError):
@@ -931,8 +976,9 @@ class FeatList(FeatStruct, list):
     def __delitem__(self, name_or_path):
         """If the feature with the given name or path exists, delete
         its value; otherwise, raise ``KeyError``."""
-        if self._frozen: raise ValueError(_FROZEN_ERROR)
-        if isinstance(name_or_path, (integer_types, slice)):
+        if self._frozen:
+            raise ValueError(_FROZEN_ERROR)
+        if isinstance(name_or_path, (int, slice)):
             return list.__delitem__(self, name_or_path)
         elif isinstance(name_or_path, tuple):
             if len(name_or_path) == 0:
@@ -940,7 +986,7 @@ class FeatList(FeatStruct, list):
             else:
                 parent = self[name_or_path[:-1]]
                 if not isinstance(parent, FeatStruct):
-                    raise KeyError(name_or_path) # path contains base value
+                    raise KeyError(name_or_path)  # path contains base value
                 del parent[name_or_path[-1]]
         else:
             raise TypeError(self._INDEX_ERROR % name_or_path)
@@ -949,8 +995,9 @@ class FeatList(FeatStruct, list):
         """Set the value for the feature with the given name or path
         to ``value``.  If ``name_or_path`` is an invalid path, raise
         ``KeyError``."""
-        if self._frozen: raise ValueError(_FROZEN_ERROR)
-        if isinstance(name_or_path, (integer_types, slice)):
+        if self._frozen:
+            raise ValueError(_FROZEN_ERROR)
+        if isinstance(name_or_path, (int, slice)):
             return list.__setitem__(self, name_or_path, value)
         elif isinstance(name_or_path, tuple):
             if len(name_or_path) == 0:
@@ -958,13 +1005,13 @@ class FeatList(FeatStruct, list):
             else:
                 parent = self[name_or_path[:-1]]
                 if not isinstance(parent, FeatStruct):
-                    raise KeyError(name_or_path) # path contains base value
+                    raise KeyError(name_or_path)  # path contains base value
                 parent[name_or_path[-1]] = value
         else:
             raise TypeError(self._INDEX_ERROR % name_or_path)
 
-#    __delslice__ = _check_frozen(list.__delslice__, '               ')
-#    __setslice__ = _check_frozen(list.__setslice__, '               ')
+    #    __delslice__ = _check_frozen(list.__delslice__, '               ')
+    #    __setslice__ = _check_frozen(list.__setslice__, '               ')
     __iadd__ = _check_frozen(list.__iadd__)
     __imul__ = _check_frozen(list.__imul__)
     append = _check_frozen(list.append)
@@ -976,24 +1023,29 @@ class FeatList(FeatStruct, list):
     sort = _check_frozen(list.sort)
 
     ##////////////////////////////////////////////////////////////
-    #{ Copying
+    # { Copying
     ##////////////////////////////////////////////////////////////
 
     def __deepcopy__(self, memo):
         memo[id(self)] = selfcopy = self.__class__()
-        selfcopy.extend(copy.deepcopy(fval,memo) for fval in self)
+        selfcopy.extend(copy.deepcopy(fval, memo) for fval in self)
         return selfcopy
 
     ##////////////////////////////////////////////////////////////
-    #{ Uniform Accessor Methods
+    # { Uniform Accessor Methods
     ##////////////////////////////////////////////////////////////
 
-    def _keys(self): return list(range(len(self)))
-    def _values(self): return self
-    def _items(self): return enumerate(self)
+    def _keys(self):
+        return list(range(len(self)))
+
+    def _values(self):
+        return self
+
+    def _items(self):
+        return enumerate(self)
 
     ##////////////////////////////////////////////////////////////
-    #{ String Representations
+    # { String Representations
     ##////////////////////////////////////////////////////////////
 
     # Special handling for: reentrances, variables, expressions.
@@ -1002,31 +1054,33 @@ class FeatList(FeatStruct, list):
         # then assign it a unique identifier.
         if reentrances[id(self)]:
             assert id(self) not in reentrance_ids
-            reentrance_ids[id(self)] = repr(len(reentrance_ids)+1)
-            prefix = '(%s)' % reentrance_ids[id(self)]
+            reentrance_ids[id(self)] = repr(len(reentrance_ids) + 1)
+            prefix = "(%s)" % reentrance_ids[id(self)]
         else:
-            prefix = ''
+            prefix = ""
 
         segments = []
         for fval in self:
             if id(fval) in reentrance_ids:
-                segments.append('->(%s)' % reentrance_ids[id(fval)])
+                segments.append("->(%s)" % reentrance_ids[id(fval)])
             elif isinstance(fval, Variable):
                 segments.append(fval.name)
             elif isinstance(fval, Expression):
-                segments.append('%s' % fval)
+                segments.append("%s" % fval)
             elif isinstance(fval, FeatStruct):
                 segments.append(fval._repr(reentrances, reentrance_ids))
             else:
-                segments.append('%s' % unicode_repr(fval))
+                segments.append("%s" % repr(fval))
+
+        return "%s[%s]" % (prefix, ", ".join(segments))
 
-        return '%s[%s]' % (prefix, ', '.join(segments))
 
 ######################################################################
 # Variables & Bindings
 ######################################################################
 
-def substitute_bindings(fstruct, bindings, fs_class='default'):
+
+def substitute_bindings(fstruct, bindings, fs_class="default"):
     """
     Return the feature structure that is obtained by replacing each
     variable bound by ``bindings`` with its binding.  If a variable is
@@ -1037,28 +1091,35 @@ def substitute_bindings(fstruct, bindings, fs_class='default'):
     :type bindings: dict(Variable -> any)
     :param bindings: A dictionary mapping from variables to values.
     """
-    if fs_class == 'default': fs_class = _default_fs_class(fstruct)
+    if fs_class == "default":
+        fs_class = _default_fs_class(fstruct)
     fstruct = copy.deepcopy(fstruct)
     _substitute_bindings(fstruct, bindings, fs_class, set())
     return fstruct
 
+
 def _substitute_bindings(fstruct, bindings, fs_class, visited):
     # Visit each node only once:
-    if id(fstruct) in visited: return
+    if id(fstruct) in visited:
+        return
     visited.add(id(fstruct))
 
-    if _is_mapping(fstruct): items = fstruct.items()
-    elif _is_sequence(fstruct): items = enumerate(fstruct)
-    else: raise ValueError('Expected mapping or sequence')
+    if _is_mapping(fstruct):
+        items = fstruct.items()
+    elif _is_sequence(fstruct):
+        items = enumerate(fstruct)
+    else:
+        raise ValueError("Expected mapping or sequence")
     for (fname, fval) in items:
-        while (isinstance(fval, Variable) and fval in bindings):
+        while isinstance(fval, Variable) and fval in bindings:
             fval = fstruct[fname] = bindings[fval]
         if isinstance(fval, fs_class):
             _substitute_bindings(fval, bindings, fs_class, visited)
         elif isinstance(fval, SubstituteBindingsI):
             fstruct[fname] = fval.substitute_bindings(bindings)
 
-def retract_bindings(fstruct, bindings, fs_class='default'):
+
+def retract_bindings(fstruct, bindings, fs_class="default"):
     """
     Return the feature structure that is obtained by replacing each
     feature structure value that is bound by ``bindings`` with the
@@ -1070,21 +1131,27 @@ def retract_bindings(fstruct, bindings, fs_class='default'):
     values in ``bindings`` may be modified if they are contained in
     ``fstruct``.
     """
-    if fs_class == 'default': fs_class = _default_fs_class(fstruct)
+    if fs_class == "default":
+        fs_class = _default_fs_class(fstruct)
     (fstruct, new_bindings) = copy.deepcopy((fstruct, bindings))
     bindings.update(new_bindings)
-    inv_bindings = dict((id(val),var) for (var,val) in bindings.items())
+    inv_bindings = dict((id(val), var) for (var, val) in bindings.items())
     _retract_bindings(fstruct, inv_bindings, fs_class, set())
     return fstruct
 
+
 def _retract_bindings(fstruct, inv_bindings, fs_class, visited):
     # Visit each node only once:
-    if id(fstruct) in visited: return
+    if id(fstruct) in visited:
+        return
     visited.add(id(fstruct))
 
-    if _is_mapping(fstruct): items = fstruct.items()
-    elif _is_sequence(fstruct): items = enumerate(fstruct)
-    else: raise ValueError('Expected mapping or sequence')
+    if _is_mapping(fstruct):
+        items = fstruct.items()
+    elif _is_sequence(fstruct):
+        items = enumerate(fstruct)
+    else:
+        raise ValueError("Expected mapping or sequence")
     for (fname, fval) in items:
         if isinstance(fval, fs_class):
             if id(fval) in inv_bindings:
@@ -1092,21 +1159,27 @@ def _retract_bindings(fstruct, inv_bindings, fs_class, visited):
             _retract_bindings(fval, inv_bindings, fs_class, visited)
 
 
-def find_variables(fstruct, fs_class='default'):
+def find_variables(fstruct, fs_class="default"):
     """
     :return: The set of variables used by this feature structure.
     :rtype: set(Variable)
     """
-    if fs_class == 'default': fs_class = _default_fs_class(fstruct)
+    if fs_class == "default":
+        fs_class = _default_fs_class(fstruct)
     return _variables(fstruct, set(), fs_class, set())
 
+
 def _variables(fstruct, vars, fs_class, visited):
     # Visit each node only once:
-    if id(fstruct) in visited: return
+    if id(fstruct) in visited:
+        return
     visited.add(id(fstruct))
-    if _is_mapping(fstruct): items = fstruct.items()
-    elif _is_sequence(fstruct): items = enumerate(fstruct)
-    else: raise ValueError('Expected mapping or sequence')
+    if _is_mapping(fstruct):
+        items = fstruct.items()
+    elif _is_sequence(fstruct):
+        items = enumerate(fstruct)
+    else:
+        raise ValueError("Expected mapping or sequence")
     for (fname, fval) in items:
         if isinstance(fval, Variable):
             vars.add(fval)
@@ -1116,8 +1189,10 @@ def _variables(fstruct, vars, fs_class, visited):
             vars.update(fval.variables())
     return vars
 
-def rename_variables(fstruct, vars=None, used_vars=(), new_vars=None,
-                     fs_class='default'):
+
+def rename_variables(
+    fstruct, vars=None, used_vars=(), new_vars=None, fs_class="default"
+):
     """
     Return the feature structure that is obtained by replacing
     any of this feature structure's variables that are in ``vars``
@@ -1159,26 +1234,36 @@ def rename_variables(fstruct, vars=None, used_vars=(), new_vars=None,
 
     If new_vars is not specified, then an empty dictionary is used.
     """
-    if fs_class == 'default': fs_class = _default_fs_class(fstruct)
+    if fs_class == "default":
+        fs_class = _default_fs_class(fstruct)
 
     # Default values:
-    if new_vars is None: new_vars = {}
-    if vars is None: vars = find_variables(fstruct, fs_class)
-    else: vars = set(vars)
+    if new_vars is None:
+        new_vars = {}
+    if vars is None:
+        vars = find_variables(fstruct, fs_class)
+    else:
+        vars = set(vars)
 
     # Add our own variables to used_vars.
     used_vars = find_variables(fstruct, fs_class).union(used_vars)
 
     # Copy ourselves, and rename variables in the copy.
-    return _rename_variables(copy.deepcopy(fstruct), vars, used_vars,
-                             new_vars, fs_class, set())
+    return _rename_variables(
+        copy.deepcopy(fstruct), vars, used_vars, new_vars, fs_class, set()
+    )
+
 
 def _rename_variables(fstruct, vars, used_vars, new_vars, fs_class, visited):
-    if id(fstruct) in visited: return
+    if id(fstruct) in visited:
+        return
     visited.add(id(fstruct))
-    if _is_mapping(fstruct): items = fstruct.items()
-    elif _is_sequence(fstruct): items = enumerate(fstruct)
-    else: raise ValueError('Expected mapping or sequence')
+    if _is_mapping(fstruct):
+        items = fstruct.items()
+    elif _is_sequence(fstruct):
+        items = enumerate(fstruct)
+    else:
+        raise ValueError("Expected mapping or sequence")
     for (fname, fval) in items:
         if isinstance(fval, Variable):
             # If it's in new_vars, then rebind it.
@@ -1190,8 +1275,7 @@ def _rename_variables(fstruct, vars, used_vars, new_vars, fs_class, visited):
                 fstruct[fname] = new_vars[fval]
                 used_vars.add(new_vars[fval])
         elif isinstance(fval, fs_class):
-            _rename_variables(fval, vars, used_vars, new_vars,
-                              fs_class, visited)
+            _rename_variables(fval, vars, used_vars, new_vars, fs_class, visited)
         elif isinstance(fval, SubstituteBindingsI):
             # Pick new names for any variables in `vars`
             for var in fval.variables():
@@ -1202,21 +1286,27 @@ def _rename_variables(fstruct, vars, used_vars, new_vars, fs_class, visited):
             fstruct[fname] = fval.substitute_bindings(new_vars)
     return fstruct
 
+
 def _rename_variable(var, used_vars):
-    name, n = re.sub('\d+$', '', var.name), 2
-    if not name: name = '?'
-    while Variable('%s%s' % (name, n)) in used_vars: n += 1
-    return Variable('%s%s' % (name, n))
+    name, n = re.sub(r"\d+$", "", var.name), 2
+    if not name:
+        name = "?"
+    while Variable("%s%s" % (name, n)) in used_vars:
+        n += 1
+    return Variable("%s%s" % (name, n))
+
 
-def remove_variables(fstruct, fs_class='default'):
+def remove_variables(fstruct, fs_class="default"):
     """
     :rtype: FeatStruct
     :return: The feature structure that is obtained by deleting
         all features whose values are ``Variables``.
     """
-    if fs_class == 'default': fs_class = _default_fs_class(fstruct)
+    if fs_class == "default":
+        fs_class = _default_fs_class(fstruct)
     return _remove_variables(copy.deepcopy(fstruct), fs_class, set())
 
+
 def _remove_variables(fstruct, fs_class, visited):
     if id(fstruct) in visited:
         return
@@ -1227,7 +1317,7 @@ def _remove_variables(fstruct, fs_class, visited):
     elif _is_sequence(fstruct):
         items = list(enumerate(fstruct))
     else:
-        raise ValueError('Expected mapping or sequence')
+        raise ValueError("Expected mapping or sequence")
 
     for (fname, fval) in items:
         if isinstance(fval, Variable):
@@ -1241,23 +1331,32 @@ def _remove_variables(fstruct, fs_class, visited):
 # Unification
 ######################################################################
 
-@python_2_unicode_compatible
+
 class _UnificationFailure(object):
     def __repr__(self):
-        return 'nltk.featstruct.UnificationFailure'
+        return "nltk.featstruct.UnificationFailure"
+
 
 UnificationFailure = _UnificationFailure()
 """A unique value used to indicate unification failure.  It can be
    returned by ``Feature.unify_base_values()`` or by custom ``fail()``
    functions to indicate that unification should fail."""
 
+
 # The basic unification algorithm:
 #   1. Make copies of self and other (preserving reentrance)
 #   2. Destructively unify self and other
 #   3. Apply forward pointers, to preserve reentrance.
 #   4. Replace bound variables with their values.
-def unify(fstruct1, fstruct2, bindings=None, trace=False,
-          fail=None, rename_vars=True, fs_class='default'):
+def unify(
+    fstruct1,
+    fstruct2,
+    bindings=None,
+    trace=False,
+    fail=None,
+    rename_vars=True,
+    fs_class="default",
+):
     """
     Unify ``fstruct1`` with ``fstruct2``, and return the resulting feature
     structure.  This unified feature structure is the minimal
@@ -1302,25 +1402,29 @@ def unify(fstruct1, fstruct2, bindings=None, trace=False,
     """
     # Decide which class(es) will be treated as feature structures,
     # for the purposes of unification.
-    if fs_class == 'default':
+    if fs_class == "default":
         fs_class = _default_fs_class(fstruct1)
         if _default_fs_class(fstruct2) != fs_class:
-            raise ValueError("Mixing FeatStruct objects with Python "
-                             "dicts and lists is not supported.")
+            raise ValueError(
+                "Mixing FeatStruct objects with Python "
+                "dicts and lists is not supported."
+            )
     assert isinstance(fstruct1, fs_class)
     assert isinstance(fstruct2, fs_class)
 
     # If bindings are unspecified, use an empty set of bindings.
-    user_bindings = (bindings is not None)
-    if bindings is None: bindings = {}
+    user_bindings = bindings is not None
+    if bindings is None:
+        bindings = {}
 
     # Make copies of fstruct1 and fstruct2 (since the unification
     # algorithm is destructive). Do it all at once, to preserve
     # reentrance links between fstruct1 and fstruct2.  Copy bindings
     # as well, in case there are any bound vars that contain parts
     # of fstruct1 or fstruct2.
-    (fstruct1copy, fstruct2copy, bindings_copy) = (
-        copy.deepcopy((fstruct1, fstruct2, bindings)))
+    (fstruct1copy, fstruct2copy, bindings_copy) = copy.deepcopy(
+        (fstruct1, fstruct2, bindings)
+    )
 
     # Copy the bindings back to the original bindings dict.
     bindings.update(bindings_copy)
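
The numbered comment above summarizes the algorithm that `unify()` implements. A minimal usage sketch, assuming `FeatStruct` and `unify` import from `nltk.featstruct`:

    from nltk.featstruct import FeatStruct, unify

    fs1 = FeatStruct("[agr=[num=pl], subj=?x]")
    fs2 = FeatStruct("[agr=?a, subj=[person=3]]")
    bindings = {}
    result = unify(fs1, fs2, bindings=bindings)
    print(result)    # roughly: [agr=[num='pl'], subj=[person=3]]
    print(bindings)  # the caller's dict now holds the collected bindings
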
@@ -1332,37 +1436,49 @@ def unify(fstruct1, fstruct2, bindings=None, trace=False,
 
     # Do the actual unification.  If it fails, return None.
     forward = {}
-    if trace: _trace_unify_start((), fstruct1copy, fstruct2copy)
-    try: result = _destructively_unify(fstruct1copy, fstruct2copy, bindings,
-                                       forward, trace, fail, fs_class, ())
-    except _UnificationFailureError: return None
+    if trace:
+        _trace_unify_start((), fstruct1copy, fstruct2copy)
+    try:
+        result = _destructively_unify(
+            fstruct1copy, fstruct2copy, bindings, forward, trace, fail, fs_class, ()
+        )
+    except _UnificationFailureError:
+        return None
 
     # _destructively_unify might return UnificationFailure, e.g. if we
     # tried to unify a mapping with a sequence.
     if result is UnificationFailure:
-        if fail is None: return None
-        else: return fail(fstruct1copy, fstruct2copy, ())
+        if fail is None:
+            return None
+        else:
+            return fail(fstruct1copy, fstruct2copy, ())
 
     # Replace any feature structure that has a forward pointer
     # with the target of its forward pointer.
     result = _apply_forwards(result, forward, fs_class, set())
-    if user_bindings: _apply_forwards_to_bindings(forward, bindings)
+    if user_bindings:
+        _apply_forwards_to_bindings(forward, bindings)
 
     # Replace bound vars with values.
     _resolve_aliases(bindings)
     _substitute_bindings(result, bindings, fs_class, set())
 
     # Return the result.
-    if trace: _trace_unify_succeed((), result)
-    if trace: _trace_bindings((), bindings)
+    if trace:
+        _trace_unify_succeed((), result)
+    if trace:
+        _trace_bindings((), bindings)
     return result
 
+
 class _UnificationFailureError(Exception):
     """An exception that is used by ``_destructively_unify`` to abort
     unification when a failure is encountered."""
 
-def _destructively_unify(fstruct1, fstruct2, bindings, forward,
-                         trace, fail, fs_class, path):
+
+def _destructively_unify(
+    fstruct1, fstruct2, bindings, forward, trace, fail, fs_class, path
+):
     """
     Attempt to unify ``fstruct1`` and ``fstruct2`` by modifying them
     in-place.  If the unification succeeds, then ``fstruct1`` will
@@ -1388,7 +1504,8 @@ def _destructively_unify(fstruct1, fstruct2, bindings, forward,
     # Note: this, together with the forward pointers, ensures
     # that unification will terminate even for cyclic structures.
     if fstruct1 is fstruct2:
-        if trace: _trace_unify_identity(path, fstruct1)
+        if trace:
+            _trace_unify_identity(path, fstruct1)
         return fstruct1
 
     # Set fstruct2's forward pointer to point to fstruct1; this makes
@@ -1400,10 +1517,10 @@ def _destructively_unify(fstruct1, fstruct2, bindings, forward,
     # Unifying two mappings:
     if _is_mapping(fstruct1) and _is_mapping(fstruct2):
         for fname in fstruct1:
-            if getattr(fname, 'default', None) is not None:
+            if getattr(fname, "default", None) is not None:
                 fstruct2.setdefault(fname, fname.default)
         for fname in fstruct2:
-            if getattr(fname, 'default', None) is not None:
+            if getattr(fname, "default", None) is not None:
                 fstruct1.setdefault(fname, fname.default)
 
         # Unify any values that are defined in both fstruct1 and
@@ -1414,12 +1531,20 @@ def _destructively_unify(fstruct1, fstruct2, bindings, forward,
         for fname, fval2 in sorted(fstruct2.items()):
             if fname in fstruct1:
                 fstruct1[fname] = _unify_feature_values(
-                    fname, fstruct1[fname], fval2, bindings,
-                    forward, trace, fail, fs_class, path+(fname,))
+                    fname,
+                    fstruct1[fname],
+                    fval2,
+                    bindings,
+                    forward,
+                    trace,
+                    fail,
+                    fs_class,
+                    path + (fname,),
+                )
             else:
                 fstruct1[fname] = fval2
 
-        return fstruct1 # Contains the unified value.
+        return fstruct1  # Contains the unified value.
 
     # Unifying two sequences:
     elif _is_sequence(fstruct1) and _is_sequence(fstruct2):
@@ -1430,22 +1555,33 @@ def _destructively_unify(fstruct1, fstruct2, bindings, forward,
         # Unify corresponding values in fstruct1 and fstruct2.
         for findex in range(len(fstruct1)):
             fstruct1[findex] = _unify_feature_values(
-                findex, fstruct1[findex], fstruct2[findex], bindings,
-                forward, trace, fail, fs_class, path+(findex,))
-
-        return fstruct1 # Contains the unified value.
+                findex,
+                fstruct1[findex],
+                fstruct2[findex],
+                bindings,
+                forward,
+                trace,
+                fail,
+                fs_class,
+                path + (findex,),
+            )
+
+        return fstruct1  # Contains the unified value.
 
     # Unifying sequence & mapping: fail.  The failure function
     # doesn't get a chance to recover in this case.
-    elif ((_is_sequence(fstruct1) or _is_mapping(fstruct1)) and
-          (_is_sequence(fstruct2) or _is_mapping(fstruct2))):
+    elif (_is_sequence(fstruct1) or _is_mapping(fstruct1)) and (
+        _is_sequence(fstruct2) or _is_mapping(fstruct2)
+    ):
         return UnificationFailure
 
     # Unifying anything else: not allowed!
-    raise TypeError('Expected mappings or sequences')
+    raise TypeError("Expected mappings or sequences")
+
 
-def _unify_feature_values(fname, fval1, fval2, bindings, forward,
-                          trace, fail, fs_class, fpath):
+def _unify_feature_values(
+    fname, fval1, fval2, bindings, forward, trace, fail, fs_class, fpath
+):
     """
    Attempt to unify ``fval1`` and ``fval2``, and return the
     resulting unified value.  The method of unification will depend on
@@ -1462,11 +1598,14 @@ def _unify_feature_values(fname, fval1, fval2, bindings, forward,
       5. If they're both base values, then unify them.  By default,
          this will succeed if they are equal, and fail otherwise.
     """
-    if trace: _trace_unify_start(fpath, fval1, fval2)
+    if trace:
+        _trace_unify_start(fpath, fval1, fval2)
 
     # Look up the "canonical" copy of fval1 and fval2
-    while id(fval1) in forward: fval1 = forward[id(fval1)]
-    while id(fval2) in forward: fval2 = forward[id(fval2)]
+    while id(fval1) in forward:
+        fval1 = forward[id(fval1)]
+    while id(fval2) in forward:
+        fval2 = forward[id(fval2)]
 
     # If fval1 or fval2 is a bound variable, then
     # replace it by the variable's bound value.  This
@@ -1482,13 +1621,14 @@ def _unify_feature_values(fname, fval1, fval2, bindings, forward,
 
     # Case 1: Two feature structures (recursive case)
     if isinstance(fval1, fs_class) and isinstance(fval2, fs_class):
-        result = _destructively_unify(fval1, fval2, bindings, forward,
-                                      trace, fail, fs_class, fpath)
+        result = _destructively_unify(
+            fval1, fval2, bindings, forward, trace, fail, fs_class, fpath
+        )
 
     # Case 2: Two unbound variables (create alias)
-    elif (isinstance(fval1, Variable) and
-          isinstance(fval2, Variable)):
-        if fval1 != fval2: bindings[fval2] = fval1
+    elif isinstance(fval1, Variable) and isinstance(fval2, Variable):
+        if fval1 != fval2:
+            bindings[fval2] = fval1
         result = fval1
 
     # Case 3: An unbound variable and a value (bind)
@@ -1512,12 +1652,12 @@ def _unify_feature_values(fname, fval1, fval2, bindings, forward,
         elif isinstance(fval1, CustomFeatureValue):
             result = fval1.unify(fval2)
             # Sanity check: unification should be symmetric
-            if (isinstance(fval2, CustomFeatureValue) and
-                result != fval2.unify(fval1)):
+            if isinstance(fval2, CustomFeatureValue) and result != fval2.unify(fval1):
                 raise AssertionError(
-                    'CustomFeatureValue objects %r and %r disagree '
-                    'about unification value: %r vs. %r' %
-                    (fval1, fval2, result, fval2.unify(fval1)))
+                    "CustomFeatureValue objects %r and %r disagree "
+                    "about unification value: %r vs. %r"
+                    % (fval1, fval2, result, fval2.unify(fval1))
+                )
         elif isinstance(fval2, CustomFeatureValue):
             result = fval2.unify(fval1)
         # Case 5c: Simple values -- check if they're equal.
@@ -1541,8 +1681,10 @@ def _unify_feature_values(fname, fval1, fval2, bindings, forward,
    # If unification failed, call the failure function; it
     # might decide to continue anyway.
     if result is UnificationFailure:
-        if fail is not None: result = fail(fval1, fval2, fpath)
-        if trace: _trace_unify_fail(fpath[:-1], result)
+        if fail is not None:
+            result = fail(fval1, fval2, fpath)
+        if trace:
+            _trace_unify_fail(fpath[:-1], result)
         if result is UnificationFailure:
             raise _UnificationFailureError
 
@@ -1550,12 +1692,14 @@ def _unify_feature_values(fname, fval1, fval2, bindings, forward,
     if isinstance(result, fs_class):
         result = _apply_forwards(result, forward, fs_class, set())
 
-    if trace: _trace_unify_succeed(fpath, result)
+    if trace:
+        _trace_unify_succeed(fpath, result)
     if trace and isinstance(result, fs_class):
         _trace_bindings(fpath, bindings)
 
     return result
 
+
 def _apply_forwards_to_bindings(forward, bindings):
     """
     Replace any feature structure that has a forward pointer with
@@ -1566,21 +1710,27 @@ def _apply_forwards_to_bindings(forward, bindings):
             value = forward[id(value)]
         bindings[var] = value
 
+
 def _apply_forwards(fstruct, forward, fs_class, visited):
     """
     Replace any feature structure that has a forward pointer with
     the target of its forward pointer (to preserve reentrancy).
     """
     # Follow our own forwards pointers (if any)
-    while id(fstruct) in forward: fstruct = forward[id(fstruct)]
+    while id(fstruct) in forward:
+        fstruct = forward[id(fstruct)]
 
     # Visit each node only once:
-    if id(fstruct) in visited: return
+    if id(fstruct) in visited:
+        return
     visited.add(id(fstruct))
 
-    if _is_mapping(fstruct): items = fstruct.items()
-    elif _is_sequence(fstruct): items = enumerate(fstruct)
-    else: raise ValueError('Expected mapping or sequence')
+    if _is_mapping(fstruct):
+        items = fstruct.items()
+    elif _is_sequence(fstruct):
+        items = enumerate(fstruct)
+    else:
+        raise ValueError("Expected mapping or sequence")
     for fname, fval in items:
         if isinstance(fval, fs_class):
             # Replace w/ forwarded value.
@@ -1592,6 +1742,7 @@ def _apply_forwards(fstruct, forward, fs_class, visited):
 
     return fstruct
 
+
 def _resolve_aliases(bindings):
     """
     Replace any bound aliased vars with their binding; and replace
@@ -1601,42 +1752,56 @@ def _resolve_aliases(bindings):
         while isinstance(value, Variable) and value in bindings:
             value = bindings[var] = bindings[value]
 
+
 def _trace_unify_start(path, fval1, fval2):
     if path == ():
-        print('\nUnification trace:')
+        print("\nUnification trace:")
     else:
-        fullname = '.'.join("%s" % n for n in path)
-        print('  '+'|   '*(len(path)-1)+'|')
-        print('  '+'|   '*(len(path)-1)+'| Unify feature: %s' % fullname)
-    print('  '+'|   '*len(path)+' / '+_trace_valrepr(fval1))
-    print('  '+'|   '*len(path)+'|\\ '+_trace_valrepr(fval2))
+        fullname = ".".join("%s" % n for n in path)
+        print("  " + "|   " * (len(path) - 1) + "|")
+        print("  " + "|   " * (len(path) - 1) + "| Unify feature: %s" % fullname)
+    print("  " + "|   " * len(path) + " / " + _trace_valrepr(fval1))
+    print("  " + "|   " * len(path) + "|\\ " + _trace_valrepr(fval2))
+
+
 def _trace_unify_identity(path, fval1):
-    print('  '+'|   '*len(path)+'|')
-    print('  '+'|   '*len(path)+'| (identical objects)')
-    print('  '+'|   '*len(path)+'|')
-    print('  '+'|   '*len(path)+'+-->'+unicode_repr(fval1))
+    print("  " + "|   " * len(path) + "|")
+    print("  " + "|   " * len(path) + "| (identical objects)")
+    print("  " + "|   " * len(path) + "|")
+    print("  " + "|   " * len(path) + "+-->" + repr(fval1))
+
+
 def _trace_unify_fail(path, result):
-    if result is UnificationFailure: resume = ''
-    else: resume = ' (nonfatal)'
-    print('  '+'|   '*len(path)+'|   |')
-    print('  '+'X   '*len(path)+'X   X <-- FAIL'+resume)
+    if result is UnificationFailure:
+        resume = ""
+    else:
+        resume = " (nonfatal)"
+    print("  " + "|   " * len(path) + "|   |")
+    print("  " + "X   " * len(path) + "X   X <-- FAIL" + resume)
+
+
 def _trace_unify_succeed(path, fval1):
     # Print the result.
-    print('  '+'|   '*len(path)+'|')
-    print('  '+'|   '*len(path)+'+-->'+unicode_repr(fval1))
+    print("  " + "|   " * len(path) + "|")
+    print("  " + "|   " * len(path) + "+-->" + repr(fval1))
+
+
 def _trace_bindings(path, bindings):
     # Print the bindings (if any).
     if len(bindings) > 0:
-        binditems = sorted(bindings.items(), key=lambda v:v[0].name)
-        bindstr = '{%s}' % ', '.join(
-            '%s: %s' % (var, _trace_valrepr(val))
-            for (var, val) in binditems)
-        print('  '+'|   '*len(path)+'    Bindings: '+bindstr)
+        binditems = sorted(bindings.items(), key=lambda v: v[0].name)
+        bindstr = "{%s}" % ", ".join(
+            "%s: %s" % (var, _trace_valrepr(val)) for (var, val) in binditems
+        )
+        print("  " + "|   " * len(path) + "    Bindings: " + bindstr)
+
+
 def _trace_valrepr(val):
     if isinstance(val, Variable):
-        return '%s' % val
+        return "%s" % val
     else:
-        return '%s' % unicode_repr(val)
+        return "%s" % repr(val)
+
 
 def subsumes(fstruct1, fstruct2):
     """
@@ -1648,6 +1813,7 @@ def subsumes(fstruct1, fstruct2):
     """
     return fstruct2 == unify(fstruct1, fstruct2)
 
+
 def conflicts(fstruct1, fstruct2, trace=0):
     """
     Return a list of the feature paths of all features which are
@@ -1656,42 +1822,64 @@ def conflicts(fstruct1, fstruct2, trace=0):
     :rtype: list(tuple)
     """
     conflict_list = []
+
     def add_conflict(fval1, fval2, path):
         conflict_list.append(path)
         return fval1
+
     unify(fstruct1, fstruct2, fail=add_conflict, trace=trace)
     return conflict_list
 
+
 ######################################################################
 # Helper Functions
 ######################################################################
 
+
 def _is_mapping(v):
-    return hasattr(v, '__contains__') and hasattr(v, 'keys')
+    return hasattr(v, "__contains__") and hasattr(v, "keys")
+
 
 def _is_sequence(v):
-    return (hasattr(v, '__iter__') and hasattr(v, '__len__') and
-            not isinstance(v, string_types))
+    return (
+        hasattr(v, "__iter__")
+        and hasattr(v, "__len__")
+        and not isinstance(v, str)
+    )
+
 
 def _default_fs_class(obj):
-    if isinstance(obj, FeatStruct): return FeatStruct
-    if isinstance(obj, (dict, list)): return (dict, list)
+    if isinstance(obj, FeatStruct):
+        return FeatStruct
+    if isinstance(obj, (dict, list)):
+        return (dict, list)
     else:
-        raise ValueError('To unify objects of type %s, you must specify '
-                         'fs_class explicitly.' % obj.__class__.__name__)
+        raise ValueError(
+            "To unify objects of type %s, you must specify "
+            "fs_class explicitly." % obj.__class__.__name__
+        )
+
+
 ######################################################################
 # FeatureValueSet & FeatureValueTuple
 ######################################################################
 
+
 class SubstituteBindingsSequence(SubstituteBindingsI):
     """
    A mixin class for sequence classes that distributes variables() and
     substitute_bindings() over the object's elements.
     """
+
     def variables(self):
-        return ([elt for elt in self if isinstance(elt, Variable)] +
-                sum([list(elt.variables()) for elt in self
-                     if isinstance(elt, SubstituteBindingsI)], []))
+        return [elt for elt in self if isinstance(elt, Variable)] + sum(
+            [
+                list(elt.variables())
+                for elt in self
+                if isinstance(elt, SubstituteBindingsI)
+            ],
+            [],
+        )
 
     def substitute_bindings(self, bindings):
         return self.__class__([self.subst(v, bindings) for v in self])
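
The mixin above simply forwards `variables()` and `substitute_bindings()` to each element. A minimal sketch using `FeatureValueTuple` (defined just below), assuming `Variable` imports from `nltk.sem.logic`:

    from nltk.featstruct import FeatureValueTuple
    from nltk.sem.logic import Variable

    t = FeatureValueTuple([Variable('?x'), 'b'])
    print(t.variables())                                 # roughly: [Variable('?x')]
    print(t.substitute_bindings({Variable('?x'): 'a'}))  # roughly: (a, b)
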
@@ -1702,7 +1890,7 @@ class SubstituteBindingsSequence(SubstituteBindingsI):
         else:
             return bindings.get(v, v)
 
-@python_2_unicode_compatible
+
 class FeatureValueTuple(SubstituteBindingsSequence, tuple):
     """
     A base feature value that is a tuple of other base feature values.
@@ -1710,12 +1899,13 @@ class FeatureValueTuple(SubstituteBindingsSequence, tuple):
     variable substitutions will be propagated to the elements
    contained by the tuple.  A ``FeatureValueTuple`` is immutable.
     """
-    def __repr__(self): # [xx] really use %s here?
-        if len(self) == 0: return '()'
-        return '(%s)' % ', '.join('%s' % (b,) for b in self)
+
+    def __repr__(self):  # [xx] really use %s here?
+        if len(self) == 0:
+            return "()"
+        return "(%s)" % ", ".join("%s" % (b,) for b in self)
 
 
-@python_2_unicode_compatible
 class FeatureValueSet(SubstituteBindingsSequence, frozenset):
     """
     A base feature value that is a set of other base feature values.
@@ -1723,19 +1914,23 @@ class FeatureValueSet(SubstituteBindingsSequence, frozenset):
     variable substitutions will be propagated to the elements
     contained by the set.  A ``FeatureValueSet`` is immutable.
     """
-    def __repr__(self): # [xx] really use %s here?
-        if len(self) == 0: return '{/}' # distinguish from dict.
+
+    def __repr__(self):  # [xx] really use %s here?
+        if len(self) == 0:
+            return "{/}"  # distinguish from dict.
         # n.b., we sort the string reprs of our elements, to ensure
         # that our own repr is deterministic.
-        return '{%s}' % ', '.join(sorted('%s' % (b,) for b in self))
+        return "{%s}" % ", ".join(sorted("%s" % (b,) for b in self))
+
     __str__ = __repr__
 
-@python_2_unicode_compatible
+
 class FeatureValueUnion(SubstituteBindingsSequence, frozenset):
     """
     A base feature value that represents the union of two or more
     ``FeatureValueSet`` or ``Variable``.
     """
+
     def __new__(cls, values):
         # If values contains FeatureValueUnions, then collapse them.
         values = _flatten(values, FeatureValueUnion)
@@ -1757,14 +1953,15 @@ class FeatureValueUnion(SubstituteBindingsSequence, frozenset):
         # n.b., we sort the string reprs of our elements, to ensure
         # that our own repr is deterministic.  also, note that len(self)
         # is guaranteed to be 2 or more.
-        return '{%s}' % '+'.join(sorted('%s' % (b,) for b in self))
+        return "{%s}" % "+".join(sorted("%s" % (b,) for b in self))
+
 
-@python_2_unicode_compatible
 class FeatureValueConcat(SubstituteBindingsSequence, tuple):
     """
     A base feature value that represents the concatenation of two or
     more ``FeatureValueTuple`` or ``Variable``.
     """
+
     def __new__(cls, values):
         # If values contains FeatureValueConcats, then collapse them.
         values = _flatten(values, FeatureValueConcat)
@@ -1784,7 +1982,7 @@ class FeatureValueConcat(SubstituteBindingsSequence, tuple):
 
     def __repr__(self):
         # n.b.: len(self) is guaranteed to be 2 or more.
-        return '(%s)' % '+'.join('%s' % (b,) for b in self)
+        return "(%s)" % "+".join("%s" % (b,) for b in self)
 
 
 def _flatten(lst, cls):
@@ -1794,31 +1992,35 @@ def _flatten(lst, cls):
     """
     result = []
     for elt in lst:
-        if isinstance(elt, cls): result.extend(elt)
-        else: result.append(elt)
+        if isinstance(elt, cls):
+            result.extend(elt)
+        else:
+            result.append(elt)
     return result
 
+
 ######################################################################
 # Specialized Features
 ######################################################################
 
+
 @total_ordering
-@python_2_unicode_compatible
 class Feature(object):
     """
     A feature identifier that's specialized to put additional
     constraints, default values, etc.
     """
+
     def __init__(self, name, default=None, display=None):
-        assert display in (None, 'prefix', 'slash')
+        assert display in (None, "prefix", "slash")
 
-        self._name = name # [xx] rename to .identifier?
-        self._default = default # [xx] not implemented yet.
+        self._name = name  # [xx] rename to .identifier?
+        self._default = default  # [xx] not implemented yet.
         self._display = display
 
-        if self._display == 'prefix':
+        if self._display == "prefix":
             self._sortkey = (-1, self._name)
-        elif self._display == 'slash':
+        elif self._display == "slash":
             self._sortkey = (1, self._name)
         else:
             self._sortkey = (0, self._name)
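
Because a `Feature` hashes and sorts on its name, instances can serve directly as keys in a feature dictionary. A minimal sketch (the `CASE` feature is hypothetical, not part of this module):

    from nltk.featstruct import Feature, FeatStruct

    CASE = Feature('case', default='nom')
    fs = FeatStruct({CASE: 'acc', 'num': 'sg'})
    print(fs[CASE])    # 'acc'
    print(repr(CASE))  # *case*
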
@@ -1839,10 +2042,10 @@ class Feature(object):
         return self._display
 
     def __repr__(self):
-        return '*%s*' % self.name
+        return "*%s*" % self.name
 
     def __lt__(self, other):
-        if isinstance(other, string_types):
+        if isinstance(other, str):
             return True
         if not isinstance(other, Feature):
             raise_unorderable_types("<", self, other)
@@ -1857,9 +2060,9 @@ class Feature(object):
     def __hash__(self):
         return hash(self._name)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # These can be overridden by subclasses:
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def read_value(self, s, position, reentrances, parser):
         return parser.read_value(s, position, reentrances)
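
The hunk below shows the default `unify_base_values()`, which succeeds only when the two base values are equal. A hedged sketch of a subclass that relaxes this, modeled on `RangeFeature` further down (`MinFeature` is hypothetical):

    from nltk.featstruct import Feature, UnificationFailure

    class MinFeature(Feature):
        """Hypothetical feature whose base values unify to the minimum."""
        def unify_base_values(self, fval1, fval2, bindings):
            if fval1 is None:
                return fval2
            if fval2 is None:
                return fval1
            try:
                return min(fval1, fval2)
            except TypeError:          # incomparable values: fail
                return UnificationFailure
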
@@ -1869,35 +2072,46 @@ class Feature(object):
        If possible, return a single value.  If not, return
         the value ``UnificationFailure``.
         """
-        if fval1 == fval2: return fval1
-        else: return UnificationFailure
+        if fval1 == fval2:
+            return fval1
+        else:
+            return UnificationFailure
 
 
 class SlashFeature(Feature):
     def read_value(self, s, position, reentrances, parser):
         return parser.read_partial(s, position, reentrances)
 
+
 class RangeFeature(Feature):
-    RANGE_RE = re.compile('(-?\d+):(-?\d+)')
+    RANGE_RE = re.compile(r"(-?\d+):(-?\d+)")
+
     def read_value(self, s, position, reentrances, parser):
         m = self.RANGE_RE.match(s, position)
-        if not m: raise ValueError('range', position)
+        if not m:
+            raise ValueError("range", position)
         return (int(m.group(1)), int(m.group(2))), m.end()
 
     def unify_base_values(self, fval1, fval2, bindings):
-        if fval1 is None: return fval2
-        if fval2 is None: return fval1
+        if fval1 is None:
+            return fval2
+        if fval2 is None:
+            return fval1
         rng = max(fval1[0], fval2[0]), min(fval1[1], fval2[1])
-        if rng[1] < rng[0]: return UnificationFailure
+        if rng[1] < rng[0]:
+            return UnificationFailure
         return rng
 
-SLASH = SlashFeature('slash', default=False, display='slash')
-TYPE = Feature('type', display='prefix')
+
+SLASH = SlashFeature("slash", default=False, display="slash")
+TYPE = Feature("type", display="prefix")
+
 
 ######################################################################
 # Specialized Feature Values
 ######################################################################
 
+
 @total_ordering
 class CustomFeatureValue(object):
     """
@@ -1917,48 +2131,57 @@ class CustomFeatureValue(object):
     Subclasses must define ``unify()``, ``__eq__()`` and ``__lt__()``.
     Subclasses may also wish to define ``__hash__()``.
     """
+
     def unify(self, other):
         """
         If this base value unifies with ``other``, then return the
         unified value.  Otherwise, return ``UnificationFailure``.
         """
-        raise NotImplementedError('abstract base class')
+        raise NotImplementedError("abstract base class")
 
     def __eq__(self, other):
-        raise NotImplementedError('abstract base class')
+        raise NotImplementedError("abstract base class")
 
     def __ne__(self, other):
         return not self == other
 
     def __lt__(self, other):
-        raise NotImplementedError('abstract base class')
+        raise NotImplementedError("abstract base class")
 
     def __hash__(self):
-        raise TypeError('%s objects or unhashable' % self.__class__.__name__)
+        raise TypeError("%s objects are unhashable" % self.__class__.__name__)
+
 
 ######################################################################
 # Feature Structure Reader
 ######################################################################
 
+
 class FeatStructReader(object):
-    def __init__(self, features=(SLASH, TYPE), fdict_class=FeatStruct,
-                 flist_class=FeatList, logic_parser=None):
-        self._features = dict((f.name,f) for f in features)
+    def __init__(
+        self,
+        features=(SLASH, TYPE),
+        fdict_class=FeatStruct,
+        flist_class=FeatList,
+        logic_parser=None,
+    ):
+        self._features = dict((f.name, f) for f in features)
         self._fdict_class = fdict_class
         self._flist_class = flist_class
         self._prefix_feature = None
         self._slash_feature = None
         for feature in features:
-            if feature.display == 'slash':
+            if feature.display == "slash":
                 if self._slash_feature:
-                    raise ValueError('Multiple features w/ display=slash')
+                    raise ValueError("Multiple features w/ display=slash")
                 self._slash_feature = feature
-            if feature.display == 'prefix':
+            if feature.display == "prefix":
                 if self._prefix_feature:
-                    raise ValueError('Multiple features w/ display=prefix')
+                    raise ValueError("Multiple features w/ display=prefix")
                 self._prefix_feature = feature
-        self._features_with_defaults = [feature for feature in features
-                                        if feature.default is not None]
+        self._features_with_defaults = [
+            feature for feature in features if feature.default is not None
+        ]
         if logic_parser is None:
             logic_parser = LogicParser()
         self._logic_parser = logic_parser
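
A `FeatStruct` built from a string is parsed by this reader; the `fromstring` entry point appears in the next hunk. A minimal usage sketch:

    from nltk.featstruct import FeatStructReader

    reader = FeatStructReader()
    fs = reader.fromstring("[pos=N, agr=[num=pl, gender=fem]]")
    print(fs['agr']['num'])   # 'pl'
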
@@ -1985,22 +2208,28 @@ class FeatStructReader(object):
         s = s.strip()
         value, position = self.read_partial(s, 0, {}, fstruct)
         if position != len(s):
-            self._error(s, 'end of string', position)
+            self._error(s, "end of string", position)
         return value
 
-    _START_FSTRUCT_RE = re.compile(r'\s*(?:\((\d+)\)\s*)?(\??[\w-]+)?(\[)')
-    _END_FSTRUCT_RE = re.compile(r'\s*]\s*')
-    _SLASH_RE = re.compile(r'/')
+    _START_FSTRUCT_RE = re.compile(r"\s*(?:\((\d+)\)\s*)?(\??[\w-]+)?(\[)")
+    _END_FSTRUCT_RE = re.compile(r"\s*]\s*")
+    _SLASH_RE = re.compile(r"/")
     _FEATURE_NAME_RE = re.compile(r'\s*([+-]?)([^\s\(\)<>"\'\-=\[\],]+)\s*')
-    _REENTRANCE_RE = re.compile(r'\s*->\s*')
-    _TARGET_RE = re.compile(r'\s*\((\d+)\)\s*')
-    _ASSIGN_RE = re.compile(r'\s*=\s*')
-    _COMMA_RE = re.compile(r'\s*,\s*')
-    _BARE_PREFIX_RE = re.compile(r'\s*(?:\((\d+)\)\s*)?(\??[\w-]+\s*)()')
+    _REENTRANCE_RE = re.compile(r"\s*->\s*")
+    _TARGET_RE = re.compile(r"\s*\((\d+)\)\s*")
+    _ASSIGN_RE = re.compile(r"\s*=\s*")
+    _COMMA_RE = re.compile(r"\s*,\s*")
+    _BARE_PREFIX_RE = re.compile(r"\s*(?:\((\d+)\)\s*)?(\??[\w-]+\s*)()")
     # This one is used to distinguish fdicts from flists:
-    _START_FDICT_RE = re.compile(r'(%s)|(%s\s*(%s\s*(=|->)|[+-]%s|\]))' % (
-        _BARE_PREFIX_RE.pattern, _START_FSTRUCT_RE.pattern,
-        _FEATURE_NAME_RE.pattern, _FEATURE_NAME_RE.pattern))
+    _START_FDICT_RE = re.compile(
+        r"(%s)|(%s\s*(%s\s*(=|->)|[+-]%s|\]))"
+        % (
+            _BARE_PREFIX_RE.pattern,
+            _START_FSTRUCT_RE.pattern,
+            _FEATURE_NAME_RE.pattern,
+            _FEATURE_NAME_RE.pattern,
+        )
+    )
 
     def read_partial(self, s, position=0, reentrances=None, fstruct=None):
         """
@@ -2014,11 +2243,13 @@ class FeatStructReader(object):
             parsing and the position where the parsed feature structure ends.
        :rtype: tuple(FeatStruct, int)
         """
-        if reentrances is None: reentrances = {}
+        if reentrances is None:
+            reentrances = {}
         try:
             return self._read_partial(s, position, reentrances, fstruct)
         except ValueError as e:
-            if len(e.args) != 2: raise
+            if len(e.args) != 2:
+                raise
             self._error(s, *e.args)
 
     def _read_partial(self, s, position, reentrances, fstruct=None):
@@ -2034,31 +2265,30 @@ class FeatStructReader(object):
         if not match:
             match = self._BARE_PREFIX_RE.match(s, position)
             if not match:
-                raise ValueError('open bracket or identifier', position)
+                raise ValueError("open bracket or identifier", position)
         position = match.end()
 
        # If there was an identifier, record it.
         if match.group(1):
             identifier = match.group(1)
             if identifier in reentrances:
-                raise ValueError('new identifier', match.start(1))
+                raise ValueError("new identifier", match.start(1))
             reentrances[identifier] = fstruct
 
         if isinstance(fstruct, FeatDict):
             fstruct.clear()
-            return self._read_partial_featdict(s, position, match,
-                                                reentrances, fstruct)
+            return self._read_partial_featdict(s, position, match, reentrances, fstruct)
         else:
             del fstruct[:]
-            return self._read_partial_featlist(s, position, match,
-                                                reentrances, fstruct)
+            return self._read_partial_featlist(s, position, match, reentrances, fstruct)
 
-    def _read_partial_featlist(self, s, position, match,
-                                reentrances, fstruct):
+    def _read_partial_featlist(self, s, position, match, reentrances, fstruct):
         # Prefix features are not allowed:
-        if match.group(2): raise ValueError('open bracket')
+        if match.group(2):
+            raise ValueError("open bracket")
         # Bare prefixes are not allowed:
-        if not match.group(3): raise ValueError('open bracket')
+        if not match.group(3):
+            raise ValueError("open bracket")
 
         # Build a list of the features defined by the structure.
         while position < len(s):
@@ -2072,17 +2302,17 @@ class FeatStructReader(object):
             if match:
                 position = match.end()
                 match = self._TARGET_RE.match(s, position)
-                if not match: raise ValueError('identifier', position)
+                if not match:
+                    raise ValueError("identifier", position)
                 target = match.group(1)
                 if target not in reentrances:
-                    raise ValueError('bound identifier', position)
+                    raise ValueError("bound identifier", position)
                 position = match.end()
                 fstruct.append(reentrances[target])
 
             # Anything else is a value.
             else:
-                value, position = (
-                    self._read_value(0, s, position, reentrances))
+                value, position = self._read_value(0, s, position, reentrances)
                 fstruct.append(value)
 
             # If there's a close bracket, handle it at the top of the loop.
@@ -2091,20 +2321,20 @@ class FeatStructReader(object):
 
             # Otherwise, there should be a comma
             match = self._COMMA_RE.match(s, position)
-            if match is None: raise ValueError('comma', position)
+            if match is None:
+                raise ValueError("comma", position)
             position = match.end()
 
         # We never saw a close bracket.
-        raise ValueError('close bracket', position)
+        raise ValueError("close bracket", position)
 
-    def _read_partial_featdict(self, s, position, match,
-                                reentrances, fstruct):
+    def _read_partial_featdict(self, s, position, match, reentrances, fstruct):
         # If there was a prefix feature, record it.
         if match.group(2):
             if self._prefix_feature is None:
-                raise ValueError('open bracket or identifier', match.start(2))
+                raise ValueError("open bracket or identifier", match.start(2))
             prefixval = match.group(2).strip()
-            if prefixval.startswith('?'):
+            if prefixval.startswith("?"):
                 prefixval = Variable(prefixval)
             fstruct[self._prefix_feature] = prefixval
 
@@ -2130,23 +2360,26 @@ class FeatStructReader(object):
 
             # Get the feature name's name
             match = self._FEATURE_NAME_RE.match(s, position)
-            if match is None: raise ValueError('feature name', position)
+            if match is None:
+                raise ValueError("feature name", position)
             name = match.group(2)
             position = match.end()
 
             # Check if it's a special feature.
-            if name[0] == '*' and name[-1] == '*':
+            if name[0] == "*" and name[-1] == "*":
                 name = self._features.get(name[1:-1])
                 if name is None:
-                    raise ValueError('known special feature', match.start(2))
+                    raise ValueError("known special feature", match.start(2))
 
             # Check if this feature has a value already.
             if name in fstruct:
-                raise ValueError('new name', match.start(2))
+                raise ValueError("new name", match.start(2))
 
             # Boolean value ("+name" or "-name")
-            if match.group(1) == '+': value = True
-            if match.group(1) == '-': value = False
+            if match.group(1) == "+":
+                value = True
+            if match.group(1) == "-":
+                value = False
 
             # Reentrance link ("-> (target)")
             if value is None:
@@ -2155,10 +2388,10 @@ class FeatStructReader(object):
                     position = match.end()
                     match = self._TARGET_RE.match(s, position)
                     if not match:
-                        raise ValueError('identifier', position)
+                        raise ValueError("identifier", position)
                     target = match.group(1)
                     if target not in reentrances:
-                        raise ValueError('bound identifier', position)
+                        raise ValueError("bound identifier", position)
                     position = match.end()
                     value = reentrances[target]
 
@@ -2167,11 +2400,10 @@ class FeatStructReader(object):
                 match = self._ASSIGN_RE.match(s, position)
                 if match:
                     position = match.end()
-                    value, position = (
-                        self._read_value(name, s, position, reentrances))
+                    value, position = self._read_value(name, s, position, reentrances)
                 # None of the above: error.
                 else:
-                    raise ValueError('equals sign', position)
+                    raise ValueError("equals sign", position)
 
             # Store the value.
             fstruct[name] = value
@@ -2182,11 +2414,12 @@ class FeatStructReader(object):
 
             # Otherwise, there should be a comma
             match = self._COMMA_RE.match(s, position)
-            if match is None: raise ValueError('comma', position)
+            if match is None:
+                raise ValueError("comma", position)
             position = match.end()
 
         # We never saw a close bracket.
-        raise ValueError('close bracket', position)
+        raise ValueError("close bracket", position)
 
     def _finalize(self, s, pos, reentrances, fstruct):
         """
@@ -2200,7 +2433,7 @@ class FeatStructReader(object):
             v, pos = self._read_value(name, s, match.end(), reentrances)
             fstruct[name] = v
        ## Add any default features.  -- handle in unification instead?
-        #for feature in self._features_with_defaults:
+        # for feature in self._features_with_defaults:
         #    fstruct.setdefault(feature, feature.default)
         # Return the value.
         return fstruct, pos
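
For reference, the `_error` helper in the next hunk points a caret at the position where parsing failed. A sketch of what a malformed string produces (output wording approximate):

    from nltk.featstruct import FeatStruct

    try:
        FeatStruct("[num=sg")   # missing the closing bracket
    except ValueError as err:
        print(err)
    # Error parsing feature structure
    #     [num=sg
    #            ^ Expected close bracket
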
@@ -2217,20 +2450,25 @@ class FeatStructReader(object):
             if match:
                 handler_func = getattr(self, handler)
                 return handler_func(s, position, reentrances, match)
-        raise ValueError('value', position)
+        raise ValueError("value", position)
 
     def _error(self, s, expected, position):
-        lines = s.split('\n')
+        lines = s.split("\n")
         while position > len(lines[0]):
-            position -= len(lines.pop(0))+1 # +1 for the newline.
-        estr = ('Error parsing feature structure\n    ' +
-                lines[0] + '\n    ' + ' '*position + '^ ' +
-                'Expected %s' % expected)
+            position -= len(lines.pop(0)) + 1  # +1 for the newline.
+        estr = (
+            "Error parsing feature structure\n    "
+            + lines[0]
+            + "\n    "
+            + " " * position
+            + "^ "
+            + "Expected %s" % expected
+        )
         raise ValueError(estr)
 
-    #////////////////////////////////////////////////////////////
-    #{ Value Readers
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Value Readers
+    # ////////////////////////////////////////////////////////////
 
     #: A table indicating how feature values should be processed.  Each
     #: entry in the table is a pair (handler, regexp).  The first entry
@@ -2243,19 +2481,21 @@ class FeatStructReader(object):
     #: the string position where the value ended.  (n.b.: order is
     #: important here!)
     VALUE_HANDLERS = [
-        ('read_fstruct_value', _START_FSTRUCT_RE),
-        ('read_var_value', re.compile(r'\?[a-zA-Z_][a-zA-Z0-9_]*')),
-        ('read_str_value', re.compile("[uU]?[rR]?(['\"])")),
-        ('read_int_value', re.compile(r'-?\d+')),
-        ('read_sym_value', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')),
-        ('read_app_value', re.compile(r'<(app)\((\?[a-z][a-z]*)\s*,'
-                                       r'\s*(\?[a-z][a-z]*)\)>')),
-#       ('read_logic_value', re.compile(r'<([^>]*)>')),
-        #lazily match any character after '<' until we hit a '>' not preceded by '-'
-        ('read_logic_value', re.compile(r'<(.*?)(?<!-)>')),
-        ('read_set_value', re.compile(r'{')),
-        ('read_tuple_value', re.compile(r'\(')),
-        ]
+        ("read_fstruct_value", _START_FSTRUCT_RE),
+        ("read_var_value", re.compile(r"\?[a-zA-Z_][a-zA-Z0-9_]*")),
+        ("read_str_value", re.compile("[uU]?[rR]?(['\"])")),
+        ("read_int_value", re.compile(r"-?\d+")),
+        ("read_sym_value", re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")),
+        (
+            "read_app_value",
+            re.compile(r"<(app)\((\?[a-z][a-z]*)\s*," r"\s*(\?[a-z][a-z]*)\)>"),
+        ),
+        #       ('read_logic_value', re.compile(r'<([^>]*)>')),
+        # lazily match any character after '<' until we hit a '>' not preceded by '-'
+        ("read_logic_value", re.compile(r"<(.*?)(?<!-)>")),
+        ("read_set_value", re.compile(r"{")),
+        ("read_tuple_value", re.compile(r"\(")),
+    ]
 
     def read_fstruct_value(self, s, position, reentrances, match):
         return self.read_partial(s, position, reentrances)
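
Taken together, the handler table above dispatches on the text that follows `=`. A minimal sketch mixing several of the supported value types:

    from nltk.featstruct import FeatStruct

    fs = FeatStruct("[name='Kim', age=42, sem=<walk(x)>, who=?w]")
    print(type(fs['age']))   # int, via read_int_value
    print(fs['sem'])         # a logic Expression, via read_logic_value
    print(fs['who'])         # the variable ?w, via read_var_value
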
@@ -2270,14 +2510,15 @@ class FeatStructReader(object):
     def read_var_value(self, s, position, reentrances, match):
         return Variable(match.group()), match.end()
 
-    _SYM_CONSTS = {'None':None, 'True':True, 'False':False}
+    _SYM_CONSTS = {"None": None, "True": True, "False": False}
+
     def read_sym_value(self, s, position, reentrances, match):
         val, end = match.group(), match.end()
         return self._SYM_CONSTS.get(val, val), end
 
     def read_app_value(self, s, position, reentrances, match):
         """Mainly included for backwards compat."""
-        return self._logic_parser.parse('%s(%s)' % match.group(2,3)), match.end()
+        return self._logic_parser.parse("%s(%s)" % match.group(2, 3)), match.end()
 
     def read_logic_value(self, s, position, reentrances, match):
         try:
@@ -2287,94 +2528,107 @@ class FeatStructReader(object):
                 raise ValueError()
             return expr, match.end()
         except ValueError:
-            raise ValueError('logic expression', match.start(1))
+            raise ValueError("logic expression", match.start(1))
 
     def read_tuple_value(self, s, position, reentrances, match):
-        return self._read_seq_value(s, position, reentrances, match, ')',
-                                     FeatureValueTuple, FeatureValueConcat)
+        return self._read_seq_value(
+            s, position, reentrances, match, ")", FeatureValueTuple, FeatureValueConcat
+        )
 
     def read_set_value(self, s, position, reentrances, match):
-        return self._read_seq_value(s, position, reentrances, match, '}',
-                                     FeatureValueSet, FeatureValueUnion)
+        return self._read_seq_value(
+            s, position, reentrances, match, "}", FeatureValueSet, FeatureValueUnion
+        )
 
-    def _read_seq_value(self, s, position, reentrances, match,
-                         close_paren, seq_class, plus_class):
+    def _read_seq_value(
+        self, s, position, reentrances, match, close_paren, seq_class, plus_class
+    ):
         """
         Helper function used by read_tuple_value and read_set_value.
         """
         cp = re.escape(close_paren)
         position = match.end()
         # Special syntax for empty tuples:
-        m = re.compile(r'\s*/?\s*%s' % cp).match(s, position)
-        if m: return seq_class(), m.end()
+        m = re.compile(r"\s*/?\s*%s" % cp).match(s, position)
+        if m:
+            return seq_class(), m.end()
         # Read values:
         values = []
         seen_plus = False
         while True:
             # Close paren: return value.
-            m = re.compile(r'\s*%s' % cp).match(s, position)
+            m = re.compile(r"\s*%s" % cp).match(s, position)
             if m:
-                if seen_plus: return plus_class(values), m.end()
-                else: return seq_class(values), m.end()
+                if seen_plus:
+                    return plus_class(values), m.end()
+                else:
+                    return seq_class(values), m.end()
 
             # Read the next value.
             val, position = self.read_value(s, position, reentrances)
             values.append(val)
 
             # Comma or looking at close paren
-            m = re.compile(r'\s*(,|\+|(?=%s))\s*' % cp).match(s, position)
-            if not m: raise ValueError("',' or '+' or '%s'" % cp, position)
-            if m.group(1) == '+': seen_plus = True
+            m = re.compile(r"\s*(,|\+|(?=%s))\s*" % cp).match(s, position)
+            if not m:
+                raise ValueError("',' or '+' or '%s'" % cp, position)
+            if m.group(1) == "+":
+                seen_plus = True
             position = m.end()
 
+
 ######################################################################
-#{ Demo
+# { Demo
 ######################################################################
 
-def display_unification(fs1, fs2, indent='  '):
+
+def display_unification(fs1, fs2, indent="  "):
     # Print the two input feature structures, side by side.
-    fs1_lines = ("%s" % fs1).split('\n')
-    fs2_lines = ("%s" % fs2).split('\n')
+    fs1_lines = ("%s" % fs1).split("\n")
+    fs2_lines = ("%s" % fs2).split("\n")
     if len(fs1_lines) > len(fs2_lines):
-        blankline = '['+' '*(len(fs2_lines[0])-2)+']'
-        fs2_lines += [blankline]*len(fs1_lines)
+        blankline = "[" + " " * (len(fs2_lines[0]) - 2) + "]"
+        fs2_lines += [blankline] * len(fs1_lines)
     else:
-        blankline = '['+' '*(len(fs1_lines[0])-2)+']'
-        fs1_lines += [blankline]*len(fs2_lines)
+        blankline = "[" + " " * (len(fs1_lines[0]) - 2) + "]"
+        fs1_lines += [blankline] * len(fs2_lines)
     for (fs1_line, fs2_line) in zip(fs1_lines, fs2_lines):
-        print(indent + fs1_line + '   ' + fs2_line)
-    print(indent+'-'*len(fs1_lines[0])+'   '+'-'*len(fs2_lines[0]))
+        print(indent + fs1_line + "   " + fs2_line)
+    print(indent + "-" * len(fs1_lines[0]) + "   " + "-" * len(fs2_lines[0]))
 
-    linelen = len(fs1_lines[0])*2+3
-    print(indent+'|               |'.center(linelen))
-    print(indent+'+-----UNIFY-----+'.center(linelen))
-    print(indent+'|'.center(linelen))
-    print(indent+'V'.center(linelen))
+    linelen = len(fs1_lines[0]) * 2 + 3
+    print(indent + "|               |".center(linelen))
+    print(indent + "+-----UNIFY-----+".center(linelen))
+    print(indent + "|".center(linelen))
+    print(indent + "V".center(linelen))
 
     bindings = {}
 
     result = fs1.unify(fs2, bindings)
     if result is None:
-        print(indent+'(FAILED)'.center(linelen))
+        print(indent + "(FAILED)".center(linelen))
     else:
-        print('\n'.join(indent+l.center(linelen)
-                         for l in ("%s" % result).split('\n')))
+        print(
+            "\n".join(indent + l.center(linelen) for l in ("%s" % result).split("\n"))
+        )
         if bindings and len(bindings.bound_variables()) > 0:
             print(repr(bindings).center(linelen))
     return result
 
+
 def interactive_demo(trace=False):
     import random, sys
 
-    HELP = '''
+    HELP = """
     1-%d: Select the corresponding feature structure
     q: Quit
     t: Turn tracing on or off
     l: List all feature structures
     ?: Help
-    '''
+    """
 
-    print('''
+    print(
+        """
     This demo will repeatedly present you with a list of feature
     structures, and ask you to choose two for unification.  Whenever a
     new feature structure is generated, it is added to the list of
@@ -2383,35 +2637,41 @@ def interactive_demo(trace=False):
     random subset for you to choose between at a given time.  If you
     want to see the complete lists, type "l".  For a list of valid
     commands, type "?".
-    ''')
+    """
+    )
     print('Press "Enter" to continue...')
     sys.stdin.readline()
 
     fstruct_strings = [
-        '[agr=[number=sing, gender=masc]]',
-        '[agr=[gender=masc, person=3]]',
-        '[agr=[gender=fem, person=3]]',
-        '[subj=[agr=(1)[]], agr->(1)]',
-        '[obj=?x]', '[subj=?x]',
-        '[/=None]', '[/=NP]',
-        '[cat=NP]', '[cat=VP]', '[cat=PP]',
-        '[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]',
-        '[gender=masc, agr=?C]',
-        '[gender=?S, agr=[gender=?S,person=3]]'
-        ]
+        "[agr=[number=sing, gender=masc]]",
+        "[agr=[gender=masc, person=3]]",
+        "[agr=[gender=fem, person=3]]",
+        "[subj=[agr=(1)[]], agr->(1)]",
+        "[obj=?x]",
+        "[subj=?x]",
+        "[/=None]",
+        "[/=NP]",
+        "[cat=NP]",
+        "[cat=VP]",
+        "[cat=PP]",
+        "[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]",
+        "[gender=masc, agr=?C]",
+        "[gender=?S, agr=[gender=?S,person=3]]",
+    ]
 
-    all_fstructs = [(i, FeatStruct(fstruct_strings[i]))
-                    for i in range(len(fstruct_strings))]
+    all_fstructs = [
+        (i, FeatStruct(fstruct_strings[i])) for i in range(len(fstruct_strings))
+    ]
 
     def list_fstructs(fstructs):
         for i, fstruct in fstructs:
             print()
-            lines = ("%s" % fstruct).split('\n')
-            print('%3d: %s' % (i+1, lines[0]))
-            for line in lines[1:]: print('     '+line)
+            lines = ("%s" % fstruct).split("\n")
+            print("%3d: %s" % (i + 1, lines[0]))
+            for line in lines[1:]:
+                print("     " + line)
         print()
 
-
     while True:
         # Pick 5 feature structures at random from the master list.
         MAX_CHOICES = 5
@@ -2420,32 +2680,40 @@ def interactive_demo(trace=False):
         else:
             fstructs = all_fstructs
 
-        print('_'*75)
+        print("_" * 75)
 
-        print('Choose two feature structures to unify:')
+        print("Choose two feature structures to unify:")
         list_fstructs(fstructs)
 
-        selected = [None,None]
-        for (nth,i) in (('First',0), ('Second',1)):
+        selected = [None, None]
+        for (nth, i) in (("First", 0), ("Second", 1)):
             while selected[i] is None:
-                print(('%s feature structure (1-%d,q,t,l,?): '
-                       % (nth, len(all_fstructs))), end=' ')
+                print(
+                    (
+                        "%s feature structure (1-%d,q,t,l,?): "
+                        % (nth, len(all_fstructs))
+                    ),
+                    end=" ",
+                )
                 try:
                     input = sys.stdin.readline().strip()
-                    if input in ('q', 'Q', 'x', 'X'): return
-                    if input in ('t', 'T'):
+                    if input in ("q", "Q", "x", "X"):
+                        return
+                    if input in ("t", "T"):
                         trace = not trace
-                        print('   Trace = %s' % trace)
+                        print("   Trace = %s" % trace)
+                        continue
+                    if input in ("h", "H", "?"):
+                        print(HELP % len(fstructs))
                         continue
-                    if input in ('h', 'H', '?'):
-                        print(HELP % len(fstructs)); continue
-                    if input in ('l', 'L'):
-                        list_fstructs(all_fstructs); continue
-                    num = int(input)-1
+                    if input in ("l", "L"):
+                        list_fstructs(all_fstructs)
+                        continue
+                    num = int(input) - 1
                     selected[i] = all_fstructs[num][1]
                     print()
                 except:
-                    print('Bad sentence number')
+                    print("Bad sentence number")
                     continue
 
         if trace:
@@ -2454,49 +2722,70 @@ def interactive_demo(trace=False):
             result = display_unification(selected[0], selected[1])
         if result is not None:
             for i, fstruct in all_fstructs:
-                if repr(result) == repr(fstruct): break
+                if repr(result) == repr(fstruct):
+                    break
             else:
                 all_fstructs.append((len(all_fstructs), result))
 
         print('\nType "Enter" to continue unifying; or "q" to quit.')
         input = sys.stdin.readline().strip()
-        if input in ('q', 'Q', 'x', 'X'): return
+        if input in ("q", "Q", "x", "X"):
+            return
+
 
 def demo(trace=False):
     """
     Just for testing
     """
-    #import random
+    # import random
 
     # processor breaks with values like '3rd'
     fstruct_strings = [
-        '[agr=[number=sing, gender=masc]]',
-        '[agr=[gender=masc, person=3]]',
-        '[agr=[gender=fem, person=3]]',
-        '[subj=[agr=(1)[]], agr->(1)]',
-        '[obj=?x]', '[subj=?x]',
-        '[/=None]', '[/=NP]',
-        '[cat=NP]', '[cat=VP]', '[cat=PP]',
-        '[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]',
-        '[gender=masc, agr=?C]',
-        '[gender=?S, agr=[gender=?S,person=3]]'
+        "[agr=[number=sing, gender=masc]]",
+        "[agr=[gender=masc, person=3]]",
+        "[agr=[gender=fem, person=3]]",
+        "[subj=[agr=(1)[]], agr->(1)]",
+        "[obj=?x]",
+        "[subj=?x]",
+        "[/=None]",
+        "[/=NP]",
+        "[cat=NP]",
+        "[cat=VP]",
+        "[cat=PP]",
+        "[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]",
+        "[gender=masc, agr=?C]",
+        "[gender=?S, agr=[gender=?S,person=3]]",
     ]
     all_fstructs = [FeatStruct(fss) for fss in fstruct_strings]
-    #MAX_CHOICES = 5
-    #if len(all_fstructs) > MAX_CHOICES:
-        #fstructs = random.sample(all_fstructs, MAX_CHOICES)
-        #fstructs.sort()
-    #else:
-        #fstructs = all_fstructs
+    # MAX_CHOICES = 5
+    # if len(all_fstructs) > MAX_CHOICES:
+    #     fstructs = random.sample(all_fstructs, MAX_CHOICES)
+    #     fstructs.sort()
+    # else:
+    #     fstructs = all_fstructs
 
     for fs1 in all_fstructs:
         for fs2 in all_fstructs:
-            print("\n*******************\nfs1 is:\n%s\n\nfs2 is:\n%s\n\nresult is:\n%s" % (fs1, fs2, unify(fs1, fs2)))
+            print(
+                "\n*******************\nfs1 is:\n%s\n\nfs2 is:\n%s\n\nresult is:\n%s"
+                % (fs1, fs2, unify(fs1, fs2))
+            )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
 
-__all__ = ['FeatStruct', 'FeatDict', 'FeatList', 'unify', 'subsumes', 'conflicts',
-           'Feature', 'SlashFeature', 'RangeFeature', 'SLASH', 'TYPE',
-           'FeatStructReader']
+__all__ = [
+    "FeatStruct",
+    "FeatDict",
+    "FeatList",
+    "unify",
+    "subsumes",
+    "conflicts",
+    "Feature",
+    "SlashFeature",
+    "RangeFeature",
+    "SLASH",
+    "TYPE",
+    "FeatStructReader",
+]
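
The demo above prints a pairwise unification table for the fstruct_strings list.
A hedged, minimal sketch of the same call sequence, using two of the strings
from that list and the documented FeatStruct.unify API:

    from nltk.featstruct import FeatStruct

    fs1 = FeatStruct("[agr=[number=sing, gender=masc]]")
    fs2 = FeatStruct("[agr=[gender=masc, person=3]]")

    bindings = {}
    result = fs1.unify(fs2, bindings)  # merges the two agr structures
    print(result)
    # [agr=[gender='masc', number='sing', person=3]]  (rendering may differ)
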
diff --git a/nlp_resource_data/nltk/featstruct.pyc b/nlp_resource_data/nltk/featstruct.pyc
deleted file mode 100755 (executable)
index 08e80ca..0000000
Binary files a/nlp_resource_data/nltk/featstruct.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/grammar.py b/nlp_resource_data/nltk/grammar.py
old mode 100755 (executable)
new mode 100644 (file)
index 6c09500..c6c7a69
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Context Free Grammars
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 #         Jason Narad <jason.narad@gmail.com>
@@ -68,26 +68,22 @@ The operation of replacing the left hand side (*lhs*) of a production
 with the right hand side (*rhs*) in a tree (*tree*) is known as
 "expanding" *lhs* to *rhs* in *tree*.
 """
-from __future__ import print_function, unicode_literals, division
-
 import re
 from functools import total_ordering
 
-from six import string_types
-
 from nltk.util import transitive_closure, invert_graph
-from nltk.compat import python_2_unicode_compatible, unicode_repr
 from nltk.internals import raise_unorderable_types
 
 from nltk.probability import ImmutableProbabilisticMixIn
 from nltk.featstruct import FeatStruct, FeatDict, FeatStructReader, SLASH, TYPE
 
+
 #################################################################
 # Nonterminal
 #################################################################
 
+
 @total_ordering
-@python_2_unicode_compatible
 class Nonterminal(object):
     """
     A non-terminal symbol for a context free grammar.  ``Nonterminal``
@@ -106,6 +102,7 @@ class Nonterminal(object):
     :ivar _symbol: The node value corresponding to this
         ``Nonterminal``.  This value must be immutable and hashable.
     """
+
     def __init__(self, symbol):
         """
         Construct a new non-terminal from the given symbol.
@@ -153,10 +150,10 @@ class Nonterminal(object):
 
         :rtype: str
         """
-        if isinstance(self._symbol, string_types):
-            return '%s' % self._symbol
+        if isinstance(self._symbol, str):
+            return "%s" % self._symbol
         else:
-            return '%s' % unicode_repr(self._symbol)
+            return "%s" % repr(self._symbol)
 
     def __str__(self):
         """
@@ -164,10 +161,10 @@ class Nonterminal(object):
 
         :rtype: str
         """
-        if isinstance(self._symbol, string_types):
-            return '%s' % self._symbol
+        if isinstance(self._symbol, str):
+            return "%s" % self._symbol
         else:
-            return '%s' % unicode_repr(self._symbol)
+            return "%s" % repr(self._symbol)
 
     def __div__(self, rhs):
         """
@@ -179,8 +176,7 @@ class Nonterminal(object):
         :type rhs: Nonterminal
         :rtype: Nonterminal
         """
-        return Nonterminal('%s/%s' % (self._symbol, rhs._symbol))
-
+        return Nonterminal("%s/%s" % (self._symbol, rhs._symbol))
 
     def __truediv__(self, rhs):
         """
@@ -196,6 +192,7 @@ class Nonterminal(object):
         """
         return self.__div__(rhs)
 
+
 def nonterminals(symbols):
     """
     Given a string containing a list of symbol names, return a list of
@@ -209,19 +206,25 @@ def nonterminals(symbols):
         in the same order as the symbols names.
     :rtype: list(Nonterminal)
     """
-    if ',' in symbols: symbol_list = symbols.split(',')
-    else: symbol_list = symbols.split()
+    if "," in symbols:
+        symbol_list = symbols.split(",")
+    else:
+        symbol_list = symbols.split()
     return [Nonterminal(s.strip()) for s in symbol_list]
 
+
 class FeatStructNonterminal(FeatDict, Nonterminal):
     """A feature structure that's also a nonterminal.  It acts as its
     own symbol, and automatically freezes itself when hashed."""
+
     def __hash__(self):
         self.freeze()
         return FeatStruct.__hash__(self)
+
     def symbol(self):
         return self
 
+
 def is_nonterminal(item):
     """
     :return: True if the item is a ``Nonterminal``.
@@ -234,6 +237,7 @@ def is_nonterminal(item):
 # Terminals
 #################################################################
 
+
 def is_terminal(item):
     """
     Return True if the item is a terminal, which currently is
@@ -241,15 +245,16 @@ def is_terminal(item):
 
     :rtype: bool
     """
-    return hasattr(item, '__hash__') and not isinstance(item, Nonterminal)
+    return hasattr(item, "__hash__") and not isinstance(item, Nonterminal)
 
 
 #################################################################
 # Productions
 #################################################################
 
+
 @total_ordering
-@python_2_unicode_compatible
 class Production(object):
     """
     A grammar production.  Each production maps a single symbol
@@ -279,9 +284,10 @@ class Production(object):
         :param rhs: The right-hand side of the new ``Production``.
         :type rhs: sequence(Nonterminal and terminal)
         """
-        if isinstance(rhs, string_types):
-            raise TypeError('production right hand side should be a list, '
-                            'not a string')
+        if isinstance(rhs, str):
+            raise TypeError(
+                "production right hand side should be a list, " "not a string"
+            )
         self._lhs = lhs
         self._rhs = tuple(rhs)
         self._hash = hash((self._lhs, self._rhs))
@@ -332,8 +338,8 @@ class Production(object):
 
         :rtype: str
         """
-        result = '%s -> ' % unicode_repr(self._lhs)
-        result += " ".join(unicode_repr(el) for el in self._rhs)
+        result = "%s -> " % repr(self._lhs)
+        result += " ".join(repr(el) for el in self._rhs)
         return result
 
     def __repr__(self):
@@ -342,7 +348,7 @@ class Production(object):
 
         :rtype: str
         """
-        return '%s' % self
+        return "%s" % self
 
     def __eq__(self, other):
         """
@@ -350,9 +356,11 @@ class Production(object):
 
         :rtype: bool
         """
-        return (type(self) == type(other) and
-                self._lhs == other._lhs and
-                self._rhs == other._rhs)
+        return (
+            type(self) == type(other)
+            and self._lhs == other._lhs
+            and self._rhs == other._rhs
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -371,25 +379,26 @@ class Production(object):
         return self._hash
 
 
-@python_2_unicode_compatible
 class DependencyProduction(Production):
     """
     A dependency grammar production.  Each production maps a single
     head word to an unordered list of one or more modifier words.
     """
+
     def __str__(self):
         """
         Return a verbose string representation of the ``DependencyProduction``.
 
         :rtype: str
         """
-        result = '\'%s\' ->' % (self._lhs,)
+        result = "'%s' ->" % (self._lhs,)
         for elt in self._rhs:
-            result += ' \'%s\'' % (elt,)
+            result += " '%s'" % (elt,)
         return result
 
 
-@python_2_unicode_compatible
+
 class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn):
     """
     A probabilistic context free grammar production.
@@ -401,6 +410,7 @@ class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn):
 
     :see: ``Production``
     """
+
     def __init__(self, lhs, rhs, **prob):
         """
         Construct a new ``ProbabilisticProduction``.
@@ -415,14 +425,17 @@ class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn):
         Production.__init__(self, lhs, rhs)
 
     def __str__(self):
-        return Production.__unicode__(self) + \
-            (' [1.0]' if (self.prob() == 1.0) else ' [%g]' % self.prob())
+        return super().__str__() + (
+            " [1.0]" if (self.prob() == 1.0) else " [%g]" % self.prob()
+        )
 
     def __eq__(self, other):
-        return (type(self) == type(other) and
-                self._lhs == other._lhs and
-                self._rhs == other._rhs and
-                self.prob() == other.prob())
+        return (
+            type(self) == type(other)
+            and self._lhs == other._lhs
+            and self._rhs == other._rhs
+            and self.prob() == other.prob()
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -430,11 +443,13 @@ class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn):
     def __hash__(self):
         return hash((self._lhs, self._rhs, self.prob()))
 
+
 #################################################################
 # Grammars
 #################################################################
 
-@python_2_unicode_compatible
+
 class CFG(object):
     """
     A context-free grammar.  A grammar consists of a start state and
@@ -444,6 +459,7 @@ class CFG(object):
     If you need efficient key-based access to productions, you
     can use a subclass to implement it.
     """
+
     def __init__(self, start, productions, calculate_leftcorners=True):
         """
         Create a new context-free grammar, from the given start state
@@ -458,8 +474,10 @@ class CFG(object):
         :type calculate_leftcorners: bool
         """
         if not is_nonterminal(start):
-            raise TypeError("start should be a Nonterminal object,"
-                            " not a %s" % type(start).__name__)
+            raise TypeError(
+                "start should be a Nonterminal object,"
+                " not a %s" % type(start).__name__
+            )
 
         self._start = start
         self._productions = productions
@@ -496,8 +514,12 @@ class CFG(object):
 
     def _calculate_leftcorners(self):
         # Calculate leftcorner relations, for use in optimized parsing.
-        self._immediate_leftcorner_categories = dict((cat, set([cat])) for cat in self._categories)
-        self._immediate_leftcorner_words = dict((cat, set()) for cat in self._categories)
+        self._immediate_leftcorner_categories = dict(
+            (cat, set([cat])) for cat in self._categories
+        )
+        self._immediate_leftcorner_words = dict(
+            (cat, set()) for cat in self._categories
+        )
         for prod in self.productions():
             if len(prod) > 0:
                 cat, left = prod.lhs(), prod.rhs()[0]
@@ -510,7 +532,9 @@ class CFG(object):
         self._leftcorners = lc
         self._leftcorner_parents = invert_graph(lc)
 
-        nr_leftcorner_categories = sum(map(len, self._immediate_leftcorner_categories.values()))
+        nr_leftcorner_categories = sum(
+            map(len, self._immediate_leftcorner_categories.values())
+        )
         nr_leftcorner_words = sum(map(len, self._immediate_leftcorner_words.values()))
         if nr_leftcorner_words > nr_leftcorner_categories > 10000:
             # If the grammar is big, the leftcorner-word dictionary will be too large.
@@ -528,13 +552,14 @@ class CFG(object):
     @classmethod
     def fromstring(cls, input, encoding=None):
         """
-        Return the ``CFG`` corresponding to the input string(s).
+        Return the grammar instance corresponding to the input string(s).
 
         :param input: a grammar, either in the form of a string or as a list of strings.
         """
-        start, productions = read_grammar(input, standard_nonterm_parser,
-                                          encoding=encoding)
-        return CFG(start, productions)
+        start, productions = read_grammar(
+            input, standard_nonterm_parser, encoding=encoding
+        )
+        return cls(start, productions)
 
     def start(self):
         """
@@ -559,8 +584,9 @@ class CFG(object):
         :rtype: list(Production)
         """
         if rhs and empty:
-            raise ValueError("You cannot select empty and non-empty "
-                             "productions at the same time.")
+            raise ValueError(
+                "You cannot select empty and non-empty " "productions at the same time."
+            )
 
         # no constraints so return everything
         if not lhs and not rhs:
@@ -584,8 +610,11 @@ class CFG(object):
 
         # intersect
         else:
-            return [prod for prod in self._lhs_index.get(lhs, [])
-                    if prod in self._rhs_index.get(rhs, [])]
+            return [
+                prod
+                for prod in self._lhs_index.get(lhs, [])
+                if prod in self._rhs_index.get(rhs, [])
+            ]
 
     def leftcorners(self, cat):
         """
@@ -618,8 +647,10 @@ class CFG(object):
         elif self._leftcorner_words:
             return left in self._leftcorner_words.get(cat, set())
         else:
-            return any(left in self._immediate_leftcorner_words.get(parent, set())
-                       for parent in self.leftcorners(cat))
+            return any(
+                left in self._immediate_leftcorner_words.get(parent, set())
+                for parent in self.leftcorners(cat)
+            )
 
     def leftcorner_parents(self, cat):
         """
@@ -640,12 +671,12 @@ class CFG(object):
 
         :type tokens: list(str)
         """
-        missing = [tok for tok in tokens
-                   if not self._lexical_index.get(tok)]
+        missing = [tok for tok in tokens if not self._lexical_index.get(tok)]
         if missing:
-            missing = ', '.join('%r' % (w,) for w in missing)
-            raise ValueError("Grammar does not cover some of the "
-                             "input words: %r." % missing)
+            missing = ", ".join("%r" % (w,) for w in missing)
+            raise ValueError(
+                "Grammar does not cover some of the " "input words: %r." % missing
+            )
 
     def _calculate_grammar_forms(self):
         """
@@ -653,12 +684,10 @@ class CFG(object):
         """
         prods = self._productions
         self._is_lexical = all(p.is_lexical() for p in prods)
-        self._is_nonlexical = all(p.is_nonlexical() for p in prods
-                                  if len(p) != 1)
+        self._is_nonlexical = all(p.is_nonlexical() for p in prods if len(p) != 1)
         self._min_len = min(len(p) for p in prods)
         self._max_len = max(len(p) for p in prods)
-        self._all_unary_are_lexical = all(p.is_lexical() for p in prods
-                                          if len(p) == 1)
+        self._all_unary_are_lexical = all(p.is_lexical() for p in prods if len(p) == 1)
 
     def is_lexical(self):
         """
@@ -716,17 +745,122 @@ class CFG(object):
         Return True if the grammar is of Chomsky Normal Form, i.e. all productions
         are of the form A -> B C, or A -> "s".
         """
-        return (self.is_flexible_chomsky_normal_form() and
-                self._all_unary_are_lexical)
+        return self.is_flexible_chomsky_normal_form() and self._all_unary_are_lexical
+
+    def chomsky_normal_form(self, new_token_padding="@$@", flexible=False):
+        """
+        Return a new grammar that is in Chomsky Normal Form.
+
+        :param new_token_padding: customise the names of the new
+            nonterminals introduced during binarisation
+        """
+        if self.is_chomsky_normal_form():
+            return self
+        if self.productions(empty=True):
+            raise ValueError(
+                "Grammar has empty rules. Cannot deal with them at the moment"
+            )
+
+        # check for mixed rules
+        for rule in self.productions():
+            if rule.is_lexical() and len(rule.rhs()) > 1:
+                raise ValueError(
+                    "Cannot handled mixed rule {} => {}".format(rule.lhs(), rule.rhs())
+                )
+
+        step1 = CFG.eliminate_start(self)
+        step2 = CFG.binarize(step1, new_token_padding)
+        if flexible:
+            return step2
+        step3 = CFG.remove_unitary_rules(step2)
+        return step3
+
+    @classmethod
+    def remove_unitary_rules(cls, grammar):
+        """
+        Remove nonlexical unitary rules and convert them to
+        lexical ones.
+        """
+        result = []
+        unitary = []
+        for rule in grammar.productions():
+            if len(rule) == 1 and rule.is_nonlexical():
+                unitary.append(rule)
+            else:
+                result.append(rule)
+
+        while unitary:
+            rule = unitary.pop(0)
+            for item in grammar.productions(lhs=rule.rhs()[0]):
+                new_rule = Production(rule.lhs(), item.rhs())
+                if len(new_rule) != 1 or new_rule.is_lexical():
+                    result.append(new_rule)
+                else:
+                    unitary.append(new_rule)
+
+        n_grammar = CFG(grammar.start(), result)
+        return n_grammar
+
+    @classmethod
+    def binarize(cls, grammar, padding="@$@"):
+        """
+        Convert all non-binary rules into binary by introducing
+        new tokens.
+        Example::
+
+            Original:
+                A => B C D
+            After Conversion:
+                A => B A@$@B
+                A@$@B => C D
+        """
+        result = []
+
+        for rule in grammar.productions():
+            if len(rule.rhs()) > 2:
+                # this rule needs to be broken down
+                left_side = rule.lhs()
+                for k in range(0, len(rule.rhs()) - 2):
+                    tsym = rule.rhs()[k]
+                    new_sym = Nonterminal(left_side.symbol() + padding + tsym.symbol())
+                    new_production = Production(left_side, (tsym, new_sym))
+                    left_side = new_sym
+                    result.append(new_production)
+                last_prd = Production(left_side, rule.rhs()[-2:])
+                result.append(last_prd)
+            else:
+                result.append(rule)
+
+        n_grammar = CFG(grammar.start(), result)
+        return n_grammar
+
+    @classmethod
+    def eliminate_start(cls, grammar):
+        """
+        Eliminate the start symbol in case it appears on the RHS of a rule.
+        Example: given S -> S0 S1 and S0 -> S1 S,
+        a new start rule S0_SIGMA -> S is added.
+        """
+        start = grammar.start()
+        result = []
+        need_to_add = None
+        for rule in grammar.productions():
+            if start in rule.rhs():
+                need_to_add = True
+            result.append(rule)
+        if need_to_add:
+            start = Nonterminal("S0_SIGMA")
+            result.append(Production(start, [grammar.start()]))
+            n_grammar = CFG(start, result)
+            return n_grammar
+        return grammar
 
     def __repr__(self):
-        return '<Grammar with %d productions>' % len(self._productions)
+        return "<Grammar with %d productions>" % len(self._productions)
 
     def __str__(self):
-        result = 'Grammar with %d productions' % len(self._productions)
-        result += ' (start state = %r)' % self._start
+        result = "Grammar with %d productions" % len(self._productions)
+        result += " (start state = %r)" % self._start
         for production in self._productions:
-            result += '\n    %s' % production
+            result += "\n    %s" % production
         return result
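
The CNF helpers added above (chomsky_normal_form, binarize, remove_unitary_rules,
eliminate_start) compose as eliminate_start -> binarize -> remove_unitary_rules.
A hedged usage sketch, assuming this converted grammar.py is the module imported
and a toy grammar with no empty or mixed rules (the two cases the new code
rejects):

    from nltk import CFG

    g = CFG.fromstring("""
        S -> NP VP PP
        NP -> 'john'
        VP -> 'sees'
        PP -> 'clearly'
    """)
    cnf = g.chomsky_normal_form()
    for prod in cnf.productions():
        print(prod)
    # binarize() introduces the padded nonterminal S@$@NP, yielding
    # S -> NP S@$@NP and S@$@NP -> VP PP; the lexical unary rules
    # are already valid CNF.
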
 
 
@@ -740,6 +874,7 @@ class FeatureGrammar(CFG):
     productions.  The set of terminals and nonterminals
     is implicitly specified by the productions.
     """
+
     def __init__(self, start, productions):
         """
         Create a new feature-based grammar, from the given start
@@ -786,10 +921,11 @@ class FeatureGrammar(CFG):
                     self._lexical_index.setdefault(token, set()).add(prod)
 
     @classmethod
-    def fromstring(cls, input, features=None, logic_parser=None, fstruct_reader=None,
-               encoding=None):
+    def fromstring(
+        cls, input, features=None, logic_parser=None, fstruct_reader=None, encoding=None
+    ):
         """
-        Return a feature structure based ``FeatureGrammar``.
+        Return a feature structure based grammar.
 
         :param input: a grammar, either in the form of a string or else
         as a list of strings.
@@ -803,16 +939,18 @@ class FeatureGrammar(CFG):
             features = (SLASH, TYPE)
 
         if fstruct_reader is None:
-            fstruct_reader = FeatStructReader(features, FeatStructNonterminal,
-                                              logic_parser=logic_parser)
+            fstruct_reader = FeatStructReader(
+                features, FeatStructNonterminal, logic_parser=logic_parser
+            )
         elif logic_parser is not None:
-            raise Exception('\'logic_parser\' and \'fstruct_reader\' must '
-                            'not both be set')
-
-        start, productions = read_grammar(input, fstruct_reader.read_partial,
-                                          encoding=encoding)
-        return FeatureGrammar(start, productions)
+            raise Exception(
+                "'logic_parser' and 'fstruct_reader' must " "not both be set"
+            )
 
+        start, productions = read_grammar(
+            input, fstruct_reader.read_partial, encoding=encoding
+        )
+        return cls(start, productions)
 
     def productions(self, lhs=None, rhs=None, empty=False):
         """
@@ -826,8 +964,9 @@ class FeatureGrammar(CFG):
         :rtype: list(Production)
         """
         if rhs and empty:
-            raise ValueError("You cannot select empty and non-empty "
-                             "productions at the same time.")
+            raise ValueError(
+                "You cannot select empty and non-empty " "productions at the same time."
+            )
 
         # no constraints so return everything
         if not lhs and not rhs:
@@ -849,8 +988,11 @@ class FeatureGrammar(CFG):
 
         # intersect
         else:
-            return [prod for prod in self._lhs_index.get(self._get_type_if_possible(lhs), [])
-                    if prod in self._rhs_index.get(self._get_type_if_possible(rhs), [])]
+            return [
+                prod
+                for prod in self._lhs_index.get(self._get_type_if_possible(lhs), [])
+                if prod in self._rhs_index.get(self._get_type_if_possible(rhs), [])
+            ]
 
     def leftcorners(self, cat):
         """
@@ -876,20 +1018,22 @@ class FeatureGrammar(CFG):
         else:
             return item
 
+
 @total_ordering
-@python_2_unicode_compatible
 class FeatureValueType(object):
     """
     A helper class for ``FeatureGrammars``, designed to be different
     from ordinary strings.  This is to stop the ``FeatStruct``
     ``FOO[]`` from comparing equal to the terminal "FOO".
     """
+
     def __init__(self, value):
         self._value = value
         self._hash = hash(value)
 
     def __repr__(self):
-        return '<%s>' % self._value
+        return "<%s>" % self._value
 
     def __eq__(self, other):
         return type(self) == type(other) and self._value == other._value
@@ -906,13 +1050,14 @@ class FeatureValueType(object):
         return self._hash
 
 
-@python_2_unicode_compatible
 class DependencyGrammar(object):
     """
     A dependency grammar.  A DependencyGrammar consists of a set of
     productions.  Each production specifies a head/modifier relationship
     between a pair of words.
     """
+
     def __init__(self, productions):
         """
         Create a new dependency grammar, from the set of ``Productions``.
@@ -925,15 +1070,17 @@ class DependencyGrammar(object):
     @classmethod
     def fromstring(cls, input):
         productions = []
-        for linenum, line in enumerate(input.split('\n')):
+        for linenum, line in enumerate(input.split("\n")):
             line = line.strip()
-            if line.startswith('#') or line=='': continue
-            try: productions += _read_dependency_production(line)
+            if line.startswith("#") or line == "":
+                continue
+            try:
+                productions += _read_dependency_production(line)
             except ValueError:
-                raise ValueError('Unable to parse line %s: %s' % (linenum, line))
+                raise ValueError("Unable to parse line %s: %s" % (linenum, line))
         if len(productions) == 0:
-            raise ValueError('No productions found!')
-        return DependencyGrammar(productions)
+            raise ValueError("No productions found!")
+        return cls(productions)
 
     def contains(self, head, mod):
         """
@@ -948,7 +1095,7 @@ class DependencyGrammar(object):
         """
         for production in self._productions:
             for possibleMod in production._rhs:
-                if(production._lhs == head and possibleMod == mod):
+                if production._lhs == head and possibleMod == mod:
                     return True
         return False
 
@@ -965,7 +1112,7 @@ class DependencyGrammar(object):
         """
         for production in self._productions:
             for possibleMod in production._rhs:
-                if(production._lhs == head and possibleMod == mod):
+                if production._lhs == head and possibleMod == mod:
                     return True
         return False
 
@@ -980,26 +1127,25 @@ class DependencyGrammar(object):
     #                   return True
     #   return False
 
-
     def __str__(self):
         """
         Return a verbose string representation of the ``DependencyGrammar``
 
         :rtype: str
         """
-        str = 'Dependency grammar with %d productions' % len(self._productions)
+        str = "Dependency grammar with %d productions" % len(self._productions)
         for production in self._productions:
-            str += '\n  %s' % production
+            str += "\n  %s" % production
         return str
 
     def __repr__(self):
         """
         Return a concise string representation of the ``DependencyGrammar``
         """
-        return 'Dependency grammar with %d productions' % len(self._productions)
+        return "Dependency grammar with %d productions" % len(self._productions)
 
 
-@python_2_unicode_compatible
 class ProbabilisticDependencyGrammar(object):
     """
 
@@ -1023,7 +1169,7 @@ class ProbabilisticDependencyGrammar(object):
         """
         for production in self._productions:
             for possibleMod in production._rhs:
-                if(production._lhs == head and possibleMod == mod):
+                if production._lhs == head and possibleMod == mod:
                     return True
         return False
 
@@ -1033,22 +1179,26 @@ class ProbabilisticDependencyGrammar(object):
 
         :rtype: str
         """
-        str = 'Statistical dependency grammar with %d productions' % len(self._productions)
+        str = "Statistical dependency grammar with %d productions" % len(
+            self._productions
+        )
         for production in self._productions:
-            str += '\n  %s' % production
-        str += '\nEvents:'
+            str += "\n  %s" % production
+        str += "\nEvents:"
         for event in self._events:
-            str += '\n  %d:%s' % (self._events[event], event)
-        str += '\nTags:'
+            str += "\n  %d:%s" % (self._events[event], event)
+        str += "\nTags:"
         for tag_word in self._tags:
-            str += '\n %s:\t(%s)' % (tag_word, self._tags[tag_word])
+            str += "\n %s:\t(%s)" % (tag_word, self._tags[tag_word])
         return str
 
     def __repr__(self):
         """
         Return a concise string representation of the ``ProbabilisticDependencyGrammar``
         """
-        return 'Statistical Dependency grammar with %d productions' % len(self._productions)
+        return "Statistical Dependency grammar with %d productions" % len(
+            self._productions
+        )
 
 
 class PCFG(CFG):
@@ -1070,6 +1220,7 @@ class PCFG(CFG):
         productions with a given left-hand side have probabilities
         that sum to 1.
     """
+
     EPSILON = 0.01
 
     def __init__(self, start, productions, calculate_leftcorners=True):
@@ -1093,26 +1244,24 @@ class PCFG(CFG):
         # Make sure that the probabilities sum to one.
         probs = {}
         for production in productions:
-            probs[production.lhs()] = (probs.get(production.lhs(), 0) +
-                                       production.prob())
+            probs[production.lhs()] = probs.get(production.lhs(), 0) + production.prob()
         for (lhs, p) in probs.items():
-            if not ((1-PCFG.EPSILON) < p <
-                    (1+PCFG.EPSILON)):
+            if not ((1 - PCFG.EPSILON) < p < (1 + PCFG.EPSILON)):
                 raise ValueError("Productions for %r do not sum to 1" % lhs)
 
-
     @classmethod
     def fromstring(cls, input, encoding=None):
         """
-        Return a probabilistic ``PCFG`` corresponding to the
+        Return a probabilistic context-free grammar corresponding to the
         input string(s).
 
         :param input: a grammar, either in the form of a string or else
              as a list of strings.
         """
-        start, productions = read_grammar(input, standard_nonterm_parser,
-                                          probabilistic=True, encoding=encoding)
-        return PCFG(start, productions)
+        start, productions = read_grammar(
+            input, standard_nonterm_parser, probabilistic=True, encoding=encoding
+        )
+        return cls(start, productions)
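
The EPSILON check in __init__ above rejects any left-hand side whose rule
probabilities do not sum to 1 within 0.01. A hedged sketch of that failure mode
(grammar invented for illustration):

    from nltk import PCFG

    try:
        PCFG.fromstring("S -> 'a' [0.5] | 'b' [0.4]")  # sums to 0.9
    except ValueError as e:
        print(e)  # Productions for S do not sum to 1
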
 
 
 #################################################################
@@ -1121,6 +1270,7 @@ class PCFG(CFG):
 
 # Contributed by Nathan Bodenstab <bodenstab@cslu.ogi.edu>
 
+
 def induce_pcfg(start, productions):
     """
     Induce a PCFG grammar from a list of productions.
@@ -1144,11 +1294,12 @@ def induce_pcfg(start, productions):
 
     for prod in productions:
         lcount[prod.lhs()] = lcount.get(prod.lhs(), 0) + 1
-        pcount[prod]       = pcount.get(prod,       0) + 1
+        pcount[prod] = pcount.get(prod, 0) + 1
 
-    prods = [ProbabilisticProduction(p.lhs(), p.rhs(),
-                                prob=pcount[p] / lcount[p.lhs()])
-             for p in pcount]
+    prods = [
+        ProbabilisticProduction(p.lhs(), p.rhs(), prob=pcount[p] / lcount[p.lhs()])
+        for p in pcount
+    ]
     return PCFG(start, prods)
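
As the pcount/lcount bookkeeping above shows, induce_pcfg estimates each
probability as count(A -> beta) / count(A). A hedged, self-contained sketch
(toy productions invented for illustration):

    from nltk import Nonterminal, Production, induce_pcfg

    S, NP, VP = Nonterminal("S"), Nonterminal("NP"), Nonterminal("VP")
    productions = [
        Production(S, [NP, VP]),
        Production(NP, ["John"]),
        Production(NP, ["Mary"]),
        Production(NP, ["Mary"]),   # NP -> 'Mary' seen twice: prob 2/3
        Production(VP, ["walks"]),
    ]
    grammar = induce_pcfg(S, productions)
    for prod in grammar.productions():
        print(prod)
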
 
 
@@ -1156,18 +1307,21 @@ def induce_pcfg(start, productions):
 # Helper functions for reading productions
 #################################################################
 
+
 def _read_cfg_production(input):
     """
     Return a list of context-free ``Productions``.
     """
     return _read_production(input, standard_nonterm_parser)
 
+
 def _read_pcfg_production(input):
     """
     Return a list of PCFG ``ProbabilisticProductions``.
     """
     return _read_production(input, standard_nonterm_parser, probabilistic=True)
 
+
 def _read_fcfg_production(input, fstruct_reader):
     """
     Return a list of feature-based ``Productions``.
@@ -1177,10 +1331,11 @@ def _read_fcfg_production(input, fstruct_reader):
 
 # Parsing generic grammars
 
-_ARROW_RE = re.compile(r'\s* -> \s*', re.VERBOSE)
-_PROBABILITY_RE = re.compile(r'( \[ [\d\.]+ \] ) \s*', re.VERBOSE)
+_ARROW_RE = re.compile(r"\s* -> \s*", re.VERBOSE)
+_PROBABILITY_RE = re.compile(r"( \[ [\d\.]+ \] ) \s*", re.VERBOSE)
 _TERMINAL_RE = re.compile(r'( "[^"]+" | \'[^\']+\' ) \s*', re.VERBOSE)
-_DISJUNCTION_RE = re.compile(r'\| \s*', re.VERBOSE)
+_DISJUNCTION_RE = re.compile(r"\| \s*", re.VERBOSE)
+
 
 def _read_production(line, nonterm_parser, probabilistic=False):
     """
@@ -1194,7 +1349,8 @@ def _read_production(line, nonterm_parser, probabilistic=False):
 
     # Skip over the arrow.
     m = _ARROW_RE.match(line, pos)
-    if not m: raise ValueError('Expected an arrow')
+    if not m:
+        raise ValueError("Expected an arrow")
     pos = m.end()
 
     # Parse the right hand side.
@@ -1207,19 +1363,21 @@ def _read_production(line, nonterm_parser, probabilistic=False):
             pos = m.end()
             probabilities[-1] = float(m.group(1)[1:-1])
             if probabilities[-1] > 1.0:
-                raise ValueError('Production probability %f, '
-                                 'should not be greater than 1.0' %
-                                 (probabilities[-1],))
+                raise ValueError(
+                    "Production probability %f, "
+                    "should not be greater than 1.0" % (probabilities[-1],)
+                )
 
         # String -- add terminal.
-        elif line[pos] in "\'\"":
+        elif line[pos] in "'\"":
             m = _TERMINAL_RE.match(line, pos)
-            if not m: raise ValueError('Unterminated string')
+            if not m:
+                raise ValueError("Unterminated string")
             rhsides[-1].append(m.group(1)[1:-1])
             pos = m.end()
 
         # Vertical bar -- start new rhside.
-        elif line[pos] == '|':
+        elif line[pos] == "|":
             m = _DISJUNCTION_RE.match(line, pos)
             probabilities.append(0.0)
             rhsides.append([])
@@ -1231,8 +1389,10 @@ def _read_production(line, nonterm_parser, probabilistic=False):
             rhsides[-1].append(nonterm)
 
     if probabilistic:
-        return [ProbabilisticProduction(lhs, rhs, prob=probability)
-                for (rhs, probability) in zip(rhsides, probabilities)]
+        return [
+            ProbabilisticProduction(lhs, rhs, prob=probability)
+            for (rhs, probability) in zip(rhsides, probabilities)
+        ]
     else:
         return [Production(lhs, rhs) for rhs in rhsides]
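
As the rhsides handling above shows, one grammar line containing '|' expands
into several Production objects with the same left-hand side. A hedged sketch
through the public entry point, which routes into _read_production:

    from nltk import CFG

    g = CFG.fromstring("NP -> Det N | 'John' | NP PP")
    for prod in g.productions():
        print(prod)
    # NP -> Det N
    # NP -> 'John'
    # NP -> NP PP
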
 
@@ -1241,6 +1401,7 @@ def _read_production(line, nonterm_parser, probabilistic=False):
 # Reading Phrase Structure Grammars
 #################################################################
 
+
 def read_grammar(input, nonterm_parser, probabilistic=False, encoding=None):
     """
     Return a pair consisting of a starting category and a list of
@@ -1258,49 +1419,51 @@ def read_grammar(input, nonterm_parser, probabilistic=False, encoding=None):
     """
     if encoding is not None:
         input = input.decode(encoding)
-    if isinstance(input, string_types):
-        lines = input.split('\n')
+    if isinstance(input, str):
+        lines = input.split("\n")
     else:
         lines = input
 
     start = None
     productions = []
-    continue_line = ''
+    continue_line = ""
     for linenum, line in enumerate(lines):
         line = continue_line + line.strip()
-        if line.startswith('#') or line=='': continue
-        if line.endswith('\\'):
-            continue_line = line[:-1].rstrip()+' '
+        if line.startswith("#") or line == "":
+            continue
+        if line.endswith("\\"):
+            continue_line = line[:-1].rstrip() + " "
             continue
-        continue_line = ''
+        continue_line = ""
         try:
-            if line[0] == '%':
+            if line[0] == "%":
                 directive, args = line[1:].split(None, 1)
-                if directive == 'start':
+                if directive == "start":
                     start, pos = nonterm_parser(args, 0)
                     if pos != len(args):
-                        raise ValueError('Bad argument to start directive')
+                        raise ValueError("Bad argument to start directive")
                 else:
-                    raise ValueError('Bad directive')
+                    raise ValueError("Bad directive")
             else:
                 # expand out the disjunctions on the RHS
                 productions += _read_production(line, nonterm_parser, probabilistic)
         except ValueError as e:
-            raise ValueError('Unable to parse line %s: %s\n%s' %
-                             (linenum+1, line, e))
+            raise ValueError("Unable to parse line %s: %s\n%s" % (linenum + 1, line, e))
 
     if not productions:
-        raise ValueError('No productions found!')
+        raise ValueError("No productions found!")
     if not start:
         start = productions[0].lhs()
     return (start, productions)
 
-_STANDARD_NONTERM_RE = re.compile('( [\w/][\w/^<>-]* ) \s*', re.VERBOSE)
+
+_STANDARD_NONTERM_RE = re.compile("( [\w/][\w/^<>-]* ) \s*", re.VERBOSE)
+
 
 def standard_nonterm_parser(string, pos):
     m = _STANDARD_NONTERM_RE.match(string, pos)
-    if not m: raise ValueError('Expected a nonterminal, found: '
-                               + string[pos:])
+    if not m:
+        raise ValueError("Expected a nonterminal, found: " + string[pos:])
     return (Nonterminal(m.group(1)), m.end())
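
read_grammar above also honours comment lines, backslash line continuations,
and a %start directive; standard_nonterm_parser supplies the Nonterminal
objects. A hedged sketch exercising all three (grammar invented for
illustration; the doubled backslash is Python escaping for one backslash):

    from nltk import CFG

    g = CFG.fromstring("""
        %start S
        # comment lines are skipped
        S -> NP VP
        NP -> 'the' 'dog' | \\
              'a' 'cat'
        VP -> 'barks'
    """)
    print(g.start())  # S
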
 
 
@@ -1308,7 +1471,8 @@ def standard_nonterm_parser(string, pos):
 # Reading Dependency Grammars
 #################################################################
 
-_READ_DG_RE = re.compile(r'''^\s*                # leading whitespace
+_READ_DG_RE = re.compile(
+    r"""^\s*                # leading whitespace
                               ('[^']+')\s*        # single-quoted lhs
                               (?:[-=]+>)\s*        # arrow
                               (?:(                 # rhs:
@@ -1317,22 +1481,24 @@ _READ_DG_RE = re.compile(r'''^\s*                # leading whitespace
                                  | \|              # disjunction
                                  )
                                  \s*)              # trailing space
-                                 *$''',            # zero or more copies
-                             re.VERBOSE)
-_SPLIT_DG_RE = re.compile(r'''('[^']'|[-=]+>|"[^"]+"|'[^']+'|\|)''')
+                                 *$""",  # zero or more copies
+    re.VERBOSE,
+)
+_SPLIT_DG_RE = re.compile(r"""('[^']'|[-=]+>|"[^"]+"|'[^']+'|\|)""")
+
 
 def _read_dependency_production(s):
     if not _READ_DG_RE.match(s):
-        raise ValueError('Bad production string')
+        raise ValueError("Bad production string")
     pieces = _SPLIT_DG_RE.split(s)
-    pieces = [p for i,p in enumerate(pieces) if i%2==1]
-    lhside = pieces[0].strip('\'\"')
+    pieces = [p for i, p in enumerate(pieces) if i % 2 == 1]
+    lhside = pieces[0].strip("'\"")
     rhsides = [[]]
     for piece in pieces[2:]:
-        if piece == '|':
+        if piece == "|":
             rhsides.append([])
         else:
-            rhsides[-1].append(piece.strip('\'\"'))
+            rhsides[-1].append(piece.strip("'\""))
     return [DependencyProduction(lhside, rhside) for rhside in rhsides]
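
The production format accepted by _READ_DG_RE is the quoted head/modifier
notation used in dg_demo further down. A hedged sketch pairing fromstring with
the contains() check defined earlier in this file:

    from nltk.grammar import DependencyGrammar

    dg = DependencyGrammar.fromstring("""
        'taught' -> 'play' | 'man'
        'man' -> 'the'
        'play' -> 'golf'
    """)
    print(dg.contains("taught", "play"))  # True
    print(dg.contains("play", "taught"))  # False
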
 
 
@@ -1340,6 +1506,7 @@ def _read_dependency_production(s):
 # Demonstration
 #################################################################
 
+
 def cfg_demo():
     """
     A demonstration showing how ``CFGs`` can be created and used.
@@ -1348,18 +1515,19 @@ def cfg_demo():
     from nltk import nonterminals, Production, CFG
 
     # Create some nonterminals
-    S, NP, VP, PP = nonterminals('S, NP, VP, PP')
-    N, V, P, Det = nonterminals('N, V, P, Det')
-    VP_slash_NP = VP/NP
+    S, NP, VP, PP = nonterminals("S, NP, VP, PP")
+    N, V, P, Det = nonterminals("N, V, P, Det")
+    VP_slash_NP = VP / NP
 
-    print('Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP/NP])
-    print('    S.symbol() =>', repr(S.symbol()))
+    print("Some nonterminals:", [S, NP, VP, PP, N, V, P, Det, VP / NP])
+    print("    S.symbol() =>", repr(S.symbol()))
     print()
 
     print(Production(S, [NP]))
 
     # Create some Grammar Productions
-    grammar = CFG.fromstring("""
+    grammar = CFG.fromstring(
+        """
       S -> NP VP
       PP -> P NP
       NP -> Det N | NP PP
@@ -1368,16 +1536,19 @@ def cfg_demo():
       N -> 'dog' | 'cat'
       V -> 'chased' | 'sat'
       P -> 'on' | 'in'
-    """)
+    """
+    )
 
-    print('A Grammar:', repr(grammar))
-    print('    grammar.start()       =>', repr(grammar.start()))
-    print('    grammar.productions() =>', end=' ')
+    print("A Grammar:", repr(grammar))
+    print("    grammar.start()       =>", repr(grammar.start()))
+    print("    grammar.productions() =>", end=" ")
     # Use string.replace(...) to line-wrap the output.
-    print(repr(grammar.productions()).replace(',', ',\n'+' '*25))
+    print(repr(grammar.productions()).replace(",", ",\n" + " " * 25))
     print()
 
-toy_pcfg1 = PCFG.fromstring("""
+
+toy_pcfg1 = PCFG.fromstring(
+    """
     S -> NP VP [1.0]
     NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
     Det -> 'the' [0.8] | 'my' [0.2]
@@ -1386,9 +1557,11 @@ toy_pcfg1 = PCFG.fromstring("""
     V -> 'ate' [0.35] | 'saw' [0.65]
     PP -> P NP [1.0]
     P -> 'with' [0.61] | 'under' [0.39]
-    """)
+    """
+)
 
-toy_pcfg2 = PCFG.fromstring("""
+toy_pcfg2 = PCFG.fromstring(
+    """
     S    -> NP VP         [1.0]
     VP   -> V NP          [.59]
     VP   -> V             [.40]
@@ -1412,7 +1585,9 @@ toy_pcfg2 = PCFG.fromstring("""
     Det  -> 'the'         [.41]
     Det  -> 'a'           [.31]
     Det  -> 'my'          [.28]
-    """)
+    """
+)
+
 
 def pcfg_demo():
     """
@@ -1427,18 +1602,18 @@ def pcfg_demo():
     pcfg_prods = toy_pcfg1.productions()
 
     pcfg_prod = pcfg_prods[2]
-    print('A PCFG production:', repr(pcfg_prod))
-    print('    pcfg_prod.lhs()  =>', repr(pcfg_prod.lhs()))
-    print('    pcfg_prod.rhs()  =>', repr(pcfg_prod.rhs()))
-    print('    pcfg_prod.prob() =>', repr(pcfg_prod.prob()))
+    print("A PCFG production:", repr(pcfg_prod))
+    print("    pcfg_prod.lhs()  =>", repr(pcfg_prod.lhs()))
+    print("    pcfg_prod.rhs()  =>", repr(pcfg_prod.rhs()))
+    print("    pcfg_prod.prob() =>", repr(pcfg_prod.prob()))
     print()
 
     grammar = toy_pcfg2
-    print('A PCFG grammar:', repr(grammar))
-    print('    grammar.start()       =>', repr(grammar.start()))
-    print('    grammar.productions() =>', end=' ')
+    print("A PCFG grammar:", repr(grammar))
+    print("    grammar.start()       =>", repr(grammar.start()))
+    print("    grammar.productions() =>", end=" ")
     # Use .replace(...) to line-wrap the output.
-    print(repr(grammar.productions()).replace(',', ',\n'+' '*26))
+    print(repr(grammar.productions()).replace(",", ",\n" + " " * 26))
     print()
 
     # extract productions from three trees and induce the PCFG
@@ -1448,12 +1623,12 @@ def pcfg_demo():
     item = treebank._fileids[0]
     for tree in treebank.parsed_sents(item)[:3]:
         # perform optional tree transformations, e.g.:
-        tree.collapse_unary(collapsePOS = False)
-        tree.chomsky_normal_form(horzMarkov = 2)
+        tree.collapse_unary(collapsePOS=False)
+        tree.chomsky_normal_form(horzMarkov=2)
 
         productions += tree.productions()
 
-    S = Nonterminal('S')
+    S = Nonterminal("S")
     grammar = induce_pcfg(S, productions)
     print(grammar)
     print()
@@ -1464,31 +1639,37 @@ def pcfg_demo():
     parser.trace(3)
 
     # doesn't work as tokens are different:
-    #sent = treebank.tokenized('wsj_0001.mrg')[0]
+    # sent = treebank.tokenized('wsj_0001.mrg')[0]
 
     sent = treebank.parsed_sents(item)[0].leaves()
     print(sent)
     for parse in parser.parse(sent):
         print(parse)
 
+
 def fcfg_demo():
     import nltk.data
-    g = nltk.data.load('grammars/book_grammars/feat0.fcfg')
+
+    g = nltk.data.load("grammars/book_grammars/feat0.fcfg")
     print(g)
     print()
 
+
 def dg_demo():
     """
     A demonstration showing the creation and inspection of a
     ``DependencyGrammar``.
     """
-    grammar = DependencyGrammar.fromstring("""
+    grammar = DependencyGrammar.fromstring(
+        """
     'scratch' -> 'cats' | 'walls'
     'walls' -> 'the'
     'cats' -> 'the'
-    """)
+    """
+    )
     print(grammar)
 
+
 def sdg_demo():
     """
     A demonstration of how to read a string representation of
@@ -1496,7 +1677,8 @@ def sdg_demo():
     """
     from nltk.parse import DependencyGraph
 
-    dg = DependencyGraph("""
+    dg = DependencyGraph(
+        """
     1   Ze                ze                Pron  Pron  per|3|evofmv|nom                 2   su      _  _
     2   had               heb               V     V     trans|ovt|1of2of3|ev             0   ROOT    _  _
     3   met               met               Prep  Prep  voor                             8   mod     _  _
@@ -1510,10 +1692,12 @@ def sdg_demo():
     11  of                of                Conj  Conj  neven                            7   vc      _  _
     12  terrassen         terras            N     N     soort|mv|neut                    11  cnj     _  _
     13  .                 .                 Punc  Punc  punt                             12  punct   _  _
-    """)
+    """
+    )
     tree = dg.tree()
     print(tree.pprint())
 
+
 def demo():
     cfg_demo()
     pcfg_demo()
@@ -1521,12 +1705,20 @@ def demo():
     dg_demo()
     sdg_demo()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
 
-__all__ = ['Nonterminal', 'nonterminals',
-           'CFG', 'Production',
-          'PCFG', 'ProbabilisticProduction',
-          'DependencyGrammar', 'DependencyProduction',
-           'ProbabilisticDependencyGrammar',
-          'induce_pcfg', 'read_grammar']
+__all__ = [
+    "Nonterminal",
+    "nonterminals",
+    "CFG",
+    "Production",
+    "PCFG",
+    "ProbabilisticProduction",
+    "DependencyGrammar",
+    "DependencyProduction",
+    "ProbabilisticDependencyGrammar",
+    "induce_pcfg",
+    "read_grammar",
+]
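
The rewritten pcfg_demo above induces a grammar from the first few treebank parses. A minimal
standalone sketch of the same induce_pcfg flow (assuming the 'treebank' corpus data has been
downloaded, e.g. via nltk.download('treebank'); the public fileids() accessor stands in for the
private treebank._fileids used in the demo):

    # Sketch only: induce a PCFG from a handful of Penn Treebank sample parses.
    from nltk import Nonterminal, induce_pcfg
    from nltk.corpus import treebank

    productions = []
    for tree in treebank.parsed_sents(treebank.fileids()[0])[:3]:
        tree.collapse_unary(collapsePOS=False)   # same optional transforms as pcfg_demo
        tree.chomsky_normal_form(horzMarkov=2)   # binarize with limited horizontal context
        productions += tree.productions()

    grammar = induce_pcfg(Nonterminal("S"), productions)
    print(grammar)  # productions carry relative-frequency probabilities
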
diff --git a/nlp_resource_data/nltk/grammar.pyc b/nlp_resource_data/nltk/grammar.pyc
deleted file mode 100755 (executable)
index 82d8d7a..0000000
Binary files a/nlp_resource_data/nltk/grammar.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 2d2f516..8b292d6
@@ -1,6 +1,6 @@
 # Natural Language Toolkit (NLTK) Help
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -8,33 +8,40 @@
 """
 Provide structured access to documentation.
 """
-from __future__ import print_function
 
 import re
 from textwrap import wrap
 
 from nltk.data import load
 
+
 def brown_tagset(tagpattern=None):
     _format_tagset("brown_tagset", tagpattern)
 
+
 def claws5_tagset(tagpattern=None):
     _format_tagset("claws5_tagset", tagpattern)
 
+
 def upenn_tagset(tagpattern=None):
     _format_tagset("upenn_tagset", tagpattern)
 
+
 #####################################################################
 # UTILITIES
 #####################################################################
 
+
 def _print_entries(tags, tagdict):
     for tag in tags:
         entry = tagdict[tag]
         defn = [tag + ": " + entry[0]]
-        examples = wrap(entry[1], width=75, initial_indent='    ', subsequent_indent='    ')
+        examples = wrap(
+            entry[1], width=75, initial_indent="    ", subsequent_indent="    "
+        )
         print("\n".join(defn + examples))
 
+
 def _format_tagset(tagset, tagpattern=None):
     tagdict = load("help/tagsets/" + tagset + ".pickle")
     if not tagpattern:
@@ -49,8 +56,9 @@ def _format_tagset(tagset, tagpattern=None):
         else:
             print("No matching tags found.")
 
-if __name__ == '__main__':
-    brown_tagset(r'NN.*')
-    upenn_tagset(r'.*\$')
-    claws5_tagset('UNDEFINED')
-    brown_tagset(r'NN')
+
+if __name__ == "__main__":
+    brown_tagset(r"NN.*")
+    upenn_tagset(r".*\$")
+    claws5_tagset("UNDEFINED")
+    brown_tagset(r"NN")
diff --git a/nlp_resource_data/nltk/help.pyc b/nlp_resource_data/nltk/help.pyc
deleted file mode 100755 (executable)
index cac98ea..0000000
Binary files a/nlp_resource_data/nltk/help.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 94581e5..fdd653d
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Inference
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Dan Garrette <dhgarrette@gmail.com>
 #         Ewan Klein <ewan@inf.ed.ac.uk>
 #
@@ -16,5 +16,9 @@ from nltk.inference.mace import Mace, MaceCommand
 from nltk.inference.prover9 import Prover9, Prover9Command
 from nltk.inference.resolution import ResolutionProver, ResolutionProverCommand
 from nltk.inference.tableau import TableauProver, TableauProverCommand
-from nltk.inference.discourse import (ReadingCommand, CfgReadingCommand,
-                       DrtGlueReadingCommand, DiscourseTester)
+from nltk.inference.discourse import (
+    ReadingCommand,
+    CfgReadingCommand,
+    DrtGlueReadingCommand,
+    DiscourseTester,
+)
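
Of the provers re-exported here, TableauProver and ResolutionProver are pure Python, while the
Prover9 and Mace wrappers shell out to external binaries. A minimal sketch of a first-order
entailment check (the choice of TableauProver is illustrative; any Prover exposes the same
prove() call):

    # Sketch only: proving a syllogism with the pure-Python tableau prover.
    from nltk.sem import Expression
    from nltk.inference import TableauProver

    read = Expression.fromstring
    goal = read("mortal(socrates)")
    assumptions = [read("all x.(man(x) -> mortal(x))"), read("man(socrates)")]

    print(TableauProver().prove(goal, assumptions))  # expected: True
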
diff --git a/nlp_resource_data/nltk/inference/__init__.pyc b/nlp_resource_data/nltk/inference/__init__.pyc
deleted file mode 100755 (executable)
index 0be4dc4..0000000
Binary files a/nlp_resource_data/nltk/inference/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/inference/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..abbb072
Binary files /dev/null and b/nlp_resource_data/nltk/inference/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/inference/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..478b83d
Binary files /dev/null and b/nlp_resource_data/nltk/inference/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/inference/__pycache__/discourse.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/discourse.cpython-37.pyc
new file mode 100644 (file)
index 0000000..db0b098
Binary files /dev/null and b/nlp_resource_data/nltk/inference/__pycache__/discourse.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/inference/__pycache__/mace.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/mace.cpython-37.pyc
new file mode 100644 (file)
index 0000000..3dfabb3
Binary files /dev/null and b/nlp_resource_data/nltk/inference/__pycache__/mace.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/inference/__pycache__/nonmonotonic.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/nonmonotonic.cpython-37.pyc
new file mode 100644 (file)
index 0000000..24af18a
Binary files /dev/null and b/nlp_resource_data/nltk/inference/__pycache__/nonmonotonic.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/inference/__pycache__/prover9.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/prover9.cpython-37.pyc
new file mode 100644 (file)
index 0000000..1c27213
Binary files /dev/null and b/nlp_resource_data/nltk/inference/__pycache__/prover9.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/inference/__pycache__/resolution.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/resolution.cpython-37.pyc
new file mode 100644 (file)
index 0000000..21f4fd2
Binary files /dev/null and b/nlp_resource_data/nltk/inference/__pycache__/resolution.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/inference/__pycache__/tableau.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/tableau.cpython-37.pyc
new file mode 100644 (file)
index 0000000..525035f
Binary files /dev/null and b/nlp_resource_data/nltk/inference/__pycache__/tableau.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index ca03a96..3135e1b
@@ -17,20 +17,18 @@ the model builder tries to build a model for the assumptions. Given a set of ass
 goal *G*, the model builder tries to find a counter-model, in the sense of a model that will satisfy
 the assumptions plus the negation of *G*.
 """
-from __future__ import print_function
-from abc import ABCMeta, abstractmethod
-from six import add_metaclass
 
+from abc import ABCMeta, abstractmethod
 import threading
 import time
 
 
-@add_metaclass(ABCMeta)
-class Prover(object):
+class Prover(metaclass=ABCMeta):
     """
     Interface for trying to prove a goal from assumptions.  Both the goal and
     the assumptions are constrained to be formulas of ``logic.Expression``.
     """
+
     def prove(self, goal=None, assumptions=None, verbose=False):
         """
         :return: Whether the proof was successful or not.
@@ -46,14 +44,14 @@ class Prover(object):
         """
 
 
-@add_metaclass(ABCMeta)
-class ModelBuilder(object):
+class ModelBuilder(metaclass=ABCMeta):
     """
     Interface for trying to build a model of a set of formulas.
     Open formulas are assumed to be universally quantified.
     Both the goal and the assumptions are constrained to be formulas
     of ``logic.Expression``.
     """
+
     def build_model(self, goal=None, assumptions=None, verbose=False):
         """
         Perform the actual model building.
@@ -71,12 +69,12 @@ class ModelBuilder(object):
         """
 
 
-@add_metaclass(ABCMeta)
-class TheoremToolCommand(object):
+class TheoremToolCommand(metaclass=ABCMeta):
     """
     This class holds a goal and a list of assumptions to be used in proving
     or model building.
     """
+
     @abstractmethod
     def add_assumptions(self, new_assumptions):
         """
@@ -126,6 +124,7 @@ class ProverCommand(TheoremToolCommand):
     This class holds a ``Prover``, a goal, and a list of assumptions.  When
     prove() is called, the ``Prover`` is executed with the goal and assumptions.
     """
+
     @abstractmethod
     def prove(self, verbose=False):
         """
@@ -154,6 +153,7 @@ class ModelBuilderCommand(TheoremToolCommand):
     When build_model() is called, the ``ModelBuilder`` is executed with the goal
     and assumptions.
     """
+
     @abstractmethod
     def build_model(self, verbose=False):
         """
@@ -184,6 +184,7 @@ class BaseTheoremToolCommand(TheoremToolCommand):
     This class holds a goal and a list of assumptions to be used in proving
     or model building.
     """
+
     def __init__(self, goal=None, assumptions=None):
         """
         :param goal: Input expression to prove
@@ -261,6 +262,7 @@ class BaseProverCommand(BaseTheoremToolCommand, ProverCommand):
     This class holds a ``Prover``, a goal, and a list of assumptions.  When
     prove() is called, the ``Prover`` is executed with the goal and assumptions.
     """
+
     def __init__(self, prover, goal=None, assumptions=None):
         """
         :param prover: The theorem tool to execute with the assumptions
@@ -280,9 +282,9 @@ class BaseProverCommand(BaseTheoremToolCommand, ProverCommand):
         re-proving.
         """
         if self._result is None:
-            self._result, self._proof = self._prover._prove(self.goal(),
-                                                            self.assumptions(),
-                                                            verbose)
+            self._result, self._proof = self._prover._prove(
+                self.goal(), self.assumptions(), verbose
+            )
         return self._result
 
     def proof(self, simplify=True):
@@ -315,6 +317,7 @@ class BaseModelBuilderCommand(BaseTheoremToolCommand, ModelBuilderCommand):
     build_model() is called, the ``ModelBuilder`` is executed with the goal and
     assumptions.
     """
+
     def __init__(self, modelbuilder, goal=None, assumptions=None):
         """
         :param modelbuilder: The theorem tool to execute with the assumptions
@@ -334,10 +337,9 @@ class BaseModelBuilderCommand(BaseTheoremToolCommand, ModelBuilderCommand):
         re-building.
         """
         if self._result is None:
-            self._result, self._model = \
-                    self._modelbuilder._build_model(self.goal(),
-                                                    self.assumptions(),
-                                                    verbose)
+            self._result, self._model = self._modelbuilder._build_model(
+                self.goal(), self.assumptions(), verbose
+            )
         return self._result
 
     def model(self, format=None):
@@ -348,8 +350,7 @@ class BaseModelBuilderCommand(BaseTheoremToolCommand, ModelBuilderCommand):
         :return: str
         """
         if self._result is None:
-            raise LookupError('You have to call build_model() first to '
-                              'get a model!')
+            raise LookupError("You have to call build_model() first to " "get a model!")
         else:
             return self._decorate_model(self._model, format)
 
@@ -370,6 +371,7 @@ class TheoremToolCommandDecorator(TheoremToolCommand):
     A base decorator for the ``ProverCommandDecorator`` and
     ``ModelBuilderCommandDecorator`` classes from which decorators can extend.
     """
+
     def __init__(self, command):
         """
         :param command: ``TheoremToolCommand`` to decorate
@@ -403,6 +405,7 @@ class ProverCommandDecorator(TheoremToolCommandDecorator, ProverCommand):
     A base decorator for the ``ProverCommand`` class from which other
     prover command decorators can extend.
     """
+
     def __init__(self, proverCommand):
         """
         :param proverCommand: ``ProverCommand`` to decorate
@@ -416,9 +419,9 @@ class ProverCommandDecorator(TheoremToolCommandDecorator, ProverCommand):
     def prove(self, verbose=False):
         if self._result is None:
             prover = self.get_prover()
-            self._result, self._proof = prover._prove(self.goal(),
-                                                      self.assumptions(),
-                                                      verbose)
+            self._result, self._proof = prover._prove(
+                self.goal(), self.assumptions(), verbose
+            )
         return self._result
 
     def proof(self, simplify=True):
@@ -450,6 +453,7 @@ class ModelBuilderCommandDecorator(TheoremToolCommandDecorator, ModelBuilderComm
     A base decorator for the ``ModelBuilderCommand`` class from which other
     prover command decorators can extend.
     """
+
     def __init__(self, modelBuilderCommand):
         """
         :param modelBuilderCommand: ``ModelBuilderCommand`` to decorate
@@ -467,10 +471,9 @@ class ModelBuilderCommandDecorator(TheoremToolCommandDecorator, ModelBuilderComm
         """
         if self._result is None:
             modelbuilder = self.get_model_builder()
-            self._result, self._model = \
-                            modelbuilder._build_model(self.goal(),
-                                                      self.assumptions(),
-                                                      verbose)
+            self._result, self._model = modelbuilder._build_model(
+                self.goal(), self.assumptions(), verbose
+            )
         return self._result
 
     def model(self, format=None):
@@ -481,8 +484,7 @@ class ModelBuilderCommandDecorator(TheoremToolCommandDecorator, ModelBuilderComm
         :return: str
         """
         if self._result is None:
-            raise LookupError('You have to call build_model() first to '
-                              'get a model!')
+            raise LookupError("You have to call build_model() first to " "get a model!")
         else:
             return self._decorate_model(self._model, format)
 
@@ -506,20 +508,27 @@ class ParallelProverBuilder(Prover, ModelBuilder):
     parallel.  Whichever finishes first, the prover or the model builder, is the
     result that will be used.
     """
+
     def __init__(self, prover, modelbuilder):
         self._prover = prover
         self._modelbuilder = modelbuilder
 
     def _prove(self, goal=None, assumptions=None, verbose=False):
-        return self._run(goal, assumptions, verbose), ''
+        return self._run(goal, assumptions, verbose), ""
 
     def _build_model(self, goal=None, assumptions=None, verbose=False):
-        return not self._run(goal, assumptions, verbose), ''
+        return not self._run(goal, assumptions, verbose), ""
 
     def _run(self, goal, assumptions, verbose):
         # Set up two threads, Prover and ModelBuilder, to run in parallel
-        tp_thread = TheoremToolThread(lambda: self._prover.prove(goal, assumptions, verbose), verbose, 'TP')
-        mb_thread = TheoremToolThread(lambda: self._modelbuilder.build_model(goal, assumptions, verbose), verbose, 'MB')
+        tp_thread = TheoremToolThread(
+            lambda: self._prover.prove(goal, assumptions, verbose), verbose, "TP"
+        )
+        mb_thread = TheoremToolThread(
+            lambda: self._modelbuilder.build_model(goal, assumptions, verbose),
+            verbose,
+            "MB",
+        )
 
         tp_thread.start()
         mb_thread.start()
@@ -546,6 +555,7 @@ class ParallelProverBuilderCommand(BaseProverCommand, BaseModelBuilderCommand):
     Because the theorem prover result is the opposite of the model builder
     result, we will treat self._result as meaning "proof found/no model found".
     """
+
     def __init__(self, prover, modelbuilder, goal=None, assumptions=None):
         BaseProverCommand.__init__(self, prover, goal, assumptions)
         BaseModelBuilderCommand.__init__(self, modelbuilder, goal, assumptions)
@@ -558,8 +568,12 @@ class ParallelProverBuilderCommand(BaseProverCommand, BaseModelBuilderCommand):
 
     def _run(self, verbose):
         # Set up two threads, Prover and ModelBuilder, to run in parallel
-        tp_thread = TheoremToolThread(lambda: BaseProverCommand.prove(self, verbose), verbose, 'TP')
-        mb_thread = TheoremToolThread(lambda: BaseModelBuilderCommand.build_model(self, verbose), verbose, 'MB')
+        tp_thread = TheoremToolThread(
+            lambda: BaseProverCommand.prove(self, verbose), verbose, "TP"
+        )
+        mb_thread = TheoremToolThread(
+            lambda: BaseModelBuilderCommand.build_model(self, verbose), verbose, "MB"
+        )
 
         tp_thread.start()
         mb_thread.start()
@@ -587,11 +601,14 @@ class TheoremToolThread(threading.Thread):
         try:
             self._result = self._command()
             if self._verbose:
-                print('Thread %s finished with result %s at %s' % \
-                      (self._name, self._result, time.localtime(time.time())))
+                print(
+                    "Thread %s finished with result %s at %s"
+                    % (self._name, self._result, time.localtime(time.time()))
+                )
         except Exception as e:
             print(e)
-            print('Thread %s completed abnormally' % (self._name))
+            print("Thread %s completed abnormally" % (self._name))
 
     @property
-    def result(self): return self._result
+    def result(self):
+        return self._result
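
The six/add_metaclass removals above leave the abstract contract unchanged: a concrete Prover
only supplies _prove(), returning a (success, proof-string) pair, and inherits prove() from the
base class. A hedged toy subclass (the class name and stub result are illustrative, not part of
the module):

    # Sketch only: the minimal Prover contract under the new metaclass syntax.
    from nltk.inference.api import Prover

    class TrivialProver(Prover):
        def _prove(self, goal=None, assumptions=None, verbose=False):
            # A real prover would search for a derivation here.
            return (False, "no proof attempted")

    print(TrivialProver().prove())  # False: the boolean half of the (result, proof) pair
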
diff --git a/nlp_resource_data/nltk/inference/api.pyc b/nlp_resource_data/nltk/inference/api.pyc
deleted file mode 100755 (executable)
index 6f587f2..0000000
Binary files a/nlp_resource_data/nltk/inference/api.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index a04d360..5d4065c
@@ -42,14 +42,13 @@ The set of all threads for a discourse is the Cartesian product of all the readi
 (This is not intended to scale beyond very short discourses!) The method ``readings(filter=True)`` will only show
 those threads which are consistent (taking into account any background assumptions).
 """
-from __future__ import print_function
-from abc import ABCMeta, abstractmethod
-from six import add_metaclass
-import os
 
+import os
+from abc import ABCMeta, abstractmethod
 from operator import and_, add
 from functools import reduce
 
+
 from nltk.data import show_cfg
 from nltk.tag import RegexpTagger
 from nltk.parse import load_parser
@@ -62,8 +61,7 @@ from nltk.inference.mace import MaceCommand
 from nltk.inference.prover9 import Prover9Command
 
 
-@add_metaclass(ABCMeta)
-class ReadingCommand(object):
+class ReadingCommand(metaclass=ABCMeta):
     @abstractmethod
     def parse_to_readings(self, sentence):
         """
@@ -110,12 +108,15 @@ class CfgReadingCommand(ReadingCommand):
         :param gramfile: name of file where grammar can be loaded
         :type gramfile: str
         """
-        self._gramfile = (gramfile if gramfile else 'grammars/book_grammars/discourse.fcfg')
+        self._gramfile = (
+            gramfile if gramfile else "grammars/book_grammars/discourse.fcfg"
+        )
         self._parser = load_parser(self._gramfile)
 
     def parse_to_readings(self, sentence):
         """:see: ReadingCommand.parse_to_readings()"""
         from nltk.sem import root_semrep
+
         tokens = sentence.split()
         trees = self._parser.parse(tokens)
         return [root_semrep(tree) for tree in trees]
@@ -130,18 +131,21 @@ class CfgReadingCommand(ReadingCommand):
 
 
 class DrtGlueReadingCommand(ReadingCommand):
-    def __init__(self, semtype_file=None, remove_duplicates=False,
-                 depparser=None):
+    def __init__(self, semtype_file=None, remove_duplicates=False, depparser=None):
         """
         :param semtype_file: name of file where grammar can be loaded
         :param remove_duplicates: should duplicates be removed?
         :param depparser: the dependency parser
         """
         if semtype_file is None:
-            semtype_file = os.path.join('grammars', 'sample_grammars','drt_glue.semtype')
-        self._glue = DrtGlue(semtype_file=semtype_file,
-                             remove_duplicates=remove_duplicates,
-                             depparser=depparser)
+            semtype_file = os.path.join(
+                "grammars", "sample_grammars", "drt_glue.semtype"
+            )
+        self._glue = DrtGlue(
+            semtype_file=semtype_file,
+            remove_duplicates=remove_duplicates,
+            depparser=depparser,
+        )
 
     def parse_to_readings(self, sentence):
         """:see: ReadingCommand.parse_to_readings()"""
@@ -168,6 +172,7 @@ class DiscourseTester(object):
     """
     Check properties of an ongoing discourse.
     """
+
     def __init__(self, input, reading_command=None, background=None):
         """
         Initialize a ``DiscourseTester``.
@@ -178,14 +183,17 @@ class DiscourseTester(object):
         :type background: list(Expression)
         """
         self._input = input
-        self._sentences = dict([('s%s' % i, sent) for i, sent in enumerate(input)])
+        self._sentences = dict([("s%s" % i, sent) for i, sent in enumerate(input)])
         self._models = None
         self._readings = {}
-        self._reading_command = (reading_command if reading_command else CfgReadingCommand())
+        self._reading_command = (
+            reading_command if reading_command else CfgReadingCommand()
+        )
         self._threads = {}
         self._filtered_threads = {}
         if background is not None:
             from nltk.sem.logic import Expression
+
             for e in background:
                 assert isinstance(e, Expression)
             self._background = background
@@ -203,7 +211,7 @@ class DiscourseTester(object):
         for id in sorted(self._sentences):
             print("%s: %s" % (id, self._sentences[id]))
 
-    def add_sentence(self, sentence, informchk=False, consistchk=False,):
+    def add_sentence(self, sentence, informchk=False, consistchk=False):
         """
         Add a sentence to the current discourse.
 
@@ -223,11 +231,16 @@ class DiscourseTester(object):
                 for sent_reading in self._get_readings(sentence):
                     tp = Prover9Command(goal=sent_reading, assumptions=assumptions)
                     if tp.prove():
-                        print("Sentence '%s' under reading '%s':" % (sentence, str(sent_reading)))
+                        print(
+                            "Sentence '%s' under reading '%s':"
+                            % (sentence, str(sent_reading))
+                        )
                         print("Not informative relative to thread '%s'" % tid)
 
         self._input.append(sentence)
-        self._sentences = dict([('s%s' % i, sent) for i, sent in enumerate(self._input)])
+        self._sentences = dict(
+            [("s%s" % i, sent) for i, sent in enumerate(self._input)]
+        )
         # check whether adding the new sentence to the discourse preserves
         # consistency (i.e. a model can be found for the combined set of assumptions)
         if consistchk:
@@ -246,10 +259,15 @@ class DiscourseTester(object):
         try:
             self._input.remove(sentence)
         except ValueError:
-            print("Retraction failed. The sentence '%s' is not part of the current discourse:" % sentence)
+            print(
+                "Retraction failed. The sentence '%s' is not part of the current discourse:"
+                % sentence
+            )
             self.sentences()
             return None
-        self._sentences = dict([('s%s' % i, sent) for i, sent in enumerate(self._input)])
+        self._sentences = dict(
+            [("s%s" % i, sent) for i, sent in enumerate(self._input)]
+        )
         self.readings(verbose=False)
         if verbose:
             print("Current sentences are ")
@@ -282,8 +300,12 @@ class DiscourseTester(object):
         for sid in sorted(self._sentences):
             sentence = self._sentences[sid]
             readings = self._get_readings(sentence)
-            self._readings[sid] = dict([("%s-r%s" % (sid, rid), reading.simplify())
-                                                        for rid, reading in enumerate(sorted(readings, key=str))])
+            self._readings[sid] = dict(
+                [
+                    ("%s-r%s" % (sid, rid), reading.simplify())
+                    for rid, reading in enumerate(sorted(readings, key=str))
+                ]
+            )
 
     def _construct_threads(self):
         """
@@ -293,7 +315,9 @@ class DiscourseTester(object):
         thread_list = [[]]
         for sid in sorted(self._readings):
             thread_list = self.multiply(thread_list, sorted(self._readings[sid]))
-        self._threads = dict([("d%s" % tid, thread) for tid, thread in enumerate(thread_list)])
+        self._threads = dict(
+            [("d%s" % tid, thread) for tid, thread in enumerate(thread_list)]
+        )
         # re-initialize the filtered threads
         self._filtered_threads = {}
         # keep the same ids, but only include threads which get models
@@ -313,8 +337,8 @@ class DiscourseTester(object):
         else:
             for sid in sorted(self._readings):
                 print()
-                print('%s readings:' % sid)
-                print() #'-' * 30
+                print("%s readings:" % sid)
+                print()  #'-' * 30
                 for rid in sorted(self._readings[sid]):
                     lf = self._readings[sid][rid]
                     print("%s: %s" % (rid, lf.normalize()))
@@ -323,24 +347,32 @@ class DiscourseTester(object):
         """
         Print out the value of ``self._threads`` or ``self._filtered_threads``
         """
-        threads = (self._filtered_threads if filter else self._threads)
+        threads = self._filtered_threads if filter else self._threads
         for tid in sorted(threads):
             if show_thread_readings:
-                readings = [self._readings[rid.split('-')[0]][rid]
-                            for rid in self._threads[tid]]
+                readings = [
+                    self._readings[rid.split("-")[0]][rid] for rid in self._threads[tid]
+                ]
                 try:
-                    thread_reading = ": %s" % \
-                              self._reading_command.combine_readings(readings).normalize()
+                    thread_reading = (
+                        ": %s"
+                        % self._reading_command.combine_readings(readings).normalize()
+                    )
                 except Exception as e:
-                    thread_reading = ': INVALID: %s' % e.__class__.__name__
+                    thread_reading = ": INVALID: %s" % e.__class__.__name__
             else:
-                thread_reading = ''
+                thread_reading = ""
 
             print("%s:" % tid, self._threads[tid], thread_reading)
 
-
-    def readings(self, sentence=None, threaded=False, verbose=True,
-                 filter=False, show_thread_readings=False):
+    def readings(
+        self,
+        sentence=None,
+        threaded=False,
+        verbose=True,
+        filter=False,
+        show_thread_readings=False,
+    ):
         """
         Construct and show the readings of the discourse (or of a single sentence).
 
@@ -360,8 +392,9 @@ class DiscourseTester(object):
             if not threaded:
                 self._show_readings(sentence=sentence)
             else:
-                self._show_threads(filter=filter,
-                                   show_thread_readings=show_thread_readings)
+                self._show_threads(
+                    filter=filter, show_thread_readings=show_thread_readings
+                )
 
     def expand_threads(self, thread_id, threads=None):
         """
@@ -376,8 +409,11 @@ class DiscourseTester(object):
         """
         if threads is None:
             threads = self._threads
-        return [(rid, self._readings[sid][rid]) for rid in threads[thread_id] for sid in rid.split('-')[:1]]
-
+        return [
+            (rid, self._readings[sid][rid])
+            for rid in threads[thread_id]
+            for sid in rid.split("-")[:1]
+        ]
 
     ###############################
     # Models and Background
@@ -386,8 +422,15 @@ class DiscourseTester(object):
     def _check_consistency(self, threads, show=False, verbose=False):
         results = []
         for tid in sorted(threads):
-            assumptions = [reading for (rid, reading) in self.expand_threads(tid, threads=threads)]
-            assumptions = list(map(self._reading_command.to_fol, self._reading_command.process_thread(assumptions)))
+            assumptions = [
+                reading for (rid, reading) in self.expand_threads(tid, threads=threads)
+            ]
+            assumptions = list(
+                map(
+                    self._reading_command.to_fol,
+                    self._reading_command.process_thread(assumptions),
+                )
+            )
             if assumptions:
                 assumptions += self._background
                 # if Mace4 finds a model, it always seems to find it quickly
@@ -405,7 +448,7 @@ class DiscourseTester(object):
                         print(a)
                     spacer(80)
                 if modelfound:
-                    print(mb.model(format='cooked'))
+                    print(mb.model(format="cooked"))
                 else:
                     print("No model found!\n")
         return results
@@ -420,9 +463,11 @@ class DiscourseTester(object):
         """
         self._construct_readings()
         self._construct_threads()
-        threads = ({thread_id: self._threads[thread_id]} if thread_id else self._threads)
+        threads = {thread_id: self._threads[thread_id]} if thread_id else self._threads
 
-        for (tid, modelfound) in self._check_consistency(threads, show=show, verbose=verbose):
+        for (tid, modelfound) in self._check_consistency(
+            threads, show=show, verbose=verbose
+        ):
             idlist = [rid for rid in threads[tid]]
 
             if not modelfound:
@@ -445,13 +490,14 @@ class DiscourseTester(object):
         :type background: list(Expression)
         """
         from nltk.sem.logic import Expression
+
         for (count, e) in enumerate(background):
             assert isinstance(e, Expression)
             if verbose:
                 print("Adding assumption %s to background" % count)
             self._background.append(e)
 
-        #update the state
+        # update the state
         self._construct_readings()
         self._construct_threads()
 
@@ -462,7 +508,7 @@ class DiscourseTester(object):
         for e in self._background:
             print(str(e))
 
-   ###############################
+    ###############################
     # Misc
     ###############################
 
@@ -489,11 +535,6 @@ class DiscourseTester(object):
                 result.append(new)
         return result
 
-#multiply = DiscourseTester.multiply
-#L1 = [['A'], ['B']]
-#L2 = ['a', 'b', 'c']
-#print multiply(L1,L2)
-
 
 def load_fol(s):
     """
@@ -508,12 +549,12 @@ def load_fol(s):
     statements = []
     for linenum, line in enumerate(s.splitlines()):
         line = line.strip()
-        if line.startswith('#') or line == '':
+        if line.startswith("#") or line == "":
             continue
         try:
             statements.append(Expression.fromstring(line))
         except Exception:
-            raise ValueError('Unable to parse line %s: %s' % (linenum, line))
+            raise ValueError("Unable to parse line %s: %s" % (linenum, line))
     return statements
 
 
@@ -524,8 +565,9 @@ def discourse_demo(reading_command=None):
     """
     Illustrate the various methods of ``DiscourseTester``
     """
-    dt = DiscourseTester(['A boxer walks', 'Every boxer chases a girl'],
-                         reading_command)
+    dt = DiscourseTester(
+        ["A boxer walks", "Every boxer chases a girl"], reading_command
+    )
     dt.models()
     print()
     # dt.grammar()
@@ -536,33 +578,36 @@ def discourse_demo(reading_command=None):
     print()
     dt.readings(threaded=True)
     print()
-    dt.models('d1')
-    dt.add_sentence('John is a boxer')
+    dt.models("d1")
+    dt.add_sentence("John is a boxer")
     print()
     dt.sentences()
     print()
     dt.readings(threaded=True)
     print()
-    dt = DiscourseTester(['A student dances', 'Every student is a person'],
-                         reading_command)
+    dt = DiscourseTester(
+        ["A student dances", "Every student is a person"], reading_command
+    )
     print()
-    dt.add_sentence('No person dances', consistchk=True)
+    dt.add_sentence("No person dances", consistchk=True)
     print()
     dt.readings()
     print()
-    dt.retract_sentence('No person dances', verbose=True)
+    dt.retract_sentence("No person dances", verbose=True)
     print()
     dt.models()
     print()
-    dt.readings('A person dances')
+    dt.readings("A person dances")
     print()
-    dt.add_sentence('A person dances', informchk=True)
-    dt = DiscourseTester(['Vincent is a boxer', 'Fido is a boxer',
-                          'Vincent is married', 'Fido barks'],
-                         reading_command)
+    dt.add_sentence("A person dances", informchk=True)
+    dt = DiscourseTester(
+        ["Vincent is a boxer", "Fido is a boxer", "Vincent is married", "Fido barks"],
+        reading_command,
+    )
     dt.readings(filter=True)
     import nltk.data
-    background_file = os.path.join('grammars', 'book_grammars', 'background.fol')
+
+    background_file = os.path.join("grammars", "book_grammars", "background.fol")
     background = nltk.data.load(background_file)
 
     print()
@@ -578,8 +623,7 @@ def drt_discourse_demo(reading_command=None):
     """
     Illustrate the various methods of ``DiscourseTester``
     """
-    dt = DiscourseTester(['every dog chases a boy', 'he runs'],
-                         reading_command)
+    dt = DiscourseTester(["every dog chases a boy", "he runs"], reading_command)
     dt.models()
     print()
     dt.sentences()
@@ -592,21 +636,26 @@ def drt_discourse_demo(reading_command=None):
 
 
 def spacer(num=30):
-    print('-' * num)
+    print("-" * num)
 
 
 def demo():
     discourse_demo()
 
-    tagger = RegexpTagger([('^(chases|runs)$', 'VB'),
-                           ('^(a)$', 'ex_quant'),
-                           ('^(every)$', 'univ_quant'),
-                           ('^(dog|boy)$', 'NN'),
-                           ('^(he)$', 'PRP')])
+    tagger = RegexpTagger(
+        [
+            ("^(chases|runs)$", "VB"),
+            ("^(a)$", "ex_quant"),
+            ("^(every)$", "univ_quant"),
+            ("^(dog|boy)$", "NN"),
+            ("^(he)$", "PRP"),
+        ]
+    )
     depparser = MaltParser(tagger=tagger)
-    drt_discourse_demo(DrtGlueReadingCommand(remove_duplicates=False,
-                                             depparser=depparser))
+    drt_discourse_demo(
+        DrtGlueReadingCommand(remove_duplicates=False, depparser=depparser)
+    )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
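
As the module docstring notes, discourse threads are the Cartesian product of the per-sentence
readings, and consistency checking hands each thread to Mace4. A hedged session sketch, assuming
the Prover9/Mace4 binaries and the book_grammars data are installed (the sentences are the same
ones discourse_demo uses):

    # Sketch only: a basic DiscourseTester session.
    from nltk.inference.discourse import DiscourseTester

    dt = DiscourseTester(["A boxer walks", "Every boxer chases a girl"])
    dt.readings()               # per-sentence readings: s0-r0, s1-r0, s1-r1, ...
    dt.readings(threaded=True)  # threads d0, d1, ... (one reading per sentence)
    dt.models("d1")             # ask Mace4 for a model of thread d1
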
diff --git a/nlp_resource_data/nltk/inference/discourse.pyc b/nlp_resource_data/nltk/inference/discourse.pyc
deleted file mode 100755 (executable)
index 64f9d18..0000000
Binary files a/nlp_resource_data/nltk/inference/discourse.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 83e841b..159a510
@@ -9,7 +9,6 @@
 """
 A model builder that makes use of the external 'Mace4' package.
 """
-from __future__ import print_function
 
 import os
 import tempfile
@@ -27,6 +26,7 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand):
     a print_assumptions() method that is used to print the list
     of assumptions in multiple formats.
     """
+
     _interpformat_bin = None
 
     def __init__(self, goal=None, assumptions=None, max_models=500, model_builder=None):
@@ -48,7 +48,8 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand):
         BaseModelBuilderCommand.__init__(self, model_builder, goal, assumptions)
 
     @property
-    def valuation(mbc): return mbc.model('valuation')
+    def valuation(mbc):
+        return mbc.model("valuation")
 
     def _convert2val(self, valuation_str):
         """
@@ -57,35 +58,40 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand):
         :return: A model if one is generated; None otherwise.
         :rtype: sem.Valuation
         """
-        valuation_standard_format = self._transform_output(valuation_str, 'standard')
+        valuation_standard_format = self._transform_output(valuation_str, "standard")
 
         val = []
         for line in valuation_standard_format.splitlines(False):
             l = line.strip()
 
-            if l.startswith('interpretation'):
+            if l.startswith("interpretation"):
                 # find the number of entities in the model
-                num_entities = int(l[l.index('(')+1:l.index(',')].strip())
+                num_entities = int(l[l.index("(") + 1 : l.index(",")].strip())
 
-            elif l.startswith('function') and l.find('_') == -1:
+            elif l.startswith("function") and l.find("_") == -1:
                 # replace the integer identifier with a corresponding alphabetic character
-                name = l[l.index('(')+1:l.index(',')].strip()
+                name = l[l.index("(") + 1 : l.index(",")].strip()
                 if is_indvar(name):
                     name = name.upper()
-                value = int(l[l.index('[')+1:l.index(']')].strip())
+                value = int(l[l.index("[") + 1 : l.index("]")].strip())
                 val.append((name, MaceCommand._make_model_var(value)))
 
-            elif l.startswith('relation'):
-                l = l[l.index('(')+1:]
-                if '(' in l:
-                    #relation is not nullary
-                    name = l[:l.index('(')].strip()
-                    values = [int(v.strip()) for v in l[l.index('[')+1:l.index(']')].split(',')]
-                    val.append((name, MaceCommand._make_relation_set(num_entities, values)))
+            elif l.startswith("relation"):
+                l = l[l.index("(") + 1 :]
+                if "(" in l:
+                    # relation is not nullary
+                    name = l[: l.index("(")].strip()
+                    values = [
+                        int(v.strip())
+                        for v in l[l.index("[") + 1 : l.index("]")].split(",")
+                    ]
+                    val.append(
+                        (name, MaceCommand._make_relation_set(num_entities, values))
+                    )
                 else:
-                    #relation is nullary
-                    name = l[:l.index(',')].strip()
-                    value = int(l[l.index('[')+1:l.index(']')].strip())
+                    # relation is nullary
+                    name = l[: l.index(",")].strip()
+                    value = int(l[l.index("[") + 1 : l.index("]")].strip())
                     val.append((name, value == 1))
 
         return Valuation(val)
@@ -101,8 +107,10 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand):
         :type values: list of int
         """
         r = set()
-        for position in [pos for (pos,v) in enumerate(values) if v == 1]:
-            r.add(tuple(MaceCommand._make_relation_tuple(position, values, num_entities)))
+        for position in [pos for (pos, v) in enumerate(values) if v == 1]:
+            r.add(
+                tuple(MaceCommand._make_relation_tuple(position, values, num_entities))
+            )
         return r
 
     @staticmethod
@@ -114,11 +122,14 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand):
             sublist_start = position // sublist_size
             sublist_position = int(position % sublist_size)
 
-            sublist = values[sublist_start*sublist_size:(sublist_start+1)*sublist_size]
-            return [MaceCommand._make_model_var(sublist_start)] + \
-                   MaceCommand._make_relation_tuple(sublist_position,
-                                                    sublist,
-                                                    num_entities)
+            sublist = values[
+                sublist_start * sublist_size : (sublist_start + 1) * sublist_size
+            ]
+            return [
+                MaceCommand._make_model_var(sublist_start)
+            ] + MaceCommand._make_relation_tuple(
+                sublist_position, sublist, num_entities
+            )
 
     @staticmethod
     def _make_model_var(value):
@@ -128,10 +139,36 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand):
         :param value: where to index into the list of characters
         :type value: int
         """
-        letter = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n',
-                  'o','p','q','r','s','t','u','v','w','x','y','z'][value]
+        letter = [
+            "a",
+            "b",
+            "c",
+            "d",
+            "e",
+            "f",
+            "g",
+            "h",
+            "i",
+            "j",
+            "k",
+            "l",
+            "m",
+            "n",
+            "o",
+            "p",
+            "q",
+            "r",
+            "s",
+            "t",
+            "u",
+            "v",
+            "w",
+            "x",
+            "y",
+            "z",
+        ][value]
         num = value // 26
-        return (letter + str(num) if num > 0 else letter)
+        return letter + str(num) if num > 0 else letter
 
     def _decorate_model(self, valuation_str, format):
         """
@@ -145,7 +182,7 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand):
         """
         if not format:
             return valuation_str
-        elif format == 'valuation':
+        elif format == "valuation":
             return self._convert2val(valuation_str)
         else:
             return self._transform_output(valuation_str, format)
@@ -157,8 +194,16 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand):
         :param format: Output format for displaying models.
         :type format: str
         """
-        if format in ['standard', 'standard2', 'portable', 'tabular',
-                      'raw', 'cooked', 'xml', 'tex']:
+        if format in [
+            "standard",
+            "standard2",
+            "portable",
+            "tabular",
+            "raw",
+            "cooked",
+            "xml",
+            "tex",
+        ]:
             return self._call_interpformat(valuation_str, [format])[0]
         else:
             raise LookupError("The specified format does not exist")
@@ -174,10 +219,12 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand):
         """
         if self._interpformat_bin is None:
             self._interpformat_bin = self._modelbuilder._find_binary(
-                                                'interpformat', verbose)
+                "interpformat", verbose
+            )
 
-        return self._modelbuilder._call(input_str, self._interpformat_bin,
-                                        args, verbose)
+        return self._modelbuilder._call(
+            input_str, self._interpformat_bin, args, verbose
+        )
 
 
 class Mace(Prover9Parent, ModelBuilder):
@@ -198,8 +245,9 @@ class Mace(Prover9Parent, ModelBuilder):
         if not assumptions:
             assumptions = []
 
-        stdout, returncode = self._call_mace4(self.prover9_input(goal, assumptions),
-                                              verbose=verbose)
+        stdout, returncode = self._call_mace4(
+            self.prover9_input(goal, assumptions), verbose=verbose
+        )
         return (returncode == 0, stdout)
 
     def _call_mace4(self, input_str, args=[], verbose=False):
@@ -212,18 +260,19 @@ class Mace(Prover9Parent, ModelBuilder):
         :see: ``config_prover9``
         """
         if self._mace4_bin is None:
-            self._mace4_bin = self._find_binary('mace4', verbose)
+            self._mace4_bin = self._find_binary("mace4", verbose)
 
-        updated_input_str = ''
+        updated_input_str = ""
         if self._end_size > 0:
-            updated_input_str += 'assign(end_size, %d).\n\n' % self._end_size
+            updated_input_str += "assign(end_size, %d).\n\n" % self._end_size
         updated_input_str += input_str
 
         return self._call(updated_input_str, self._mace4_bin, args, verbose)
 
 
 def spacer(num=30):
-    print('-' * num)
+    print("-" * num)
+
 
 def decode_result(found):
     """
@@ -232,7 +281,10 @@ def decode_result(found):
     :param found: The output of model_found()
     :type found: bool
     """
-    return {True: 'Countermodel found', False: 'No countermodel found', None: 'None'}[found]
+    return {True: "Countermodel found", False: "No countermodel found", None: "None"}[
+        found
+    ]
+
 
 def test_model_found(arguments):
     """
@@ -244,21 +296,26 @@ def test_model_found(arguments):
         m = MaceCommand(g, assumptions=alist, max_models=50)
         found = m.build_model()
         for a in alist:
-            print('   %s' % a)
-        print('|- %s: %s\n' % (g, decode_result(found)))
+            print("   %s" % a)
+        print("|- %s: %s\n" % (g, decode_result(found)))
 
 
 def test_build_model(arguments):
     """
     Try to build a ``nltk.sem.Valuation``.
     """
-    g = Expression.fromstring('all x.man(x)')
-    alist = [Expression.fromstring(a) for a in ['man(John)',
-                                   'man(Socrates)',
-                                   'man(Bill)',
-                                   'some x.(-(x = John) & man(x) & sees(John,x))',
-                                   'some x.(-(x = Bill) & man(x))',
-                                   'all x.some y.(man(x) -> gives(Socrates,x,y))']]
+    g = Expression.fromstring("all x.man(x)")
+    alist = [
+        Expression.fromstring(a)
+        for a in [
+            "man(John)",
+            "man(Socrates)",
+            "man(Bill)",
+            "some x.(-(x = John) & man(x) & sees(John,x))",
+            "some x.(-(x = Bill) & man(x))",
+            "all x.some y.(man(x) -> gives(Socrates,x,y))",
+        ]
+    ]
 
     m = MaceCommand(g, assumptions=alist)
     m.build_model()
@@ -266,14 +323,15 @@ def test_build_model(arguments):
     print("Assumptions and Goal")
     spacer()
     for a in alist:
-        print('   %s' % a)
-    print('|- %s: %s\n' % (g, decode_result(m.build_model())))
+        print("   %s" % a)
+    print("|- %s: %s\n" % (g, decode_result(m.build_model())))
     spacer()
-    #print m.model('standard')
-    #print m.model('cooked')
+    # print(m.model('standard'))
+    # print(m.model('cooked'))
     print("Valuation")
     spacer()
-    print(m.valuation, '\n')
+    print(m.valuation, "\n")
+
 
 def test_transform_output(argument_pair):
     """
@@ -284,28 +342,43 @@ def test_transform_output(argument_pair):
     m = MaceCommand(g, assumptions=alist)
     m.build_model()
     for a in alist:
-        print('   %s' % a)
-    print('|- %s: %s\n' % (g, m.build_model()))
-    for format in ['standard', 'portable', 'xml', 'cooked']:
+        print("   %s" % a)
+    print("|- %s: %s\n" % (g, m.build_model()))
+    for format in ["standard", "portable", "xml", "cooked"]:
         spacer()
         print("Using '%s' format" % format)
         spacer()
         print(m.model(format=format))
 
+
 def test_make_relation_set():
-    print(MaceCommand._make_relation_set(num_entities=3, values=[1,0,1]) == set([('c',), ('a',)]))
-    print(MaceCommand._make_relation_set(num_entities=3, values=[0,0,0,0,0,0,1,0,0]) == set([('c', 'a')]))
-    print(MaceCommand._make_relation_set(num_entities=2, values=[0,0,1,0,0,0,1,0]) == set([('a', 'b', 'a'), ('b', 'b', 'a')]))
+    print(
+        MaceCommand._make_relation_set(num_entities=3, values=[1, 0, 1])
+        == set([("c",), ("a",)])
+    )
+    print(
+        MaceCommand._make_relation_set(
+            num_entities=3, values=[0, 0, 0, 0, 0, 0, 1, 0, 0]
+        )
+        == set([("c", "a")])
+    )
+    print(
+        MaceCommand._make_relation_set(num_entities=2, values=[0, 0, 1, 0, 0, 0, 1, 0])
+        == set([("a", "b", "a"), ("b", "b", "a")])
+    )
+
 
 arguments = [
-    ('mortal(Socrates)', ['all x.(man(x) -> mortal(x))', 'man(Socrates)']),
-    ('(not mortal(Socrates))', ['all x.(man(x) -> mortal(x))', 'man(Socrates)'])
+    ("mortal(Socrates)", ["all x.(man(x) -> mortal(x))", "man(Socrates)"]),
+    ("(not mortal(Socrates))", ["all x.(man(x) -> mortal(x))", "man(Socrates)"]),
 ]
 
+
 def demo():
     test_model_found(arguments)
     test_build_model(arguments)
     test_transform_output(arguments[1])
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
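
MaceCommand drives the external mace4 binary and can post-process its raw output through
interpformat, including into an nltk.sem Valuation. A hedged sketch along the lines of the test
functions above (requires mace4 on the configured binary path):

    # Sketch only: hunting for a countermodel with Mace4.
    from nltk.sem import Expression
    from nltk.inference.mace import MaceCommand

    read = Expression.fromstring
    assumptions = [read("all x.(man(x) -> mortal(x))"), read("man(Socrates)")]

    mc = MaceCommand(read("(not mortal(Socrates))"), assumptions=assumptions, max_models=50)
    if mc.build_model():     # True: Mace4 finds a model of the assumptions plus the negated goal
        print(mc.valuation)  # the countermodel rendered as an nltk.sem Valuation
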
diff --git a/nlp_resource_data/nltk/inference/mace.pyc b/nlp_resource_data/nltk/inference/mace.pyc
deleted file mode 100755 (executable)
index d01e8e4..0000000
Binary files a/nlp_resource_data/nltk/inference/mace.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 60c9cd8..3bbb8d2
@@ -2,7 +2,7 @@
 #
 # Author: Daniel H. Garrette <dhgarrette@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
@@ -11,23 +11,34 @@ A module to perform nonmonotonic reasoning.  The ideas and demonstrations in
 this module are based on "Logical Foundations of Artificial Intelligence" by
 Michael R. Genesereth and Nils J. Nilsson.
 """
-from __future__ import print_function, unicode_literals
 
-from nltk.inference.prover9 import Prover9, Prover9Command
 from collections import defaultdict
 from functools import reduce
 
-from nltk.sem.logic import (VariableExpression, EqualityExpression,
-                            ApplicationExpression, Expression,
-                            AbstractVariableExpression, AllExpression,
-                            BooleanExpression, NegatedExpression,
-                            ExistsExpression, Variable, ImpExpression,
-                            AndExpression, unique_variable, operator)
+from nltk.inference.prover9 import Prover9, Prover9Command
+from nltk.sem.logic import (
+    VariableExpression,
+    EqualityExpression,
+    ApplicationExpression,
+    Expression,
+    AbstractVariableExpression,
+    AllExpression,
+    BooleanExpression,
+    NegatedExpression,
+    ExistsExpression,
+    Variable,
+    ImpExpression,
+    AndExpression,
+    unique_variable,
+    operator,
+)
 
 from nltk.inference.api import Prover, ProverCommandDecorator
-from nltk.compat import python_2_unicode_compatible
 
-class ProverParseError(Exception): pass
+
+class ProverParseError(Exception):
+    pass
+
 
 def get_domain(goal, assumptions):
     if goal is None:
@@ -36,11 +47,13 @@ def get_domain(goal, assumptions):
         all_expressions = assumptions + [-goal]
     return reduce(operator.or_, (a.constants() for a in all_expressions), set())
 
+
 class ClosedDomainProver(ProverCommandDecorator):
     """
     This is a prover decorator that adds domain closure assumptions before
     proving.
     """
+
     def assumptions(self):
         assumptions = [a for a in self._command.assumptions()]
         goal = self._command.goal()
@@ -64,28 +77,34 @@ class ClosedDomainProver(ProverCommandDecorator):
         :return: ``Expression``
         """
         if isinstance(ex, AllExpression):
-            conjuncts = [ex.term.replace(ex.variable, VariableExpression(d))
-                         for d in domain]
+            conjuncts = [
+                ex.term.replace(ex.variable, VariableExpression(d)) for d in domain
+            ]
             conjuncts = [self.replace_quants(c, domain) for c in conjuncts]
-            return reduce(lambda x,y: x&y, conjuncts)
+            return reduce(lambda x, y: x & y, conjuncts)
         elif isinstance(ex, BooleanExpression):
-            return ex.__class__(self.replace_quants(ex.first, domain),
-                                self.replace_quants(ex.second, domain) )
+            return ex.__class__(
+                self.replace_quants(ex.first, domain),
+                self.replace_quants(ex.second, domain),
+            )
         elif isinstance(ex, NegatedExpression):
             return -self.replace_quants(ex.term, domain)
         elif isinstance(ex, ExistsExpression):
-            disjuncts = [ex.term.replace(ex.variable, VariableExpression(d))
-                         for d in domain]
+            disjuncts = [
+                ex.term.replace(ex.variable, VariableExpression(d)) for d in domain
+            ]
             disjuncts = [self.replace_quants(d, domain) for d in disjuncts]
-            return reduce(lambda x,y: x|y, disjuncts)
+            return reduce(lambda x, y: x | y, disjuncts)
         else:
             return ex
 
+
 class UniqueNamesProver(ProverCommandDecorator):
     """
     This is a prover decorator that adds unique names assumptions before
     proving.
     """
+
     def assumptions(self):
         """
          - Domain = union([e.free()|e.constants() for e in all_expressions])
@@ -95,36 +114,39 @@ class UniqueNamesProver(ProverCommandDecorator):
 
         domain = list(get_domain(self._command.goal(), assumptions))
 
-        #build a dictionary of obvious equalities
+        # build a dictionary of obvious equalities
         eq_sets = SetHolder()
         for a in assumptions:
             if isinstance(a, EqualityExpression):
                 av = a.first.variable
                 bv = a.second.variable
-                #put 'a' and 'b' in the same set
+                # put 'a' and 'b' in the same set
                 eq_sets[av].add(bv)
 
         new_assumptions = []
-        for i,a in enumerate(domain):
-            for b in domain[i+1:]:
-                #if a and b are not already in the same equality set
+        for i, a in enumerate(domain):
+            for b in domain[i + 1 :]:
+                # if a and b are not already in the same equality set
                 if b not in eq_sets[a]:
-                    newEqEx = EqualityExpression(VariableExpression(a),
-                                                 VariableExpression(b))
+                    newEqEx = EqualityExpression(
+                        VariableExpression(a), VariableExpression(b)
+                    )
                     if Prover9().prove(newEqEx, assumptions):
-                        #we can prove that the names are the same entity.
-                        #remember that they are equal so we don't re-check.
+                        # we can prove that the names are the same entity.
+                        # remember that they are equal so we don't re-check.
                         eq_sets[a].add(b)
                     else:
-                        #we can't prove it, so assume unique names
+                        # we can't prove it, so assume unique names
                         new_assumptions.append(-newEqEx)
 
         return assumptions + new_assumptions
 
+
 class SetHolder(list):
     """
     A list of sets of Variables.
     """
+
     def __getitem__(self, item):
         """
         :param item: ``Variable``
@@ -134,11 +156,12 @@ class SetHolder(list):
         for s in self:
             if item in s:
                 return s
-        #item is not found in any existing set.  so create a new set
+        # item is not found in any existing set, so create a new set
         new = set([item])
         self.append(new)
         return new
 
+
 class ClosedWorldProver(ProverCommandDecorator):
     """
     This is a prover decorator that completes predicates before proving.
@@ -170,6 +193,7 @@ class ClosedWorldProver(ProverCommandDecorator):
     -------------------
     -bird(Sam)
     """
+
     def assumptions(self):
         assumptions = self._command.assumptions()
 
@@ -183,32 +207,32 @@ class ClosedWorldProver(ProverCommandDecorator):
 
             disjuncts = []
 
-            #Turn the signatures into disjuncts
+            # Turn the signatures into disjuncts
             for sig in predHolder.signatures:
                 equality_exs = []
-                for v1,v2 in zip(new_sig_exs, sig):
-                    equality_exs.append(EqualityExpression(v1,v2))
-                disjuncts.append(reduce(lambda x,y: x&y, equality_exs))
+                for v1, v2 in zip(new_sig_exs, sig):
+                    equality_exs.append(EqualityExpression(v1, v2))
+                disjuncts.append(reduce(lambda x, y: x & y, equality_exs))
 
-            #Turn the properties into disjuncts
+            # Turn the properties into disjuncts
             for prop in predHolder.properties:
-                #replace variables from the signature with new sig variables
+                # replace variables from the signature with new sig variables
                 bindings = {}
-                for v1,v2 in zip(new_sig_exs, prop[0]):
+                for v1, v2 in zip(new_sig_exs, prop[0]):
                     bindings[v2] = v1
                 disjuncts.append(prop[1].substitute_bindings(bindings))
 
-            #make the assumption
+            # make the assumption
             if disjuncts:
-                #disjuncts exist, so make an implication
+                # disjuncts exist, so make an implication
                 antecedent = self._make_antecedent(p, new_sig)
-                consequent = reduce(lambda x,y: x|y, disjuncts)
+                consequent = reduce(lambda x, y: x | y, disjuncts)
                 accum = ImpExpression(antecedent, consequent)
             else:
-                #nothing has property 'p'
+                # nothing has property 'p'
                 accum = NegatedExpression(self._make_antecedent(p, new_sig))
 
-            #quantify the implication
+            # quantify the implication
             for new_sig_var in new_sig[::-1]:
                 accum = AllExpression(new_sig_var, accum)
             new_assumptions.append(accum)
@@ -253,25 +277,28 @@ class ClosedWorldProver(ProverCommandDecorator):
             self._map_predicates(expression.first, predDict)
             self._map_predicates(expression.second, predDict)
         elif isinstance(expression, AllExpression):
-            #collect all the universally quantified variables
+            # collect all the universally quantified variables
             sig = [expression.variable]
             term = expression.term
             while isinstance(term, AllExpression):
                 sig.append(term.variable)
                 term = term.term
             if isinstance(term, ImpExpression):
-                if isinstance(term.first, ApplicationExpression) and \
-                   isinstance(term.second, ApplicationExpression):
+                if isinstance(term.first, ApplicationExpression) and isinstance(
+                    term.second, ApplicationExpression
+                ):
                     func1, args1 = term.first.uncurry()
                     func2, args2 = term.second.uncurry()
-                    if isinstance(func1, AbstractVariableExpression) and \
-                       isinstance(func2, AbstractVariableExpression) and \
-                       sig == [v.variable for v in args1] and \
-                       sig == [v.variable for v in args2]:
+                    if (
+                        isinstance(func1, AbstractVariableExpression)
+                        and isinstance(func2, AbstractVariableExpression)
+                        and sig == [v.variable for v in args1]
+                        and sig == [v.variable for v in args2]
+                    ):
                         predDict[func2].append_prop((tuple(sig), term.first))
                         predDict[func1].validate_sig_len(sig)
 
-@python_2_unicode_compatible
+
 class PredHolder(object):
     """
     This class will be used by a dictionary that will store information
@@ -287,6 +314,7 @@ class PredHolder(object):
     'all x.all y.(see(x,y) -> know(x,y))' would result in "((x,y),('see(x,y)'))"
     for 'know'.
     """
+
     def __init__(self):
         self.signatures = []
         self.properties = []
@@ -307,190 +335,211 @@ class PredHolder(object):
             raise Exception("Signature lengths do not match")
 
     def __str__(self):
-        return '(%s,%s,%s)' % (self.signatures, self.properties,
-                               self.signature_len)
+        return "(%s,%s,%s)" % (self.signatures, self.properties, self.signature_len)
 
     def __repr__(self):
         return "%s" % self
 
+
 def closed_domain_demo():
     lexpr = Expression.fromstring
 
-    p1 = lexpr(r'exists x.walk(x)')
-    p2 = lexpr(r'man(Socrates)')
-    c = lexpr(r'walk(Socrates)')
-    prover = Prover9Command(c, [p1,p2])
+    p1 = lexpr(r"exists x.walk(x)")
+    p2 = lexpr(r"man(Socrates)")
+    c = lexpr(r"walk(Socrates)")
+    prover = Prover9Command(c, [p1, p2])
     print(prover.prove())
     cdp = ClosedDomainProver(prover)
-    print('assumptions:')
-    for a in cdp.assumptions(): print('   ', a)
-    print('goal:', cdp.goal())
+    print("assumptions:")
+    for a in cdp.assumptions():
+        print("   ", a)
+    print("goal:", cdp.goal())
     print(cdp.prove())
 
-    p1 = lexpr(r'exists x.walk(x)')
-    p2 = lexpr(r'man(Socrates)')
-    p3 = lexpr(r'-walk(Bill)')
-    c = lexpr(r'walk(Socrates)')
-    prover = Prover9Command(c, [p1,p2,p3])
+    p1 = lexpr(r"exists x.walk(x)")
+    p2 = lexpr(r"man(Socrates)")
+    p3 = lexpr(r"-walk(Bill)")
+    c = lexpr(r"walk(Socrates)")
+    prover = Prover9Command(c, [p1, p2, p3])
     print(prover.prove())
     cdp = ClosedDomainProver(prover)
-    print('assumptions:')
-    for a in cdp.assumptions(): print('   ', a)
-    print('goal:', cdp.goal())
+    print("assumptions:")
+    for a in cdp.assumptions():
+        print("   ", a)
+    print("goal:", cdp.goal())
     print(cdp.prove())
 
-    p1 = lexpr(r'exists x.walk(x)')
-    p2 = lexpr(r'man(Socrates)')
-    p3 = lexpr(r'-walk(Bill)')
-    c = lexpr(r'walk(Socrates)')
-    prover = Prover9Command(c, [p1,p2,p3])
+    p1 = lexpr(r"exists x.walk(x)")
+    p2 = lexpr(r"man(Socrates)")
+    p3 = lexpr(r"-walk(Bill)")
+    c = lexpr(r"walk(Socrates)")
+    prover = Prover9Command(c, [p1, p2, p3])
     print(prover.prove())
     cdp = ClosedDomainProver(prover)
-    print('assumptions:')
-    for a in cdp.assumptions(): print('   ', a)
-    print('goal:', cdp.goal())
+    print("assumptions:")
+    for a in cdp.assumptions():
+        print("   ", a)
+    print("goal:", cdp.goal())
     print(cdp.prove())
 
-    p1 = lexpr(r'walk(Socrates)')
-    p2 = lexpr(r'walk(Bill)')
-    c = lexpr(r'all x.walk(x)')
-    prover = Prover9Command(c, [p1,p2])
+    p1 = lexpr(r"walk(Socrates)")
+    p2 = lexpr(r"walk(Bill)")
+    c = lexpr(r"all x.walk(x)")
+    prover = Prover9Command(c, [p1, p2])
     print(prover.prove())
     cdp = ClosedDomainProver(prover)
-    print('assumptions:')
-    for a in cdp.assumptions(): print('   ', a)
-    print('goal:', cdp.goal())
+    print("assumptions:")
+    for a in cdp.assumptions():
+        print("   ", a)
+    print("goal:", cdp.goal())
     print(cdp.prove())
 
-    p1 = lexpr(r'girl(mary)')
-    p2 = lexpr(r'dog(rover)')
-    p3 = lexpr(r'all x.(girl(x) -> -dog(x))')
-    p4 = lexpr(r'all x.(dog(x) -> -girl(x))')
-    p5 = lexpr(r'chase(mary, rover)')
-    c = lexpr(r'exists y.(dog(y) & all x.(girl(x) -> chase(x,y)))')
-    prover = Prover9Command(c, [p1,p2,p3,p4,p5])
+    p1 = lexpr(r"girl(mary)")
+    p2 = lexpr(r"dog(rover)")
+    p3 = lexpr(r"all x.(girl(x) -> -dog(x))")
+    p4 = lexpr(r"all x.(dog(x) -> -girl(x))")
+    p5 = lexpr(r"chase(mary, rover)")
+    c = lexpr(r"exists y.(dog(y) & all x.(girl(x) -> chase(x,y)))")
+    prover = Prover9Command(c, [p1, p2, p3, p4, p5])
     print(prover.prove())
     cdp = ClosedDomainProver(prover)
-    print('assumptions:')
-    for a in cdp.assumptions(): print('   ', a)
-    print('goal:', cdp.goal())
+    print("assumptions:")
+    for a in cdp.assumptions():
+        print("   ", a)
+    print("goal:", cdp.goal())
     print(cdp.prove())
 
+
 def unique_names_demo():
     lexpr = Expression.fromstring
 
-    p1 = lexpr(r'man(Socrates)')
-    p2 = lexpr(r'man(Bill)')
-    c = lexpr(r'exists x.exists y.(x != y)')
-    prover = Prover9Command(c, [p1,p2])
+    p1 = lexpr(r"man(Socrates)")
+    p2 = lexpr(r"man(Bill)")
+    c = lexpr(r"exists x.exists y.(x != y)")
+    prover = Prover9Command(c, [p1, p2])
     print(prover.prove())
     unp = UniqueNamesProver(prover)
-    print('assumptions:')
-    for a in unp.assumptions(): print('   ', a)
-    print('goal:', unp.goal())
+    print("assumptions:")
+    for a in unp.assumptions():
+        print("   ", a)
+    print("goal:", unp.goal())
     print(unp.prove())
 
-    p1 = lexpr(r'all x.(walk(x) -> (x = Socrates))')
-    p2 = lexpr(r'Bill = William')
-    p3 = lexpr(r'Bill = Billy')
-    c = lexpr(r'-walk(William)')
-    prover = Prover9Command(c, [p1,p2,p3])
+    p1 = lexpr(r"all x.(walk(x) -> (x = Socrates))")
+    p2 = lexpr(r"Bill = William")
+    p3 = lexpr(r"Bill = Billy")
+    c = lexpr(r"-walk(William)")
+    prover = Prover9Command(c, [p1, p2, p3])
     print(prover.prove())
     unp = UniqueNamesProver(prover)
-    print('assumptions:')
-    for a in unp.assumptions(): print('   ', a)
-    print('goal:', unp.goal())
+    print("assumptions:")
+    for a in unp.assumptions():
+        print("   ", a)
+    print("goal:", unp.goal())
     print(unp.prove())
 
+
 def closed_world_demo():
     lexpr = Expression.fromstring
 
-    p1 = lexpr(r'walk(Socrates)')
-    p2 = lexpr(r'(Socrates != Bill)')
-    c = lexpr(r'-walk(Bill)')
-    prover = Prover9Command(c, [p1,p2])
+    p1 = lexpr(r"walk(Socrates)")
+    p2 = lexpr(r"(Socrates != Bill)")
+    c = lexpr(r"-walk(Bill)")
+    prover = Prover9Command(c, [p1, p2])
     print(prover.prove())
     cwp = ClosedWorldProver(prover)
-    print('assumptions:')
-    for a in cwp.assumptions(): print('   ', a)
-    print('goal:', cwp.goal())
+    print("assumptions:")
+    for a in cwp.assumptions():
+        print("   ", a)
+    print("goal:", cwp.goal())
     print(cwp.prove())
 
-    p1 = lexpr(r'see(Socrates, John)')
-    p2 = lexpr(r'see(John, Mary)')
-    p3 = lexpr(r'(Socrates != John)')
-    p4 = lexpr(r'(John != Mary)')
-    c = lexpr(r'-see(Socrates, Mary)')
-    prover = Prover9Command(c, [p1,p2,p3,p4])
+    p1 = lexpr(r"see(Socrates, John)")
+    p2 = lexpr(r"see(John, Mary)")
+    p3 = lexpr(r"(Socrates != John)")
+    p4 = lexpr(r"(John != Mary)")
+    c = lexpr(r"-see(Socrates, Mary)")
+    prover = Prover9Command(c, [p1, p2, p3, p4])
     print(prover.prove())
     cwp = ClosedWorldProver(prover)
-    print('assumptions:')
-    for a in cwp.assumptions(): print('   ', a)
-    print('goal:', cwp.goal())
+    print("assumptions:")
+    for a in cwp.assumptions():
+        print("   ", a)
+    print("goal:", cwp.goal())
     print(cwp.prove())
 
-    p1 = lexpr(r'all x.(ostrich(x) -> bird(x))')
-    p2 = lexpr(r'bird(Tweety)')
-    p3 = lexpr(r'-ostrich(Sam)')
-    p4 = lexpr(r'Sam != Tweety')
-    c = lexpr(r'-bird(Sam)')
-    prover = Prover9Command(c, [p1,p2,p3,p4])
+    p1 = lexpr(r"all x.(ostrich(x) -> bird(x))")
+    p2 = lexpr(r"bird(Tweety)")
+    p3 = lexpr(r"-ostrich(Sam)")
+    p4 = lexpr(r"Sam != Tweety")
+    c = lexpr(r"-bird(Sam)")
+    prover = Prover9Command(c, [p1, p2, p3, p4])
     print(prover.prove())
     cwp = ClosedWorldProver(prover)
-    print('assumptions:')
-    for a in cwp.assumptions(): print('   ', a)
-    print('goal:', cwp.goal())
+    print("assumptions:")
+    for a in cwp.assumptions():
+        print("   ", a)
+    print("goal:", cwp.goal())
     print(cwp.prove())
 
+
 def combination_prover_demo():
     lexpr = Expression.fromstring
 
-    p1 = lexpr(r'see(Socrates, John)')
-    p2 = lexpr(r'see(John, Mary)')
-    c = lexpr(r'-see(Socrates, Mary)')
-    prover = Prover9Command(c, [p1,p2])
+    p1 = lexpr(r"see(Socrates, John)")
+    p2 = lexpr(r"see(John, Mary)")
+    c = lexpr(r"-see(Socrates, Mary)")
+    prover = Prover9Command(c, [p1, p2])
     print(prover.prove())
-    command = ClosedDomainProver(
-                  UniqueNamesProver(
-                      ClosedWorldProver(prover)))
-    for a in command.assumptions(): print(a)
+    command = ClosedDomainProver(UniqueNamesProver(ClosedWorldProver(prover)))
+    for a in command.assumptions():
+        print(a)
     print(command.prove())
 
+
 def default_reasoning_demo():
     lexpr = Expression.fromstring
 
     premises = []
 
-    #define taxonomy
-    premises.append(lexpr(r'all x.(elephant(x)        -> animal(x))'))
-    premises.append(lexpr(r'all x.(bird(x)            -> animal(x))'))
-    premises.append(lexpr(r'all x.(dove(x)            -> bird(x))'))
-    premises.append(lexpr(r'all x.(ostrich(x)         -> bird(x))'))
-    premises.append(lexpr(r'all x.(flying_ostrich(x)  -> ostrich(x))'))
-
-    #default properties
-    premises.append(lexpr(r'all x.((animal(x)  & -Ab1(x)) -> -fly(x))')) #normal animals don't fly
-    premises.append(lexpr(r'all x.((bird(x)    & -Ab2(x)) -> fly(x))')) #normal birds fly
-    premises.append(lexpr(r'all x.((ostrich(x) & -Ab3(x)) -> -fly(x))')) #normal ostriches don't fly
-
-    #specify abnormal entities
-    premises.append(lexpr(r'all x.(bird(x)           -> Ab1(x))')) #flight
-    premises.append(lexpr(r'all x.(ostrich(x)        -> Ab2(x))')) #non-flying bird
-    premises.append(lexpr(r'all x.(flying_ostrich(x) -> Ab3(x))')) #flying ostrich
-
-    #define entities
-    premises.append(lexpr(r'elephant(E)'))
-    premises.append(lexpr(r'dove(D)'))
-    premises.append(lexpr(r'ostrich(O)'))
-
-    #print the assumptions
+    # define taxonomy
+    premises.append(lexpr(r"all x.(elephant(x)        -> animal(x))"))
+    premises.append(lexpr(r"all x.(bird(x)            -> animal(x))"))
+    premises.append(lexpr(r"all x.(dove(x)            -> bird(x))"))
+    premises.append(lexpr(r"all x.(ostrich(x)         -> bird(x))"))
+    premises.append(lexpr(r"all x.(flying_ostrich(x)  -> ostrich(x))"))
+
+    # default properties
+    premises.append(
+        lexpr(r"all x.((animal(x)  & -Ab1(x)) -> -fly(x))")
+    )  # normal animals don't fly
+    premises.append(
+        lexpr(r"all x.((bird(x)    & -Ab2(x)) -> fly(x))")
+    )  # normal birds fly
+    premises.append(
+        lexpr(r"all x.((ostrich(x) & -Ab3(x)) -> -fly(x))")
+    )  # normal ostriches don't fly
+
+    # specify abnormal entities
+    premises.append(lexpr(r"all x.(bird(x)           -> Ab1(x))"))  # flight
+    premises.append(lexpr(r"all x.(ostrich(x)        -> Ab2(x))"))  # non-flying bird
+    premises.append(lexpr(r"all x.(flying_ostrich(x) -> Ab3(x))"))  # flying ostrich
+
+    # define entities
+    premises.append(lexpr(r"elephant(E)"))
+    premises.append(lexpr(r"dove(D)"))
+    premises.append(lexpr(r"ostrich(O)"))
+
+    # print the assumptions
     prover = Prover9Command(None, premises)
     command = UniqueNamesProver(ClosedWorldProver(prover))
-    for a in command.assumptions(): print(a)
+    for a in command.assumptions():
+        print(a)
+
+    print_proof("-fly(E)", premises)
+    print_proof("fly(D)", premises)
+    print_proof("-fly(O)", premises)
 
-    print_proof('-fly(E)', premises)
-    print_proof('fly(D)', premises)
-    print_proof('-fly(O)', premises)
 
 def print_proof(goal, premises):
     lexpr = Expression.fromstring
@@ -498,6 +547,7 @@ def print_proof(goal, premises):
     command = UniqueNamesProver(ClosedWorldProver(prover))
     print(goal, prover.prove(), command.prove())
 
+
 def demo():
     closed_domain_demo()
     unique_names_demo()
@@ -505,5 +555,6 @@ def demo():
     combination_prover_demo()
     default_reasoning_demo()
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/inference/nonmonotonic.pyc b/nlp_resource_data/nltk/inference/nonmonotonic.pyc
deleted file mode 100755 (executable)
index bdd8449..0000000
Binary files a/nlp_resource_data/nltk/inference/nonmonotonic.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/inference/prover9.py b/nlp_resource_data/nltk/inference/prover9.py
old mode 100755 (executable)
new mode 100644 (file)
index cfeeb1e..5a76c34
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Interface to the Prover9 Theorem Prover
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Dan Garrette <dhgarrette@gmail.com>
 #         Ewan Klein <ewan@inf.ed.ac.uk>
 #
@@ -9,15 +9,22 @@
 """
 A theorem prover that makes use of the external 'Prover9' package.
 """
-from __future__ import print_function
 
 import os
 import subprocess
 
 import nltk
-from nltk.sem.logic import Expression, ExistsExpression, AllExpression, \
-    NegatedExpression, AndExpression, IffExpression, OrExpression, \
-    EqualityExpression, ImpExpression
+from nltk.sem.logic import (
+    Expression,
+    ExistsExpression,
+    AllExpression,
+    NegatedExpression,
+    AndExpression,
+    IffExpression,
+    OrExpression,
+    EqualityExpression,
+    ImpExpression,
+)
 from nltk.inference.api import BaseProverCommand, Prover
 
 #
@@ -25,16 +32,16 @@ from nltk.inference.api import BaseProverCommand, Prover
 #
 p9_return_codes = {
     0: True,
-    1:  "(FATAL)",      #A fatal error occurred (user's syntax error).
-    2: False,           # (SOS_EMPTY) Prover9 ran out of things to do
-                        #   (sos list exhausted).
-    3: "(MAX_MEGS)",    # The max_megs (memory limit) parameter was exceeded.
-    4: "(MAX_SECONDS)", # The max_seconds parameter was exceeded.
-    5: "(MAX_GIVEN)",   # The max_given parameter was exceeded.
-    6: "(MAX_KEPT)",    # The max_kept parameter was exceeded.
-    7: "(ACTION)",      # A Prover9 action terminated the search.
-    101: "(SIGSEGV)",   # Prover9 crashed, most probably due to a bug.
- }
+    1: "(FATAL)",  # A fatal error occurred (user's syntax error).
+    2: False,  # (SOS_EMPTY) Prover9 ran out of things to do
+    #   (sos list exhausted).
+    3: "(MAX_MEGS)",  # The max_megs (memory limit) parameter was exceeded.
+    4: "(MAX_SECONDS)",  # The max_seconds parameter was exceeded.
+    5: "(MAX_GIVEN)",  # The max_given parameter was exceeded.
+    6: "(MAX_KEPT)",  # The max_kept parameter was exceeded.
+    7: "(ACTION)",  # A Prover9 action terminated the search.
+    101: "(SIGSEGV)",  # Prover9 crashed, most probably due to a bug.
+}
 
 
 class Prover9CommandParent(object):
@@ -43,19 +50,22 @@ class Prover9CommandParent(object):
     which is responsible for maintaining a goal and a set of assumptions,
     and generating prover9-style input files from them.
     """
-    def print_assumptions(self, output_format='nltk'):
+
+    def print_assumptions(self, output_format="nltk"):
         """
         Print the list of the current assumptions.
         """
-        if output_format.lower() == 'nltk':
+        if output_format.lower() == "nltk":
             for a in self.assumptions():
                 print(a)
-        elif output_format.lower() == 'prover9':
+        elif output_format.lower() == "prover9":
             for a in convert_to_prover9(self.assumptions()):
                 print(a)
         else:
-            raise NameError("Unrecognized value for 'output_format': %s" %
-                            output_format)
+            raise NameError(
+                "Unrecognized value for 'output_format': %s" % output_format
+            )
+
 
 class Prover9Command(Prover9CommandParent, BaseProverCommand):
     """
@@ -63,6 +73,7 @@ class Prover9Command(Prover9CommandParent, BaseProverCommand):
     a print_assumptions() method that is used to print the list
     of assumptions in multiple formats.
     """
+
     def __init__(self, goal=None, assumptions=None, timeout=60, prover=None):
         """
         :param goal: Input expression to prove
@@ -91,7 +102,9 @@ class Prover9Command(Prover9CommandParent, BaseProverCommand):
         :see BaseProverCommand.decorate_proof()
         """
         if simplify:
-            return self._prover._call_prooftrans(proof_string, ['striplabels'])[0].rstrip()
+            return self._prover._call_prooftrans(proof_string, ["striplabels"])[
+                0
+            ].rstrip()
         else:
             return proof_string.rstrip()
 
@@ -110,14 +123,15 @@ class Prover9Parent(object):
             self._binary_location = None
             self._prover9_bin = None
         else:
-            name = 'prover9'
+            name = "prover9"
             self._prover9_bin = nltk.internals.find_binary(
-                                  name,
-                                  path_to_bin=binary_location,
-                                  env_vars=['PROVER9'],
-                                  url='http://www.cs.unm.edu/~mccune/prover9/',
-                                  binary_names=[name, name + '.exe'],
-                                  verbose=verbose)
+                name,
+                path_to_bin=binary_location,
+                env_vars=["PROVER9"],
+                url="http://www.cs.unm.edu/~mccune/prover9/",
+                binary_names=[name, name + ".exe"],
+                verbose=verbose,
+            )
             self._binary_location = self._prover9_bin.rsplit(os.path.sep, 1)
 
     def prover9_input(self, goal, assumptions):
@@ -126,18 +140,18 @@ class Prover9Parent(object):
         prover9 binary.  This string is formed based on the goal,
         assumptions, and timeout value of this object.
         """
-        s = ''
+        s = ""
 
         if assumptions:
-            s += 'formulas(assumptions).\n'
+            s += "formulas(assumptions).\n"
             for p9_assumption in convert_to_prover9(assumptions):
-                s += '    %s.\n' % p9_assumption
-            s += 'end_of_list.\n\n'
+                s += "    %s.\n" % p9_assumption
+            s += "end_of_list.\n\n"
 
         if goal:
-            s += 'formulas(goals).\n'
-            s += '    %s.\n' % convert_to_prover9(goal)
-            s += 'end_of_list.\n\n'
+            s += "formulas(goals).\n"
+            s += "    %s.\n" % convert_to_prover9(goal)
+            s += "end_of_list.\n\n"
 
         return s
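
For reference, the string this method assembles for one assumption and one goal (sketch, not part of the commit; note that the Prover9 subclass further down prepends clear(auto_denials). to this):

    from nltk.sem.logic import Expression
    from nltk.inference.prover9 import Prover9

    a = Expression.fromstring(r"man(Socrates)")
    g = Expression.fromstring(r"mortal(Socrates)")
    print(Prover9().prover9_input(g, [a]))
    # clear(auto_denials).
    # formulas(assumptions).
    #     man(Socrates).
    # end_of_list.
    #
    # formulas(goals).
    #     mortal(Socrates).
    # end_of_list.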
 
@@ -147,23 +161,27 @@ class Prover9Parent(object):
         executables.  This list is used by ``config_prover9`` when searching
         for the prover9 executables.
         """
-        return ['/usr/local/bin/prover9',
-                '/usr/local/bin/prover9/bin',
-                '/usr/local/bin',
-                '/usr/bin',
-                '/usr/local/prover9',
-                '/usr/local/share/prover9']
+        return [
+            "/usr/local/bin/prover9",
+            "/usr/local/bin/prover9/bin",
+            "/usr/local/bin",
+            "/usr/bin",
+            "/usr/local/prover9",
+            "/usr/local/share/prover9",
+        ]
 
     def _find_binary(self, name, verbose=False):
         binary_locations = self.binary_locations()
         if self._binary_location is not None:
             binary_locations += [self._binary_location]
-        return nltk.internals.find_binary(name,
+        return nltk.internals.find_binary(
+            name,
             searchpath=binary_locations,
-            env_vars=['PROVER9'],
-            url='http://www.cs.unm.edu/~mccune/prover9/',
-            binary_names=[name, name + '.exe'],
-            verbose=verbose)
+            env_vars=["PROVER9"],
+            url="http://www.cs.unm.edu/~mccune/prover9/",
+            binary_names=[name, name + ".exe"],
+            verbose=verbose,
+        )
 
     def _call(self, input_str, binary, args=[], verbose=False):
         """
@@ -176,9 +194,9 @@ class Prover9Parent(object):
         :see: ``config_prover9``
         """
         if verbose:
-            print('Calling:', binary)
-            print('Args:', args)
-            print('Input:\n', input_str, '\n')
+            print("Calling:", binary)
+            print("Args:", args)
+            print("Input:\n", input_str, "\n")
 
         # Call prover9 via a subprocess
         cmd = [binary] + args
@@ -186,15 +204,17 @@ class Prover9Parent(object):
             input_str = input_str.encode("utf8")
         except AttributeError:
             pass
-        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
-                             stderr=subprocess.STDOUT,
-                             stdin=subprocess.PIPE)
+        p = subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE
+        )
         (stdout, stderr) = p.communicate(input=input_str)
 
         if verbose:
-            print('Return code:', p.returncode)
-            if stdout: print('stdout:\n', stdout, '\n')
-            if stderr: print('stderr:\n', stderr, '\n')
+            print("Return code:", p.returncode)
+            if stdout:
+                print("stdout:\n", stdout, "\n")
+            if stderr:
+                print("stderr:\n", stderr, "\n")
 
         return (stdout.decode("utf-8"), p.returncode)
 
@@ -209,41 +229,77 @@ def convert_to_prover9(input):
             try:
                 result.append(_convert_to_prover9(s.simplify()))
             except:
-                print('input %s cannot be converted to Prover9 input syntax' % input)
+                print("input %s cannot be converted to Prover9 input syntax" % input)
                 raise
         return result
     else:
         try:
             return _convert_to_prover9(input.simplify())
         except:
-            print('input %s cannot be converted to Prover9 input syntax' % input)
+            print("input %s cannot be converted to Prover9 input syntax" % input)
             raise
 
+
 def _convert_to_prover9(expression):
     """
     Convert ``logic.Expression`` to Prover9 formatted string.
     """
     if isinstance(expression, ExistsExpression):
-        return 'exists ' + str(expression.variable) + ' ' + _convert_to_prover9(expression.term)
+        return (
+            "exists "
+            + str(expression.variable)
+            + " "
+            + _convert_to_prover9(expression.term)
+        )
     elif isinstance(expression, AllExpression):
-        return 'all ' + str(expression.variable) + ' ' + _convert_to_prover9(expression.term)
+        return (
+            "all "
+            + str(expression.variable)
+            + " "
+            + _convert_to_prover9(expression.term)
+        )
     elif isinstance(expression, NegatedExpression):
-        return '-(' + _convert_to_prover9(expression.term) + ')'
+        return "-(" + _convert_to_prover9(expression.term) + ")"
     elif isinstance(expression, AndExpression):
-        return '(' + _convert_to_prover9(expression.first) + ' & ' + \
-                     _convert_to_prover9(expression.second) + ')'
+        return (
+            "("
+            + _convert_to_prover9(expression.first)
+            + " & "
+            + _convert_to_prover9(expression.second)
+            + ")"
+        )
     elif isinstance(expression, OrExpression):
-        return '(' + _convert_to_prover9(expression.first) + ' | ' + \
-                     _convert_to_prover9(expression.second) + ')'
+        return (
+            "("
+            + _convert_to_prover9(expression.first)
+            + " | "
+            + _convert_to_prover9(expression.second)
+            + ")"
+        )
     elif isinstance(expression, ImpExpression):
-        return '(' + _convert_to_prover9(expression.first) + ' -> ' + \
-                     _convert_to_prover9(expression.second) + ')'
+        return (
+            "("
+            + _convert_to_prover9(expression.first)
+            + " -> "
+            + _convert_to_prover9(expression.second)
+            + ")"
+        )
     elif isinstance(expression, IffExpression):
-        return '(' + _convert_to_prover9(expression.first) + ' <-> ' + \
-                     _convert_to_prover9(expression.second) + ')'
+        return (
+            "("
+            + _convert_to_prover9(expression.first)
+            + " <-> "
+            + _convert_to_prover9(expression.second)
+            + ")"
+        )
     elif isinstance(expression, EqualityExpression):
-        return '(' + _convert_to_prover9(expression.first) + ' = ' + \
-                     _convert_to_prover9(expression.second) + ')'
+        return (
+            "("
+            + _convert_to_prover9(expression.first)
+            + " = "
+            + _convert_to_prover9(expression.second)
+            + ")"
+        )
     else:
         return str(expression)
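
A short usage sketch of the converter (not part of the commit; the expected output follows directly from the branches above):

    from nltk.sem.logic import Expression
    from nltk.inference.prover9 import convert_to_prover9

    e = Expression.fromstring(r"all x.(man(x) -> mortal(x))")
    print(convert_to_prover9(e))  # all x (man(x) -> mortal(x))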
 
@@ -268,15 +324,16 @@ class Prover9(Prover9Parent, Prover):
         if not assumptions:
             assumptions = []
 
-        stdout, returncode = self._call_prover9(self.prover9_input(goal, assumptions),
-                                                verbose=verbose)
+        stdout, returncode = self._call_prover9(
+            self.prover9_input(goal, assumptions), verbose=verbose
+        )
         return (returncode == 0, stdout)
 
     def prover9_input(self, goal, assumptions):
         """
         :see: Prover9Parent.prover9_input
         """
-        s = 'clear(auto_denials).\n' #only one proof required
+        s = "clear(auto_denials).\n"  # only one proof required
         return s + Prover9Parent.prover9_input(self, goal, assumptions)
 
     def _call_prover9(self, input_str, args=[], verbose=False):
@@ -289,23 +346,25 @@ class Prover9(Prover9Parent, Prover):
         :see: ``config_prover9``
         """
         if self._prover9_bin is None:
-            self._prover9_bin = self._find_binary('prover9', verbose)
+            self._prover9_bin = self._find_binary("prover9", verbose)
 
-        updated_input_str = ''
+        updated_input_str = ""
         if self._timeout > 0:
-            updated_input_str += 'assign(max_seconds, %d).\n\n' % self._timeout
+            updated_input_str += "assign(max_seconds, %d).\n\n" % self._timeout
         updated_input_str += input_str
 
-        stdout, returncode = self._call(updated_input_str, self._prover9_bin, args, verbose)
+        stdout, returncode = self._call(
+            updated_input_str, self._prover9_bin, args, verbose
+        )
 
-        if returncode not in [0,2]:
-            errormsgprefix = '%%ERROR:'
+        if returncode not in [0, 2]:
+            errormsgprefix = "%%ERROR:"
             if errormsgprefix in stdout:
                 msgstart = stdout.index(errormsgprefix)
                 errormsg = stdout[msgstart:].strip()
             else:
                 errormsg = None
-            if returncode in [3,4,5,6]:
+            if returncode in [3, 4, 5, 6]:
                 raise Prover9LimitExceededException(returncode, errormsg)
             else:
                 raise Prover9FatalException(returncode, errormsg)
@@ -322,7 +381,7 @@ class Prover9(Prover9Parent, Prover):
         :see: ``config_prover9``
         """
         if self._prooftrans_bin is None:
-            self._prooftrans_bin = self._find_binary('prooftrans', verbose)
+            self._prooftrans_bin = self._find_binary("prooftrans", verbose)
 
         return self._call(input_str, self._prooftrans_bin, args, verbose)
 
@@ -331,33 +390,36 @@ class Prover9Exception(Exception):
     def __init__(self, returncode, message):
         msg = p9_return_codes[returncode]
         if message:
-            msg += '\n%s' % message
+            msg += "\n%s" % message
         Exception.__init__(self, msg)
 
+
 class Prover9FatalException(Prover9Exception):
     pass
 
+
 class Prover9LimitExceededException(Prover9Exception):
     pass
 
 
-
 ######################################################################
-#{ Tests and Demos
+# { Tests and Demos
 ######################################################################
 
+
 def test_config():
 
-    a = Expression.fromstring('(walk(j) & sing(j))')
-    g = Expression.fromstring('walk(j)')
+    a = Expression.fromstring("(walk(j) & sing(j))")
+    g = Expression.fromstring("walk(j)")
     p = Prover9Command(g, assumptions=[a])
     p._executable_path = None
-    p.prover9_search=[]
+    p.prover9_search = []
     p.prove()
-    #config_prover9('/usr/local/bin')
+    # config_prover9('/usr/local/bin')
     print(p.prove())
     print(p.proof())
 
+
 def test_convert_to_prover9(expr):
     """
     Test that parsing works OK.
@@ -366,6 +428,7 @@ def test_convert_to_prover9(expr):
         e = Expression.fromstring(t)
         print(convert_to_prover9(e))
 
+
 def test_prove(arguments):
     """
     Try some proofs and exhibit the results.
@@ -375,44 +438,57 @@ def test_prove(arguments):
         alist = [Expression.fromstring(a) for a in assumptions]
         p = Prover9Command(g, assumptions=alist).prove()
         for a in alist:
-            print('   %s' % a)
-        print('|- %s: %s\n' % (g, p))
+            print("   %s" % a)
+        print("|- %s: %s\n" % (g, p))
+
 
 arguments = [
-    ('(man(x) <-> (not (not man(x))))', []),
-    ('(not (man(x) & (not man(x))))', []),
-    ('(man(x) | (not man(x)))', []),
-    ('(man(x) & (not man(x)))', []),
-    ('(man(x) -> man(x))', []),
-    ('(not (man(x) & (not man(x))))', []),
-    ('(man(x) | (not man(x)))', []),
-    ('(man(x) -> man(x))', []),
-    ('(man(x) <-> man(x))', []),
-    ('(not (man(x) <-> (not man(x))))', []),
-    ('mortal(Socrates)', ['all x.(man(x) -> mortal(x))', 'man(Socrates)']),
-    ('((all x.(man(x) -> walks(x)) & man(Socrates)) -> some y.walks(y))', []),
-    ('(all x.man(x) -> all x.man(x))', []),
-    ('some x.all y.sees(x,y)', []),
-    ('some e3.(walk(e3) & subj(e3, mary))',
-        ['some e1.(see(e1) & subj(e1, john) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))']),
-    ('some x e1.(see(e1) & subj(e1, x) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))',
-       ['some e1.(see(e1) & subj(e1, john) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))'])
+    ("(man(x) <-> (not (not man(x))))", []),
+    ("(not (man(x) & (not man(x))))", []),
+    ("(man(x) | (not man(x)))", []),
+    ("(man(x) & (not man(x)))", []),
+    ("(man(x) -> man(x))", []),
+    ("(not (man(x) & (not man(x))))", []),
+    ("(man(x) | (not man(x)))", []),
+    ("(man(x) -> man(x))", []),
+    ("(man(x) <-> man(x))", []),
+    ("(not (man(x) <-> (not man(x))))", []),
+    ("mortal(Socrates)", ["all x.(man(x) -> mortal(x))", "man(Socrates)"]),
+    ("((all x.(man(x) -> walks(x)) & man(Socrates)) -> some y.walks(y))", []),
+    ("(all x.man(x) -> all x.man(x))", []),
+    ("some x.all y.sees(x,y)", []),
+    (
+        "some e3.(walk(e3) & subj(e3, mary))",
+        [
+            "some e1.(see(e1) & subj(e1, john) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))"
+        ],
+    ),
+    (
+        "some x e1.(see(e1) & subj(e1, x) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))",
+        [
+            "some e1.(see(e1) & subj(e1, john) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))"
+        ],
+    ),
+]
+
+expressions = [
+    r"some x y.sees(x,y)",
+    r"some x.(man(x) & walks(x))",
+    r"\x.(man(x) & walks(x))",
+    r"\x y.sees(x,y)",
+    r"walks(john)",
+    r"\x.big(x, \y.mouse(y))",
+    r"(walks(x) & (runs(x) & (threes(x) & fours(x))))",
+    r"(walks(x) -> runs(x))",
+    r"some x.(PRO(x) & sees(John, x))",
+    r"some x.(man(x) & (not walks(x)))",
+    r"all x.(man(x) -> walks(x))",
 ]
 
-expressions = [r'some x y.sees(x,y)',
-               r'some x.(man(x) & walks(x))',
-               r'\x.(man(x) & walks(x))',
-               r'\x y.sees(x,y)',
-               r'walks(john)',
-               r'\x.big(x, \y.mouse(y))',
-               r'(walks(x) & (runs(x) & (threes(x) & fours(x))))',
-               r'(walks(x) -> runs(x))',
-               r'some x.(PRO(x) & sees(John, x))',
-               r'some x.(man(x) & (not walks(x)))',
-               r'all x.(man(x) -> walks(x))']
 
 def spacer(num=45):
-    print('-' * num)
+    print("-" * num)
+
 
 def demo():
     print("Testing configuration")
@@ -427,5 +503,6 @@ def demo():
     spacer()
     test_prove(arguments)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/inference/prover9.pyc b/nlp_resource_data/nltk/inference/prover9.pyc
deleted file mode 100755 (executable)
index 4916a77..0000000
Binary files a/nlp_resource_data/nltk/inference/prover9.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/inference/resolution.py b/nlp_resource_data/nltk/inference/resolution.py
old mode 100755 (executable)
new mode 100644 (file)
index eb38d73..df19776
@@ -2,34 +2,44 @@
 #
 # Author: Dan Garrette <dhgarrette@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
 """
 Module for a resolution-based First Order theorem prover.
 """
-from __future__ import print_function, unicode_literals
 
 import operator
 from collections import defaultdict
 from functools import reduce
 
 from nltk.sem import skolemize
-from nltk.sem.logic import (VariableExpression, EqualityExpression,
-                            ApplicationExpression, Expression,
-                            NegatedExpression, Variable,
-                            AndExpression, unique_variable, OrExpression,
-                            is_indvar, IndividualVariableExpression, Expression)
+from nltk.sem.logic import (
+    VariableExpression,
+    EqualityExpression,
+    ApplicationExpression,
+    Expression,
+    NegatedExpression,
+    Variable,
+    AndExpression,
+    unique_variable,
+    OrExpression,
+    is_indvar,
+    IndividualVariableExpression,
+)
 
 from nltk.inference.api import Prover, BaseProverCommand
-from nltk.compat import python_2_unicode_compatible
 
-class ProverParseError(Exception): pass
+
+class ProverParseError(Exception):
+    pass
+
 
 class ResolutionProver(Prover):
-    ANSWER_KEY = 'ANSWER'
-    _assume_false=True
+    ANSWER_KEY = "ANSWER"
+    _assume_false = True
 
     def _prove(self, goal=None, assumptions=None, verbose=False):
         """
@@ -52,7 +62,9 @@ class ResolutionProver(Prover):
             if verbose:
                 print(ResolutionProverCommand._decorate_clauses(clauses))
         except RuntimeError as e:
-            if self._assume_false and str(e).startswith('maximum recursion depth exceeded'):
+            if self._assume_false and str(e).startswith(
+                "maximum recursion depth exceeded"
+            ):
                 result = False
                 clauses = []
             else:
@@ -63,37 +75,38 @@ class ResolutionProver(Prover):
         return (result, clauses)
 
     def _attempt_proof(self, clauses):
-        #map indices to lists of indices, to store attempted unifications
+        # map indices to lists of indices, to store attempted unifications
         tried = defaultdict(list)
 
         i = 0
         while i < len(clauses):
             if not clauses[i].is_tautology():
-                #since we try clauses in order, we should start after the last
-                #index tried
+                # since we try clauses in order, we should start after the last
+                # index tried
                 if tried[i]:
                     j = tried[i][-1] + 1
                 else:
-                    j = i + 1 #nothing tried yet for 'i', so start with the next
+                    j = i + 1  # nothing tried yet for 'i', so start with the next
 
                 while j < len(clauses):
-                    #don't: 1) unify a clause with itself,
+                    # don't: 1) unify a clause with itself,
                     #       2) use tautologies
                     if i != j and j and not clauses[j].is_tautology():
                         tried[i].append(j)
                         newclauses = clauses[i].unify(clauses[j])
                         if newclauses:
                             for newclause in newclauses:
-                                newclause._parents = (i+1, j+1)
+                                newclause._parents = (i + 1, j + 1)
                                 clauses.append(newclause)
-                                if not len(newclause): #if there's an empty clause
+                                if not len(newclause):  # if there's an empty clause
                                     return (True, clauses)
-                            i=-1 #since we added a new clause, restart from the top
+                            i = -1  # since we added a new clause, restart from the top
                             break
                     j += 1
             i += 1
         return (False, clauses)
 
+
 class ResolutionProverCommand(BaseProverCommand):
     def __init__(self, goal=None, assumptions=None, prover=None):
         """
@@ -117,9 +130,9 @@ class ResolutionProverCommand(BaseProverCommand):
         re-proving.
         """
         if self._result is None:
-            self._result, clauses = self._prover._prove(self.goal(),
-                                                        self.assumptions(),
-                                                        verbose)
+            self._result, clauses = self._prover._prove(
+                self.goal(), self.assumptions(), verbose
+            )
             self._clauses = clauses
             self._proof = ResolutionProverCommand._decorate_clauses(clauses)
         return self._result
@@ -131,9 +144,11 @@ class ResolutionProverCommand(BaseProverCommand):
         answer_ex = VariableExpression(Variable(ResolutionProver.ANSWER_KEY))
         for clause in self._clauses:
             for term in clause:
-                if isinstance(term, ApplicationExpression) and\
-                   term.function == answer_ex and\
-                   not isinstance(term.argument, IndividualVariableExpression):
+                if (
+                    isinstance(term, ApplicationExpression)
+                    and term.function == answer_ex
+                    and not isinstance(term.argument, IndividualVariableExpression)
+                ):
                     answers.add(term.argument)
         return answers
 
@@ -142,22 +157,22 @@ class ResolutionProverCommand(BaseProverCommand):
         """
         Decorate the proof output.
         """
-        out = ''
+        out = ""
         max_clause_len = max([len(str(clause)) for clause in clauses])
         max_seq_len = len(str(len(clauses)))
         for i in range(len(clauses)):
-            parents = 'A'
-            taut = ''
+            parents = "A"
+            taut = ""
             if clauses[i].is_tautology():
-                taut = 'Tautology'
+                taut = "Tautology"
             if clauses[i]._parents:
                 parents = str(clauses[i]._parents)
-            parents = ' '*(max_clause_len-len(str(clauses[i]))+1) + parents
-            seq = ' '*(max_seq_len-len(str(i+1))) + str(i+1)
-            out += '[%s] %s %s %s\n' % (seq, clauses[i], parents, taut)
+            parents = " " * (max_clause_len - len(str(clauses[i])) + 1) + parents
+            seq = " " * (max_seq_len - len(str(i + 1))) + str(i + 1)
+            out += "[%s] %s %s %s\n" % (seq, clauses[i], parents, taut)
         return out
 
-@python_2_unicode_compatible
+
 class Clause(list):
     def __init__(self, data):
         list.__init__(self, data)
@@ -183,20 +198,26 @@ class Clause(list):
         :return: list containing all the resulting ``Clause`` objects that could be
         obtained by unification
         """
-        if bindings is None: bindings = BindingDict()
-        if used is None: used = ([],[])
-        if skipped is None: skipped = ([],[])
-        if isinstance(debug, bool): debug = DebugObject(debug)
-
-        newclauses = _iterate_first(self, other, bindings, used, skipped, _complete_unify_path, debug)
-
-        #remove subsumed clauses.  make a list of all indices of subsumed
-        #clauses, and then remove them from the list
+        if bindings is None:
+            bindings = BindingDict()
+        if used is None:
+            used = ([], [])
+        if skipped is None:
+            skipped = ([], [])
+        if isinstance(debug, bool):
+            debug = DebugObject(debug)
+
+        newclauses = _iterate_first(
+            self, other, bindings, used, skipped, _complete_unify_path, debug
+        )
+
+        # remove subsumed clauses.  make a list of all indices of subsumed
+        # clauses, and then remove them from the list
         subsumed = []
         for i, c1 in enumerate(newclauses):
             if i not in subsumed:
                 for j, c2 in enumerate(newclauses):
-                    if i!=j and j not in subsumed and c1.subsumes(c2):
+                    if i != j and j not in subsumed and c1.subsumes(c2):
                         subsumed.append(j)
         result = []
         for i in range(len(newclauses)):
@@ -236,13 +257,24 @@ class Clause(list):
         negatedotherClause = Clause(negatedother)
 
         bindings = BindingDict()
-        used = ([],[])
-        skipped = ([],[])
+        used = ([], [])
+        skipped = ([], [])
         debug = DebugObject(False)
 
-        return len(_iterate_first(self, negatedotherClause, bindings, used,
-                                      skipped, _subsumes_finalize,
-                                      debug)) > 0
+        return (
+            len(
+                _iterate_first(
+                    self,
+                    negatedotherClause,
+                    bindings,
+                    used,
+                    skipped,
+                    _subsumes_finalize,
+                    debug,
+                )
+            )
+            > 0
+        )
 
     def __getslice__(self, start, end):
         return Clause(list.__getslice__(self, start, end))
@@ -260,9 +292,9 @@ class Clause(list):
         """
         if self._is_tautology is not None:
             return self._is_tautology
-        for i,a in enumerate(self):
+        for i, a in enumerate(self):
             if not isinstance(a, EqualityExpression):
-                j = len(self)-1
+                j = len(self) - 1
                 while j > i:
                     b = self[j]
                     if isinstance(a, NegatedExpression):
@@ -301,66 +333,95 @@ class Clause(list):
         return Clause([atom.substitute_bindings(bindings) for atom in self])
 
     def __str__(self):
-        return '{' + ', '.join("%s" % item for item in self) + '}'
+        return "{" + ", ".join("%s" % item for item in self) + "}"
 
     def __repr__(self):
         return "%s" % self
 
+
 def _iterate_first(first, second, bindings, used, skipped, finalize_method, debug):
     """
     This method facilitates movement through the terms of 'self'
     """
-    debug.line('unify(%s,%s) %s'%(first, second, bindings))
+    debug.line("unify(%s,%s) %s" % (first, second, bindings))
 
-    if not len(first) or not len(second): #if no more recursions can be performed
+    if not len(first) or not len(second):  # if no more recursions can be performed
         return finalize_method(first, second, bindings, used, skipped, debug)
     else:
-        #explore this 'self' atom
-        result = _iterate_second(first, second, bindings, used, skipped, finalize_method, debug+1)
+        # explore this 'self' atom
+        result = _iterate_second(
+            first, second, bindings, used, skipped, finalize_method, debug + 1
+        )
 
-        #skip this possible 'self' atom
-        newskipped = (skipped[0]+[first[0]], skipped[1])
-        result += _iterate_first(first[1:], second, bindings, used, newskipped, finalize_method, debug+1)
+        # skip this possible 'self' atom
+        newskipped = (skipped[0] + [first[0]], skipped[1])
+        result += _iterate_first(
+            first[1:], second, bindings, used, newskipped, finalize_method, debug + 1
+        )
 
         try:
-            newbindings, newused, unused = _unify_terms(first[0], second[0], bindings, used)
-            #Unification found, so progress with this line of unification
-            #put skipped and unused terms back into play for later unification.
+            newbindings, newused, unused = _unify_terms(
+                first[0], second[0], bindings, used
+            )
+            # Unification found, so progress with this line of unification
+            # put skipped and unused terms back into play for later unification.
             newfirst = first[1:] + skipped[0] + unused[0]
             newsecond = second[1:] + skipped[1] + unused[1]
-            result += _iterate_first(newfirst, newsecond, newbindings, newused, ([],[]), finalize_method, debug+1)
+            result += _iterate_first(
+                newfirst,
+                newsecond,
+                newbindings,
+                newused,
+                ([], []),
+                finalize_method,
+                debug + 1,
+            )
         except BindingException:
-            #the atoms could not be unified,
+            # the atoms could not be unified,
             pass
 
         return result
 
+
 def _iterate_second(first, second, bindings, used, skipped, finalize_method, debug):
     """
     This method facilitates movement through the terms of 'other'
     """
-    debug.line('unify(%s,%s) %s'%(first, second, bindings))
+    debug.line("unify(%s,%s) %s" % (first, second, bindings))
 
-    if not len(first) or not len(second): #if no more recursions can be performed
+    if not len(first) or not len(second):  # if no more recursions can be performed
         return finalize_method(first, second, bindings, used, skipped, debug)
     else:
-        #skip this possible pairing and move to the next
-        newskipped = (skipped[0], skipped[1]+[second[0]])
-        result = _iterate_second(first, second[1:], bindings, used, newskipped, finalize_method, debug+1)
+        # skip this possible pairing and move to the next
+        newskipped = (skipped[0], skipped[1] + [second[0]])
+        result = _iterate_second(
+            first, second[1:], bindings, used, newskipped, finalize_method, debug + 1
+        )
 
         try:
-            newbindings, newused, unused = _unify_terms(first[0], second[0], bindings, used)
-            #Unification found, so progress with this line of unification
-            #put skipped and unused terms back into play for later unification.
+            newbindings, newused, unused = _unify_terms(
+                first[0], second[0], bindings, used
+            )
+            # Unification found, so progress with this line of unification
+            # put skipped and unused terms back into play for later unification.
             newfirst = first[1:] + skipped[0] + unused[0]
             newsecond = second[1:] + skipped[1] + unused[1]
-            result += _iterate_second(newfirst, newsecond, newbindings, newused, ([],[]), finalize_method, debug+1)
+            result += _iterate_second(
+                newfirst,
+                newsecond,
+                newbindings,
+                newused,
+                ([], []),
+                finalize_method,
+                debug + 1,
+            )
         except BindingException:
-            #the atoms could not be unified,
+            # the atoms could not be unified,
             pass
 
         return result
 
+
 def _unify_terms(a, b, bindings=None, used=None):
     """
     This method attempts to unify two terms.  Two expressions are unifiable
@@ -376,54 +437,59 @@ def _unify_terms(a, b, bindings=None, used=None):
     assert isinstance(a, Expression)
     assert isinstance(b, Expression)
 
-    if bindings is None: bindings = BindingDict()
-    if used is None: used = ([],[])
+    if bindings is None:
+        bindings = BindingDict()
+    if used is None:
+        used = ([], [])
 
     # Use resolution
     if isinstance(a, NegatedExpression) and isinstance(b, ApplicationExpression):
         newbindings = most_general_unification(a.term, b, bindings)
-        newused = (used[0]+[a], used[1]+[b])
-        unused = ([],[])
+        newused = (used[0] + [a], used[1] + [b])
+        unused = ([], [])
     elif isinstance(a, ApplicationExpression) and isinstance(b, NegatedExpression):
         newbindings = most_general_unification(a, b.term, bindings)
-        newused = (used[0]+[a], used[1]+[b])
-        unused = ([],[])
+        newused = (used[0] + [a], used[1] + [b])
+        unused = ([], [])
 
     # Use demodulation
     elif isinstance(a, EqualityExpression):
         newbindings = BindingDict([(a.first.variable, a.second)])
-        newused = (used[0]+[a], used[1])
-        unused = ([],[b])
+        newused = (used[0] + [a], used[1])
+        unused = ([], [b])
     elif isinstance(b, EqualityExpression):
         newbindings = BindingDict([(b.first.variable, b.second)])
-        newused = (used[0], used[1]+[b])
-        unused = ([a],[])
+        newused = (used[0], used[1] + [b])
+        unused = ([a], [])
 
     else:
         raise BindingException((a, b))
 
     return newbindings, newused, unused
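
Put together, a single resolution step over two clauses looks like this (sketch, not part of the commit; Clause.unify above drives _unify_terms, resolving -man(x) against man(Socrates)):

    from nltk.sem.logic import Expression
    from nltk.inference.resolution import Clause

    c1 = Clause([Expression.fromstring(r"-man(x)"),
                 Expression.fromstring(r"mortal(x)")])
    c2 = Clause([Expression.fromstring(r"man(Socrates)")])
    print(c1.unify(c2))  # expected: [{mortal(Socrates)}]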
 
+
 def _complete_unify_path(first, second, bindings, used, skipped, debug):
-    if used[0] or used[1]: #if bindings were made along the path
+    if used[0] or used[1]:  # if bindings were made along the path
         newclause = Clause(skipped[0] + skipped[1] + first + second)
-        debug.line('  -> New Clause: %s' % newclause)
+        debug.line("  -> New Clause: %s" % newclause)
         return [newclause.substitute_bindings(bindings)]
-    else: #no bindings made means no unification occurred.  so no result
-        debug.line('  -> End')
+    else:  # no bindings made means no unification occurred.  so no result
+        debug.line("  -> End")
         return []
 
+
 def _subsumes_finalize(first, second, bindings, used, skipped, debug):
     if not len(skipped[0]) and not len(first):
-        #If there are no skipped terms and no terms left in 'first', then
-        #all of the terms in the original 'self' were unified with terms
-        #in 'other'.  Therefore, there exists a binding (this one) such that
-        #every term in self can be unified with a term in other, which
-        #is the definition of subsumption.
+        # If there are no skipped terms and no terms left in 'first', then
+        # all of the terms in the original 'self' were unified with terms
+        # in 'other'.  Therefore, there exists a binding (this one) such that
+        # every term in self can be unified with a term in other, which
+        # is the definition of subsumption.
         return [True]
     else:
         return []
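
A concrete reading of that definition (sketch, not part of the commit):

    from nltk.sem.logic import Expression
    from nltk.inference.resolution import Clause

    c1 = Clause([Expression.fromstring(r"man(x)")])
    c2 = Clause([Expression.fromstring(r"man(Socrates)"),
                 Expression.fromstring(r"walks(Socrates)")])
    print(c1.subsumes(c2))  # True: man(x) unifies with man(Socrates)
    print(c2.subsumes(c1))  # False: walks(Socrates) has no partner in c1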
 
+
 def clausify(expression):
     """
     Skolemize, clausify, and standardize the variables apart.
@@ -437,6 +503,7 @@ def clausify(expression):
         clause_list.append(clause)
     return clause_list
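
Usage sketch (not part of the commit; the fresh variable numbering varies from run to run):

    from nltk.sem.logic import Expression
    from nltk.inference.resolution import clausify

    e = Expression.fromstring(r"all x.(man(x) -> mortal(x))")
    print(clausify(e))  # e.g. [{-man(z1), mortal(z1)}]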
 
+
 def _clausify(expression):
     """
     :param expression: a skolemized expression in CNF
@@ -461,7 +528,6 @@ def _clausify(expression):
     raise ProverParseError()
 
 
-@python_2_unicode_compatible
 class BindingDict(object):
     def __init__(self, binding_list=None):
         """
@@ -504,11 +570,13 @@ class BindingDict(object):
             if not existing or binding2 == existing:
                 self.d[binding.variable] = binding2
             else:
-                raise BindingException('Variable %s already bound to another '
-                                       'value' % (variable))
+                raise BindingException(
+                    "Variable %s already bound to another " "value" % (variable)
+                )
         else:
-            raise BindingException('Variable %s already bound to another '
-                                   'value' % (variable))
+            raise BindingException(
+                "Variable %s already bound to another " "value" % (variable)
+            )
 
     def __getitem__(self, variable):
         """
@@ -540,16 +608,17 @@ class BindingDict(object):
                 combined[v] = other.d[v]
             return combined
         except BindingException:
-            raise BindingException("Attempting to add two contradicting "
-                                   "BindingDicts: '%s' and '%s'"
-                                   % (self, other))
+            raise BindingException(
+                "Attempting to add two contradicting "
+                "BindingDicts: '%s' and '%s'" % (self, other)
+            )
 
     def __len__(self):
         return len(self.d)
 
     def __str__(self):
-        data_str = ', '.join('%s: %s' % (v, self.d[v]) for v in sorted(self.d.keys()))
-        return '{' + data_str + '}'
+        data_str = ", ".join("%s: %s" % (v, self.d[v]) for v in sorted(self.d.keys()))
+        return "{" + data_str + "}"
 
     def __repr__(self):
         return "%s" % self
@@ -575,14 +644,15 @@ def most_general_unification(a, b, bindings=None):
         return _mgu_var(a, b, bindings)
     elif isinstance(b, IndividualVariableExpression):
         return _mgu_var(b, a, bindings)
-    elif isinstance(a, ApplicationExpression) and\
-         isinstance(b, ApplicationExpression):
-        return most_general_unification(a.function, b.function, bindings) +\
-               most_general_unification(a.argument, b.argument, bindings)
+    elif isinstance(a, ApplicationExpression) and isinstance(b, ApplicationExpression):
+        return most_general_unification(
+            a.function, b.function, bindings
+        ) + most_general_unification(a.argument, b.argument, bindings)
     raise BindingException((a, b))
 
+
 def _mgu_var(var, expression, bindings):
-    if var.variable in expression.free()|expression.constants():
+    if var.variable in expression.free() | expression.constants():
         raise BindingException((var, expression))
     else:
         return BindingDict([(var.variable, expression)]) + bindings
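
most_general_unification() returns a BindingDict holding the unifying substitution, or raises BindingException when none exists; a small illustration:

    from nltk.sem.logic import Expression
    from nltk.inference.resolution import most_general_unification

    a = Expression.fromstring('man(x)')
    b = Expression.fromstring('man(Socrates)')
    print(most_general_unification(a, b))  # {x: Socrates}
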
@@ -595,9 +665,10 @@ class BindingException(Exception):
         else:
             Exception.__init__(self, arg)
 
+
 class UnificationException(Exception):
     def __init__(self, a, b):
-        Exception.__init__(self, "'%s' cannot unify with '%s'" % (a,b))
+        Exception.__init__(self, "'%s' cannot unify with '%s'" % (a, b))
 
 
 class DebugObject(object):
@@ -606,72 +677,74 @@ class DebugObject(object):
         self.indent = indent
 
     def __add__(self, i):
-        return DebugObject(self.enabled, self.indent+i)
+        return DebugObject(self.enabled, self.indent + i)
 
     def line(self, line):
         if self.enabled:
-            print('    '*self.indent + line)
+            print("    " * self.indent + line)
 
 
 def testResolutionProver():
-    resolution_test(r'man(x)')
-    resolution_test(r'(man(x) -> man(x))')
-    resolution_test(r'(man(x) -> --man(x))')
-    resolution_test(r'-(man(x) and -man(x))')
-    resolution_test(r'(man(x) or -man(x))')
-    resolution_test(r'(man(x) -> man(x))')
-    resolution_test(r'-(man(x) and -man(x))')
-    resolution_test(r'(man(x) or -man(x))')
-    resolution_test(r'(man(x) -> man(x))')
-    resolution_test(r'(man(x) iff man(x))')
-    resolution_test(r'-(man(x) iff -man(x))')
-    resolution_test('all x.man(x)')
-    resolution_test('-all x.some y.F(x,y) & some x.all y.(-F(x,y))')
-    resolution_test('some x.all y.sees(x,y)')
-
-    p1 = Expression.fromstring(r'all x.(man(x) -> mortal(x))')
-    p2 = Expression.fromstring(r'man(Socrates)')
-    c = Expression.fromstring(r'mortal(Socrates)')
-    print('%s, %s |- %s: %s' % (p1, p2, c, ResolutionProver().prove(c, [p1,p2])))
-
-    p1 = Expression.fromstring(r'all x.(man(x) -> walks(x))')
-    p2 = Expression.fromstring(r'man(John)')
-    c = Expression.fromstring(r'some y.walks(y)')
-    print('%s, %s |- %s: %s' % (p1, p2, c, ResolutionProver().prove(c, [p1,p2])))
-
-    p = Expression.fromstring(r'some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))')
-    c = Expression.fromstring(r'some e0.walk(e0,mary)')
-    print('%s |- %s: %s' % (p, c, ResolutionProver().prove(c, [p])))
+    resolution_test(r"man(x)")
+    resolution_test(r"(man(x) -> man(x))")
+    resolution_test(r"(man(x) -> --man(x))")
+    resolution_test(r"-(man(x) and -man(x))")
+    resolution_test(r"(man(x) or -man(x))")
+    resolution_test(r"(man(x) -> man(x))")
+    resolution_test(r"-(man(x) and -man(x))")
+    resolution_test(r"(man(x) or -man(x))")
+    resolution_test(r"(man(x) -> man(x))")
+    resolution_test(r"(man(x) iff man(x))")
+    resolution_test(r"-(man(x) iff -man(x))")
+    resolution_test("all x.man(x)")
+    resolution_test("-all x.some y.F(x,y) & some x.all y.(-F(x,y))")
+    resolution_test("some x.all y.sees(x,y)")
+
+    p1 = Expression.fromstring(r"all x.(man(x) -> mortal(x))")
+    p2 = Expression.fromstring(r"man(Socrates)")
+    c = Expression.fromstring(r"mortal(Socrates)")
+    print("%s, %s |- %s: %s" % (p1, p2, c, ResolutionProver().prove(c, [p1, p2])))
+
+    p1 = Expression.fromstring(r"all x.(man(x) -> walks(x))")
+    p2 = Expression.fromstring(r"man(John)")
+    c = Expression.fromstring(r"some y.walks(y)")
+    print("%s, %s |- %s: %s" % (p1, p2, c, ResolutionProver().prove(c, [p1, p2])))
+
+    p = Expression.fromstring(r"some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))")
+    c = Expression.fromstring(r"some e0.walk(e0,mary)")
+    print("%s |- %s: %s" % (p, c, ResolutionProver().prove(c, [p])))
+
 
 def resolution_test(e):
     f = Expression.fromstring(e)
     t = ResolutionProver().prove(f)
-    print('|- %s: %s' % (f, t))
+    print("|- %s: %s" % (f, t))
+
 
 def test_clausify():
     lexpr = Expression.fromstring
 
-    print(clausify(lexpr('P(x) | Q(x)')))
-    print(clausify(lexpr('(P(x) & Q(x)) | R(x)')))
-    print(clausify(lexpr('P(x) | (Q(x) & R(x))')))
-    print(clausify(lexpr('(P(x) & Q(x)) | (R(x) & S(x))')))
+    print(clausify(lexpr("P(x) | Q(x)")))
+    print(clausify(lexpr("(P(x) & Q(x)) | R(x)")))
+    print(clausify(lexpr("P(x) | (Q(x) & R(x))")))
+    print(clausify(lexpr("(P(x) & Q(x)) | (R(x) & S(x))")))
 
-    print(clausify(lexpr('P(x) | Q(x) | R(x)')))
-    print(clausify(lexpr('P(x) | (Q(x) & R(x)) | S(x)')))
+    print(clausify(lexpr("P(x) | Q(x) | R(x)")))
+    print(clausify(lexpr("P(x) | (Q(x) & R(x)) | S(x)")))
 
-    print(clausify(lexpr('exists x.P(x) | Q(x)')))
+    print(clausify(lexpr("exists x.P(x) | Q(x)")))
 
-    print(clausify(lexpr('-(-P(x) & Q(x))')))
-    print(clausify(lexpr('P(x) <-> Q(x)')))
-    print(clausify(lexpr('-(P(x) <-> Q(x))')))
-    print(clausify(lexpr('-(all x.P(x))')))
-    print(clausify(lexpr('-(some x.P(x))')))
+    print(clausify(lexpr("-(-P(x) & Q(x))")))
+    print(clausify(lexpr("P(x) <-> Q(x)")))
+    print(clausify(lexpr("-(P(x) <-> Q(x))")))
+    print(clausify(lexpr("-(all x.P(x))")))
+    print(clausify(lexpr("-(some x.P(x))")))
 
-    print(clausify(lexpr('some x.P(x)')))
-    print(clausify(lexpr('some x.all y.P(x,y)')))
-    print(clausify(lexpr('all y.some x.P(x,y)')))
-    print(clausify(lexpr('all z.all y.some x.P(x,y,z)')))
-    print(clausify(lexpr('all x.(all y.P(x,y) -> -all y.(Q(x,y) -> R(x,y)))')))
+    print(clausify(lexpr("some x.P(x)")))
+    print(clausify(lexpr("some x.all y.P(x,y)")))
+    print(clausify(lexpr("all y.some x.P(x,y)")))
+    print(clausify(lexpr("all z.all y.some x.P(x,y,z)")))
+    print(clausify(lexpr("all x.(all y.P(x,y) -> -all y.(Q(x,y) -> R(x,y)))")))
 
 
 def demo():
@@ -680,8 +753,9 @@ def demo():
     testResolutionProver()
     print()
 
-    p = Expression.fromstring('man(x)')
+    p = Expression.fromstring("man(x)")
     print(ResolutionProverCommand(p, [p]).prove())
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
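
A compact py3 smoke test for the converted module, mirroring the Socrates example in testResolutionProver() above:

    from nltk.sem.logic import Expression
    from nltk.inference.resolution import ResolutionProver

    goal = Expression.fromstring('mortal(Socrates)')
    premises = [Expression.fromstring('all x.(man(x) -> mortal(x))'),
                Expression.fromstring('man(Socrates)')]
    print(ResolutionProver().prove(goal, premises))  # True
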
diff --git a/nlp_resource_data/nltk/inference/resolution.pyc b/nlp_resource_data/nltk/inference/resolution.pyc
deleted file mode 100755 (executable)
index b2c1ad5..0000000
Binary files a/nlp_resource_data/nltk/inference/resolution.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/inference/tableau.py b/nlp_resource_data/nltk/inference/tableau.py
old mode 100755 (executable)
new mode 100644 (file)
index 02e769c..90c9725
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: First-Order Tableau Theorem Prover
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Dan Garrette <dhgarrette@gmail.com>
 #
 # URL: <http://nltk.org/>
@@ -9,27 +9,39 @@
 """
 Module for a tableau-based First Order theorem prover.
 """
-from __future__ import print_function, unicode_literals
 
 from nltk.internals import Counter
 
-from nltk.sem.logic import (VariableExpression, EqualityExpression,
-                            ApplicationExpression, Expression,
-                            AbstractVariableExpression, AllExpression,
-                            NegatedExpression,
-                            ExistsExpression, Variable, ImpExpression,
-                            AndExpression, unique_variable,
-                            LambdaExpression, IffExpression,
-                            OrExpression, FunctionVariableExpression)
+from nltk.sem.logic import (
+    VariableExpression,
+    EqualityExpression,
+    ApplicationExpression,
+    Expression,
+    AbstractVariableExpression,
+    AllExpression,
+    NegatedExpression,
+    ExistsExpression,
+    Variable,
+    ImpExpression,
+    AndExpression,
+    unique_variable,
+    LambdaExpression,
+    IffExpression,
+    OrExpression,
+    FunctionVariableExpression,
+)
 
 from nltk.inference.api import Prover, BaseProverCommand
 
 _counter = Counter()
 
-class ProverParseError(Exception): pass
+
+class ProverParseError(Exception):
+    pass
+
 
 class TableauProver(Prover):
-    _assume_false=False
+    _assume_false = False
 
     def _prove(self, goal=None, assumptions=None, verbose=False):
         if not assumptions:
@@ -44,211 +56,280 @@ class TableauProver(Prover):
             debugger = Debug(verbose)
             result = self._attempt_proof(agenda, set(), set(), debugger)
         except RuntimeError as e:
-            if self._assume_false and str(e).startswith('maximum recursion depth exceeded'):
+            if self._assume_false and str(e).startswith(
+                "maximum recursion depth exceeded"
+            ):
                 result = False
             else:
                 if verbose:
                     print(e)
                 else:
                     raise e
-        return (result, '\n'.join(debugger.lines))
+        return (result, "\n".join(debugger.lines))
 
     def _attempt_proof(self, agenda, accessible_vars, atoms, debug):
         (current, context), category = agenda.pop_first()
 
-        #if there's nothing left in the agenda, and we haven't closed the path
+        # if there's nothing left in the agenda, and we haven't closed the path
         if not current:
-            debug.line('AGENDA EMPTY')
+            debug.line("AGENDA EMPTY")
             return False
 
-        proof_method = { Categories.ATOM:     self._attempt_proof_atom,
-                         Categories.PROP:     self._attempt_proof_prop,
-                         Categories.N_ATOM:   self._attempt_proof_n_atom,
-                         Categories.N_PROP:   self._attempt_proof_n_prop,
-                         Categories.APP:      self._attempt_proof_app,
-                         Categories.N_APP:    self._attempt_proof_n_app,
-                         Categories.N_EQ:     self._attempt_proof_n_eq,
-                         Categories.D_NEG:    self._attempt_proof_d_neg,
-                         Categories.N_ALL:    self._attempt_proof_n_all,
-                         Categories.N_EXISTS: self._attempt_proof_n_some,
-                         Categories.AND:      self._attempt_proof_and,
-                         Categories.N_OR:     self._attempt_proof_n_or,
-                         Categories.N_IMP:    self._attempt_proof_n_imp,
-                         Categories.OR:       self._attempt_proof_or,
-                         Categories.IMP:      self._attempt_proof_imp,
-                         Categories.N_AND:    self._attempt_proof_n_and,
-                         Categories.IFF:      self._attempt_proof_iff,
-                         Categories.N_IFF:    self._attempt_proof_n_iff,
-                         Categories.EQ:       self._attempt_proof_eq,
-                         Categories.EXISTS:   self._attempt_proof_some,
-                         Categories.ALL:      self._attempt_proof_all,
-                        }[category]
+        proof_method = {
+            Categories.ATOM: self._attempt_proof_atom,
+            Categories.PROP: self._attempt_proof_prop,
+            Categories.N_ATOM: self._attempt_proof_n_atom,
+            Categories.N_PROP: self._attempt_proof_n_prop,
+            Categories.APP: self._attempt_proof_app,
+            Categories.N_APP: self._attempt_proof_n_app,
+            Categories.N_EQ: self._attempt_proof_n_eq,
+            Categories.D_NEG: self._attempt_proof_d_neg,
+            Categories.N_ALL: self._attempt_proof_n_all,
+            Categories.N_EXISTS: self._attempt_proof_n_some,
+            Categories.AND: self._attempt_proof_and,
+            Categories.N_OR: self._attempt_proof_n_or,
+            Categories.N_IMP: self._attempt_proof_n_imp,
+            Categories.OR: self._attempt_proof_or,
+            Categories.IMP: self._attempt_proof_imp,
+            Categories.N_AND: self._attempt_proof_n_and,
+            Categories.IFF: self._attempt_proof_iff,
+            Categories.N_IFF: self._attempt_proof_n_iff,
+            Categories.EQ: self._attempt_proof_eq,
+            Categories.EXISTS: self._attempt_proof_some,
+            Categories.ALL: self._attempt_proof_all,
+        }[category]
 
         debug.line((current, context))
         return proof_method(current, context, agenda, accessible_vars, atoms, debug)
 
-    def _attempt_proof_atom(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_atom(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         # Check if the branch is closed.  Return 'True' if it is
         if (current, True) in atoms:
-            debug.line('CLOSED', 1)
+            debug.line("CLOSED", 1)
             return True
 
         if context:
             if isinstance(context.term, NegatedExpression):
                 current = current.negate()
             agenda.put(context(current).simplify())
-            return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
+            return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
         else:
-            #mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars
-            agenda.mark_alls_fresh();
-            return self._attempt_proof(agenda, accessible_vars|set(current.args), atoms|set([(current, False)]), debug+1)
-
-    def _attempt_proof_n_atom(self, current, context, agenda, accessible_vars, atoms, debug):
+            # mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars
+            agenda.mark_alls_fresh()
+            return self._attempt_proof(
+                agenda,
+                accessible_vars | set(current.args),
+                atoms | set([(current, False)]),
+                debug + 1,
+            )
+
+    def _attempt_proof_n_atom(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         # Check if the branch is closed.  Return 'True' if it is
         if (current.term, False) in atoms:
-            debug.line('CLOSED', 1)
+            debug.line("CLOSED", 1)
             return True
 
         if context:
             if isinstance(context.term, NegatedExpression):
                 current = current.negate()
             agenda.put(context(current).simplify())
-            return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
+            return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
         else:
-            #mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars
-            agenda.mark_alls_fresh();
-            return self._attempt_proof(agenda, accessible_vars|set(current.term.args), atoms|set([(current.term, True)]), debug+1)
-
-    def _attempt_proof_prop(self, current, context, agenda, accessible_vars, atoms, debug):
+            # mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars
+            agenda.mark_alls_fresh()
+            return self._attempt_proof(
+                agenda,
+                accessible_vars | set(current.term.args),
+                atoms | set([(current.term, True)]),
+                debug + 1,
+            )
+
+    def _attempt_proof_prop(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         # Check if the branch is closed.  Return 'True' if it is
         if (current, True) in atoms:
-            debug.line('CLOSED', 1)
+            debug.line("CLOSED", 1)
             return True
 
-        #mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars
-        agenda.mark_alls_fresh();
-        return self._attempt_proof(agenda, accessible_vars, atoms|set([(current, False)]), debug+1)
+        # mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars
+        agenda.mark_alls_fresh()
+        return self._attempt_proof(
+            agenda, accessible_vars, atoms | set([(current, False)]), debug + 1
+        )
 
-    def _attempt_proof_n_prop(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_n_prop(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         # Check if the branch is closed.  Return 'True' if it is
         if (current.term, False) in atoms:
-            debug.line('CLOSED', 1)
+            debug.line("CLOSED", 1)
             return True
 
-        #mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars
-        agenda.mark_alls_fresh();
-        return self._attempt_proof(agenda, accessible_vars, atoms|set([(current.term, True)]), debug+1)
+        # mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars
+        agenda.mark_alls_fresh()
+        return self._attempt_proof(
+            agenda, accessible_vars, atoms | set([(current.term, True)]), debug + 1
+        )
 
-    def _attempt_proof_app(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_app(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         f, args = current.uncurry()
         for i, arg in enumerate(args):
             if not TableauProver.is_atom(arg):
                 ctx = f
-                nv = Variable('X%s' % _counter.get())
-                for j,a in enumerate(args):
-                    ctx = (ctx(VariableExpression(nv)) if i == j else ctx(a))
+                nv = Variable("X%s" % _counter.get())
+                for j, a in enumerate(args):
+                    ctx = ctx(VariableExpression(nv)) if i == j else ctx(a)
                 if context:
                     ctx = context(ctx).simplify()
                 ctx = LambdaExpression(nv, ctx)
                 agenda.put(arg, ctx)
-                return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
-        raise Exception('If this method is called, there must be a non-atomic argument')
+                return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
+        raise Exception("If this method is called, there must be a non-atomic argument")
 
-    def _attempt_proof_n_app(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_n_app(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         f, args = current.term.uncurry()
         for i, arg in enumerate(args):
             if not TableauProver.is_atom(arg):
                 ctx = f
-                nv = Variable('X%s' % _counter.get())
-                for j,a in enumerate(args):
-                    ctx = (ctx(VariableExpression(nv)) if i == j else ctx(a))
+                nv = Variable("X%s" % _counter.get())
+                for j, a in enumerate(args):
+                    ctx = ctx(VariableExpression(nv)) if i == j else ctx(a)
                 if context:
-                    #combine new context with existing
+                    # combine new context with existing
                     ctx = context(ctx).simplify()
                 ctx = LambdaExpression(nv, -ctx)
                 agenda.put(-arg, ctx)
-                return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
-        raise Exception('If this method is called, there must be a non-atomic argument')
+                return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
+        raise Exception("If this method is called, there must be a non-atomic argument")
 
-    def _attempt_proof_n_eq(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_n_eq(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         ###########################################################################
         # Since 'current' is of type '~(a=b)', the path is closed if 'a' == 'b'
         ###########################################################################
         if current.term.first == current.term.second:
-            debug.line('CLOSED', 1)
+            debug.line("CLOSED", 1)
             return True
 
-        agenda[Categories.N_EQ].add((current,context))
+        agenda[Categories.N_EQ].add((current, context))
         current._exhausted = True
-        return self._attempt_proof(agenda, accessible_vars|set([current.term.first, current.term.second]), atoms, debug+1)
-
-    def _attempt_proof_d_neg(self, current, context, agenda, accessible_vars, atoms, debug):
+        return self._attempt_proof(
+            agenda,
+            accessible_vars | set([current.term.first, current.term.second]),
+            atoms,
+            debug + 1,
+        )
+
+    def _attempt_proof_d_neg(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         agenda.put(current.term.term, context)
-        return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
-
-    def _attempt_proof_n_all(self, current, context, agenda, accessible_vars, atoms, debug):
-        agenda[Categories.EXISTS].add((ExistsExpression(current.term.variable, -current.term.term), context))
-        return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
-
-    def _attempt_proof_n_some(self, current, context, agenda, accessible_vars, atoms, debug):
-        agenda[Categories.ALL].add((AllExpression(current.term.variable, -current.term.term), context))
-        return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
-
-    def _attempt_proof_and(self, current, context, agenda, accessible_vars, atoms, debug):
+        return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
+
+    def _attempt_proof_n_all(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
+        agenda[Categories.EXISTS].add(
+            (ExistsExpression(current.term.variable, -current.term.term), context)
+        )
+        return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
+
+    def _attempt_proof_n_some(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
+        agenda[Categories.ALL].add(
+            (AllExpression(current.term.variable, -current.term.term), context)
+        )
+        return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
+
+    def _attempt_proof_and(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         agenda.put(current.first, context)
         agenda.put(current.second, context)
-        return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
+        return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
 
-    def _attempt_proof_n_or(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_n_or(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         agenda.put(-current.term.first, context)
         agenda.put(-current.term.second, context)
-        return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
+        return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
 
-    def _attempt_proof_n_imp(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_n_imp(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         agenda.put(current.term.first, context)
         agenda.put(-current.term.second, context)
-        return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
+        return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
 
-    def _attempt_proof_or(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_or(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         new_agenda = agenda.clone()
         agenda.put(current.first, context)
         new_agenda.put(current.second, context)
-        return self._attempt_proof(agenda, accessible_vars, atoms, debug+1) and \
-                self._attempt_proof(new_agenda, accessible_vars, atoms, debug+1)
+        return self._attempt_proof(
+            agenda, accessible_vars, atoms, debug + 1
+        ) and self._attempt_proof(new_agenda, accessible_vars, atoms, debug + 1)
 
-    def _attempt_proof_imp(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_imp(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         new_agenda = agenda.clone()
         agenda.put(-current.first, context)
         new_agenda.put(current.second, context)
-        return self._attempt_proof(agenda, accessible_vars, atoms, debug+1) and \
-                self._attempt_proof(new_agenda, accessible_vars, atoms, debug+1)
+        return self._attempt_proof(
+            agenda, accessible_vars, atoms, debug + 1
+        ) and self._attempt_proof(new_agenda, accessible_vars, atoms, debug + 1)
 
-    def _attempt_proof_n_and(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_n_and(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         new_agenda = agenda.clone()
         agenda.put(-current.term.first, context)
         new_agenda.put(-current.term.second, context)
-        return self._attempt_proof(agenda, accessible_vars, atoms, debug+1) and \
-                self._attempt_proof(new_agenda, accessible_vars, atoms, debug+1)
+        return self._attempt_proof(
+            agenda, accessible_vars, atoms, debug + 1
+        ) and self._attempt_proof(new_agenda, accessible_vars, atoms, debug + 1)
 
-    def _attempt_proof_iff(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_iff(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         new_agenda = agenda.clone()
         agenda.put(current.first, context)
         agenda.put(current.second, context)
         new_agenda.put(-current.first, context)
         new_agenda.put(-current.second, context)
-        return self._attempt_proof(agenda, accessible_vars, atoms, debug+1) and \
-                self._attempt_proof(new_agenda, accessible_vars, atoms, debug+1)
+        return self._attempt_proof(
+            agenda, accessible_vars, atoms, debug + 1
+        ) and self._attempt_proof(new_agenda, accessible_vars, atoms, debug + 1)
 
-    def _attempt_proof_n_iff(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_n_iff(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         new_agenda = agenda.clone()
         agenda.put(current.term.first, context)
         agenda.put(-current.term.second, context)
         new_agenda.put(-current.term.first, context)
         new_agenda.put(current.term.second, context)
-        return self._attempt_proof(agenda, accessible_vars, atoms, debug+1) and \
-                self._attempt_proof(new_agenda, accessible_vars, atoms, debug+1)
+        return self._attempt_proof(
+            agenda, accessible_vars, atoms, debug + 1
+        ) and self._attempt_proof(new_agenda, accessible_vars, atoms, debug + 1)
 
-    def _attempt_proof_eq(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_eq(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         #########################################################################
         # Since 'current' is of the form '(a = b)', replace ALL free instances
         # of 'a' with 'b'
@@ -256,49 +337,61 @@ class TableauProver(Prover):
         agenda.put_atoms(atoms)
         agenda.replace_all(current.first, current.second)
         accessible_vars.discard(current.first)
-        agenda.mark_neqs_fresh();
-        return self._attempt_proof(agenda, accessible_vars, set(), debug+1)
+        agenda.mark_neqs_fresh()
+        return self._attempt_proof(agenda, accessible_vars, set(), debug + 1)
 
-    def _attempt_proof_some(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_some(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         new_unique_variable = VariableExpression(unique_variable())
         agenda.put(current.term.replace(current.variable, new_unique_variable), context)
         agenda.mark_alls_fresh()
-        return self._attempt_proof(agenda, accessible_vars|set([new_unique_variable]), atoms, debug+1)
+        return self._attempt_proof(
+            agenda, accessible_vars | set([new_unique_variable]), atoms, debug + 1
+        )
 
-    def _attempt_proof_all(self, current, context, agenda, accessible_vars, atoms, debug):
+    def _attempt_proof_all(
+        self, current, context, agenda, accessible_vars, atoms, debug
+    ):
         try:
             current._used_vars
         except AttributeError:
             current._used_vars = set()
 
-        #if there are accessible_vars on the path
+        # if there are accessible_vars on the path
         if accessible_vars:
             # get the set of bound variables that have not been used by this AllExpression
             bv_available = accessible_vars - current._used_vars
 
             if bv_available:
                 variable_to_use = list(bv_available)[0]
-                debug.line('--> Using \'%s\'' % variable_to_use, 2)
+                debug.line("--> Using '%s'" % variable_to_use, 2)
                 current._used_vars |= set([variable_to_use])
-                agenda.put(current.term.replace(current.variable, variable_to_use), context)
-                agenda[Categories.ALL].add((current,context))
-                return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
+                agenda.put(
+                    current.term.replace(current.variable, variable_to_use), context
+                )
+                agenda[Categories.ALL].add((current, context))
+                return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
 
             else:
-                #no more available variables to substitute
-                debug.line('--> Variables Exhausted', 2)
+                # no more available variables to substitute
+                debug.line("--> Variables Exhausted", 2)
                 current._exhausted = True
-                agenda[Categories.ALL].add((current,context))
-                return self._attempt_proof(agenda, accessible_vars, atoms, debug+1)
+                agenda[Categories.ALL].add((current, context))
+                return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1)
 
         else:
             new_unique_variable = VariableExpression(unique_variable())
-            debug.line('--> Using \'%s\'' % new_unique_variable, 2)
+            debug.line("--> Using '%s'" % new_unique_variable, 2)
             current._used_vars |= set([new_unique_variable])
-            agenda.put(current.term.replace(current.variable, new_unique_variable), context)
-            agenda[Categories.ALL].add((current,context))
+            agenda.put(
+                current.term.replace(current.variable, new_unique_variable), context
+            )
+            agenda[Categories.ALL].add((current, context))
             agenda.mark_alls_fresh()
-            return self._attempt_proof(agenda, accessible_vars|set([new_unique_variable]), atoms, debug+1)
+            return self._attempt_proof(
+                agenda, accessible_vars | set([new_unique_variable]), atoms, debug + 1
+            )
 
     @staticmethod
     def is_atom(e):
@@ -310,8 +403,9 @@ class TableauProver(Prover):
                 if not TableauProver.is_atom(arg):
                     return False
             return True
-        elif isinstance(e, AbstractVariableExpression) or \
-             isinstance(e, LambdaExpression):
+        elif isinstance(e, AbstractVariableExpression) or isinstance(
+            e, LambdaExpression
+        ):
             return True
         else:
             return False
@@ -343,17 +437,19 @@ class Agenda(object):
         set_list = [s.copy() for s in self.sets]
 
         new_allExs = set()
-        for allEx,_ in set_list[Categories.ALL]:
+        for allEx, _ in set_list[Categories.ALL]:
             new_allEx = AllExpression(allEx.variable, allEx.term)
             try:
                 new_allEx._used_vars = set(used for used in allEx._used_vars)
             except AttributeError:
                 new_allEx._used_vars = set()
-            new_allExs.add((new_allEx,None))
+            new_allExs.add((new_allEx, None))
         set_list[Categories.ALL] = new_allExs
 
-        set_list[Categories.N_EQ] = set((NegatedExpression(n_eq.term),ctx)
-                                        for (n_eq,ctx) in set_list[Categories.N_EQ])
+        set_list[Categories.N_EQ] = set(
+            (NegatedExpression(n_eq.term), ctx)
+            for (n_eq, ctx) in set_list[Categories.N_EQ]
+        )
 
         new_agenda.sets = tuple(set_list)
         return new_agenda
@@ -379,13 +475,13 @@ class Agenda(object):
     def put_atoms(self, atoms):
         for atom, neg in atoms:
             if neg:
-                self[Categories.N_ATOM].add((-atom,None))
+                self[Categories.N_ATOM].add((-atom, None))
             else:
-                self[Categories.ATOM].add((atom,None))
+                self[Categories.ATOM].add((atom, None))
 
     def pop_first(self):
         """ Pop the first expression that appears in the agenda """
-        for i,s in enumerate(self.sets):
+        for i, s in enumerate(self.sets):
             if s:
                 if i in [Categories.N_EQ, Categories.ALL]:
                     for ex in s:
@@ -402,17 +498,17 @@ class Agenda(object):
 
     def replace_all(self, old, new):
         for s in self.sets:
-            for ex,ctx in s:
+            for ex, ctx in s:
                 ex.replace(old.variable, new)
                 if ctx is not None:
                     ctx.replace(old.variable, new)
 
     def mark_alls_fresh(self):
-        for u,_ in self.sets[Categories.ALL]:
+        for u, _ in self.sets[Categories.ALL]:
             u._exhausted = False
 
     def mark_neqs_fresh(self):
-        for neq,_ in self.sets[Categories.N_EQ]:
+        for neq, _ in self.sets[Categories.N_EQ]:
             neq._exhausted = False
 
     def _categorize_expression(self, current):
@@ -439,8 +535,7 @@ class Agenda(object):
         elif isinstance(current, ApplicationExpression):
             return Categories.APP
         else:
-            raise ProverParseError("cannot categorize %s" % \
-                                   current.__class__.__name__)
+            raise ProverParseError("cannot categorize %s" % current.__class__.__name__)
 
     def _categorize_NegatedExpression(self, current):
         negated = current.term
@@ -468,8 +563,7 @@ class Agenda(object):
         elif isinstance(negated, ApplicationExpression):
             return Categories.N_APP
         else:
-            raise ProverParseError("cannot categorize %s" % \
-                                   negated.__class__.__name__)
+            raise ProverParseError("cannot categorize %s" % negated.__class__.__name__)
 
 
 class Debug(object):
@@ -477,28 +571,31 @@ class Debug(object):
         self.verbose = verbose
         self.indent = indent
 
-        if not lines: lines = []
+        if not lines:
+            lines = []
         self.lines = lines
 
     def __add__(self, increment):
-        return Debug(self.verbose, self.indent+1, self.lines)
+        return Debug(self.verbose, self.indent + 1, self.lines)
 
     def line(self, data, indent=0):
         if isinstance(data, tuple):
             ex, ctx = data
             if ctx:
-                data = '%s, %s' % (ex, ctx)
+                data = "%s, %s" % (ex, ctx)
             else:
-                data = '%s' % ex
+                data = "%s" % ex
 
             if isinstance(ex, AllExpression):
                 try:
-                    used_vars = "[%s]" % (",".join("%s" % ve.variable.name for ve in ex._used_vars))
-                    data += ':   %s' % used_vars
+                    used_vars = "[%s]" % (
+                        ",".join("%s" % ve.variable.name for ve in ex._used_vars)
+                    )
+                    data += ":   %s" % used_vars
                 except AttributeError:
-                    data += ':   []'
+                    data += ":   []"
 
-        newline = '%s%s' % ('   '*(self.indent+indent), data)
+        newline = "%s%s" % ("   " * (self.indent + indent), data)
         self.lines.append(newline)
 
         if self.verbose:
@@ -506,102 +603,112 @@ class Debug(object):
 
 
 class Categories(object):
-    ATOM     = 0
-    PROP     = 1
-    N_ATOM   = 2
-    N_PROP   = 3
-    APP      = 4
-    N_APP    = 5
-    N_EQ     = 6
-    D_NEG    = 7
-    N_ALL    = 8
+    ATOM = 0
+    PROP = 1
+    N_ATOM = 2
+    N_PROP = 3
+    APP = 4
+    N_APP = 5
+    N_EQ = 6
+    D_NEG = 7
+    N_ALL = 8
     N_EXISTS = 9
-    AND      = 10
-    N_OR     = 11
-    N_IMP    = 12
-    OR       = 13
-    IMP      = 14
-    N_AND    = 15
-    IFF      = 16
-    N_IFF    = 17
-    EQ       = 18
-    EXISTS   = 19
-    ALL      = 20
+    AND = 10
+    N_OR = 11
+    N_IMP = 12
+    OR = 13
+    IMP = 14
+    N_AND = 15
+    IFF = 16
+    N_IFF = 17
+    EQ = 18
+    EXISTS = 19
+    ALL = 20
 
 
 def testTableauProver():
-    tableau_test('P | -P')
-    tableau_test('P & -P')
-    tableau_test('Q', ['P', '(P -> Q)'])
-    tableau_test('man(x)')
-    tableau_test('(man(x) -> man(x))')
-    tableau_test('(man(x) -> --man(x))')
-    tableau_test('-(man(x) and -man(x))')
-    tableau_test('(man(x) or -man(x))')
-    tableau_test('(man(x) -> man(x))')
-    tableau_test('-(man(x) and -man(x))')
-    tableau_test('(man(x) or -man(x))')
-    tableau_test('(man(x) -> man(x))')
-    tableau_test('(man(x) iff man(x))')
-    tableau_test('-(man(x) iff -man(x))')
-    tableau_test('all x.man(x)')
-    tableau_test('all x.all y.((x = y) -> (y = x))')
-    tableau_test('all x.all y.all z.(((x = y) & (y = z)) -> (x = z))')
-#    tableau_test('-all x.some y.F(x,y) & some x.all y.(-F(x,y))')
-#    tableau_test('some x.all y.sees(x,y)')
-
-    p1 = 'all x.(man(x) -> mortal(x))'
-    p2 = 'man(Socrates)'
-    c = 'mortal(Socrates)'
+    tableau_test("P | -P")
+    tableau_test("P & -P")
+    tableau_test("Q", ["P", "(P -> Q)"])
+    tableau_test("man(x)")
+    tableau_test("(man(x) -> man(x))")
+    tableau_test("(man(x) -> --man(x))")
+    tableau_test("-(man(x) and -man(x))")
+    tableau_test("(man(x) or -man(x))")
+    tableau_test("(man(x) -> man(x))")
+    tableau_test("-(man(x) and -man(x))")
+    tableau_test("(man(x) or -man(x))")
+    tableau_test("(man(x) -> man(x))")
+    tableau_test("(man(x) iff man(x))")
+    tableau_test("-(man(x) iff -man(x))")
+    tableau_test("all x.man(x)")
+    tableau_test("all x.all y.((x = y) -> (y = x))")
+    tableau_test("all x.all y.all z.(((x = y) & (y = z)) -> (x = z))")
+    #    tableau_test('-all x.some y.F(x,y) & some x.all y.(-F(x,y))')
+    #    tableau_test('some x.all y.sees(x,y)')
+
+    p1 = "all x.(man(x) -> mortal(x))"
+    p2 = "man(Socrates)"
+    c = "mortal(Socrates)"
     tableau_test(c, [p1, p2])
 
-    p1 = 'all x.(man(x) -> walks(x))'
-    p2 = 'man(John)'
-    c = 'some y.walks(y)'
+    p1 = "all x.(man(x) -> walks(x))"
+    p2 = "man(John)"
+    c = "some y.walks(y)"
     tableau_test(c, [p1, p2])
 
-    p = '((x = y) & walks(y))'
-    c = 'walks(x)'
+    p = "((x = y) & walks(y))"
+    c = "walks(x)"
     tableau_test(c, [p])
 
-    p = '((x = y) & ((y = z) & (z = w)))'
-    c = '(x = w)'
+    p = "((x = y) & ((y = z) & (z = w)))"
+    c = "(x = w)"
     tableau_test(c, [p])
 
-    p = 'some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))'
-    c = 'some e0.walk(e0,mary)'
+    p = "some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))"
+    c = "some e0.walk(e0,mary)"
     tableau_test(c, [p])
 
-    c = '(exists x.exists z3.((x = Mary) & ((z3 = John) & sees(z3,x))) <-> exists x.exists z4.((x = John) & ((z4 = Mary) & sees(x,z4))))'
+    c = "(exists x.exists z3.((x = Mary) & ((z3 = John) & sees(z3,x))) <-> exists x.exists z4.((x = John) & ((z4 = Mary) & sees(x,z4))))"
     tableau_test(c)
 
+
 #    p = 'some e1.some e2.((believe e1 john e2) and (walk e2 mary))'
 #    c = 'some x.some e3.some e4.((believe e3 x e4) and (walk e4 mary))'
 #    tableau_test(c, [p])
 
 
 def testHigherOrderTableauProver():
-    tableau_test('believe(j, -lie(b))', ['believe(j, -lie(b) & -cheat(b))'])
-    tableau_test('believe(j, lie(b) & cheat(b))', ['believe(j, lie(b))'])
-    tableau_test('believe(j, lie(b))', ['lie(b)']) #how do we capture that John believes all things that are true
-    tableau_test('believe(j, know(b, cheat(b)))', ['believe(j, know(b, lie(b)) & know(b, steals(b) & cheat(b)))'])
-    tableau_test('P(Q(y), R(y) & R(z))', ['P(Q(x) & Q(y), R(y) & R(z))'])
+    tableau_test("believe(j, -lie(b))", ["believe(j, -lie(b) & -cheat(b))"])
+    tableau_test("believe(j, lie(b) & cheat(b))", ["believe(j, lie(b))"])
+    tableau_test(
+        "believe(j, lie(b))", ["lie(b)"]
+    )  # how do we capture that John believes all things that are true
+    tableau_test(
+        "believe(j, know(b, cheat(b)))",
+        ["believe(j, know(b, lie(b)) & know(b, steals(b) & cheat(b)))"],
+    )
+    tableau_test("P(Q(y), R(y) & R(z))", ["P(Q(x) & Q(y), R(y) & R(z))"])
 
-    tableau_test('believe(j, cheat(b) & lie(b))', ['believe(j, lie(b) & cheat(b))'])
-    tableau_test('believe(j, -cheat(b) & -lie(b))', ['believe(j, -lie(b) & -cheat(b))'])
+    tableau_test("believe(j, cheat(b) & lie(b))", ["believe(j, lie(b) & cheat(b))"])
+    tableau_test("believe(j, -cheat(b) & -lie(b))", ["believe(j, -lie(b) & -cheat(b))"])
 
 
 def tableau_test(c, ps=None, verbose=False):
     pc = Expression.fromstring(c)
-    pps = ([Expression.fromstring(p) for p in ps] if ps else [])
+    pps = [Expression.fromstring(p) for p in ps] if ps else []
     if not ps:
         ps = []
-    print('%s |- %s: %s' % (', '.join(ps), pc, TableauProver().prove(pc, pps, verbose=verbose)))
+    print(
+        "%s |- %s: %s"
+        % (", ".join(ps), pc, TableauProver().prove(pc, pps, verbose=verbose))
+    )
+
 
 def demo():
     testTableauProver()
     testHigherOrderTableauProver()
 
-if __name__ == '__main__':
-    demo()
 
+if __name__ == "__main__":
+    demo()
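
As with the resolution module, a quick py3 check of the converted tableau prover, mirroring testTableauProver() above:

    from nltk.sem.logic import Expression
    from nltk.inference.tableau import TableauProver

    goal = Expression.fromstring('mortal(Socrates)')
    premises = [Expression.fromstring('all x.(man(x) -> mortal(x))'),
                Expression.fromstring('man(Socrates)')]
    print(TableauProver().prove(goal, premises))  # True
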
diff --git a/nlp_resource_data/nltk/inference/tableau.pyc b/nlp_resource_data/nltk/inference/tableau.pyc
deleted file mode 100755 (executable)
index 324960f..0000000
Binary files a/nlp_resource_data/nltk/inference/tableau.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/internals.py b/nlp_resource_data/nltk/internals.py
old mode 100755 (executable)
new mode 100644 (file)
index 84e28cf..ac93c8a
@@ -1,12 +1,11 @@
 # Natural Language Toolkit: Internal utility functions
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 #         Nitin Madnani <nmadnani@ets.org>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function
 
 import subprocess
 import os
@@ -18,17 +17,7 @@ import types
 import sys
 import stat
 import locale
-
-# Use the c version of ElementTree, which is faster, if possible:
-try:
-    from xml.etree import cElementTree as ElementTree
-except ImportError:
-    from xml.etree import ElementTree
-
-from six import string_types
-
-from nltk import __file__
-from nltk import compat
+from xml.etree import ElementTree
 
 ##########################################################################
 # Java Via Command-Line
@@ -55,15 +44,21 @@ def config_java(bin=None, options=None, verbose=False):
     :type options: list(str)
     """
     global _java_bin, _java_options
-    _java_bin = find_binary('java', bin, env_vars=['JAVAHOME', 'JAVA_HOME'], verbose=verbose, binary_names=['java.exe'])
+    _java_bin = find_binary(
+        "java",
+        bin,
+        env_vars=["JAVAHOME", "JAVA_HOME"],
+        verbose=verbose,
+        binary_names=["java.exe"],
+    )
 
     if options is not None:
-        if isinstance(options, string_types):
+        if isinstance(options, str):
             options = options.split()
         _java_options = list(options)
 
-def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None,
-         blocking=True):
+
+def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=True):
     """
     Execute the given java command, by opening a subprocess that calls
     Java.  If java has not yet been configured, it will be configured
@@ -83,7 +78,7 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None,
         standard input, standard output and standard error file
         handles, respectively.  Valid values are ``subprocess.PIPE``,
         an existing file descriptor (a positive integer), an existing
-        file object, and None.  ``subprocess.PIPE`` indicates that a
+        file object, one of the strings 'pipe', 'stdout', 'devnull', or None.  ``subprocess.PIPE`` indicates that a
         new pipe to the child should be created.  With None, no
         redirection will occur; the child's file handles will be
         inherited from the parent.  Additionally, stderr can be
@@ -103,71 +98,95 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None,
 
     :raise OSError: If the java command returns a nonzero return code.
     """
-    if stdin == 'pipe': stdin = subprocess.PIPE
-    if stdout == 'pipe': stdout = subprocess.PIPE
-    if stderr == 'pipe': stderr = subprocess.PIPE
-    if isinstance(cmd, string_types):
-        raise TypeError('cmd should be a list of strings')
+
+    subprocess_output_dict = {
+        "pipe": subprocess.PIPE,
+        "stdout": subprocess.STDOUT,
+        "devnull": subprocess.DEVNULL,
+    }
+
+    stdin = subprocess_output_dict.get(stdin, stdin)
+    stdout = subprocess_output_dict.get(stdout, stdout)
+    stderr = subprocess_output_dict.get(stderr, stderr)
+
+    if isinstance(cmd, str):
+        raise TypeError("cmd should be a list of strings")
 
     # Make sure we know where a java binary is.
     if _java_bin is None:
         config_java()
 
     # Set up the classpath.
-    if isinstance(classpath, string_types):
-        classpaths=[classpath]
+    if isinstance(classpath, str):
+        classpaths = [classpath]
     else:
-        classpaths=list(classpath)
-    classpath=os.path.pathsep.join(classpaths)
+        classpaths = list(classpath)
+    classpath = os.path.pathsep.join(classpaths)
 
     # Construct the full command string.
     cmd = list(cmd)
-    cmd = ['-cp', classpath] + cmd
+    cmd = ["-cp", classpath] + cmd
     cmd = [_java_bin] + _java_options + cmd
 
     # Call java via a subprocess
     p = subprocess.Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr)
-    if not blocking: return p
+    if not blocking:
+        return p
     (stdout, stderr) = p.communicate()
 
     # Check the return code.
     if p.returncode != 0:
         print(_decode_stdoutdata(stderr))
-        raise OSError('Java command failed : ' + str(cmd))
+        raise OSError("Java command failed : " + str(cmd))
 
     return (stdout, stderr)
 
+
 if 0:
-    #config_java(options='-Xmx512m')
+    # config_java(options='-Xmx512m')
     # Write:
-    #java('weka.classifiers.bayes.NaiveBayes',
+    # java('weka.classifiers.bayes.NaiveBayes',
     #     ['-d', '/tmp/names.model', '-t', '/tmp/train.arff'],
     #     classpath='/Users/edloper/Desktop/weka/weka.jar')
     # Read:
-    (a,b) = java(['weka.classifiers.bayes.NaiveBayes',
-                  '-l', '/tmp/names.model', '-T', '/tmp/test.arff',
-                  '-p', '0'],#, '-distribution'],
-                 classpath='/Users/edloper/Desktop/weka/weka.jar')
+    (a, b) = java(
+        [
+            "weka.classifiers.bayes.NaiveBayes",
+            "-l",
+            "/tmp/names.model",
+            "-T",
+            "/tmp/test.arff",
+            "-p",
+            "0",
+        ],  # , '-distribution'],
+        classpath="/Users/edloper/Desktop/weka/weka.jar",
+    )
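
The rewritten java() maps the convenience strings 'pipe', 'stdout' and 'devnull' through subprocess_output_dict instead of three separate if tests. A hedged sketch of a call; the jar path and class arguments are hypothetical:

    from nltk.internals import java

    # 'pipe' is translated to subprocess.PIPE before Popen is invoked.
    stdout, stderr = java(
        ["weka.classifiers.bayes.NaiveBayes", "-T", "/tmp/test.arff"],  # hypothetical args
        classpath="/opt/weka/weka.jar",  # hypothetical path
        stdout="pipe",
        stderr="pipe",
    )
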
 
 
 ######################################################################
 # Parsing
 ######################################################################
 
+
 class ReadError(ValueError):
     """
     Exception raised by read_* functions when they fail.
     :param position: The index in the input string where an error occurred.
     :param expected: What was expected when an error occurred.
     """
+
     def __init__(self, expected, position):
         ValueError.__init__(self, expected, position)
         self.expected = expected
         self.position = position
+
     def __str__(self):
-        return 'Expected %s at %s' % (self.expected, self.position)
+        return "Expected %s at %s" % (self.expected, self.position)
+
 
 _STRING_START_RE = re.compile(r"[uU]?[rR]?(\"\"\"|\'\'\'|\"|\')")
+
+
 def read_str(s, start_position):
     """
     If a Python string literal begins at the specified position in the
@@ -202,26 +221,33 @@ def read_str(s, start_position):
     """
     # Read the open quote, and any modifiers.
     m = _STRING_START_RE.match(s, start_position)
-    if not m: raise ReadError('open quote', start_position)
+    if not m:
+        raise ReadError("open quote", start_position)
     quotemark = m.group(1)
 
     # Find the close quote.
-    _STRING_END_RE = re.compile(r'\\|%s' % quotemark)
+    _STRING_END_RE = re.compile(r"\\|%s" % quotemark)
     position = m.end()
     while True:
         match = _STRING_END_RE.search(s, position)
-        if not match: raise ReadError('close quote', position)
-        if match.group(0) == '\\': position = match.end()+1
-        else: break
+        if not match:
+            raise ReadError("close quote", position)
+        if match.group(0) == "\\":
+            position = match.end() + 1
+        else:
+            break
 
     # Process it, using eval.  Strings with invalid escape sequences
     # might raise ValueError.
     try:
-        return eval(s[start_position:match.end()]), match.end()
+        return eval(s[start_position : match.end()]), match.end()
     except ValueError as e:
-        raise ReadError('invalid string (%s)' % e)
+        raise ReadError("invalid string (%s)" % e)
+
+
+_READ_INT_RE = re.compile(r"-?\d+")
+
 
-_READ_INT_RE = re.compile(r'-?\d+')
 def read_int(s, start_position):
     """
     If an integer begins at the specified position in the given
@@ -251,10 +277,14 @@ def read_int(s, start_position):
 
     """
     m = _READ_INT_RE.match(s, start_position)
-    if not m: raise ReadError('integer', start_position)
+    if not m:
+        raise ReadError("integer", start_position)
     return int(m.group()), m.end()
 
-_READ_NUMBER_VALUE = re.compile(r'-?(\d*)([.]?\d*)?')
+
+_READ_NUMBER_VALUE = re.compile(r"-?(\d*)([.]?\d*)?")
+
+
 def read_number(s, start_position):
     """
     If an integer or float begins at the specified position in the
@@ -285,16 +315,18 @@ def read_number(s, start_position):
     """
     m = _READ_NUMBER_VALUE.match(s, start_position)
     if not m or not (m.group(1) or m.group(2)):
-        raise ReadError('number', start_position)
-    if m.group(2): return float(m.group()), m.end()
-    else: return int(m.group()), m.end()
-
+        raise ReadError("number", start_position)
+    if m.group(2):
+        return float(m.group()), m.end()
+    else:
+        return int(m.group()), m.end()
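
The reader helpers keep their (value, end_position) contract after the reformat; a short illustration:

    from nltk.internals import read_number, read_str

    s = "x = 3.14 'pi'"
    print(read_number(s, 4))  # (3.14, 8)
    print(read_str(s, 9))     # ('pi', 13)
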
 
 
 ######################################################################
 # Check if a method has been overridden
 ######################################################################
 
+
 def overridden(method):
     """
     :return: True if ``method`` overrides some method with the same
@@ -314,15 +346,17 @@ def overridden(method):
 
     :type method: instance method
     """
-    # [xx] breaks on classic classes!
-    if isinstance(method, types.MethodType) and compat.get_im_class(method) is not None:
+    if isinstance(method, types.MethodType) and method.__self__.__class__ is not None:
         name = method.__name__
-        funcs = [cls.__dict__[name]
-                 for cls in _mro(compat.get_im_class(method))
-                 if name in cls.__dict__]
+        funcs = [
+            cls.__dict__[name]
+            for cls in _mro(method.__self__.__class__)
+            if name in cls.__dict__
+        ]
         return len(funcs) > 1
     else:
-        raise TypeError('Expected an instance method.')
+        raise TypeError("Expected an instance method.")
+
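
overridden() now reaches the defining class through method.__self__.__class__ instead of the removed compat.get_im_class(); the observable behavior is unchanged:

    from nltk.internals import overridden

    class A:
        def f(self):
            pass

    class B(A):
        def f(self):  # overrides A.f
            pass

    print(overridden(B().f))  # True
    print(overridden(A().f))  # False
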
 
 def _mro(cls):
     """
@@ -336,30 +370,37 @@ def _mro(cls):
         return cls.__mro__
     else:
         mro = [cls]
-        for base in cls.__bases__: mro.extend(_mro(base))
+        for base in cls.__bases__:
+            mro.extend(_mro(base))
         return mro
 
+
 ######################################################################
 # Deprecation decorator & base class
 ######################################################################
 # [xx] dedent msg first if it comes from  a docstring.
 
+
 def _add_epytext_field(obj, field, message):
     """Add an epytext @field to a given object's docstring."""
-    indent = ''
+    indent = ""
     # If we already have a docstring, then add a blank line to separate
     # it from the new field, and check its indentation.
     if obj.__doc__:
-        obj.__doc__ = obj.__doc__.rstrip()+'\n\n'
-        indents = re.findall(r'(?<=\n)[ ]+(?!\s)', obj.__doc__.expandtabs())
-        if indents: indent = min(indents)
+        obj.__doc__ = obj.__doc__.rstrip() + "\n\n"
+        indents = re.findall(r"(?<=\n)[ ]+(?!\s)", obj.__doc__.expandtabs())
+        if indents:
+            indent = min(indents)
     # If we don't have a docstring, add an empty one.
     else:
-        obj.__doc__ = ''
+        obj.__doc__ = ""
+
+    obj.__doc__ += textwrap.fill(
+        "@%s: %s" % (field, message),
+        initial_indent=indent,
+        subsequent_indent=indent + "    ",
+    )
 
-    obj.__doc__ += textwrap.fill('@%s: %s' % (field, message),
-                                 initial_indent=indent,
-                                 subsequent_indent=indent+'    ')
 
 def deprecated(message):
     """
@@ -374,10 +415,9 @@ def deprecated(message):
     """
 
     def decorator(func):
-        msg = ("Function %s() has been deprecated.  %s"
-               % (func.__name__, message))
-        msg = '\n' + textwrap.fill(msg, initial_indent='  ',
-                                   subsequent_indent='  ')
+        msg = "Function %s() has been deprecated.  %s" % (func.__name__, message)
+        msg = "\n" + textwrap.fill(msg, initial_indent="  ", subsequent_indent="  ")
+
         def newFunc(*args, **kwargs):
             warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
             return func(*args, **kwargs)
@@ -388,10 +428,12 @@ def deprecated(message):
         newFunc.__doc__ = func.__doc__
         newFunc.__deprecated__ = True
         # Add a @deprecated field to the docstring.
-        _add_epytext_field(newFunc, 'deprecated', message)
+        _add_epytext_field(newFunc, "deprecated", message)
         return newFunc
+
     return decorator
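
The @deprecated decorator still wraps the function and warns at call time; a minimal sketch (old_tokenize and the suggested replacement are hypothetical names):

    import warnings
    from nltk.internals import deprecated

    @deprecated("Use new_tokenize() instead.")  # hypothetical replacement
    def old_tokenize(s):
        return s.split()

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        print(old_tokenize("a b"))  # ['a', 'b']; w[0] holds the DeprecationWarning
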
 
+
 class Deprecated(object):
     """
     A base class used to mark deprecated classes.  A typical usage is to
@@ -407,53 +449,67 @@ class Deprecated(object):
     The docstring of the deprecated class will be used in the
     deprecation warning message.
     """
+
     def __new__(cls, *args, **kwargs):
         # Figure out which class is the deprecated one.
         dep_cls = None
         for base in _mro(cls):
             if Deprecated in base.__bases__:
-                dep_cls = base; break
-        assert dep_cls, 'Unable to determine which base is deprecated.'
+                dep_cls = base
+                break
+        assert dep_cls, "Unable to determine which base is deprecated."
 
         # Construct an appropriate warning.
-        doc = dep_cls.__doc__ or ''.strip()
+        doc = (dep_cls.__doc__ or "").strip()
         # If there's a @deprecated field, strip off the field marker.
-        doc = re.sub(r'\A\s*@deprecated:', r'', doc)
+        doc = re.sub(r"\A\s*@deprecated:", r"", doc)
         # Strip off any indentation.
-        doc = re.sub(r'(?m)^\s*', '', doc)
+        doc = re.sub(r"(?m)^\s*", "", doc)
         # Construct a 'name' string.
-        name = 'Class %s' % dep_cls.__name__
+        name = "Class %s" % dep_cls.__name__
         if cls != dep_cls:
-            name += ' (base class for %s)' % cls.__name__
+            name += " (base class for %s)" % cls.__name__
         # Put it all together.
-        msg = '%s has been deprecated.  %s' % (name, doc)
+        msg = "%s has been deprecated.  %s" % (name, doc)
         # Wrap it.
-        msg = '\n' + textwrap.fill(msg, initial_indent='    ',
-                                   subsequent_indent='    ')
+        msg = "\n" + textwrap.fill(msg, initial_indent="    ", subsequent_indent="    ")
         warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
         # Do the actual work of __new__.
         return object.__new__(cls)
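
A sketch of the intended pattern, with invented class names:

    from nltk.internals import Deprecated

    class NewTagger:
        def tag(self, tokens):
            return [(t, "NN") for t in tokens]

    class OldTagger(Deprecated, NewTagger):
        """Use NewTagger instead."""

    OldTagger()  # warns: "Class OldTagger has been deprecated.  Use NewTagger instead."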
 
+
 ##########################################################################
 # COUNTER, FOR UNIQUE NAMING
 ##########################################################################
 
+
 class Counter:
     """
     A counter that auto-increments each time its value is read.
     """
+
     def __init__(self, initial_value=0):
         self._value = initial_value
+
     def get(self):
         self._value += 1
         return self._value
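
For illustration:

    from nltk.internals import Counter

    c = Counter()
    c.get()  # 1
    c.get()  # 2 -- every read auto-increments, handy for unique names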
 
+
 ##########################################################################
 # Search for files/binaries
 ##########################################################################
 
-def find_file_iter(filename, env_vars=(), searchpath=(),
-    file_names=None, url=None, verbose=False, finding_dir=False):
+
+def find_file_iter(
+    filename,
+    env_vars=(),
+    searchpath=(),
+    file_names=None,
+    url=None,
+    verbose=False,
+    finding_dir=False,
+):
     """
     Search for a file to be used by nltk.
 
@@ -465,10 +521,10 @@ def find_file_iter(filename, env_vars=(), searchpath=(),
     :param verbose: Whether or not to print path when a file is found.
     """
     file_names = [filename] + (file_names or [])
-    assert isinstance(filename, string_types)
-    assert not isinstance(file_names, string_types)
-    assert not isinstance(searchpath, string_types)
-    if isinstance(env_vars, string_types):
+    assert isinstance(filename, str)
+    assert not isinstance(file_names, str)
+    assert not isinstance(searchpath, str)
+    if isinstance(env_vars, str):
         env_vars = env_vars.split()
     yielded = False
 
@@ -477,27 +533,27 @@ def find_file_iter(filename, env_vars=(), searchpath=(),
         path_to_file = os.path.join(filename, alternative)
         if os.path.isfile(path_to_file):
             if verbose:
-                print('[Found %s: %s]' % (filename, path_to_file))
+                print("[Found %s: %s]" % (filename, path_to_file))
             yielded = True
             yield path_to_file
         # Check the bare alternatives
         if os.path.isfile(alternative):
             if verbose:
-                print('[Found %s: %s]' % (filename, alternative))
+                print("[Found %s: %s]" % (filename, alternative))
             yielded = True
             yield alternative
         # Check if the alternative is inside a 'file' directory
-        path_to_file = os.path.join(filename, 'file', alternative)
+        path_to_file = os.path.join(filename, "file", alternative)
         if os.path.isfile(path_to_file):
             if verbose:
-                print('[Found %s: %s]' % (filename, path_to_file))
+                print("[Found %s: %s]" % (filename, path_to_file))
             yielded = True
             yield path_to_file
 
     # Check environment variables
     for env_var in env_vars:
         if env_var in os.environ:
-            if finding_dir: # This is to file a directory instead of file
+            if finding_dir:  # This is to find a directory instead of a file
                 yielded = True
                 yield os.environ[env_var]
 
@@ -505,7 +561,7 @@ def find_file_iter(filename, env_vars=(), searchpath=(),
                 # Check if the environment variable contains a direct path to the bin
                 if os.path.isfile(env_dir):
                     if verbose:
-                        print('[Found %s: %s]'%(filename, env_dir))
+                        print("[Found %s: %s]" % (filename, env_dir))
                     yielded = True
                     yield env_dir
                 # Check if the possible bin names exist inside the environment variable directories
@@ -513,18 +569,18 @@ def find_file_iter(filename, env_vars=(), searchpath=(),
                     path_to_file = os.path.join(env_dir, alternative)
                     if os.path.isfile(path_to_file):
                         if verbose:
-                            print('[Found %s: %s]'%(filename, path_to_file))
+                            print("[Found %s: %s]" % (filename, path_to_file))
                         yielded = True
                         yield path_to_file
                     # Check if the alternative is inside a 'file' directory
                     # path_to_file = os.path.join(env_dir, 'file', alternative)
 
                     # Check if the alternative is inside a 'bin' directory
-                    path_to_file = os.path.join(env_dir, 'bin', alternative)
+                    path_to_file = os.path.join(env_dir, "bin", alternative)
 
                     if os.path.isfile(path_to_file):
                         if verbose:
-                            print('[Found %s: %s]' % (filename, path_to_file))
+                            print("[Found %s: %s]" % (filename, path_to_file))
                         yielded = True
                         yield path_to_file
 
@@ -538,51 +594,71 @@ def find_file_iter(filename, env_vars=(), searchpath=(),
 
     # If we're on a POSIX system, then try using the 'which' command
     # to find the file.
-    if os.name == 'posix':
+    if os.name == "posix":
         for alternative in file_names:
             try:
-                p = subprocess.Popen(['which', alternative],
-                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                p = subprocess.Popen(
+                    ["which", alternative],
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+                )
                 stdout, stderr = p.communicate()
                 path = _decode_stdoutdata(stdout).strip()
                 if path.endswith(alternative) and os.path.exists(path):
                     if verbose:
-                        print('[Found %s: %s]' % (filename, path))
+                        print("[Found %s: %s]" % (filename, path))
                     yielded = True
                     yield path
             except (KeyboardInterrupt, SystemExit, OSError):
                 raise
-            except:
+            except Exception:
                 pass
 
     if not yielded:
-        msg = ("NLTK was unable to find the %s file!" "\nUse software specific "
-               "configuration paramaters" % filename)
-        if env_vars: msg += ' or set the %s environment variable' % env_vars[0]
-        msg += '.'
+        msg = (
+            "NLTK was unable to find the %s file!"
+            "\nUse software specific "
+            "configuration paramaters" % filename
+        )
+        if env_vars:
+            msg += " or set the %s environment variable" % env_vars[0]
+        msg += "."
         if searchpath:
-            msg += '\n\n  Searched in:'
-            msg += ''.join('\n    - %s' % d for d in searchpath)
-        if url: msg += ('\n\n  For more information on %s, see:\n    <%s>' %
-                        (filename, url))
-        div = '='*75
-        raise LookupError('\n\n%s\n%s\n%s' % (div, msg, div))
-
-
-def find_file(filename, env_vars=(), searchpath=(),
-        file_names=None, url=None, verbose=False):
-    return next(find_file_iter(filename, env_vars, searchpath,
-                               file_names, url, verbose))
-
-
-def find_dir(filename, env_vars=(), searchpath=(),
-        file_names=None, url=None, verbose=False):
-    return next(find_file_iter(filename, env_vars, searchpath,
-                               file_names, url, verbose, finding_dir=True))
-
-
-def find_binary_iter(name, path_to_bin=None, env_vars=(), searchpath=(),
-                binary_names=None, url=None, verbose=False):
+            msg += "\n\n  Searched in:"
+            msg += "".join("\n    - %s" % d for d in searchpath)
+        if url:
+            msg += "\n\n  For more information on %s, see:\n    <%s>" % (filename, url)
+        div = "=" * 75
+        raise LookupError("\n\n%s\n%s\n%s" % (div, msg, div))
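
A hedged sketch of calling the wrappers defined next (the tool and
env-var names are invented):

    from nltk.internals import find_file

    try:
        path = find_file("mytool", env_vars=["MYTOOL"], verbose=True)
    except LookupError as err:
        print(err)  # the multi-line help message assembled above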
+
+
+def find_file(
+    filename, env_vars=(), searchpath=(), file_names=None, url=None, verbose=False
+):
+    return next(
+        find_file_iter(filename, env_vars, searchpath, file_names, url, verbose)
+    )
+
+
+def find_dir(
+    filename, env_vars=(), searchpath=(), file_names=None, url=None, verbose=False
+):
+    return next(
+        find_file_iter(
+            filename, env_vars, searchpath, file_names, url, verbose, finding_dir=True
+        )
+    )
+
+
+def find_binary_iter(
+    name,
+    path_to_bin=None,
+    env_vars=(),
+    searchpath=(),
+    binary_names=None,
+    url=None,
+    verbose=False,
+):
     """
     Search for a file to be used by nltk.
 
@@ -594,17 +670,37 @@ def find_binary_iter(name, path_to_bin=None, env_vars=(), searchpath=(),
     :param url: URL presented to user for download help.
     :param verbose: Whether or not to print path when a file is found.
     """
-    for file in  find_file_iter(path_to_bin or name, env_vars, searchpath, binary_names,
-                     url, verbose):
+    for file in find_file_iter(
+        path_to_bin or name, env_vars, searchpath, binary_names, url, verbose
+    ):
         yield file
 
-def find_binary(name, path_to_bin=None, env_vars=(), searchpath=(),
-                binary_names=None, url=None, verbose=False):
-    return next(find_binary_iter(name, path_to_bin, env_vars, searchpath,
-                                 binary_names, url, verbose))
 
-def find_jar_iter(name_pattern, path_to_jar=None, env_vars=(),
-        searchpath=(), url=None, verbose=False, is_regex=False):
+def find_binary(
+    name,
+    path_to_bin=None,
+    env_vars=(),
+    searchpath=(),
+    binary_names=None,
+    url=None,
+    verbose=False,
+):
+    return next(
+        find_binary_iter(
+            name, path_to_bin, env_vars, searchpath, binary_names, url, verbose
+        )
+    )
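
For example (binary and env-var names invented):

    from nltk.internals import find_binary

    # Checks the env var, any search paths, and -- on POSIX -- `which`,
    # returning the first hit or raising LookupError.
    bin_path = find_binary("mytagger", env_vars=["MYTAGGER_HOME"], verbose=True)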
+
+
+def find_jar_iter(
+    name_pattern,
+    path_to_jar=None,
+    env_vars=(),
+    searchpath=(),
+    url=None,
+    verbose=False,
+    is_regex=False,
+):
     """
     Search for a jar that is used by nltk.
 
@@ -617,14 +713,14 @@ def find_jar_iter(name_pattern, path_to_jar=None, env_vars=(),
     :param is_regex: Whether name is a regular expression.
     """
 
-    assert isinstance(name_pattern, string_types)
-    assert not isinstance(searchpath, string_types)
-    if isinstance(env_vars, string_types):
+    assert isinstance(name_pattern, str)
+    assert not isinstance(searchpath, str)
+    if isinstance(env_vars, str):
         env_vars = env_vars.split()
     yielded = False
 
     # Make sure we check the CLASSPATH first
-    env_vars = ['CLASSPATH'] + list(env_vars)
+    env_vars = ["CLASSPATH"] + list(env_vars)
 
     # If an explicit location was given, then check it, and yield it if
     # it's present; otherwise, complain.
@@ -633,51 +729,70 @@ def find_jar_iter(name_pattern, path_to_jar=None, env_vars=(),
             yielded = True
             yield path_to_jar
         else:
-            raise LookupError('Could not find %s jar file at %s' %
-                            (name_pattern, path_to_jar))
+            raise LookupError(
+                "Could not find %s jar file at %s" % (name_pattern, path_to_jar)
+            )
 
     # Check environment variables
     for env_var in env_vars:
         if env_var in os.environ:
-            if env_var == 'CLASSPATH':
-                classpath = os.environ['CLASSPATH']
+            if env_var == "CLASSPATH":
+                classpath = os.environ["CLASSPATH"]
                 for cp in classpath.split(os.path.pathsep):
                     if os.path.isfile(cp):
-                        filename=os.path.basename(cp)
-                        if is_regex and re.match(name_pattern, filename) or \
-                                (not is_regex and filename == name_pattern):
+                        filename = os.path.basename(cp)
+                        if (
+                            is_regex
+                            and re.match(name_pattern, filename)
+                            or (not is_regex and filename == name_pattern)
+                        ):
                             if verbose:
-                                print('[Found %s: %s]' % (name_pattern, cp))
+                                print("[Found %s: %s]" % (name_pattern, cp))
                             yielded = True
                             yield cp
                     # The case where user put directory containing the jar file in the classpath
                     if os.path.isdir(cp):
                         if not is_regex:
-                            if os.path.isfile(os.path.join(cp,name_pattern)):
+                            if os.path.isfile(os.path.join(cp, name_pattern)):
                                 if verbose:
-                                    print('[Found %s: %s]' % (name_pattern, cp))
+                                    print("[Found %s: %s]" % (name_pattern, cp))
                                 yielded = True
-                                yield os.path.join(cp,name_pattern)
+                                yield os.path.join(cp, name_pattern)
                         else:
                             # Look for file using regular expression
                             for file_name in os.listdir(cp):
-                                if re.match(name_pattern,file_name):
+                                if re.match(name_pattern, file_name):
                                     if verbose:
-                                        print('[Found %s: %s]' % (name_pattern, os.path.join(cp,file_name)))
+                                        print(
+                                            "[Found %s: %s]"
+                                            % (
+                                                name_pattern,
+                                                os.path.join(cp, file_name),
+                                            )
+                                        )
                                     yielded = True
-                                    yield os.path.join(cp,file_name)
+                                    yield os.path.join(cp, file_name)
 
             else:
                 jar_env = os.environ[env_var]
-                jar_iter = ((os.path.join(jar_env, path_to_jar) for path_to_jar in os.listdir(jar_env))
-                            if os.path.isdir(jar_env) else (jar_env,))
+                jar_iter = (
+                    (
+                        os.path.join(jar_env, path_to_jar)
+                        for path_to_jar in os.listdir(jar_env)
+                    )
+                    if os.path.isdir(jar_env)
+                    else (jar_env,)
+                )
                 for path_to_jar in jar_iter:
                     if os.path.isfile(path_to_jar):
-                        filename=os.path.basename(path_to_jar)
-                        if is_regex and re.match(name_pattern, filename) or \
-                                (not is_regex and filename == name_pattern):
+                        filename = os.path.basename(path_to_jar)
+                        if (
+                            is_regex
+                            and re.match(name_pattern, filename)
+                            or (not is_regex and filename == name_pattern)
+                        ):
                             if verbose:
-                                print('[Found %s: %s]' % (name_pattern, path_to_jar))
+                                print("[Found %s: %s]" % (name_pattern, path_to_jar))
                             yielded = True
                             yield path_to_jar
 
@@ -689,42 +804,58 @@ def find_jar_iter(name_pattern, path_to_jar=None, env_vars=(),
                 if os.path.isfile(path_to_jar):
                     if re.match(name_pattern, filename):
                         if verbose:
-                            print('[Found %s: %s]' % (filename, path_to_jar))
+                            print("[Found %s: %s]" % (filename, path_to_jar))
                         yielded = True
                         yield path_to_jar
         else:
             path_to_jar = os.path.join(directory, name_pattern)
             if os.path.isfile(path_to_jar):
                 if verbose:
-                    print('[Found %s: %s]' % (name_pattern, path_to_jar))
+                    print("[Found %s: %s]" % (name_pattern, path_to_jar))
                 yielded = True
                 yield path_to_jar
 
     if not yielded:
         # If nothing was found, raise an error
-        msg = ("NLTK was unable to find %s!" % name_pattern)
-        if env_vars: msg += ' Set the %s environment variable' % env_vars[0]
-        msg = textwrap.fill(msg+'.', initial_indent='  ',
-                            subsequent_indent='  ')
+        msg = "NLTK was unable to find %s!" % name_pattern
+        if env_vars:
+            msg += " Set the %s environment variable" % env_vars[0]
+        msg = textwrap.fill(msg + ".", initial_indent="  ", subsequent_indent="  ")
         if searchpath:
-            msg += '\n\n  Searched in:'
-            msg += ''.join('\n    - %s' % d for d in searchpath)
+            msg += "\n\n  Searched in:"
+            msg += "".join("\n    - %s" % d for d in searchpath)
         if url:
-            msg += ('\n\n  For more information, on %s, see:\n    <%s>' %
-                    (name_pattern, url))
-        div = '='*75
-        raise LookupError('\n\n%s\n%s\n%s' % (div, msg, div))
-
-def find_jar(name_pattern, path_to_jar=None, env_vars=(),
-        searchpath=(), url=None, verbose=False, is_regex=False):
-    return next(find_jar_iter(name_pattern, path_to_jar, env_vars,
-                         searchpath, url, verbose, is_regex))
+            msg += "\n\n  For more information, on %s, see:\n    <%s>" % (
+                name_pattern,
+                url,
+            )
+        div = "=" * 75
+        raise LookupError("\n\n%s\n%s\n%s" % (div, msg, div))
+
+
+def find_jar(
+    name_pattern,
+    path_to_jar=None,
+    env_vars=(),
+    searchpath=(),
+    url=None,
+    verbose=False,
+    is_regex=False,
+):
+    return next(
+        find_jar_iter(
+            name_pattern, path_to_jar, env_vars, searchpath, url, verbose, is_regex
+        )
+    )
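
A usage sketch (jar name and env var invented; note CLASSPATH is always
consulted first, since it is prepended to env_vars above):

    from nltk.internals import find_jar

    jar = find_jar("mytool.jar", env_vars=["MYTOOL_JAR"], verbose=True)

    # With is_regex=True the name is treated as a pattern:
    jar = find_jar(r"mytool-\d+\.jar", env_vars=["MYTOOL_JAR"], is_regex=True)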
 
 
 def find_jars_within_path(path_to_jars):
-       return [os.path.join(root, filename)
-                       for root, dirnames, filenames in os.walk(path_to_jars)
-                       for filename in fnmatch.filter(filenames, '*.jar')]
+    return [
+        os.path.join(root, filename)
+        for root, dirnames, filenames in os.walk(path_to_jars)
+        for filename in fnmatch.filter(filenames, "*.jar")
+    ]
+
 
 def _decode_stdoutdata(stdoutdata):
     """ Convert data read from stdout/stderr to unicode """
@@ -736,10 +867,12 @@ def _decode_stdoutdata(stdoutdata):
         return stdoutdata.decode()
     return stdoutdata.decode(encoding)
 
+
 ##########################################################################
 # Import Stdlib Module
 ##########################################################################
 
+
 def import_from_stdlib(module):
     """
     When python is run from within the nltk/ directory tree, the
@@ -751,7 +884,7 @@ def import_from_stdlib(module):
     instead (causing the import to fail).
     """
     old_path = sys.path
-    sys.path = [d for d in sys.path if d not in ('', '.')]
+    sys.path = [d for d in sys.path if d not in ("", ".")]
     m = __import__(module)
     sys.path = old_path
     return m
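
For instance:

    from nltk.internals import import_from_stdlib

    # Returns the standard-library module even if a same-named file
    # (e.g. ./inspect.py) shadows it on sys.path.
    inspect = import_from_stdlib("inspect")
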
@@ -761,7 +894,8 @@ def import_from_stdlib(module):
 # Wrapper for ElementTree Elements
 ##########################################################################
 
-@compat.python_2_unicode_compatible
+
+
 class ElementWrapper(object):
     """
     A wrapper around ElementTree Element objects whose main purpose is
@@ -797,9 +931,9 @@ class ElementWrapper(object):
             <Element "<?xml version='1.0' encoding='utf8'?>\n<test />">
 
         """
-        if isinstance(etree, string_types):
+        if isinstance(etree, str):
             etree = ElementTree.fromstring(etree)
-        self.__dict__['_etree'] = etree
+        self.__dict__["_etree"] = etree
 
     def unwrap(self):
         """
@@ -808,26 +942,29 @@ class ElementWrapper(object):
         return self._etree
 
     ##////////////////////////////////////////////////////////////
-    #{ String Representation
+    # { String Representation
     ##////////////////////////////////////////////////////////////
 
     def __repr__(self):
-        s = ElementTree.tostring(self._etree, encoding='utf8').decode('utf8')
+        s = ElementTree.tostring(self._etree, encoding="utf8").decode("utf8")
         if len(s) > 60:
-            e = s.rfind('<')
-            if (len(s)-e) > 30: e = -20
-            s = '%s...%s' % (s[:30], s[e:])
-        return '<Element %r>' % s
+            e = s.rfind("<")
+            if (len(s) - e) > 30:
+                e = -20
+            s = "%s...%s" % (s[:30], s[e:])
+        return "<Element %r>" % s
 
     def __str__(self):
         """
         :return: the result of applying ``ElementTree.tostring()`` to
         the wrapped Element object.
         """
-        return ElementTree.tostring(self._etree, encoding='utf8').decode('utf8').rstrip()
+        return (
+            ElementTree.tostring(self._etree, encoding="utf8").decode("utf8").rstrip()
+        )
 
     ##////////////////////////////////////////////////////////////
-    #{ Element interface Delegation (pass-through)
+    # { Element interface Delegation (pass-through)
     ##////////////////////////////////////////////////////////////
 
     def __getattr__(self, attrib):
@@ -855,7 +992,7 @@ class ElementWrapper(object):
         return len(self._etree)
 
     ##////////////////////////////////////////////////////////////
-    #{ Element interface Delegation (wrap result)
+    # { Element interface Delegation (wrap result)
     ##////////////////////////////////////////////////////////////
 
     def __getitem__(self, index):
@@ -868,24 +1005,27 @@ class ElementWrapper(object):
         return [ElementWrapper(elt) for elt in self._etree]
 
     def getiterator(self, tag=None):
-        return (ElementWrapper(elt)
-                for elt in self._etree.getiterator(tag))
+        return (ElementWrapper(elt) for elt in self._etree.getiterator(tag))
 
     def makeelement(self, tag, attrib):
         return ElementWrapper(self._etree.makeelement(tag, attrib))
 
     def find(self, path):
         elt = self._etree.find(path)
-        if elt is None: return elt
-        else: return ElementWrapper(elt)
+        if elt is None:
+            return elt
+        else:
+            return ElementWrapper(elt)
 
     def findall(self, path):
         return [ElementWrapper(elt) for elt in self._etree.findall(path)]
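
A short sketch of the wrapper in action:

    from nltk.internals import ElementWrapper

    w = ElementWrapper("<doc><w>hello</w></doc>")
    len(w)             # 1 -- delegated to the underlying Element
    w.find("w").text   # 'hello' -- find() re-wraps, attributes pass through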
 
+
 ######################################################################
 # Helper for Handling Slicing
 ######################################################################
 
+
 def slice_bounds(sequence, slice_obj, allow_step=False):
     """
     Given a slice, return the corresponding (start, stop) bounds,
@@ -907,7 +1047,8 @@ def slice_bounds(sequence, slice_obj, allow_step=False):
     # value tuple.
     if allow_step:
         step = slice_obj.step
-        if step is None: step = 1
+        if step is None:
+            step = 1
         # Use a recursive call without allow_step to find the slice
         # bounds.  If step is negative, then the roles of start and
         # stop (in terms of default values, etc), are swapped.
@@ -919,23 +1060,30 @@ def slice_bounds(sequence, slice_obj, allow_step=False):
 
     # Otherwise, make sure that no non-default step value is used.
     elif slice_obj.step not in (None, 1):
-        raise ValueError('slices with steps are not supported by %s' %
-                         sequence.__class__.__name__)
+        raise ValueError(
+            "slices with steps are not supported by %s" % sequence.__class__.__name__
+        )
 
     # Supply default offsets.
-    if start is None: start = 0
-    if stop is None: stop = len(sequence)
+    if start is None:
+        start = 0
+    if stop is None:
+        stop = len(sequence)
 
     # Handle negative indices.
-    if start < 0: start = max(0, len(sequence)+start)
-    if stop < 0: stop = max(0, len(sequence)+stop)
+    if start < 0:
+        start = max(0, len(sequence) + start)
+    if stop < 0:
+        stop = max(0, len(sequence) + stop)
 
     # Make sure stop doesn't go past the end of the list.  Note that
     # we avoid calculating len(sequence) if possible, because for lazy
     # sequences, calculating the length of a sequence can be expensive.
     if stop > 0:
-        try: sequence[stop-1]
-        except IndexError: stop = len(sequence)
+        try:
+            sequence[stop - 1]
+        except IndexError:
+            stop = len(sequence)
 
     # Make sure start isn't past stop.
     start = min(start, stop)
@@ -943,28 +1091,29 @@ def slice_bounds(sequence, slice_obj, allow_step=False):
     # That's all folks!
     return start, stop
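
For example:

    from nltk.internals import slice_bounds

    seq = list(range(10))
    slice_bounds(seq, slice(2, -1))                           # (2, 9)
    slice_bounds(seq, slice(-3, None))                        # (7, 10)
    slice_bounds(seq, slice(None, None, 2), allow_step=True)  # (0, 10, 2)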
 
+
 ######################################################################
 # Permission Checking
 ######################################################################
 
+
 def is_writable(path):
     # Ensure that it exists.
     if not os.path.exists(path):
         return False
 
     # If we're on a posix system, check its permissions.
-    if hasattr(os, 'getuid'):
+    if hasattr(os, "getuid"):
         statdata = os.stat(path)
         perm = stat.S_IMODE(statdata.st_mode)
         # is it world-writable?
-        if (perm & 0o002):
+        if perm & 0o002:
             return True
         # do we own it?
         elif statdata.st_uid == os.getuid() and (perm & 0o200):
             return True
         # are we in a group that can write to it?
-        elif (statdata.st_gid in [os.getgid()] + os.getgroups()) \
-            and (perm & 0o020):
+        elif (statdata.st_gid in [os.getgid()] + os.getgroups()) and (perm & 0o020):
             return True
         # otherwise, we can't write to it.
         else:
@@ -974,9 +1123,14 @@ def is_writable(path):
     # [xx] should we do other checks on other platforms?
     return True
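
For instance:

    from nltk.internals import is_writable

    is_writable("/tmp")          # True on a typical POSIX setup
    is_writable("/no/such/dir")  # False -- the path must exist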
 
+
 ######################################################################
 # NLTK Error reporting
 ######################################################################
 
+
 def raise_unorderable_types(ordering, a, b):
-    raise TypeError("unorderable types: %s() %s %s()" % (type(a).__name__, ordering, type(b).__name__))
+    raise TypeError(
+        "unorderable types: %s() %s %s()"
+        % (type(a).__name__, ordering, type(b).__name__)
+    )
diff --git a/nlp_resource_data/nltk/internals.pyc b/nlp_resource_data/nltk/internals.pyc
deleted file mode 100755 (executable)
index 62fbf52..0000000
Binary files a/nlp_resource_data/nltk/internals.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 3f74b12..f15fea1
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: JSON Encoder/Decoder Helpers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Xu <xxu@student.unimelb.edu.au>
 #
 # URL: <http://nltk.org/>
@@ -19,24 +19,27 @@ import json
 
 json_tags = {}
 
-TAG_PREFIX = '!'
+TAG_PREFIX = "!"
+
 
 def register_tag(cls):
     """
     Decorates a class to register its json tag.
     """
-    json_tags[TAG_PREFIX+getattr(cls, 'json_tag')] = cls
+    json_tags[TAG_PREFIX + getattr(cls, "json_tag")] = cls
     return cls
 
+
 class JSONTaggedEncoder(json.JSONEncoder):
     def default(self, obj):
-        obj_tag = getattr(obj, 'json_tag', None)
+        obj_tag = getattr(obj, "json_tag", None)
         if obj_tag is None:
             return super(JSONTaggedEncoder, self).default(obj)
         obj_tag = TAG_PREFIX + obj_tag
         obj = obj.encode_json_obj()
         return {obj_tag: obj}
 
+
 class JSONTaggedDecoder(json.JSONDecoder):
     def decode(self, s):
         return self.decode_obj(super(JSONTaggedDecoder, self).decode(s))
@@ -52,12 +55,12 @@ class JSONTaggedDecoder(json.JSONDecoder):
         if not isinstance(obj, dict) or len(obj) != 1:
             return obj
         obj_tag = next(iter(obj.keys()))
-        if not obj_tag.startswith('!'):
+        if not obj_tag.startswith("!"):
             return obj
         if obj_tag not in json_tags:
-            raise ValueError('Unknown tag', obj_tag)
+            raise ValueError("Unknown tag", obj_tag)
         obj_cls = json_tags[obj_tag]
         return obj_cls.decode_json_obj(obj[obj_tag])
 
-__all__ = ['register_tag', 'json_tags',
-           'JSONTaggedEncoder', 'JSONTaggedDecoder']
+
+__all__ = ["register_tag", "json_tags", "JSONTaggedEncoder", "JSONTaggedDecoder"]
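
A sketch of the intended round trip, with an invented example class:

    from nltk.jsontags import JSONTaggedDecoder, JSONTaggedEncoder, register_tag

    @register_tag
    class Point:
        json_tag = "point"

        def __init__(self, x, y):
            self.x, self.y = x, y

        def encode_json_obj(self):
            return [self.x, self.y]

        @classmethod
        def decode_json_obj(cls, obj):
            return cls(*obj)

    s = JSONTaggedEncoder().encode(Point(1, 2))  # '{"!point": [1, 2]}'
    p = JSONTaggedDecoder().decode(s)            # a Point with x=1, y=2
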
diff --git a/nlp_resource_data/nltk/jsontags.pyc b/nlp_resource_data/nltk/jsontags.pyc
deleted file mode 100755 (executable)
index c455355..0000000
Binary files a/nlp_resource_data/nltk/jsontags.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 2c58878..266df76
@@ -14,7 +14,6 @@
     See the documentation for further information on copyrights,
     or contact the author. All Rights Reserved.
 """
-from __future__ import print_function
 
 ### Constants
 
@@ -22,6 +21,7 @@ _debug = 0
 
 ###
 
+
 class LazyModule:
 
     """ Lazy module class.
@@ -44,11 +44,12 @@ class LazyModule:
         t = ISO.Week(1998,1,1)
 
     """
+
     # Flag which indicates whether the LazyModule is initialized or not
     __lazymodule_init = 0
 
     # Name of the module to load
-    __lazymodule_name = ''
+    __lazymodule_name = ""
 
     # Flag which indicates whether the module was loaded or not
     __lazymodule_loaded = 0
@@ -73,9 +74,9 @@ class LazyModule:
         if globals is None:
             globals = locals
         self.__lazymodule_globals = globals
-        mainname = globals.get('__name__', '')
+        mainname = globals.get("__name__", "")
         if mainname:
-            self.__name__ = mainname + '.' + name
+            self.__name__ = mainname + "." + name
             self.__lazymodule_name = name
         else:
             self.__name__ = self.__lazymodule_name = name
@@ -90,23 +91,20 @@ class LazyModule:
         if self.__lazymodule_loaded:
             return self.__lazymodule_locals[name]
         if _debug:
-            print('LazyModule: Loading module %r' % name)
-        self.__lazymodule_locals[name] \
-             = module \
-             = __import__(name,
-                          self.__lazymodule_locals,
-                          self.__lazymodule_globals,
-                          '*')
+            print("LazyModule: Loading module %r" % name)
+        self.__lazymodule_locals[name] = module = __import__(
+            name, self.__lazymodule_locals, self.__lazymodule_globals, "*"
+        )
 
         # Fill namespace with all symbols from original module to
         # provide faster access.
         self.__dict__.update(module.__dict__)
 
         # Set import flag
-        self.__dict__['__lazymodule_loaded'] = 1
+        self.__dict__["__lazymodule_loaded"] = 1
 
         if _debug:
-            print('LazyModule: Module %r loaded' % name)
+            print("LazyModule: Module %r loaded" % name)
         return module
 
     def __getattr__(self, name):
@@ -116,8 +114,10 @@ class LazyModule:
         if self.__lazymodule_loaded:
             raise AttributeError(name)
         if _debug:
-            print('LazyModule: ' \
-                  'Module load triggered by attribute %r read access' % name)
+            print(
+                "LazyModule: "
+                "Module load triggered by attribute %r read access" % name
+            )
         module = self.__lazymodule_import()
         return getattr(module, name)
 
@@ -133,8 +133,10 @@ class LazyModule:
             self.__dict__[name] = value
             return
         if _debug:
-            print('LazyModule: ' \
-                  'Module load triggered by attribute %r write access' % name)
+            print(
+                "LazyModule: "
+                "Module load triggered by attribute %r write access" % name
+            )
         module = self.__lazymodule_import()
         setattr(module, name, value)
 
diff --git a/nlp_resource_data/nltk/lazyimport.pyc b/nlp_resource_data/nltk/lazyimport.pyc
deleted file mode 100755 (executable)
index bba5094..0000000
Binary files a/nlp_resource_data/nltk/lazyimport.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/lm/__init__.py b/nlp_resource_data/nltk/lm/__init__.py
new file mode 100644 (file)
index 0000000..5b87ef7
--- /dev/null
@@ -0,0 +1,239 @@
+# Natural Language Toolkit: Language Models
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Authors: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+"""
+NLTK Language Modeling Module.
+------------------------------
+
+Currently this module covers only ngram language models, but it should be easy
+to extend to neural models.
+
+
+Preparing Data
+==============
+
+Before we train our ngram models it is necessary to make sure the data we put in
+them is in the right format.
+Let's say we have a text that is a list of sentences, where each sentence is
+a list of strings. For simplicity we just consider a text consisting of
+characters instead of words.
+
+    >>> text = [['a', 'b', 'c'], ['a', 'c', 'd', 'c', 'e', 'f']]
+
+If we want to train a bigram model, we need to turn this text into bigrams.
+Here's what the first sentence of our text would look like if we use a function
+from NLTK for this.
+
+    >>> from nltk.util import bigrams
+    >>> list(bigrams(text[0]))
+    [('a', 'b'), ('b', 'c')]
+
+Notice how "b" occurs both as the first and second member of different bigrams
+but "a" and "c" don't? Wouldn't it be nice to somehow indicate how often sentences
+start with "a" and end with "c"?
+A standard way to deal with this is to add special "padding" symbols to the
+sentence before splitting it into ngrams.
+Fortunately, NLTK also has a function for that, let's see what it does to the
+first sentence.
+
+    >>> from nltk.util import pad_sequence
+    >>> list(pad_sequence(text[0],
+    ... pad_left=True,
+    ... left_pad_symbol="<s>",
+    ... pad_right=True,
+    ... right_pad_symbol="</s>",
+    ... n=2))
+    ['<s>', 'a', 'b', 'c', '</s>']
+
+Note the `n` argument, which tells the function we need padding for bigrams.
+Now, passing all these parameters every time is tedious and in most cases they
+can be safely assumed as defaults anyway.
+Thus our module provides a convenience function that has all these arguments
+already set while the other arguments remain the same as for `pad_sequence`.
+
+    >>> from nltk.lm.preprocessing import pad_both_ends
+    >>> list(pad_both_ends(text[0], n=2))
+    ['<s>', 'a', 'b', 'c', '</s>']
+
+Combining the two parts discussed so far we get the following preparation steps
+for one sentence.
+
+    >>> list(bigrams(pad_both_ends(text[0], n=2)))
+    [('<s>', 'a'), ('a', 'b'), ('b', 'c'), ('c', '</s>')]
+
+To make our model more robust we could also train it on unigrams (single words)
+as well as bigrams, its main source of information.
+NLTK once again helpfully provides a function called `everygrams`.
+While not the most efficient, it is conceptually simple.
+
+
+    >>> from nltk.util import everygrams
+    >>> padded_bigrams = list(pad_both_ends(text[0], n=2))
+    >>> list(everygrams(padded_bigrams, max_len=2))
+    [('<s>',),
+     ('a',),
+     ('b',),
+     ('c',),
+     ('</s>',),
+     ('<s>', 'a'),
+     ('a', 'b'),
+     ('b', 'c'),
+     ('c', '</s>')]
+
+We are almost ready to start counting ngrams; just one more step left.
+During training and evaluation our model will rely on a vocabulary that
+defines which words are "known" to the model.
+To create this vocabulary we need to pad our sentences (just like for counting
+ngrams) and then combine the sentences into one flat stream of words.
+
+    >>> from nltk.lm.preprocessing import flatten
+    >>> list(flatten(pad_both_ends(sent, n=2) for sent in text))
+    ['<s>', 'a', 'b', 'c', '</s>', '<s>', 'a', 'c', 'd', 'c', 'e', 'f', '</s>']
+
+In most cases we want to use the same text as the source for both vocabulary
+and ngram counts.
+Now that we understand what this means for our preprocessing, we can simply import
+a function that does everything for us.
+
+    >>> from nltk.lm.preprocessing import padded_everygram_pipeline
+    >>> train, vocab = padded_everygram_pipeline(2, text)
+
+So as to avoid re-creating the text in memory, both `train` and `vocab` are lazy
+iterators. They are evaluated on demand at training time.
+
+
+Training
+========
+Having prepared our data we are ready to start training a model.
+As a simple example, let us train a Maximum Likelihood Estimator (MLE).
+We only need to specify the highest ngram order to instantiate it.
+
+    >>> from nltk.lm import MLE
+    >>> lm = MLE(2)
+
+This automatically creates an empty vocabulary...
+
+    >>> len(lm.vocab)
+    0
+
+... which gets filled as we fit the model.
+
+    >>> lm.fit(train, vocab)
+    >>> print(lm.vocab)
+    <Vocabulary with cutoff=1 unk_label='<UNK>' and 9 items>
+    >>> len(lm.vocab)
+    9
+
+The vocabulary helps us handle words that have not occurred during training.
+
+    >>> lm.vocab.lookup(text[0])
+    ('a', 'b', 'c')
+    >>> lm.vocab.lookup(["aliens", "from", "Mars"])
+    ('<UNK>', '<UNK>', '<UNK>')
+
+Moreover, in some cases we want to ignore words that we did see during training
+but that didn't occur frequently enough to provide us with useful information.
+You can tell the vocabulary to ignore such words.
+To find out how that works, check out the docs for the `Vocabulary` class.
+
+
+Using a Trained Model
+=====================
+When it comes to ngram models the training boils down to counting up the ngrams
+from the training corpus.
+
+    >>> print(lm.counts)
+    <NgramCounter with 2 ngram orders and 24 ngrams>
+
+This provides a convenient interface to access counts for unigrams...
+
+    >>> lm.counts['a']
+    2
+
+...and bigrams (in this case "a b")
+
+    >>> lm.counts[['a']]['b']
+    1
+
+And so on. However, the real purpose of training a language model is to have it
+score how probable words are in certain contexts.
+This being MLE, the model returns the item's relative frequency as its score.
+
+    >>> lm.score("a")
+    0.15384615384615385
+
+Items that are not seen during training are mapped to the vocabulary's
+"unknown label" token. This is "<UNK>" by default.
+
+    >>> lm.score("<UNK>") == lm.score("aliens")
+    True
+
+Here's how you get the score for a word given some preceding context.
+For example, we want to know the chance of "b" occurring when preceded by "a".
+
+    >>> lm.score("b", ["a"])
+    0.5
+
+To avoid underflow when working with many small score values it makes sense to
+take their logarithm.
+For convenience this can be done with the `logscore` method.
+
+    >>> lm.logscore("a")
+    -2.700439718141092
+
+Building on this method, we can also evaluate our model's cross-entropy and
+perplexity with respect to sequences of ngrams.
+
+    >>> test = [('a', 'b'), ('c', 'd')]
+    >>> lm.entropy(test)
+    1.292481250360578
+    >>> lm.perplexity(test)
+    2.449489742783178
+
+It is advisable to preprocess your test text exactly the same way as you did
+the training text.
+
+One cool feature of ngram models is that they can be used to generate text.
+
+    >>> lm.generate(1, random_seed=3)
+    '<s>'
+    >>> lm.generate(5, random_seed=3)
+    ['<s>', 'a', 'b', 'c', 'd']
+
+Provide `random_seed` if you want to consistently reproduce the same text all
+other things being equal. Here we are using it to test the examples.
+
+You can also condition your generation on some preceding text with the `text_seed`
+argument.
+
+    >>> lm.generate(5, text_seed=['c'], random_seed=3)
+    ['</s>', 'c', 'd', 'c', 'd']
+
+Note that an ngram model is restricted in how much preceding context it can
+take into account. For example, a trigram model can only condition its output
+on 2 preceding words. If you pass in a 4-word context, the first two words
+will be ignored.
+"""
+
+from nltk.lm.models import (
+    MLE,
+    Lidstone,
+    Laplace,
+    WittenBellInterpolated,
+    KneserNeyInterpolated,
+)
+from nltk.lm.counter import NgramCounter
+from nltk.lm.vocabulary import Vocabulary
+
+__all__ = [
+    "Vocabulary",
+    "NgramCounter",
+    "MLE",
+    "Lidstone",
+    "Laplace",
+    "WittenBellInterpolated",
+    "KneserNeyInterpolated",
+]
diff --git a/nlp_resource_data/nltk/lm/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..5f3ebc2
Binary files /dev/null and b/nlp_resource_data/nltk/lm/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/lm/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f9c0a39
Binary files /dev/null and b/nlp_resource_data/nltk/lm/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/lm/__pycache__/counter.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/counter.cpython-37.pyc
new file mode 100644 (file)
index 0000000..d7a2490
Binary files /dev/null and b/nlp_resource_data/nltk/lm/__pycache__/counter.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/lm/__pycache__/models.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/models.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2c5d88e
Binary files /dev/null and b/nlp_resource_data/nltk/lm/__pycache__/models.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/lm/__pycache__/preprocessing.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/preprocessing.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e7c7319
Binary files /dev/null and b/nlp_resource_data/nltk/lm/__pycache__/preprocessing.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/lm/__pycache__/smoothing.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/smoothing.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0918ee6
Binary files /dev/null and b/nlp_resource_data/nltk/lm/__pycache__/smoothing.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/lm/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a4ab715
Binary files /dev/null and b/nlp_resource_data/nltk/lm/__pycache__/util.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/lm/__pycache__/vocabulary.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/vocabulary.cpython-37.pyc
new file mode 100644 (file)
index 0000000..edc8fd1
Binary files /dev/null and b/nlp_resource_data/nltk/lm/__pycache__/vocabulary.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/lm/api.py b/nlp_resource_data/nltk/lm/api.py
new file mode 100644 (file)
index 0000000..60e9f87
--- /dev/null
@@ -0,0 +1,232 @@
+# Natural Language Toolkit: Language Models
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Authors: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+"""Language Model Interface."""
+
+import random
+from abc import ABCMeta, abstractmethod
+from bisect import bisect
+
+
+from nltk.lm.counter import NgramCounter
+from nltk.lm.util import log_base2
+from nltk.lm.vocabulary import Vocabulary
+
+from itertools import accumulate
+
+
+class Smoothing(metaclass=ABCMeta):
+    """Ngram Smoothing Interface
+
+    Implements Chen & Goodman 1995's idea that all smoothing algorithms have
+    certain features in common. This should ideally allow smoothing algorithms to
+    work both with Backoff and Interpolation.
+    """
+
+    def __init__(self, vocabulary, counter):
+        """
+        :param vocabulary: The Ngram vocabulary object.
+        :type vocabulary: nltk.lm.vocab.Vocabulary
+        :param counter: The counts of the vocabulary items.
+        :type counter: nltk.lm.counter.NgramCounter
+        """
+        self.vocab = vocabulary
+        self.counts = counter
+
+    @abstractmethod
+    def unigram_score(self, word):
+        raise NotImplementedError()
+
+    @abstractmethod
+    def alpha_gamma(self, word, context):
+        raise NotImplementedError()
+
+
+def _mean(items):
+    """Return average (aka mean) for sequence of items."""
+    return sum(items) / len(items)
+
+
+def _random_generator(seed_or_generator):
+    if isinstance(seed_or_generator, random.Random):
+        return seed_or_generator
+    return random.Random(seed_or_generator)
+
+
+def _weighted_choice(population, weights, random_generator=None):
+    """Like random.choice, but with weights.
+
+    Heavily inspired by python 3.6 `random.choices`.
+    """
+    if not population:
+        raise ValueError("Can't choose from empty population")
+    if len(population) != len(weights):
+        raise ValueError("The number of weights does not match the population")
+    cum_weights = list(accumulate(weights))
+    total = cum_weights[-1]
+    threshold = random_generator.random()
+    return population[bisect(cum_weights, total * threshold)]
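
For instance, calling the helper above (weights need not sum to 1, since
only the cumulative total matters):

    import random

    rng = random.Random(42)
    _weighted_choice(["a", "b", "c"], [1, 1, 8], rng)  # 'c' for this seed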
+
+
+class LanguageModel(metaclass=ABCMeta):
+    """ABC for Language Models.
+
+    Cannot be directly instantiated itself.
+
+    """
+
+    def __init__(self, order, vocabulary=None, counter=None):
+        """Creates new LanguageModel.
+
+        :param vocabulary: If provided, this vocabulary will be used instead
+        of creating a new one when training.
+        :type vocabulary: `nltk.lm.Vocabulary` or None
+        :param counter: If provided, use this object to count ngrams.
+        :type counter: `nltk.lm.NgramCounter` or None
+        :param ngrams_fn: If given, defines how sentences in training text are turned into ngram
+                          sequences.
+        :type ngrams_fn: function or None
+        :param pad_fn: If given, defines how sentences in training text are padded.
+        :type pad_fn: function or None
+
+        """
+        self.order = order
+        self.vocab = Vocabulary() if vocabulary is None else vocabulary
+        self.counts = NgramCounter() if counter is None else counter
+
+    def fit(self, text, vocabulary_text=None):
+        """Trains the model on a text.
+
+        :param text: Training text as a sequence of sentences.
+
+        """
+        if not self.vocab:
+            if vocabulary_text is None:
+                raise ValueError(
+                    "Cannot fit without a vocabulary or text to create it from."
+                )
+            self.vocab.update(vocabulary_text)
+        self.counts.update(self.vocab.lookup(sent) for sent in text)
+
+    def score(self, word, context=None):
+        """Masks out of vocab (OOV) words and computes their model score.
+
+        For model-specific logic of calculating scores, see the `unmasked_score`
+        method.
+        """
+        return self.unmasked_score(
+            self.vocab.lookup(word), self.vocab.lookup(context) if context else None
+        )
+
+    @abstractmethod
+    def unmasked_score(self, word, context=None):
+        """Score a word given some optional context.
+
+        Concrete models are expected to provide an implementation.
+        Note that this method does not mask its arguments with the OOV label.
+        Use the `score` method for that.
+
+        :param str word: Word for which we want the score
+        :param tuple(str) context: Context the word is in.
+        If `None`, compute unigram score.
+        :param context: tuple(str) or None
+        :rtype: float
+
+        """
+        raise NotImplementedError()
+
+    def logscore(self, word, context=None):
+        """Evaluate the log score of this word in this context.
+
+        The arguments are the same as for `score` and `unmasked_score`.
+
+        """
+        return log_base2(self.score(word, context))
+
+    def context_counts(self, context):
+        """Helper method for retrieving counts for a given context.
+
+        Assumes context has been checked and oov words in it masked.
+        :type context: tuple(str) or None
+
+        """
+        return (
+            self.counts[len(context) + 1][context] if context else self.counts.unigrams
+        )
+
+    def entropy(self, text_ngrams):
+        """Calculate cross-entropy of model for given evaluation text.
+
+        :param Iterable(tuple(str)) text_ngrams: A sequence of ngram tuples.
+        :rtype: float
+
+        """
+        return -1 * _mean(
+            [self.logscore(ngram[-1], ngram[:-1]) for ngram in text_ngrams]
+        )
+
+    def perplexity(self, text_ngrams):
+        """Calculates the perplexity of the given text.
+
+        This is simply 2 ** cross-entropy for the text, so the arguments are the same.
+
+        """
+        return pow(2.0, self.entropy(text_ngrams))
+
+    def generate(self, num_words=1, text_seed=None, random_seed=None):
+        """Generate words from the model.
+
+        :param int num_words: How many words to generate. By default 1.
+        :param text_seed: Generation can be conditioned on preceding context.
+        :param random_seed: A random seed or an instance of `random.Random`. If provided,
+        makes the random sampling part of generation reproducible.
+        :return: One (str) word or a list of words generated from model.
+
+        Examples:
+
+        >>> from nltk.lm import MLE
+        >>> lm = MLE(2)
+        >>> lm.fit([[("a", "b"), ("b", "c")]], vocabulary_text=['a', 'b', 'c'])
+        >>> lm.fit([[("a",), ("b",), ("c",)]])
+        >>> lm.generate(random_seed=3)
+        'a'
+        >>> lm.generate(text_seed=['a'])
+        'b'
+
+        """
+        text_seed = [] if text_seed is None else list(text_seed)
+        random_generator = _random_generator(random_seed)
+        # This is the base recursion case.
+        if num_words == 1:
+            context = (
+                text_seed[-self.order + 1 :]
+                if len(text_seed) >= self.order
+                else text_seed
+            )
+            samples = self.context_counts(self.vocab.lookup(context))
+            while context and not samples:
+                context = context[1:] if len(context) > 1 else []
+                samples = self.context_counts(self.vocab.lookup(context))
+            # Sorting samples achieves two things:
+            # - reproducible randomness when sampling
+            # - turns Mapping into Sequence which `_weighted_choice` expects
+            samples = sorted(samples)
+            return _weighted_choice(
+                samples,
+                tuple(self.score(w, context) for w in samples),
+                random_generator,
+            )
+        # We build up text one word at a time using the preceding context.
+        generated = []
+        for _ in range(num_words):
+            generated.append(
+                self.generate(
+                    num_words=1,
+                    text_seed=text_seed + generated,
+                    random_seed=random_generator,
+                )
+            )
+        return generated
diff --git a/nlp_resource_data/nltk/lm/counter.py b/nlp_resource_data/nltk/lm/counter.py
new file mode 100644 (file)
index 0000000..09be9b4
--- /dev/null
@@ -0,0 +1,163 @@
+# Natural Language Toolkit
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+"""
+Language Model Counter
+----------------------
+"""
+
+from collections import defaultdict
+from collections.abc import Sequence
+
+from nltk.probability import ConditionalFreqDist, FreqDist
+
+
+class NgramCounter:
+    """Class for counting ngrams.
+
+    Will count any ngram sequence you give it ;)
+
+    First we need to make sure we are feeding the counter sentences of ngrams.
+
+    >>> text = [["a", "b", "c", "d"], ["a", "c", "d", "c"]]
+    >>> from nltk.util import ngrams
+    >>> text_bigrams = [ngrams(sent, 2) for sent in text]
+    >>> text_unigrams = [ngrams(sent, 1) for sent in text]
+
+    The counting itself is very simple.
+
+    >>> from nltk.lm import NgramCounter
+    >>> ngram_counts = NgramCounter(text_bigrams + text_unigrams)
+
+    You can conveniently access ngram counts using standard python dictionary notation.
+    String keys will give you unigram counts.
+
+    >>> ngram_counts['a']
+    2
+    >>> ngram_counts['aliens']
+    0
+
+    If you want to access counts for higher order ngrams, use a list or a tuple.
+    These are treated as "context" keys, so what you get is a frequency distribution
+    over all continuations after the given context.
+
+    >>> sorted(ngram_counts[['a']].items())
+    [('b', 1), ('c', 1)]
+    >>> sorted(ngram_counts[('a',)].items())
+    [('b', 1), ('c', 1)]
+
+    This is equivalent to specifying explicitly the order of the ngram (in this case
+    2 for bigram) and indexing on the context.
+    >>> ngram_counts[2][('a',)] is ngram_counts[['a']]
+    True
+
+    Note that the keys in `ConditionalFreqDist` cannot be lists, only tuples!
+    It is generally advisable to use the less verbose and more flexible square
+    bracket notation.
+
+    To get the count of the full ngram "a b", do this:
+
+    >>> ngram_counts[['a']]['b']
+    1
+
+    Specifying the ngram order as a number can be useful for accessing all ngrams
+    in that order.
+
+    >>> ngram_counts[2]
+    <ConditionalFreqDist with 4 conditions>
+
+    The keys of this `ConditionalFreqDist` are the contexts we discussed earlier.
+    Unigrams can also be accessed with a human-friendly alias.
+
+    >>> ngram_counts.unigrams is ngram_counts[1]
+    True
+
+    Similarly to `collections.Counter`, you can update counts after initialization.
+
+    >>> ngram_counts['e']
+    0
+    >>> ngram_counts.update([ngrams(["d", "e", "f"], 1)])
+    >>> ngram_counts['e']
+    1
+
+    """
+
+    def __init__(self, ngram_text=None):
+        """Creates a new NgramCounter.
+
+        If `ngram_text` is specified, counts ngrams from it, otherwise waits for
+        `update` method to be called explicitly.
+
+        :param ngram_text: Optional text containing sentences of ngrams, as for `update` method.
+        :type ngram_text: Iterable(Iterable(tuple(str))) or None
+
+        """
+        self._counts = defaultdict(ConditionalFreqDist)
+        self._counts[1] = self.unigrams = FreqDist()
+
+        if ngram_text:
+            self.update(ngram_text)
+
+    def update(self, ngram_text):
+        """Updates ngram counts from `ngram_text`.
+
+        Expects `ngram_text` to be a sequence of sentences (sequences).
+        Each sentence consists of ngrams as tuples of strings.
+
+        :param Iterable(Iterable(tuple(str))) ngram_text: Text containing sentences of ngrams.
+        :raises TypeError: if the ngrams are not tuples.
+
+        """
+
+        for sent in ngram_text:
+            for ngram in sent:
+                if not isinstance(ngram, tuple):
+                    raise TypeError(
+                        "Ngram <{0}> isn't a tuple, "
+                        "but {1}".format(ngram, type(ngram))
+                    )
+
+                ngram_order = len(ngram)
+                if ngram_order == 1:
+                    self.unigrams[ngram[0]] += 1
+                    continue
+
+                context, word = ngram[:-1], ngram[-1]
+                self[ngram_order][context][word] += 1
+
+    def N(self):
+        """Returns grand total number of ngrams stored.
+
+        This includes ngrams from all orders, so some duplication is expected.
+        :rtype: int
+
+        >>> from nltk.lm import NgramCounter
+        >>> counts = NgramCounter([[("a", "b"), ("c",), ("d", "e")]])
+        >>> counts.N()
+        3
+
+        """
+        return sum(val.N() for val in self._counts.values())
+
+    def __getitem__(self, item):
+        """User-friendly access to ngram counts."""
+        if isinstance(item, int):
+            return self._counts[item]
+        elif isinstance(item, str):
+            return self._counts.__getitem__(1)[item]
+        elif isinstance(item, Sequence):
+            return self._counts.__getitem__(len(item) + 1)[tuple(item)]
+
+    def __str__(self):
+        return "<{0} with {1} ngram orders and {2} ngrams>".format(
+            self.__class__.__name__, len(self._counts), self.N()
+        )
+
+    def __len__(self):
+        return self._counts.__len__()
+
+    def __contains__(self, item):
+        return item in self._counts
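A minimal end-to-end sketch of the `__getitem__` dispatch above (assuming the same `nltk.lm` and `nltk.util` imports the doctests use):

    >>> from nltk.util import ngrams
    >>> from nltk.lm import NgramCounter
    >>> counts = NgramCounter([list(ngrams(["a", "b", "a", "c"], 2))])
    >>> counts[2][('a',)]['b']  # int key: order, then context, then word
    1
    >>> counts[['a']]['b']      # Sequence key: order inferred as len(context) + 1
    1
    >>> counts['a']             # str key: unigram count (only bigrams were added)
    0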
diff --git a/nlp_resource_data/nltk/lm/models.py b/nlp_resource_data/nltk/lm/models.py
new file mode 100644 (file)
index 0000000..f459af3
--- /dev/null
@@ -0,0 +1,99 @@
+# Natural Language Toolkit: Language Models
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+"""Language Models"""
+
+from nltk.lm.api import LanguageModel, Smoothing
+from nltk.lm.smoothing import KneserNey, WittenBell
+
+
+class MLE(LanguageModel):
+    """Class for providing MLE ngram model scores.
+
+    Inherits initialization from LanguageModel.
+    """
+
+    def unmasked_score(self, word, context=None):
+        """Returns the MLE score for a word given a context.
+
+        Args:
+        - word is expected to be a string
+        - context is expected to be something reasonably convertible to a tuple
+        """
+        return self.context_counts(context).freq(word)
+
+
+class Lidstone(LanguageModel):
+    """Provides Lidstone-smoothed scores.
+
+    In addition to the initialization arguments of LanguageModel, also requires
+    a number by which to increase the counts, gamma.
+    """
+
+    def __init__(self, gamma, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.gamma = gamma
+
+    def unmasked_score(self, word, context=None):
+        """Add-one smoothing: Lidstone or Laplace.
+
+        To see what kind, look at `gamma` attribute on the class.
+
+        """
+        counts = self.context_counts(context)
+        word_count = counts[word]
+        norm_count = counts.N()
+        return (word_count + self.gamma) / (norm_count + len(self.vocab) * self.gamma)
+
+
+class Laplace(Lidstone):
+    """Implements Laplace (add one) smoothing.
+
+    Initialization identical to Lidstone, except gamma is always 1.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(1, *args, **kwargs)
+
+
+class InterpolatedLanguageModel(LanguageModel):
+    """Logic common to all interpolated language models.
+
+    The idea to abstract this comes from Chen & Goodman 1995.
+    Do not instantiate this class directly!
+    """
+
+    def __init__(self, smoothing_cls, order, **kwargs):
+        assert issubclass(smoothing_cls, Smoothing)
+        params = kwargs.pop("params", {})
+        super().__init__(order, **kwargs)
+        self.estimator = smoothing_cls(self.vocab, self.counts, **params)
+
+    def unmasked_score(self, word, context=None):
+        if not context:
+            # The base recursion case: no context, we only have a unigram.
+            return self.estimator.unigram_score(word)
+        if not self.counts[context]:
+            # It can also happen that we have no data for this context.
+            # In that case we defer to the lower-order ngram.
+            # This is the same as setting alpha to 0 and gamma to 1.
+            return self.unmasked_score(word, context[1:])
+        alpha, gamma = self.estimator.alpha_gamma(word, context)
+        return alpha + gamma * self.unmasked_score(word, context[1:])
+
+
+class WittenBellInterpolated(InterpolatedLanguageModel):
+    """Interpolated version of Witten-Bell smoothing."""
+
+    def __init__(self, order, **kwargs):
+        super().__init__(WittenBell, order, **kwargs)
+
+
+class KneserNeyInterpolated(InterpolatedLanguageModel):
+    """Interpolated version of Kneser-Ney smoothing."""
+
+    def __init__(self, order, discount=0.1, **kwargs):
+        super().__init__(KneserNey, order, params={"discount": discount}, **kwargs)
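A short sketch of the models in use; it leans on `padded_everygram_pipeline` from nlp_resource_data/nltk/lm/preprocessing.py (added below) and on `LanguageModel.fit` from nltk/lm/api.py, which is outside this hunk:

    >>> from nltk.lm import MLE
    >>> from nltk.lm.preprocessing import padded_everygram_pipeline
    >>> train, words = padded_everygram_pipeline(2, [["a", "b", "b"]])
    >>> lm = MLE(2)
    >>> lm.fit(train, words)
    >>> lm.score("b", ["b"])  # context ("b",) continues with "b" or "</s>"
    0.5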
diff --git a/nlp_resource_data/nltk/lm/preprocessing.py b/nlp_resource_data/nltk/lm/preprocessing.py
new file mode 100644 (file)
index 0000000..8279cd4
--- /dev/null
@@ -0,0 +1,50 @@
+# Natural Language Toolkit: Language Model Preprocessing
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+from functools import partial
+from itertools import chain
+
+from nltk.util import everygrams, pad_sequence
+
+flatten = chain.from_iterable
+pad_both_ends = partial(
+    pad_sequence,
+    pad_left=True,
+    left_pad_symbol="<s>",
+    pad_right=True,
+    right_pad_symbol="</s>",
+)
+pad_both_ends.__doc__ = """Pads both ends of a sentence to length specified by ngram order.
+
+    Following convention, <s> pads the start of a sentence and </s> pads its end.
+    """
+
+
+def padded_everygrams(order, sentence):
+    """Helper with some useful defaults.
+
+    Applies pad_both_ends to sentence and follows it up with everygrams.
+    """
+    return everygrams(list(pad_both_ends(sentence, n=order)), max_len=order)
+
+
+def padded_everygram_pipeline(order, text):
+    """Default preprocessing for a sequence of sentences.
+
+    Creates two iterators:
+    - sentences padded and turned into sequences of `nltk.util.everygrams`
+    - sentences padded as above and chained together for a flat stream of words
+
+    :param order: Largest ngram length produced by `everygrams`.
+    :param text: Text to iterate over. Expected to be an iterable of sentences:
+    Iterable[Iterable[str]]
+    :return: iterator over text as ngrams, iterator over text as vocabulary data
+    """
+    padding_fn = partial(pad_both_ends, n=order)
+    return (
+        (everygrams(list(padding_fn(sent)), max_len=order) for sent in text),
+        flatten(map(padding_fn, text)),
+    )
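For reference, a sketch of what the two returned iterators yield for a single two-word sentence; `sorted` is used because the generation order of `everygrams` is an implementation detail:

    >>> from nltk.lm.preprocessing import padded_everygram_pipeline
    >>> ngram_sents, flat_words = padded_everygram_pipeline(2, [["a", "b"]])
    >>> sorted(next(ngram_sents))
    [('</s>',), ('<s>',), ('<s>', 'a'), ('a',), ('a', 'b'), ('b',), ('b', '</s>')]
    >>> list(flat_words)
    ['<s>', 'a', 'b', '</s>']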
diff --git a/nlp_resource_data/nltk/lm/smoothing.py b/nlp_resource_data/nltk/lm/smoothing.py
new file mode 100644 (file)
index 0000000..02b8df2
--- /dev/null
@@ -0,0 +1,56 @@
+# Natural Language Toolkit: Language Model Smoothing
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+"""Smoothing algorithms for language modeling.
+
+According to Chen & Goodman 1995 these should work with both Backoff and
+Interpolation.
+"""
+
+from nltk.lm.api import Smoothing
+
+
+def _count_non_zero_vals(dictionary):
+    return sum(1.0 for c in dictionary.values() if c > 0)
+
+
+class WittenBell(Smoothing):
+    """Witten-Bell smoothing."""
+
+    def __init__(self, vocabulary, counter, **kwargs):
+        super().__init__(vocabulary, counter, **kwargs)
+
+    def alpha_gamma(self, word, context):
+        alpha = self.counts[context].freq(word)
+        gamma = self._gamma(context)
+        return (1.0 - gamma) * alpha, gamma
+
+    def _gamma(self, context):
+        n_plus = _count_non_zero_vals(self.counts[context])
+        return n_plus / (n_plus + self.counts[len(context) + 1].N())
+
+    def unigram_score(self, word):
+        return self.counts.unigrams.freq(word)
+
+
+class KneserNey(Smoothing):
+    """Kneser-Ney Smoothing."""
+
+    def __init__(self, vocabulary, counter, discount=0.1, **kwargs):
+        super().__init__(vocabulary, counter, **kwargs)
+        self.discount = discount
+
+    def unigram_score(self, word):
+        return 1.0 / len(self.vocab)
+
+    def alpha_gamma(self, word, context):
+        prefix_counts = self.counts[context]
+        prefix_total_ngrams = prefix_counts.N()
+        alpha = max(prefix_counts[word] - self.discount, 0.0) / prefix_total_ngrams
+        gamma = (
+            self.discount * _count_non_zero_vals(prefix_counts) / prefix_total_ngrams
+        )
+        return alpha, gamma
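A worked check of the Kneser-Ney arithmetic above, as a sketch that relies on `KneserNeyInterpolated` from models.py and the preprocessing pipeline. Training on ["a", "b", "b"] padded to bigrams gives context ("b",) the counts {"b": 1, "</s>": 1}, so alpha = (1 - 0.1)/2 = 0.45 and gamma = 0.1 * 2/2 = 0.1; the unigram fallback is 1/len(vocab) = 1/5 (four word types plus "<UNK>"), hence 0.45 + 0.1 * 0.2 = 0.47:

    >>> from nltk.lm import KneserNeyInterpolated
    >>> from nltk.lm.preprocessing import padded_everygram_pipeline
    >>> train, words = padded_everygram_pipeline(2, [["a", "b", "b"]])
    >>> lm = KneserNeyInterpolated(2, discount=0.1)
    >>> lm.fit(train, words)
    >>> round(lm.score("b", ["b"]), 4)
    0.47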
diff --git a/nlp_resource_data/nltk/lm/util.py b/nlp_resource_data/nltk/lm/util.py
new file mode 100644 (file)
index 0000000..0da2eb2
--- /dev/null
@@ -0,0 +1,19 @@
+# Natural Language Toolkit
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+"""Language Model Utilities"""
+
+from math import log
+
+NEG_INF = float("-inf")
+POS_INF = float("inf")
+
+
+def log_base2(score):
+    """Convenience function for computing logarithms with base 2."""
+    if score == 0.0:
+        return NEG_INF
+    return log(score, 2)
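Behavior sketch, including the zero guard:

    >>> from nltk.lm.util import log_base2, NEG_INF
    >>> log_base2(0.25)
    -2.0
    >>> log_base2(0.0) == NEG_INF
    True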
diff --git a/nlp_resource_data/nltk/lm/vocabulary.py b/nlp_resource_data/nltk/lm/vocabulary.py
new file mode 100644 (file)
index 0000000..39ba6b2
--- /dev/null
@@ -0,0 +1,223 @@
+# Natural Language Toolkit
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+"""Language Model Vocabulary"""
+
+import sys
+from collections import Counter
+from collections.abc import Iterable
+from itertools import chain
+from functools import singledispatch
+
+
+@singledispatch
+def _dispatched_lookup(words, vocab):
+    raise TypeError(
+        "Unsupported type for looking up in vocabulary: {0}".format(type(words))
+    )
+
+
+@_dispatched_lookup.register(Iterable)
+def _(words, vocab):
+    """Look up a sequence of words in the vocabulary.
+
+    Returns a tuple of the looked-up words.
+
+    """
+    return tuple(_dispatched_lookup(w, vocab) for w in words)
+
+
+@_dispatched_lookup.register(str)
+def _string_lookup(word, vocab):
+    """Looks up one word in the vocabulary."""
+    return word if word in vocab else vocab.unk_label
+
+
+class Vocabulary:
+    """Stores language model vocabulary.
+
+    Satisfies two common language modeling requirements for a vocabulary:
+    - When checking membership and calculating its size, filters items
+      by comparing their counts to a cutoff value.
+    - Adds a special "unknown" token which unseen words are mapped to.
+
+    >>> words = ['a', 'c', '-', 'd', 'c', 'a', 'b', 'r', 'a', 'c', 'd']
+    >>> from nltk.lm import Vocabulary
+    >>> vocab = Vocabulary(words, unk_cutoff=2)
+
+    Tokens with counts greater than or equal to the cutoff value will
+    be considered part of the vocabulary.
+
+    >>> vocab['c']
+    3
+    >>> 'c' in vocab
+    True
+    >>> vocab['d']
+    2
+    >>> 'd' in vocab
+    True
+
+    Tokens with frequency counts less than the cutoff value will be considered not
+    part of the vocabulary even though their entries in the count dictionary are
+    preserved.
+
+    >>> vocab['b']
+    1
+    >>> 'b' in vocab
+    False
+    >>> vocab['aliens']
+    0
+    >>> 'aliens' in vocab
+    False
+
+    Keeping the count entries for seen words allows us to change the cutoff value
+    without having to recalculate the counts.
+
+    >>> vocab2 = Vocabulary(vocab.counts, unk_cutoff=1)
+    >>> "b" in vocab2
+    True
+
+    The cutoff value influences not only membership checking but also the result of
+    getting the size of the vocabulary using the built-in `len`.
+    Note that while the number of keys in the vocabulary's counter stays the same,
+    the items in the vocabulary differ depending on the cutoff.
+    We use `sorted` to demonstrate because it keeps the order consistent.
+
+    >>> sorted(vocab2.counts)
+    ['-', 'a', 'b', 'c', 'd', 'r']
+    >>> sorted(vocab2)
+    ['-', '<UNK>', 'a', 'b', 'c', 'd', 'r']
+    >>> sorted(vocab.counts)
+    ['-', 'a', 'b', 'c', 'd', 'r']
+    >>> sorted(vocab)
+    ['<UNK>', 'a', 'c', 'd']
+
+    In addition to items it gets populated with, the vocabulary stores a special
+    token that stands in for so-called "unknown" items. By default it's "<UNK>".
+
+    >>> "<UNK>" in vocab
+    True
+
+    We can look up words in a vocabulary using its `lookup` method.
+    "Unseen" words (with counts less than cutoff) are looked up as the unknown label.
+    If given one word (a string) as an input, this method will return a string.
+
+    >>> vocab.lookup("a")
+    'a'
+    >>> vocab.lookup("aliens")
+    '<UNK>'
+
+    If given a sequence, it will return a tuple of the looked-up words.
+
+    >>> vocab.lookup(["p", 'a', 'r', 'd', 'b', 'c'])
+    ('<UNK>', 'a', '<UNK>', 'd', '<UNK>', 'c')
+
+    It's possible to update the counts after the vocabulary has been created.
+    In general, the interface is the same as that of `collections.Counter`.
+
+    >>> vocab['b']
+    1
+    >>> vocab.update(["b", "b", "c"])
+    >>> vocab['b']
+    3
+    """
+
+    def __init__(self, counts=None, unk_cutoff=1, unk_label="<UNK>"):
+        """Create a new Vocabulary.
+
+        :param counts: Optional iterable or `collections.Counter` instance to
+                       pre-seed the Vocabulary. In case it is iterable, counts
+                       are calculated.
+        :param int unk_cutoff: Words that occur less frequently than this value
+                               are not considered part of the vocabulary.
+        :param unk_label: Label for marking words not part of vocabulary.
+
+        """
+        if isinstance(counts, Counter):
+            self.counts = counts
+        else:
+            self.counts = Counter()
+            if isinstance(counts, Iterable):
+                self.counts.update(counts)
+        self.unk_label = unk_label
+        if unk_cutoff < 1:
+            raise ValueError(
+                "Cutoff value cannot be less than 1. Got: {0}".format(unk_cutoff)
+            )
+        self._cutoff = unk_cutoff
+
+    @property
+    def cutoff(self):
+        """Cutoff value.
+
+        Items with count below this value are not considered part of the vocabulary.
+
+        """
+        return self._cutoff
+
+    def update(self, *counter_args, **counter_kwargs):
+        """Update vocabulary counts.
+
+        Wraps `collections.Counter.update` method.
+
+        """
+        self.counts.update(*counter_args, **counter_kwargs)
+
+    def lookup(self, words):
+        """Look up one or more words in the vocabulary.
+
+        If passed a single word as a string, returns that word or `self.unk_label`.
+        Otherwise, assumes it was passed a sequence of words, looks each of them
+        up, and returns a tuple of the looked-up words.
+
+        :param words: Word(s) to look up.
+        :type words: Iterable(str) or str
+        :rtype: tuple(str) or str
+        :raises: TypeError for types other than strings or iterables
+
+        >>> from nltk.lm import Vocabulary
+        >>> vocab = Vocabulary(["a", "b", "c", "a", "b"], unk_cutoff=2)
+        >>> vocab.lookup("a")
+        'a'
+        >>> vocab.lookup("aliens")
+        '<UNK>'
+        >>> vocab.lookup(["a", "b", "c", ["x", "b"]])
+        ('a', 'b', '<UNK>', ('<UNK>', 'b'))
+
+        """
+        return _dispatched_lookup(words, self)
+
+    def __getitem__(self, item):
+        return self._cutoff if item == self.unk_label else self.counts[item]
+
+    def __contains__(self, item):
+        """Only consider items with counts GE to cutoff as being in the
+        vocabulary."""
+        return self[item] >= self.cutoff
+
+    def __iter__(self):
+        """Building on membership check define how to iterate over
+        vocabulary."""
+        return chain(
+            (item for item in self.counts if item in self),
+            [self.unk_label] if self.counts else [],
+        )
+
+    def __len__(self):
+        """Computing size of vocabulary reflects the cutoff."""
+        return sum(1 for _ in self)
+
+    def __eq__(self, other):
+        return (
+            self.unk_label == other.unk_label
+            and self.cutoff == other.cutoff
+            and self.counts == other.counts
+        )
+
+    def __str__(self):
+        return "<{0} with cutoff={1} unk_label='{2}' and {3} items>".format(
+            self.__class__.__name__, self.cutoff, self.unk_label, len(self)
+        )
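The `singledispatch` registrations at the top of the file are what make `lookup` polymorphic: `str` wins over `Iterable` because it is the more specifically registered type, and nested sequences recurse. A sketch of both dispatch paths, including the unsupported-type error:

    >>> from nltk.lm import Vocabulary
    >>> vocab = Vocabulary(["a", "a", "b"], unk_cutoff=2)
    >>> vocab.lookup(("a", ("b", "a")))
    ('a', ('<UNK>', 'a'))
    >>> vocab.lookup(1)
    Traceback (most recent call last):
        ...
    TypeError: Unsupported type for looking up in vocabulary: <class 'int'>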
diff --git a/nlp_resource_data/nltk/metrics/__init__.py b/nlp_resource_data/nltk/metrics/__init__.py
old mode 100755 (executable)
new mode 100644 (file)
index 2205cc6..59be7df
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Metrics
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -13,18 +13,39 @@ NLTK Metrics
 Classes and methods for scoring processing modules.
 """
 
-from nltk.metrics.scores import          (accuracy, precision, recall, f_measure,
-                                          log_likelihood, approxrand)
+from nltk.metrics.scores import (
+    accuracy,
+    precision,
+    recall,
+    f_measure,
+    log_likelihood,
+    approxrand,
+)
 from nltk.metrics.confusionmatrix import ConfusionMatrix
-from nltk.metrics.distance        import (edit_distance, binary_distance,
-                                          jaccard_distance, masi_distance,
-                                          interval_distance, custom_distance,
-                                          presence, fractional_presence)
-from nltk.metrics.paice           import Paice
-from nltk.metrics.segmentation    import windowdiff, ghd, pk
-from nltk.metrics.agreement       import AnnotationTask
-from nltk.metrics.association     import (NgramAssocMeasures, BigramAssocMeasures,
-                                          TrigramAssocMeasures, ContingencyMeasures)
-from nltk.metrics.spearman        import (spearman_correlation, ranks_from_sequence,
-                                          ranks_from_scores)
-from nltk.metrics.aline           import align
+from nltk.metrics.distance import (
+    edit_distance,
+    edit_distance_align,
+    binary_distance,
+    jaccard_distance,
+    masi_distance,
+    interval_distance,
+    custom_distance,
+    presence,
+    fractional_presence,
+)
+from nltk.metrics.paice import Paice
+from nltk.metrics.segmentation import windowdiff, ghd, pk
+from nltk.metrics.agreement import AnnotationTask
+from nltk.metrics.association import (
+    NgramAssocMeasures,
+    BigramAssocMeasures,
+    TrigramAssocMeasures,
+    QuadgramAssocMeasures,
+    ContingencyMeasures,
+)
+from nltk.metrics.spearman import (
+    spearman_correlation,
+    ranks_from_sequence,
+    ranks_from_scores,
+)
+from nltk.metrics.aline import align
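Besides reflowing the imports, the new block re-exports `edit_distance_align` and `QuadgramAssocMeasures`. A quick smoke test of the package surface (a sketch using the long-standing `edit_distance` export):

    >>> from nltk.metrics import edit_distance
    >>> edit_distance("kitten", "sitting")
    3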
diff --git a/nlp_resource_data/nltk/metrics/__init__.pyc b/nlp_resource_data/nltk/metrics/__init__.pyc
deleted file mode 100755 (executable)
index 419ee04..0000000
Binary files a/nlp_resource_data/nltk/metrics/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/metrics/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..b0a9b1a
Binary files /dev/null and b/nlp_resource_data/nltk/metrics/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/metrics/__pycache__/agreement.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/agreement.cpython-37.pyc
new file mode 100644 (file)
index 0000000..152e55e
Binary files /dev/null and b/nlp_resource_data/nltk/metrics/__pycache__/agreement.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/metrics/__pycache__/aline.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/aline.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a3fe392
Binary files /dev/null and b/nlp_resource_data/nltk/metrics/__pycache__/aline.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/metrics/__pycache__/association.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/association.cpython-37.pyc
new file mode 100644 (file)
index 0000000..d668cc3
Binary files /dev/null and b/nlp_resource_data/nltk/metrics/__pycache__/association.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/metrics/__pycache__/confusionmatrix.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/confusionmatrix.cpython-37.pyc
new file mode 100644 (file)
index 0000000..5f05b53
Binary files /dev/null and b/nlp_resource_data/nltk/metrics/__pycache__/confusionmatrix.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/metrics/__pycache__/distance.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/distance.cpython-37.pyc
new file mode 100644 (file)
index 0000000..addf2e9
Binary files /dev/null and b/nlp_resource_data/nltk/metrics/__pycache__/distance.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/metrics/__pycache__/paice.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/paice.cpython-37.pyc
new file mode 100644 (file)
index 0000000..111907f
Binary files /dev/null and b/nlp_resource_data/nltk/metrics/__pycache__/paice.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/metrics/__pycache__/scores.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/scores.cpython-37.pyc
new file mode 100644 (file)
index 0000000..3a17b16
Binary files /dev/null and b/nlp_resource_data/nltk/metrics/__pycache__/scores.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/metrics/segmentation.pyc b/nlp_resource_data/nltk/metrics/__pycache__/segmentation.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 52%
rename from nlp_resource_data/nltk/metrics/segmentation.pyc
rename to nlp_resource_data/nltk/metrics/__pycache__/segmentation.cpython-37.pyc
index 839321d..c8ae7b7
Binary files a/nlp_resource_data/nltk/metrics/segmentation.pyc and b/nlp_resource_data/nltk/metrics/__pycache__/segmentation.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/metrics/__pycache__/spearman.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/spearman.cpython-37.pyc
new file mode 100644 (file)
index 0000000..608ae3d
Binary files /dev/null and b/nlp_resource_data/nltk/metrics/__pycache__/spearman.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/metrics/agreement.py b/nlp_resource_data/nltk/metrics/agreement.py
old mode 100755 (executable)
new mode 100644 (file)
index 887ad76..8fff0eb
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Agreement Metrics
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Tom Lippincott <tom@cs.columbia.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -68,23 +68,19 @@ Expected results from the Artstein and Poesio survey paper:
     1.0
 
 """
-from __future__ import print_function, unicode_literals, division
 
 import logging
 from itertools import groupby
 from operator import itemgetter
 
-from six import iteritems
-
 from nltk.probability import FreqDist, ConditionalFreqDist
 from nltk.internals import deprecated
-from nltk.compat import python_2_unicode_compatible
 
 from nltk.metrics.distance import binary_distance
 
-log = logging.getLogger(__file__)
+log = logging.getLogger(__name__)
+
 
-@python_2_unicode_compatible
 class AnnotationTask(object):
     """Represents an annotation task, i.e. people assign labels to items.
 
@@ -118,9 +114,13 @@ class AnnotationTask(object):
             self.load_array(data)
 
     def __str__(self):
-        return "\r\n".join(map(lambda x:"%s\t%s\t%s" %
-                               (x['coder'], x['item'].replace('_', "\t"),
-                                ",".join(x['labels'])), self.data))
+        return "\r\n".join(
+            map(
+                lambda x: "%s\t%s\t%s"
+                % (x["coder"], x["item"].replace("_", "\t"), ",".join(x["labels"])),
+                self.data,
+            )
+        )
 
     def load_array(self, array):
         """Load an sequence of annotation results, appending to any data already loaded.
@@ -132,7 +132,7 @@ class AnnotationTask(object):
             self.C.add(coder)
             self.K.add(labels)
             self.I.add(item)
-            self.data.append({'coder':coder, 'labels':labels, 'item':item})
+            self.data.append({"coder": coder, "labels": labels, "item": item})
 
     def agr(self, cA, cB, i, data=None):
         """Agreement between two coders on a given item
@@ -142,29 +142,29 @@ class AnnotationTask(object):
         # cfedermann: we don't know what combination of coder/item will come
         # first in x; to avoid StopIteration problems due to assuming an order
         # cA,cB, we allow either for k1 and then look up the missing as k2.
-        k1 = next((x for x in data if x['coder'] in (cA,cB) and x['item']==i))
-        if k1['coder'] == cA:
-            k2 = next((x for x in data if x['coder']==cB and x['item']==i))
+        k1 = next((x for x in data if x["coder"] in (cA, cB) and x["item"] == i))
+        if k1["coder"] == cA:
+            k2 = next((x for x in data if x["coder"] == cB and x["item"] == i))
         else:
-            k2 = next((x for x in data if x['coder']==cA and x['item']==i))
+            k2 = next((x for x in data if x["coder"] == cA and x["item"] == i))
 
-        ret = 1.0 - float(self.distance(k1['labels'], k2['labels']))
-        log.debug("Observed agreement between %s and %s on %s: %f",
-                      cA, cB, i, ret)
-        log.debug("Distance between \"%r\" and \"%r\": %f",
-                      k1['labels'], k2['labels'], 1.0 - ret)
+        ret = 1.0 - float(self.distance(k1["labels"], k2["labels"]))
+        log.debug("Observed agreement between %s and %s on %s: %f", cA, cB, i, ret)
+        log.debug(
+            'Distance between "%r" and "%r": %f', k1["labels"], k2["labels"], 1.0 - ret
+        )
         return ret
 
     def Nk(self, k):
-        return float(sum(1 for x in self.data if x['labels'] == k))
+        return float(sum(1 for x in self.data if x["labels"] == k))
 
     def Nik(self, i, k):
-        return float(sum(1 for x in self.data if x['item'] == i and x['labels'] == k))
+        return float(sum(1 for x in self.data if x["item"] == i and x["labels"] == k))
 
     def Nck(self, c, k):
-        return float(sum(1 for x in self.data if x['coder'] == c and x['labels'] == k))
+        return float(sum(1 for x in self.data if x["coder"] == c and x["labels"] == k))
 
-    @deprecated('Use Nk, Nik or Nck instead')
+    @deprecated("Use Nk, Nik or Nck instead")
     def N(self, k=None, i=None, c=None):
         """Implements the "n-notation" used in Artstein and Poesio (2007)
 
@@ -176,7 +176,9 @@ class AnnotationTask(object):
         elif k is not None and c is not None and i is None:
             ret = self.Nck(c, k)
         else:
-            raise ValueError("You must pass either i or c, not both! (k=%r,i=%r,c=%r)" % (k, i, c))
+            raise ValueError(
+                "You must pass either i or c, not both! (k=%r,i=%r,c=%r)" % (k, i, c)
+            )
         log.debug("Count on N[%s,%s,%s]: %d", k, i, c, ret)
         return ret
 
@@ -188,8 +190,12 @@ class AnnotationTask(object):
         """Observed agreement between two coders on all items.
 
         """
-        data = self._grouped_data('item', (x for x in self.data if x['coder'] in (cA, cB)))
-        ret = sum(self.agr(cA, cB, item, item_data) for item, item_data in data) / len(self.I)
+        data = self._grouped_data(
+            "item", (x for x in self.data if x["coder"] in (cA, cB))
+        )
+        ret = sum(self.agr(cA, cB, item, item_data) for item, item_data in data) / len(
+            self.I
+        )
         log.debug("Observed agreement between %s and %s: %f", cA, cB, ret)
         return ret
 
@@ -216,33 +222,15 @@ class AnnotationTask(object):
         log.debug("Average observed agreement: %f", ret)
         return ret
 
-    def Do_alpha(self):
-        """The observed disagreement for the alpha coefficient.
-
-        The alpha coefficient, unlike the other metrics, uses this rather than
-        observed agreement.
-        """
-        total = 0.0
-        for i, itemdata in self._grouped_data('item'):
-            label_freqs = FreqDist(x['labels'] for x in itemdata)
-
-            for j, nj in iteritems(label_freqs):
-                for l, nl in iteritems(label_freqs):
-                    total += float(nj * nl) * self.distance(l, j)
-        ret = (1.0 / (len(self.I) * len(self.C) * (len(self.C) - 1))) * total
-        log.debug("Observed disagreement: %f", ret)
-        return ret
-
-    def Do_Kw_pairwise(self,cA,cB,max_distance=1.0):
+    def Do_Kw_pairwise(self, cA, cB, max_distance=1.0):
         """The observed disagreement for the weighted kappa coefficient.
 
         """
         total = 0.0
-        data = (x for x in self.data if x['coder'] in (cA, cB))
-        for i, itemdata in self._grouped_data('item', data):
+        data = (x for x in self.data if x["coder"] in (cA, cB))
+        for i, itemdata in self._grouped_data("item", data):
             # we should have two items; distance doesn't care which comes first
-            total += self.distance(next(itemdata)['labels'],
-                                   next(itemdata)['labels'])
+            total += self.distance(next(itemdata)["labels"], next(itemdata)["labels"])
 
         ret = total / (len(self.I) * max_distance)
         log.debug("Observed disagreement between %s and %s: %f", cA, cB, ret)
@@ -252,7 +240,9 @@ class AnnotationTask(object):
         """Averaged over all labelers
 
         """
-        ret = self._pairwise_average(lambda cA, cB: self.Do_Kw_pairwise(cA, cB, max_distance))
+        ret = self._pairwise_average(
+            lambda cA, cB: self.Do_Kw_pairwise(cA, cB, max_distance)
+        )
         log.debug("Observed disagreement: %f", ret)
         return ret
 
@@ -271,8 +261,8 @@ class AnnotationTask(object):
 
         """
         total = 0.0
-        label_freqs = FreqDist(x['labels'] for x in self.data)
-        for k, f in iteritems(label_freqs):
+        label_freqs = FreqDist(x["labels"] for x in self.data)
+        for k, f in label_freqs.items():
             total += f ** 2
         Ae = total / ((len(self.I) * len(self.C)) ** 2)
         return (self.avg_Ao() - Ae) / (1 - Ae)
@@ -280,7 +270,7 @@ class AnnotationTask(object):
     def Ae_kappa(self, cA, cB):
         Ae = 0.0
         nitems = float(len(self.I))
-        label_freqs = ConditionalFreqDist((x['labels'], x['coder']) for x in self.data)
+        label_freqs = ConditionalFreqDist((x["labels"], x["coder"]) for x in self.data)
         for k in label_freqs.conditions():
             Ae += (label_freqs[k][cA] / nitems) * (label_freqs[k][cB] / nitems)
         return Ae
@@ -309,42 +299,56 @@ class AnnotationTask(object):
         Ae = self._pairwise_average(self.Ae_kappa)
         return (self.avg_Ao() - Ae) / (1.0 - Ae)
 
+    def Disagreement(self, label_freqs):
+        total_labels = sum(label_freqs.values())
+        pairs = 0.0
+        for j, nj in label_freqs.items():
+            for l, nl in label_freqs.items():
+                pairs += float(nj * nl) * self.distance(l, j)
+        return 1.0 * pairs / (total_labels * (total_labels - 1))
+
     def alpha(self):
         """Krippendorff 1980
 
         """
         # check for degenerate cases
-        if len(self.K)==0:
+        if len(self.K) == 0:
             raise ValueError("Cannot calculate alpha, no data present!")
         if len(self.K) == 1:
             log.debug("Only one annotation value, allpha returning 1.")
             return 1
-        if len(self.C)==1 and len(self.I) == 1:
+        if len(self.C) == 1 and len(self.I) == 1:
             raise ValueError("Cannot calculate alpha, only one coder and item present!")
 
-        De = 0.0
+        total_disagreement = 0.0
+        total_ratings = 0
+        all_valid_labels_freq = FreqDist([])
 
-        label_freqs = FreqDist(x['labels'] for x in self.data)
-        for j in self.K:
-            nj = label_freqs[j]
-            for l in self.K:
-                De += float(nj * label_freqs[l]) * self.distance(j, l)
-        try:
-            De = (1.0 / (len(self.I) * len(self.C) * (len(self.I) * len(self.C) - 1))) * De
-            log.debug("Expected disagreement: %f", De)
-            ret = 1.0 - (self.Do_alpha() / De)
-        except ZeroDivisionError:
-            raise ValueError("Cannot calculate alpha, expected disagreement zero, check the distance function!")
-        return ret
+        total_do = 0.0  # Total observed disagreement for all items.
+        for i, itemdata in self._grouped_data("item"):
+            label_freqs = FreqDist(x["labels"] for x in itemdata)
+            labels_count = sum(label_freqs.values())
+            if labels_count < 2:
+                # Ignore the item.
+                continue
+            all_valid_labels_freq += label_freqs
+            total_do += self.Disagreement(label_freqs) * labels_count
+
+        do = total_do / sum(all_valid_labels_freq.values())
+
+        de = self.Disagreement(all_valid_labels_freq)  # Expected disagreement.
+        k_alpha = 1.0 - do / de
+
+        return k_alpha
 
     def weighted_kappa_pairwise(self, cA, cB, max_distance=1.0):
         """Cohen 1968
 
         """
         total = 0.0
-        label_freqs = ConditionalFreqDist((x['coder'], x['labels'])
-                for x in self.data
-                if x['coder'] in (cA, cB))
+        label_freqs = ConditionalFreqDist(
+            (x["coder"], x["labels"]) for x in self.data if x["coder"] in (cA, cB)
+        )
         for j in self.K:
             for l in self.K:
                 total += label_freqs[cA][j] * label_freqs[cB][l] * self.distance(j, l)
@@ -358,10 +362,12 @@ class AnnotationTask(object):
         """Cohen 1968
 
         """
-        return self._pairwise_average(lambda cA, cB: self.weighted_kappa_pairwise(cA, cB, max_distance))
+        return self._pairwise_average(
+            lambda cA, cB: self.weighted_kappa_pairwise(cA, cB, max_distance)
+        )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
 
     import re
     import optparse
@@ -369,26 +375,78 @@ if __name__ == '__main__':
 
     # process command-line arguments
     parser = optparse.OptionParser()
-    parser.add_option("-d", "--distance", dest="distance", default="binary_distance",
-                      help="distance metric to use")
-    parser.add_option("-a", "--agreement", dest="agreement", default="kappa",
-                      help="agreement coefficient to calculate")
-    parser.add_option("-e", "--exclude", dest="exclude", action="append",
-                      default=[], help="coder names to exclude (may be specified multiple times)")
-    parser.add_option("-i", "--include", dest="include", action="append", default=[],
-                      help="coder names to include, same format as exclude")
-    parser.add_option("-f", "--file", dest="file",
-                      help="file to read labelings from, each line with three columns: 'labeler item labels'")
-    parser.add_option("-v", "--verbose", dest="verbose", default='0',
-                      help="how much debugging to print on stderr (0-4)")
-    parser.add_option("-c", "--columnsep", dest="columnsep", default="\t",
-                      help="char/string that separates the three columns in the file, defaults to tab")
-    parser.add_option("-l", "--labelsep", dest="labelsep", default=",",
-                      help="char/string that separates labels (if labelers can assign more than one), defaults to comma")
-    parser.add_option("-p", "--presence", dest="presence", default=None,
-                      help="convert each labeling into 1 or 0, based on presence of LABEL")
-    parser.add_option("-T", "--thorough", dest="thorough", default=False, action="store_true",
-                      help="calculate agreement for every subset of the annotators")
+    parser.add_option(
+        "-d",
+        "--distance",
+        dest="distance",
+        default="binary_distance",
+        help="distance metric to use",
+    )
+    parser.add_option(
+        "-a",
+        "--agreement",
+        dest="agreement",
+        default="kappa",
+        help="agreement coefficient to calculate",
+    )
+    parser.add_option(
+        "-e",
+        "--exclude",
+        dest="exclude",
+        action="append",
+        default=[],
+        help="coder names to exclude (may be specified multiple times)",
+    )
+    parser.add_option(
+        "-i",
+        "--include",
+        dest="include",
+        action="append",
+        default=[],
+        help="coder names to include, same format as exclude",
+    )
+    parser.add_option(
+        "-f",
+        "--file",
+        dest="file",
+        help="file to read labelings from, each line with three columns: 'labeler item labels'",
+    )
+    parser.add_option(
+        "-v",
+        "--verbose",
+        dest="verbose",
+        default="0",
+        help="how much debugging to print on stderr (0-4)",
+    )
+    parser.add_option(
+        "-c",
+        "--columnsep",
+        dest="columnsep",
+        default="\t",
+        help="char/string that separates the three columns in the file, defaults to tab",
+    )
+    parser.add_option(
+        "-l",
+        "--labelsep",
+        dest="labelsep",
+        default=",",
+        help="char/string that separates labels (if labelers can assign more than one), defaults to comma",
+    )
+    parser.add_option(
+        "-p",
+        "--presence",
+        dest="presence",
+        default=None,
+        help="convert each labeling into 1 or 0, based on presence of LABEL",
+    )
+    parser.add_option(
+        "-T",
+        "--thorough",
+        dest="thorough",
+        default=False,
+        action="store_true",
+        help="calculate agreement for every subset of the annotators",
+    )
     (options, remainder) = parser.parse_args()
 
     if not options.file:
@@ -399,17 +457,25 @@ if __name__ == '__main__':
 
     # read in data from the specified file
     data = []
-    with open(options.file, 'r') as infile:
+    with open(options.file, "r") as infile:
         for l in infile:
             toks = l.split(options.columnsep)
-            coder, object_, labels = toks[0], str(toks[1:-1]), frozenset(toks[-1].strip().split(options.labelsep))
-            if ((options.include == options.exclude) or
-                (len(options.include) > 0 and coder in options.include) or
-                (len(options.exclude) > 0 and coder not in options.exclude)):
+            coder, object_, labels = (
+                toks[0],
+                str(toks[1:-1]),
+                frozenset(toks[-1].strip().split(options.labelsep)),
+            )
+            if (
+                (options.include == options.exclude)
+                or (len(options.include) > 0 and coder in options.include)
+                or (len(options.exclude) > 0 and coder not in options.exclude)
+            ):
                 data.append((coder, object_, labels))
 
     if options.presence:
-        task = AnnotationTask(data, getattr(distance, options.distance)(options.presence))
+        task = AnnotationTask(
+            data, getattr(distance, options.distance)(options.presence)
+        )
     else:
         task = AnnotationTask(data, getattr(distance, options.distance))
 
diff --git a/nlp_resource_data/nltk/metrics/agreement.pyc b/nlp_resource_data/nltk/metrics/agreement.pyc
deleted file mode 100755 (executable)
index c6c35ae..0000000
Binary files a/nlp_resource_data/nltk/metrics/agreement.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/metrics/aline.py b/nlp_resource_data/nltk/metrics/aline.py
old mode 100755 (executable)
new mode 100644 (file)
index aa1da0d..4b88bb4
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: ALINE
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Greg Kondrak <gkondrak@ualberta.ca>
 #         Geoff Bacon <bacon@berkeley.edu> (Python port)
 # URL: <http://nltk.org/>
@@ -38,8 +38,6 @@ Example usage
 University of Toronto.
 """
 
-from __future__ import unicode_literals
-
 try:
     import numpy as np
 except ImportError:
@@ -47,328 +45,1037 @@ except ImportError:
 
 # === Constants ===
 
-inf = float('inf')
+inf = float("inf")
 
 # Default values for maximum similarity scores (Kondrak 2002: 54)
-C_skip = 10 # Indels
-C_sub  = 35  # Substitutions
-C_exp  = 45  # Expansions/compressions
-C_vwl  = 5  # Vowel/consonant relative weight (decreased from 10)
-
-consonants = ['B', 'N', 'R', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm',
-              'n', 'p', 'q', 'r', 's', 't', 'v', 'x', 'z', 'ç', 'ð', 'ħ',
-              'ŋ', 'ɖ', 'ɟ', 'ɢ', 'ɣ', 'ɦ', 'ɬ', 'ɮ', 'ɰ', 'ɱ', 'ɲ', 'ɳ', 'ɴ',
-              'ɸ', 'ɹ', 'ɻ', 'ɽ', 'ɾ', 'ʀ', 'ʁ', 'ʂ', 'ʃ', 'ʈ', 'ʋ', 'ʐ ', 'ʒ',
-              'ʔ', 'ʕ', 'ʙ', 'ʝ', 'β', 'θ', 'χ', 'ʐ', 'w']
+C_skip = 10  # Indels
+C_sub = 35  # Substitutions
+C_exp = 45  # Expansions/compressions
+C_vwl = 5  # Vowel/consonant relative weight (decreased from 10)
+
+consonants = [
+    "B",
+    "N",
+    "R",
+    "b",
+    "c",
+    "d",
+    "f",
+    "g",
+    "h",
+    "j",
+    "k",
+    "l",
+    "m",
+    "n",
+    "p",
+    "q",
+    "r",
+    "s",
+    "t",
+    "v",
+    "x",
+    "z",
+    "ç",
+    "ð",
+    "ħ",
+    "ŋ",
+    "ɖ",
+    "ɟ",
+    "ɢ",
+    "ɣ",
+    "ɦ",
+    "ɬ",
+    "ɮ",
+    "ɰ",
+    "ɱ",
+    "ɲ",
+    "ɳ",
+    "ɴ",
+    "ɸ",
+    "ɹ",
+    "ɻ",
+    "ɽ",
+    "ɾ",
+    "ʀ",
+    "ʁ",
+    "ʂ",
+    "ʃ",
+    "ʈ",
+    "ʋ",
+    "ʐ ",
+    "ʒ",
+    "ʔ",
+    "ʕ",
+    "ʙ",
+    "ʝ",
+    "β",
+    "θ",
+    "χ",
+    "ʐ",
+    "w",
+]
 
 # Relevant features for comparing consonants and vowels
-R_c = ['aspirated', 'lateral', 'manner', 'nasal', 'place', 'retroflex',
-       'syllabic', 'voice']
+R_c = [
+    "aspirated",
+    "lateral",
+    "manner",
+    "nasal",
+    "place",
+    "retroflex",
+    "syllabic",
+    "voice",
+]
 # 'high' taken out of R_v because same as manner
-R_v = ['back', 'lateral', 'long', 'manner', 'nasal', 'place',
-       'retroflex', 'round', 'syllabic', 'voice']
+R_v = [
+    "back",
+    "lateral",
+    "long",
+    "manner",
+    "nasal",
+    "place",
+    "retroflex",
+    "round",
+    "syllabic",
+    "voice",
+]
 
 # Flattened feature matrix (Kondrak 2002: 56)
 similarity_matrix = {
-   #place
-   'bilabial': 1.0, 'labiodental': 0.95, 'dental': 0.9,
-   'alveolar': 0.85, 'retroflex': 0.8, 'palato-alveolar': 0.75,
-   'palatal': 0.7, 'velar': 0.6, 'uvular': 0.5, 'pharyngeal': 0.3,
-   'glottal': 0.1, 'labiovelar': 1.0, 'vowel': -1.0, # added 'vowel'
-   #manner
-   'stop': 1.0, 'affricate': 0.9, 'fricative': 0.85, # increased fricative from 0.8
-   'trill': 0.7, 'tap': 0.65, 'approximant': 0.6, 'high vowel': 0.4,
-   'mid vowel': 0.2, 'low vowel': 0.0, 'vowel2': 0.5, # added vowel
-   #high
-   'high': 1.0, 'mid': 0.5, 'low': 0.0,
-   #back
-   'front': 1.0, 'central': 0.5, 'back': 0.0,
-   #binary features
-   'plus': 1.0, 'minus': 0.0
+    # place
+    "bilabial": 1.0,
+    "labiodental": 0.95,
+    "dental": 0.9,
+    "alveolar": 0.85,
+    "retroflex": 0.8,
+    "palato-alveolar": 0.75,
+    "palatal": 0.7,
+    "velar": 0.6,
+    "uvular": 0.5,
+    "pharyngeal": 0.3,
+    "glottal": 0.1,
+    "labiovelar": 1.0,
+    "vowel": -1.0,  # added 'vowel'
+    # manner
+    "stop": 1.0,
+    "affricate": 0.9,
+    "fricative": 0.85,  # increased fricative from 0.8
+    "trill": 0.7,
+    "tap": 0.65,
+    "approximant": 0.6,
+    "high vowel": 0.4,
+    "mid vowel": 0.2,
+    "low vowel": 0.0,
+    "vowel2": 0.5,  # added vowel
+    # high
+    "high": 1.0,
+    "mid": 0.5,
+    "low": 0.0,
+    # back
+    "front": 1.0,
+    "central": 0.5,
+    "back": 0.0,
+    # binary features
+    "plus": 1.0,
+    "minus": 0.0,
 }
 
 # Relative weights of phonetic features (Kondrak 2002: 55)
 salience = {
-   'syllabic': 5,
-   'place': 40,
-   'manner': 50,
-   'voice': 5, # decreased from 10
-   'nasal': 20, # increased from 10
-   'retroflex': 10,
-   'lateral': 10,
-   'aspirated': 5,
-   'long': 0, # decreased from 1
-   'high': 3, # decreased from 5
-   'back': 2, # decreased from 5
-   'round': 2 # decreased from 5
+    "syllabic": 5,
+    "place": 40,
+    "manner": 50,
+    "voice": 5,  # decreased from 10
+    "nasal": 20,  # increased from 10
+    "retroflex": 10,
+    "lateral": 10,
+    "aspirated": 5,
+    "long": 0,  # decreased from 1
+    "high": 3,  # decreased from 5
+    "back": 2,  # decreased from 5
+    "round": 2,  # decreased from 5
 }
 
 # (Kondrak 2002: 59-60)
 feature_matrix = {
-# Consonants
-'p': {'place': 'bilabial', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'b': {'place': 'bilabial', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'t': {'place': 'alveolar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'d': {'place': 'alveolar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ʈ': {'place': 'retroflex', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɖ': {'place': 'retroflex', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'c': {'place': 'palatal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɟ': {'place': 'palatal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'k': {'place': 'velar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'g': {'place': 'velar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'q': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɢ': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ʔ': {'place': 'glottal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'m': {'place': 'bilabial', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɱ': {'place': 'labiodental', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'n': {'place': 'alveolar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɳ': {'place': 'retroflex', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'plus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɲ': {'place': 'palatal', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ŋ': {'place': 'velar', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɴ': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'N': {'place': 'uvular', 'manner': 'stop', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'plus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ʙ': {'place': 'bilabial', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'B': {'place': 'bilabial', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'r': {'place': 'alveolar', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ʀ': {'place': 'uvular', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'R': {'place': 'uvular', 'manner': 'trill', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɾ': {'place': 'alveolar', 'manner': 'tap', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɽ': {'place': 'retroflex', 'manner': 'tap', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɸ': {'place': 'bilabial', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'β': {'place': 'bilabial', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'f': {'place': 'labiodental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'v': {'place': 'labiodental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'θ': {'place': 'dental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ð': {'place': 'dental', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'s': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'z': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ʃ': {'place': 'palato-alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ʒ': {'place': 'palato-alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ʂ': {'place': 'retroflex', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ʐ': {'place': 'retroflex', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ç': {'place': 'palatal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ʝ': {'place': 'palatal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'x': {'place': 'velar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɣ': {'place': 'velar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'χ': {'place': 'uvular', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ʁ': {'place': 'uvular', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ħ': {'place': 'pharyngeal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ʕ': {'place': 'pharyngeal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'h': {'place': 'glottal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɦ': {'place': 'glottal', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɬ': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'minus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'plus', 'aspirated': 'minus'},
-
-'ɮ': {'place': 'alveolar', 'manner': 'fricative', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'plus', 'aspirated': 'minus'},
-
-'ʋ': {'place': 'labiodental', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɹ': {'place': 'alveolar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɻ': {'place': 'retroflex', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'plus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'j': {'place': 'palatal', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'ɰ': {'place': 'velar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-'l': {'place': 'alveolar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'plus', 'aspirated': 'minus'},
-
-'w': {'place': 'labiovelar', 'manner': 'approximant', 'syllabic': 'minus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'aspirated': 'minus'},
-
-# Vowels
-
-'i': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high',
-'back': 'front','round': 'minus', 'long': 'minus', 'aspirated': 'minus'},
-
-'y': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high',
-'back': 'front','round': 'plus', 'long': 'minus', 'aspirated': 'minus'},
-
-'e': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid',
-'back': 'front','round': 'minus', 'long': 'minus', 'aspirated': 'minus'},
-
-'E': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid',
-'back': 'front','round': 'minus', 'long': 'plus', 'aspirated': 'minus'},
-
-'ø': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid',
-'back': 'front','round': 'plus', 'long': 'minus', 'aspirated': 'minus'},
-
-'ɛ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid',
-'back': 'front','round': 'minus', 'long': 'minus', 'aspirated': 'minus'},
-
-'œ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid',
-'back': 'front','round': 'plus', 'long': 'minus', 'aspirated': 'minus'},
-
-'æ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low',
-'back': 'front','round': 'minus', 'long': 'minus', 'aspirated': 'minus'},
-
-'a': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low',
-'back': 'front','round': 'minus', 'long': 'minus', 'aspirated': 'minus'},
-
-'A': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low',
-'back': 'front','round': 'minus', 'long': 'plus', 'aspirated': 'minus'},
-
-'ɨ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high',
-'back': 'central','round': 'minus', 'long': 'minus', 'aspirated': 'minus'},
-
-'ʉ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high',
-'back': 'central','round': 'plus', 'long': 'minus', 'aspirated': 'minus'},
-
-'ə': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid',
-'back': 'central','round': 'minus', 'long': 'minus', 'aspirated': 'minus'},
-
-'u': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high',
-'back': 'back','round': 'plus', 'long': 'minus', 'aspirated': 'minus'},
-
-'U': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high',
-'back': 'back','round': 'plus', 'long': 'plus', 'aspirated': 'minus'},
-
-'o': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid',
-'back': 'back','round': 'plus', 'long': 'minus', 'aspirated': 'minus'},
-
-'O': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid',
-'back': 'back','round': 'plus', 'long': 'plus', 'aspirated': 'minus'},
-
-'ɔ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'mid',
-'back': 'back','round': 'plus', 'long': 'minus', 'aspirated': 'minus'},
-
-'ɒ': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'low',
-'back': 'back','round': 'minus', 'long': 'minus', 'aspirated': 'minus'},
-
-'I': {'place': 'vowel', 'manner': 'vowel2', 'syllabic': 'plus', 'voice': 'plus',
-'nasal': 'minus', 'retroflex': 'minus', 'lateral': 'minus', 'high': 'high',
-'back': 'front','round': 'minus', 'long': 'plus', 'aspirated': 'minus'},
-
+    # Consonants
+    "p": {
+        "place": "bilabial",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "b": {
+        "place": "bilabial",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "t": {
+        "place": "alveolar",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "d": {
+        "place": "alveolar",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ʈ": {
+        "place": "retroflex",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "plus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɖ": {
+        "place": "retroflex",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "plus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "c": {
+        "place": "palatal",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɟ": {
+        "place": "palatal",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "k": {
+        "place": "velar",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "g": {
+        "place": "velar",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "q": {
+        "place": "uvular",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɢ": {
+        "place": "uvular",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ʔ": {
+        "place": "glottal",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "m": {
+        "place": "bilabial",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "plus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɱ": {
+        "place": "labiodental",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "plus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "n": {
+        "place": "alveolar",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "plus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɳ": {
+        "place": "retroflex",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "plus",
+        "retroflex": "plus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɲ": {
+        "place": "palatal",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "plus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ŋ": {
+        "place": "velar",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "plus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɴ": {
+        "place": "uvular",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "plus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "N": {
+        "place": "uvular",
+        "manner": "stop",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "plus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ʙ": {
+        "place": "bilabial",
+        "manner": "trill",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "B": {
+        "place": "bilabial",
+        "manner": "trill",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "r": {
+        "place": "alveolar",
+        "manner": "trill",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "plus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ʀ": {
+        "place": "uvular",
+        "manner": "trill",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "R": {
+        "place": "uvular",
+        "manner": "trill",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɾ": {
+        "place": "alveolar",
+        "manner": "tap",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɽ": {
+        "place": "retroflex",
+        "manner": "tap",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "plus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɸ": {
+        "place": "bilabial",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "β": {
+        "place": "bilabial",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "f": {
+        "place": "labiodental",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "v": {
+        "place": "labiodental",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "θ": {
+        "place": "dental",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ð": {
+        "place": "dental",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "s": {
+        "place": "alveolar",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "z": {
+        "place": "alveolar",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ʃ": {
+        "place": "palato-alveolar",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ʒ": {
+        "place": "palato-alveolar",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ʂ": {
+        "place": "retroflex",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "plus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ʐ": {
+        "place": "retroflex",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "plus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ç": {
+        "place": "palatal",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ʝ": {
+        "place": "palatal",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "x": {
+        "place": "velar",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɣ": {
+        "place": "velar",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "χ": {
+        "place": "uvular",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ʁ": {
+        "place": "uvular",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ħ": {
+        "place": "pharyngeal",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ʕ": {
+        "place": "pharyngeal",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "h": {
+        "place": "glottal",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɦ": {
+        "place": "glottal",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɬ": {
+        "place": "alveolar",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "minus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "plus",
+        "aspirated": "minus",
+    },
+    "ɮ": {
+        "place": "alveolar",
+        "manner": "fricative",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "plus",
+        "aspirated": "minus",
+    },
+    "ʋ": {
+        "place": "labiodental",
+        "manner": "approximant",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɹ": {
+        "place": "alveolar",
+        "manner": "approximant",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɻ": {
+        "place": "retroflex",
+        "manner": "approximant",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "plus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "j": {
+        "place": "palatal",
+        "manner": "approximant",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "ɰ": {
+        "place": "velar",
+        "manner": "approximant",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    "l": {
+        "place": "alveolar",
+        "manner": "approximant",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "plus",
+        "aspirated": "minus",
+    },
+    "w": {
+        "place": "labiovelar",
+        "manner": "approximant",
+        "syllabic": "minus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "aspirated": "minus",
+    },
+    # Vowels
+    "i": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "high",
+        "back": "front",
+        "round": "minus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "y": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "high",
+        "back": "front",
+        "round": "plus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "e": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "mid",
+        "back": "front",
+        "round": "minus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "E": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "mid",
+        "back": "front",
+        "round": "minus",
+        "long": "plus",
+        "aspirated": "minus",
+    },
+    "ø": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "mid",
+        "back": "front",
+        "round": "plus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "ɛ": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "mid",
+        "back": "front",
+        "round": "minus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "œ": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "mid",
+        "back": "front",
+        "round": "plus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "æ": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "low",
+        "back": "front",
+        "round": "minus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "a": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "low",
+        "back": "front",
+        "round": "minus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "A": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "low",
+        "back": "front",
+        "round": "minus",
+        "long": "plus",
+        "aspirated": "minus",
+    },
+    "ɨ": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "high",
+        "back": "central",
+        "round": "minus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "ʉ": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "high",
+        "back": "central",
+        "round": "plus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "ə": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "mid",
+        "back": "central",
+        "round": "minus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "u": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "high",
+        "back": "back",
+        "round": "plus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "U": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "high",
+        "back": "back",
+        "round": "plus",
+        "long": "plus",
+        "aspirated": "minus",
+    },
+    "o": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "mid",
+        "back": "back",
+        "round": "plus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "O": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "mid",
+        "back": "back",
+        "round": "plus",
+        "long": "plus",
+        "aspirated": "minus",
+    },
+    "ɔ": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "mid",
+        "back": "back",
+        "round": "plus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "ɒ": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "low",
+        "back": "back",
+        "round": "minus",
+        "long": "minus",
+        "aspirated": "minus",
+    },
+    "I": {
+        "place": "vowel",
+        "manner": "vowel2",
+        "syllabic": "plus",
+        "voice": "plus",
+        "nasal": "minus",
+        "retroflex": "minus",
+        "lateral": "minus",
+        "high": "high",
+        "back": "front",
+        "round": "minus",
+        "long": "plus",
+        "aspirated": "minus",
+    },
 }
 
 # === Algorithm ===
 
+
 def align(str1, str2, epsilon=0):
     """
     Compute the alignment of two phonetic strings.
@@ -383,42 +1090,43 @@ def align(str1, str2, epsilon=0):
 
     (Kondrak 2002: 51)
     """
-    if np == None:
-      raise ImportError('You need numpy in order to use the align function')
+    if np is None:
+        raise ImportError("You need numpy in order to use the align function")
 
     assert 0.0 <= epsilon <= 1.0, "Epsilon must be between 0.0 and 1.0."
     m = len(str1)
     n = len(str2)
     # This includes Kondrak's initialization of row 0 and column 0 to all 0s.
-    S = np.zeros((m+1, n+1), dtype=float)
+    S = np.zeros((m + 1, n + 1), dtype=float)
 
     # If i <= 1 or j <= 1, don't allow expansions as it doesn't make sense,
     # and breaks array and string indices. Make sure they never get chosen
     # by setting them to -inf.
-    for i in range(1, m+1):
-        for j in range(1, n+1):
-            edit1 = S[i-1, j] + sigma_skip(str1[i-1])
-            edit2 = S[i, j-1] + sigma_skip(str2[j-1])
-            edit3 = S[i-1, j-1] + sigma_sub(str1[i-1], str2[j-1])
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            edit1 = S[i - 1, j] + sigma_skip(str1[i - 1])
+            edit2 = S[i, j - 1] + sigma_skip(str2[j - 1])
+            edit3 = S[i - 1, j - 1] + sigma_sub(str1[i - 1], str2[j - 1])
             if i > 1:
-                edit4 = S[i-2, j-1] + sigma_exp(str2[j-1], str1[i-2:i])
+                edit4 = S[i - 2, j - 1] + sigma_exp(str2[j - 1], str1[i - 2 : i])
             else:
                 edit4 = -inf
             if j > 1:
-                edit5 = S[i-1, j-2] + sigma_exp(str1[i-1], str2[j-2:j])
+                edit5 = S[i - 1, j - 2] + sigma_exp(str1[i - 1], str2[j - 2 : j])
             else:
                 edit5 = -inf
             S[i, j] = max(edit1, edit2, edit3, edit4, edit5, 0)
 
-    T = (1-epsilon)*np.amax(S) # Threshold score for near-optimal alignments
+    T = (1 - epsilon) * np.amax(S)  # Threshold score for near-optimal alignments
 
     alignments = []
-    for i in range(1, m+1):
-        for j in range(1, n+1):
-            if S[i,j] >= T:
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            if S[i, j] >= T:
                 alignments.append(_retrieve(i, j, 0, S, T, str1, str2, []))
     return alignments
 
+
 def _retrieve(i, j, s, S, T, str1, str2, out):
     """
     Retrieve the path through the similarity matrix S starting at (i, j).
@@ -429,23 +1137,53 @@ def _retrieve(i, j, s, S, T, str1, str2, out):
     if S[i, j] == 0:
         return out
     else:
-        if j > 1 and S[i-1, j-2] + sigma_exp(str1[i-1], str2[j-2:j]) + s >= T:
-            out.insert(0, (str1[i-1], str2[j-2:j]))
-            _retrieve(i-1, j-2, s+sigma_exp(str1[i-1], str2[j-2:j]), S, T, str1, str2, out)
-        elif i > 1 and S[i-2, j-1] + sigma_exp(str2[j-1], str1[i-2:i]) + s >= T:
-            out.insert(0, (str1[i-2:i], str2[j-1]))
-            _retrieve(i-2, j-1, s+sigma_exp(str2[j-1], str1[i-2:i]), S, T, str1, str2, out)
-        elif S[i, j-1] + sigma_skip(str2[j-1]) + s >= T:
-            out.insert(0, ('-', str2[j-1]))
-            _retrieve(i, j-1, s+sigma_skip(str2[j-1]), S, T, str1, str2, out)
-        elif S[i-1, j] + sigma_skip(str1[i-1]) + s >= T:
-            out.insert(0, (str1[i-1], '-'))
-            _retrieve(i-1, j, s+sigma_skip(str1[i-1]), S, T, str1, str2, out)
-        elif S[i-1, j-1] + sigma_sub(str1[i-1], str2[j-1]) + s >= T:
-            out.insert(0, (str1[i-1], str2[j-1]))
-            _retrieve(i-1, j-1, s+sigma_sub(str1[i-1], str2[j-1]), S, T, str1, str2, out)
+        if j > 1 and S[i - 1, j - 2] + sigma_exp(str1[i - 1], str2[j - 2 : j]) + s >= T:
+            out.insert(0, (str1[i - 1], str2[j - 2 : j]))
+            _retrieve(
+                i - 1,
+                j - 2,
+                s + sigma_exp(str1[i - 1], str2[j - 2 : j]),
+                S,
+                T,
+                str1,
+                str2,
+                out,
+            )
+        elif (
+            i > 1 and S[i - 2, j - 1] + sigma_exp(str2[j - 1], str1[i - 2 : i]) + s >= T
+        ):
+            out.insert(0, (str1[i - 2 : i], str2[j - 1]))
+            _retrieve(
+                i - 2,
+                j - 1,
+                s + sigma_exp(str2[j - 1], str1[i - 2 : i]),
+                S,
+                T,
+                str1,
+                str2,
+                out,
+            )
+        elif S[i, j - 1] + sigma_skip(str2[j - 1]) + s >= T:
+            out.insert(0, ("-", str2[j - 1]))
+            _retrieve(i, j - 1, s + sigma_skip(str2[j - 1]), S, T, str1, str2, out)
+        elif S[i - 1, j] + sigma_skip(str1[i - 1]) + s >= T:
+            out.insert(0, (str1[i - 1], "-"))
+            _retrieve(i - 1, j, s + sigma_skip(str1[i - 1]), S, T, str1, str2, out)
+        elif S[i - 1, j - 1] + sigma_sub(str1[i - 1], str2[j - 1]) + s >= T:
+            out.insert(0, (str1[i - 1], str2[j - 1]))
+            _retrieve(
+                i - 1,
+                j - 1,
+                s + sigma_sub(str1[i - 1], str2[j - 1]),
+                S,
+                T,
+                str1,
+                str2,
+                out,
+            )
     return out
 
+
 def sigma_skip(p):
     """
     Returns score of an indel of P.
@@ -454,6 +1192,7 @@ def sigma_skip(p):
     """
     return C_skip
 
+
 def sigma_sub(p, q):
     """
     Returns score of a substitution of P with Q.
@@ -462,6 +1201,7 @@ def sigma_sub(p, q):
     """
     return C_sub - delta(p, q) - V(p) - V(q)
 
+
 def sigma_exp(p, q):
     """
     Returns score of an expansion/compression.
@@ -472,6 +1212,7 @@ def sigma_exp(p, q):
     q2 = q[1]
     return C_exp - delta(p, q1) - delta(p, q2) - V(p) - max(V(q1), V(q2))
 
+
 def delta(p, q):
     """
     Return weighted sum of difference between P and Q.
@@ -484,6 +1225,7 @@ def delta(p, q):
         total += diff(p, q, f) * salience[f]
     return total
 
+
 def diff(p, q, f):
     """
     Returns difference between phonetic segments P and Q for feature F.
@@ -493,6 +1235,7 @@ def diff(p, q, f):
     p_features, q_features = feature_matrix[p], feature_matrix[q]
     return abs(similarity_matrix[p_features[f]] - similarity_matrix[q_features[f]])
 
+
 def R(p, q):
     """
     Return relevant features for segment comparison.
@@ -503,6 +1246,7 @@ def R(p, q):
         return R_c
     return R_v
 
+
 def V(p):
     """
     Return vowel weight if P is vowel.
@@ -513,19 +1257,22 @@ def V(p):
         return 0
     return C_vwl
 
+
 # === Test ===
 
+
 def demo():
     """
     A demonstration of the result of aligning phonetic sequences
     used in Kondrak's (2002) dissertation.
     """
-    data = [pair.split(',') for pair in cognate_data.split('\n')]
+    data = [pair.split(",") for pair in cognate_data.split("\n")]
     for pair in data:
         alignment = align(pair[0], pair[1])[0]
-        alignment = ['({}, {})'.format(a[0], a[1]) for a in alignment]
-        alignment = ' '.join(alignment)
-        print('{} ~ {} : {}'.format(pair[0], pair[1], alignment))
+        alignment = ["({}, {})".format(a[0], a[1]) for a in alignment]
+        alignment = " ".join(alignment)
+        print("{} ~ {} : {}".format(pair[0], pair[1], alignment))
+
 
 cognate_data = """jo,ʒə
 tu,ty
@@ -603,5 +1350,5 @@ ahkohkwa,ahkɛh
 pematesiweni,pematesewen
 asenja,aʔsɛn"""
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/metrics/aline.pyc b/nlp_resource_data/nltk/metrics/aline.pyc
deleted file mode 100755 (executable)
index 174df77..0000000
Binary files a/nlp_resource_data/nltk/metrics/aline.pyc and /dev/null differ
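To check that the aline.py changes above are formatting-only, the module can be exercised the same way before and after this commit. A minimal sketch, assuming numpy is installed and this tree's nltk package is importable (the printed alignment is illustrative, not asserted):

    from nltk.metrics import aline

    # Align two phonetic strings; epsilon=0 keeps only optimal alignments.
    print(aline.align('jo', 'ʒə'))   # e.g. [[('j', 'ʒ'), ('o', 'ə')]]

    # The feature matrix above drives the per-segment scores.
    print(aline.delta('p', 'b'))     # small distance: 'p'/'b' differ only in voicing
    aline.demo()                     # re-runs Kondrak's (2002) cognate pairs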
diff --git a/nlp_resource_data/nltk/metrics/association.py b/nlp_resource_data/nltk/metrics/association.py
old mode 100755 (executable)
new mode 100644 (file)
index 3e012f6..c2638c8
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Ngram Association Measures
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Joel Nothman <jnothman@student.usyd.edu.au>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
@@ -11,11 +11,11 @@ generic, abstract implementation in ``NgramAssocMeasures``, and n-specific
 ``BigramAssocMeasures`` and ``TrigramAssocMeasures``.
 """
 
-from __future__ import division
-from abc import ABCMeta, abstractmethod
-from six import add_metaclass
 import math as _math
+from abc import ABCMeta, abstractmethod
 from functools import reduce
+
+
 _log2 = lambda x: _math.log(x, 2.0)
 _ln = _math.log
 
@@ -26,9 +26,11 @@ _SMALL = 1e-20
 try:
     from scipy.stats import fisher_exact
 except ImportError:
+
     def fisher_exact(*_args, **_kwargs):
         raise NotImplementedError
 
+
 ### Indices to marginals arguments:
 
 NGRAM = 0
@@ -41,8 +43,7 @@ TOTAL = -1
 """Marginals index for the number of words in the data"""
 
 
-@add_metaclass(ABCMeta)
-class NgramAssocMeasures(object):
+class NgramAssocMeasures(metaclass=ABCMeta):
     """
     An abstract class defining a collection of generic association measures.
     Each public method returns a score, taking the following arguments::
@@ -67,15 +68,17 @@ class NgramAssocMeasures(object):
     @abstractmethod
     def _contingency(*marginals):
         """Calculates values of a contingency table from marginal values."""
-        raise NotImplementedError("The contingency table is not available"
-                                  "in the general ngram case")
+        raise NotImplementedError(
+            "The contingency table is not available " "in the general ngram case"
+        )
 
     @staticmethod
     @abstractmethod
     def _marginals(*contingency):
         """Calculates values of contingency table marginals from its values."""
-        raise NotImplementedError("The contingency table is not available"
-                                  "in the general ngram case")
+        raise NotImplementedError(
+            "The contingency table is not available " "in the general ngram case"
+        )
 
     @classmethod
     def _expected_values(cls, cont):
@@ -86,10 +89,13 @@ class NgramAssocMeasures(object):
         # For each contingency table cell
         for i in range(len(cont)):
             # Yield the expected value
-            yield (_product(sum(cont[x] for x in range(2 ** cls._n)
-                                if (x & j) == (i & j))
-                            for j in bits) /
-                   (n_all ** (cls._n - 1)))
+            yield (
+                _product(
+                    sum(cont[x] for x in range(2 ** cls._n) if (x & j) == (i & j))
+                    for j in bits
+                )
+                / (n_all ** (cls._n - 1))
+            )
 
     @staticmethod
     def raw_freq(*marginals):
@@ -101,10 +107,10 @@ class NgramAssocMeasures(object):
         """Scores ngrams using Student's t test with independence hypothesis
         for unigrams, as in Manning and Schutze 5.3.1.
         """
-        return ((marginals[NGRAM] -
-                  _product(marginals[UNIGRAMS]) /
-                  (marginals[TOTAL] ** (cls._n - 1))) /
-                (marginals[NGRAM] + _SMALL) ** .5)
+        return (
+            marginals[NGRAM]
+            - _product(marginals[UNIGRAMS]) / (marginals[TOTAL] ** (cls._n - 1))
+        ) / (marginals[NGRAM] + _SMALL) ** 0.5
 
     @classmethod
     def chi_sq(cls, *marginals):
@@ -113,8 +119,7 @@ class NgramAssocMeasures(object):
         """
         cont = cls._contingency(*marginals)
         exps = cls._expected_values(cont)
-        return sum((obs - exp) ** 2 / (exp + _SMALL)
-                   for obs, exp in zip(cont, exps))
+        return sum((obs - exp) ** 2 / (exp + _SMALL) for obs, exp in zip(cont, exps))
 
     @staticmethod
     def mi_like(*marginals, **kwargs):
@@ -122,31 +127,33 @@ class NgramAssocMeasures(object):
         argument power sets an exponent (default 3) for the numerator. No
         logarithm of the result is calculated.
         """
-        return (marginals[NGRAM] ** kwargs.get('power', 3) /
-                _product(marginals[UNIGRAMS]))
+        return marginals[NGRAM] ** kwargs.get("power", 3) / _product(
+            marginals[UNIGRAMS]
+        )
 
     @classmethod
     def pmi(cls, *marginals):
         """Scores ngrams by pointwise mutual information, as in Manning and
         Schutze 5.4.
         """
-        return (_log2(marginals[NGRAM] * marginals[TOTAL] ** (cls._n - 1)) -
-                _log2(_product(marginals[UNIGRAMS])))
+        return _log2(marginals[NGRAM] * marginals[TOTAL] ** (cls._n - 1)) - _log2(
+            _product(marginals[UNIGRAMS])
+        )
 
     @classmethod
     def likelihood_ratio(cls, *marginals):
         """Scores ngrams using likelihood ratios as in Manning and Schutze 5.3.4.
         """
         cont = cls._contingency(*marginals)
-        return (cls._n *
-                sum(obs * _ln(obs / (exp + _SMALL) + _SMALL)
-                    for obs, exp in zip(cont, cls._expected_values(cont))))
+        return cls._n * sum(
+            obs * _ln(obs / (exp + _SMALL) + _SMALL)
+            for obs, exp in zip(cont, cls._expected_values(cont))
+        )
 
     @classmethod
     def poisson_stirling(cls, *marginals):
         """Scores ngrams using the Poisson-Stirling measure."""
-        exp = (_product(marginals[UNIGRAMS]) /
-               (marginals[TOTAL] ** (cls._n - 1)))
+        exp = _product(marginals[UNIGRAMS]) / (marginals[TOTAL] ** (cls._n - 1))
         return marginals[NGRAM] * (_log2(marginals[NGRAM] / exp) - 1)
 
     @classmethod
@@ -214,8 +221,9 @@ class BigramAssocMeasures(NgramAssocMeasures):
         """
         n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
 
-        return ((n_ii*n_oo - n_io*n_oi)**2 /
-                ((n_ii + n_io) * (n_ii + n_oi) * (n_io + n_oo) * (n_oi + n_oo)))
+        return (n_ii * n_oo - n_io * n_oi) ** 2 / (
+            (n_ii + n_io) * (n_ii + n_oi) * (n_io + n_oo) * (n_oi + n_oo)
+        )
 
     @classmethod
     def chi_sq(cls, n_ii, n_ix_xi_tuple, n_xx):
@@ -234,7 +242,7 @@ class BigramAssocMeasures(NgramAssocMeasures):
 
         n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals)
 
-        (odds, pvalue) = fisher_exact([[n_ii, n_io], [n_oi, n_oo]], alternative='less')
+        (odds, pvalue) = fisher_exact([[n_ii, n_io], [n_oi, n_oo]], alternative="less")
         return pvalue
 
     @staticmethod
@@ -282,8 +290,7 @@ class TrigramAssocMeasures(NgramAssocMeasures):
         n_ioo = n_ixx - n_iii - n_ioi - n_iio
         n_ooo = n_xxx - n_iii - n_oii - n_ioi - n_iio - n_ooi - n_oio - n_ioo
 
-        return (n_iii, n_oii, n_ioi, n_ooi,
-                n_iio, n_oio, n_ioo, n_ooo)
+        return (n_iii, n_oii, n_ioi, n_ooi, n_iio, n_oio, n_ioo, n_ooo)
 
     @staticmethod
     def _marginals(*contingency):
@@ -292,12 +299,16 @@ class TrigramAssocMeasures(NgramAssocMeasures):
         (1, (1, 1, 1), (1, 73, 1), 2000)
         """
         n_iii, n_oii, n_ioi, n_ooi, n_iio, n_oio, n_ioo, n_ooo = contingency
-        return (n_iii,
-                (n_iii + n_iio, n_iii + n_ioi, n_iii + n_oii),
-                (n_iii + n_ioi + n_iio + n_ioo,
-                 n_iii + n_oii + n_iio + n_oio,
-                 n_iii + n_oii + n_ioi + n_ooi),
-                sum(contingency))
+        return (
+            n_iii,
+            (n_iii + n_iio, n_iii + n_ioi, n_iii + n_oii),
+            (
+                n_iii + n_ioi + n_iio + n_ioo,
+                n_iii + n_oii + n_iio + n_oio,
+                n_iii + n_oii + n_ioi + n_ooi,
+            ),
+            sum(contingency),
+        )
 
 
 class QuadgramAssocMeasures(NgramAssocMeasures):
@@ -344,12 +355,43 @@ class QuadgramAssocMeasures(NgramAssocMeasures):
         n_iioo = n_iixx - n_iiii - n_iioi - n_iiio
         n_oioo = n_xixx - n_iiii - n_oiii - n_iioi - n_iiio - n_oioi - n_oiio - n_iioo
         n_iooo = n_ixxx - n_iiii - n_ioii - n_iioi - n_iiio - n_iooi - n_iioo - n_ioio
-        n_oooo = n_xxxx - n_iiii - n_oiii - n_ioii - n_iioi - n_ooii - n_oioi - n_iooi - \
-                 n_oooi - n_iiio - n_oiio - n_ioio - n_ooio - n_iioo - n_oioo - n_iooo
-
-        return (n_iiii, n_oiii, n_ioii, n_ooii, n_iioi,
-                n_oioi, n_iooi, n_oooi, n_iiio, n_oiio,
-                n_ioio, n_ooio, n_iioo, n_oioo, n_iooo, n_oooo)
+        n_oooo = (
+            n_xxxx
+            - n_iiii
+            - n_oiii
+            - n_ioii
+            - n_iioi
+            - n_ooii
+            - n_oioi
+            - n_iooi
+            - n_oooi
+            - n_iiio
+            - n_oiio
+            - n_ioio
+            - n_ooio
+            - n_iioo
+            - n_oioo
+            - n_iooo
+        )
+
+        return (
+            n_iiii,
+            n_oiii,
+            n_ioii,
+            n_ooii,
+            n_iioi,
+            n_oioi,
+            n_iooi,
+            n_oooi,
+            n_iiio,
+            n_oiio,
+            n_ioio,
+            n_ooio,
+            n_iioo,
+            n_oioo,
+            n_iooo,
+            n_oooo,
+        )
 
     @staticmethod
     def _marginals(*contingency):
@@ -357,8 +399,9 @@ class QuadgramAssocMeasures(NgramAssocMeasures):
         QuadgramAssocMeasures._marginals(1, 0, 2, 46, 552, 825, 2577, 34967, 1, 0, 2, 48, 7250, 9031, 28585, 356653)
         (1, (2, 553, 3, 1), (7804, 6, 3132, 1378, 49, 2), (38970, 17660, 100, 38970), 440540)
         """
-        n_iiii, n_oiii, n_ioii, n_ooii, n_iioi, n_oioi, n_iooi, n_oooi, n_iiio, n_oiio, n_ioio, n_ooio, \
-        n_iioo, n_oioo, n_iooo, n_oooo = contingency
+        n_iiii, n_oiii, n_ioii, n_ooii, n_iioi, n_oioi, n_iooi, n_oooi, n_iiio, n_oiio, n_ioio, n_ooio, n_iioo, n_oioo, n_iooo, n_oooo = (
+            contingency
+        )
 
         n_iiix = n_iiii + n_iiio
         n_iixi = n_iiii + n_iioi
@@ -379,11 +422,13 @@ class QuadgramAssocMeasures(NgramAssocMeasures):
 
         n_all = sum(contingency)
 
-        return (n_iiii,
-                (n_iiix, n_iixi, n_ixii, n_xiii),
-                (n_iixx, n_ixix, n_ixxi, n_xixi, n_xxii, n_xiix),
-                (n_ixxx, n_xixx, n_xxix, n_xxxi),
-                n_all)
+        return (
+            n_iiii,
+            (n_iiix, n_iixi, n_ixii, n_xiii),
+            (n_iixx, n_ixix, n_ixxi, n_xixi, n_xxii, n_xiix),
+            (n_ixxx, n_xixx, n_xxix, n_xxxi),
+            n_all,
+        )
 
 
 class ContingencyMeasures(object):
@@ -393,12 +438,12 @@ class ContingencyMeasures(object):
 
     def __init__(self, measures):
         """Constructs a ContingencyMeasures given a NgramAssocMeasures class"""
-        self.__class__.__name__ = 'Contingency' + measures.__class__.__name__
+        self.__class__.__name__ = "Contingency" + measures.__class__.__name__
         for k in dir(measures):
-            if k.startswith('__'):
+            if k.startswith("__"):
                 continue
             v = getattr(measures, k)
-            if not k.startswith('_'):
+            if not k.startswith("_"):
                 v = self._make_contingency_fn(measures, v)
             setattr(self, k, v)
 
@@ -407,8 +452,10 @@ class ContingencyMeasures(object):
         """From an association measure function, produces a new function which
         accepts contingency table values as its arguments.
         """
+
         def res(*contingency):
             return old_fn(*measures._marginals(*contingency))
+
         res.__doc__ = old_fn.__doc__
         res.__name__ = old_fn.__name__
         return res
diff --git a/nlp_resource_data/nltk/metrics/association.pyc b/nlp_resource_data/nltk/metrics/association.pyc
deleted file mode 100755 (executable)
index aa1f11b..0000000
Binary files a/nlp_resource_data/nltk/metrics/association.pyc and /dev/null differ
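The association.py changes above are likewise mechanical: black-style wrapping plus dropping the six/__future__ compatibility shims. A sketch with made-up counts (not part of the commit) of the marginals layout (n_ii, (n_ix, n_xi), n_xx) that the bigram measures expect:

    from nltk.metrics.association import BigramAssocMeasures

    n_ii = 8              # occurrences of the bigram itself
    n_ix, n_xi = 42, 20   # occurrences of word1, word2 anywhere in the corpus
    n_xx = 14307668       # total number of bigrams

    print(BigramAssocMeasures.raw_freq(n_ii, (n_ix, n_xi), n_xx))
    print(BigramAssocMeasures.pmi(n_ii, (n_ix, n_xi), n_xx))
    print(BigramAssocMeasures.likelihood_ratio(n_ii, (n_ix, n_xi), n_xx))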
diff --git a/nlp_resource_data/nltk/metrics/confusionmatrix.py b/nlp_resource_data/nltk/metrics/confusionmatrix.py
old mode 100755 (executable)
new mode 100644 (file)
index 611d82a..7d96d77
@@ -1,15 +1,14 @@
 # Natural Language Toolkit: Confusion Matrices
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals
+
 from nltk.probability import FreqDist
-from nltk.compat import python_2_unicode_compatible
 
-@python_2_unicode_compatible
+
 class ConfusionMatrix(object):
     """
     The confusion matrix between a list of reference values and a
@@ -43,24 +42,27 @@ class ConfusionMatrix(object):
             the same length.
         """
         if len(reference) != len(test):
-            raise ValueError('Lists must have the same length.')
+            raise ValueError("Lists must have the same length.")
 
         # Get a list of all values.
         if sort_by_count:
             ref_fdist = FreqDist(reference)
             test_fdist = FreqDist(test)
-            def key(v): return -(ref_fdist[v]+test_fdist[v])
-            values = sorted(set(reference+test), key=key)
+
+            def key(v):
+                return -(ref_fdist[v] + test_fdist[v])
+
+            values = sorted(set(reference + test), key=key)
         else:
-            values = sorted(set(reference+test))
+            values = sorted(set(reference + test))
 
         # Construct a value->index dictionary
-        indices = dict((val,i) for (i,val) in enumerate(values))
+        indices = dict((val, i) for (i, val) in enumerate(values))
 
         # Make a confusion matrix table.
         confusion = [[0 for val in values] for val in values]
-        max_conf = 0 # Maximum confusion
-        for w,g in zip(reference, test):
+        max_conf = 0  # Maximum confusion
+        for w, g in zip(reference, test):
             confusion[indices[w]][indices[g]] += 1
             max_conf = max(max_conf, confusion[indices[w]][indices[g]])
 
@@ -89,14 +91,18 @@ class ConfusionMatrix(object):
         return self._confusion[i][j]
 
     def __repr__(self):
-        return '<ConfusionMatrix: %s/%s correct>' % (self._correct,
-                                                     self._total)
+        return "<ConfusionMatrix: %s/%s correct>" % (self._correct, self._total)
 
     def __str__(self):
         return self.pretty_format()
 
-    def pretty_format(self, show_percents=False, values_in_chart=True,
-           truncate=None, sort_by_count=False):
+    def pretty_format(
+        self,
+        show_percents=False,
+        values_in_chart=True,
+        truncate=None,
+        sort_by_count=False,
+    ):
         """
         :return: A multi-line string representation of this confusion matrix.
         :type truncate: int
@@ -115,8 +121,9 @@ class ConfusionMatrix(object):
 
         values = self._values
         if sort_by_count:
-            values = sorted(values, key=lambda v:
-                            -sum(self._confusion[self._indices[v]]))
+            values = sorted(
+                values, key=lambda v: -sum(self._confusion[self._indices[v]])
+            )
 
         if truncate:
             values = values[:truncate]
@@ -124,34 +131,34 @@ class ConfusionMatrix(object):
         if values_in_chart:
             value_strings = ["%s" % val for val in values]
         else:
-            value_strings = [str(n+1) for n in range(len(values))]
+            value_strings = [str(n + 1) for n in range(len(values))]
 
         # Construct a format string for row values
         valuelen = max(len(val) for val in value_strings)
-        value_format = '%' + repr(valuelen) + 's | '
+        value_format = "%" + repr(valuelen) + "s | "
         # Construct a format string for matrix entries
         if show_percents:
             entrylen = 6
-            entry_format = '%5.1f%%'
-            zerostr = '     .'
+            entry_format = "%5.1f%%"
+            zerostr = "     ."
         else:
             entrylen = len(repr(self._max_conf))
-            entry_format = '%' + repr(entrylen) + 'd'
-            zerostr = ' '*(entrylen-1) + '.'
+            entry_format = "%" + repr(entrylen) + "d"
+            zerostr = " " * (entrylen - 1) + "."
 
         # Write the column values.
-        s = ''
+        s = ""
         for i in range(valuelen):
-            s += (' '*valuelen)+' |'
+            s += (" " * valuelen) + " |"
             for val in value_strings:
-                if i >= valuelen-len(val):
-                    s += val[i-valuelen+len(val)].rjust(entrylen+1)
+                if i >= valuelen - len(val):
+                    s += val[i - valuelen + len(val)].rjust(entrylen + 1)
                 else:
-                    s += ' '*(entrylen+1)
-            s += ' |\n'
+                    s += " " * (entrylen + 1)
+            s += " |\n"
 
         # Write a dividing line
-        s += '%s-+-%s+\n' % ('-'*valuelen, '-'*((entrylen+1)*len(values)))
+        s += "%s-+-%s+\n" % ("-" * valuelen, "-" * ((entrylen + 1) * len(values)))
 
         # Write the entries.
         for val, li in zip(value_strings, values):
@@ -162,45 +169,48 @@ class ConfusionMatrix(object):
                 if confusion[i][j] == 0:
                     s += zerostr
                 elif show_percents:
-                    s += entry_format % (100.0*confusion[i][j]/self._total)
+                    s += entry_format % (100.0 * confusion[i][j] / self._total)
                 else:
                     s += entry_format % confusion[i][j]
                 if i == j:
-                    prevspace = s.rfind(' ')
-                    s = s[:prevspace] + '<' + s[prevspace+1:] + '>'
-                else: s += ' '
-            s += '|\n'
+                    prevspace = s.rfind(" ")
+                    s = s[:prevspace] + "<" + s[prevspace + 1 :] + ">"
+                else:
+                    s += " "
+            s += "|\n"
 
         # Write a dividing line
-        s += '%s-+-%s+\n' % ('-'*valuelen, '-'*((entrylen+1)*len(values)))
+        s += "%s-+-%s+\n" % ("-" * valuelen, "-" * ((entrylen + 1) * len(values)))
 
         # Write a key
-        s += '(row = reference; col = test)\n'
+        s += "(row = reference; col = test)\n"
         if not values_in_chart:
-            s += 'Value key:\n'
+            s += "Value key:\n"
             for i, value in enumerate(values):
-                s += '%6d: %s\n' % (i+1, value)
+                s += "%6d: %s\n" % (i + 1, value)
 
         return s
 
     def key(self):
         values = self._values
-        str = 'Value key:\n'
-        indexlen = len(repr(len(values)-1))
-        key_format = '  %'+repr(indexlen)+'d: %s\n'
+        str = "Value key:\n"
+        indexlen = len(repr(len(values) - 1))
+        key_format = "  %" + repr(indexlen) + "d: %s\n"
         for i in range(len(values)):
             str += key_format % (i, values[i])
 
         return str
 
+
 def demo():
-    reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
-    test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
-    print('Reference =', reference)
-    print('Test    =', test)
-    print('Confusion matrix:')
+    reference = "DET NN VB DET JJ NN NN IN DET NN".split()
+    test = "DET VB VB DET NN NN NN IN DET NN".split()
+    print("Reference =", reference)
+    print("Test    =", test)
+    print("Confusion matrix:")
     print(ConfusionMatrix(reference, test))
     print(ConfusionMatrix(reference, test).pretty_format(sort_by_count=True))
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/metrics/confusionmatrix.pyc b/nlp_resource_data/nltk/metrics/confusionmatrix.pyc
deleted file mode 100755 (executable)
index decee4f..0000000
Binary files a/nlp_resource_data/nltk/metrics/confusionmatrix.pyc and /dev/null differ
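As with the other metrics modules, the confusionmatrix.py diff above changes layout only. A short sketch (not part of the commit) exercising the re-wrapped pretty_format() signature:

    from nltk.metrics import ConfusionMatrix

    ref = 'DET NN VB DET JJ NN NN IN DET NN'.split()
    test = 'DET VB VB DET NN NN NN IN DET NN'.split()
    cm = ConfusionMatrix(ref, test)
    print(cm.pretty_format(show_percents=True, sort_by_count=True, truncate=3))
    print(cm['NN', 'NN'])   # cell lookup: reference 'NN' also tagged 'NN' (3 here)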
diff --git a/nlp_resource_data/nltk/metrics/distance.py b/nlp_resource_data/nltk/metrics/distance.py
old mode 100755 (executable)
new mode 100644 (file)
index e8957bf..ae988ab
@@ -1,6 +1,7 @@
+# -*- coding: utf-8 -*-
 # Natural Language Toolkit: Distance Metrics
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 #         Tom Lippincott <tom@cs.columbia.edu>
@@ -19,8 +20,8 @@ As metrics, they must satisfy the following three requirements:
 3. d(a, c) <= d(a, b) + d(b, c)
 """
 
-from __future__ import print_function
-from __future__ import division
+import warnings
+import operator
 
 
 def _edit_dist_init(len1, len2):
@@ -28,9 +29,9 @@ def _edit_dist_init(len1, len2):
     for i in range(len1):
         lev.append([0] * len2)  # initialize 2D array to zero
     for i in range(len1):
-        lev[i][0] = i           # column 0: 0,1,2,3,4,...
+        lev[i][0] = i  # column 0: 0,1,2,3,4,...
     for j in range(len2):
-        lev[0][j] = j           # row 0: 0,1,2,3,4,...
+        lev[0][j] = j  # row 0: 0,1,2,3,4,...
     return lev
 
 
@@ -66,7 +67,8 @@ def edit_distance(s1, s2, substitution_cost=1, transpositions=False):
     been done in other orders, but at least three steps are needed.
 
     Allows specifying the cost of substitution edits (e.g., "a" -> "b"),
-    because sometimes it makes sense to assign greater penalties to substitutions.
+    because sometimes it makes sense to assign greater penalties to
+    substitutions.
 
     This also optionally allows transposition edits (e.g., "ab" -> "ba"),
     though this is disabled by default.
@@ -87,11 +89,89 @@ def edit_distance(s1, s2, substitution_cost=1, transpositions=False):
     # iterate over the array
     for i in range(len1):
         for j in range(len2):
-            _edit_dist_step(lev, i + 1, j + 1, s1, s2,
-                            substitution_cost=substitution_cost, transpositions=transpositions)
+            _edit_dist_step(
+                lev,
+                i + 1,
+                j + 1,
+                s1,
+                s2,
+                substitution_cost=substitution_cost,
+                transpositions=transpositions,
+            )
     return lev[len1][len2]
 
 
+def _edit_dist_backtrace(lev):
+    i, j = len(lev) - 1, len(lev[0]) - 1
+    alignment = [(i, j)]
+
+    while (i, j) != (0, 0):
+        directions = [
+            (i - 1, j),  # skip s1
+            (i, j - 1),  # skip s2
+            (i - 1, j - 1),  # substitution
+        ]
+
+        direction_costs = (
+            (lev[i][j] if (i >= 0 and j >= 0) else float("inf"), (i, j))
+            for i, j in directions
+        )
+        _, (i, j) = min(direction_costs, key=operator.itemgetter(0))
+
+        alignment.append((i, j))
+    return list(reversed(alignment))
+
+
+def edit_distance_align(s1, s2, substitution_cost=1):
+    """
+    Calculate the minimum Levenshtein edit-distance based alignment
+    mapping between two strings. The alignment finds the mapping
+    from string s1 to s2 that minimizes the edit distance cost.
+    For example, mapping "rain" to "shine" would involve 2
+    substitutions, 2 matches and an insertion resulting in
+    the following mapping:
+    [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (4, 5)]
+    NB: (0, 0) is the start state without any letters associated
+    See more: https://web.stanford.edu/class/cs124/lec/med.pdf
+
+    In case of multiple valid minimum-distance alignments, the
+    backtrace has the following operation precedence:
+    1. Skip s1 character
+    2. Skip s2 character
+    3. Substitute s1 and s2 characters
+    The backtrace is carried out in reverse string order.
+
+    This function does not support transposition.
+
+    :param s1, s2: The strings to be aligned
+    :type s1: str
+    :type s2: str
+    :type substitution_cost: int
+    :rtype: List[Tuple[int, int]]
+    """
+    # set up a 2-D array
+    len1 = len(s1)
+    len2 = len(s2)
+    lev = _edit_dist_init(len1 + 1, len2 + 1)
+
+    # iterate over the array
+    for i in range(len1):
+        for j in range(len2):
+            _edit_dist_step(
+                lev,
+                i + 1,
+                j + 1,
+                s1,
+                s2,
+                substitution_cost=substitution_cost,
+                transpositions=False,
+            )
+
+    # backtrace to find alignment
+    alignment = _edit_dist_backtrace(lev)
+    return alignment
+
+
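The edit_distance_align() function added above can be checked against the 'rain' -> 'shine' walkthrough in its docstring. A sketch (not part of the commit):

    from nltk.metrics.distance import edit_distance, edit_distance_align

    print(edit_distance('rain', 'shine'))                          # 3
    print(edit_distance('abcdef', 'acbdef', transpositions=True))  # 1 (swap b/c)
    print(edit_distance_align('rain', 'shine'))
    # per the docstring: [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (4, 5)]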
 def binary_distance(label1, label2):
     """Simple equality test.
 
@@ -112,7 +192,9 @@ def jaccard_distance(label1, label2):
     """Distance metric comparing set-similarity.
 
     """
-    return (len(label1.union(label2)) - len(label1.intersection(label2)))/len(label1.union(label2))
+    return (len(label1.union(label2)) - len(label1.intersection(label2))) / len(
+        label1.union(label2)
+    )
 
 
 def masi_distance(label1, label2):
@@ -121,7 +203,7 @@ def masi_distance(label1, label2):
 
     >>> from nltk.metrics import masi_distance
     >>> masi_distance(set([1, 2]), set([1, 2, 3, 4]))
-    0.335
+    0.665
 
     Passonneau 2006, Measuring Agreement on Set-Valued Items (MASI)
     for Semantic and Pragmatic Annotation.
@@ -140,10 +222,10 @@ def masi_distance(label1, label2):
     else:
         m = 0
 
-    return (1 - (len_intersection / float(len_union))) * m
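+    # i.e. MASI distance = 1 - (Jaccard similarity * monotonicity weight m)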
+    return 1 - len_intersection / len_union * m
 
 
-def interval_distance(label1,label2):
+def interval_distance(label1, label2):
     """Krippendorff's interval distance metric
 
     >>> from nltk.metrics import interval_distance
@@ -155,7 +237,7 @@ def interval_distance(label1,label2):
 
     try:
         return pow(label1 - label2, 2)
-#        return pow(list(label1)[0]-list(label2)[0],2)
+    #        return pow(list(label1)[0]-list(label2)[0],2)
     except:
         print("non-numeric labels not supported with interval distance")
 
@@ -168,33 +250,229 @@ def presence(label):
 
 
 def fractional_presence(label):
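+    # Returns a distance function over label sets x and y: |1/|x| - 1/|y||
+    # if label is in both, 1/|x| (resp. 1/|y|) if it is only in x (resp. y),
+    # and 0.0 if it is in neither.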
-    return lambda x, y:\
-        abs(((1.0 / len(x)) - (1.0 / len(y)))) * (label in x and label in y) \
-        or 0.0 * (label not in x and label not in y) \
-        or abs((1.0 / len(x))) * (label in x and label not in y) \
+    return (
+        lambda x, y: abs(((1.0 / len(x)) - (1.0 / len(y))))
+        * (label in x and label in y)
+        or 0.0 * (label not in x and label not in y)
+        or abs((1.0 / len(x))) * (label in x and label not in y)
         or ((1.0 / len(y))) * (label not in x and label in y)
+    )
 
 
 def custom_distance(file):
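+    # Expects a tab-separated file, one "labelA<TAB>labelB<TAB>distance" per line.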
     data = {}
-    with open(file, 'r') as infile:
+    with open(file, "r") as infile:
         for l in infile:
             labelA, labelB, dist = l.strip().split("\t")
             labelA = frozenset([labelA])
             labelB = frozenset([labelB])
-            data[frozenset([labelA,labelB])] = float(dist)
-    return lambda x,y:data[frozenset([x,y])]
+            data[frozenset([labelA, labelB])] = float(dist)
+    return lambda x, y: data[frozenset([x, y])]
 
 
-def demo():
-    edit_distance_examples = [
-        ("rain", "shine"), ("abcdef", "acbdef"), ("language", "lnaguaeg"),
-        ("language", "lnaugage"), ("language", "lngauage")]
-    for s1, s2 in edit_distance_examples:
-        print("Edit distance between '%s' and '%s':" % (s1, s2), edit_distance(s1, s2))
-    for s1, s2 in edit_distance_examples:
-        print("Edit distance with transpositions between '%s' and '%s':" % (s1, s2), edit_distance(s1, s2, transpositions=True))
+def jaro_similarity(s1, s2):
+    """
+    Computes the Jaro similarity between two sequences from:
 
+        Matthew A. Jaro (1989). Advances in record linkage methodology
+        as applied to the 1985 census of Tampa Florida. Journal of the
+        American Statistical Association. 84 (406): 414-20.
+
+    The Jaro distance between two words is the minimum number of
+    single-character transpositions required to change one word into
+    another. The Jaro similarity formula from
+    https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance :
+
+        jaro_sim = 0 if m = 0 else 1/3 * (m/|s_1| + m/|s_2| + (m-t)/m)
+
+    where:
+        - |s_i| is the length of string s_i
+        - m is the no. of matching characters
+        - t is half the number of transpositions.
+
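+    A quick illustrative check (editorial addition; the value agrees with
+    the jaro_scores table in jaro_winkler_similarity() below):
+
+    >>> round(jaro_similarity('billy', 'bolly'), 3)
+    0.867
+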
+    """
+    # First, store the length of the strings
+    # because they will be re-used several times.
+    len_s1, len_s2 = len(s1), len(s2)
+
+    # The upper bound of the distance for being a matched character.
+    match_bound = max(len_s1, len_s2) // 2 - 1
+
+    # Initialize the counts for matches and transpositions.
+    matches = 0  # no. of matched characters in s1 and s2
+    transpositions = 0  # no. of transpositions between s1 and s2
+    flagged_1 = []  # positions in s1 which are matches to some character in s2
+    flagged_2 = []  # positions in s2 which are matches to some character in s1
+
+    # Iterate through sequences, check for matches and compute transpositions.
+    for i in range(len_s1):  # Iterate through each character.
+        upperbound = min(i + match_bound, len_s2 - 1)
+        lowerbound = max(0, i - match_bound)
+        for j in range(lowerbound, upperbound + 1):
+            if s1[i] == s2[j] and j not in flagged_2:
+                matches += 1
+                flagged_1.append(i)
+                flagged_2.append(j)
+                break
+    flagged_2.sort()
+    for i, j in zip(flagged_1, flagged_2):
+        if s1[i] != s2[j]:
+            transpositions += 1
+
+    if matches == 0:
+        return 0
+    else:
+        return (
+            1
+            / 3
+            * (
+                matches / len_s1
+                + matches / len_s2
+                + (matches - transpositions // 2) / matches
+            )
+        )
+
+
+def jaro_winkler_similarity(s1, s2, p=0.1, max_l=4):
+    """
+    The Jaro Winkler distance is an extension of the Jaro similarity in:
+
+        William E. Winkler. 1990. String Comparator Metrics and Enhanced
+        Decision Rules in the Fellegi-Sunter Model of Record Linkage.
+        Proceedings of the Section on Survey Research Methods.
+        American Statistical Association: 354-359.
+    such that:
+
+        jaro_winkler_sim = jaro_sim + ( l * p * (1 - jaro_sim) )
+
+    where,
+
+        - jaro_sim is the output from the Jaro similarity
+          (see jaro_similarity())
+        - l is the length of the common prefix at the start of the strings
+            - this implementation provides an upper bound for the l value
+              to keep the prefixes. A common value of this upper bound is 4.
+        - p is the constant scaling factor to overweigh common prefixes.
+          The Jaro-Winkler similarity will fall within the [0, 1] bound,
+          given that max(p) <= 0.25; the default is p=0.1 as in Winkler (1990)
+
+
+    Test using outputs from https://www.census.gov/srd/papers/pdf/rr93-8.pdf
+    from "Table 5 Comparison of String Comparators Rescaled between 0 and 1"
+
+    >>> winkler_examples = [("billy", "billy"), ("billy", "bill"), ("billy", "blily"),
+    ... ("massie", "massey"), ("yvette", "yevett"), ("billy", "bolly"), ("dwayne", "duane"),
+    ... ("dixon", "dickson"), ("billy", "susan")]
+
+    >>> winkler_scores = [1.000, 0.967, 0.947, 0.944, 0.911, 0.893, 0.858, 0.853, 0.000]
+    >>> jaro_scores =    [1.000, 0.933, 0.933, 0.889, 0.889, 0.867, 0.822, 0.790, 0.000]
+
+    # One way to match the values in Winkler's paper is to provide a
+    # different p scaling factor for different pairs of strings, e.g.
+    >>> p_factors = [0.1, 0.125, 0.20, 0.125, 0.20, 0.20, 0.20, 0.15, 0.1]
+
+    >>> for (s1, s2), jscore, wscore, p in zip(winkler_examples, jaro_scores, winkler_scores, p_factors):
+    ...     assert round(jaro_similarity(s1, s2), 3) == jscore
+    ...     assert round(jaro_winkler_similarity(s1, s2, p=p), 3) == wscore
+
+
+    Test using outputs from https://www.census.gov/srd/papers/pdf/rr94-5.pdf from
+    "Table 2.1. Comparison of String Comparators Using Last Names, First Names, and Street Names"
+
+    >>> winkler_examples = [('SHACKLEFORD', 'SHACKELFORD'), ('DUNNINGHAM', 'CUNNIGHAM'),
+    ... ('NICHLESON', 'NICHULSON'), ('JONES', 'JOHNSON'), ('MASSEY', 'MASSIE'),
+    ... ('ABROMS', 'ABRAMS'), ('HARDIN', 'MARTINEZ'), ('ITMAN', 'SMITH'),
+    ... ('JERALDINE', 'GERALDINE'), ('MARHTA', 'MARTHA'), ('MICHELLE', 'MICHAEL'),
+    ... ('JULIES', 'JULIUS'), ('TANYA', 'TONYA'), ('DWAYNE', 'DUANE'), ('SEAN', 'SUSAN'),
+    ... ('JON', 'JOHN'), ('JON', 'JAN'), ('BROOKHAVEN', 'BRROKHAVEN'),
+    ... ('BROOK HALLOW', 'BROOK HLLW'), ('DECATUR', 'DECATIR'), ('FITZRUREITER', 'FITZENREITER'),
+    ... ('HIGBEE', 'HIGHEE'), ('HIGBEE', 'HIGVEE'), ('LACURA', 'LOCURA'), ('IOWA', 'IONA'), ('1ST', 'IST')]
+
+    >>> jaro_scores =   [0.970, 0.896, 0.926, 0.790, 0.889, 0.889, 0.722, 0.467, 0.926,
+    ... 0.944, 0.869, 0.889, 0.867, 0.822, 0.783, 0.917, 0.000, 0.933, 0.944, 0.905,
+    ... 0.856, 0.889, 0.889, 0.889, 0.833, 0.000]
+
+    >>> winkler_scores = [0.982, 0.896, 0.956, 0.832, 0.944, 0.922, 0.722, 0.467, 0.926,
+    ... 0.961, 0.921, 0.933, 0.880, 0.858, 0.805, 0.933, 0.000, 0.947, 0.967, 0.943,
+    ... 0.913, 0.922, 0.922, 0.900, 0.867, 0.000]
+
+    # One way to match the values in Winkler's paper is to provide a
+    # different p scaling factor for different pairs of strings, e.g.
+    >>> p_factors = [0.1, 0.1, 0.1, 0.1, 0.125, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.20,
+    ... 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
+
+
+    >>> for (s1, s2), jscore, wscore, p in zip(winkler_examples, jaro_scores, winkler_scores, p_factors):
+    ...     if (s1, s2) in [('JON', 'JAN'), ('1ST', 'IST')]:
+    ...         continue  # Skip bad examples from the paper.
+    ...     assert round(jaro_similarity(s1, s2), 3) == jscore
+    ...     assert round(jaro_winkler_similarity(s1, s2, p=p), 3) == wscore
+
+
+
+    This test case shows that the output of the Jaro-Winkler similarity
+    depends on the product l * p rather than on the product max_l * p:
+    here max_l * p > 1, yet l * p <= 1.
+
+    >>> round(jaro_winkler_similarity('TANYA', 'TONYA', p=0.1, max_l=100), 3)
+    0.88
+
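+    A small additional check with the default parameters (editorial
+    addition; with jaro_sim = 0.822 and a common prefix of length l = 1,
+    the result is 0.822 + 0.1 * (1 - 0.822) = 0.84):
+
+    >>> round(jaro_winkler_similarity('dwayne', 'duane'), 3)
+    0.84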
+
+    """
+    # To ensure that the output of the Jaro-Winkler similarity
+    # falls between [0,1], the product l * p must also
+    # fall between [0,1].
+    if not 0 <= max_l * p <= 1:
+        warnings.warn(
+            "The product `max_l * p` might not fall between [0,1]. "
+            "Jaro-Winkler similarity might not be between 0 and 1."
+        )
+
+    # Compute the Jaro similarity
+    jaro_sim = jaro_similarity(s1, s2)
+
+    # Compute the length of the common prefix of s1 and s2,
+    # capped at the user-supplied upper bound max_l.
+    l = 0
+    # zip() will automatically loop until the end of shorter string.
+    for s1_i, s2_i in zip(s1, s2):
+        if s1_i == s2_i:
+            l += 1
+        else:
+            break
+        if l == max_l:
+            break
+    # Return the similarity value as described in docstring.
+    return jaro_sim + (l * p * (1 - jaro_sim))
+
+
+def demo():
+    string_distance_examples = [
+        ("rain", "shine"),
+        ("abcdef", "acbdef"),
+        ("language", "lnaguaeg"),
+        ("language", "lnaugage"),
+        ("language", "lngauage"),
+    ]
+    for s1, s2 in string_distance_examples:
+        print("Edit distance btwn '%s' and '%s':" % (s1, s2), edit_distance(s1, s2))
+        print(
+            "Edit dist with transpositions btwn '%s' and '%s':" % (s1, s2),
+            edit_distance(s1, s2, transpositions=True),
+        )
+        print("Jaro similarity btwn '%s' and '%s':" % (s1, s2), jaro_similarity(s1, s2))
+        print(
+            "Jaro-Winkler similarity btwn '%s' and '%s':" % (s1, s2),
+            jaro_winkler_similarity(s1, s2),
+        )
+        print(
+            "Jaro-Winkler distance btwn '%s' and '%s':" % (s1, s2),
+            1 - jaro_winkler_similarity(s1, s2),
+        )
     s1 = set([1, 2, 3, 4])
     s2 = set([3, 4, 5])
     print("s1:", s1)
@@ -203,5 +481,6 @@ def demo():
     print("Jaccard distance:", jaccard_distance(s1, s2))
     print("MASI distance:", masi_distance(s1, s2))
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/metrics/distance.pyc b/nlp_resource_data/nltk/metrics/distance.pyc
deleted file mode 100755 (executable)
index a81fe5d..0000000
Binary files a/nlp_resource_data/nltk/metrics/distance.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index d23e4b5..46e8fce
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Agreement Metrics
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Lauri Hallila <laurihallila@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -24,7 +24,7 @@ from math import sqrt
 
 
 def get_words_from_dictionary(lemmas):
-    '''
+    """
     Get original set of words used for analysis.
 
     :param lemmas: A dictionary where keys are lemmas and values are sets
@@ -32,7 +32,7 @@ def get_words_from_dictionary(lemmas):
     :type lemmas: dict(str): list(str)
     :return: Set of words that exist as values in the dictionary
     :rtype: set(str)
-    '''
+    """
     words = set()
     for lemma in lemmas:
         words.update(set(lemmas[lemma]))
@@ -40,7 +40,7 @@ def get_words_from_dictionary(lemmas):
 
 
 def _truncate(words, cutlength):
-    '''Group words by stems defined by truncating them at given length.
+    """Group words by stems defined by truncating them at given length.
 
     :param words: Set of words used for analysis
     :param cutlength: Words are stemmed by cutting at this length.
@@ -49,7 +49,7 @@ def _truncate(words, cutlength):
     :return: Dictionary where keys are stems and values are sets of words
     corresponding to that stem.
     :rtype: dict(str): set(str)
-    '''
+    """
     stems = {}
     for word in words:
         stem = word[:cutlength]
@@ -62,7 +62,7 @@ def _truncate(words, cutlength):
 
 # Reference: http://en.wikipedia.org/wiki/Line-line_intersection
 def _count_intersection(l1, l2):
-    '''Count intersection between two line segments defined by coordinate pairs.
+    """Count intersection between two line segments defined by coordinate pairs.
 
     :param l1: Tuple of two coordinate pairs defining the first line segment
     :param l2: Tuple of two coordinate pairs defining the second line segment
@@ -70,7 +70,7 @@ def _count_intersection(l1, l2):
     :type l2: tuple(float, float)
     :return: Coordinates of the intersection
     :rtype: tuple(float, float)
-    '''
+    """
     x1, y1 = l1[0]
     x2, y2 = l1[1]
     x3, y3 = l2[0]
@@ -78,7 +78,7 @@ def _count_intersection(l1, l2):
 
     denominator = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
 
-    if denominator == 0.0: # lines are parallel
+    if denominator == 0.0:  # lines are parallel
         if x1 == x2 == x3 == x4 == 0.0:
             # When lines are parallel, they must be on the y-axis.
             # We can ignore x-axis because we stop counting the
@@ -87,27 +87,31 @@ def _count_intersection(l1, l2):
             # OI (y-axis) diminishes when we go along the truncation line.
             return (0.0, y4)
 
-    x = ((x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)) / denominator
-    y = ((x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)) / denominator
+    x = (
+        (x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)
+    ) / denominator
+    y = (
+        (x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)
+    ) / denominator
     return (x, y)
 
 
 def _get_derivative(coordinates):
-    '''Get derivative of the line from (0,0) to given coordinates.
+    """Get derivative of the line from (0,0) to given coordinates.
 
     :param coordinates: A coordinate pair
     :type coordinates: tuple(float, float)
     :return: Derivative; inf if x is zero
     :rtype: float
-    '''
+    """
     try:
         return coordinates[1] / coordinates[0]
     except ZeroDivisionError:
-        return float('inf')
+        return float("inf")
 
 
 def _calculate_cut(lemmawords, stems):
-    '''Count understemmed and overstemmed pairs for (lemma, stem) pair with common words.
+    """Count understemmed and overstemmed pairs for (lemma, stem) pair with common words.
 
     :param lemmawords: Set or list of words corresponding to certain lemma.
     :param stems: A dictionary where keys are stems and values are sets
@@ -117,7 +121,7 @@ def _calculate_cut(lemmawords, stems):
     :return: Amount of understemmed and overstemmed pairs contributed by words
     existing in both lemmawords and stems.
     :rtype: tuple(float, float)
-    '''
+    """
     umt, wmt = 0.0, 0.0
     for stem in stems:
         cut = set(lemmawords) & set(stems[stem])
@@ -132,7 +136,7 @@ def _calculate_cut(lemmawords, stems):
 
 
 def _calculate(lemmas, stems):
-    '''Calculate actual and maximum possible amounts of understemmed and overstemmed word pairs.
+    """Calculate actual and maximum possible amounts of understemmed and overstemmed word pairs.
 
     :param lemmas: A dictionary where keys are lemmas and values are sets
     or lists of words corresponding to that lemma.
@@ -145,7 +149,7 @@ def _calculate(lemmas, stems):
     global wrongly merged total (gwmt) and
     global desired non-merge total (gdnt).
     :rtype: tuple(float, float, float, float)
-    '''
+    """
 
     n = sum(len(lemmas[word]) for word in lemmas)
 
@@ -173,7 +177,7 @@ def _calculate(lemmas, stems):
 
 
 def _indexes(gumt, gdmt, gwmt, gdnt):
-    '''Count Understemming Index (UI), Overstemming Index (OI) and Stemming Weight (SW).
+    """Count Understemming Index (UI), Overstemming Index (OI) and Stemming Weight (SW).
 
     :param gumt, gdmt, gwmt, gdnt: Global unachieved merge total (gumt),
     global desired merge total (gdmt),
@@ -184,7 +188,7 @@ def _indexes(gumt, gdmt, gwmt, gdnt):
     Overstemming Index (OI) and
     Stemming Weight (SW).
     :rtype: tuple(float, float, float)
-    '''
+    """
     # Calculate Understemming Index (UI),
     # Overstemming Index (OI) and Stemming Weight (SW)
     try:
@@ -202,24 +206,25 @@ def _indexes(gumt, gdmt, gwmt, gdnt):
     except ZeroDivisionError:
         if oi == 0.0:
             # OI and UI are 0, define SW as 'not a number'
-            sw = float('nan')
+            sw = float("nan")
         else:
             # UI is 0, define SW as infinity
-            sw = float('inf')
+            sw = float("inf")
     return (ui, oi, sw)
 
 
 class Paice(object):
-    '''Class for storing lemmas, stems and evaluation metrics.'''
+    """Class for storing lemmas, stems and evaluation metrics."""
+
     def __init__(self, lemmas, stems):
-        '''
+        """
         :param lemmas: A dictionary where keys are lemmas and values are sets
         or lists of words corresponding to that lemma.
         :param stems: A dictionary where keys are stems and values are sets
         or lists of words corresponding to that stem.
         :type lemmas: dict(str): list(str)
         :type stems: dict(str): set(str)
-        '''
+        """
         self.lemmas = lemmas
         self.stems = stems
         self.coords = []
@@ -229,20 +234,20 @@ class Paice(object):
         self.update()
 
     def __str__(self):
-        text = ['Global Unachieved Merge Total (GUMT): %s\n' % self.gumt]
-        text.append('Global Desired Merge Total (GDMT): %s\n' % self.gdmt)
-        text.append('Global Wrongly-Merged Total (GWMT): %s\n' % self.gwmt)
-        text.append('Global Desired Non-merge Total (GDNT): %s\n' % self.gdnt)
-        text.append('Understemming Index (GUMT / GDMT): %s\n' % self.ui)
-        text.append('Overstemming Index (GWMT / GDNT): %s\n' % self.oi)
-        text.append('Stemming Weight (OI / UI): %s\n' % self.sw)
-        text.append('Error-Rate Relative to Truncation (ERRT): %s\r\n' % self.errt)
-        coordinates = ' '.join(['(%s, %s)' % item for item in self.coords])
-        text.append('Truncation line: %s' % coordinates)
-        return ''.join(text)
+        text = ["Global Unachieved Merge Total (GUMT): %s\n" % self.gumt]
+        text.append("Global Desired Merge Total (GDMT): %s\n" % self.gdmt)
+        text.append("Global Wrongly-Merged Total (GWMT): %s\n" % self.gwmt)
+        text.append("Global Desired Non-merge Total (GDNT): %s\n" % self.gdnt)
+        text.append("Understemming Index (GUMT / GDMT): %s\n" % self.ui)
+        text.append("Overstemming Index (GWMT / GDNT): %s\n" % self.oi)
+        text.append("Stemming Weight (OI / UI): %s\n" % self.sw)
+        text.append("Error-Rate Relative to Truncation (ERRT): %s\r\n" % self.errt)
+        coordinates = " ".join(["(%s, %s)" % item for item in self.coords])
+        text.append("Truncation line: %s" % coordinates)
+        return "".join(text)
 
     def _get_truncation_indexes(self, words, cutlength):
-        '''Count (UI, OI) when stemming is done by truncating words at \'cutlength\'.
+        """Count (UI, OI) when stemming is done by truncating words at \'cutlength\'.
 
         :param words: Words used for the analysis
         :param cutlength: Words are stemmed by cutting them at this length
@@ -250,7 +255,7 @@ class Paice(object):
         :type cutlength: int
         :return: Understemming and overstemming indexes
         :rtype: tuple(int, int)
-        '''
+        """
 
         truncated = _truncate(words, cutlength)
         gumt, gdmt, gwmt, gdnt = _calculate(self.lemmas, truncated)
@@ -258,7 +263,7 @@ class Paice(object):
         return (ui, oi)
 
     def _get_truncation_coordinates(self, cutlength=0):
-        '''Count (UI, OI) pairs for truncation points until we find the segment where (ui, oi) crosses the truncation line.
+        """Count (UI, OI) pairs for truncation points until we find the segment where (ui, oi) crosses the truncation line.
 
         :param cutlength: Optional parameter to start counting from (ui, oi)
         coordinates gotten by stemming at this length. Useful for speeding up
@@ -267,7 +272,7 @@ class Paice(object):
         :type cutlength: int
         :return: List of coordinate pairs that define the truncation line
         :rtype: list(tuple(float, float))
-        '''
+        """
         words = get_words_from_dictionary(self.lemmas)
         maxlength = max(len(word) for word in words)
 
@@ -297,30 +302,30 @@ class Paice(object):
         return coords
 
     def _errt(self):
-        '''Count Error-Rate Relative to Truncation (ERRT).
+        """Count Error-Rate Relative to Truncation (ERRT).
 
         :return: ERRT, length of the line from origo to (UI, OI) divided by
         the length of the line from origo to the point defined by the same
         line when extended until the truncation line.
         :rtype: float
-        '''
+        """
         # Count (UI, OI) pairs for truncation points until we find the segment where (ui, oi) crosses the truncation line
         self.coords = self._get_truncation_coordinates()
         if (0.0, 0.0) in self.coords:
             # Truncation line goes through origo, so ERRT cannot be counted
             if (self.ui, self.oi) != (0.0, 0.0):
-                return float('inf')
+                return float("inf")
             else:
-                return float('nan')
+                return float("nan")
         if (self.ui, self.oi) == (0.0, 0.0):
             # (ui, oi) is origo; define errt as 0.0
             return 0.0
         # Count the intersection point
         # Note that (self.ui, self.oi) cannot be (0.0, 0.0) and self.coords has different coordinates
         # so we have actual line segments instead of a line segment and a point
-        intersection = _count_intersection(((0, 0), (self.ui, self.oi)),
-                                           self.coords[-2:]
-                                           )
+        intersection = _count_intersection(
+            ((0, 0), (self.ui, self.oi)), self.coords[-2:]
+        )
         # Count OP (length of the line from origo to (ui, oi))
         op = sqrt(self.ui ** 2 + self.oi ** 2)
         # Count OT (length of the line from origo to truncation line that goes through (ui, oi))
@@ -329,53 +334,56 @@ class Paice(object):
         return op / ot
 
     def update(self):
-        '''Update statistics after lemmas and stems have been set.'''
+        """Update statistics after lemmas and stems have been set."""
         self.gumt, self.gdmt, self.gwmt, self.gdnt = _calculate(self.lemmas, self.stems)
         self.ui, self.oi, self.sw = _indexes(self.gumt, self.gdmt, self.gwmt, self.gdnt)
         self.errt = self._errt()
 
 
 def demo():
-    '''Demonstration of the module.'''
+    """Demonstration of the module."""
     # Some words with their real lemmas
-    lemmas = {'kneel': ['kneel', 'knelt'],
-              'range': ['range', 'ranged'],
-              'ring': ['ring', 'rang', 'rung']
-              }
+    lemmas = {
+        "kneel": ["kneel", "knelt"],
+        "range": ["range", "ranged"],
+        "ring": ["ring", "rang", "rung"],
+    }
     # Same words with stems from a stemming algorithm
-    stems = {'kneel': ['kneel'],
-             'knelt': ['knelt'],
-             'rang': ['rang', 'range', 'ranged'],
-             'ring': ['ring'],
-             'rung': ['rung']
-             }
-    print('Words grouped by their lemmas:')
+    stems = {
+        "kneel": ["kneel"],
+        "knelt": ["knelt"],
+        "rang": ["rang", "range", "ranged"],
+        "ring": ["ring"],
+        "rung": ["rung"],
+    }
+    print("Words grouped by their lemmas:")
     for lemma in sorted(lemmas):
-        print('%s => %s' % (lemma, ' '.join(lemmas[lemma])))
+        print("%s => %s" % (lemma, " ".join(lemmas[lemma])))
     print()
-    print('Same words grouped by a stemming algorithm:')
+    print("Same words grouped by a stemming algorithm:")
     for stem in sorted(stems):
-        print('%s => %s' % (stem, ' '.join(stems[stem])))
+        print("%s => %s" % (stem, " ".join(stems[stem])))
     print()
     p = Paice(lemmas, stems)
     print(p)
     print()
     # Let's "change" results from a stemming algorithm
-    stems = {'kneel': ['kneel'],
-             'knelt': ['knelt'],
-             'rang': ['rang'],
-             'range': ['range', 'ranged'],
-             'ring': ['ring'],
-             'rung': ['rung']
-             }
-    print('Counting stats after changing stemming results:')
+    stems = {
+        "kneel": ["kneel"],
+        "knelt": ["knelt"],
+        "rang": ["rang"],
+        "range": ["range", "ranged"],
+        "ring": ["ring"],
+        "rung": ["rung"],
+    }
+    print("Counting stats after changing stemming results:")
     for stem in sorted(stems):
-        print('%s => %s' % (stem, ' '.join(stems[stem])))
+        print("%s => %s" % (stem, " ".join(stems[stem])))
     print()
     p.stems = stems
     p.update()
     print(p)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/metrics/paice.pyc b/nlp_resource_data/nltk/metrics/paice.pyc
deleted file mode 100755 (executable)
index bbcfd69..0000000
Binary files a/nlp_resource_data/nltk/metrics/paice.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index ad78cc8..b5156ed
@@ -1,19 +1,16 @@
 # Natural Language Toolkit: Evaluation
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, division
 
 from math import fabs
 import operator
 from random import shuffle
 from functools import reduce
 
-from six.moves import range, zip
-
 try:
     from scipy.stats.stats import betai
 except ImportError:
@@ -21,6 +18,7 @@ except ImportError:
 
 from nltk.util import LazyConcatenation, LazyMap
 
+
 def accuracy(reference, test):
     """
     Given a list of reference values and a corresponding list of test
@@ -40,6 +38,7 @@ def accuracy(reference, test):
         raise ValueError("Lists must have the same length.")
     return sum(x == y for x, y in zip(reference, test)) / len(test)
 
+
 def precision(reference, test):
     """
     Given a set of reference values and a set of test values, return
@@ -53,15 +52,15 @@ def precision(reference, test):
     :param test: A set of values to compare against the reference set.
     :rtype: float or None
     """
-    if (not hasattr(reference, 'intersection') or
-        not hasattr(test, 'intersection')):
-        raise TypeError('reference and test should be sets')
+    if not hasattr(reference, "intersection") or not hasattr(test, "intersection"):
+        raise TypeError("reference and test should be sets")
 
     if len(test) == 0:
         return None
     else:
         return len(reference.intersection(test)) / len(test)
 
+
 def recall(reference, test):
     """
     Given a set of reference values and a set of test values, return
@@ -75,15 +74,15 @@ def recall(reference, test):
     :param test: A set of values to compare against the reference set.
     :rtype: float or None
     """
-    if (not hasattr(reference, 'intersection') or
-        not hasattr(test, 'intersection')):
-        raise TypeError('reference and test should be sets')
+    if not hasattr(reference, "intersection") or not hasattr(test, "intersection"):
+        raise TypeError("reference and test should be sets")
 
     if len(reference) == 0:
         return None
     else:
         return len(reference.intersection(test)) / len(reference)
 
+
 def f_measure(reference, test, alpha=0.5):
     """
     Given a set of reference values and a set of test values, return
@@ -114,7 +113,8 @@ def f_measure(reference, test, alpha=0.5):
         return None
     if p == 0 or r == 0:
         return 0
-    return 1.0 / (alpha / p + (1-alpha) / r)
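+    # i.e. the weighted (by alpha) harmonic mean of precision and recall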
+    return 1.0 / (alpha / p + (1 - alpha) / r)
+
 
 def log_likelihood(reference, test):
     """
@@ -132,10 +132,10 @@ def log_likelihood(reference, test):
         raise ValueError("Lists must have the same length.")
 
     # Return the average value of dist.logprob(val).
-    total_likelihood = sum(dist.logprob(val)
-                            for (val, dist) in zip(reference, test))
+    total_likelihood = sum(dist.logprob(val) for (val, dist) in zip(reference, test))
     return total_likelihood / len(reference)
 
+
 def approxrand(a, b, **kwargs):
     """
     Returns an approximate significance level between two lists of
@@ -157,21 +157,20 @@ def approxrand(a, b, **kwargs):
     :param b: another list of independently generated test values
     :type b: list
     """
-    shuffles = kwargs.get('shuffles', 999)
+    shuffles = kwargs.get("shuffles", 999)
     # there's no point in trying to shuffle beyond all possible permutations
-    shuffles = \
-        min(shuffles, reduce(operator.mul, range(1, len(a) + len(b) + 1)))
-    stat = kwargs.get('statistic', lambda lst: sum(lst) / len(lst))
-    verbose = kwargs.get('verbose', False)
+    shuffles = min(shuffles, reduce(operator.mul, range(1, len(a) + len(b) + 1)))
+    stat = kwargs.get("statistic", lambda lst: sum(lst) / len(lst))
+    verbose = kwargs.get("verbose", False)
 
     if verbose:
-        print('shuffles: %d' % shuffles)
+        print("shuffles: %d" % shuffles)
 
     actual_stat = fabs(stat(a) - stat(b))
 
     if verbose:
-        print('actual statistic: %f' % actual_stat)
-        print('-' * 60)
+        print("actual statistic: %f" % actual_stat)
+        print("-" * 60)
 
     c = 1e-100
     lst = LazyConcatenation([a, b])
@@ -179,26 +178,26 @@ def approxrand(a, b, **kwargs):
 
     for i in range(shuffles):
         if verbose and i % 10 == 0:
-            print('shuffle: %d' % i)
+            print("shuffle: %d" % i)
 
         shuffle(indices)
 
-        pseudo_stat_a = stat(LazyMap(lambda i: lst[i], indices[:len(a)]))
-        pseudo_stat_b = stat(LazyMap(lambda i: lst[i], indices[len(a):]))
+        pseudo_stat_a = stat(LazyMap(lambda i: lst[i], indices[: len(a)]))
+        pseudo_stat_b = stat(LazyMap(lambda i: lst[i], indices[len(a) :]))
         pseudo_stat = fabs(pseudo_stat_a - pseudo_stat_b)
 
         if pseudo_stat >= actual_stat:
             c += 1
 
         if verbose and i % 10 == 0:
-            print('pseudo-statistic: %f' % pseudo_stat)
-            print('significance: %f' % ((c + 1) / (i + 1)))
-            print('-' * 60)
+            print("pseudo-statistic: %f" % pseudo_stat)
+            print("significance: %f" % ((c + 1) / (i + 1)))
+            print("-" * 60)
 
     significance = (c + 1) / (shuffles + 1)
 
     if verbose:
-        print('significance: %f' % significance)
+        print("significance: %f" % significance)
         if betai:
             for phi in [0.01, 0.05, 0.10, 0.15, 0.25, 0.50]:
                 print("prob(phi<=%f): %f" % (phi, betai(c, shuffles, phi)))
@@ -207,22 +206,23 @@ def approxrand(a, b, **kwargs):
 
 
 def demo():
-    print('-'*75)
-    reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
-    test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
-    print('Reference =', reference)
-    print('Test    =', test)
-    print('Accuracy:', accuracy(reference, test))
-
-    print('-'*75)
+    print("-" * 75)
+    reference = "DET NN VB DET JJ NN NN IN DET NN".split()
+    test = "DET VB VB DET NN NN NN IN DET NN".split()
+    print("Reference =", reference)
+    print("Test    =", test)
+    print("Accuracy:", accuracy(reference, test))
+
+    print("-" * 75)
     reference_set = set(reference)
     test_set = set(test)
-    print('Reference =', reference_set)
-    print('Test =   ', test_set)
-    print('Precision:', precision(reference_set, test_set))
-    print('   Recall:', recall(reference_set, test_set))
-    print('F-Measure:', f_measure(reference_set, test_set))
-    print('-'*75)
-
-if __name__ == '__main__':
+    print("Reference =", reference_set)
+    print("Test =   ", test_set)
+    print("Precision:", precision(reference_set, test_set))
+    print("   Recall:", recall(reference_set, test_set))
+    print("F-Measure:", f_measure(reference_set, test_set))
+    print("-" * 75)
+
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/metrics/scores.pyc b/nlp_resource_data/nltk/metrics/scores.pyc
deleted file mode 100755 (executable)
index 394df8b..0000000
Binary files a/nlp_resource_data/nltk/metrics/scores.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 668f68e..412e00d
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Text Segmentation Metrics
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 #         David Doukhan <david.doukhan@gmail.com>
@@ -8,7 +8,6 @@
 # For license information, see LICENSE.TXT
 
 
-
 """
 Text Segmentation Metrics
 
@@ -46,8 +45,6 @@ try:
 except ImportError:
     pass
 
-from six.moves import range
-
 
 def windowdiff(seg1, seg2, k, boundary="1", weighted=False):
     """
@@ -82,20 +79,22 @@ def windowdiff(seg1, seg2, k, boundary="1", weighted=False):
     if len(seg1) != len(seg2):
         raise ValueError("Segmentations have unequal length")
     if k > len(seg1):
-        raise ValueError("Window width k should be smaller or equal than segmentation lengths")
+        raise ValueError(
+            "Window width k should be smaller or equal than segmentation lengths"
+        )
     wd = 0
     for i in range(len(seg1) - k + 1):
-        ndiff = abs(seg1[i:i+k].count(boundary) - seg2[i:i+k].count(boundary))
+        ndiff = abs(seg1[i : i + k].count(boundary) - seg2[i : i + k].count(boundary))
         if weighted:
             wd += ndiff
         else:
             wd += min(1, ndiff)
-    return wd / (len(seg1) - k + 1.)
-
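+    # average the per-window differences over all len(seg1) - k + 1 windows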
+    return wd / (len(seg1) - k + 1.0)
 
 
 # Generalized Hamming Distance
 
+
 def _init_mat(nrows, ncols, ins_cost, del_cost):
     mat = np.empty((nrows, ncols))
     mat[0, :] = ins_cost * np.arange(ncols)
@@ -119,7 +118,7 @@ def _ghd_aux(mat, rowv, colv, ins_cost, del_cost, shift_cost_coeff):
             mat[i + 1, j + 1] = min(tcost, shift_cost)
 
 
-def ghd(ref, hyp, ins_cost=2.0, del_cost=2.0, shift_cost_coeff=1.0, boundary='1'):
+def ghd(ref, hyp, ins_cost=2.0, del_cost=2.0, shift_cost_coeff=1.0, boundary="1"):
     """
     Compute the Generalized Hamming Distance for a reference and a hypothetical
     segmentation, corresponding to the cost related to the transformation
@@ -185,7 +184,8 @@ def ghd(ref, hyp, ins_cost=2.0, del_cost=2.0, shift_cost_coeff=1.0, boundary='1'
 
 # Beeferman's Pk text segmentation evaluation metric
 
-def pk(ref, hyp, k=None, boundary='1'):
+
+def pk(ref, hyp, k=None, boundary="1"):
     """
     Compute the Pk metric for a pair of segmentations. A segmentation
     is any sequence over a vocabulary of two items (e.g. "0", "1"),
@@ -211,20 +211,21 @@ def pk(ref, hyp, k=None, boundary='1'):
     """
 
     if k is None:
-        k = int(round(len(ref) / (ref.count(boundary) * 2.)))
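+        # default window size: half the average reference segment length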
+        k = int(round(len(ref) / (ref.count(boundary) * 2.0)))
 
     err = 0
-    for i in range(len(ref)-k +1):
-        r = ref[i:i+k].count(boundary) > 0
-        h = hyp[i:i+k].count(boundary) > 0
+    for i in range(len(ref) - k + 1):
+        r = ref[i : i + k].count(boundary) > 0
+        h = hyp[i : i + k].count(boundary) > 0
         if r != h:
-           err += 1
-    return err / (len(ref)-k +1.)
+            err += 1
+    return err / (len(ref) - k + 1.0)
 
 
 # skip doctests if numpy is not installed
 def setup_module(module):
     from nose import SkipTest
+
     try:
         import numpy
     except ImportError:
old mode 100755 (executable)
new mode 100644 (file)
index 07b158e..a6d17db
@@ -1,15 +1,15 @@
 # Natural Language Toolkit: Spearman Rank Correlation
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Joel Nothman <jnothman@student.usyd.edu.au>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
-from __future__ import division
 
 """
 Tools for comparing ranked lists.
 """
 
+
 def _rank_dists(ranks1, ranks2):
     """Finds the difference between the values in ranks1 and ranks2 for keys
     present in both dicts. If the arguments are not dicts, they are converted
@@ -36,7 +36,7 @@ def spearman_correlation(ranks1, ranks2):
         res += d * d
         n += 1
     try:
-        return 1 - (6 * res / (n * (n*n - 1)))
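+        # classic formula: rho = 1 - 6 * sum(d^2) / (n * (n^2 - 1))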
+        return 1 - (6 * res / (n * (n * n - 1)))
     except ZeroDivisionError:
         # Result is undefined if only one item is ranked
         return 0.0
@@ -66,4 +66,3 @@ def ranks_from_scores(scores, rank_gap=1e-15):
 
         yield key, rank
         prev_score = score
-
diff --git a/nlp_resource_data/nltk/metrics/spearman.pyc b/nlp_resource_data/nltk/metrics/spearman.pyc
deleted file mode 100755 (executable)
index 4be8dee..0000000
Binary files a/nlp_resource_data/nltk/metrics/spearman.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index e03dc4e..ab1f761
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Miscellaneous modules
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nlp_resource_data/nltk/misc/__init__.pyc b/nlp_resource_data/nltk/misc/__init__.pyc
deleted file mode 100755 (executable)
index 5619b02..0000000
Binary files a/nlp_resource_data/nltk/misc/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/misc/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/misc/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8b0d95b
Binary files /dev/null and b/nlp_resource_data/nltk/misc/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/misc/__pycache__/babelfish.cpython-37.pyc b/nlp_resource_data/nltk/misc/__pycache__/babelfish.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8623bb4
Binary files /dev/null and b/nlp_resource_data/nltk/misc/__pycache__/babelfish.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
similarity index 73%
rename from nlp_resource_data/nltk/misc/chomsky.pyc
rename to nlp_resource_data/nltk/misc/__pycache__/chomsky.cpython-37.pyc
index 4cd5a7e..9de1f5d
Binary files a/nlp_resource_data/nltk/misc/chomsky.pyc and b/nlp_resource_data/nltk/misc/__pycache__/chomsky.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/misc/__pycache__/minimalset.cpython-37.pyc b/nlp_resource_data/nltk/misc/__pycache__/minimalset.cpython-37.pyc
new file mode 100644 (file)
index 0000000..919b3eb
Binary files /dev/null and b/nlp_resource_data/nltk/misc/__pycache__/minimalset.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/misc/__pycache__/sort.cpython-37.pyc b/nlp_resource_data/nltk/misc/__pycache__/sort.cpython-37.pyc
new file mode 100644 (file)
index 0000000..bb9d353
Binary files /dev/null and b/nlp_resource_data/nltk/misc/__pycache__/sort.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/misc/__pycache__/wordfinder.cpython-37.pyc b/nlp_resource_data/nltk/misc/__pycache__/wordfinder.cpython-37.pyc
new file mode 100644 (file)
index 0000000..364f733
Binary files /dev/null and b/nlp_resource_data/nltk/misc/__pycache__/wordfinder.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index 481a508..a43fd4d
@@ -4,7 +4,7 @@ translation service; this service is no longer available; this
 module is kept in NLTK source code in order to provide better error
 messages for people following the NLTK Book 2.0.
 """
-from __future__ import print_function
+
 
 def babelize_shell():
     print("Babelfish online translation service is no longer available.")
diff --git a/nlp_resource_data/nltk/misc/babelfish.pyc b/nlp_resource_data/nltk/misc/babelfish.pyc
deleted file mode 100755 (executable)
index 0b25c4e..0000000
Binary files a/nlp_resource_data/nltk/misc/babelfish.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 9cfb5c2..0d4b065
@@ -12,7 +12,6 @@ To generate n sentences of linguistic wisdom, type
     (CHOMSKY n)  -- for example
     (CHOMSKY 5) generates half a screen of linguistic truth.
 """
-from __future__ import print_function
 
 leadins = """To characterize a linguistic level L,
     On the other hand,
@@ -92,7 +91,7 @@ verbs = """can be defined in such a way as to impose
     is necessary to impose an interpretation on
     appears to correlate rather closely with
     is rather different from"""
-#List of VERBs chosen for autorecursive obfuscation.
+# List of VERBs chosen for autorecursive obfuscation.
 
 objects = """ problems of phonemic and morphological analysis.
     a corpus of utterance tokens upon which conformity has been defined \
@@ -119,8 +118,6 @@ scope of a complex symbol.
 import textwrap, random
 from itertools import chain, islice
 
-from six.moves import zip
-
 
 def generate_chomsky(times=5, line_length=72):
     parts = []
@@ -131,5 +128,6 @@ def generate_chomsky(times=5, line_length=72):
     output = chain(*islice(zip(*parts), 0, times))
     print(textwrap.fill(" ".join(output), line_length))
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     generate_chomsky()
old mode 100755 (executable)
new mode 100644 (file)
index 1299f19..ea49d08
@@ -1,12 +1,13 @@
 # Natural Language Toolkit: Minimal Sets
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
 from collections import defaultdict
 
+
 class MinimalSet(object):
     """
     Find contexts where more than one possible target value can
@@ -17,6 +18,7 @@ class MinimalSet(object):
     cases like wind (noun) 'air in rapid motion', vs wind (verb)
     'coil, wrap'.
     """
+
     def __init__(self, parameters=None):
         """
         Create a new minimal set.
@@ -25,9 +27,9 @@ class MinimalSet(object):
         :type parameters: list(tuple(str, str, str))
         """
         self._targets = set()  # the contrastive information
-        self._contexts = set() # what we are controlling for
+        self._contexts = set()  # what we are controlling for
         self._seen = defaultdict(set)  # to record what we have seen
-        self._displays = {}    # what we will display
+        self._displays = {}  # what we will display
 
         if parameters:
             for context, target, display in parameters:
@@ -75,9 +77,9 @@ class MinimalSet(object):
         result = []
         for target in self._targets:
             x = self.display(context, target)
-            if x: result.append(x)
+            if x:
+                result.append(x)
         return result
 
     def targets(self):
         return self._targets
-
diff --git a/nlp_resource_data/nltk/misc/minimalset.pyc b/nlp_resource_data/nltk/misc/minimalset.pyc
deleted file mode 100755 (executable)
index 35253fa..0000000
Binary files a/nlp_resource_data/nltk/misc/minimalset.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 8e1dd38..0dbaf99
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: List Sorting
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -10,8 +10,6 @@ This module provides a variety of list sorting algorithms, to
 illustrate the many different algorithms (recipes) for solving a
 problem, and how to analyze algorithms experimentally.
 """
-from __future__ import print_function, division
-
 # These algorithms are taken from:
 # Levitin (2004) The Design and Analysis of Algorithms
 
@@ -19,6 +17,7 @@ from __future__ import print_function, division
 # Selection Sort
 ##################################################################
 
+
 def selection(a):
     """
     Selection Sort: scan the list to find its smallest element, then
@@ -30,20 +29,22 @@ def selection(a):
     for i in range(len(a) - 1):
         min = i
 
-        for j in range(i+1, len(a)):
+        for j in range(i + 1, len(a)):
             if a[j] < a[min]:
                 min = j
 
             count += 1
 
-        a[min],a[i] = a[i],a[min]
+        a[min], a[i] = a[i], a[min]
 
     return count
 
+
 ##################################################################
 # Bubble Sort
 ##################################################################
 
+
 def bubble(a):
     """
     Bubble Sort: compare adjacent elements of the list left-to-right,
@@ -53,10 +54,10 @@ def bubble(a):
     apply the same method to this list, and so on.
     """
     count = 0
-    for i in range(len(a)-1):
-        for j in range(len(a)-i-1):
-            if a[j+1] < a[j]:
-                a[j],a[j+1] = a[j+1],a[j]
+    for i in range(len(a) - 1):
+        for j in range(len(a) - i - 1):
+            if a[j + 1] < a[j]:
+                a[j], a[j + 1] = a[j + 1], a[j]
                 count += 1
     return count
 
@@ -65,11 +66,12 @@ def bubble(a):
 # Merge Sort
 ##################################################################
 
+
 def _merge_lists(b, c):
     count = 0
     i = j = 0
     a = []
-    while (i < len(b) and j < len(c)):
+    while i < len(b) and j < len(c):
         count += 1
         if b[i] <= c[j]:
             a.append(b[i])
@@ -83,6 +85,7 @@ def _merge_lists(b, c):
         a += b[i:]
     return a, count
 
+
 def merge(a):
     """
     Merge Sort: split the list in half, and sort each half, then
@@ -96,46 +99,57 @@ def merge(a):
         count_b = merge(b)
         count_c = merge(c)
         result, count_a = _merge_lists(b, c)
-        a[:] = result # copy the result back into a.
+        a[:] = result  # copy the result back into a.
         count = count_a + count_b + count_c
     return count
 
+
 ##################################################################
 # Quick Sort
 ##################################################################
 
+
 def _partition(a, l, r):
-    p = a[l]; i = l; j = r+1
+    p = a[l]
+    i = l
+    j = r + 1
     count = 0
     while True:
         while i < r:
             i += 1
-            if a[i] >= p: break
+            if a[i] >= p:
+                break
         while j > l:
             j -= 1
-            if j < l or a[j] <= p: break
-        a[i],a[j] = a[j],a[i]               # swap
+            if j < l or a[j] <= p:
+                break
+        a[i], a[j] = a[j], a[i]  # swap
         count += 1
-        if i >= j: break
-    a[i],a[j] = a[j],a[i]                   # undo last swap
-    a[l],a[j] = a[j],a[l]
+        if i >= j:
+            break
+    a[i], a[j] = a[j], a[i]  # undo last swap
+    a[l], a[j] = a[j], a[l]
     return j, count
 
+
 def _quick(a, l, r):
     count = 0
-    if l<r:
+    if l < r:
         s, count = _partition(a, l, r)
-        count += _quick(a, l, s-1)
-        count += _quick(a, s+1, r)
+        count += _quick(a, l, s - 1)
+        count += _quick(a, s + 1, r)
     return count
 
+
 def quick(a):
-    return _quick(a, 0, len(a)-1)
+    return _quick(a, 0, len(a) - 1)
+
 
 ##################################################################
 # Demonstration
 ##################################################################
 
+
 def demo():
     from random import shuffle
 
@@ -143,15 +157,22 @@ def demo():
         a = list(range(size))
 
         # various sort methods
-        shuffle(a); count_selection = selection(a)
-        shuffle(a); count_bubble    = bubble(a)
-        shuffle(a); count_merge     = merge(a)
-        shuffle(a); count_quick     = quick(a)
-
-        print((("size=%5d:  selection=%8d,  bubble=%8d,  "
-                "merge=%6d,  quick=%6d") %
-               (size, count_selection, count_bubble,
-                count_merge, count_quick)))
-
-if __name__ == '__main__':
+        shuffle(a)
+        count_selection = selection(a)
+        shuffle(a)
+        count_bubble = bubble(a)
+        shuffle(a)
+        count_merge = merge(a)
+        shuffle(a)
+        count_quick = quick(a)
+
+        print(
+            (
+                ("size=%5d:  selection=%8d,  bubble=%8d,  " "merge=%6d,  quick=%6d")
+                % (size, count_selection, count_bubble, count_merge, count_quick)
+            )
+        )
+
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/misc/sort.pyc b/nlp_resource_data/nltk/misc/sort.pyc
deleted file mode 100755 (executable)
index ed561a1..0000000
Binary files a/nlp_resource_data/nltk/misc/sort.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index c2d5449..4514f62
@@ -1,23 +1,23 @@
 # Natural Language Toolkit: Word Finder
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 # Simplified from PHP version by Robert Klein <brathna@gmail.com>
 # http://fswordfinder.sourceforge.net/
-from __future__ import print_function
 
 import random
 
 
 # reverse a word with probability 0.5
 def revword(word):
-    if random.randint(1,2) == 1:
+    if random.randint(1, 2) == 1:
         return word[::-1]
     return word
 
+
 # try to insert word at position x,y; direction encoded in xf,yf
 def step(word, x, xf, y, yf, grid):
     for i in range(len(word)):
@@ -27,27 +27,28 @@ def step(word, x, xf, y, yf, grid):
         grid[xf(i)][yf(i)] = word[i]
     return True
 
+
 # try to insert word at position x,y, in direction dir
 def check(word, dir, x, y, grid, rows, cols):
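+    # dir encodes the direction: 1: x and y decrease (diagonal); 2: x
+    # decreases; 3: x decreases, y increases (diagonal); 4: y decreases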
-    if dir==1:
-        if x-len(word)<0 or y-len(word)<0:
+    if dir == 1:
+        if x - len(word) < 0 or y - len(word) < 0:
             return False
-        return step(word, x, lambda i:x-i, y, lambda i:y-i, grid)
-    elif dir==2:
-        if x-len(word)<0:
+        return step(word, x, lambda i: x - i, y, lambda i: y - i, grid)
+    elif dir == 2:
+        if x - len(word) < 0:
             return False
-        return step(word, x, lambda i:x-i, y, lambda i:y, grid)
-    elif dir==3:
-        if x-len(word)<0 or y+(len(word)-1)>=cols:
+        return step(word, x, lambda i: x - i, y, lambda i: y, grid)
+    elif dir == 3:
+        if x - len(word) < 0 or y + (len(word) - 1) >= cols:
             return False
-        return step(word, x, lambda i:x-i, y, lambda i:y+i, grid)
-    elif dir==4:
-        if y-len(word)<0:
+        return step(word, x, lambda i: x - i, y, lambda i: y + i, grid)
+    elif dir == 4:
+        if y - len(word) < 0:
             return False
-        return step(word, x, lambda i:x, y, lambda i:y-i, grid)
+        return step(word, x, lambda i: x, y, lambda i: y - i, grid)
+
 
-def wordfinder(words, rows=20, cols=20, attempts=50,
-               alph='ABCDEFGHIJKLMNOPQRSTUVWXYZ'):
+def wordfinder(words, rows=20, cols=20, attempts=50, alph="ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
     """
     Attempt to arrange words into a letter-grid with the specified
     number of rows and columns.  Try each word in several positions
@@ -82,33 +83,41 @@ def wordfinder(words, rows=20, cols=20, attempts=50,
     # try to place each word
     for word in words:
         word = word.strip().upper()  # normalize
-        save = word                  # keep a record of the word
+        save = word  # keep a record of the word
         word = revword(word)
         for attempt in range(attempts):
             r = random.randint(0, len(word))
-            dir = random.choice([1,2,3,4])
-            x = random.randint(0,rows)
-            y = random.randint(0,cols)
-            if   dir==1: x+=r; y+=r
-            elif dir==2: x+=r
-            elif dir==3: x+=r; y-=r
-            elif dir==4: y+=r
-            if 0<=x<rows and 0<=y<cols:
+            dir = random.choice([1, 2, 3, 4])
+            x = random.randint(0, rows)
+            y = random.randint(0, cols)
+            if dir == 1:
+                x += r
+                y += r
+            elif dir == 2:
+                x += r
+            elif dir == 3:
+                x += r
+                y -= r
+            elif dir == 4:
+                y += r
+            if 0 <= x < rows and 0 <= y < cols:
                 if check(word, dir, x, y, grid, rows, cols):
-#                   used.append((save, dir, x, y, word))
+                    #                   used.append((save, dir, x, y, word))
                     used.append(save)
                     break
 
     # Fill up the remaining spaces
     for i in range(rows):
         for j in range(cols):
-            if grid[i][j] == '':
+            if grid[i][j] == "":
                 grid[i][j] = random.choice(alph)
 
     return grid, used
 
+
 def word_finder():
     from nltk.corpus import words
+
     wordlist = words.words()
     random.shuffle(wordlist)
     wordlist = wordlist[:200]
@@ -118,12 +127,13 @@ def word_finder():
     print("Word Finder\n")
     for i in range(len(grid)):
         for j in range(len(grid[i])):
-            print(grid[i][j], end=' ')
+            print(grid[i][j], end=" ")
         print()
     print()
 
     for i in range(len(used)):
-        print("%d:" % (i+1), used[i])
+        print("%d:" % (i + 1), used[i])
+
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     word_finder()
diff --git a/nlp_resource_data/nltk/misc/wordfinder.pyc b/nlp_resource_data/nltk/misc/wordfinder.pyc
deleted file mode 100755 (executable)
index 5f2b23c..0000000
Binary files a/nlp_resource_data/nltk/misc/wordfinder.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 66441de..e4f0f15
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Parsers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -44,36 +44,57 @@ three sub-modules for specialized kinds of parsing:
 """
 
 from nltk.parse.api import ParserI
-from nltk.parse.chart import (ChartParser, SteppingChartParser, TopDownChartParser,
-                              BottomUpChartParser, BottomUpLeftCornerChartParser,
-                              LeftCornerChartParser)
-from nltk.parse.featurechart import (FeatureChartParser, FeatureTopDownChartParser,
-                                     FeatureBottomUpChartParser,
-                                     FeatureBottomUpLeftCornerChartParser)
-from nltk.parse.earleychart import (IncrementalChartParser, EarleyChartParser,
-                                    IncrementalTopDownChartParser,
-                                    IncrementalBottomUpChartParser,
-                                    IncrementalBottomUpLeftCornerChartParser,
-                                    IncrementalLeftCornerChartParser,
-                                    FeatureIncrementalChartParser,
-                                    FeatureEarleyChartParser,
-                                    FeatureIncrementalTopDownChartParser,
-                                    FeatureIncrementalBottomUpChartParser,
-                                    FeatureIncrementalBottomUpLeftCornerChartParser)
-from nltk.parse.pchart import (BottomUpProbabilisticChartParser, InsideChartParser,
-                               RandomChartParser, UnsortedChartParser,
-                               LongestChartParser)
-from nltk.parse.recursivedescent import (RecursiveDescentParser,
-                                         SteppingRecursiveDescentParser)
-from nltk.parse.shiftreduce import (ShiftReduceParser, SteppingShiftReduceParser)
+from nltk.parse.chart import (
+    ChartParser,
+    SteppingChartParser,
+    TopDownChartParser,
+    BottomUpChartParser,
+    BottomUpLeftCornerChartParser,
+    LeftCornerChartParser,
+)
+from nltk.parse.featurechart import (
+    FeatureChartParser,
+    FeatureTopDownChartParser,
+    FeatureBottomUpChartParser,
+    FeatureBottomUpLeftCornerChartParser,
+)
+from nltk.parse.earleychart import (
+    IncrementalChartParser,
+    EarleyChartParser,
+    IncrementalTopDownChartParser,
+    IncrementalBottomUpChartParser,
+    IncrementalBottomUpLeftCornerChartParser,
+    IncrementalLeftCornerChartParser,
+    FeatureIncrementalChartParser,
+    FeatureEarleyChartParser,
+    FeatureIncrementalTopDownChartParser,
+    FeatureIncrementalBottomUpChartParser,
+    FeatureIncrementalBottomUpLeftCornerChartParser,
+)
+from nltk.parse.pchart import (
+    BottomUpProbabilisticChartParser,
+    InsideChartParser,
+    RandomChartParser,
+    UnsortedChartParser,
+    LongestChartParser,
+)
+from nltk.parse.recursivedescent import (
+    RecursiveDescentParser,
+    SteppingRecursiveDescentParser,
+)
+from nltk.parse.shiftreduce import ShiftReduceParser, SteppingShiftReduceParser
 from nltk.parse.util import load_parser, TestGrammar, extract_test_sentences
 from nltk.parse.viterbi import ViterbiParser
 from nltk.parse.dependencygraph import DependencyGraph
-from nltk.parse.projectivedependencyparser import (ProjectiveDependencyParser,
-                                                   ProbabilisticProjectiveDependencyParser)
-from nltk.parse.nonprojectivedependencyparser import (NonprojectiveDependencyParser,
-                                                      NaiveBayesDependencyScorer,
-                                                      ProbabilisticNonprojectiveParser)
+from nltk.parse.projectivedependencyparser import (
+    ProjectiveDependencyParser,
+    ProbabilisticProjectiveDependencyParser,
+)
+from nltk.parse.nonprojectivedependencyparser import (
+    NonprojectiveDependencyParser,
+    NaiveBayesDependencyScorer,
+    ProbabilisticNonprojectiveParser,
+)
 from nltk.parse.malt import MaltParser
 from nltk.parse.evaluate import DependencyEvaluator
 from nltk.parse.transitionparser import TransitionParser
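
The import reflow above changes only layout; nltk.parse still re-exports the same parser classes. A minimal usage sketch against that public surface (the grammar is a toy, invented here):

from nltk.grammar import CFG
from nltk.parse import ChartParser

# Toy grammar, invented for this sketch.
grammar = CFG.fromstring("""
S -> NP VP
NP -> 'I'
VP -> V NP | V
V -> 'parse'
""")

parser = ChartParser(grammar)
for tree in parser.parse("I parse".split()):
    print(tree)  # (S (NP I) (VP (V parse)))
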
diff --git a/nlp_resource_data/nltk/parse/__init__.pyc b/nlp_resource_data/nltk/parse/__init__.pyc
deleted file mode 100755 (executable)
index 18bb47c..0000000
Binary files a/nlp_resource_data/nltk/parse/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f0b4503
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2c6c601
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/bllip.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/bllip.cpython-37.pyc
new file mode 100644 (file)
index 0000000..6edee04
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/bllip.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/chart.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/chart.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2aab3d0
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/chart.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/corenlp.pyc b/nlp_resource_data/nltk/parse/__pycache__/corenlp.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 50%
rename from nlp_resource_data/nltk/parse/corenlp.pyc
rename to nlp_resource_data/nltk/parse/__pycache__/corenlp.cpython-37.pyc
index b9fe849..f706372
Binary files a/nlp_resource_data/nltk/parse/corenlp.pyc and b/nlp_resource_data/nltk/parse/__pycache__/corenlp.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/dependencygraph.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/dependencygraph.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a8d4eaa
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/dependencygraph.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/earleychart.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/earleychart.cpython-37.pyc
new file mode 100644 (file)
index 0000000..19306a9
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/earleychart.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/evaluate.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/evaluate.cpython-37.pyc
new file mode 100644 (file)
index 0000000..58e00bf
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/evaluate.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/featurechart.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/featurechart.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8cdc116
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/featurechart.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/generate.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/generate.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e619604
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/generate.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/malt.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/malt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..6dcd31b
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/malt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/nonprojectivedependencyparser.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/nonprojectivedependencyparser.cpython-37.pyc
new file mode 100644 (file)
index 0000000..6d3072d
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/nonprojectivedependencyparser.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/pchart.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/pchart.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e9a5817
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/pchart.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/projectivedependencyparser.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/projectivedependencyparser.cpython-37.pyc
new file mode 100644 (file)
index 0000000..08e4a80
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/projectivedependencyparser.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/recursivedescent.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/recursivedescent.cpython-37.pyc
new file mode 100644 (file)
index 0000000..b64a4db
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/recursivedescent.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/shiftreduce.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/shiftreduce.cpython-37.pyc
new file mode 100644 (file)
index 0000000..93f7265
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/shiftreduce.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/stanford.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/stanford.cpython-37.pyc
new file mode 100644 (file)
index 0000000..255f7bd
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/stanford.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/transitionparser.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/transitionparser.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7b8d53b
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/transitionparser.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a5cff93
Binary files /dev/null and b/nlp_resource_data/nltk/parse/__pycache__/util.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/viterbi.pyc b/nlp_resource_data/nltk/parse/__pycache__/viterbi.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 51%
rename from nlp_resource_data/nltk/parse/viterbi.pyc
rename to nlp_resource_data/nltk/parse/__pycache__/viterbi.cpython-37.pyc
index a89c0d9..8246ca7
Binary files a/nlp_resource_data/nltk/parse/viterbi.pyc and b/nlp_resource_data/nltk/parse/__pycache__/viterbi.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/parse/api.py b/nlp_resource_data/nltk/parse/api.py
old mode 100755 (executable)
new mode 100644 (file)
index 6ddd9aa..2a12adc
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Parser API
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -11,6 +11,7 @@ import itertools
 
 from nltk.internals import overridden
 
+
 class ParserI(object):
     """
     A processing class for deriving trees that represent possible
@@ -26,6 +27,7 @@ class ParserI(object):
     Subclasses may define:
       - ``grammar()``
     """
+
     def grammar(self):
         """
         :return: The grammar used by this parser.
@@ -44,7 +46,11 @@ class ParserI(object):
         if overridden(self.parse_sents):
             return next(self.parse_sents([sent], *args, **kwargs))
         elif overridden(self.parse_one):
-            return (tree for tree in [self.parse_one(sent, *args, **kwargs)] if tree is not None)
+            return (
+                tree
+                for tree in [self.parse_one(sent, *args, **kwargs)]
+                if tree is not None
+            )
         elif overridden(self.parse_all):
             return iter(self.parse_all(sent, *args, **kwargs))
         else:
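
Because parse() above delegates to parse_sents(), parse_one(), or parse_all(), a subclass can override whichever entry point is most natural. A minimal sketch that overrides parse() directly (FlatParser is invented here for illustration):

from nltk.parse.api import ParserI
from nltk.tree import Tree

class FlatParser(ParserI):
    # Toy parser: wraps every sentence in a single flat S tree.
    def parse(self, sent, *args, **kwargs):
        yield Tree("S", list(sent))

print(next(FlatParser().parse("a toy sentence".split())))  # (S a toy sentence)
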
diff --git a/nlp_resource_data/nltk/parse/api.pyc b/nlp_resource_data/nltk/parse/api.pyc
deleted file mode 100755 (executable)
index 886afbb..0000000
Binary files a/nlp_resource_data/nltk/parse/api.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/parse/bllip.py b/nlp_resource_data/nltk/parse/bllip.py
old mode 100755 (executable)
new mode 100644 (file)
index 06d0051..144f040
@@ -2,12 +2,10 @@
 #
 # Author: David McClosky <dmcc@bigasterisk.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import print_function
-
 from nltk.parse.api import ParserI
 from nltk.tree import Tree
 
@@ -81,7 +79,7 @@ See http://pypi.python.org/pypi/bllipparser/ for more information
 on BLLIP Parser's Python interface.
 """
 
-__all__ = ['BllipParser']
+__all__ = ["BllipParser"]
 
 # this block allows this module to be imported even if bllipparser isn't
 # available
@@ -91,31 +89,44 @@ try:
 
     def _ensure_bllip_import_or_error():
         pass
+
+
 except ImportError as ie:
+
     def _ensure_bllip_import_or_error(ie=ie):
         raise ImportError("Couldn't import bllipparser module: %s" % ie)
 
+
 def _ensure_ascii(words):
     try:
         for i, word in enumerate(words):
-            word.decode('ascii')
+            word.decode("ascii")
     except UnicodeDecodeError:
-        raise ValueError("Token %d (%r) is non-ASCII. BLLIP Parser "
-                         "currently doesn't support non-ASCII inputs." %
-                         (i, word))
+        raise ValueError(
+            "Token %d (%r) is non-ASCII. BLLIP Parser "
+            "currently doesn't support non-ASCII inputs." % (i, word)
+        )
+
 
 def _scored_parse_to_nltk_tree(scored_parse):
     return Tree.fromstring(str(scored_parse.ptb_parse))
 
+
 class BllipParser(ParserI):
     """
     Interface for parsing with BLLIP Parser. BllipParser objects can be
     constructed with the ``BllipParser.from_unified_model_dir`` class
     method or manually using the ``BllipParser`` constructor.
     """
-    def __init__(self, parser_model=None, reranker_features=None,
-                 reranker_weights=None, parser_options=None,
-                 reranker_options=None):
+
+    def __init__(
+        self,
+        parser_model=None,
+        reranker_features=None,
+        reranker_weights=None,
+        parser_options=None,
+        reranker_options=None,
+    ):
         """
         Load a BLLIP Parser model from scratch. You'll typically want to
         use the ``from_unified_model_dir()`` class method to construct
@@ -149,9 +160,11 @@ class BllipParser(ParserI):
         self.rrp = RerankingParser()
         self.rrp.load_parser_model(parser_model, **parser_options)
         if reranker_features and reranker_weights:
-            self.rrp.load_reranker_model(features_filename=reranker_features,
-                                         weights_filename=reranker_weights,
-                                         **reranker_options)
+            self.rrp.load_reranker_model(
+                features_filename=reranker_features,
+                weights_filename=reranker_weights,
+                **reranker_options
+            )
 
     def parse(self, sentence):
         """
@@ -200,8 +213,9 @@ class BllipParser(ParserI):
             yield _scored_parse_to_nltk_tree(scored_parse)
 
     @classmethod
-    def from_unified_model_dir(this_class, model_dir, parser_options=None,
-                               reranker_options=None):
+    def from_unified_model_dir(
+        cls, model_dir, parser_options=None, reranker_options=None
+    ):
         """
         Create a ``BllipParser`` object from a unified parsing model
         directory. Unified parsing model directories are a standardized
@@ -224,11 +238,19 @@ class BllipParser(ParserI):
         :type reranker_options: dict(str)
         :rtype: BllipParser
         """
-        (parser_model_dir, reranker_features_filename,
-         reranker_weights_filename) = get_unified_model_parameters(model_dir)
-        return this_class(parser_model_dir, reranker_features_filename,
-                          reranker_weights_filename, parser_options,
-                          reranker_options)
+        (
+            parser_model_dir,
+            reranker_features_filename,
+            reranker_weights_filename,
+        ) = get_unified_model_parameters(model_dir)
+        return cls(
+            parser_model_dir,
+            reranker_features_filename,
+            reranker_weights_filename,
+            parser_options,
+            reranker_options,
+        )
+
 
 def demo():
     """This assumes the Python module bllipparser is installed."""
@@ -237,19 +259,20 @@ def demo():
     # sudo python -m nltk.downloader bllip_wsj_no_aux
 
     from nltk.data import find
-    model_dir = find('models/bllip_wsj_no_aux').path
 
-    print('Loading BLLIP Parsing models...')
+    model_dir = find("models/bllip_wsj_no_aux").path
+
+    print("Loading BLLIP Parsing models...")
     # the easiest way to get started is to use a unified model
     bllip = BllipParser.from_unified_model_dir(model_dir)
-    print('Done.')
+    print("Done.")
 
-    sentence1 = 'British left waffles on Falklands .'.split()
-    sentence2 = 'I saw the man with the telescope .'.split()
+    sentence1 = "British left waffles on Falklands .".split()
+    sentence2 = "I saw the man with the telescope .".split()
     # this sentence is known to fail under the WSJ parsing model
-    fail1 = '# ! ? : -'.split()
+    fail1 = "# ! ? : -".split()
     for sentence in (sentence1, sentence2, fail1):
-        print('Sentence: %r' % ' '.join(sentence))
+        print("Sentence: %r" % " ".join(sentence))
         try:
             tree = next(bllip.parse(sentence))
             print(tree)
@@ -258,17 +281,24 @@ def demo():
 
     # n-best parsing demo
     for i, parse in enumerate(bllip.parse(sentence1)):
-        print('parse %d:\n%s' % (i, parse))
+        print("parse %d:\n%s" % (i, parse))
 
     # using external POS tag constraints
-    print("forcing 'tree' to be 'NN':",
-          next(bllip.tagged_parse([('A', None), ('tree', 'NN')])))
-    print("forcing 'A' to be 'DT' and 'tree' to be 'NNP':",
-          next(bllip.tagged_parse([('A', 'DT'), ('tree', 'NNP')])))
+    print(
+        "forcing 'tree' to be 'NN':",
+        next(bllip.tagged_parse([("A", None), ("tree", "NN")])),
+    )
+    print(
+        "forcing 'A' to be 'DT' and 'tree' to be 'NNP':",
+        next(bllip.tagged_parse([("A", "DT"), ("tree", "NNP")])),
+    )
     # constraints don't have to make sense... (though on more complicated
     # sentences, they may cause the parse to fail)
-    print("forcing 'A' to be 'NNP':",
-          next(bllip.tagged_parse([('A', 'NNP'), ('tree', None)])))
+    print(
+        "forcing 'A' to be 'NNP':",
+        next(bllip.tagged_parse([("A", "NNP"), ("tree", None)])),
+    )
+
 
 def setup_module(module):
     from nose import SkipTest
@@ -276,7 +306,7 @@ def setup_module(module):
     try:
         _ensure_bllip_import_or_error()
     except ImportError:
-        raise SkipTest('doctests from nltk.parse.bllip are skipped because '
-                       'the bllipparser module is not installed')
-
-
+        raise SkipTest(
+            "doctests from nltk.parse.bllip are skipped because "
+            "the bllipparser module is not installed"
+        )
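
The try/except block above keeps the module importable without bllipparser and defers the failure to first use. The same guard pattern in isolation, with a hypothetical dependency name (optional_dep is invented here):

try:
    import optional_dep  # hypothetical optional dependency

    def _ensure_import_or_error():
        pass

except ImportError as ie:

    def _ensure_import_or_error(ie=ie):
        raise ImportError("Couldn't import optional_dep: %s" % ie)

# Call sites run the guard before touching the dependency,
# so the ImportError surfaces at use time, not at import time.
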
diff --git a/nlp_resource_data/nltk/parse/bllip.pyc b/nlp_resource_data/nltk/parse/bllip.pyc
deleted file mode 100755 (executable)
index 50d5d0c..0000000
Binary files a/nlp_resource_data/nltk/parse/bllip.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/parse/chart.py b/nlp_resource_data/nltk/parse/chart.py
old mode 100755 (executable)
new mode 100644 (file)
index 79c3bd4..dffd644
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: A Chart Parser
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 #         Jean Mark Gawron <gawron@mail.sdsu.edu>
@@ -35,20 +35,16 @@ defines three chart parsers:
   - ``SteppingChartParser`` is a subclass of ``ChartParser`` that can
     be used to step through the parsing process.
 """
-from __future__ import print_function, division, unicode_literals
 
 import itertools
 import re
 import warnings
 from functools import total_ordering
 
-from six.moves import range
-
 from nltk.tree import Tree
 from nltk.grammar import PCFG, is_nonterminal, is_terminal
 from nltk.util import OrderedDict
 from nltk.internals import raise_unorderable_types
-from nltk.compat import python_2_unicode_compatible, unicode_repr
 
 from nltk.parse.api import ParserI
 
@@ -57,6 +53,7 @@ from nltk.parse.api import ParserI
 ##  Edges
 ########################################################################
 
+
 @total_ordering
 class EdgeI(object):
     """
@@ -90,13 +87,14 @@ class EdgeI(object):
     The ``EdgeI`` interface provides a common interface to both types
     of edge, allowing chart parsers to treat them in a uniform manner.
     """
+
     def __init__(self):
         if self.__class__ == EdgeI:
-            raise TypeError('Edge is an abstract interface')
+            raise TypeError("Edge is an abstract interface")
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Span
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def span(self):
         """
@@ -132,9 +130,9 @@ class EdgeI(object):
         """
         raise NotImplementedError()
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Left Hand Side
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def lhs(self):
         """
@@ -146,9 +144,9 @@ class EdgeI(object):
         """
         raise NotImplementedError()
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Right Hand Side
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def rhs(self):
         """
@@ -198,13 +196,15 @@ class EdgeI(object):
         """
         raise NotImplementedError()
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Comparisons & hashing
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def __eq__(self, other):
-        return (self.__class__ is other.__class__ and
-                self._comparison_key == other._comparison_key)
+        return (
+            self.__class__ is other.__class__
+            and self._comparison_key == other._comparison_key
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -225,7 +225,6 @@ class EdgeI(object):
             return self._hash
 
 
-@python_2_unicode_compatible
 class TreeEdge(EdgeI):
     """
     An edge that records the fact that a tree is (partially)
@@ -249,6 +248,7 @@ class TreeEdge(EdgeI):
 
     For more information about edges, see the ``EdgeI`` interface.
     """
+
     def __init__(self, span, lhs, rhs, dot=0):
         """
         Construct a new ``TreeEdge``.
@@ -288,8 +288,9 @@ class TreeEdge(EdgeI):
 
         :rtype: TreeEdge
         """
-        return TreeEdge(span=(index, index), lhs=production.lhs(),
-                        rhs=production.rhs(), dot=0)
+        return TreeEdge(
+            span=(index, index), lhs=production.lhs(), rhs=production.rhs(), dot=0
+        )
 
     def move_dot_forward(self, new_end):
         """
@@ -301,40 +302,64 @@ class TreeEdge(EdgeI):
         :type new_end: int
         :rtype: TreeEdge
         """
-        return TreeEdge(span=(self._span[0], new_end),
-                        lhs=self._lhs, rhs=self._rhs,
-                        dot=self._dot+1)
+        return TreeEdge(
+            span=(self._span[0], new_end),
+            lhs=self._lhs,
+            rhs=self._rhs,
+            dot=self._dot + 1,
+        )
 
     # Accessors
-    def lhs(self): return self._lhs
-    def span(self): return self._span
-    def start(self): return self._span[0]
-    def end(self): return self._span[1]
-    def length(self): return self._span[1] - self._span[0]
-    def rhs(self): return self._rhs
-    def dot(self): return self._dot
-    def is_complete(self): return self._dot == len(self._rhs)
-    def is_incomplete(self): return self._dot != len(self._rhs)
+    def lhs(self):
+        return self._lhs
+
+    def span(self):
+        return self._span
+
+    def start(self):
+        return self._span[0]
+
+    def end(self):
+        return self._span[1]
+
+    def length(self):
+        return self._span[1] - self._span[0]
+
+    def rhs(self):
+        return self._rhs
+
+    def dot(self):
+        return self._dot
+
+    def is_complete(self):
+        return self._dot == len(self._rhs)
+
+    def is_incomplete(self):
+        return self._dot != len(self._rhs)
+
     def nextsym(self):
-        if self._dot >= len(self._rhs): return None
-        else: return self._rhs[self._dot]
+        if self._dot >= len(self._rhs):
+            return None
+        else:
+            return self._rhs[self._dot]
 
     # String representation
     def __str__(self):
-        str = '[%s:%s] ' % (self._span[0], self._span[1])
-        str += '%-2r ->' % (self._lhs,)
+        str = "[%s:%s] " % (self._span[0], self._span[1])
+        str += "%-2r ->" % (self._lhs,)
 
         for i in range(len(self._rhs)):
-            if i == self._dot: str += ' *'
-            str += ' %s' % unicode_repr(self._rhs[i])
-        if len(self._rhs) == self._dot: str += ' *'
+            if i == self._dot:
+                str += " *"
+            str += " %s" % repr(self._rhs[i])
+        if len(self._rhs) == self._dot:
+            str += " *"
         return str
 
     def __repr__(self):
-        return '[Edge: %s]' % self
+        return "[Edge: %s]" % self
 
 
-@python_2_unicode_compatible
 class LeafEdge(EdgeI):
     """
     An edge that records the fact that a leaf value is consistent with
@@ -347,6 +372,7 @@ class LeafEdge(EdgeI):
     side is ``()``.  Its span is ``[index, index+1]``, and its dot
     position is ``0``.
     """
+
     def __init__(self, leaf, index):
         """
         Construct a new ``LeafEdge``.
@@ -361,27 +387,49 @@ class LeafEdge(EdgeI):
         self._comparison_key = (leaf, index)
 
     # Accessors
-    def lhs(self): return self._leaf
-    def span(self): return (self._index, self._index+1)
-    def start(self): return self._index
-    def end(self): return self._index+1
-    def length(self): return 1
-    def rhs(self): return ()
-    def dot(self): return 0
-    def is_complete(self): return True
-    def is_incomplete(self): return False
-    def nextsym(self): return None
+    def lhs(self):
+        return self._leaf
+
+    def span(self):
+        return (self._index, self._index + 1)
+
+    def start(self):
+        return self._index
+
+    def end(self):
+        return self._index + 1
+
+    def length(self):
+        return 1
+
+    def rhs(self):
+        return ()
+
+    def dot(self):
+        return 0
+
+    def is_complete(self):
+        return True
+
+    def is_incomplete(self):
+        return False
+
+    def nextsym(self):
+        return None
 
     # String representations
     def __str__(self):
-        return '[%s:%s] %s' % (self._index, self._index+1, unicode_repr(self._leaf))
+        return "[%s:%s] %s" % (self._index, self._index + 1, repr(self._leaf))
+
     def __repr__(self):
-        return '[Edge: %s]' % (self)
+        return "[Edge: %s]" % (self)
+
 
 ########################################################################
 ##  Chart
 ########################################################################
 
+
 class Chart(object):
     """
     A blackboard for hypotheses about the syntactic constituents of a
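
The accessor reflow above is behavior-preserving; the dotted-rule mechanics are easiest to see on a concrete edge. A short sketch exercising TreeEdge directly (the grammar is a toy, invented here):

from nltk.grammar import CFG
from nltk.parse.chart import TreeEdge

# Toy grammar, invented for this sketch.
prod = CFG.fromstring("S -> NP VP\nNP -> 'I'\nVP -> 'run'").productions()[0]
edge = TreeEdge.from_production(prod, 0)      # [0:0] S -> * NP VP
print(edge.is_incomplete(), edge.nextsym())   # True NP
edge = edge.move_dot_forward(1)               # [0:1] S -> NP * VP
print(edge.dot(), edge.end())                 # 1 1
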
@@ -410,6 +458,7 @@ class Chart(object):
         to indices, where each index maps the corresponding edge
         attribute values to lists of edges.
     """
+
     def __init__(self, tokens):
         """
         Construct a new chart. The chart is initialized with the
@@ -439,9 +488,9 @@ class Chart(object):
         # (used by select()).
         self._indexes = {}
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Sentence Access
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def num_leaves(self):
         """
@@ -468,9 +517,9 @@ class Chart(object):
         """
         return self._tokens
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Edge access
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def edges(self):
         """
@@ -529,7 +578,8 @@ class Chart(object):
         :rtype: iter(EdgeI)
         """
         # If there are no restrictions, then return all edges.
-        if restrictions=={}: return iter(self._edges)
+        if restrictions == {}:
+            return iter(self._edges)
 
         # Find the index corresponding to the given restrictions.
         restr_keys = sorted(restrictions.keys())
@@ -550,7 +600,7 @@ class Chart(object):
         # Make sure it's a valid index.
         for key in restr_keys:
             if not hasattr(EdgeI, key):
-                raise ValueError('Bad restriction: %s' % key)
+                raise ValueError("Bad restriction: %s" % key)
 
         # Create the index.
         index = self._indexes[restr_keys] = {}
@@ -569,16 +619,16 @@ class Chart(object):
             vals = tuple(getattr(edge, key)() for key in restr_keys)
             index.setdefault(vals, []).append(edge)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Edge Insertion
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def insert_with_backpointer(self, new_edge, previous_edge, child_edge):
         """
         Add a new edge to the chart, using a pointer to the previous edge.
         """
         cpls = self.child_pointer_lists(previous_edge)
-        new_cpls = [cpl+(child_edge,) for cpl in cpls]
+        new_cpls = [cpl + (child_edge,) for cpl in cpls]
         return self.insert(new_edge, *new_cpls)
 
     def insert(self, edge, *child_pointer_lists):
@@ -617,9 +667,9 @@ class Chart(object):
     def _append_edge(self, edge):
         self._edges.append(edge)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Tree extraction & child pointer lists
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def parses(self, root, tree_class=Tree):
         """
@@ -683,8 +733,7 @@ class Chart(object):
             # Get the set of child choices for each child pointer.
             # child_choices[i] is the set of choices for the tree's
             # ith child.
-            child_choices = [self._trees(cp, complete, memo, tree_class)
-                             for cp in cpl]
+            child_choices = [self._trees(cp, complete, memo, tree_class) for cp in cpl]
 
             # For each combination of children, add a tree.
             for children in itertools.product(*child_choices):
@@ -692,8 +741,7 @@ class Chart(object):
 
         # If the edge is incomplete, then extend it with "partial trees":
         if edge.is_incomplete():
-            unexpanded = [tree_class(elt,[])
-                          for elt in edge.rhs()[edge.dot():]]
+            unexpanded = [tree_class(elt, []) for elt in edge.rhs()[edge.dot() :]]
             for tree in trees:
                 tree.extend(unexpanded)
 
@@ -714,9 +762,9 @@ class Chart(object):
         # Make a copy, in case they modify it.
         return self._edge_to_cpls.get(edge, {}).keys()
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Display
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     def pretty_format_edge(self, edge, width=None):
         """
         Return a pretty-printed string representation of a given edge
@@ -726,27 +774,30 @@ class Chart(object):
         :param width: The number of characters allotted to each
             index in the sentence.
         """
-        if width is None: width = 50 // (self.num_leaves()+1)
+        if width is None:
+            width = 50 // (self.num_leaves() + 1)
         (start, end) = (edge.start(), edge.end())
 
-        str = '|' + ('.'+' '*(width-1))*start
+        str = "|" + ("." + " " * (width - 1)) * start
 
         # Zero-width edges are "#" if complete, ">" if incomplete
         if start == end:
-            if edge.is_complete(): str += '#'
-            else: str += '>'
+            if edge.is_complete():
+                str += "#"
+            else:
+                str += ">"
 
         # Spanning complete edges are "[===]"; Other edges are
         # "[---]" if complete, "[--->" if incomplete
-        elif edge.is_complete() and edge.span() == (0,self._num_leaves):
-            str += '['+('='*width)*(end-start-1) + '='*(width-1)+']'
+        elif edge.is_complete() and edge.span() == (0, self._num_leaves):
+            str += "[" + ("=" * width) * (end - start - 1) + "=" * (width - 1) + "]"
         elif edge.is_complete():
-            str += '['+('-'*width)*(end-start-1) + '-'*(width-1)+']'
+            str += "[" + ("-" * width) * (end - start - 1) + "-" * (width - 1) + "]"
         else:
-            str += '['+('-'*width)*(end-start-1) + '-'*(width-1)+'>'
+            str += "[" + ("-" * width) * (end - start - 1) + "-" * (width - 1) + ">"
 
-        str += (' '*(width-1)+'.')*(self._num_leaves-end)
-        return str + '| %s' % edge
+        str += (" " * (width - 1) + ".") * (self._num_leaves - end)
+        return str + "| %s" % edge
 
     def pretty_format_leaves(self, width=None):
         """
@@ -754,15 +805,16 @@ class Chart(object):
         chart's leaves.  This string can be used as a header
         for calls to ``pretty_format_edge``.
         """
-        if width is None: width = 50 // (self.num_leaves()+1)
+        if width is None:
+            width = 50 // (self.num_leaves() + 1)
 
-        if self._tokens is not None and width>1:
-            header = '|.'
+        if self._tokens is not None and width > 1:
+            header = "|."
             for tok in self._tokens:
-                header += tok[:width-1].center(width-1)+'.'
-            header += '|'
+                header += tok[: width - 1].center(width - 1) + "."
+            header += "|"
         else:
-            header = ''
+            header = ""
 
         return header
 
@@ -774,74 +826,95 @@ class Chart(object):
             index in the sentence.
         :rtype: str
         """
-        if width is None: width = 50 // (self.num_leaves()+1)
+        if width is None:
+            width = 50 // (self.num_leaves() + 1)
         # sort edges: primary key=length, secondary key=start index.
         # (and filter out the token edges)
         edges = sorted([(e.length(), e.start(), e) for e in self])
-        edges = [e for (_,_,e) in edges]
+        edges = [e for (_, _, e) in edges]
 
-        return (self.pretty_format_leaves(width) + '\n' +
-                '\n'.join(self.pretty_format_edge(edge, width) for edge in edges))
+        return (
+            self.pretty_format_leaves(width)
+            + "\n"
+            + "\n".join(self.pretty_format_edge(edge, width) for edge in edges)
+        )
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Display: Dot (AT&T Graphviz)
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def dot_digraph(self):
         # Header
-        s = 'digraph nltk_chart {\n'
-        #s += '  size="5,5";\n'
-        s += '  rankdir=LR;\n'
-        s += '  node [height=0.1,width=0.1];\n'
+        s = "digraph nltk_chart {\n"
+        # s += '  size="5,5";\n'
+        s += "  rankdir=LR;\n"
+        s += "  node [height=0.1,width=0.1];\n"
         s += '  node [style=filled, color="lightgray"];\n'
 
         # Set up the nodes
         for y in range(self.num_edges(), -1, -1):
             if y == 0:
                 s += '  node [style=filled, color="black"];\n'
-            for x in range(self.num_leaves()+1):
-                if y == 0 or (x <= self._edges[y-1].start() or
-                              x >= self._edges[y-1].end()):
-                    s += '  %04d.%04d [label=""];\n' % (x,y)
+            for x in range(self.num_leaves() + 1):
+                if y == 0 or (
+                    x <= self._edges[y - 1].start() or x >= self._edges[y - 1].end()
+                ):
+                    s += '  %04d.%04d [label=""];\n' % (x, y)
 
         # Add a spacer
-        s += '  x [style=invis]; x->0000.0000 [style=invis];\n'
+        s += "  x [style=invis]; x->0000.0000 [style=invis];\n"
 
         # Declare ranks.
-        for x in range(self.num_leaves()+1):
-            s += '  {rank=same;'
-            for y in range(self.num_edges()+1):
-                if y == 0 or (x <= self._edges[y-1].start() or
-                              x >= self._edges[y-1].end()):
-                    s += ' %04d.%04d' % (x,y)
-            s += '}\n'
+        for x in range(self.num_leaves() + 1):
+            s += "  {rank=same;"
+            for y in range(self.num_edges() + 1):
+                if y == 0 or (
+                    x <= self._edges[y - 1].start() or x >= self._edges[y - 1].end()
+                ):
+                    s += " %04d.%04d" % (x, y)
+            s += "}\n"
 
         # Add the leaves
-        s += '  edge [style=invis, weight=100];\n'
-        s += '  node [shape=plaintext]\n'
-        s += '  0000.0000'
+        s += "  edge [style=invis, weight=100];\n"
+        s += "  node [shape=plaintext]\n"
+        s += "  0000.0000"
         for x in range(self.num_leaves()):
-            s += '->%s->%04d.0000' % (self.leaf(x), x+1)
-        s += ';\n\n'
+            s += "->%s->%04d.0000" % (self.leaf(x), x + 1)
+        s += ";\n\n"
 
         # Add the edges
-        s += '  edge [style=solid, weight=1];\n'
+        s += "  edge [style=solid, weight=1];\n"
         for y, edge in enumerate(self):
             for x in range(edge.start()):
-                s += ('  %04d.%04d -> %04d.%04d [style="invis"];\n' %
-                      (x, y+1, x+1, y+1))
-            s += ('  %04d.%04d -> %04d.%04d [label="%s"];\n' %
-                  (edge.start(), y+1, edge.end(), y+1, edge))
+                s += '  %04d.%04d -> %04d.%04d [style="invis"];\n' % (
+                    x,
+                    y + 1,
+                    x + 1,
+                    y + 1,
+                )
+            s += '  %04d.%04d -> %04d.%04d [label="%s"];\n' % (
+                edge.start(),
+                y + 1,
+                edge.end(),
+                y + 1,
+                edge,
+            )
             for x in range(edge.end(), self.num_leaves()):
-                s += ('  %04d.%04d -> %04d.%04d [style="invis"];\n' %
-                      (x, y+1, x+1, y+1))
-        s += '}\n'
+                s += '  %04d.%04d -> %04d.%04d [style="invis"];\n' % (
+                    x,
+                    y + 1,
+                    x + 1,
+                    y + 1,
+                )
+        s += "}\n"
         return s
 
+
 ########################################################################
 ##  Chart Rules
 ########################################################################
 
+
 class ChartRuleI(object):
     """
     A rule that specifies what new edges are licensed by any given set
@@ -861,6 +934,7 @@ class ChartRuleI(object):
         to license new edges.  Typically, this number ranges from zero
         to two.
     """
+
     def apply(self, chart, grammar, *edges):
         """
         Return a generator that will add edges licensed by this rule
@@ -888,7 +962,6 @@ class ChartRuleI(object):
         raise NotImplementedError()
 
 
-@python_2_unicode_compatible
 class AbstractChartRule(ChartRuleI):
     """
     An abstract base class for chart rules.  ``AbstractChartRule``
@@ -927,20 +1000,22 @@ class AbstractChartRule(ChartRuleI):
             for e1 in chart:
                 for e2 in chart:
                     for e3 in chart:
-                        for new_edge in self.apply(chart,grammar,e1,e2,e3):
+                        for new_edge in self.apply(chart, grammar, e1, e2, e3):
                             yield new_edge
 
         else:
-            raise AssertionError('NUM_EDGES>3 is not currently supported')
+            raise AssertionError("NUM_EDGES>3 is not currently supported")
 
     # Default: return a name based on the class name.
     def __str__(self):
         # Add spaces between InitialCapsWords.
-        return re.sub('([a-z])([A-Z])', r'\1 \2', self.__class__.__name__)
+        return re.sub("([a-z])([A-Z])", r"\1 \2", self.__class__.__name__)
+
 
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
 # Fundamental Rule
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 class FundamentalRule(AbstractChartRule):
     """
@@ -954,13 +1029,17 @@ class FundamentalRule(AbstractChartRule):
 
     - ``[A -> alpha B * beta][i:j]``
     """
+
     NUM_EDGES = 2
+
     def apply(self, chart, grammar, left_edge, right_edge):
         # Make sure the rule is applicable.
-        if not (left_edge.is_incomplete() and
-                right_edge.is_complete() and
-                left_edge.end() == right_edge.start() and
-                left_edge.nextsym() == right_edge.lhs()):
+        if not (
+            left_edge.is_incomplete()
+            and right_edge.is_complete()
+            and left_edge.end() == right_edge.start()
+            and left_edge.nextsym() == right_edge.lhs()
+        ):
             return
 
         # Construct the new edge.
@@ -970,6 +1049,7 @@ class FundamentalRule(AbstractChartRule):
         if chart.insert_with_backpointer(new_edge, left_edge, right_edge):
             yield new_edge
 
+
 class SingleEdgeFundamentalRule(FundamentalRule):
     """
     A rule that joins a given edge with adjacent edges in the chart,
@@ -988,6 +1068,7 @@ class SingleEdgeFundamentalRule(FundamentalRule):
     :note: This is basically ``FundamentalRule``, with one edge left
         unspecified.
     """
+
     NUM_EDGES = 1
 
     def apply(self, chart, grammar, edge):
@@ -999,36 +1080,41 @@ class SingleEdgeFundamentalRule(FundamentalRule):
                 yield new_edge
 
     def _apply_complete(self, chart, grammar, right_edge):
-        for left_edge in chart.select(end=right_edge.start(),
-                                      is_complete=False,
-                                      nextsym=right_edge.lhs()):
+        for left_edge in chart.select(
+            end=right_edge.start(), is_complete=False, nextsym=right_edge.lhs()
+        ):
             new_edge = left_edge.move_dot_forward(right_edge.end())
             if chart.insert_with_backpointer(new_edge, left_edge, right_edge):
                 yield new_edge
 
     def _apply_incomplete(self, chart, grammar, left_edge):
-        for right_edge in chart.select(start=left_edge.end(),
-                                       is_complete=True,
-                                       lhs=left_edge.nextsym()):
+        for right_edge in chart.select(
+            start=left_edge.end(), is_complete=True, lhs=left_edge.nextsym()
+        ):
             new_edge = left_edge.move_dot_forward(right_edge.end())
             if chart.insert_with_backpointer(new_edge, left_edge, right_edge):
                 yield new_edge
 
-#////////////////////////////////////////////////////////////
+
+# ////////////////////////////////////////////////////////////
 # Inserting Terminal Leafs
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 class LeafInitRule(AbstractChartRule):
-    NUM_EDGES=0
+    NUM_EDGES = 0
+
     def apply(self, chart, grammar):
         for index in range(chart.num_leaves()):
             new_edge = LeafEdge(chart.leaf(index), index)
             if chart.insert(new_edge, ()):
                 yield new_edge
 
-#////////////////////////////////////////////////////////////
+
+# ////////////////////////////////////////////////////////////
 # Top-Down Prediction
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 class TopDownInitRule(AbstractChartRule):
     """
@@ -1037,13 +1123,16 @@ class TopDownInitRule(AbstractChartRule):
     ``[S -> \* alpha][0:i]`` is licensed for each grammar production
     ``S -> alpha``, where ``S`` is the grammar's start symbol.
     """
+
     NUM_EDGES = 0
+
     def apply(self, chart, grammar):
         for prod in grammar.productions(lhs=grammar.start()):
             new_edge = TreeEdge.from_production(prod, 0)
             if chart.insert(new_edge, ()):
                 yield new_edge
 
+
 class TopDownPredictRule(AbstractChartRule):
     """
     A rule licensing edges corresponding to the grammar productions
@@ -1054,14 +1143,18 @@ class TopDownPredictRule(AbstractChartRule):
 
     :note: This rule corresponds to the Predictor Rule in Earley parsing.
     """
+
     NUM_EDGES = 1
+
     def apply(self, chart, grammar, edge):
-        if edge.is_complete(): return
+        if edge.is_complete():
+            return
         for prod in grammar.productions(lhs=edge.nextsym()):
             new_edge = TreeEdge.from_production(prod, edge.end())
             if chart.insert(new_edge, ()):
                 yield new_edge
 
+
 class CachedTopDownPredictRule(TopDownPredictRule):
     """
     A cached version of ``TopDownPredictRule``.  After the first time
@@ -1071,20 +1164,24 @@ class CachedTopDownPredictRule(TopDownPredictRule):
 
     If ``chart`` or ``grammar`` are changed, then the cache is flushed.
     """
+
     def __init__(self):
         TopDownPredictRule.__init__(self)
         self._done = {}
 
     def apply(self, chart, grammar, edge):
-        if edge.is_complete(): return
+        if edge.is_complete():
+            return
         nextsym, index = edge.nextsym(), edge.end()
-        if not is_nonterminal(nextsym): return
+        if not is_nonterminal(nextsym):
+            return
 
         # If we've already applied this rule to an edge with the same
         # next & end, and the chart & grammar have not changed, then
         # just return (no new edges to add).
-        done = self._done.get((nextsym, index), (None,None))
-        if done[0] is chart and done[1] is grammar: return
+        done = self._done.get((nextsym, index), (None, None))
+        if done[0] is chart and done[1] is grammar:
+            return
 
         # Add all the edges indicated by the top down expand rule.
         for prod in grammar.productions(lhs=nextsym):
@@ -1093,7 +1190,8 @@ class CachedTopDownPredictRule(TopDownPredictRule):
             if prod.rhs():
                 first = prod.rhs()[0]
                 if is_terminal(first):
-                    if index >= chart.num_leaves() or first != chart.leaf(index): continue
+                    if index >= chart.num_leaves() or first != chart.leaf(index):
+                        continue
 
             new_edge = TreeEdge.from_production(prod, index)
             if chart.insert(new_edge, ()):
@@ -1102,9 +1200,11 @@ class CachedTopDownPredictRule(TopDownPredictRule):
         # Record the fact that we've applied this rule.
         self._done[nextsym, index] = (chart, grammar)
 
-#////////////////////////////////////////////////////////////
+
+# ////////////////////////////////////////////////////////////
 # Bottom-Up Prediction
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 class BottomUpPredictRule(AbstractChartRule):
     """
@@ -1113,14 +1213,18 @@ class BottomUpPredictRule(AbstractChartRule):
     particular, this rule specifies that ``[A -> alpha \*]`` licenses
     the edge ``[B -> \* A beta]`` for each grammar production ``B -> A beta``.
     """
+
     NUM_EDGES = 1
+
     def apply(self, chart, grammar, edge):
-        if edge.is_incomplete(): return
+        if edge.is_incomplete():
+            return
         for prod in grammar.productions(rhs=edge.lhs()):
             new_edge = TreeEdge.from_production(prod, edge.start())
             if chart.insert(new_edge, ()):
                 yield new_edge
 
+
 class BottomUpPredictCombineRule(BottomUpPredictRule):
     """
     A rule licensing any edge corresponding to a production whose
@@ -1132,20 +1236,26 @@ class BottomUpPredictCombineRule(BottomUpPredictRule):
     :note: This is like ``BottomUpPredictRule``, but it also applies
         the ``FundamentalRule`` to the resulting edge.
     """
+
     NUM_EDGES = 1
+
     def apply(self, chart, grammar, edge):
-        if edge.is_incomplete(): return
+        if edge.is_incomplete():
+            return
         for prod in grammar.productions(rhs=edge.lhs()):
             new_edge = TreeEdge(edge.span(), prod.lhs(), prod.rhs(), 1)
             if chart.insert(new_edge, (edge,)):
                 yield new_edge
 
+
 class EmptyPredictRule(AbstractChartRule):
     """
     A rule that inserts all empty productions as passive edges,
     in every position in the chart.
     """
+
     NUM_EDGES = 0
+
     def apply(self, chart, grammar):
         for prod in grammar.productions(empty=True):
             for index in range(chart.num_leaves() + 1):
@@ -1158,22 +1268,23 @@ class EmptyPredictRule(AbstractChartRule):
 ##  Filtered Bottom Up
 ########################################################################
 
+
 class FilteredSingleEdgeFundamentalRule(SingleEdgeFundamentalRule):
     def _apply_complete(self, chart, grammar, right_edge):
         end = right_edge.end()
         nexttoken = end < chart.num_leaves() and chart.leaf(end)
-        for left_edge in chart.select(end=right_edge.start(),
-                                      is_complete=False,
-                                      nextsym=right_edge.lhs()):
+        for left_edge in chart.select(
+            end=right_edge.start(), is_complete=False, nextsym=right_edge.lhs()
+        ):
             if _bottomup_filter(grammar, nexttoken, left_edge.rhs(), left_edge.dot()):
                 new_edge = left_edge.move_dot_forward(right_edge.end())
                 if chart.insert_with_backpointer(new_edge, left_edge, right_edge):
                     yield new_edge
 
     def _apply_incomplete(self, chart, grammar, left_edge):
-        for right_edge in chart.select(start=left_edge.end(),
-                                       is_complete=True,
-                                       lhs=left_edge.nextsym()):
+        for right_edge in chart.select(
+            start=left_edge.end(), is_complete=True, lhs=left_edge.nextsym()
+        ):
             end = right_edge.end()
             nexttoken = end < chart.num_leaves() and chart.leaf(end)
             if _bottomup_filter(grammar, nexttoken, left_edge.rhs(), left_edge.dot()):
@@ -1181,6 +1292,7 @@ class FilteredSingleEdgeFundamentalRule(SingleEdgeFundamentalRule):
                 if chart.insert_with_backpointer(new_edge, left_edge, right_edge):
                     yield new_edge
 
+
 class FilteredBottomUpPredictCombineRule(BottomUpPredictCombineRule):
     def apply(self, chart, grammar, edge):
         if edge.is_incomplete():
@@ -1194,6 +1306,7 @@ class FilteredBottomUpPredictCombineRule(BottomUpPredictCombineRule):
                 if chart.insert(new_edge, (edge,)):
                     yield new_edge
 
+
 def _bottomup_filter(grammar, nexttoken, rhs, dot=0):
     if len(rhs) <= dot + 1:
         return True
@@ -1208,22 +1321,31 @@ def _bottomup_filter(grammar, nexttoken, rhs, dot=0):
 ##  Generic Chart Parser
 ########################################################################
 
-TD_STRATEGY = [LeafInitRule(),
-               TopDownInitRule(),
-               CachedTopDownPredictRule(),
-               SingleEdgeFundamentalRule()]
-BU_STRATEGY = [LeafInitRule(),
-               EmptyPredictRule(),
-               BottomUpPredictRule(),
-               SingleEdgeFundamentalRule()]
-BU_LC_STRATEGY = [LeafInitRule(),
-                  EmptyPredictRule(),
-                  BottomUpPredictCombineRule(),
-                  SingleEdgeFundamentalRule()]
-
-LC_STRATEGY = [LeafInitRule(),
-               FilteredBottomUpPredictCombineRule(),
-               FilteredSingleEdgeFundamentalRule()]
+TD_STRATEGY = [
+    LeafInitRule(),
+    TopDownInitRule(),
+    CachedTopDownPredictRule(),
+    SingleEdgeFundamentalRule(),
+]
+BU_STRATEGY = [
+    LeafInitRule(),
+    EmptyPredictRule(),
+    BottomUpPredictRule(),
+    SingleEdgeFundamentalRule(),
+]
+BU_LC_STRATEGY = [
+    LeafInitRule(),
+    EmptyPredictRule(),
+    BottomUpPredictCombineRule(),
+    SingleEdgeFundamentalRule(),
+]
+
+LC_STRATEGY = [
+    LeafInitRule(),
+    FilteredBottomUpPredictCombineRule(),
+    FilteredSingleEdgeFundamentalRule(),
+]
+
 
 class ChartParser(ParserI):
     """
@@ -1237,8 +1359,16 @@ class ChartParser(ParserI):
     |     Apply *rule* to any applicable edges in the chart.
     | Return any complete parses in the chart
     """
-    def __init__(self, grammar, strategy=BU_LC_STRATEGY, trace=0,
-                 trace_chart_width=50, use_agenda=True, chart_class=Chart):
+
+    def __init__(
+        self,
+        grammar,
+        strategy=BU_LC_STRATEGY,
+        trace=0,
+        trace_chart_width=50,
+        use_agenda=True,
+        chart_class=Chart,
+    ):
         """
         Create a new chart parser, that uses ``grammar`` to parse
         texts.
@@ -1286,11 +1416,12 @@ class ChartParser(ParserI):
         return self._grammar
 
     def _trace_new_edges(self, chart, rule, new_edges, trace, edge_width):
-        if not trace: return
+        if not trace:
+            return
         print_rule_header = trace > 1
         for edge in new_edges:
             if print_rule_header:
-                print('%s:' % rule)
+                print("%s:" % rule)
                 print_rule_header = False
             print(chart.pretty_format_edge(edge, edge_width))
 
@@ -1303,7 +1434,8 @@ class ChartParser(ParserI):
         :type tokens: list(str)
         :rtype: Chart
         """
-        if trace is None: trace = self._trace
+        if trace is None:
+            trace = self._trace
         trace_new_edges = self._trace_new_edges
 
         tokens = list(tokens)
@@ -1313,7 +1445,8 @@ class ChartParser(ParserI):
 
         # Width, for printing trace edges.
         trace_edge_width = self._trace_chart_width // (chart.num_leaves() + 1)
-        if trace: print(chart.pretty_format_leaves(trace_edge_width))
+        if trace:
+            print(chart.pretty_format_leaves(trace_edge_width))
 
         if self._use_agenda:
             # Use an agenda-based algorithm.
@@ -1351,46 +1484,58 @@ class ChartParser(ParserI):
         chart = self.chart_parse(tokens)
         return iter(chart.parses(self._grammar.start(), tree_class=tree_class))
 
+
 class TopDownChartParser(ChartParser):
     """
     A ``ChartParser`` using a top-down parsing strategy.
     See ``ChartParser`` for more information.
     """
+
     def __init__(self, grammar, **parser_args):
         ChartParser.__init__(self, grammar, TD_STRATEGY, **parser_args)
 
+
 class BottomUpChartParser(ChartParser):
     """
     A ``ChartParser`` using a bottom-up parsing strategy.
     See ``ChartParser`` for more information.
     """
+
     def __init__(self, grammar, **parser_args):
         if isinstance(grammar, PCFG):
-            warnings.warn("BottomUpChartParser only works for CFG, "
-                          "use BottomUpProbabilisticChartParser instead",
-                          category=DeprecationWarning)
+            warnings.warn(
+                "BottomUpChartParser only works for CFG, "
+                "use BottomUpProbabilisticChartParser instead",
+                category=DeprecationWarning,
+            )
         ChartParser.__init__(self, grammar, BU_STRATEGY, **parser_args)
 
+
 class BottomUpLeftCornerChartParser(ChartParser):
     """
     A ``ChartParser`` using a bottom-up left-corner parsing strategy.
     This strategy is often more efficient than standard bottom-up.
     See ``ChartParser`` for more information.
     """
+
     def __init__(self, grammar, **parser_args):
         ChartParser.__init__(self, grammar, BU_LC_STRATEGY, **parser_args)
 
+
 class LeftCornerChartParser(ChartParser):
     def __init__(self, grammar, **parser_args):
         if not grammar.is_nonempty():
-            raise ValueError("LeftCornerParser only works for grammars "
-                             "without empty productions.")
+            raise ValueError(
+                "LeftCornerParser only works for grammars " "without empty productions."
+            )
         ChartParser.__init__(self, grammar, LC_STRATEGY, **parser_args)
 
+
 ########################################################################
 ##  Stepping Chart Parser
 ########################################################################
 
+
 class SteppingChartParser(ChartParser):
     """
     A ``ChartParser`` that allows you to step through the parsing
@@ -1407,24 +1552,25 @@ class SteppingChartParser(ChartParser):
         or chart has been changed.  If so, then ``step`` must restart
         the parsing algorithm.
     """
+
     def __init__(self, grammar, strategy=[], trace=0):
         self._chart = None
         self._current_chartrule = None
         self._restart = False
         ChartParser.__init__(self, grammar, strategy, trace)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Initialization
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def initialize(self, tokens):
         "Begin parsing the given tokens."
         self._chart = Chart(list(tokens))
         self._restart = True
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Stepping
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def step(self):
         """
@@ -1443,18 +1589,21 @@ class SteppingChartParser(ChartParser):
         added with the current strategy and grammar.
         """
         if self._chart is None:
-            raise ValueError('Parser must be initialized first')
+            raise ValueError("Parser must be initialized first")
         while True:
             self._restart = False
-            w = 50 // (self._chart.num_leaves()+1)
+            w = 50 // (self._chart.num_leaves() + 1)
 
             for e in self._parse():
-                if self._trace > 1: print(self._current_chartrule)
-                if self._trace > 0: print(self._chart.pretty_format_edge(e,w))
+                if self._trace > 1:
+                    print(self._current_chartrule)
+                if self._trace > 0:
+                    print(self._chart.pretty_format_edge(e, w))
                 yield e
-                if self._restart: break
+                if self._restart:
+                    break
             else:
-                yield None # No more edges.
+                yield None  # No more edges.
 
     def _parse(self):
         """
@@ -1473,9 +1622,9 @@ class SteppingChartParser(ChartParser):
                     edges_added += 1
                     yield e
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Accessors
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def strategy(self):
         "Return the strategy used by this parser."
@@ -1497,9 +1646,9 @@ class SteppingChartParser(ChartParser):
         "Return the parse trees currently contained in the chart."
         return self._chart.parses(self._grammar.start(), tree_class)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Parser modification
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def set_strategy(self, strategy):
         """
@@ -1510,25 +1659,28 @@ class SteppingChartParser(ChartParser):
         :param strategy: A list of rules that should be used to decide
             what edges to add to the chart.
         """
-        if strategy == self._strategy: return
-        self._strategy = strategy[:] # Make a copy.
+        if strategy == self._strategy:
+            return
+        self._strategy = strategy[:]  # Make a copy.
         self._restart = True
 
     def set_grammar(self, grammar):
         "Change the grammar used by the parser."
-        if grammar is self._grammar: return
+        if grammar is self._grammar:
+            return
         self._grammar = grammar
         self._restart = True
 
     def set_chart(self, chart):
         "Load a given chart into the chart parser."
-        if chart is self._chart: return
+        if chart is self._chart:
+            return
         self._chart = chart
         self._restart = True
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Standard parser methods
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def parse(self, tokens, tree_class=Tree):
         tokens = list(tokens)
@@ -1539,18 +1691,23 @@ class SteppingChartParser(ChartParser):
 
         # Step until no more edges are generated.
         for e in self.step():
-            if e is None: break
+            if e is None:
+                break
 
         # Return an iterator of complete parses.
         return self.parses(tree_class=tree_class)
 
+
 ########################################################################
 ##  Demo Code
 ########################################################################
 
+
 def demo_grammar():
     from nltk.grammar import CFG
-    return CFG.fromstring("""
+
+    return CFG.fromstring(
+        """
 S  -> NP VP
 PP -> "with" NP
 NP -> NP PP
@@ -1569,12 +1726,19 @@ Verb -> "ate"
 Verb -> "saw"
 Prep -> "with"
 Prep -> "under"
-""")
-
-def demo(choice=None,
-         print_times=True, print_grammar=False,
-         print_trees=True, trace=2,
-         sent='I saw John with a dog with my cookie', numparses=5):
+"""
+    )
+
+
+def demo(
+    choice=None,
+    print_times=True,
+    print_grammar=False,
+    print_trees=True,
+    trace=2,
+    sent="I saw John with a dog with my cookie",
+    numparses=5,
+):
     """
     A demonstration of the chart parsers.
     """
@@ -1597,31 +1761,35 @@ def demo(choice=None,
     # Ask the user which parser to test,
     # if the parser wasn't provided as an argument
     if choice is None:
-        print('  1: Top-down chart parser')
-        print('  2: Bottom-up chart parser')
-        print('  3: Bottom-up left-corner chart parser')
-        print('  4: Left-corner chart parser with bottom-up filter')
-        print('  5: Stepping chart parser (alternating top-down & bottom-up)')
-        print('  6: All parsers')
-        print('\nWhich parser (1-6)? ', end=' ')
+        print("  1: Top-down chart parser")
+        print("  2: Bottom-up chart parser")
+        print("  3: Bottom-up left-corner chart parser")
+        print("  4: Left-corner chart parser with bottom-up filter")
+        print("  5: Stepping chart parser (alternating top-down & bottom-up)")
+        print("  6: All parsers")
+        print("\nWhich parser (1-6)? ", end=" ")
         choice = sys.stdin.readline().strip()
         print()
 
     choice = str(choice)
     if choice not in "123456":
-        print('Bad parser number')
+        print("Bad parser number")
         return
 
     # Keep track of how long each parser takes.
     times = {}
 
-    strategies = {'1': ('Top-down', TD_STRATEGY),
-                  '2': ('Bottom-up', BU_STRATEGY),
-                  '3': ('Bottom-up left-corner', BU_LC_STRATEGY),
-                  '4': ('Filtered left-corner', LC_STRATEGY)}
+    strategies = {
+        "1": ("Top-down", TD_STRATEGY),
+        "2": ("Bottom-up", BU_STRATEGY),
+        "3": ("Bottom-up left-corner", BU_LC_STRATEGY),
+        "4": ("Filtered left-corner", LC_STRATEGY),
+    }
     choices = []
-    if choice in strategies: choices = [choice]
-    if choice=='6': choices = "1234"
+    if choice in strategies:
+        choices = [choice]
+    if choice == "6":
+        choices = "1234"
 
     # Run the requested chart parser(s), except the stepping parser.
     for strategy in choices:
@@ -1632,12 +1800,13 @@ def demo(choice=None,
         chart = cp.chart_parse(tokens)
         parses = list(chart.parses(grammar.start()))
 
-        times[strategies[strategy][0]] = time.time()-t
+        times[strategies[strategy][0]] = time.time() - t
         print("Nr edges in chart:", len(chart.edges()))
         if numparses:
-            assert len(parses)==numparses, 'Not all parses found'
+            assert len(parses) == numparses, "Not all parses found"
         if print_trees:
-            for tree in parses: print(tree)
+            for tree in parses:
+                print(tree)
         else:
             print("Nr trees:", len(parses))
         print()
@@ -1650,33 +1819,38 @@ def demo(choice=None,
         cp = SteppingChartParser(grammar, trace=trace)
         cp.initialize(tokens)
         for i in range(5):
-            print('*** SWITCH TO TOP DOWN')
+            print("*** SWITCH TO TOP DOWN")
             cp.set_strategy(TD_STRATEGY)
             for j, e in enumerate(cp.step()):
-                if j>20 or e is None: break
-            print('*** SWITCH TO BOTTOM UP')
+                if j > 20 or e is None:
+                    break
+            print("*** SWITCH TO BOTTOM UP")
             cp.set_strategy(BU_STRATEGY)
             for j, e in enumerate(cp.step()):
-                if j>20 or e is None: break
-        times['Stepping'] = time.time()-t
+                if j > 20 or e is None:
+                    break
+        times["Stepping"] = time.time() - t
         print("Nr edges in chart:", len(cp.chart().edges()))
         if numparses:
-            assert len(list(cp.parses()))==numparses, 'Not all parses found'
+            assert len(list(cp.parses())) == numparses, "Not all parses found"
         if print_trees:
-            for tree in cp.parses(): print(tree)
+            for tree in cp.parses():
+                print(tree)
         else:
             print("Nr trees:", len(list(cp.parses())))
         print()
 
     # Print the times of all parsers:
-    if not (print_times and times): return
+    if not (print_times and times):
+        return
     print("* Parsing times")
     print()
     maxlen = max(len(key) for key in times)
-    format = '%' + repr(maxlen) + 's parser: %6.3fsec'
+    format = "%" + repr(maxlen) + "s parser: %6.3fsec"
     times_items = times.items()
-    for (parser, t) in sorted(times_items, key=lambda a:a[1]):
+    for (parser, t) in sorted(times_items, key=lambda a: a[1]):
         print(format % (parser, t))
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/parse/chart.pyc b/nlp_resource_data/nltk/parse/chart.pyc
deleted file mode 100755 (executable)
index 5d502cf..0000000
Binary files a/nlp_resource_data/nltk/parse/chart.pyc and /dev/null differ
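A minimal sketch of driving the chart parsers reformatted above, assuming only a
standard NLTK install; the toy grammar and sentence are illustrative and not part
of this patch:

    from nltk import CFG
    from nltk.parse.chart import BottomUpLeftCornerChartParser

    # A tiny grammar in the spirit of demo_grammar() above.
    grammar = CFG.fromstring("""
    S  -> NP VP
    VP -> V NP
    NP -> 'John' | 'Mary'
    V  -> 'saw'
    """)

    # parse() returns an iterator of trees, as defined in chart.py above.
    parser = BottomUpLeftCornerChartParser(grammar, trace=0)
    for tree in parser.parse("John saw Mary".split()):
        print(tree)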
old mode 100755 (executable)
new mode 100644 (file)
index f6043ef..1ba4801
@@ -1,14 +1,12 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the CoreNLP REST API.
 #
-# Copyright (C) 2001-2016 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Dmitrijs Milajevs <dimazest@gmail.com>
 #
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import unicode_literals
-
 import re
 import json
 import time
@@ -16,12 +14,15 @@ import socket
 
 from nltk.internals import find_jar_iter, config_java, java, _java_options
 
+from nltk.tag.api import TaggerI
 from nltk.parse.api import ParserI
 from nltk.tokenize.api import TokenizerI
 from nltk.parse.dependencygraph import DependencyGraph
 from nltk.tree import Tree
 
-_stanford_url = 'http://stanfordnlp.github.io/CoreNLP/'
+from unittest import skip
+
+_stanford_url = "http://stanfordnlp.github.io/CoreNLP/"
 
 
 class CoreNLPServerError(EnvironmentError):
@@ -30,7 +31,7 @@ class CoreNLPServerError(EnvironmentError):
 
 def try_port(port=0):
     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    sock.bind(('', port))
+    sock.bind(("", port))
 
     p = sock.getsockname()[1]
     sock.close()
@@ -40,34 +41,36 @@ def try_port(port=0):
 
 class CoreNLPServer(object):
 
-    _MODEL_JAR_PATTERN = r'stanford-corenlp-(\d+)\.(\d+)\.(\d+)-models\.jar'
-    _JAR = r'stanford-corenlp-(\d+)\.(\d+)\.(\d+)\.jar'
+    _MODEL_JAR_PATTERN = r"stanford-corenlp-(\d+)\.(\d+)\.(\d+)-models\.jar"
+    _JAR = r"stanford-corenlp-(\d+)\.(\d+)\.(\d+)\.jar"
 
     def __init__(
-        self, path_to_jar=None, path_to_models_jar=None, verbose=False,
-        java_options=None, corenlp_options=None, port=None,
+        self,
+        path_to_jar=None,
+        path_to_models_jar=None,
+        verbose=False,
+        java_options=None,
+        corenlp_options=None,
+        port=None,
     ):
 
         if corenlp_options is None:
-            corenlp_options = [
-                '-preload', 'tokenize,ssplit,pos,lemma,parse,depparse',
-            ]
+            corenlp_options = ["-preload", "tokenize,ssplit,pos,lemma,parse,depparse"]
 
-        jars = list(find_jar_iter(
-            self._JAR,
-            path_to_jar,
-            env_vars=('CORENLP', ),
-            searchpath=(),
-            url=_stanford_url,
-            verbose=verbose,
-            is_regex=True,
-        ))
+        jars = list(
+            find_jar_iter(
+                self._JAR,
+                path_to_jar,
+                env_vars=("CORENLP",),
+                searchpath=(),
+                url=_stanford_url,
+                verbose=verbose,
+                is_regex=True,
+            )
+        )
 
         # find the most recent code and model jar
-        stanford_jar = max(
-            jars,
-            key=lambda model_name: re.match(self._JAR, model_name)
-        )
+        stanford_jar = max(jars, key=lambda model_name: re.match(self._JAR, model_name))
 
         if port is None:
             try:
@@ -78,19 +81,19 @@ class CoreNLPServer(object):
         else:
             try_port(port)
 
-        self.url = 'http://localhost:{}'.format(port)
+        self.url = "http://localhost:{}".format(port)
 
         model_jar = max(
             find_jar_iter(
                 self._MODEL_JAR_PATTERN,
                 path_to_models_jar,
-                env_vars=('CORENLP_MODELS', ),
+                env_vars=("CORENLP_MODELS",),
                 searchpath=(),
                 url=_stanford_url,
                 verbose=verbose,
                 is_regex=True,
             ),
-            key=lambda model_name: re.match(self._MODEL_JAR_PATTERN, model_name)
+            key=lambda model_name: re.match(self._MODEL_JAR_PATTERN, model_name),
         )
 
         self.verbose = verbose
@@ -98,29 +101,31 @@ class CoreNLPServer(object):
         self._classpath = stanford_jar, model_jar
 
         self.corenlp_options = corenlp_options
-        self.java_options = java_options or ['-mx2g']
+        self.java_options = java_options or ["-mx2g"]
 
-    def start(self):
+    def start(self, stdout="devnull", stderr="devnull"):
+        """ Starts the CoreNLP server
+
+        :param stdout, stderr: Specifies where CoreNLP output is redirected. Valid values are 'devnull', 'stdout', 'pipe'
+        """
         import requests
 
-        cmd = ['edu.stanford.nlp.pipeline.StanfordCoreNLPServer']
+        cmd = ["edu.stanford.nlp.pipeline.StanfordCoreNLPServer"]
 
         if self.corenlp_options:
             cmd.extend(self.corenlp_options)
 
         # Configure java.
-        default_options = ' '.join(_java_options)
+        default_options = " ".join(_java_options)
         config_java(options=self.java_options, verbose=self.verbose)
 
         try:
-            # TODO: it's probably a bad idea to pipe stdout, as it will
-            #       accumulate when lots of text is being parsed.
             self.popen = java(
                 cmd,
                 classpath=self._classpath,
                 blocking=False,
-                stdout='pipe',
-                stderr='pipe',
+                stdout=stdout,
+                stderr=stderr,
             )
         finally:
             # Return java configurations to their default values.
@@ -132,35 +137,31 @@ class CoreNLPServer(object):
             _, stderrdata = self.popen.communicate()
             raise CoreNLPServerError(
                 returncode,
-                'Could not start the server. '
-                'The error was: {}'.format(stderrdata.decode('ascii'))
+                "Could not start the server. "
+                "The error was: {}".format(stderrdata.decode("ascii")),
             )
 
         for i in range(30):
             try:
-                response = requests.get(requests.compat.urljoin(self.url, 'live'))
+                response = requests.get(requests.compat.urljoin(self.url, "live"))
             except requests.exceptions.ConnectionError:
                 time.sleep(1)
             else:
                 if response.ok:
                     break
         else:
-            raise CoreNLPServerError(
-                'Could not connect to the server.'
-            )
+            raise CoreNLPServerError("Could not connect to the server.")
 
         for i in range(60):
             try:
-                response = requests.get(requests.compat.urljoin(self.url, 'ready'))
+                response = requests.get(requests.compat.urljoin(self.url, "ready"))
             except requests.exceptions.ConnectionError:
                 time.sleep(1)
             else:
                 if response.ok:
                     break
         else:
-            raise CoreNLPServerError(
-                'The server is not ready.'
-            )
+            raise CoreNLPServerError("The server is not ready.")
 
     def stop(self):
         self.popen.terminate()
@@ -176,15 +177,20 @@ class CoreNLPServer(object):
         return False
 
 
-class GenericCoreNLPParser(ParserI, TokenizerI):
+class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI):
     """Interface to the CoreNLP Parser."""
 
-    def __init__(self, url='http://localhost:9000', encoding='utf8'):
+    def __init__(self, url="http://localhost:9000", encoding="utf8", tagtype=None):
         import requests
 
         self.url = url
         self.encoding = encoding
 
+        if tagtype not in ["pos", "ner", None]:
+            raise ValueError("tagtype must be either 'pos', 'ner' or None")
+
+        self.tagtype = tagtype
+
         self.session = requests.Session()
 
     def parse_sents(self, sentences, *args, **kwargs):
@@ -202,7 +208,7 @@ class GenericCoreNLPParser(ParserI, TokenizerI):
         :rtype: iter(iter(Tree))
         """
         # Converting list(list(str)) -> list(str)
-        sentences = (' '.join(words) for words in sentences)
+        sentences = (" ".join(words) for words in sentences)
         return self.raw_parse_sents(sentences, *args, **kwargs)
 
     def raw_parse(self, sentence, properties=None, *args, **kwargs):
@@ -215,25 +221,20 @@ class GenericCoreNLPParser(ParserI, TokenizerI):
         :type sentence: str
         :rtype: iter(Tree)
         """
-        default_properties = {
-            'tokenize.whitespace': 'false',
-        }
+        default_properties = {"tokenize.whitespace": "false"}
         default_properties.update(properties or {})
 
         return next(
             self.raw_parse_sents(
-                [sentence],
-                properties=default_properties,
-                *args,
-                **kwargs
+                [sentence], properties=default_properties, *args, **kwargs
             )
         )
 
-    def api_call(self, data, properties=None):
+    def api_call(self, data, properties=None, timeout=60):
         default_properties = {
-            'outputFormat': 'json',
-            'annotators': 'tokenize,pos,lemma,ssplit,{parser_annotator}'.format(
-                parser_annotator=self.parser_annotator,
+            "outputFormat": "json",
+            "annotators": "tokenize,pos,lemma,ssplit,{parser_annotator}".format(
+                parser_annotator=self.parser_annotator
             ),
         }
 
@@ -241,11 +242,9 @@ class GenericCoreNLPParser(ParserI, TokenizerI):
 
         response = self.session.post(
             self.url,
-            params={
-                'properties': json.dumps(default_properties),
-            },
+            params={"properties": json.dumps(default_properties)},
             data=data.encode(self.encoding),
-            timeout=60,
+            timeout=timeout,
         )
 
         response.raise_for_status()
@@ -253,12 +252,7 @@ class GenericCoreNLPParser(ParserI, TokenizerI):
         return response.json()
 
     def raw_parse_sents(
-        self,
-        sentences,
-        verbose=False,
-        properties=None,
-        *args,
-        **kwargs
+        self, sentences, verbose=False, properties=None, *args, **kwargs
     ):
         """Parse multiple sentences.
 
@@ -272,7 +266,7 @@ class GenericCoreNLPParser(ParserI, TokenizerI):
         """
         default_properties = {
             # Only splits on '\n', never inside the sentence.
-            'ssplit.ssplit.eolonly': 'true',
+            "ssplit.eolonly": "true"
         }
 
         default_properties.update(properties or {})
@@ -287,12 +281,11 @@ class GenericCoreNLPParser(ParserI, TokenizerI):
         """
         for sentence in sentences:
             parsed_data = self.api_call(sentence, properties=default_properties)
             assert len(parsed_data['sentences']) == 1
             for parse in parsed_data['sentences']:
                 tree = self.make_tree(parse)
                 yield iter([tree])
         """
-        parsed_data = self.api_call('\n'.join(sentences), properties=default_properties)
-        for parsed_sent in parsed_data['sentences']:
+        parsed_data = self.api_call("\n".join(sentences), properties=default_properties)
+        for parsed_sent in parsed_data["sentences"]:
             tree = self.make_tree(parsed_sent)
             yield iter([tree])
 
-
     def parse_text(self, text, *args, **kwargs):
         """Parse a piece of text.
 
@@ -304,7 +297,7 @@ class GenericCoreNLPParser(ParserI, TokenizerI):
         """
         parsed_data = self.api_call(text, *args, **kwargs)
 
-        for parse in parsed_data['sentences']:
+        for parse in parsed_data["sentences"]:
             yield self.make_tree(parse)
 
     def tokenize(self, text, properties=None):
@@ -326,18 +319,79 @@ class GenericCoreNLPParser(ParserI, TokenizerI):
         ['The', 'color', 'of', 'the', 'wall', 'is', 'blue', '.']
 
         """
-        default_properties = {
-            'annotators': 'tokenize,ssplit',
-
-        }
+        default_properties = {"annotators": "tokenize,ssplit"}
 
         default_properties.update(properties or {})
 
         result = self.api_call(text, properties=default_properties)
 
-        for sentence in result['sentences']:
-            for token in sentence['tokens']:
-                yield token['originalText'] or token['word']
+        for sentence in result["sentences"]:
+            for token in sentence["tokens"]:
+                yield token["originalText"] or token["word"]
+
+    def tag_sents(self, sentences):
+        """
+        Tag multiple sentences.
+
+        Takes multiple sentences as a list where each sentence is a list of
+        tokens.
+
+        :param sentences: Input sentences to tag
+        :type sentences: list(list(str))
+        :rtype: list(list(tuple(str, str)))
+        """
+        # Converting list(list(str)) -> list(str)
+        sentences = (" ".join(words) for words in sentences)
+        return [tagged[0] for tagged in self.raw_tag_sents(sentences)]
+
+    def tag(self, sentence):
+        """
+        Tag a list of tokens.
+
+        :rtype: list(tuple(str, str))
+
+        >>> parser = CoreNLPParser(url='http://localhost:9000', tagtype='ner')
+        >>> tokens = 'Rami Eid is studying at Stony Brook University in NY'.split()
+        >>> parser.tag(tokens)
+        [('Rami', 'PERSON'), ('Eid', 'PERSON'), ('is', 'O'), ('studying', 'O'), ('at', 'O'), ('Stony', 'ORGANIZATION'),
+        ('Brook', 'ORGANIZATION'), ('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'O')]
+
+        >>> parser = CoreNLPParser(url='http://localhost:9000', tagtype='pos')
+        >>> tokens = "What is the airspeed of an unladen swallow ?".split()
+        >>> parser.tag(tokens)
+        [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'),
+        ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'),
+        ('unladen', 'JJ'), ('swallow', 'VB'), ('?', '.')]
+        """
+        return self.tag_sents([sentence])[0]
+
+    def raw_tag_sents(self, sentences):
+        """
+        Tag multiple sentences.
+
+        Takes multiple sentences as a list where each sentence is a string.
+
+        :param sentences: Input sentences to tag
+        :type sentences: list(str)
+        :rtype: list(list(list(tuple(str, str))))
+        """
+        default_properties = {
+            "ssplit.isOneSentence": "true",
+            "annotators": "tokenize,ssplit,",
+        }
+
+        # Supports only 'pos' or 'ner' tags.
+        assert self.tagtype in ["pos", "ner"]
+        default_properties["annotators"] += self.tagtype
+        for sentence in sentences:
+            tagged_data = self.api_call(sentence, properties=default_properties)
+            yield [
+                [
+                    (token["word"], token[self.tagtype])
+                    for token in tagged_sentence["tokens"]
+                ]
+                for tagged_sentence in tagged_data["sentences"]
+            ]
 
 
 class CoreNLPParser(GenericCoreNLPParser):
@@ -485,11 +539,11 @@ class CoreNLPParser(GenericCoreNLPParser):
 
     """
 
-    _OUTPUT_FORMAT = 'penn'
-    parser_annotator = 'parse'
+    _OUTPUT_FORMAT = "penn"
+    parser_annotator = "parse"
 
     def make_tree(self, result):
-        return Tree.fromstring(result['parse'])
+        return Tree.fromstring(result["parse"])
 
 
 class CoreNLPDependencyParser(GenericCoreNLPParser):
@@ -656,61 +710,64 @@ class CoreNLPDependencyParser(GenericCoreNLPParser):
 
     """
 
-    _OUTPUT_FORMAT = 'conll2007'
-    parser_annotator = 'depparse'
+    _OUTPUT_FORMAT = "conll2007"
+    parser_annotator = "depparse"
 
     def make_tree(self, result):
 
         return DependencyGraph(
             (
-                ' '.join(n_items[1:])  # NLTK expects an iterable of strings...
+                " ".join(n_items[1:])  # NLTK expects an iterable of strings...
                 for n_items in sorted(transform(result))
             ),
-            cell_separator=' ',  # To make sure that a non-breaking space is kept inside of a token.
+            cell_separator=" ",  # To make sure that a non-breaking space is kept inside of a token.
         )
 
 
 def transform(sentence):
-    for dependency in sentence['basicDependencies']:
+    for dependency in sentence["basicDependencies"]:
 
-        dependent_index = dependency['dependent']
-        token = sentence['tokens'][dependent_index - 1]
+        dependent_index = dependency["dependent"]
+        token = sentence["tokens"][dependent_index - 1]
 
         # Return values that we don't know as '_'. Also, consider tag and ctag
         # to be equal.
         yield (
             dependent_index,
-            '_',
-            token['word'],
-            token['lemma'],
-            token['pos'],
-            token['pos'],
-            '_',
-            str(dependency['governor']),
-            dependency['dep'],
-            '_',
-            '_',
+            "_",
+            token["word"],
+            token["lemma"],
+            token["pos"],
+            token["pos"],
+            "_",
+            str(dependency["governor"]),
+            dependency["dep"],
+            "_",
+            "_",
         )
 
 
+@skip("Skipping all CoreNLP tests.")
 def setup_module(module):
     from nose import SkipTest
 
     global server
+
     try:
         server = CoreNLPServer(port=9000)
     except LookupError as e:
-        raise SkipTest('Could not instantiate CoreNLPServer.')
+        raise SkipTest("Could not instantiate CoreNLPServer.")
 
     try:
         server.start()
     except CoreNLPServerError as e:
         raise SkipTest(
-            'Skipping CoreNLP tests because the server could not be started. '
-            'Make sure that the 9000 port is free. '
-            '{}'.format(e.strerror)
+            "Skipping CoreNLP tests because the server could not be started. "
+            "Make sure that the 9000 port is free. "
+            "{}".format(e.strerror)
         )
 
 
+@skip("Skipping all CoreNLP tests.")
 def teardown_module(module):
     server.stop()
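The TaggerI support added to GenericCoreNLPParser above can be exercised as in
its doctests; a minimal sketch, assuming a CoreNLP server is already listening
on localhost:9000 (for example one started via CoreNLPServer, whose start()
now discards the server's output by default):

    from nltk.parse.corenlp import CoreNLPParser

    # POS tagging through the CoreNLP REST API.
    pos_tagger = CoreNLPParser(url="http://localhost:9000", tagtype="pos")
    print(pos_tagger.tag("What is the airspeed of an unladen swallow ?".split()))

    # NER tagging uses the same interface, only with tagtype='ner'.
    ner_tagger = CoreNLPParser(url="http://localhost:9000", tagtype="ner")
    print(ner_tagger.tag("Rami Eid is studying at Stony Brook University in NY".split()))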
old mode 100755 (executable)
new mode 100644 (file)
index 6fadad9..4d3f7c5
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Dependency Grammars
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Jason Narad <jason.narad@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com> (modifications)
 #
@@ -13,7 +13,6 @@ Tools for reading and writing dependency trees.
 The input is assumed to be in Malt-TAB format
 (http://stp.lingfil.uu.se/~nivre/research/MaltXML.html).
 """
-from __future__ import print_function, unicode_literals
 
 from collections import defaultdict
 from itertools import chain
@@ -21,24 +20,26 @@ from pprint import pformat
 import subprocess
 import warnings
 
-from six import string_types
-
 from nltk.tree import Tree
-from nltk.compat import python_2_unicode_compatible
-
 
 #################################################################
 # DependencyGraph Class
 #################################################################
 
 
-@python_2_unicode_compatible
 class DependencyGraph(object):
     """
     A container for the nodes and labelled edges of a dependency structure.
     """
 
-    def __init__(self, tree_str=None, cell_extractor=None, zero_based=False, cell_separator=None, top_relation_label='ROOT'):
+    def __init__(
+        self,
+        tree_str=None,
+        cell_extractor=None,
+        zero_based=False,
+        cell_separator=None,
+        top_relation_label="ROOT",
+    ):
         """Dependency graph.
 
         We place a dummy `TOP` node with the index 0, since the root node is
@@ -56,25 +57,22 @@ class DependencyGraph(object):
         identified, for example, `ROOT`, `null` or `TOP`.
 
         """
-        self.nodes = defaultdict(lambda:  {'address': None,
-                                           'word': None,
-                                           'lemma': None,
-                                           'ctag': None,
-                                           'tag': None,
-                                           'feats': None,
-                                           'head': None,
-                                           'deps': defaultdict(list),
-                                           'rel': None,
-                                           })
-
-        self.nodes[0].update(
-            {
-                'ctag': 'TOP',
-                'tag': 'TOP',
-                'address': 0,
+        self.nodes = defaultdict(
+            lambda: {
+                "address": None,
+                "word": None,
+                "lemma": None,
+                "ctag": None,
+                "tag": None,
+                "feats": None,
+                "head": None,
+                "deps": defaultdict(list),
+                "rel": None,
             }
         )
 
+        self.nodes[0].update({"ctag": "TOP", "tag": "TOP", "address": 0})
+
         self.root = None
 
         if tree_str:
@@ -100,23 +98,22 @@ class DependencyGraph(object):
         """
         for node in self.nodes.values():
             new_deps = []
-            for dep in node['deps']:
+            for dep in node["deps"]:
                 if dep in originals:
                     new_deps.append(redirect)
                 else:
                     new_deps.append(dep)
-            node['deps'] = new_deps
+            node["deps"] = new_deps
 
     def add_arc(self, head_address, mod_address):
         """
         Adds an arc from the node specified by head_address to the
         node specified by the mod address.
         """
-        relation = self.nodes[mod_address]['rel']
-        self.nodes[head_address]['deps'].setdefault(relation, [])
-        self.nodes[head_address]['deps'][relation].append(mod_address)
-        #self.nodes[head_address]['deps'].append(mod_address)
-
+        relation = self.nodes[mod_address]["rel"]
+        self.nodes[head_address]["deps"].setdefault(relation, [])
+        self.nodes[head_address]["deps"][relation].append(mod_address)
+        # self.nodes[head_address]['deps'].append(mod_address)
 
     def connect_graph(self):
         """
@@ -125,11 +122,11 @@ class DependencyGraph(object):
         """
         for node1 in self.nodes.values():
             for node2 in self.nodes.values():
-                if node1['address'] != node2['address'] and node2['rel'] != 'TOP':
-                    relation = node2['rel']
-                    node1['deps'].setdefault(relation, [])
-                    node1['deps'][relation].append(node2['address'])
-                    #node1['deps'].append(node2['address'])
+                if node1["address"] != node2["address"] and node2["rel"] != "TOP":
+                    relation = node2["rel"]
+                    node1["deps"].setdefault(relation, [])
+                    node1["deps"][relation].append(node2["address"])
+                    # node1['deps'].append(node2['address'])
 
     def get_by_address(self, node_address):
         """Return the node with the given address."""
@@ -166,19 +163,23 @@ class DependencyGraph(object):
 
         """
         # Start the digraph specification
-        s = 'digraph G{\n'
-        s += 'edge [dir=forward]\n'
-        s += 'node [shape=plaintext]\n'
+        s = "digraph G{\n"
+        s += "edge [dir=forward]\n"
+        s += "node [shape=plaintext]\n"
 
         # Draw the remaining nodes
-        for node in sorted(self.nodes.values(), key=lambda v: v['address']):
-            s += '\n%s [label="%s (%s)"]' % (node['address'], node['address'], node['word'])
-            for rel, deps in node['deps'].items():
+        for node in sorted(self.nodes.values(), key=lambda v: v["address"]):
+            s += '\n%s [label="%s (%s)"]' % (
+                node["address"],
+                node["address"],
+                node["word"],
+            )
+            for rel, deps in node["deps"].items():
                 for dep in deps:
                     if rel is not None:
-                        s += '\n%s -> %s [label="%s"]' % (node['address'], dep, rel)
+                        s += '\n%s -> %s [label="%s"]' % (node["address"], dep, rel)
                     else:
-                        s += '\n%s -> %s ' % (node['address'], dep)
+                        s += "\n%s -> %s " % (node["address"], dep)
         s += "\n}"
 
         return s
@@ -199,19 +200,20 @@ class DependencyGraph(object):
 
         try:
             process = subprocess.Popen(
-                ['dot', '-Tsvg'],
+                ["dot", "-Tsvg"],
                 stdin=subprocess.PIPE,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
                 universal_newlines=True,
             )
         except OSError:
-            raise Exception('Cannot find the dot binary from Graphviz package')
+            raise Exception("Cannot find the dot binary from Graphviz package")
         out, err = process.communicate(dot_string)
         if err:
             raise Exception(
-                'Cannot create svg representation by running dot from string: {}'
-                ''.format(dot_string))
+                "Cannot create svg representation by running dot from string: {}"
+                "".format(dot_string)
+            )
         return out
 
     def __str__(self):
@@ -221,7 +223,9 @@ class DependencyGraph(object):
         return "<DependencyGraph with {0} nodes>".format(len(self.nodes))
 
     @staticmethod
-    def load(filename, zero_based=False, cell_separator=None, top_relation_label='ROOT'):
+    def load(
+        filename, zero_based=False, cell_separator=None, top_relation_label="ROOT"
+    ):
         """
         :param filename: a name of a file in Malt-TAB format
         :param zero_based: nodes in the input file are numbered starting from 0
@@ -242,7 +246,7 @@ class DependencyGraph(object):
                     cell_separator=cell_separator,
                     top_relation_label=top_relation_label,
                 )
-                for tree_str in infile.read().split('\n\n')
+                for tree_str in infile.read().split("\n\n")
             ]
 
     def left_children(self, node_index):
@@ -250,8 +254,8 @@ class DependencyGraph(object):
         Returns the number of left children under the node specified
         by the given address.
         """
-        children = chain.from_iterable(self.nodes[node_index]['deps'].values())
-        index = self.nodes[node_index]['address']
+        children = chain.from_iterable(self.nodes[node_index]["deps"].values())
+        index = self.nodes[node_index]["address"]
         return sum(1 for c in children if c < index)
 
     def right_children(self, node_index):
@@ -259,15 +263,22 @@ class DependencyGraph(object):
         Returns the number of right children under the node specified
         by the given address.
         """
-        children = chain.from_iterable(self.nodes[node_index]['deps'].values())
-        index = self.nodes[node_index]['address']
+        children = chain.from_iterable(self.nodes[node_index]["deps"].values())
+        index = self.nodes[node_index]["address"]
         return sum(1 for c in children if c > index)
 
     def add_node(self, node):
-        if not self.contains_address(node['address']):
-            self.nodes[node['address']].update(node)
-
-    def _parse(self, input_, cell_extractor=None, zero_based=False, cell_separator=None, top_relation_label='ROOT'):
+        if not self.contains_address(node["address"]):
+            self.nodes[node["address"]].update(node)
+
+    def _parse(
+        self,
+        input_,
+        cell_extractor=None,
+        zero_based=False,
+        cell_separator=None,
+        top_relation_label="ROOT",
+    ):
         """Parse a sentence.
 
         :param extractor: a function that given a tuple of cells returns a
         7-tuple, where the values are ...
         """

 
         def extract_3_cells(cells, index):
             word, tag, head = cells
-            return index, word, word, tag, tag, '', head, ''
+            return index, word, word, tag, tag, "", head, ""
 
         def extract_4_cells(cells, index):
             word, tag, head, rel = cells
-            return index, word, word, tag, tag, '', head, rel
+            return index, word, word, tag, tag, "", head, rel
 
         def extract_7_cells(cells, index):
             line_index, word, lemma, tag, _, head, rel = cells
@@ -297,7 +308,7 @@ class DependencyGraph(object):
             except ValueError:
                 # index can't be parsed as an integer, use default
                 pass
-            return index, word, lemma, tag, tag, '', head, rel
+            return index, word, lemma, tag, tag, "", head, rel
 
         def extract_10_cells(cells, index):
             line_index, word, lemma, ctag, tag, feats, head, rel, _, _ = cells
@@ -315,8 +326,8 @@ class DependencyGraph(object):
             10: extract_10_cells,
         }
 
-        if isinstance(input_, string_types):
-            input_ = (line for line in input_.split('\n'))
+        if isinstance(input_, str):
+            input_ = (line for line in input_.split("\n"))
 
         lines = (l.rstrip() for l in input_)
         lines = (l for l in lines if l)
@@ -334,19 +345,21 @@ class DependencyGraph(object):
                     cell_extractor = extractors[cell_number]
                 except KeyError:
                     raise ValueError(
-                        'Number of tab-delimited fields ({0}) not supported by '
-                        'CoNLL(10) or Malt-Tab(4) format'.format(cell_number)
+                        "Number of tab-delimited fields ({0}) not supported by "
+                        "CoNLL(10) or Malt-Tab(4) format".format(cell_number)
                     )
 
             try:
-                index, word, lemma, ctag, tag, feats, head, rel = cell_extractor(cells, index)
+                index, word, lemma, ctag, tag, feats, head, rel = cell_extractor(
+                    cells, index
+                )
             except (TypeError, ValueError):
                 # cell_extractor doesn't take 2 arguments or doesn't return 8
                 # values; assume the cell_extractor is an older external
                 # extractor and doesn't accept or return an index.
                 word, lemma, ctag, tag, feats, head, rel = cell_extractor(cells)
 
-            if head == '_':
+            if head == "_":
                 continue
 
             head = int(head)
@@ -355,36 +368,35 @@ class DependencyGraph(object):
 
             self.nodes[index].update(
                 {
-                    'address': index,
-                    'word': word,
-                    'lemma': lemma,
-                    'ctag': ctag,
-                    'tag': tag,
-                    'feats': feats,
-                    'head': head,
-                    'rel': rel,
+                    "address": index,
+                    "word": word,
+                    "lemma": lemma,
+                    "ctag": ctag,
+                    "tag": tag,
+                    "feats": feats,
+                    "head": head,
+                    "rel": rel,
                 }
             )
 
             # Make sure that the fake root node has labeled dependencies.
             if (cell_number == 3) and (head == 0):
                 rel = top_relation_label
-            self.nodes[head]['deps'][rel].append(index)
+            self.nodes[head]["deps"][rel].append(index)
 
-        if self.nodes[0]['deps'][top_relation_label]:
-            root_address = self.nodes[0]['deps'][top_relation_label][0]
+        if self.nodes[0]["deps"][top_relation_label]:
+            root_address = self.nodes[0]["deps"][top_relation_label][0]
             self.root = self.nodes[root_address]
             self.top_relation_label = top_relation_label
         else:
             warnings.warn(
-                "The graph doesn't contain a node "
-                "that depends on the root element."
+                "The graph doesn't contain a node " "that depends on the root element."
             )
 
     def _word(self, node, filter=True):
-        w = node['word']
+        w = node["word"]
         if filter:
-            if w != ',':
+            if w != ",":
                 return w
         return w
 
@@ -395,8 +407,8 @@ class DependencyGraph(object):
         :return: either a word (if the indexed node is a leaf) or a ``Tree``.
         """
         node = self.get_by_address(i)
-        word = node['word']
-        deps = sorted(chain.from_iterable(node['deps'].values()))
+        word = node["word"]
+        deps = sorted(chain.from_iterable(node["deps"].values()))
 
         if deps:
             return Tree(word, [self._tree(dep) for dep in deps])
@@ -410,8 +422,8 @@ class DependencyGraph(object):
         """
         node = self.root
 
-        word = node['word']
-        deps = sorted(chain.from_iterable(node['deps'].values()))
+        word = node["word"]
+        deps = sorted(chain.from_iterable(node["deps"].values()))
         return Tree(word, [self._tree(dep) for dep in deps])
 
     def triples(self, node=None):
@@ -423,22 +435,22 @@ class DependencyGraph(object):
         if not node:
             node = self.root
 
-        head = (node['word'], node['ctag'])
-        for i in sorted(chain.from_iterable(node['deps'].values())):
+        head = (node["word"], node["ctag"])
+        for i in sorted(chain.from_iterable(node["deps"].values())):
             dep = self.get_by_address(i)
-            yield (head, dep['rel'], (dep['word'], dep['ctag']))
+            yield (head, dep["rel"], (dep["word"], dep["ctag"]))
             for triple in self.triples(node=dep):
                 yield triple
 
     def _hd(self, i):
         try:
-            return self.nodes[i]['head']
+            return self.nodes[i]["head"]
         except IndexError:
             return None
 
     def _rel(self, i):
         try:
-            return self.nodes[i]['rel']
+            return self.nodes[i]["rel"]
         except IndexError:
             return None
 
@@ -472,8 +484,8 @@ class DependencyGraph(object):
         distances = {}
 
         for node in self.nodes.values():
-            for dep in node['deps']:
-                key = tuple([node['address'], dep])
+            for dep in node["deps"]:
+                key = tuple([node["address"], dep])
                 distances[key] = 1
 
         for _ in self.nodes:
@@ -494,13 +506,13 @@ class DependencyGraph(object):
         return False  # return []?
 
     def get_cycle_path(self, curr_node, goal_node_index):
-        for dep in curr_node['deps']:
+        for dep in curr_node["deps"]:
             if dep == goal_node_index:
-                return [curr_node['address']]
-        for dep in curr_node['deps']:
+                return [curr_node["address"]]
+        for dep in curr_node["deps"]:
             path = self.get_cycle_path(self.get_by_address(dep), goal_node_index)
             if len(path) > 0:
-                path.insert(0, curr_node['address'])
+                path.insert(0, curr_node["address"])
                 return path
         return []
 
@@ -514,18 +526,24 @@ class DependencyGraph(object):
         """
 
         if style == 3:
-            template = '{word}\t{tag}\t{head}\n'
+            template = "{word}\t{tag}\t{head}\n"
         elif style == 4:
-            template = '{word}\t{tag}\t{head}\t{rel}\n'
+            template = "{word}\t{tag}\t{head}\t{rel}\n"
         elif style == 10:
-            template = '{i}\t{word}\t{lemma}\t{ctag}\t{tag}\t{feats}\t{head}\t{rel}\t_\t_\n'
+            template = (
+                "{i}\t{word}\t{lemma}\t{ctag}\t{tag}\t{feats}\t{head}\t{rel}\t_\t_\n"
+            )
         else:
             raise ValueError(
-                'Number of tab-delimited fields ({0}) not supported by '
-                'CoNLL(10) or Malt-Tab(4) format'.format(style)
+                "Number of tab-delimited fields ({0}) not supported by "
+                "CoNLL(10) or Malt-Tab(4) format".format(style)
             )
 
-        return ''.join(template.format(i=i, **node) for i, node in sorted(self.nodes.items()) if node['tag'] != 'TOP')
+        return "".join(
+            template.format(i=i, **node)
+            for i, node in sorted(self.nodes.items())
+            if node["tag"] != "TOP"
+        )
 
     def nx_graph(self):
         """Convert the data in a ``nodelist`` into a networkx labeled directed graph."""
@@ -533,12 +551,11 @@ class DependencyGraph(object):
 
         nx_nodelist = list(range(1, len(self.nodes)))
         nx_edgelist = [
-            (n, self._hd(n), self._rel(n))
-            for n in nx_nodelist if self._hd(n)
+            (n, self._hd(n), self._rel(n)) for n in nx_nodelist if self._hd(n)
         ]
         self.nx_labels = {}
         for n in nx_nodelist:
-            self.nx_labels[n] = self.nodes[n]['word']
+            self.nx_labels[n] = self.nodes[n]["word"]
 
         g = networkx.MultiDiGraph()
         g.add_nodes_from(nx_nodelist)
@@ -563,7 +580,8 @@ def malt_demo(nx=False):
     A demonstration of the result of reading a dependency
     version of the first sentence of the Penn Treebank.
     """
-    dg = DependencyGraph("""Pierre  NNP     2       NMOD
+    dg = DependencyGraph(
+        """Pierre  NNP     2       NMOD
 Vinken  NNP     8       SUB
 ,       ,       2       P
 61      CD      5       NMOD
@@ -581,7 +599,8 @@ director        NN      12      PMOD
 Nov.    NNP     9       VMOD
 29      CD      16      NMOD
 .       .       9       VMOD
-""")
+"""
+    )
     tree = dg.tree()
     tree.pprint()
     if nx:
@@ -597,7 +616,7 @@ Nov.    NNP     9       VMOD
         networkx.draw_networkx_labels(g, pos, dg.nx_labels)
         pylab.xticks([])
         pylab.yticks([])
-        pylab.savefig('tree.png')
+        pylab.savefig("tree.png")
         pylab.show()
 
 
@@ -614,12 +633,11 @@ def conll_demo():
 
 
 def conll_file_demo():
-    print('Mass conll_read demo...')
-    graphs = [DependencyGraph(entry)
-              for entry in conll_data2.split('\n\n') if entry]
+    print("Mass conll_read demo...")
+    graphs = [DependencyGraph(entry) for entry in conll_data2.split("\n\n") if entry]
     for graph in graphs:
         tree = graph.tree()
-        print('\n')
+        print("\n")
         tree.pprint()
 
 
@@ -627,13 +645,14 @@ def cycle_finding_demo():
     dg = DependencyGraph(treebank_data)
     print(dg.contains_cycle())
     cyclic_dg = DependencyGraph()
-    cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0})
-    cyclic_dg.add_node({'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1})
-    cyclic_dg.add_node({'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2})
-    cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3})
-    cyclic_dg.add_node({'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4})
+    cyclic_dg.add_node({"word": None, "deps": [1], "rel": "TOP", "address": 0})
+    cyclic_dg.add_node({"word": None, "deps": [2], "rel": "NTOP", "address": 1})
+    cyclic_dg.add_node({"word": None, "deps": [4], "rel": "NTOP", "address": 2})
+    cyclic_dg.add_node({"word": None, "deps": [1], "rel": "NTOP", "address": 3})
+    cyclic_dg.add_node({"word": None, "deps": [3], "rel": "NTOP", "address": 4})
     print(cyclic_dg.contains_cycle())
 
+
 treebank_data = """Pierre  NNP     2       NMOD
 Vinken  NNP     8       SUB
 ,       ,       2       P
@@ -756,5 +775,5 @@ conll_data2 = """1   Cathy             Cathy             N     N     eigen|ev|ne
 16  .                 .                 Punc  Punc  punt                             15  punct   _  _
 """
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/parse/dependencygraph.pyc b/nlp_resource_data/nltk/parse/dependencygraph.pyc
deleted file mode 100755 (executable)
index eb0a12a..0000000
Binary files a/nlp_resource_data/nltk/parse/dependencygraph.pyc and /dev/null differ
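A short sketch of the DependencyGraph API reformatted above, reading the 4-cell
Malt-TAB format; it assumes only a standard NLTK install, and the three-token
sentence is illustrative:

    from nltk.parse.dependencygraph import DependencyGraph

    # Columns: word, tag, head index (1-based, 0 = root), relation.
    dg = DependencyGraph("""John    NNP     2       SUB
    saw     VBD     0       ROOT
    Mary    NNP     2       OBJ
    """)

    dg.tree().pprint()     # prints: (saw John Mary)
    print(dg.to_conll(4))  # serializes back to 4-cell Malt-TAB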
old mode 100755 (executable)
new mode 100644 (file)
index 5955b50..d6cc14e
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: An Incremental Earley Chart Parser
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
 #         Rob Speer <rspeer@mit.edu>
 #         Edward Loper <edloper@gmail.com>
@@ -25,28 +25,39 @@ This is appealing for, say, speech recognizer hypothesis filtering.
 The main parser class is ``EarleyChartParser``, which is a top-down
 algorithm, originally formulated by Jay Earley (1970).
 """
-from __future__ import print_function, division
-
-from six.moves import range
-
-from nltk.parse.chart import (Chart, ChartParser, EdgeI, LeafEdge, LeafInitRule,
-                              BottomUpPredictRule, BottomUpPredictCombineRule,
-                              TopDownInitRule, SingleEdgeFundamentalRule,
-                              EmptyPredictRule,
-                              CachedTopDownPredictRule,
-                              FilteredSingleEdgeFundamentalRule,
-                              FilteredBottomUpPredictCombineRule)
-from nltk.parse.featurechart import (FeatureChart, FeatureChartParser,
-                                     FeatureTopDownInitRule,
-                                     FeatureTopDownPredictRule,
-                                     FeatureEmptyPredictRule,
-                                     FeatureBottomUpPredictRule,
-                                     FeatureBottomUpPredictCombineRule,
-                                     FeatureSingleEdgeFundamentalRule)
-
-#////////////////////////////////////////////////////////////
+
+from time import perf_counter
+
+from nltk.parse.chart import (
+    Chart,
+    ChartParser,
+    EdgeI,
+    LeafEdge,
+    LeafInitRule,
+    BottomUpPredictRule,
+    BottomUpPredictCombineRule,
+    TopDownInitRule,
+    SingleEdgeFundamentalRule,
+    EmptyPredictRule,
+    CachedTopDownPredictRule,
+    FilteredSingleEdgeFundamentalRule,
+    FilteredBottomUpPredictCombineRule,
+)
+from nltk.parse.featurechart import (
+    FeatureChart,
+    FeatureChartParser,
+    FeatureTopDownInitRule,
+    FeatureTopDownPredictRule,
+    FeatureEmptyPredictRule,
+    FeatureBottomUpPredictRule,
+    FeatureBottomUpPredictCombineRule,
+    FeatureSingleEdgeFundamentalRule,
+)
+
+# ////////////////////////////////////////////////////////////
 # Incremental Chart
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 class IncrementalChart(Chart):
     def initialize(self):
@@ -70,7 +81,8 @@ class IncrementalChart(Chart):
         edgelist = self._edgelists[end]
 
         # If there are no restrictions, then return all edges.
-        if restrictions=={}: return iter(edgelist)
+        if restrictions == {}:
+            return iter(edgelist)
 
         # Find the index corresponding to the given restrictions.
         restr_keys = sorted(restrictions.keys())
@@ -87,7 +99,7 @@ class IncrementalChart(Chart):
         # Make sure it's a valid index.
         for key in restr_keys:
             if not hasattr(EdgeI, key):
-                raise ValueError('Bad restriction: %s' % key)
+                raise ValueError("Bad restriction: %s" % key)
 
         # Create the index.
         index = self._indexes[restr_keys] = tuple({} for x in self._positions())
@@ -117,7 +129,8 @@ class FeatureIncrementalChart(IncrementalChart, FeatureChart):
         edgelist = self._edgelists[end]
 
         # If there are no restrictions, then return all edges.
-        if restrictions=={}: return iter(edgelist)
+        if restrictions == {}:
+            return iter(edgelist)
 
         # Find the index corresponding to the given restrictions.
         restr_keys = sorted(restrictions.keys())
@@ -127,15 +140,16 @@ class FeatureIncrementalChart(IncrementalChart, FeatureChart):
         if restr_keys not in self._indexes:
             self._add_index(restr_keys)
 
-        vals = tuple(self._get_type_if_possible(restrictions[key])
-                     for key in restr_keys)
+        vals = tuple(
+            self._get_type_if_possible(restrictions[key]) for key in restr_keys
+        )
         return iter(self._indexes[restr_keys][end].get(vals, []))
 
     def _add_index(self, restr_keys):
         # Make sure it's a valid index.
         for key in restr_keys:
             if not hasattr(EdgeI, key):
-                raise ValueError('Bad restriction: %s' % key)
+                raise ValueError("Bad restriction: %s" % key)
 
         # Create the index.
         index = self._indexes[restr_keys] = tuple({} for x in self._positions())
@@ -144,50 +158,61 @@ class FeatureIncrementalChart(IncrementalChart, FeatureChart):
         for end, edgelist in enumerate(self._edgelists):
             this_index = index[end]
             for edge in edgelist:
-                vals = tuple(self._get_type_if_possible(getattr(edge, key)())
-                             for key in restr_keys)
+                vals = tuple(
+                    self._get_type_if_possible(getattr(edge, key)())
+                    for key in restr_keys
+                )
                 this_index.setdefault(vals, []).append(edge)
 
     def _register_with_indexes(self, edge):
         end = edge.end()
         for (restr_keys, index) in self._indexes.items():
-            vals = tuple(self._get_type_if_possible(getattr(edge, key)())
-                         for key in restr_keys)
+            vals = tuple(
+                self._get_type_if_possible(getattr(edge, key)()) for key in restr_keys
+            )
             index[end].setdefault(vals, []).append(edge)
 
-#////////////////////////////////////////////////////////////
+
+# ////////////////////////////////////////////////////////////
 # Incremental CFG Rules
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 class CompleteFundamentalRule(SingleEdgeFundamentalRule):
     def _apply_incomplete(self, chart, grammar, left_edge):
         end = left_edge.end()
         # When the chart is incremental, we only have to look for
         # empty complete edges here.
-        for right_edge in chart.select(start=end, end=end,
-                                       is_complete=True,
-                                       lhs=left_edge.nextsym()):
+        for right_edge in chart.select(
+            start=end, end=end, is_complete=True, lhs=left_edge.nextsym()
+        ):
             new_edge = left_edge.move_dot_forward(right_edge.end())
             if chart.insert_with_backpointer(new_edge, left_edge, right_edge):
                 yield new_edge
 
+
 class CompleterRule(CompleteFundamentalRule):
     _fundamental_rule = CompleteFundamentalRule()
+
     def apply(self, chart, grammar, edge):
         if not isinstance(edge, LeafEdge):
             for new_edge in self._fundamental_rule.apply(chart, grammar, edge):
                 yield new_edge
 
+
 class ScannerRule(CompleteFundamentalRule):
     _fundamental_rule = CompleteFundamentalRule()
+
     def apply(self, chart, grammar, edge):
         if isinstance(edge, LeafEdge):
             for new_edge in self._fundamental_rule.apply(chart, grammar, edge):
                 yield new_edge
 
+
 class PredictorRule(CachedTopDownPredictRule):
     pass
 
+
 class FilteredCompleteFundamentalRule(FilteredSingleEdgeFundamentalRule):
     def apply(self, chart, grammar, edge):
         # Since the Filtered rule only works for grammars without empty productions,
@@ -196,9 +221,11 @@ class FilteredCompleteFundamentalRule(FilteredSingleEdgeFundamentalRule):
             for new_edge in self._apply_complete(chart, grammar, edge):
                 yield new_edge
 
-#////////////////////////////////////////////////////////////
+
+# ////////////////////////////////////////////////////////////
 # Incremental FCFG Rules
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 class FeatureCompleteFundamentalRule(FeatureSingleEdgeFundamentalRule):
     def _apply_incomplete(self, chart, grammar, left_edge):
@@ -206,46 +233,61 @@ class FeatureCompleteFundamentalRule(FeatureSingleEdgeFundamentalRule):
         end = left_edge.end()
         # When the chart is incremental, we only have to look for
         # empty complete edges here.
-        for right_edge in chart.select(start=end, end=end,
-                                       is_complete=True,
-                                       lhs=left_edge.nextsym()):
+        for right_edge in chart.select(
+            start=end, end=end, is_complete=True, lhs=left_edge.nextsym()
+        ):
             for new_edge in fr.apply(chart, grammar, left_edge, right_edge):
                 yield new_edge
 
+
 class FeatureCompleterRule(CompleterRule):
     _fundamental_rule = FeatureCompleteFundamentalRule()
 
+
 class FeatureScannerRule(ScannerRule):
     _fundamental_rule = FeatureCompleteFundamentalRule()
 
+
 class FeaturePredictorRule(FeatureTopDownPredictRule):
     pass
 
-#////////////////////////////////////////////////////////////
+
+# ////////////////////////////////////////////////////////////
 # Incremental CFG Chart Parsers
-#////////////////////////////////////////////////////////////
-
-EARLEY_STRATEGY = [LeafInitRule(),
-                   TopDownInitRule(),
-                   CompleterRule(),
-                   ScannerRule(),
-                   PredictorRule()]
-TD_INCREMENTAL_STRATEGY = [LeafInitRule(),
-                           TopDownInitRule(),
-                           CachedTopDownPredictRule(),
-                           CompleteFundamentalRule()]
-BU_INCREMENTAL_STRATEGY = [LeafInitRule(),
-                           EmptyPredictRule(),
-                           BottomUpPredictRule(),
-                           CompleteFundamentalRule()]
-BU_LC_INCREMENTAL_STRATEGY = [LeafInitRule(),
-                              EmptyPredictRule(),
-                              BottomUpPredictCombineRule(),
-                              CompleteFundamentalRule()]
-
-LC_INCREMENTAL_STRATEGY = [LeafInitRule(),
-                           FilteredBottomUpPredictCombineRule(),
-                           FilteredCompleteFundamentalRule()]
+# ////////////////////////////////////////////////////////////
+
+EARLEY_STRATEGY = [
+    LeafInitRule(),
+    TopDownInitRule(),
+    CompleterRule(),
+    ScannerRule(),
+    PredictorRule(),
+]
+TD_INCREMENTAL_STRATEGY = [
+    LeafInitRule(),
+    TopDownInitRule(),
+    CachedTopDownPredictRule(),
+    CompleteFundamentalRule(),
+]
+BU_INCREMENTAL_STRATEGY = [
+    LeafInitRule(),
+    EmptyPredictRule(),
+    BottomUpPredictRule(),
+    CompleteFundamentalRule(),
+]
+BU_LC_INCREMENTAL_STRATEGY = [
+    LeafInitRule(),
+    EmptyPredictRule(),
+    BottomUpPredictCombineRule(),
+    CompleteFundamentalRule(),
+]
+
+LC_INCREMENTAL_STRATEGY = [
+    LeafInitRule(),
+    FilteredBottomUpPredictCombineRule(),
+    FilteredCompleteFundamentalRule(),
+]
+
 
 class IncrementalChartParser(ChartParser):
     """
@@ -262,9 +304,15 @@ class IncrementalChartParser(ChartParser):
     |       Apply CompleterRule to edge
     | Return any complete parses in the chart
     """
-    def __init__(self, grammar, strategy=BU_LC_INCREMENTAL_STRATEGY,
-                 trace=0, trace_chart_width=50,
-                 chart_class=IncrementalChart):
+
+    def __init__(
+        self,
+        grammar,
+        strategy=BU_LC_INCREMENTAL_STRATEGY,
+        trace=0,
+        trace_chart_width=50,
+        chart_class=IncrementalChart,
+    ):
         """
         Create a new Earley chart parser, that uses ``grammar`` to
         parse texts.
@@ -296,11 +344,13 @@ class IncrementalChartParser(ChartParser):
             elif rule.NUM_EDGES == 1:
                 self._inference_rules.append(rule)
             else:
-                raise ValueError("Incremental inference rules must have "
-                                 "NUM_EDGES == 0 or 1")
+                raise ValueError(
+                    "Incremental inference rules must have " "NUM_EDGES == 0 or 1"
+                )
 
     def chart_parse(self, tokens, trace=None):
-        if trace is None: trace = self._trace
+        if trace is None:
+            trace = self._trace
         trace_new_edges = self._trace_new_edges
 
         tokens = list(tokens)
@@ -310,15 +360,17 @@ class IncrementalChartParser(ChartParser):
 
         # Width, for printing trace edges.
         trace_edge_width = self._trace_chart_width // (chart.num_leaves() + 1)
-        if trace: print(chart.pretty_format_leaves(trace_edge_width))
+        if trace:
+            print(chart.pretty_format_leaves(trace_edge_width))
 
         for axiom in self._axioms:
             new_edges = list(axiom.apply(chart, grammar))
             trace_new_edges(chart, axiom, new_edges, trace, trace_edge_width)
 
         inference_rules = self._inference_rules
-        for end in range(chart.num_leaves()+1):
-            if trace > 1: print("\n* Processing queue:", end, "\n")
+        for end in range(chart.num_leaves() + 1):
+            if trace > 1:
+                print("\n* Processing queue:", end, "\n")
             agenda = list(chart.select(end=end))
             while agenda:
                 edge = agenda.pop()
@@ -326,93 +378,141 @@ class IncrementalChartParser(ChartParser):
                     new_edges = list(rule.apply(chart, grammar, edge))
                     trace_new_edges(chart, rule, new_edges, trace, trace_edge_width)
                     for new_edge in new_edges:
-                        if new_edge.end()==end:
+                        if new_edge.end() == end:
                             agenda.append(new_edge)
 
         return chart
 
+
 class EarleyChartParser(IncrementalChartParser):
     def __init__(self, grammar, **parser_args):
         IncrementalChartParser.__init__(self, grammar, EARLEY_STRATEGY, **parser_args)
-    pass
+
 
 class IncrementalTopDownChartParser(IncrementalChartParser):
     def __init__(self, grammar, **parser_args):
-        IncrementalChartParser.__init__(self, grammar, TD_INCREMENTAL_STRATEGY, **parser_args)
+        IncrementalChartParser.__init__(
+            self, grammar, TD_INCREMENTAL_STRATEGY, **parser_args
+        )
+
 
 class IncrementalBottomUpChartParser(IncrementalChartParser):
     def __init__(self, grammar, **parser_args):
-        IncrementalChartParser.__init__(self, grammar, BU_INCREMENTAL_STRATEGY, **parser_args)
+        IncrementalChartParser.__init__(
+            self, grammar, BU_INCREMENTAL_STRATEGY, **parser_args
+        )
+
 
 class IncrementalBottomUpLeftCornerChartParser(IncrementalChartParser):
     def __init__(self, grammar, **parser_args):
-        IncrementalChartParser.__init__(self, grammar, BU_LC_INCREMENTAL_STRATEGY, **parser_args)
+        IncrementalChartParser.__init__(
+            self, grammar, BU_LC_INCREMENTAL_STRATEGY, **parser_args
+        )
+
 
 class IncrementalLeftCornerChartParser(IncrementalChartParser):
     def __init__(self, grammar, **parser_args):
         if not grammar.is_nonempty():
-            raise ValueError("IncrementalLeftCornerParser only works for grammars "
-                             "without empty productions.")
-        IncrementalChartParser.__init__(self, grammar, LC_INCREMENTAL_STRATEGY, **parser_args)
+            raise ValueError(
+                "IncrementalLeftCornerParser only works for grammars "
+                "without empty productions."
+            )
+        IncrementalChartParser.__init__(
+            self, grammar, LC_INCREMENTAL_STRATEGY, **parser_args
+        )
 
-#////////////////////////////////////////////////////////////
+
+# ////////////////////////////////////////////////////////////
 # Incremental FCFG Chart Parsers
-#////////////////////////////////////////////////////////////
-
-EARLEY_FEATURE_STRATEGY = [LeafInitRule(),
-                           FeatureTopDownInitRule(),
-                           FeatureCompleterRule(),
-                           FeatureScannerRule(),
-                           FeaturePredictorRule()]
-TD_INCREMENTAL_FEATURE_STRATEGY = [LeafInitRule(),
-                                   FeatureTopDownInitRule(),
-                                   FeatureTopDownPredictRule(),
-                                   FeatureCompleteFundamentalRule()]
-BU_INCREMENTAL_FEATURE_STRATEGY = [LeafInitRule(),
-                                   FeatureEmptyPredictRule(),
-                                   FeatureBottomUpPredictRule(),
-                                   FeatureCompleteFundamentalRule()]
-BU_LC_INCREMENTAL_FEATURE_STRATEGY = [LeafInitRule(),
-                                      FeatureEmptyPredictRule(),
-                                      FeatureBottomUpPredictCombineRule(),
-                                      FeatureCompleteFundamentalRule()]
+# ////////////////////////////////////////////////////////////
+
+EARLEY_FEATURE_STRATEGY = [
+    LeafInitRule(),
+    FeatureTopDownInitRule(),
+    FeatureCompleterRule(),
+    FeatureScannerRule(),
+    FeaturePredictorRule(),
+]
+TD_INCREMENTAL_FEATURE_STRATEGY = [
+    LeafInitRule(),
+    FeatureTopDownInitRule(),
+    FeatureTopDownPredictRule(),
+    FeatureCompleteFundamentalRule(),
+]
+BU_INCREMENTAL_FEATURE_STRATEGY = [
+    LeafInitRule(),
+    FeatureEmptyPredictRule(),
+    FeatureBottomUpPredictRule(),
+    FeatureCompleteFundamentalRule(),
+]
+BU_LC_INCREMENTAL_FEATURE_STRATEGY = [
+    LeafInitRule(),
+    FeatureEmptyPredictRule(),
+    FeatureBottomUpPredictCombineRule(),
+    FeatureCompleteFundamentalRule(),
+]
+
 
 class FeatureIncrementalChartParser(IncrementalChartParser, FeatureChartParser):
-    def __init__(self, grammar,
-                 strategy=BU_LC_INCREMENTAL_FEATURE_STRATEGY,
-                 trace_chart_width=20,
-                 chart_class=FeatureIncrementalChart,
-                 **parser_args):
-        IncrementalChartParser.__init__(self, grammar,
-                                        strategy=strategy,
-                                        trace_chart_width=trace_chart_width,
-                                        chart_class=chart_class,
-                                        **parser_args)
+    def __init__(
+        self,
+        grammar,
+        strategy=BU_LC_INCREMENTAL_FEATURE_STRATEGY,
+        trace_chart_width=20,
+        chart_class=FeatureIncrementalChart,
+        **parser_args
+    ):
+        IncrementalChartParser.__init__(
+            self,
+            grammar,
+            strategy=strategy,
+            trace_chart_width=trace_chart_width,
+            chart_class=chart_class,
+            **parser_args
+        )
+
 
 class FeatureEarleyChartParser(FeatureIncrementalChartParser):
     def __init__(self, grammar, **parser_args):
-        FeatureIncrementalChartParser.__init__(self, grammar, EARLEY_FEATURE_STRATEGY, **parser_args)
+        FeatureIncrementalChartParser.__init__(
+            self, grammar, EARLEY_FEATURE_STRATEGY, **parser_args
+        )
+
 
 class FeatureIncrementalTopDownChartParser(FeatureIncrementalChartParser):
     def __init__(self, grammar, **parser_args):
-        FeatureIncrementalChartParser.__init__(self, grammar, TD_INCREMENTAL_FEATURE_STRATEGY, **parser_args)
+        FeatureIncrementalChartParser.__init__(
+            self, grammar, TD_INCREMENTAL_FEATURE_STRATEGY, **parser_args
+        )
+
 
 class FeatureIncrementalBottomUpChartParser(FeatureIncrementalChartParser):
     def __init__(self, grammar, **parser_args):
-        FeatureIncrementalChartParser.__init__(self, grammar, BU_INCREMENTAL_FEATURE_STRATEGY, **parser_args)
+        FeatureIncrementalChartParser.__init__(
+            self, grammar, BU_INCREMENTAL_FEATURE_STRATEGY, **parser_args
+        )
+
 
 class FeatureIncrementalBottomUpLeftCornerChartParser(FeatureIncrementalChartParser):
     def __init__(self, grammar, **parser_args):
-        FeatureIncrementalChartParser.__init__(self, grammar, BU_LC_INCREMENTAL_FEATURE_STRATEGY, **parser_args)
+        FeatureIncrementalChartParser.__init__(
+            self, grammar, BU_LC_INCREMENTAL_FEATURE_STRATEGY, **parser_args
+        )
 
 
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
 # Demonstration
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
 
-def demo(print_times=True, print_grammar=False,
-         print_trees=True, trace=2,
-         sent='I saw John with a dog with my cookie', numparses=5):
+
+def demo(
+    print_times=True,
+    print_grammar=False,
+    print_trees=True,
+    trace=2,
+    sent="I saw John with a dog with my cookie",
+    numparses=5,
+):
     """
     A demonstration of the Earley parsers.
     """
@@ -434,19 +534,22 @@ def demo(print_times=True, print_grammar=False,
 
     # Do the parsing.
     earley = EarleyChartParser(grammar, trace=trace)
-    t = time.clock()
+    t = perf_counter()
     chart = earley.chart_parse(tokens)
     parses = list(chart.parses(grammar.start()))
-    t = time.clock()-t
+    t = perf_counter() - t
 
     # Print results.
     if numparses:
-        assert len(parses)==numparses, 'Not all parses found'
+        assert len(parses) == numparses, "Not all parses found"
     if print_trees:
-        for tree in parses: print(tree)
+        for tree in parses:
+            print(tree)
     else:
         print("Nr trees:", len(parses))
     if print_times:
         print("Time:", t)
 
-if __name__ == '__main__': demo()
+
+if __name__ == "__main__":
+    demo()
diff --git a/nlp_resource_data/nltk/parse/earleychart.pyc b/nlp_resource_data/nltk/parse/earleychart.pyc
deleted file mode 100755 (executable)
index fc81b0e..0000000
Binary files a/nlp_resource_data/nltk/parse/earleychart.pyc and /dev/null differ
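
For orientation, a minimal usage sketch of the incremental parsers above; the toy grammar and sentence are illustrative assumptions, not taken from this patch:

    # Drive the Earley parser over a small CFG and print complete parses.
    from nltk import CFG
    from nltk.parse.earleychart import EarleyChartParser

    toy_grammar = CFG.fromstring("""
    S -> NP VP
    NP -> 'I' | 'John'
    VP -> V NP
    V -> 'saw'
    """)

    parser = EarleyChartParser(toy_grammar, trace=0)
    chart = parser.chart_parse("I saw John".split())
    for tree in chart.parses(toy_grammar.start()):
        print(tree)  # (S (NP I) (VP (V saw) (NP John)))
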
diff --git a/nlp_resource_data/nltk/parse/evaluate.py b/nlp_resource_data/nltk/parse/evaluate.py
old mode 100755 (executable)
new mode 100644 (file)
index 0d101bf..672ad6d
@@ -2,12 +2,10 @@
 #
 # Author: Long Duong <longdt219@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import division
-
 import unicodedata
 
 
@@ -63,8 +61,10 @@ class DependencyEvaluator(object):
     >>> de = DependencyEvaluator([parsed_sent],[gold_sent])
     >>> las, uas = de.eval()
     >>> las
+    0.6...
+    >>> uas
     0.8...
-    >>> abs(uas - 0.6) < 0.00001
+    >>> abs(uas - 0.8) < 0.00001
     True
     """
 
@@ -91,8 +91,10 @@ class DependencyEvaluator(object):
 
         :return : tuple(float,float)
         """
-        if (len(self._parsed_sents) != len(self._gold_sents)):
-            raise ValueError(" Number of parsed sentence is different with number of gold sentence.")
+        if len(self._parsed_sents) != len(self._gold_sents):
+            raise ValueError(
+                "Number of parsed sentences differs from the number of gold sentences."
+            )
 
         corr = 0
         corrL = 0
@@ -102,7 +104,7 @@ class DependencyEvaluator(object):
             parsed_sent_nodes = self._parsed_sents[i].nodes
             gold_sent_nodes = self._gold_sents[i].nodes
 
-            if (len(parsed_sent_nodes) != len(gold_sent_nodes)):
+            if len(parsed_sent_nodes) != len(gold_sent_nodes):
                 raise ValueError("Sentences must have equal length.")
 
             for parsed_node_address, parsed_node in parsed_sent_nodes.items():
@@ -124,6 +126,4 @@ class DependencyEvaluator(object):
                     if parsed_node["rel"] == gold_node["rel"]:
                         corrL += 1
 
-        return corr / total, corrL / total
-
-
+        return corrL / total, corr / total
diff --git a/nlp_resource_data/nltk/parse/evaluate.pyc b/nlp_resource_data/nltk/parse/evaluate.pyc
deleted file mode 100755 (executable)
index ee55343..0000000
Binary files a/nlp_resource_data/nltk/parse/evaluate.pyc and /dev/null differ
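
Since the hunk above swaps the return order of eval(), a hedged sketch of the resulting contract: eval() now yields (LAS, UAS), labeled attachment score first. The 4-column CoNLL strings below are invented for illustration:

    from nltk.parse import DependencyGraph, DependencyEvaluator

    gold = DependencyGraph("""the DT 2 det
    cat NN 3 nsubj
    sleeps VBZ 0 ROOT""")

    # Same heads as gold, one wrong relation label: UAS = 1.0, LAS = 2/3.
    parsed = DependencyGraph("""the DT 2 amod
    cat NN 3 nsubj
    sleeps VBZ 0 ROOT""")

    las, uas = DependencyEvaluator([parsed], [gold]).eval()
    print(las, uas)  # expected: 0.666... 1.0
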
diff --git a/nlp_resource_data/nltk/parse/featurechart.py b/nlp_resource_data/nltk/parse/featurechart.py
old mode 100755 (executable)
new mode 100644 (file)
index eafd0bf..a06c50f
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Chart Parser for Feature-Based Grammars
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Rob Speer <rspeer@mit.edu>
 #         Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
 # URL: <http://nltk.org/>
 Extension of chart parsing implementation to handle grammars with
 feature structures as nodes.
 """
-from __future__ import print_function, unicode_literals
+from time import perf_counter
 
-from six.moves import range
-
-from nltk.compat import python_2_unicode_compatible
 from nltk.featstruct import FeatStruct, unify, TYPE, find_variables
 from nltk.sem import logic
 from nltk.tree import Tree
-from nltk.grammar import (Nonterminal, Production, CFG,
-                          FeatStructNonterminal, is_nonterminal,
-                          is_terminal)
-from nltk.parse.chart import (TreeEdge, Chart, ChartParser, EdgeI,
-                              FundamentalRule, LeafInitRule,
-                              EmptyPredictRule, BottomUpPredictRule,
-                              SingleEdgeFundamentalRule,
-                              BottomUpPredictCombineRule,
-                              CachedTopDownPredictRule,
-                              TopDownInitRule)
-
-#////////////////////////////////////////////////////////////
+from nltk.grammar import (
+    Nonterminal,
+    Production,
+    CFG,
+    FeatStructNonterminal,
+    is_nonterminal,
+    is_terminal,
+)
+from nltk.parse.chart import (
+    TreeEdge,
+    Chart,
+    ChartParser,
+    EdgeI,
+    FundamentalRule,
+    LeafInitRule,
+    EmptyPredictRule,
+    BottomUpPredictRule,
+    SingleEdgeFundamentalRule,
+    BottomUpPredictCombineRule,
+    CachedTopDownPredictRule,
+    TopDownInitRule,
+)
+
+# ////////////////////////////////////////////////////////////
 # Tree Edge
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
-@python_2_unicode_compatible
 class FeatureTreeEdge(TreeEdge):
     """
     A specialized tree edge that allows shared variable bindings
@@ -47,6 +56,7 @@ class FeatureTreeEdge(TreeEdge):
     every nonterminal in the edge whose symbol implements the
     interface ``SubstituteBindingsI``.
     """
+
     def __init__(self, span, lhs, rhs, dot=0, bindings=None):
         """
         Construct a new edge.  If the edge is incomplete (i.e., if
@@ -56,7 +66,8 @@ class FeatureTreeEdge(TreeEdge):
         clear the bindings.  See ``TreeEdge`` for a description of
         the other arguments.
         """
-        if bindings is None: bindings = {}
+        if bindings is None:
+            bindings = {}
 
         # If the edge is complete, then substitute in the bindings,
         # and then throw them away.  (If we didn't throw them away, we
@@ -82,8 +93,9 @@ class FeatureTreeEdge(TreeEdge):
             ``(index,index)``; and its dot position will be ``0``.
         :rtype: TreeEdge
         """
-        return FeatureTreeEdge(span=(index, index), lhs=production.lhs(),
-                               rhs=production.rhs(), dot=0)
+        return FeatureTreeEdge(
+            span=(index, index), lhs=production.lhs(), rhs=production.rhs(), dot=0
+        )
 
     def move_dot_forward(self, new_end, bindings=None):
         """
@@ -96,12 +108,17 @@ class FeatureTreeEdge(TreeEdge):
         :param bindings: Bindings for the new edge.
         :type bindings: dict
         """
-        return FeatureTreeEdge(span=(self._span[0], new_end),
-                               lhs=self._lhs, rhs=self._rhs,
-                               dot=self._dot+1, bindings=bindings)
+        return FeatureTreeEdge(
+            span=(self._span[0], new_end),
+            lhs=self._lhs,
+            rhs=self._rhs,
+            dot=self._dot + 1,
+            bindings=bindings,
+        )
 
     def _bind(self, nt, bindings):
-        if not isinstance(nt, FeatStructNonterminal): return nt
+        if not isinstance(nt, FeatStructNonterminal):
+            return nt
         return nt.substitute_bindings(bindings)
 
     def next_with_bindings(self):
@@ -118,26 +135,31 @@ class FeatureTreeEdge(TreeEdge):
         :return: The set of variables used by this edge.
         :rtype: set(Variable)
         """
-        return find_variables([self._lhs] + list(self._rhs) +
-                              list(self._bindings.keys()) +
-                              list(self._bindings.values()),
-                              fs_class=FeatStruct)
+        return find_variables(
+            [self._lhs]
+            + list(self._rhs)
+            + list(self._bindings.keys())
+            + list(self._bindings.values()),
+            fs_class=FeatStruct,
+        )
 
     def __str__(self):
         if self.is_complete():
-            return TreeEdge.__unicode__(self)
+            return super().__str__()
         else:
-            bindings = '{%s}' % ', '.join('%s: %r' % item for item in
-                                           sorted(self._bindings.items()))
-            return '%s %s' % (TreeEdge.__unicode__(self), bindings)
+            bindings = "{%s}" % ", ".join(
+                "%s: %r" % item for item in sorted(self._bindings.items())
+            )
+            return "%s %s" % (super().__str__(), bindings)
 
 
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
 # A specialized Chart for feature grammars
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
 
 # TODO: subsumes check when adding new edges
 
+
 class FeatureChart(Chart):
     """
     A Chart for feature grammars.
@@ -151,7 +173,8 @@ class FeatureChart(Chart):
         ``restrictions`` on the edges.
         """
         # If there are no restrictions, then return all edges.
-        if restrictions=={}: return iter(self._edges)
+        if restrictions == {}:
+            return iter(self._edges)
 
         # Find the index corresponding to the given restrictions.
         restr_keys = sorted(restrictions.keys())
@@ -161,8 +184,9 @@ class FeatureChart(Chart):
         if restr_keys not in self._indexes:
             self._add_index(restr_keys)
 
-        vals = tuple(self._get_type_if_possible(restrictions[key])
-                     for key in restr_keys)
+        vals = tuple(
+            self._get_type_if_possible(restrictions[key]) for key in restr_keys
+        )
         return iter(self._indexes[restr_keys].get(vals, []))
 
     def _add_index(self, restr_keys):
@@ -173,15 +197,16 @@ class FeatureChart(Chart):
         # Make sure it's a valid index.
         for key in restr_keys:
             if not hasattr(EdgeI, key):
-                raise ValueError('Bad restriction: %s' % key)
+                raise ValueError("Bad restriction: %s" % key)
 
         # Create the index.
         index = self._indexes[restr_keys] = {}
 
         # Add all existing edges to the index.
         for edge in self._edges:
-            vals = tuple(self._get_type_if_possible(getattr(edge, key)())
-                         for key in restr_keys)
+            vals = tuple(
+                self._get_type_if_possible(getattr(edge, key)()) for key in restr_keys
+            )
             index.setdefault(vals, []).append(edge)
 
     def _register_with_indexes(self, edge):
@@ -190,8 +215,9 @@ class FeatureChart(Chart):
         edge with all existing indexes.
         """
         for (restr_keys, index) in self._indexes.items():
-            vals = tuple(self._get_type_if_possible(getattr(edge, key)())
-                         for key in restr_keys)
+            vals = tuple(
+                self._get_type_if_possible(getattr(edge, key)()) for key in restr_keys
+            )
             index.setdefault(vals, []).append(edge)
 
     def _get_type_if_possible(self, item):
@@ -206,17 +232,19 @@ class FeatureChart(Chart):
 
     def parses(self, start, tree_class=Tree):
         for edge in self.select(start=0, end=self._num_leaves):
-            if ((isinstance(edge, FeatureTreeEdge)) and
-                (edge.lhs()[TYPE] == start[TYPE]) and
-                (unify(edge.lhs(), start, rename_vars=True))
-                ):
+            if (
+                (isinstance(edge, FeatureTreeEdge))
+                and (edge.lhs()[TYPE] == start[TYPE])
+                and (unify(edge.lhs(), start, rename_vars=True))
+            ):
                 for tree in self.trees(edge, complete=True, tree_class=tree_class):
                     yield tree
 
 
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
 # Fundamental Rule
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 class FeatureFundamentalRule(FundamentalRule):
     """
@@ -239,18 +267,23 @@ class FeatureFundamentalRule(FundamentalRule):
 
     assuming that B1 and B2 can be unified to generate B3.
     """
+
     def apply(self, chart, grammar, left_edge, right_edge):
         # Make sure the rule is applicable.
-        if not (left_edge.end() == right_edge.start() and
-                left_edge.is_incomplete() and
-                right_edge.is_complete() and
-                isinstance(left_edge, FeatureTreeEdge)):
+        if not (
+            left_edge.end() == right_edge.start()
+            and left_edge.is_incomplete()
+            and right_edge.is_complete()
+            and isinstance(left_edge, FeatureTreeEdge)
+        ):
             return
         found = right_edge.lhs()
         nextsym = left_edge.nextsym()
         if isinstance(right_edge, FeatureTreeEdge):
-            if not is_nonterminal(nextsym): return
-            if left_edge.nextsym()[TYPE] != right_edge.lhs()[TYPE]: return
+            if not is_nonterminal(nextsym):
+                return
+            if left_edge.nextsym()[TYPE] != right_edge.lhs()[TYPE]:
+                return
             # Create a copy of the bindings.
             bindings = left_edge.bindings()
             # We rename vars here, because we don't want variables
@@ -259,9 +292,11 @@ class FeatureFundamentalRule(FundamentalRule):
             # Unify B1 (left_edge.nextsym) with B2 (right_edge.lhs) to
             # generate B3 (result).
             result = unify(nextsym, found, bindings, rename_vars=False)
-            if result is None: return
+            if result is None:
+                return
         else:
-            if nextsym != found: return
+            if nextsym != found:
+                return
             # Create a copy of the bindings.
             bindings = left_edge.bindings()
 
@@ -272,6 +307,7 @@ class FeatureFundamentalRule(FundamentalRule):
         if chart.insert_with_backpointer(new_edge, left_edge, right_edge):
             yield new_edge
 
+
 class FeatureSingleEdgeFundamentalRule(SingleEdgeFundamentalRule):
     """
     A specialized version of the completer / single edge fundamental rule
@@ -279,28 +315,30 @@ class FeatureSingleEdgeFundamentalRule(SingleEdgeFundamentalRule):
     Rather than simply comparing the nonterminals for equality, they are
     unified.
     """
+
     _fundamental_rule = FeatureFundamentalRule()
 
     def _apply_complete(self, chart, grammar, right_edge):
         fr = self._fundamental_rule
-        for left_edge in chart.select(end=right_edge.start(),
-                                      is_complete=False,
-                                      nextsym=right_edge.lhs()):
+        for left_edge in chart.select(
+            end=right_edge.start(), is_complete=False, nextsym=right_edge.lhs()
+        ):
             for new_edge in fr.apply(chart, grammar, left_edge, right_edge):
                 yield new_edge
 
     def _apply_incomplete(self, chart, grammar, left_edge):
         fr = self._fundamental_rule
-        for right_edge in chart.select(start=left_edge.end(),
-                                       is_complete=True,
-                                       lhs=left_edge.nextsym()):
+        for right_edge in chart.select(
+            start=left_edge.end(), is_complete=True, lhs=left_edge.nextsym()
+        ):
             for new_edge in fr.apply(chart, grammar, left_edge, right_edge):
                 yield new_edge
 
 
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
 # Top-Down Prediction
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 class FeatureTopDownInitRule(TopDownInitRule):
     def apply(self, chart, grammar):
@@ -309,6 +347,7 @@ class FeatureTopDownInitRule(TopDownInitRule):
             if chart.insert(new_edge, ()):
                 yield new_edge
 
+
 class FeatureTopDownPredictRule(CachedTopDownPredictRule):
     """
     A specialized version of the (cached) top down predict rule that operates
@@ -327,10 +366,13 @@ class FeatureTopDownPredictRule(CachedTopDownPredictRule):
     for each grammar production ``B2 -> gamma``, assuming that B1
     and B2 can be unified.
     """
+
     def apply(self, chart, grammar, edge):
-        if edge.is_complete(): return
+        if edge.is_complete():
+            return
         nextsym, index = edge.nextsym(), edge.end()
-        if not is_nonterminal(nextsym): return
+        if not is_nonterminal(nextsym):
+            return
 
         # If we've already applied this rule to an edge with the same
         # next & end, and the chart & grammar have not changed, then
@@ -346,8 +388,10 @@ class FeatureTopDownPredictRule(CachedTopDownPredictRule):
             if prod.rhs():
                 first = prod.rhs()[0]
                 if is_terminal(first):
-                    if index >= chart.num_leaves(): continue
-                    if first != chart.leaf(index): continue
+                    if index >= chart.num_leaves():
+                        continue
+                    if first != chart.leaf(index):
+                        continue
 
             # We rename vars here, because we don't want variables
             # from the two different productions to match.
@@ -360,46 +404,56 @@ class FeatureTopDownPredictRule(CachedTopDownPredictRule):
         self._done[nextsym_with_bindings, index] = (chart, grammar)
 
 
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
 # Bottom-Up Prediction
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 class FeatureBottomUpPredictRule(BottomUpPredictRule):
     def apply(self, chart, grammar, edge):
-        if edge.is_incomplete(): return
+        if edge.is_incomplete():
+            return
         for prod in grammar.productions(rhs=edge.lhs()):
             if isinstance(edge, FeatureTreeEdge):
                 _next = prod.rhs()[0]
-                if not is_nonterminal(_next): continue
+                if not is_nonterminal(_next):
+                    continue
 
             new_edge = FeatureTreeEdge.from_production(prod, edge.start())
             if chart.insert(new_edge, ()):
                 yield new_edge
 
+
 class FeatureBottomUpPredictCombineRule(BottomUpPredictCombineRule):
     def apply(self, chart, grammar, edge):
-        if edge.is_incomplete(): return
+        if edge.is_incomplete():
+            return
         found = edge.lhs()
         for prod in grammar.productions(rhs=found):
             bindings = {}
             if isinstance(edge, FeatureTreeEdge):
                 _next = prod.rhs()[0]
-                if not is_nonterminal(_next): continue
+                if not is_nonterminal(_next):
+                    continue
 
                 # We rename vars here, because we don't want variables
                 # from the two different productions to match.
-                used_vars = find_variables((prod.lhs(),) + prod.rhs(),
-                                           fs_class=FeatStruct)
+                used_vars = find_variables(
+                    (prod.lhs(),) + prod.rhs(), fs_class=FeatStruct
+                )
                 found = found.rename_variables(used_vars=used_vars)
 
                 result = unify(_next, found, bindings, rename_vars=False)
-                if result is None: continue
+                if result is None:
+                    continue
 
-            new_edge = (FeatureTreeEdge.from_production(prod, edge.start())
-                        .move_dot_forward(edge.end(), bindings))
+            new_edge = FeatureTreeEdge.from_production(
+                prod, edge.start()
+            ).move_dot_forward(edge.end(), bindings)
             if chart.insert(new_edge, (edge,)):
                 yield new_edge
 
+
 class FeatureEmptyPredictRule(EmptyPredictRule):
     def apply(self, chart, grammar):
         for prod in grammar.productions(empty=True):
@@ -409,51 +463,70 @@ class FeatureEmptyPredictRule(EmptyPredictRule):
                     yield new_edge
 
 
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
 # Feature Chart Parser
-#////////////////////////////////////////////////////////////
-
-TD_FEATURE_STRATEGY = [LeafInitRule(),
-                       FeatureTopDownInitRule(),
-                       FeatureTopDownPredictRule(),
-                       FeatureSingleEdgeFundamentalRule()]
-BU_FEATURE_STRATEGY = [LeafInitRule(),
-                       FeatureEmptyPredictRule(),
-                       FeatureBottomUpPredictRule(),
-                       FeatureSingleEdgeFundamentalRule()]
-BU_LC_FEATURE_STRATEGY = [LeafInitRule(),
-                          FeatureEmptyPredictRule(),
-                          FeatureBottomUpPredictCombineRule(),
-                          FeatureSingleEdgeFundamentalRule()]
+# ////////////////////////////////////////////////////////////
+
+TD_FEATURE_STRATEGY = [
+    LeafInitRule(),
+    FeatureTopDownInitRule(),
+    FeatureTopDownPredictRule(),
+    FeatureSingleEdgeFundamentalRule(),
+]
+BU_FEATURE_STRATEGY = [
+    LeafInitRule(),
+    FeatureEmptyPredictRule(),
+    FeatureBottomUpPredictRule(),
+    FeatureSingleEdgeFundamentalRule(),
+]
+BU_LC_FEATURE_STRATEGY = [
+    LeafInitRule(),
+    FeatureEmptyPredictRule(),
+    FeatureBottomUpPredictCombineRule(),
+    FeatureSingleEdgeFundamentalRule(),
+]
+
 
 class FeatureChartParser(ChartParser):
-    def __init__(self, grammar,
-                 strategy=BU_LC_FEATURE_STRATEGY,
-                 trace_chart_width=20,
-                 chart_class=FeatureChart,
-                 **parser_args):
-        ChartParser.__init__(self, grammar,
-                             strategy=strategy,
-                             trace_chart_width=trace_chart_width,
-                             chart_class=chart_class,
-                             **parser_args)
+    def __init__(
+        self,
+        grammar,
+        strategy=BU_LC_FEATURE_STRATEGY,
+        trace_chart_width=20,
+        chart_class=FeatureChart,
+        **parser_args
+    ):
+        ChartParser.__init__(
+            self,
+            grammar,
+            strategy=strategy,
+            trace_chart_width=trace_chart_width,
+            chart_class=chart_class,
+            **parser_args
+        )
+
 
 class FeatureTopDownChartParser(FeatureChartParser):
     def __init__(self, grammar, **parser_args):
         FeatureChartParser.__init__(self, grammar, TD_FEATURE_STRATEGY, **parser_args)
 
+
 class FeatureBottomUpChartParser(FeatureChartParser):
     def __init__(self, grammar, **parser_args):
         FeatureChartParser.__init__(self, grammar, BU_FEATURE_STRATEGY, **parser_args)
 
+
 class FeatureBottomUpLeftCornerChartParser(FeatureChartParser):
     def __init__(self, grammar, **parser_args):
-        FeatureChartParser.__init__(self, grammar, BU_LC_FEATURE_STRATEGY, **parser_args)
+        FeatureChartParser.__init__(
+            self, grammar, BU_LC_FEATURE_STRATEGY, **parser_args
+        )
 
 
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
 # Instantiate Variable Chart
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 class InstantiateVarsChart(FeatureChart):
     """
@@ -463,6 +536,7 @@ class InstantiateVarsChart(FeatureChart):
     variables in the edge's ``lhs`` whose names start with '@' will be
     replaced by unique new ``Variable``s.
     """
+
     def __init__(self, tokens):
         FeatureChart.__init__(self, tokens)
 
@@ -471,7 +545,8 @@ class InstantiateVarsChart(FeatureChart):
         FeatureChart.initialize(self)
 
     def insert(self, edge, child_pointer_list):
-        if edge in self._instantiated: return False
+        if edge in self._instantiated:
+            return False
         self.instantiate_edge(edge)
         return FeatureChart.insert(self, edge, child_pointer_list)
 
@@ -487,32 +562,41 @@ class InstantiateVarsChart(FeatureChart):
         """
         # If the edge is a leaf, or is not complete, or is
         # already in the chart, then just return it as-is.
-        if not isinstance(edge, FeatureTreeEdge): return
-        if not edge.is_complete(): return
-        if edge in self._edge_to_cpls: return
+        if not isinstance(edge, FeatureTreeEdge):
+            return
+        if not edge.is_complete():
+            return
+        if edge in self._edge_to_cpls:
+            return
 
         # Get a list of variables that need to be instantiated.
         # If there are none, then return as-is.
         inst_vars = self.inst_vars(edge)
-        if not inst_vars: return
+        if not inst_vars:
+            return
 
         # Instantiate the edge!
         self._instantiated.add(edge)
         edge._lhs = edge.lhs().substitute_bindings(inst_vars)
 
     def inst_vars(self, edge):
-        return dict((var, logic.unique_variable())
-                    for var in edge.lhs().variables()
-                    if var.name.startswith('@'))
+        return dict(
+            (var, logic.unique_variable())
+            for var in edge.lhs().variables()
+            if var.name.startswith("@")
+        )
 
 
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
 # Demo
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+
 
 def demo_grammar():
     from nltk.grammar import FeatureGrammar
-    return FeatureGrammar.fromstring("""
+
+    return FeatureGrammar.fromstring(
+        """
 S  -> NP VP
 PP -> Prep NP
 NP -> NP PP
@@ -531,14 +615,21 @@ Verb -> "ate"
 Verb -> "saw"
 Prep -> "with"
 Prep -> "under"
-""")
-
-def demo(print_times=True, print_grammar=True,
-         print_trees=True, print_sentence=True,
-         trace=1,
-         parser=FeatureChartParser,
-         sent='I saw John with a dog with my cookie'):
+"""
+    )
+
+
+def demo(
+    print_times=True,
+    print_grammar=True,
+    print_trees=True,
+    print_sentence=True,
+    trace=1,
+    parser=FeatureChartParser,
+    sent="I saw John with a dog with my cookie",
+):
     import sys, time
+
     print()
     grammar = demo_grammar()
     if print_grammar:
@@ -548,32 +639,38 @@ def demo(print_times=True, print_grammar=True,
     if print_sentence:
         print("Sentence:", sent)
     tokens = sent.split()
-    t = time.clock()
+    t = perf_counter()
     cp = parser(grammar, trace=trace)
     chart = cp.chart_parse(tokens)
     trees = list(chart.parses(grammar.start()))
     if print_times:
-        print("Time: %s" % (time.clock() - t))
+        print("Time: %s" % (perf_counter() - t))
     if print_trees:
-        for tree in trees: print(tree)
+        for tree in trees:
+            print(tree)
     else:
         print("Nr trees:", len(trees))
 
+
 def run_profile():
     import profile
-    profile.run('for i in range(1): demo()', '/tmp/profile.out')
+
+    profile.run("for i in range(1): demo()", "/tmp/profile.out")
     import pstats
-    p = pstats.Stats('/tmp/profile.out')
-    p.strip_dirs().sort_stats('time', 'cum').print_stats(60)
-    p.strip_dirs().sort_stats('cum', 'time').print_stats(60)
 
-if __name__ == '__main__':
+    p = pstats.Stats("/tmp/profile.out")
+    p.strip_dirs().sort_stats("time", "cum").print_stats(60)
+    p.strip_dirs().sort_stats("cum", "time").print_stats(60)
+
+
+if __name__ == "__main__":
     from nltk.data import load
+
     demo()
     print()
-    grammar = load('grammars/book_grammars/feat0.fcfg')
+    grammar = load("grammars/book_grammars/feat0.fcfg")
     cp = FeatureChartParser(grammar, trace=2)
-    sent = 'Kim likes children'
+    sent = "Kim likes children"
     tokens = sent.split()
     trees = cp.parse(tokens)
     for tree in trees:
diff --git a/nlp_resource_data/nltk/parse/featurechart.pyc b/nlp_resource_data/nltk/parse/featurechart.pyc
deleted file mode 100755 (executable)
index 7addf46..0000000
Binary files a/nlp_resource_data/nltk/parse/featurechart.pyc and /dev/null differ
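
As an illustrative sketch (assuming the module's own demo_grammar(), whose full lexicon also covers "John", "a" and "dog"), the feature chart parser can be exercised directly:

    from nltk.parse.featurechart import FeatureChartParser, demo_grammar

    grammar = demo_grammar()
    parser = FeatureChartParser(grammar, trace=0)
    for tree in parser.parse("John saw a dog".split()):
        print(tree)

InstantiateVarsChart can likewise be supplied via chart_class= when edges carrying '@'-prefixed variables should be instantiated to fresh Variables on insertion.
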
diff --git a/nlp_resource_data/nltk/parse/generate.py b/nlp_resource_data/nltk/parse/generate.py
old mode 100755 (executable)
new mode 100644 (file)
index 8326f5d..4549b8d
@@ -1,13 +1,12 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Generating from a CFG
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 #
-from __future__ import print_function
 
 import itertools
 import sys
@@ -46,7 +45,9 @@ def _generate_all(grammar, items, depth):
         except RuntimeError as _error:
             if _error.message == "maximum recursion depth exceeded":
                 # Helpful error message while still showing the recursion stack.
-                raise RuntimeError("The grammar has rule(s) that yield infinite recursion!!")
+                raise RuntimeError(
+                    "The grammar has rule(s) that yield infinite recursion!!"
+                )
             else:
                 raise
     else:
@@ -57,11 +58,12 @@ def _generate_one(grammar, item, depth):
     if depth > 0:
         if isinstance(item, Nonterminal):
             for prod in grammar.productions(lhs=item):
-                for frag in _generate_all(grammar, prod.rhs(), depth-1):
+                for frag in _generate_all(grammar, prod.rhs(), depth - 1):
                     yield frag
         else:
             yield [item]
 
+
 demo_grammar = """
   S -> NP VP
   NP -> Det N
@@ -76,12 +78,12 @@ demo_grammar = """
 def demo(N=23):
     from nltk.grammar import CFG
 
-    print('Generating the first %d sentences for demo grammar:' % (N,))
+    print("Generating the first %d sentences for demo grammar:" % (N,))
     print(demo_grammar)
     grammar = CFG.fromstring(demo_grammar)
     for n, sent in enumerate(generate(grammar, n=N), 1):
-        print('%3d. %s' % (n, ' '.join(sent)))
+        print("%3d. %s" % (n, " ".join(sent)))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/parse/generate.pyc b/nlp_resource_data/nltk/parse/generate.pyc
deleted file mode 100755 (executable)
index abe1f7a..0000000
Binary files a/nlp_resource_data/nltk/parse/generate.pyc and /dev/null differ
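
A short sketch of the generator above (illustrative; demo_grammar is the module-level grammar string shown in the hunk):

    from nltk import CFG
    from nltk.parse.generate import generate, demo_grammar

    grammar = CFG.fromstring(demo_grammar)
    for n, sent in enumerate(generate(grammar, n=3), 1):
        print("%d. %s" % (n, " ".join(sent)))
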
diff --git a/nlp_resource_data/nltk/parse/malt.py b/nlp_resource_data/nltk/parse/malt.py
old mode 100755 (executable)
new mode 100644 (file)
index 68bb396..523901e
@@ -4,14 +4,10 @@
 # Author: Dan Garrette <dhgarrette@gmail.com>
 # Contributor: Liling Tan, Mustufain, osamamukhtar11
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import print_function
-from __future__ import unicode_literals
-
-from six import text_type
 import os
 import sys
 import tempfile
@@ -28,32 +24,39 @@ from nltk.parse.util import taggedsents_to_conll
 
 def malt_regex_tagger():
     from nltk.tag import RegexpTagger
+
     _tagger = RegexpTagger(
-    [(r'\.$','.'), (r'\,$',','), (r'\?$','?'),    # fullstop, comma, Qmark
-    (r'\($','('), (r'\)$',')'),             # round brackets
-    (r'\[$','['), (r'\]$',']'),             # square brackets
-    (r'^-?[0-9]+(.[0-9]+)?$', 'CD'),        # cardinal numbers
-    (r'(The|the|A|a|An|an)$', 'DT'),        # articles
-    (r'(He|he|She|she|It|it|I|me|Me|You|you)$', 'PRP'), # pronouns
-    (r'(His|his|Her|her|Its|its)$', 'PRP$'),    # possesive
-    (r'(my|Your|your|Yours|yours)$', 'PRP$'),   # possesive
-    (r'(on|On|in|In|at|At|since|Since)$', 'IN'),# time prepopsitions
-    (r'(for|For|ago|Ago|before|Before)$', 'IN'),# time prepopsitions
-    (r'(till|Till|until|Until)$', 'IN'),        # time prepopsitions
-    (r'(by|By|beside|Beside)$', 'IN'),          # space prepopsitions
-    (r'(under|Under|below|Below)$', 'IN'),      # space prepopsitions
-    (r'(over|Over|above|Above)$', 'IN'),        # space prepopsitions
-    (r'(across|Across|through|Through)$', 'IN'),# space prepopsitions
-    (r'(into|Into|towards|Towards)$', 'IN'),    # space prepopsitions
-    (r'(onto|Onto|from|From)$', 'IN'),          # space prepopsitions
-    (r'.*able$', 'JJ'), # adjectives
-    (r'.*ness$', 'NN'), # nouns formed from adjectives
-    (r'.*ly$', 'RB'),   # adverbs
-    (r'.*s$', 'NNS'),   # plural nouns
-    (r'.*ing$', 'VBG'), # gerunds
-    (r'.*ed$', 'VBD'),  # past tense verbs
-    (r'.*', 'NN'),      # nouns (default)
-    ])
+        [
+            (r"\.$", "."),
+            (r"\,$", ","),
+            (r"\?$", "?"),  # fullstop, comma, Qmark
+            (r"\($", "("),
+            (r"\)$", ")"),  # round brackets
+            (r"\[$", "["),
+            (r"\]$", "]"),  # square brackets
+            (r"^-?[0-9]+(.[0-9]+)?$", "CD"),  # cardinal numbers
+            (r"(The|the|A|a|An|an)$", "DT"),  # articles
+            (r"(He|he|She|she|It|it|I|me|Me|You|you)$", "PRP"),  # pronouns
+            (r"(His|his|Her|her|Its|its)$", "PRP$"),  # possessive
+            (r"(my|Your|your|Yours|yours)$", "PRP$"),  # possessive
+            (r"(on|On|in|In|at|At|since|Since)$", "IN"),  # time prepositions
+            (r"(for|For|ago|Ago|before|Before)$", "IN"),  # time prepositions
+            (r"(till|Till|until|Until)$", "IN"),  # time prepositions
+            (r"(by|By|beside|Beside)$", "IN"),  # space prepositions
+            (r"(under|Under|below|Below)$", "IN"),  # space prepositions
+            (r"(over|Over|above|Above)$", "IN"),  # space prepositions
+            (r"(across|Across|through|Through)$", "IN"),  # space prepositions
+            (r"(into|Into|towards|Towards)$", "IN"),  # space prepositions
+            (r"(onto|Onto|from|From)$", "IN"),  # space prepositions
+            (r".*able$", "JJ"),  # adjectives
+            (r".*ness$", "NN"),  # nouns formed from adjectives
+            (r".*ly$", "RB"),  # adverbs
+            (r".*s$", "NNS"),  # plural nouns
+            (r".*ing$", "VBG"),  # gerunds
+            (r".*ed$", "VBD"),  # past tense verbs
+            (r".*", "NN"),  # nouns (default)
+        ]
+    )
     return _tagger.tag
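
A hedged sketch of the fallback tagger defined above; the expected output is derived by reading the regex rules, not observed from a run:

    from nltk.parse.malt import malt_regex_tagger

    tag = malt_regex_tagger()
    print(tag("The dog ate my cookie .".split()))
    # expected: [('The', 'DT'), ('dog', 'NN'), ('ate', 'NN'),
    #            ('my', 'PRP$'), ('cookie', 'NN'), ('.', '.')]
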
 
 
@@ -61,18 +64,20 @@ def find_maltparser(parser_dirname):
     """
     A module to find MaltParser .jar file and its dependencies.
     """
-    if os.path.exists(parser_dirname): # If a full path is given.
+    if os.path.exists(parser_dirname):  # If a full path is given.
         _malt_dir = parser_dirname
-    else: # Try to find path to maltparser directory in environment variables.
-        _malt_dir = find_dir(parser_dirname, env_vars=('MALT_PARSER',))
+    else:  # Try to find path to maltparser directory in environment variables.
+        _malt_dir = find_dir(parser_dirname, env_vars=("MALT_PARSER",))
     # Checks that that the found directory contains all the necessary .jar
-    malt_dependencies = ['','','']
+    malt_dependencies = ["", "", ""]
     _malt_jars = set(find_jars_within_path(_malt_dir))
     _jars = set(os.path.split(jar)[1] for jar in _malt_jars)
-    malt_dependencies = set(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar'])
+    malt_dependencies = set(["log4j.jar", "libsvm.jar", "liblinear-1.8.jar"])
 
     assert malt_dependencies.issubset(_jars)
-    assert any(filter(lambda i: i.startswith('maltparser-') and i.endswith('.jar'), _jars))
+    assert any(
+        filter(lambda i: i.startswith("maltparser-") and i.endswith(".jar"), _jars)
+    )
     return list(_malt_jars)
 
 
@@ -80,12 +85,12 @@ def find_malt_model(model_filename):
     """
     A module to find pre-trained MaltParser model.
     """
-    if model_filename == None:
-        return 'malt_temp.mco'
-    elif os.path.exists(model_filename): # If a full path is given.
+    if model_filename is None:
+        return "malt_temp.mco"
+    elif os.path.exists(model_filename):  # If a full path is given.
         return model_filename
-    else: # Try to find path to malt model in environment variables.
-        return find_file(model_filename, env_vars=('MALT_MODEL',), verbose=False)
+    else:  # Try to find path to malt model in environment variables.
+        return find_file(model_filename, env_vars=("MALT_MODEL",), verbose=False)
 
 
 class MaltParser(ParserI):
@@ -107,7 +112,14 @@ class MaltParser(ParserI):
         >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
         (shot I (elephant an) (in (pajamas my)) .)
     """
-    def __init__(self, parser_dirname, model_filename=None, tagger=None, additional_java_args=None):
+
+    def __init__(
+        self,
+        parser_dirname,
+        model_filename=None,
+        tagger=None,
+        additional_java_args=None,
+    ):
         """
         An interface for parsing with the Malt Parser.
 
@@ -132,17 +144,18 @@ class MaltParser(ParserI):
         # Find all the necessary jar files for MaltParser.
         self.malt_jars = find_maltparser(parser_dirname)
         # Initialize additional java arguments.
-        self.additional_java_args = additional_java_args if \
-                        additional_java_args is not None else []
+        self.additional_java_args = (
+            additional_java_args if additional_java_args is not None else []
+        )
         # Initialize model.
         self.model = find_malt_model(model_filename)
-        self._trained = self.model != 'malt_temp.mco'
+        self._trained = self.model != "malt_temp.mco"
         # Set the working_dir parameters i.e. `-w` from MaltParser's option.
         self.working_dir = tempfile.gettempdir()
         # Initialize POS tagger.
         self.tagger = tagger if tagger is not None else malt_regex_tagger()
 
-    def parse_tagged_sents(self, sentences, verbose=False, top_relation_label='null'):
+    def parse_tagged_sents(self, sentences, verbose=False, top_relation_label="null"):
         """
         Use MaltParser to parse multiple POS tagged sentences. Takes multiple
         sentences where each sentence is a list of (word, tag) tuples.
@@ -156,43 +169,59 @@ class MaltParser(ParserI):
         if not self._trained:
             raise Exception("Parser has not been trained. Call train() first.")
 
-        with tempfile.NamedTemporaryFile(prefix='malt_input.conll.',
-              dir=self.working_dir, mode='w', delete=False) as input_file:
-              with tempfile.NamedTemporaryFile(prefix='malt_output.conll.',
-                     dir=self.working_dir, mode='w', delete=False) as output_file:
+        with tempfile.NamedTemporaryFile(
+            prefix="malt_input.conll.", dir=self.working_dir, mode="w", delete=False
+        ) as input_file:
+            with tempfile.NamedTemporaryFile(
+                prefix="malt_output.conll.",
+                dir=self.working_dir,
+                mode="w",
+                delete=False,
+            ) as output_file:
                 # Convert list of sentences to CONLL format.
                 for line in taggedsents_to_conll(sentences):
-                    input_file.write(text_type(line))
+                    input_file.write(str(line))
                 input_file.close()
 
                 # Generate command to run maltparser.
-                cmd =self.generate_malt_command(input_file.name,
-                                output_file.name, mode="parse")
+                cmd = self.generate_malt_command(
+                    input_file.name, output_file.name, mode="parse"
+                )
 
                 # This is a maltparser quirk, it needs to be run
                 # where the model file is. otherwise it goes into an awkward
                 # missing .jars or strange -w working_dir problem.
-                _current_path = os.getcwd() # Remembers the current path.
-                try: # Change to modelfile path
+                _current_path = os.getcwd()  # Remembers the current path.
+                try:  # Change to modelfile path
                     os.chdir(os.path.split(self.model)[0])
                 except:
                     pass
-                ret = self._execute(cmd, verbose) # Run command.
-                os.chdir(_current_path) # Change back to current path.
+                ret = self._execute(cmd, verbose)  # Run command.
+                os.chdir(_current_path)  # Change back to current path.
 
-                if ret is not 0:
-                    raise Exception("MaltParser parsing (%s) failed with exit "
-                            "code %d" % (' '.join(cmd), ret))
+                if ret != 0:
+                    raise Exception(
+                        "MaltParser parsing (%s) failed with exit "
+                        "code %d" % (" ".join(cmd), ret)
+                    )
 
                 # Must return iter(iter(Tree))
                 with open(output_file.name) as infile:
-                    for tree_str in infile.read().split('\n\n'):
-                        yield(iter([DependencyGraph(tree_str, top_relation_label=top_relation_label)]))
+                    for tree_str in infile.read().split("\n\n"):
+                        yield (
+                            iter(
+                                [
+                                    DependencyGraph(
+                                        tree_str, top_relation_label=top_relation_label
+                                    )
+                                ]
+                            )
+                        )
 
         os.remove(input_file.name)
         os.remove(output_file.name)
 
-    def parse_sents(self, sentences, verbose=False, top_relation_label='null'):
+    def parse_sents(self, sentences, verbose=False, top_relation_label="null"):
         """
         Use MaltParser to parse multiple sentences.
         Takes a list of sentences, where each sentence is a list of words.
@@ -204,7 +233,9 @@ class MaltParser(ParserI):
         :return: iter(DependencyGraph)
         """
         tagged_sentences = (self.tagger(sentence) for sentence in sentences)
-        return self.parse_tagged_sents(tagged_sentences, verbose, top_relation_label=top_relation_label)
+        return self.parse_tagged_sents(
+            tagged_sentences, verbose, top_relation_label=top_relation_label
+        )
 
     def generate_malt_command(self, inputfilename, outputfilename=None, mode=None):
         """
@@ -216,23 +247,26 @@ class MaltParser(ParserI):
         :type outputfilename: str
         """
 
-        cmd = ['java']
-        cmd+= self.additional_java_args # Adds additional java arguments
+        cmd = ["java"]
+        cmd += self.additional_java_args  # Adds additional java arguments
        # Joins classpath entries with ";" on Windows and with ":" on Linux/Mac
-        classpaths_separator = ';' if sys.platform.startswith('win') else ':'
-        cmd+= ['-cp', classpaths_separator.join(self.malt_jars)] # Adds classpaths for jars
-        cmd+= ['org.maltparser.Malt'] # Adds the main function.
+        classpaths_separator = ";" if sys.platform.startswith("win") else ":"
+        cmd += [
+            "-cp",
+            classpaths_separator.join(self.malt_jars),
+        ]  # Adds classpaths for jars
+        cmd += ["org.maltparser.Malt"]  # Adds the main function.
 
         # Adds the model file.
-        if os.path.exists(self.model): # when parsing
-            cmd+= ['-c', os.path.split(self.model)[-1]]
-        else: # when learning
-            cmd+= ['-c', self.model]
-
-        cmd+= ['-i', inputfilename]
-        if mode == 'parse':
-            cmd+= ['-o', outputfilename]
-        cmd+= ['-m', mode] # mode use to generate parses.
+        if os.path.exists(self.model):  # when parsing
+            cmd += ["-c", os.path.split(self.model)[-1]]
+        else:  # when learning
+            cmd += ["-c", self.model]
+
+        cmd += ["-i", inputfilename]
+        if mode == "parse":
+            cmd += ["-o", outputfilename]
+        cmd += ["-m", mode]  # mode use to generate parses.
         return cmd
 
     @staticmethod
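
For concreteness, the list built by generate_malt_command() for parsing comes out in the shape sketched below on Linux. This is a sketch, not part of the patch; the jar paths, model name, and temp-file names are illustrative placeholders.

# Illustrative shape only; every path and file name below is made up.
cmd = [
    "java",
    "-cp", "/opt/malt/maltparser-1.7.2.jar:/opt/malt/lib/liblinear-1.8.jar",
    "org.maltparser.Malt",
    "-c", "engmalt.linear-1.7.mco",         # model file name when parsing
    "-i", "/tmp/malt_input.conll.abc123",
    "-o", "/tmp/malt_output.conll.abc123",  # only added in parse mode
    "-m", "parse",
]
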
@@ -250,10 +284,11 @@ class MaltParser(ParserI):
         """
 
         # Write the conll_str to malt_train.conll file in /tmp/
-        with tempfile.NamedTemporaryFile(prefix='malt_train.conll.',
-             dir=self.working_dir, mode='w', delete=False) as input_file:
-            input_str = ('\n'.join(dg.to_conll(10) for dg in depgraphs))
-            input_file.write(text_type(input_str))
+        with tempfile.NamedTemporaryFile(
+            prefix="malt_train.conll.", dir=self.working_dir, mode="w", delete=False
+        ) as input_file:
+            input_str = "\n".join(dg.to_conll(10) for dg in depgraphs)
+            input_file.write(str(input_str))
         # Trains the model with the malt_train.conll
         self.train_from_file(input_file.name, verbose=verbose)
         # Removes the malt_train.conll once training finishes.
@@ -269,25 +304,28 @@ class MaltParser(ParserI):
         # If conll_file is a ZipFilePathPointer,
         # then we need to do some extra massaging
         if isinstance(conll_file, ZipFilePathPointer):
-            with tempfile.NamedTemporaryFile(prefix='malt_train.conll.',
-            dir=self.working_dir, mode='w', delete=False) as input_file:
+            with tempfile.NamedTemporaryFile(
+                prefix="malt_train.conll.", dir=self.working_dir, mode="w", delete=False
+            ) as input_file:
                 with conll_file.open() as conll_input_file:
                     conll_str = conll_input_file.read()
-                    input_file.write(text_type(conll_str))
+                    input_file.write(str(conll_str))
                 return self.train_from_file(input_file.name, verbose=verbose)
 
         # Generate command to run maltparser.
-        cmd =self.generate_malt_command(conll_file, mode="learn")
+        cmd = self.generate_malt_command(conll_file, mode="learn")
         ret = self._execute(cmd, verbose)
         if ret != 0:
-            raise Exception("MaltParser training (%s) failed with exit "
-                    "code %d" % (' '.join(cmd), ret))
+            raise Exception(
+                "MaltParser training (%s) failed with exit "
+                "code %d" % (" ".join(cmd), ret)
+            )
         self._trained = True
 
 
 if __name__ == '__main__':
-    '''
-    A demostration function to show how NLTK users can use the malt parser API.
+    """
+    A demonstration function to show how NLTK users can use the malt parser API.
 
     >>> from nltk import pos_tag
     >>> assert 'MALT_PARSER' in os.environ, str(
@@ -322,9 +360,9 @@ if __name__ == '__main__':
     >>> # Parse a single sentence.
     >>> parsed_sent1 = mp.parse_one(sent1)
     >>> parsed_sent2 = mp.parse_one(sent2)
-    >>> print (parsed_sent1.tree())
+    >>> print(parsed_sent1.tree())
     (sees John Mary .)
-    >>> print (parsed_sent2.tree())
+    >>> print(parsed_sent2.tree())
     (walks John (dog a) .)
     >>>
     >>> # Parsing multiple sentences.
@@ -351,6 +389,7 @@ if __name__ == '__main__':
     (shot I (elephant an) (in (pajamas my)) .)
     >>> print(next(next(parsed_sents)).tree())
     (flies Time (like banana) .)
-    '''
+    """
+
     import doctest
     doctest.testmod()
diff --git a/nlp_resource_data/nltk/parse/malt.pyc b/nlp_resource_data/nltk/parse/malt.pyc
deleted file mode 100755 (executable)
index 241bbc7..0000000
Binary files a/nlp_resource_data/nltk/parse/malt.pyc and /dev/null differ
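
For readers checking the converted malt.py API, a minimal usage sketch (not part of the patch). It assumes a maltparser-1.7.2 install and the engmalt.linear-1.7.mco pre-trained model; both names are assumptions taken from common MaltParser setups, since the doctest hunks above omit the constructor call.

# A minimal sketch, assuming the parser/model setup described above.
from nltk import pos_tag
from nltk.parse.malt import MaltParser

mp = MaltParser("maltparser-1.7.2", "engmalt.linear-1.7.mco", tagger=pos_tag)

# Untagged input: parse_one() tags internally via the supplied tagger.
print(mp.parse_one("John sees Mary .".split()).tree())  # (sees John Mary .)

# Pre-tagged input: parse_tagged_sents() yields iter(iter(DependencyGraph)),
# hence the double next() seen in the doctest above.
parsed = mp.parse_tagged_sents([pos_tag("John sees Mary .".split())])
print(next(next(parsed)).tree())
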
old mode 100755 (executable)
new mode 100644 (file)
index 5adcd2c..2b901dd
@@ -1,18 +1,15 @@
 # Natural Language Toolkit: Dependency Grammars
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Jason Narad <jason.narad@gmail.com>
 #
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 #
-from __future__ import print_function
 
 import math
 import logging
 
-from six.moves import range
-
 from nltk.parse.dependencygraph import DependencyGraph
 
 logger = logging.getLogger(__name__)
@@ -36,7 +33,7 @@ class DependencyScorerI(object):
 
     def __init__(self):
         if self.__class__ == DependencyScorerI:
-            raise TypeError('DependencyScorerI is an abstract interface')
+            raise TypeError("DependencyScorerI is an abstract interface")
 
     def train(self, graphs):
         """
@@ -73,6 +70,7 @@ class DependencyScorerI(object):
         """
         raise NotImplementedError()
 
+
 #################################################################
 # NaiveBayesDependencyScorer
 #################################################################
@@ -117,17 +115,17 @@ class NaiveBayesDependencyScorer(DependencyScorerI):
         for graph in graphs:
             for head_node in graph.nodes.values():
                 for child_index, child_node in graph.nodes.items():
-                    if child_index in head_node['deps']:
+                    if child_index in head_node["deps"]:
                         label = "T"
                     else:
                         label = "F"
                     labeled_examples.append(
                         (
                             dict(
-                                a=head_node['word'],
-                                b=head_node['tag'],
-                                c=child_node['word'],
-                                d=child_node['tag'],
+                                a=head_node["word"],
+                                b=head_node["tag"],
+                                c=child_node["word"],
+                                d=child_node["tag"],
                             ),
                             label,
                         )
@@ -154,10 +152,10 @@ class NaiveBayesDependencyScorer(DependencyScorerI):
                 edges.append(
                     (
                         dict(
-                            a=head_node['word'],
-                            b=head_node['tag'],
-                            c=child_node['word'],
-                            d=child_node['tag'],
+                            a=head_node["word"],
+                            b=head_node["tag"],
+                            c=child_node["word"],
+                            d=child_node["tag"],
                         )
                     )
                 )
@@ -167,9 +165,9 @@ class NaiveBayesDependencyScorer(DependencyScorerI):
         row = []
         count = 0
         for pdist in self.classifier.prob_classify_many(edges):
-            logger.debug('%.4f %.4f', pdist.prob('T'), pdist.prob('F'))
+            logger.debug("%.4f %.4f", pdist.prob("T"), pdist.prob("F"))
             # smoothing in case the probability = 0
-            row.append([math.log(pdist.prob("T")+0.00000000001)])
+            row.append([math.log(pdist.prob("T") + 0.00000000001)])
             count += 1
             if count == len(graph.nodes):
                 edge_scores.append(row)
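
The tiny constant added before math.log() in the hunk above exists only to keep a zero classifier probability from raising ValueError; a quick illustration, not part of the patch:

import math

eps = 0.00000000001  # the same 1e-11 constant used in the hunk above
for p in (0.0, 0.5):
    # math.log(0) raises ValueError; log(p + eps) stays finite.
    print(math.log(p + eps))
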
@@ -184,14 +182,17 @@ class NaiveBayesDependencyScorer(DependencyScorerI):
 # A short class necessary to show parsing example from paper
 class DemoScorer(DependencyScorerI):
     def train(self, graphs):
-        print('Training...')
+        print("Training...")
 
     def score(self, graph):
         # scores for Keith Hall 'K-best Spanning Tree Parsing' paper
-        return [[[], [5],  [1],  [1]],
-                [[], [],   [11], [4]],
-                [[], [10], [],   [5]],
-                [[], [8],  [8],  []]]
+        return [
+            [[], [5], [1], [1]],
+            [[], [], [11], [4]],
+            [[], [10], [], [5]],
+            [[], [8], [8], []],
+        ]
+
 
 #################################################################
 # Non-Projective Probabilistic Parsing
@@ -248,11 +249,12 @@ class ProbabilisticNonprojectiveParser(object):
     4
 
     """
+
     def __init__(self):
         """
         Creates a new non-projective parser.
         """
-        logging.debug('initializing prob. nonprojective...')
+        logging.debug("initializing prob. nonprojective...")
 
     def train(self, graphs, dependency_scorer):
         """
@@ -294,12 +296,12 @@ class ProbabilisticNonprojectiveParser(object):
         :type g_graph, b_graph, c_graph: DependencyGraph
         :param g_graph, b_graph, c_graph: Graphs which need to be updated.
         """
-        logger.debug('Collapsing nodes...')
+        logger.debug("Collapsing nodes...")
         # Collapse all cycle nodes into v_n+1 in G_Graph
         for cycle_node_index in cycle_path:
             g_graph.remove_by_address(cycle_node_index)
         g_graph.add_node(new_node)
-        g_graph.redirect_arcs(cycle_path, new_node['address'])
+        g_graph.redirect_arcs(cycle_path, new_node["address"])
 
     def update_edge_scores(self, new_node, cycle_path):
         """
@@ -311,24 +313,20 @@ class ProbabilisticNonprojectiveParser(object):
         :type cycle_path: A list of integers.
         :param cycle_path: A list of node addresses that belong to the cycle.
         """
-        logger.debug('cycle %s', cycle_path)
+        logger.debug("cycle %s", cycle_path)
 
         cycle_path = self.compute_original_indexes(cycle_path)
 
-        logger.debug('old cycle %s', cycle_path)
-        logger.debug('Prior to update: %s', self.scores)
+        logger.debug("old cycle %s", cycle_path)
+        logger.debug("Prior to update: %s", self.scores)
 
         for i, row in enumerate(self.scores):
             for j, column in enumerate(self.scores[i]):
                 logger.debug(self.scores[i][j])
-                if (
-                    j in cycle_path
-                    and i not in cycle_path
-                    and self.scores[i][j]
-                ):
+                if j in cycle_path and i not in cycle_path and self.scores[i][j]:
                     subtract_val = self.compute_max_subtract_score(j, cycle_path)
 
-                    logger.debug('%s - %s', self.scores[i][j], subtract_val)
+                    logger.debug("%s - %s", self.scores[i][j], subtract_val)
 
                     new_vals = []
                     for cur_val in self.scores[i][j]:
@@ -341,7 +339,7 @@ class ProbabilisticNonprojectiveParser(object):
                 if i in cycle_path and j in cycle_path:
                     self.scores[i][j] = []
 
-        logger.debug('After update: %s', self.scores)
+        logger.debug("After update: %s", self.scores)
 
     def compute_original_indexes(self, new_indexes):
         """
@@ -400,17 +398,18 @@ class ProbabilisticNonprojectiveParser(object):
         the node that is arced to.
         """
         originals = self.compute_original_indexes([node_index])
-        logger.debug('originals: %s', originals)
+        logger.debug("originals: %s", originals)
 
         max_arc = None
         max_score = None
         for row_index in range(len(self.scores)):
             for col_index in range(len(self.scores[row_index])):
-                # print self.scores[row_index][col_index]
-                if col_index in originals and (max_score is None or self.scores[row_index][col_index] > max_score):
+                if col_index in originals and (
+                    max_score is None or self.scores[row_index][col_index] > max_score
+                ):
                     max_score = self.scores[row_index][col_index]
                     max_arc = row_index
-                    logger.debug('%s, %s', row_index, col_index)
+                    logger.debug("%s, %s", row_index, col_index)
 
         logger.debug(max_score)
 
@@ -428,7 +427,9 @@ class ProbabilisticNonprojectiveParser(object):
         max_orig = None
         for row_index in range(len(self.scores)):
             for col_index in range(len(self.scores[row_index])):
-                if col_index in originals and (max_score is None or self.scores[row_index][col_index] > max_score):
+                if col_index in originals and (
+                    max_score is None or self.scores[row_index][col_index] > max_score
+                ):
                     max_score = self.scores[row_index][col_index]
                     max_arc = row_index
                     max_orig = col_index
@@ -455,27 +456,15 @@ class ProbabilisticNonprojectiveParser(object):
         g_graph = DependencyGraph()
         for index, token in enumerate(tokens):
             g_graph.nodes[index + 1].update(
-                {
-                    'word': token,
-                    'tag': tags[index],
-                    'rel': 'NTOP',
-                    'address': index + 1,
-                }
+                {"word": token, "tag": tags[index], "rel": "NTOP", "address": index + 1}
             )
-        #print (g_graph.nodes)
-
 
         # Fully connect non-root nodes in g_graph
         g_graph.connect_graph()
         original_graph = DependencyGraph()
         for index, token in enumerate(tokens):
             original_graph.nodes[index + 1].update(
-                {
-                    'word': token,
-                    'tag': tags[index],
-                    'rel': 'NTOP',
-                    'address': index+1,
-                }
+                {"word": token, "tag": tags[index], "rel": "NTOP", "address": index + 1}
             )
 
         b_graph = DependencyGraph()
@@ -483,43 +472,32 @@ class ProbabilisticNonprojectiveParser(object):
 
         for index, token in enumerate(tokens):
             c_graph.nodes[index + 1].update(
-                {
-                    'word': token,
-                    'tag': tags[index],
-                    'rel': 'NTOP',
-                    'address': index + 1,
-                }
+                {"word": token, "tag": tags[index], "rel": "NTOP", "address": index + 1}
             )
 
         # Assign initial scores to g_graph edges
         self.initialize_edge_scores(g_graph)
         logger.debug(self.scores)
         # Initialize a list of unvisited vertices (by node address)
-        unvisited_vertices = [
-            vertex['address'] for vertex in c_graph.nodes.values()
-        ]
+        unvisited_vertices = [vertex["address"] for vertex in c_graph.nodes.values()]
         # Iterate over unvisited vertices
         nr_vertices = len(tokens)
         betas = {}
         while unvisited_vertices:
             # Mark current node as visited
             current_vertex = unvisited_vertices.pop(0)
-            logger.debug('current_vertex: %s', current_vertex)
+            logger.debug("current_vertex: %s", current_vertex)
             # Get corresponding node n_i to vertex v_i
             current_node = g_graph.get_by_address(current_vertex)
-            logger.debug('current_node: %s', current_node)
+            logger.debug("current_node: %s", current_node)
             # Get best in-edge node b for current node
             best_in_edge = self.best_incoming_arc(current_vertex)
             betas[current_vertex] = self.original_best_arc(current_vertex)
-            logger.debug('best in arc: %s --> %s', best_in_edge, current_vertex)
+            logger.debug("best in arc: %s --> %s", best_in_edge, current_vertex)
             # b_graph = Union(b_graph, b)
             for new_vertex in [current_vertex, best_in_edge]:
                 b_graph.nodes[new_vertex].update(
-                    {
-                        'word': 'TEMP',
-                        'rel': 'NTOP',
-                        'address': new_vertex,
-                    }
+                    {"word": "TEMP", "rel": "NTOP", "address": new_vertex}
                 )
             b_graph.add_arc(best_in_edge, current_vertex)
             # Beta(current node) = b  - stored for parse recovery
@@ -527,21 +505,17 @@ class ProbabilisticNonprojectiveParser(object):
             cycle_path = b_graph.contains_cycle()
             if cycle_path:
                 # Create a new node v_n+1 with address = len(nodes) + 1
-                new_node = {
-                    'word': 'NONE',
-                    'rel': 'NTOP',
-                    'address': nr_vertices + 1,
-                }
+                new_node = {"word": "NONE", "rel": "NTOP", "address": nr_vertices + 1}
                 # c_graph = Union(c_graph, v_n+1)
                 c_graph.add_node(new_node)
                 # Collapse all nodes in cycle C into v_n+1
                 self.update_edge_scores(new_node, cycle_path)
                 self.collapse_nodes(new_node, cycle_path, g_graph, b_graph, c_graph)
                 for cycle_index in cycle_path:
-                    c_graph.add_arc(new_node['address'], cycle_index)
+                    c_graph.add_arc(new_node["address"], cycle_index)
                     # self.replaced_by[cycle_index] = new_node['address']
 
-                self.inner_nodes[new_node['address']] = cycle_path
+                self.inner_nodes[new_node["address"]] = cycle_path
 
                 # Add v_n+1 to list of unvisited vertices
                 unvisited_vertices.insert(0, nr_vertices + 1)
@@ -553,32 +527,33 @@ class ProbabilisticNonprojectiveParser(object):
                 for cycle_node_address in cycle_path:
                     b_graph.remove_by_address(cycle_node_address)
 
-            logger.debug('g_graph: %s', g_graph)
-            logger.debug('b_graph: %s', b_graph)
-            logger.debug('c_graph: %s', c_graph)
-            logger.debug('Betas: %s', betas)
-            logger.debug('replaced nodes %s', self.inner_nodes)
+            logger.debug("g_graph: %s", g_graph)
+            logger.debug("b_graph: %s", b_graph)
+            logger.debug("c_graph: %s", c_graph)
+            logger.debug("Betas: %s", betas)
+            logger.debug("replaced nodes %s", self.inner_nodes)
 
         # Recover parse tree
-        logger.debug('Final scores: %s', self.scores)
+        logger.debug("Final scores: %s", self.scores)
 
-        logger.debug('Recovering parse...')
+        logger.debug("Recovering parse...")
         for i in range(len(tokens) + 1, nr_vertices + 1):
             betas[betas[i][1]] = betas[i]
 
-        logger.debug('Betas: %s', betas)
+        logger.debug("Betas: %s", betas)
         for node in original_graph.nodes.values():
            # TODO: It's dangerous to assume that deps is a dictionary
             # because it's a default dictionary. Ideally, here we should not
             # be concerned how dependencies are stored inside of a dependency
             # graph.
-            node['deps'] = {}
+            node["deps"] = {}
         for i in range(1, len(tokens) + 1):
             original_graph.add_arc(betas[i][0], betas[i][1])
 
-        logger.debug('Done.')
+        logger.debug("Done.")
         yield original_graph
 
+
 #################################################################
 # Rule-based Non-Projective Parser
 #################################################################
@@ -623,21 +598,21 @@ class NonprojectiveDependencyParser(object):
 
         for index, token in enumerate(tokens):
             self._graph.nodes[index] = {
-                'word': token,
-                'deps': [],
-                'rel': 'NTOP',
-                'address': index,
+                "word": token,
+                "deps": [],
+                "rel": "NTOP",
+                "address": index,
             }
 
         for head_node in self._graph.nodes.values():
             deps = []
-            for dep_node in self._graph.nodes.values()  :
+            for dep_node in self._graph.nodes.values():
                 if (
-                    self._grammar.contains(head_node['word'], dep_node['word'])
-                    and head_node['word'] != dep_node['word']
+                    self._grammar.contains(head_node["word"], dep_node["word"])
+                    and head_node["word"] != dep_node["word"]
                 ):
-                    deps.append(dep_node['address'])
-            head_node['deps'] = deps
+                    deps.append(dep_node["address"])
+            head_node["deps"] = deps
 
         # Create lattice of possible heads
         roots = []
@@ -711,22 +686,19 @@ class NonprojectiveDependencyParser(object):
             graph = DependencyGraph()
             graph.root = graph.nodes[analysis.index(-1) + 1]
 
-            for address, (token, head_index) in enumerate(zip(tokens, analysis), start=1):
+            for address, (token, head_index) in enumerate(
+                zip(tokens, analysis), start=1
+            ):
                 head_address = head_index + 1
 
                 node = graph.nodes[address]
-                node.update(
-                    {
-                        'word': token,
-                        'address': address,
-                    }
-                )
+                node.update({"word": token, "address": address})
 
                 if head_address == 0:
-                    rel = 'ROOT'
+                    rel = "ROOT"
                 else:
-                    rel = ''
-                graph.nodes[head_index + 1]['deps'][rel].append(address)
+                    rel = ""
+                graph.nodes[head_index + 1]["deps"][rel].append(address)
 
             # TODO: check for cycles
             yield graph
@@ -736,6 +708,7 @@ class NonprojectiveDependencyParser(object):
 # Demos
 #################################################################
 
+
 def demo():
     # hall_demo()
     nonprojective_conll_parse_demo()
@@ -745,39 +718,56 @@ def demo():
 def hall_demo():
     npp = ProbabilisticNonprojectiveParser()
     npp.train([], DemoScorer())
-    for parse_graph in npp.parse(['v1', 'v2', 'v3'], [None, None, None]):
+    for parse_graph in npp.parse(["v1", "v2", "v3"], [None, None, None]):
         print(parse_graph)
 
 
 def nonprojective_conll_parse_demo():
     from nltk.parse.dependencygraph import conll_data2
 
-    graphs = [
-        DependencyGraph(entry) for entry in conll_data2.split('\n\n') if entry
-    ]
+    graphs = [DependencyGraph(entry) for entry in conll_data2.split("\n\n") if entry]
     npp = ProbabilisticNonprojectiveParser()
     npp.train(graphs, NaiveBayesDependencyScorer())
-    for parse_graph in npp.parse(['Cathy', 'zag', 'hen', 'zwaaien', '.'], ['N', 'V', 'Pron', 'Adj', 'N', 'Punc']):
+    for parse_graph in npp.parse(
+        ["Cathy", "zag", "hen", "zwaaien", "."], ["N", "V", "Pron", "Adj", "N", "Punc"]
+    ):
         print(parse_graph)
 
 
 def rule_based_demo():
     from nltk.grammar import DependencyGrammar
 
-    grammar = DependencyGrammar.fromstring("""
+    grammar = DependencyGrammar.fromstring(
+        """
     'taught' -> 'play' | 'man'
     'man' -> 'the' | 'in'
     'in' -> 'corner'
     'corner' -> 'the'
     'play' -> 'golf' | 'dachshund' | 'to'
     'dachshund' -> 'his'
-    """)
+    """
+    )
     print(grammar)
     ndp = NonprojectiveDependencyParser(grammar)
-    graphs = ndp.parse(['the', 'man', 'in', 'the', 'corner', 'taught', 'his', 'dachshund', 'to', 'play', 'golf'])
-    print('Graphs:')
+    graphs = ndp.parse(
+        [
+            "the",
+            "man",
+            "in",
+            "the",
+            "corner",
+            "taught",
+            "his",
+            "dachshund",
+            "to",
+            "play",
+            "golf",
+        ]
+    )
+    print("Graphs:")
     for graph in graphs:
         print(graph)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/parse/nonprojectivedependencyparser.pyc b/nlp_resource_data/nltk/parse/nonprojectivedependencyparser.pyc
deleted file mode 100755 (executable)
index d862974..0000000
Binary files a/nlp_resource_data/nltk/parse/nonprojectivedependencyparser.pyc and /dev/null differ
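
A minimal sketch of the converted non-projective API, mirroring nonprojective_conll_parse_demo() above; the training data (conll_data2) and the demo sentence are exactly the ones that demo uses.

# Sketch only; mirrors nonprojective_conll_parse_demo() in the patch.
from nltk.parse.dependencygraph import DependencyGraph, conll_data2
from nltk.parse.nonprojectivedependencyparser import (
    NaiveBayesDependencyScorer,
    ProbabilisticNonprojectiveParser,
)

graphs = [DependencyGraph(entry) for entry in conll_data2.split("\n\n") if entry]
npp = ProbabilisticNonprojectiveParser()
npp.train(graphs, NaiveBayesDependencyScorer())  # scorer supplies edge weights
for parse_graph in npp.parse(
    ["Cathy", "zag", "hen", "zwaaien", "."], ["N", "V", "Pron", "Adj", "N", "Punc"]
):
    print(parse_graph)
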
old mode 100755 (executable)
new mode 100644 (file)
index 288d8c2..2b14eab
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Probabilistic Chart Parsers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
@@ -29,7 +29,6 @@ The ``BottomUpProbabilisticChartParser`` constructor has an optional
 argument beam_size.  If non-zero, this controls the size of the beam
 (aka the edge queue).  This option is most useful with InsideChartParser.
 """
-from __future__ import print_function, unicode_literals
 
 ##//////////////////////////////////////////////////////
 ##  Bottom-Up PCFG Chart Parser
@@ -38,17 +37,19 @@ from __future__ import print_function, unicode_literals
 # [XX] This might not be implemented quite right -- it would be better
 # to associate probabilities with child pointer lists.
 
+import random
 from functools import reduce
 from nltk.tree import Tree, ProbabilisticTree
 from nltk.grammar import Nonterminal, PCFG
 
 from nltk.parse.api import ParserI
 from nltk.parse.chart import Chart, LeafEdge, TreeEdge, AbstractChartRule
-from nltk.compat import python_2_unicode_compatible
 
 # Probabilistic edges
 class ProbabilisticLeafEdge(LeafEdge):
-    def prob(self): return 1.0
+    def prob(self):
+        return 1.0
+
 
 class ProbabilisticTreeEdge(TreeEdge):
     def __init__(self, prob, *args, **kwargs):
@@ -57,60 +58,78 @@ class ProbabilisticTreeEdge(TreeEdge):
         # two edges with different probabilities are not equal.
         self._comparison_key = (self._comparison_key, prob)
 
-    def prob(self): return self._prob
+    def prob(self):
+        return self._prob
 
     @staticmethod
     def from_production(production, index, p):
-        return ProbabilisticTreeEdge(p, (index, index), production.lhs(),
-                                     production.rhs(), 0)
+        return ProbabilisticTreeEdge(
+            p, (index, index), production.lhs(), production.rhs(), 0
+        )
+
 
 # Rules using probabilistic edges
 class ProbabilisticBottomUpInitRule(AbstractChartRule):
-    NUM_EDGES=0
+    NUM_EDGES = 0
+
     def apply(self, chart, grammar):
         for index in range(chart.num_leaves()):
             new_edge = ProbabilisticLeafEdge(chart.leaf(index), index)
             if chart.insert(new_edge, ()):
                 yield new_edge
 
+
 class ProbabilisticBottomUpPredictRule(AbstractChartRule):
-    NUM_EDGES=1
+    NUM_EDGES = 1
+
     def apply(self, chart, grammar, edge):
-        if edge.is_incomplete(): return
+        if edge.is_incomplete():
+            return
         for prod in grammar.productions():
             if edge.lhs() == prod.rhs()[0]:
-                new_edge = ProbabilisticTreeEdge.from_production(prod, edge.start(), prod.prob())
+                new_edge = ProbabilisticTreeEdge.from_production(
+                    prod, edge.start(), prod.prob()
+                )
                 if chart.insert(new_edge, ()):
                     yield new_edge
 
+
 class ProbabilisticFundamentalRule(AbstractChartRule):
-    NUM_EDGES=2
+    NUM_EDGES = 2
+
     def apply(self, chart, grammar, left_edge, right_edge):
         # Make sure the rule is applicable.
-        if not (left_edge.end() == right_edge.start() and
-                left_edge.nextsym() == right_edge.lhs() and
-                left_edge.is_incomplete() and right_edge.is_complete()):
+        if not (
+            left_edge.end() == right_edge.start()
+            and left_edge.nextsym() == right_edge.lhs()
+            and left_edge.is_incomplete()
+            and right_edge.is_complete()
+        ):
             return
 
         # Construct the new edge.
         p = left_edge.prob() * right_edge.prob()
-        new_edge = ProbabilisticTreeEdge(p,
-                            span=(left_edge.start(), right_edge.end()),
-                            lhs=left_edge.lhs(), rhs=left_edge.rhs(),
-                            dot=left_edge.dot()+1)
+        new_edge = ProbabilisticTreeEdge(
+            p,
+            span=(left_edge.start(), right_edge.end()),
+            lhs=left_edge.lhs(),
+            rhs=left_edge.rhs(),
+            dot=left_edge.dot() + 1,
+        )
 
         # Add it to the chart, with appropriate child pointers.
         changed_chart = False
         for cpl1 in chart.child_pointer_lists(left_edge):
-            if chart.insert(new_edge, cpl1+(right_edge,)):
+            if chart.insert(new_edge, cpl1 + (right_edge,)):
                 changed_chart = True
 
         # If we changed the chart, then generate the edge.
-        if changed_chart: yield new_edge
+        if changed_chart:
+            yield new_edge
+
 
-@python_2_unicode_compatible
 class SingleEdgeProbabilisticFundamentalRule(AbstractChartRule):
-    NUM_EDGES=1
+    NUM_EDGES = 1
 
     _fundamental_rule = ProbabilisticFundamentalRule()
 
@@ -118,19 +137,22 @@ class SingleEdgeProbabilisticFundamentalRule(AbstractChartRule):
         fr = self._fundamental_rule
         if edge1.is_incomplete():
             # edge1 = left_edge; edge2 = right_edge
-            for edge2 in chart.select(start=edge1.end(), is_complete=True,
-                                     lhs=edge1.nextsym()):
+            for edge2 in chart.select(
+                start=edge1.end(), is_complete=True, lhs=edge1.nextsym()
+            ):
                 for new_edge in fr.apply(chart, grammar, edge1, edge2):
                     yield new_edge
         else:
             # edge2 = left_edge; edge1 = right_edge
-            for edge2 in chart.select(end=edge1.start(), is_complete=False,
-                                      nextsym=edge1.lhs()):
+            for edge2 in chart.select(
+                end=edge1.start(), is_complete=False, nextsym=edge1.lhs()
+            ):
                 for new_edge in fr.apply(chart, grammar, edge2, edge1):
                     yield new_edge
 
     def __str__(self):
-        return 'Fundamental Rule'
+        return "Fundamental Rule"
+
 
 class BottomUpProbabilisticChartParser(ParserI):
     """
@@ -158,6 +180,7 @@ class BottomUpProbabilisticChartParser(ParserI):
     :ivar _trace: The level of tracing output that should be generated
         when parsing a text.
     """
+
     def __init__(self, grammar, beam_size=0, trace=0):
         """
         Create a new ``BottomUpProbabilisticChartParser``, that uses
@@ -212,8 +235,10 @@ class BottomUpProbabilisticChartParser(ParserI):
         # Initialize the chart.
         for edge in bu_init.apply(chart, grammar):
             if self._trace > 1:
-                print('  %-50s [%s]' % (chart.pretty_format_edge(edge,width=2),
-                                        edge.prob()))
+                print(
+                    "  %-50s [%s]"
+                    % (chart.pretty_format_edge(edge, width=2), edge.prob())
+                )
             queue.append(edge)
 
         while len(queue) > 0:
@@ -227,8 +252,10 @@ class BottomUpProbabilisticChartParser(ParserI):
             # Get the best edge.
             edge = queue.pop()
             if self._trace > 0:
-                print('  %-50s [%s]' % (chart.pretty_format_edge(edge,width=2),
-                                        edge.prob()))
+                print(
+                    "  %-50s [%s]"
+                    % (chart.pretty_format_edge(edge, width=2), edge.prob())
+                )
 
             # Apply BU & FR to it.
             queue.extend(bu.apply(chart, grammar, edge))
@@ -250,7 +277,8 @@ class BottomUpProbabilisticChartParser(ParserI):
         return iter(parses)
 
     def _setprob(self, tree, prod_probs):
-        if tree.prob() is not None: return
+        if tree.prob() is not None:
+            return
 
         # Get the prob of the CFG production.
         lhs = Nonterminal(tree.label())
@@ -291,12 +319,13 @@ class BottomUpProbabilisticChartParser(ParserI):
     def _prune(self, queue, chart):
         """ Discard items in the queue if the queue is longer than the beam."""
         if len(queue) > self.beam_size:
-            split = len(queue)-self.beam_size
+            split = len(queue) - self.beam_size
             if self._trace > 2:
                 for edge in queue[:split]:
-                    print('  %-50s [DISCARDED]' % chart.pretty_format_edge(edge,2))
+                    print("  %-50s [DISCARDED]" % chart.pretty_format_edge(edge, 2))
             del queue[:split]
 
+
 class InsideChartParser(BottomUpProbabilisticChartParser):
     """
     A bottom-up parser for ``PCFG`` grammars that tries edges in descending
@@ -312,6 +341,7 @@ class InsideChartParser(BottomUpProbabilisticChartParser):
     This sorting order results in a type of lowest-cost-first search
     strategy.
     """
+
     # Inherit constructor.
     def sort_queue(self, queue, chart):
         """
@@ -330,6 +360,7 @@ class InsideChartParser(BottomUpProbabilisticChartParser):
         """
         queue.sort(key=lambda edge: edge.prob())
 
+
 # Eventually, this will become some sort of inside-outside parser:
 # class InsideOutsideParser(BottomUpProbabilisticChartParser):
 #     def __init__(self, grammar, trace=0):
@@ -349,7 +380,7 @@ class InsideChartParser(BottomUpProbabilisticChartParser):
 #                                      bestp.get(elt,0))
 #
 #         self._bestp = bestp
-#         for (k,v) in self._bestp.items(): print k,v
+#         for (k,v) in self._bestp.items(): print(k,v)
 #
 #     def _sortkey(self, edge):
 #         return edge.structure()[PROB] * self._bestp[edge.lhs()]
@@ -357,23 +388,28 @@ class InsideChartParser(BottomUpProbabilisticChartParser):
 #     def sort_queue(self, queue, chart):
 #         queue.sort(key=self._sortkey)
 
-import random
+
 class RandomChartParser(BottomUpProbabilisticChartParser):
     """
     A bottom-up parser for ``PCFG`` grammars that tries edges in random order.
     This sorting order results in a random search strategy.
     """
+
     # Inherit constructor
     def sort_queue(self, queue, chart):
-        i = random.randint(0, len(queue)-1)
+        i = random.randint(0, len(queue) - 1)
         (queue[-1], queue[i]) = (queue[i], queue[-1])
 
+
 class UnsortedChartParser(BottomUpProbabilisticChartParser):
     """
    A bottom-up parser for ``PCFG`` grammars that tries edges in whatever order they happen to be in the queue.
     """
+
     # Inherit constructor
-    def sort_queue(self, queue, chart): return
+    def sort_queue(self, queue, chart):
+        return
+
 
 class LongestChartParser(BottomUpProbabilisticChartParser):
     """
@@ -381,14 +417,17 @@ class LongestChartParser(BottomUpProbabilisticChartParser):
     shorter ones.  This sorting order results in a type of best-first
     search strategy.
     """
+
     # Inherit constructor
     def sort_queue(self, queue, chart):
         queue.sort(key=lambda edge: edge.length())
 
+
 ##//////////////////////////////////////////////////////
 ##  Test Code
 ##//////////////////////////////////////////////////////
 
+
 def demo(choice=None, draw_parses=None, print_parses=None):
     """
     A demonstration of the probabilistic parsers.  The user is
@@ -401,7 +440,8 @@ def demo(choice=None, draw_parses=None, print_parses=None):
     from nltk.parse import pchart
 
     # Define two demos.  Each demo has a sentence and a grammar.
-    toy_pcfg1 = PCFG.fromstring("""
+    toy_pcfg1 = PCFG.fromstring(
+        """
     S -> NP VP [1.0]
     NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
     Det -> 'the' [0.8] | 'my' [0.2]
@@ -410,9 +450,11 @@ def demo(choice=None, draw_parses=None, print_parses=None):
     V -> 'ate' [0.35] | 'saw' [0.65]
     PP -> P NP [1.0]
     P -> 'with' [0.61] | 'under' [0.39]
-    """)
+    """
+    )
 
-    toy_pcfg2 = PCFG.fromstring("""
+    toy_pcfg2 = PCFG.fromstring(
+        """
     S    -> NP VP         [1.0]
     VP   -> V NP          [.59]
     VP   -> V             [.40]
@@ -436,25 +478,27 @@ def demo(choice=None, draw_parses=None, print_parses=None):
     Det  -> 'the'         [.41]
     Det  -> 'a'           [.31]
     Det  -> 'my'          [.28]
-    """)
+    """
+    )
 
-    demos = [('I saw John with my telescope', toy_pcfg1),
-             ('the boy saw Jack with Bob under the table with a telescope',
-              toy_pcfg2)]
+    demos = [
+        ("I saw John with my telescope", toy_pcfg1),
+        ("the boy saw Jack with Bob under the table with a telescope", toy_pcfg2),
+    ]
 
     if choice is None:
         # Ask the user which demo they want to use.
         print()
         for i in range(len(demos)):
-            print('%3s: %s' % (i+1, demos[i][0]))
-            print('     %r' % demos[i][1])
+            print("%3s: %s" % (i + 1, demos[i][0]))
+            print("     %r" % demos[i][1])
             print()
-        print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
-        choice = int(sys.stdin.readline().strip())-1
+        print("Which demo (%d-%d)? " % (1, len(demos)), end=" ")
+        choice = int(sys.stdin.readline().strip()) - 1
     try:
         sent, grammar = demos[choice]
     except:
-        print('Bad sentence number')
+        print("Bad sentence number")
         return
 
     # Tokenize the sentence.
@@ -466,8 +510,8 @@ def demo(choice=None, draw_parses=None, print_parses=None):
         pchart.RandomChartParser(grammar),
         pchart.UnsortedChartParser(grammar),
         pchart.LongestChartParser(grammar),
-        pchart.InsideChartParser(grammar, beam_size = len(tokens)+1)   # was BeamParser
-        ]
+        pchart.InsideChartParser(grammar, beam_size=len(tokens) + 1),  # was BeamParser
+    ]
 
     # Run the parsers on the tokenized sentence.
     times = []
@@ -475,48 +519,60 @@ def demo(choice=None, draw_parses=None, print_parses=None):
     num_parses = []
     all_parses = {}
     for parser in parsers:
-        print('\ns: %s\nparser: %s\ngrammar: %s' % (sent,parser,grammar))
+        print("\ns: %s\nparser: %s\ngrammar: %s" % (sent, parser, grammar))
         parser.trace(3)
         t = time.time()
         parses = list(parser.parse(tokens))
-        times.append(time.time()-t)
-        p = (reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses) if parses else 0)
+        times.append(time.time() - t)
+        p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses) if parses else 0
         average_p.append(p)
         num_parses.append(len(parses))
-        for p in parses: all_parses[p.freeze()] = 1
+        for p in parses:
+            all_parses[p.freeze()] = 1
 
     # Print some summary statistics
     print()
-    print('       Parser      Beam | Time (secs)   # Parses   Average P(parse)')
-    print('------------------------+------------------------------------------')
+    print("       Parser      Beam | Time (secs)   # Parses   Average P(parse)")
+    print("------------------------+------------------------------------------")
     for i in range(len(parsers)):
-        print('%18s %4d |%11.4f%11d%19.14f' % (parsers[i].__class__.__name__,
-                                             parsers[i].beam_size,
-                                             times[i],num_parses[i],average_p[i]))
+        print(
+            "%18s %4d |%11.4f%11d%19.14f"
+            % (
+                parsers[i].__class__.__name__,
+                parsers[i].beam_size,
+                times[i],
+                num_parses[i],
+                average_p[i],
+            )
+        )
     parses = all_parses.keys()
-    if parses: p = reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses)
-    else: p = 0
-    print('------------------------+------------------------------------------')
-    print('%18s      |%11s%11d%19.14f' % ('(All Parses)', 'n/a', len(parses), p))
+    if parses:
+        p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
+    else:
+        p = 0
+    print("------------------------+------------------------------------------")
+    print("%18s      |%11s%11d%19.14f" % ("(All Parses)", "n/a", len(parses), p))
 
     if draw_parses is None:
         # Ask the user if we should draw the parses.
         print()
-        print('Draw parses (y/n)? ', end=' ')
-        draw_parses = sys.stdin.readline().strip().lower().startswith('y')
+        print("Draw parses (y/n)? ", end=" ")
+        draw_parses = sys.stdin.readline().strip().lower().startswith("y")
     if draw_parses:
         from nltk.draw.tree import draw_trees
-        print('  please wait...')
+
+        print("  please wait...")
         draw_trees(*parses)
 
     if print_parses is None:
         # Ask the user if we should print the parses.
         print()
-        print('Print parses (y/n)? ', end=' ')
-        print_parses = sys.stdin.readline().strip().lower().startswith('y')
+        print("Print parses (y/n)? ", end=" ")
+        print_parses = sys.stdin.readline().strip().lower().startswith("y")
     if print_parses:
         for parse in parses:
             print(parse)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/parse/pchart.pyc b/nlp_resource_data/nltk/parse/pchart.pyc
deleted file mode 100755 (executable)
index 27d60e6..0000000
Binary files a/nlp_resource_data/nltk/parse/pchart.pyc and /dev/null differ
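
A corresponding sketch for the converted pchart module (not part of the patch). The grammar is toy_pcfg1 from demo() above; the hunk context omits its N and VP productions, so those two lines are quoted from the NLTK source rather than from this diff.

# Sketch only; toy_pcfg1 reconstructed from demo() (N/VP lines assumed).
from nltk.grammar import PCFG
from nltk.parse.pchart import InsideChartParser

grammar = PCFG.fromstring(
    """
    S -> NP VP [1.0]
    NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
    Det -> 'the' [0.8] | 'my' [0.2]
    N -> 'man' [0.5] | 'telescope' [0.5]
    VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
    V -> 'ate' [0.35] | 'saw' [0.65]
    PP -> P NP [1.0]
    P -> 'with' [0.61] | 'under' [0.39]
    """
)

parser = InsideChartParser(grammar)
for tree in parser.parse("I saw John with my telescope".split()):
    print(tree)  # parses arrive best-first (descending probability)
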
old mode 100755 (executable)
new mode 100644 (file)
index 273851d..b4d56cf
@@ -1,29 +1,31 @@
 # Natural Language Toolkit: Dependency Grammars
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Jason Narad <jason.narad@gmail.com>
 #
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 #
-from __future__ import print_function, unicode_literals
 
 from collections import defaultdict
 from itertools import chain
 from functools import total_ordering
 
-from nltk.grammar import (DependencyProduction, DependencyGrammar,
-                          ProbabilisticDependencyGrammar)
+from nltk.grammar import (
+    DependencyProduction,
+    DependencyGrammar,
+    ProbabilisticDependencyGrammar,
+)
 from nltk.parse.dependencygraph import DependencyGraph
 from nltk.internals import raise_unorderable_types
-from nltk.compat import python_2_unicode_compatible
+
 
 #################################################################
 # Dependency Span
 #################################################################
 
+
 @total_ordering
-@python_2_unicode_compatible
 class DependencySpan(object):
     """
     A contiguous span over some part of the input string representing
@@ -35,6 +37,7 @@ class DependencySpan(object):
     to the head word for the entire span.  This is the same as the root node if
     the dependency structure were depicted as a graph.
     """
+
     def __init__(self, start_index, end_index, head_index, arcs, tags):
         self._start_index = start_index
         self._end_index = end_index
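
Before the chart code below, it may help to see what a DependencySpan holds. A minimal sketch, not part of the patch, assuming this file is nltk/parse/projectivedependencyparser.py (the diff header above omits the file name):

# Sketch: the one-token span the CYK initializer below places on the
# chart diagonal; arcs[k] == -1 means token k has no head assigned yet.
from nltk.parse.projectivedependencyparser import DependencySpan

span = DependencySpan(0, 1, 0, [-1], ["null"])
print(repr(span))  # Span 0-1; Head Index: 0
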
@@ -56,21 +59,30 @@ class DependencySpan(object):
        :return: A concise string representation of the ``DependencySpan``.
         :rtype: str.
         """
-        return 'Span %d-%d; Head Index: %d' % (self._start_index, self._end_index, self._head_index)
+        return "Span %d-%d; Head Index: %d" % (
+            self._start_index,
+            self._end_index,
+            self._head_index,
+        )
 
     def __str__(self):
         """
         :return: A verbose string representation of the ``DependencySpan``.
         :rtype: str
         """
-        str = 'Span %d-%d; Head Index: %d' % (self._start_index, self._end_index, self._head_index)
+        str = "Span %d-%d; Head Index: %d" % (
+            self._start_index,
+            self._end_index,
+            self._head_index,
+        )
         for i in range(len(self._arcs)):
-            str += '\n%d <- %d, %s' % (i, self._arcs[i], self._tags[i])
+            str += "\n%d <- %d, %s" % (i, self._arcs[i], self._tags[i])
         return str
 
     def __eq__(self, other):
-        return (type(self) == type(other) and
-                self._comparison_key == other._comparison_key)
+        return (
+            type(self) == type(other) and self._comparison_key == other._comparison_key
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -86,17 +98,19 @@ class DependencySpan(object):
         """
         return self._hash
 
+
 #################################################################
 # Chart Cell
 #################################################################
 
-@python_2_unicode_compatible
+
 class ChartCell(object):
     """
     A cell from the parse chart formed when performing the CYK algorithm.
     Each cell keeps track of its x and y coordinates (though this will probably
     be discarded), and a list of spans serving as the cell's entries.
     """
+
     def __init__(self, x, y):
         """
         :param x: This cell's x coordinate.
@@ -123,14 +137,14 @@ class ChartCell(object):
         :return: A verbose string representation of this ``ChartCell``.
         :rtype: str.
         """
-        return 'CC[%d,%d]: %s' % (self._x, self._y, self._entries)
+        return "CC[%d,%d]: %s" % (self._x, self._y, self._entries)
 
     def __repr__(self):
         """
         :return: A concise string representation of this ``ChartCell``.
         :rtype: str.
         """
-        return '%s' % self
+        return "%s" % self
 
 
 #################################################################
@@ -176,13 +190,13 @@ class ProjectiveDependencyParser(object):
         for i in range(0, len(self._tokens) + 1):
             chart.append([])
             for j in range(0, len(self._tokens) + 1):
-                chart[i].append(ChartCell(i,j))
-                if i==j+1:
-                    chart[i][j].add(DependencySpan(i-1,i,i-1,[-1], ['null']))
+                chart[i].append(ChartCell(i, j))
+                if i == j + 1:
+                    chart[i][j].add(DependencySpan(i - 1, i, i - 1, [-1], ["null"]))
 
-        for i in range(1,len(self._tokens)+1):
-            for j in range(i-2,-1,-1):
-                for k in range(i-1,j,-1):
+        for i in range(1, len(self._tokens) + 1):
+            for j in range(i - 2, -1, -1):
+                for k in range(i - 1, j, -1):
                     for span1 in chart[k][j]._entries:
                         for span2 in chart[i][k]._entries:
                             for newspan in self.concatenate(span1, span2):
@@ -190,17 +204,27 @@ class ProjectiveDependencyParser(object):
 
         for parse in chart[len(self._tokens)][0]._entries:
             conll_format = ""
-#            malt_format = ""
+            #            malt_format = ""
             for i in range(len(tokens)):
-#                malt_format += '%s\t%s\t%d\t%s\n' % (tokens[i], 'null', parse._arcs[i] + 1, 'null')
-                #conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], 'null', 'null', 'null', parse._arcs[i] + 1, 'null', '-', '-')
-                # Modify to comply with the new Dependency Graph requirement (at least must have an root elements) 
-                conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], 'null', 'null', 'null', parse._arcs[i] + 1, 'ROOT', '-', '-')
+                #                malt_format += '%s\t%s\t%d\t%s\n' % (tokens[i], 'null', parse._arcs[i] + 1, 'null')
+                # conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], 'null', 'null', 'null', parse._arcs[i] + 1, 'null', '-', '-')
+                # Modified to comply with the new Dependency Graph requirement (there must be at least one ROOT element)
+                conll_format += "\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n" % (
+                    i + 1,
+                    tokens[i],
+                    tokens[i],
+                    "null",
+                    "null",
+                    "null",
+                    parse._arcs[i] + 1,
+                    "ROOT",
+                    "-",
+                    "-",
+                )
             dg = DependencyGraph(conll_format)
-#           if self.meets_arity(dg):
+            #           if self.meets_arity(dg):
             yield dg.tree()
 
-
     def concatenate(self, span1, span2):
         """
         Concatenates the two spans in whichever way possible.  This
@@ -215,7 +239,7 @@ class ProjectiveDependencyParser(object):
         """
         spans = []
         if span1._start_index == span2._start_index:
-            print('Error: Mismatched spans - replace this with thrown error')
+            print("Error: Mismatched spans - replace this with thrown error")
         if span1._start_index > span2._start_index:
             temp_span = span1
             span1 = span2
@@ -223,16 +247,36 @@ class ProjectiveDependencyParser(object):
         # adjacent rightward covered concatenation
         new_arcs = span1._arcs + span2._arcs
         new_tags = span1._tags + span2._tags
-        if self._grammar.contains(self._tokens[span1._head_index], self._tokens[span2._head_index]):
-#           print 'Performing rightward cover %d to %d' % (span1._head_index, span2._head_index)
+        if self._grammar.contains(
+            self._tokens[span1._head_index], self._tokens[span2._head_index]
+        ):
+            #           print('Performing rightward cover %d to %d' % (span1._head_index, span2._head_index))
             new_arcs[span2._head_index - span1._start_index] = span1._head_index
-            spans.append(DependencySpan(span1._start_index, span2._end_index, span1._head_index, new_arcs, new_tags))
+            spans.append(
+                DependencySpan(
+                    span1._start_index,
+                    span2._end_index,
+                    span1._head_index,
+                    new_arcs,
+                    new_tags,
+                )
+            )
         # adjacent leftward covered concatenation
         new_arcs = span1._arcs + span2._arcs
-        if self._grammar.contains(self._tokens[span2._head_index], self._tokens[span1._head_index]):
-#           print 'performing leftward cover %d to %d' % (span2._head_index, span1._head_index)
+        if self._grammar.contains(
+            self._tokens[span2._head_index], self._tokens[span1._head_index]
+        ):
+            #           print('performing leftward cover %d to %d' % (span2._head_index, span1._head_index))
             new_arcs[span1._head_index - span1._start_index] = span2._head_index
-            spans.append(DependencySpan(span1._start_index, span2._end_index, span2._head_index, new_arcs, new_tags))
+            spans.append(
+                DependencySpan(
+                    span1._start_index,
+                    span2._end_index,
+                    span2._head_index,
+                    new_arcs,
+                    new_tags,
+                )
+            )
         return spans
 
 
@@ -287,21 +331,26 @@ class ProbabilisticProjectiveDependencyParser(object):
         for i in range(0, len(self._tokens) + 1):
             chart.append([])
             for j in range(0, len(self._tokens) + 1):
-                chart[i].append(ChartCell(i,j))
-                if i==j+1:
-                    if tokens[i-1] in self._grammar._tags:
-                        for tag in self._grammar._tags[tokens[i-1]]:
-                            chart[i][j].add(DependencySpan(i-1,i,i-1,[-1], [tag]))
+                chart[i].append(ChartCell(i, j))
+                if i == j + 1:
+                    if tokens[i - 1] in self._grammar._tags:
+                        for tag in self._grammar._tags[tokens[i - 1]]:
+                            chart[i][j].add(
+                                DependencySpan(i - 1, i, i - 1, [-1], [tag])
+                            )
                     else:
-                        print('No tag found for input token \'%s\', parse is impossible.' % tokens[i-1])
+                        print(
+                            "No tag found for input token '%s', parse is impossible."
+                            % tokens[i - 1]
+                        )
                         return []
-        for i in range(1,len(self._tokens)+1):
-            for j in range(i-2,-1,-1):
-                for k in range(i-1,j,-1):
+        for i in range(1, len(self._tokens) + 1):
+            for j in range(i - 2, -1, -1):
+                for k in range(i - 1, j, -1):
                     for span1 in chart[k][j]._entries:
-                            for span2 in chart[i][k]._entries:
-                                for newspan in self.concatenate(span1, span2):
-                                    chart[i][j].add(newspan)
+                        for span2 in chart[i][k]._entries:
+                            for newspan in self.concatenate(span1, span2):
+                                chart[i][j].add(newspan)
         trees = []
         max_parse = None
         max_score = 0
@@ -309,17 +358,32 @@ class ProbabilisticProjectiveDependencyParser(object):
             conll_format = ""
             malt_format = ""
             for i in range(len(tokens)):
-                malt_format += '%s\t%s\t%d\t%s\n' % (tokens[i], 'null', parse._arcs[i] + 1, 'null')
-                #conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], parse._tags[i], parse._tags[i], 'null', parse._arcs[i] + 1, 'null', '-', '-')
-                # Modify to comply with recent change in dependency graph such that there must be a ROOT element. 
-                conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], parse._tags[i], parse._tags[i], 'null', parse._arcs[i] + 1, 'ROOT', '-', '-')
+                malt_format += "%s\t%s\t%d\t%s\n" % (
+                    tokens[i],
+                    "null",
+                    parse._arcs[i] + 1,
+                    "null",
+                )
+                # conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], parse._tags[i], parse._tags[i], 'null', parse._arcs[i] + 1, 'null', '-', '-')
+                # Modified to comply with the recent change in the dependency graph: there must be a ROOT element.
+                conll_format += "\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n" % (
+                    i + 1,
+                    tokens[i],
+                    tokens[i],
+                    parse._tags[i],
+                    parse._tags[i],
+                    "null",
+                    parse._arcs[i] + 1,
+                    "ROOT",
+                    "-",
+                    "-",
+                )
             dg = DependencyGraph(conll_format)
-            score = self.compute_prob(dg)            
+            score = self.compute_prob(dg)
             trees.append((score, dg.tree()))
         trees.sort()
         return (tree for (score, tree) in trees)
 
-
     def concatenate(self, span1, span2):
         """
         Concatenates the two spans in whichever way possible.  This
@@ -334,7 +398,7 @@ class ProbabilisticProjectiveDependencyParser(object):
         """
         spans = []
         if span1._start_index == span2._start_index:
-            print('Error: Mismatched spans - replace this with thrown error')
+            print("Error: Mismatched spans - replace this with thrown error")
         if span1._start_index > span2._start_index:
             temp_span = span1
             span1 = span2
@@ -342,15 +406,35 @@ class ProbabilisticProjectiveDependencyParser(object):
         # adjacent rightward covered concatenation
         new_arcs = span1._arcs + span2._arcs
         new_tags = span1._tags + span2._tags
-        if self._grammar.contains(self._tokens[span1._head_index], self._tokens[span2._head_index]):
+        if self._grammar.contains(
+            self._tokens[span1._head_index], self._tokens[span2._head_index]
+        ):
             new_arcs[span2._head_index - span1._start_index] = span1._head_index
-            spans.append(DependencySpan(span1._start_index, span2._end_index, span1._head_index, new_arcs, new_tags))
+            spans.append(
+                DependencySpan(
+                    span1._start_index,
+                    span2._end_index,
+                    span1._head_index,
+                    new_arcs,
+                    new_tags,
+                )
+            )
         # adjacent leftward covered concatenation
         new_arcs = span1._arcs + span2._arcs
         new_tags = span1._tags + span2._tags
-        if self._grammar.contains(self._tokens[span2._head_index], self._tokens[span1._head_index]):
+        if self._grammar.contains(
+            self._tokens[span2._head_index], self._tokens[span1._head_index]
+        ):
             new_arcs[span1._head_index - span1._start_index] = span2._head_index
-            spans.append(DependencySpan(span1._start_index, span2._end_index, span2._head_index, new_arcs, new_tags))
+            spans.append(
+                DependencySpan(
+                    span1._start_index,
+                    span2._end_index,
+                    span2._head_index,
+                    new_arcs,
+                    new_tags,
+                )
+            )
         return spans
 
     def train(self, graphs):
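
Both branches of concatenate() splice the two arc lists and differ only in
which head adopts the other. A toy illustration of the rightward case, using
plain dicts with hypothetical keys in place of the DependencySpan fields:

    span1 = {"start": 0, "end": 2, "head": 0, "arcs": [-1, 0]}  # covers tokens 0-1
    span2 = {"start": 2, "end": 3, "head": 2, "arcs": [-1]}     # covers token 2
    # rightward covered concatenation: span2's head attaches under span1's head
    new_arcs = span1["arcs"] + span2["arcs"]
    new_arcs[span2["head"] - span1["start"]] = span1["head"]
    print(new_arcs)  # [-1, 0, 0]: token 2 now depends on token 0
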
@@ -368,49 +452,71 @@ class ProbabilisticProjectiveDependencyParser(object):
         tags = {}
         for dg in graphs:
             for node_index in range(1, len(dg.nodes)):
-                #children = dg.nodes[node_index]['deps']
-                children = list(chain(*dg.nodes[node_index]['deps'].values()))
-                
+                # children = dg.nodes[node_index]['deps']
+                children = list(chain(*dg.nodes[node_index]["deps"].values()))
+
                 nr_left_children = dg.left_children(node_index)
                 nr_right_children = dg.right_children(node_index)
                 nr_children = nr_left_children + nr_right_children
-                for child_index in range(0 - (nr_left_children + 1), nr_right_children + 2):
-                    head_word = dg.nodes[node_index]['word']
-                    head_tag = dg.nodes[node_index]['tag']
+                for child_index in range(
+                    0 - (nr_left_children + 1), nr_right_children + 2
+                ):
+                    head_word = dg.nodes[node_index]["word"]
+                    head_tag = dg.nodes[node_index]["tag"]
                     if head_word in tags:
                         tags[head_word].add(head_tag)
                     else:
                         tags[head_word] = set([head_tag])
-                    child = 'STOP'
-                    child_tag = 'STOP'
-                    prev_word = 'START'
-                    prev_tag = 'START'
+                    child = "STOP"
+                    child_tag = "STOP"
+                    prev_word = "START"
+                    prev_tag = "START"
                     if child_index < 0:
                         array_index = child_index + nr_left_children
                         if array_index >= 0:
-                            child = dg.nodes[children[array_index]]['word']
-                            child_tag = dg.nodes[children[array_index]]['tag']
+                            child = dg.nodes[children[array_index]]["word"]
+                            child_tag = dg.nodes[children[array_index]]["tag"]
                         if child_index != -1:
-                            prev_word = dg.nodes[children[array_index + 1]]['word']
-                            prev_tag = dg.nodes[children[array_index + 1]]['tag']
-                        if child != 'STOP':
+                            prev_word = dg.nodes[children[array_index + 1]]["word"]
+                            prev_tag = dg.nodes[children[array_index + 1]]["tag"]
+                        if child != "STOP":
                             productions.append(DependencyProduction(head_word, [child]))
-                        head_event = '(head (%s %s) (mods (%s, %s, %s) left))' % (child, child_tag, prev_tag, head_word, head_tag)
-                        mod_event = '(mods (%s, %s, %s) left))' % (prev_tag, head_word, head_tag)
+                        head_event = "(head (%s %s) (mods (%s, %s, %s) left))" % (
+                            child,
+                            child_tag,
+                            prev_tag,
+                            head_word,
+                            head_tag,
+                        )
+                        mod_event = "(mods (%s, %s, %s) left))" % (
+                            prev_tag,
+                            head_word,
+                            head_tag,
+                        )
                         events[head_event] += 1
                         events[mod_event] += 1
                     elif child_index > 0:
                         array_index = child_index + nr_left_children - 1
                         if array_index < nr_children:
-                            child = dg.nodes[children[array_index]]['word']
-                            child_tag = dg.nodes[children[array_index]]['tag']
+                            child = dg.nodes[children[array_index]]["word"]
+                            child_tag = dg.nodes[children[array_index]]["tag"]
                         if child_index != 1:
-                            prev_word = dg.nodes[children[array_index - 1]]['word']
-                            prev_tag =  dg.nodes[children[array_index - 1]]['tag']
-                        if child != 'STOP':
+                            prev_word = dg.nodes[children[array_index - 1]]["word"]
+                            prev_tag = dg.nodes[children[array_index - 1]]["tag"]
+                        if child != "STOP":
                             productions.append(DependencyProduction(head_word, [child]))
-                        head_event = '(head (%s %s) (mods (%s, %s, %s) right))' % (child, child_tag, prev_tag, head_word, head_tag)
-                        mod_event = '(mods (%s, %s, %s) right))' % (prev_tag, head_word, head_tag)
+                        head_event = "(head (%s %s) (mods (%s, %s, %s) right))" % (
+                            child,
+                            child_tag,
+                            prev_tag,
+                            head_word,
+                            head_tag,
+                        )
+                        mod_event = "(mods (%s, %s, %s) right))" % (
+                            prev_tag,
+                            head_word,
+                            head_tag,
+                        )
                         events[head_event] += 1
                         events[mod_event] += 1
         self._grammar = ProbabilisticDependencyGrammar(productions, events, tags)
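
The statistics gathered here are plain string counts. For example, generating
the left modifier 'price'/'NN' under head 'fell'/'VBD' before any closer
modifier exists (prev tag START) increments these two keys; the values are
illustrative, and the unbalanced trailing parenthesis is part of the key
exactly as built above:

    child, child_tag, prev_tag, head_word, head_tag = "price", "NN", "START", "fell", "VBD"
    head_event = "(head (%s %s) (mods (%s, %s, %s) left))" % (
        child, child_tag, prev_tag, head_word, head_tag
    )
    mod_event = "(mods (%s, %s, %s) left))" % (prev_tag, head_word, head_tag)
    print(head_event)  # (head (price NN) (mods (START, fell, VBD) left))
    print(mod_event)   # (mods (START, fell, VBD) left))
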
@@ -428,55 +534,75 @@ class ProbabilisticProjectiveDependencyParser(object):
         """
         prob = 1.0
         for node_index in range(1, len(dg.nodes)):
-            #children = dg.nodes[node_index]['deps']
-            children = list(chain(*dg.nodes[node_index]['deps'].values()))
-            
+            # children = dg.nodes[node_index]['deps']
+            children = list(chain(*dg.nodes[node_index]["deps"].values()))
+
             nr_left_children = dg.left_children(node_index)
             nr_right_children = dg.right_children(node_index)
             nr_children = nr_left_children + nr_right_children
             for child_index in range(0 - (nr_left_children + 1), nr_right_children + 2):
-                head_word = dg.nodes[node_index]['word']
-                head_tag = dg.nodes[node_index]['tag']
-                child = 'STOP'
-                child_tag = 'STOP'
-                prev_word = 'START'
-                prev_tag = 'START'
+                head_word = dg.nodes[node_index]["word"]
+                head_tag = dg.nodes[node_index]["tag"]
+                child = "STOP"
+                child_tag = "STOP"
+                prev_word = "START"
+                prev_tag = "START"
                 if child_index < 0:
                     array_index = child_index + nr_left_children
                     if array_index >= 0:
-                        child = dg.nodes[children[array_index]]['word']
-                        child_tag = dg.nodes[children[array_index]]['tag']
+                        child = dg.nodes[children[array_index]]["word"]
+                        child_tag = dg.nodes[children[array_index]]["tag"]
                     if child_index != -1:
-                        prev_word = dg.nodes[children[array_index + 1]]['word']
-                        prev_tag = dg.nodes[children[array_index + 1]]['tag']
-                    head_event = '(head (%s %s) (mods (%s, %s, %s) left))' % (child, child_tag, prev_tag, head_word, head_tag)
-                    mod_event = '(mods (%s, %s, %s) left))' % (prev_tag, head_word, head_tag)
+                        prev_word = dg.nodes[children[array_index + 1]]["word"]
+                        prev_tag = dg.nodes[children[array_index + 1]]["tag"]
+                    head_event = "(head (%s %s) (mods (%s, %s, %s) left))" % (
+                        child,
+                        child_tag,
+                        prev_tag,
+                        head_word,
+                        head_tag,
+                    )
+                    mod_event = "(mods (%s, %s, %s) left))" % (
+                        prev_tag,
+                        head_word,
+                        head_tag,
+                    )
                     h_count = self._grammar._events[head_event]
                     m_count = self._grammar._events[mod_event]
-                    
-                    # If the grammar is not covered 
+
+                    # If this head/mod event was never observed in training
                     if m_count != 0:
-                        prob *= (h_count / m_count)
+                        prob *= h_count / m_count
                     else:
-                        prob = 0.00000001  # Very small number  
-                    
+                        prob = 0.00000001  # Very small number
+
                 elif child_index > 0:
                     array_index = child_index + nr_left_children - 1
                     if array_index < nr_children:
-                        child = dg.nodes[children[array_index]]['word']
-                        child_tag = dg.nodes[children[array_index]]['tag']
+                        child = dg.nodes[children[array_index]]["word"]
+                        child_tag = dg.nodes[children[array_index]]["tag"]
                     if child_index != 1:
-                        prev_word = dg.nodes[children[array_index - 1]]['word']
-                        prev_tag = dg.nodes[children[array_index - 1]]['tag']
-                    head_event = '(head (%s %s) (mods (%s, %s, %s) right))' % (child, child_tag, prev_tag, head_word, head_tag)
-                    mod_event = '(mods (%s, %s, %s) right))' % (prev_tag, head_word, head_tag)
+                        prev_word = dg.nodes[children[array_index - 1]]["word"]
+                        prev_tag = dg.nodes[children[array_index - 1]]["tag"]
+                    head_event = "(head (%s %s) (mods (%s, %s, %s) right))" % (
+                        child,
+                        child_tag,
+                        prev_tag,
+                        head_word,
+                        head_tag,
+                    )
+                    mod_event = "(mods (%s, %s, %s) right))" % (
+                        prev_tag,
+                        head_word,
+                        head_tag,
+                    )
                     h_count = self._grammar._events[head_event]
                     m_count = self._grammar._events[mod_event]
 
                     if m_count != 0:
-                        prob *= (h_count / m_count)
+                        prob *= h_count / m_count
                     else:
-                        prob = 0.00000001  # Very small number  
+                        prob = 0.00000001  # Very small number
 
         return prob
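
Each generated modifier thus contributes count(head_event) / count(mod_event)
to the parse score, and an event never seen in training collapses the running
product to the 1e-8 floor instead of zeroing it out. A one-factor sketch with
hypothetical counts:

    events = {"head_ev": 3, "mod_ev": 4}  # hypothetical counts from training
    h_count, m_count = events.get("head_ev", 0), events.get("mod_ev", 0)
    prob = h_count / m_count if m_count else 0.00000001
    print(prob)  # 0.75
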
 
@@ -485,9 +611,10 @@ class ProbabilisticProjectiveDependencyParser(object):
 # Demos
 #################################################################
 
+
 def demo():
     projective_rule_parse_demo()
-#    arity_parse_demo()
+    #    arity_parse_demo()
     projective_prob_parse_demo()
 
 
@@ -497,17 +624,20 @@ def projective_rule_parse_demo():
     ``DependencyGrammar`` to perform a projective dependency
     parse.
     """
-    grammar = DependencyGrammar.fromstring("""
+    grammar = DependencyGrammar.fromstring(
+        """
     'scratch' -> 'cats' | 'walls'
     'walls' -> 'the'
     'cats' -> 'the'
-    """)
+    """
+    )
     print(grammar)
     pdp = ProjectiveDependencyParser(grammar)
-    trees = pdp.parse(['the', 'cats', 'scratch', 'the', 'walls'])
+    trees = pdp.parse(["the", "cats", "scratch", "the", "walls"])
     for tree in trees:
         print(tree)
 
+
 def arity_parse_demo():
     """
     A demonstration showing the creation of a ``DependencyGrammar``
@@ -516,42 +646,48 @@ def arity_parse_demo():
     created by a ``ProjectiveDependencyParser``.
     """
     print()
-    print('A grammar with no arity constraints. Each DependencyProduction')
-    print('specifies a relationship between one head word and only one')
-    print('modifier word.')
-    grammar = DependencyGrammar.fromstring("""
+    print("A grammar with no arity constraints. Each DependencyProduction")
+    print("specifies a relationship between one head word and only one")
+    print("modifier word.")
+    grammar = DependencyGrammar.fromstring(
+        """
     'fell' -> 'price' | 'stock'
     'price' -> 'of' | 'the'
     'of' -> 'stock'
     'stock' -> 'the'
-    """)
+    """
+    )
     print(grammar)
 
     print()
-    print('For the sentence \'The price of the stock fell\', this grammar')
-    print('will produce the following three parses:')
+    print("For the sentence 'The price of the stock fell', this grammar")
+    print("will produce the following three parses:")
     pdp = ProjectiveDependencyParser(grammar)
-    trees = pdp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])
+    trees = pdp.parse(["the", "price", "of", "the", "stock", "fell"])
     for tree in trees:
         print(tree)
 
     print()
-    print('By contrast, the following grammar contains a ')
-    print('DependencyProduction that specifies a relationship')
-    print('between a single head word, \'price\', and two modifier')
-    print('words, \'of\' and \'the\'.')
-    grammar = DependencyGrammar.fromstring("""
+    print("By contrast, the following grammar contains a ")
+    print("DependencyProduction that specifies a relationship")
+    print("between a single head word, 'price', and two modifier")
+    print("words, 'of' and 'the'.")
+    grammar = DependencyGrammar.fromstring(
+        """
     'fell' -> 'price' | 'stock'
     'price' -> 'of' 'the'
     'of' -> 'stock'
     'stock' -> 'the'
-    """)
+    """
+    )
     print(grammar)
 
     print()
-    print('This constrains the number of possible parses to just one:') # unimplemented, soon to replace
+    print(
+        "This constrains the number of possible parses to just one:"
+    )  # unimplemented, soon to replace
     pdp = ProjectiveDependencyParser(grammar)
-    trees = pdp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])
+    trees = pdp.parse(["the", "price", "of", "the", "stock", "fell"])
     for tree in trees:
         print(tree)
 
@@ -563,17 +699,17 @@ def projective_prob_parse_demo():
     """
     from nltk.parse.dependencygraph import conll_data2
 
-    graphs = [DependencyGraph(entry)
-              for entry in conll_data2.split('\n\n') if entry]
+    graphs = [DependencyGraph(entry) for entry in conll_data2.split("\n\n") if entry]
     ppdp = ProbabilisticProjectiveDependencyParser()
-    print('Training Probabilistic Projective Dependency Parser...')
+    print("Training Probabilistic Projective Dependency Parser...")
     ppdp.train(graphs)
-    
-    sent = ['Cathy', 'zag', 'hen', 'wild', 'zwaaien', '.']
-    print('Parsing \'', " ".join(sent), '\'...')
-    print('Parse:')
+
+    sent = ["Cathy", "zag", "hen", "wild", "zwaaien", "."]
+    print("Parsing '", " ".join(sent), "'...")
+    print("Parse:")
     for tree in ppdp.parse(sent):
         print(tree)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/parse/projectivedependencyparser.pyc b/nlp_resource_data/nltk/parse/projectivedependencyparser.pyc
deleted file mode 100755 (executable)
index 5a83c10..0000000
Binary files a/nlp_resource_data/nltk/parse/projectivedependencyparser.pyc and /dev/null differ
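
End to end, the class is driven as in projective_prob_parse_demo() above; a
minimal usage sketch (note that trees.sort() in parse() sorts ascending, so
the highest-scoring tree comes out last):

    from nltk.parse.dependencygraph import DependencyGraph, conll_data2
    from nltk.parse.projectivedependencyparser import (
        ProbabilisticProjectiveDependencyParser,
    )

    graphs = [DependencyGraph(entry) for entry in conll_data2.split("\n\n") if entry]
    ppdp = ProbabilisticProjectiveDependencyParser()
    ppdp.train(graphs)
    trees = list(ppdp.parse(["Cathy", "zag", "hen", "wild", "zwaaien", "."]))
    print(trees[-1])  # best-scoring parse
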
diff --git a/nlp_resource_data/nltk/parse/recursivedescent.py b/nlp_resource_data/nltk/parse/recursivedescent.py
old mode 100755 (executable)
new mode 100644 (file)
index a84a12f..8496d4c
@@ -1,15 +1,13 @@
 # Natural Language Toolkit: Recursive Descent Parser
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals
 
 from nltk.grammar import Nonterminal
 from nltk.tree import Tree, ImmutableTree
-from nltk.compat import unicode_repr
 
 from nltk.parse.api import ParserI
 
@@ -51,6 +49,7 @@ class RecursiveDescentParser(ParserI):
 
     :see: ``nltk.grammar``
     """
+
     def __init__(self, grammar, trace=0):
         """
         Create a new ``RecursiveDescentParser``, that uses ``grammar``
@@ -161,7 +160,7 @@ class RecursiveDescentParser(ParserI):
         """
 
         tree_leaf = tree[frontier[0]]
-        if (len(rtext) > 0 and tree_leaf == rtext[0]):
+        if len(rtext) > 0 and tree_leaf == rtext[0]:
             # If it's a terminal that matches rtext[0], then substitute
             # in the token, and continue parsing.
             newtree = tree.copy(deep=True)
@@ -207,8 +206,10 @@ class RecursiveDescentParser(ParserI):
             leaves that have not yet been matched.
         """
 
-        if production is None: productions = self._grammar.productions()
-        else: productions = [production]
+        if production is None:
+            productions = self._grammar.productions()
+        else:
+            productions = [production]
 
         for production in productions:
             lhs = production.lhs().symbol()
@@ -219,12 +220,14 @@ class RecursiveDescentParser(ParserI):
                 else:
                     newtree = tree.copy(deep=True)
                     newtree[frontier[0]] = subtree
-                new_frontier = [frontier[0]+(i,) for i in
-                                range(len(production.rhs()))]
+                new_frontier = [
+                    frontier[0] + (i,) for i in range(len(production.rhs()))
+                ]
                 if self._trace:
                     self._trace_expand(newtree, new_frontier, production)
-                for result in self._parse(remaining_text, newtree,
-                                          new_frontier + frontier[1:]):
+                for result in self._parse(
+                    remaining_text, newtree, new_frontier + frontier[1:]
+                ):
                     yield result
 
     def _production_to_tree(self, production):
@@ -274,17 +277,18 @@ class RecursiveDescentParser(ParserI):
         :rtype: None
         """
 
-        if treeloc == (): print("*", end=' ')
+        if treeloc == ():
+            print("*", end=" ")
         if isinstance(tree, Tree):
             if len(tree) == 0:
-                print(unicode_repr(Nonterminal(tree.label())), end=' ')
+                print(repr(Nonterminal(tree.label())), end=" ")
             for i in range(len(tree)):
                 if treeloc is not None and i == treeloc[0]:
                     self._trace_fringe(tree[i], treeloc[1:])
                 else:
                     self._trace_fringe(tree[i])
         else:
-            print(unicode_repr(tree), end=' ')
+            print(repr(tree), end=" ")
 
     def _trace_tree(self, tree, frontier, operation):
         """
@@ -294,34 +298,50 @@ class RecursiveDescentParser(ParserI):
             generated the current state.
         :rtype: None
         """
-        if self._trace == 2: print('  %c [' % operation, end=' ')
-        else: print('    [', end=' ')
-        if len(frontier) > 0: self._trace_fringe(tree, frontier[0])
-        else: self._trace_fringe(tree)
-        print(']')
+        if self._trace == 2:
+            print("  %c [" % operation, end=" ")
+        else:
+            print("    [", end=" ")
+        if len(frontier) > 0:
+            self._trace_fringe(tree, frontier[0])
+        else:
+            self._trace_fringe(tree)
+        print("]")
 
     def _trace_start(self, tree, frontier, text):
-        print('Parsing %r' % " ".join(text))
-        if self._trace > 2: print('Start:')
-        if self._trace > 1: self._trace_tree(tree, frontier, ' ')
+        print("Parsing %r" % " ".join(text))
+        if self._trace > 2:
+            print("Start:")
+        if self._trace > 1:
+            self._trace_tree(tree, frontier, " ")
 
     def _trace_expand(self, tree, frontier, production):
-        if self._trace > 2: print('Expand: %s' % production)
-        if self._trace > 1: self._trace_tree(tree, frontier, 'E')
+        if self._trace > 2:
+            print("Expand: %s" % production)
+        if self._trace > 1:
+            self._trace_tree(tree, frontier, "E")
 
     def _trace_match(self, tree, frontier, tok):
-        if self._trace > 2: print('Match: %r' % tok)
-        if self._trace > 1: self._trace_tree(tree, frontier, 'M')
+        if self._trace > 2:
+            print("Match: %r" % tok)
+        if self._trace > 1:
+            self._trace_tree(tree, frontier, "M")
 
     def _trace_succeed(self, tree, frontier):
-        if self._trace > 2: print('GOOD PARSE:')
-        if self._trace == 1: print('Found a parse:\n%s' % tree)
-        if self._trace > 1: self._trace_tree(tree, frontier, '+')
+        if self._trace > 2:
+            print("GOOD PARSE:")
+        if self._trace == 1:
+            print("Found a parse:\n%s" % tree)
+        if self._trace > 1:
+            self._trace_tree(tree, frontier, "+")
 
     def _trace_backtrack(self, tree, frontier, toks=None):
         if self._trace > 2:
-            if toks: print('Backtrack: %r match failed' % toks[0])
-            else: print('Backtrack')
+            if toks:
+                print("Backtrack: %r match failed" % toks[0])
+            else:
+                print("Backtrack")
+
 
 ##//////////////////////////////////////////////////////
 ##  Stepping Recursive Descent Parser
@@ -350,6 +370,7 @@ class SteppingRecursiveDescentParser(RecursiveDescentParser):
         or not to match a token.
     :see: ``nltk.grammar``
     """
+
     def __init__(self, grammar, trace=0):
         super(SteppingRecursiveDescentParser, self).__init__(grammar, trace)
         self._rtext = None
@@ -364,8 +385,8 @@ class SteppingRecursiveDescentParser(RecursiveDescentParser):
     # something nicer when we get the chance.
     def _freeze(self, tree):
         c = tree.copy()
-#        for pos in c.treepositions('leaves'):
-#            c[pos] = c[pos].freeze()
+        #        for pos in c.treepositions('leaves'):
+        #            c[pos] = c[pos].freeze()
         return ImmutableTree.convert(c)
 
     def parse(self, tokens):
@@ -436,11 +457,13 @@ class SteppingRecursiveDescentParser(RecursiveDescentParser):
         # Try matching (if we haven't already)
         if self.untried_match():
             token = self.match()
-            if token is not None: return token
+            if token is not None:
+                return token
 
         # Try expanding.
         production = self.expand()
-        if production is not None: return production
+        if production is not None:
+            return production
 
         # Try backtracking
         if self.backtrack():
@@ -475,7 +498,8 @@ class SteppingRecursiveDescentParser(RecursiveDescentParser):
         # If they didn't specify a production, check all untried ones.
         if production is None:
             productions = self.untried_expandable_productions()
-        else: productions = [production]
+        else:
+            productions = [production]
 
         parses = []
         for prod in productions:
@@ -526,7 +550,8 @@ class SteppingRecursiveDescentParser(RecursiveDescentParser):
         :return: true if an operation was successfully undone.
         :rtype: bool
         """
-        if len(self._history) == 0: return False
+        if len(self._history) == 0:
+            return False
         (self._rtext, self._tree, self._frontier) = self._history.pop()
         return True
 
@@ -537,14 +562,17 @@ class SteppingRecursiveDescentParser(RecursiveDescentParser):
         :rtype: list(Production)
         """
         # Make sure we *can* expand.
-        if len(self._frontier) == 0: return []
+        if len(self._frontier) == 0:
+            return []
         frontier_child = self._tree[self._frontier[0]]
-        if (len(self._frontier) == 0 or
-            not isinstance(frontier_child, Tree)):
+        if len(self._frontier) == 0 or not isinstance(frontier_child, Tree):
             return []
 
-        return [p for p in self._grammar.productions()
-                if p.lhs().symbol() == frontier_child.label()]
+        return [
+            p
+            for p in self._grammar.productions()
+            if p.lhs().symbol() == frontier_child.label()
+        ]
 
     def untried_expandable_productions(self):
         """
@@ -554,8 +582,7 @@ class SteppingRecursiveDescentParser(RecursiveDescentParser):
         """
 
         tried_expansions = self._tried_e.get(self._freeze(self._tree), [])
-        return [p for p in self.expandable_productions()
-                if p not in tried_expansions]
+        return [p for p in self.expandable_productions() if p not in tried_expansions]
 
     def untried_match(self):
         """
@@ -564,9 +591,10 @@ class SteppingRecursiveDescentParser(RecursiveDescentParser):
         :rtype: bool
         """
 
-        if len(self._rtext) == 0: return False
+        if len(self._rtext) == 0:
+            return False
         tried_matches = self._tried_m.get(self._freeze(self._tree), [])
-        return (self._rtext[0] not in tried_matches)
+        return self._rtext[0] not in tried_matches
 
     def currently_complete(self):
         """
@@ -574,7 +602,7 @@ class SteppingRecursiveDescentParser(RecursiveDescentParser):
             complete parse.
         :rtype: bool
         """
-        return (len(self._frontier) == 0 and len(self._rtext) == 0)
+        return len(self._frontier) == 0 and len(self._rtext) == 0
 
     def _parse(self, remaining_text, tree, frontier):
         """
@@ -591,13 +619,13 @@ class SteppingRecursiveDescentParser(RecursiveDescentParser):
         :return: ``[1]``
         :rtype: list of int
         """
-        self._history.append( (self._rtext, self._tree, self._frontier) )
+        self._history.append((self._rtext, self._tree, self._frontier))
         self._rtext = remaining_text
         self._tree = tree
         self._frontier = frontier
 
         # Is it a good parse?  If so, record it.
-        if (len(frontier) == 0 and len(remaining_text) == 0):
+        if len(frontier) == 0 and len(remaining_text) == 0:
             self._parses.append(tree)
             self._trace_succeed(self._tree, self._frontier)
 
@@ -620,10 +648,12 @@ class SteppingRecursiveDescentParser(RecursiveDescentParser):
         """
         self._grammar = grammar
 
+
 ##//////////////////////////////////////////////////////
 ##  Demonstration Code
 ##//////////////////////////////////////////////////////
 
+
 def demo():
     """
     A demonstration of the recursive descent parser.
@@ -631,7 +661,8 @@ def demo():
 
     from nltk import parse, CFG
 
-    grammar = CFG.fromstring("""
+    grammar = CFG.fromstring(
+        """
     S -> NP VP
     NP -> Det N | Det N PP
     VP -> V NP | V NP PP
@@ -641,15 +672,17 @@ def demo():
     Det -> 'the' | 'a'
     P -> 'in' | 'with'
     V -> 'saw'
-    """)
+    """
+    )
 
     for prod in grammar.productions():
         print(prod)
 
-    sent = 'I saw a man in the park'.split()
+    sent = "I saw a man in the park".split()
     parser = parse.RecursiveDescentParser(grammar, trace=2)
     for p in parser.parse(sent):
         print(p)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/parse/recursivedescent.pyc b/nlp_resource_data/nltk/parse/recursivedescent.pyc
deleted file mode 100755 (executable)
index 714523d..0000000
Binary files a/nlp_resource_data/nltk/parse/recursivedescent.pyc and /dev/null differ
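
Beyond parse(), the stepping subclass exposes the search one operation at a
time. A hedged sketch of driving it by hand, on a toy grammar that is an
assumption of this example rather than anything shipped with the module:

    from nltk import CFG
    from nltk.parse.recursivedescent import SteppingRecursiveDescentParser

    grammar = CFG.fromstring("""
    S -> NP VP
    NP -> 'I' | 'him'
    VP -> V NP
    V -> 'saw'
    """)
    parser = SteppingRecursiveDescentParser(grammar)
    parser.initialize("I saw him".split())
    while parser.step() is not None:  # expand, match, or backtrack, one move per call
        pass
    for tree in parser.parses():
        print(tree)  # (S (NP I) (VP (V saw) (NP him)))
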
diff --git a/nlp_resource_data/nltk/parse/shiftreduce.py b/nlp_resource_data/nltk/parse/shiftreduce.py
old mode 100755 (executable)
new mode 100644 (file)
index 7fc8289..5991465
@@ -1,15 +1,13 @@
 # Natural Language Toolkit: Shift-Reduce Parser
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals
 
 from nltk.grammar import Nonterminal
 from nltk.tree import Tree
-from nltk.compat import unicode_repr
 
 from nltk.parse.api import ParserI
 
@@ -57,6 +55,7 @@ class ShiftReduceParser(ParserI):
 
     :see: ``nltk.grammar``
     """
+
     def __init__(self, grammar, trace=0):
         """
         Create a new ``ShiftReduceParser``, that uses ``grammar`` to
@@ -87,17 +86,18 @@ class ShiftReduceParser(ParserI):
 
         # Trace output.
         if self._trace:
-            print('Parsing %r' % " ".join(tokens))
+            print("Parsing %r" % " ".join(tokens))
             self._trace_stack(stack, remaining_text)
 
         # iterate through the text, pushing the token onto
         # the stack, then reducing the stack.
         while len(remaining_text) > 0:
             self._shift(stack, remaining_text)
-            while self._reduce(stack, remaining_text): pass
+            while self._reduce(stack, remaining_text):
+                pass
 
         # Did we reduce everything?
-        if len(stack) == 1: 
+        if len(stack) == 1:
             # Did we end up with the right category?
             if stack[0].label() == self._grammar.start().symbol():
                 yield stack[0]
@@ -117,7 +117,8 @@ class ShiftReduceParser(ParserI):
         """
         stack.append(remaining_text[0])
         remaining_text.remove(remaining_text[0])
-        if self._trace: self._trace_shift(stack, remaining_text)
+        if self._trace:
+            self._trace_shift(stack, remaining_text)
 
     def _match_rhs(self, rhs, rightmost_stack):
         """
@@ -137,14 +138,19 @@ class ShiftReduceParser(ParserI):
             stack.
         """
 
-        if len(rightmost_stack) != len(rhs): return False
+        if len(rightmost_stack) != len(rhs):
+            return False
         for i in range(len(rightmost_stack)):
             if isinstance(rightmost_stack[i], Tree):
-                if not isinstance(rhs[i], Nonterminal): return False
-                if rightmost_stack[i].label() != rhs[i].symbol(): return False
+                if not isinstance(rhs[i], Nonterminal):
+                    return False
+                if rightmost_stack[i].label() != rhs[i].symbol():
+                    return False
             else:
-                if isinstance(rhs[i], Nonterminal): return False
-                if rightmost_stack[i] != rhs[i]: return False
+                if isinstance(rhs[i], Nonterminal):
+                    return False
+                if rightmost_stack[i] != rhs[i]:
+                    return False
         return True
 
     def _reduce(self, stack, remaining_text, production=None):
@@ -208,7 +214,7 @@ class ShiftReduceParser(ParserI):
         # 3: display which tokens & productions are shifted/reduced
         self._trace = trace
 
-    def _trace_stack(self, stack, remaining_text, marker=' '):
+    def _trace_stack(self, stack, remaining_text, marker=" "):
         """
         Print trace output displaying the given stack and text.
 
@@ -217,13 +223,13 @@ class ShiftReduceParser(ParserI):
             stack.  This is used with trace level 2 to print 'S'
             before shifted stacks and 'R' before reduced stacks.
         """
-        s = '  '+marker+' [ '
+        s = "  " + marker + " [ "
         for elt in stack:
             if isinstance(elt, Tree):
-                s += unicode_repr(Nonterminal(elt.label())) + ' '
+                s += repr(Nonterminal(elt.label())) + " "
             else:
-                s += unicode_repr(elt) + ' '
-        s += '* ' + ' '.join(remaining_text) + ']'
+                s += repr(elt) + " "
+        s += "* " + " ".join(remaining_text) + "]"
         print(s)
 
     def _trace_shift(self, stack, remaining_text):
@@ -232,9 +238,12 @@ class ShiftReduceParser(ParserI):
 
         :rtype: None
         """
-        if self._trace > 2: print('Shift %r:' % stack[-1])
-        if self._trace == 2: self._trace_stack(stack, remaining_text, 'S')
-        elif self._trace > 0: self._trace_stack(stack, remaining_text)
+        if self._trace > 2:
+            print("Shift %r:" % stack[-1])
+        if self._trace == 2:
+            self._trace_stack(stack, remaining_text, "S")
+        elif self._trace > 0:
+            self._trace_stack(stack, remaining_text)
 
     def _trace_reduce(self, stack, production, remaining_text):
         """
@@ -245,9 +254,11 @@ class ShiftReduceParser(ParserI):
         """
         if self._trace > 2:
             rhs = " ".join(production.rhs())
-            print('Reduce %r <- %s' % (production.lhs(), rhs))
-        if self._trace == 2: self._trace_stack(stack, remaining_text, 'R')
-        elif self._trace > 1: self._trace_stack(stack, remaining_text)
+            print("Reduce %r <- %s" % (production.lhs(), rhs))
+        if self._trace == 2:
+            self._trace_stack(stack, remaining_text, "R")
+        elif self._trace > 1:
+            self._trace_stack(stack, remaining_text)
 
     def _check_grammar(self):
         """
@@ -262,11 +273,12 @@ class ShiftReduceParser(ParserI):
         # Any production whose RHS is an extension of another production's RHS
         # will never be used.
         for i in range(len(productions)):
-            for j in range(i+1, len(productions)):
+            for j in range(i + 1, len(productions)):
                 rhs1 = productions[i].rhs()
                 rhs2 = productions[j].rhs()
-                if rhs1[:len(rhs2)] == rhs2:
-                    print('Warning: %r will never be used' % productions[i])
+                if rhs1[: len(rhs2)] == rhs2:
+                    print("Warning: %r will never be used" % productions[i])
+
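
A concrete trigger for this warning, as a toy sketch: because _reduce() fires
on the first matching RHS, a production whose RHS merely extends another's can
never win the reduction, and listing the longer production first makes the
check report it at construction time:

    from nltk import CFG
    from nltk.parse import ShiftReduceParser

    grammar = CFG.fromstring("""
    S -> NP
    NP -> Det N PP
    NP -> Det N
    PP -> P NP
    Det -> 'the'
    N -> 'dog' | 'park'
    P -> 'in'
    """)
    # __init__ runs _check_grammar(), which prints:
    #   Warning: NP -> Det N PP will never be used
    parser = ShiftReduceParser(grammar)
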
 
 ##//////////////////////////////////////////////////////
 ##  Stepping Shift/Reduce Parser
@@ -289,6 +301,7 @@ class SteppingShiftReduceParser(ShiftReduceParser):
         history is used to implement the ``undo`` operation.
     :see: ``nltk.grammar``
     """
+
     def __init__(self, grammar, trace=0):
         super(SteppingShiftReduceParser, self).__init__(grammar, trace)
         self._stack = None
@@ -350,8 +363,9 @@ class SteppingShiftReduceParser(ShiftReduceParser):
         :return: True if the shift operation was successful.
         :rtype: bool
         """
-        if len(self._remaining_text) == 0: return False
-        self._history.append( (self._stack[:], self._remaining_text[:]) )
+        if len(self._remaining_text) == 0:
+            return False
+        self._history.append((self._stack[:], self._remaining_text[:]))
         self._shift(self._stack, self._remaining_text)
         return True
 
@@ -367,11 +381,11 @@ class SteppingShiftReduceParser(ShiftReduceParser):
 
         :rtype: Production or None
         """
-        self._history.append( (self._stack[:], self._remaining_text[:]) )
-        return_val = self._reduce(self._stack, self._remaining_text,
-                                  production)
+        self._history.append((self._stack[:], self._remaining_text[:]))
+        return_val = self._reduce(self._stack, self._remaining_text, production)
 
-        if not return_val: self._history.pop()
+        if not return_val:
+            self._history.pop()
         return return_val
 
     def undo(self):
@@ -385,7 +399,8 @@ class SteppingShiftReduceParser(ShiftReduceParser):
         :return: true if an operation was successfully undone.
         :rtype: bool
         """
-        if len(self._history) == 0: return False
+        if len(self._history) == 0:
+            return False
         (self._stack, self._remaining_text) = self._history.pop()
         return True
 
@@ -408,13 +423,14 @@ class SteppingShiftReduceParser(ShiftReduceParser):
             parser so far.
         :rtype: iter(Tree)
         """
-        if (len(self._remaining_text) == 0 and
-            len(self._stack) == 1 and
-            self._stack[0].label() == self._grammar.start().symbol()
-            ):
+        if (
+            len(self._remaining_text) == 0
+            and len(self._stack) == 1
+            and self._stack[0].label() == self._grammar.start().symbol()
+        ):
             yield self._stack[0]
 
-# copied from nltk.parser
+    # copied from nltk.parser
 
     def set_grammar(self, grammar):
         """
@@ -425,10 +441,12 @@ class SteppingShiftReduceParser(ShiftReduceParser):
         """
         self._grammar = grammar
 
+
 ##//////////////////////////////////////////////////////
 ##  Demonstration Code
 ##//////////////////////////////////////////////////////
 
+
 def demo():
     """
     A demonstration of the shift-reduce parser.
@@ -436,7 +454,8 @@ def demo():
 
     from nltk import parse, CFG
 
-    grammar = CFG.fromstring("""
+    grammar = CFG.fromstring(
+        """
     S -> NP VP
     NP -> Det N | Det N PP
     VP -> V NP | V NP PP
@@ -446,13 +465,15 @@ def demo():
     Det -> 'the' | 'a'
     P -> 'in' | 'with'
     V -> 'saw'
-    """)
+    """
+    )
 
-    sent = 'I saw a man in the park'.split()
+    sent = "I saw a man in the park".split()
 
     parser = parse.ShiftReduceParser(grammar, trace=2)
     for p in parser.parse(sent):
         print(p)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/parse/shiftreduce.pyc b/nlp_resource_data/nltk/parse/shiftreduce.pyc
deleted file mode 100755 (executable)
index 6d91abc..0000000
Binary files a/nlp_resource_data/nltk/parse/shiftreduce.pyc and /dev/null differ
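
The stepping variant exposes shift() and reduce() individually; one way to
drive it to completion (an assumption of this sketch, mirroring the
shift-then-greedy-reduce policy of ShiftReduceParser.parse()) is to prefer
reductions and shift only when no reduction applies:

    from nltk import CFG
    from nltk.parse.shiftreduce import SteppingShiftReduceParser

    grammar = CFG.fromstring("""
    S -> NP VP
    NP -> 'I' | 'him'
    VP -> V NP
    V -> 'saw'
    """)
    parser = SteppingShiftReduceParser(grammar)
    parser.initialize("I saw him".split())
    while parser.reduce() or parser.shift():  # reduce greedily, shift when stuck
        pass
    for tree in parser.parses():
        print(tree)  # (S (NP I) (VP (V saw) (NP him)))
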
diff --git a/nlp_resource_data/nltk/parse/stanford.py b/nlp_resource_data/nltk/parse/stanford.py
old mode 100755 (executable)
new mode 100644 (file)
index 34939a9..4350b35
@@ -1,69 +1,82 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the Stanford Parser
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Xu <xxu@student.unimelb.edu.au>
 #
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import unicode_literals
-
 import tempfile
 import os
-import re
 import warnings
+from unittest import skip
 from subprocess import PIPE
-from io import StringIO
-
-from six import text_type
 
-from nltk.internals import find_jar, find_jar_iter, config_java, java, _java_options, find_jars_within_path
+from nltk.internals import (
+    find_jar_iter,
+    config_java,
+    java,
+    _java_options,
+    find_jars_within_path,
+)
 
 from nltk.parse.api import ParserI
 from nltk.parse.dependencygraph import DependencyGraph
 from nltk.tree import Tree
 
-_stanford_url = 'https://nlp.stanford.edu/software/lex-parser.shtml'
+_stanford_url = "https://nlp.stanford.edu/software/lex-parser.shtml"
+
 
 class GenericStanfordParser(ParserI):
     """Interface to the Stanford Parser"""
 
-    _MODEL_JAR_PATTERN = r'stanford-parser-(\d+)(\.(\d+))+-models\.jar'
-    _JAR = r'stanford-parser\.jar'
-    _MAIN_CLASS = 'edu.stanford.nlp.parser.lexparser.LexicalizedParser'
+    _MODEL_JAR_PATTERN = r"stanford-parser-(\d+)(\.(\d+))+-models\.jar"
+    _JAR = r"stanford-parser\.jar"
+    _MAIN_CLASS = "edu.stanford.nlp.parser.lexparser.LexicalizedParser"
 
     _USE_STDIN = False
     _DOUBLE_SPACED_OUTPUT = False
 
-    def __init__(self, path_to_jar=None, path_to_models_jar=None,
-                 model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz',
-                 encoding='utf8', verbose=False,
-                 java_options='-mx1000m', corenlp_options=''):
+    def __init__(
+        self,
+        path_to_jar=None,
+        path_to_models_jar=None,
+        model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz",
+        encoding="utf8",
+        verbose=False,
+        java_options="-mx4g",
+        corenlp_options="",
+    ):
 
         # find the most recent code and model jar
         stanford_jar = max(
             find_jar_iter(
-                self._JAR, path_to_jar,
-                env_vars=('STANFORD_PARSER', 'STANFORD_CORENLP'),
-                searchpath=(), url=_stanford_url,
-                verbose=verbose, is_regex=True
+                self._JAR,
+                path_to_jar,
+                env_vars=("STANFORD_PARSER", "STANFORD_CORENLP"),
+                searchpath=(),
+                url=_stanford_url,
+                verbose=verbose,
+                is_regex=True,
             ),
-            key=lambda model_path: os.path.dirname(model_path)
+            key=lambda model_path: os.path.dirname(model_path),
         )
 
-        model_jar=max(
+        model_jar = max(
             find_jar_iter(
-                self._MODEL_JAR_PATTERN, path_to_models_jar,
-                env_vars=('STANFORD_MODELS', 'STANFORD_CORENLP'),
-                searchpath=(), url=_stanford_url,
-                verbose=verbose, is_regex=True
+                self._MODEL_JAR_PATTERN,
+                path_to_models_jar,
+                env_vars=("STANFORD_MODELS", "STANFORD_CORENLP"),
+                searchpath=(),
+                url=_stanford_url,
+                verbose=verbose,
+                is_regex=True,
             ),
-            key=lambda model_path: os.path.dirname(model_path)
+            key=lambda model_path: os.path.dirname(model_path),
         )
 
-
-        #self._classpath = (stanford_jar, model_jar)
+        # self._classpath = (stanford_jar, model_jar)
 
         # Adding logging jar files to classpath
         stanford_dir = os.path.split(stanford_jar)[0]
@@ -80,17 +93,17 @@ class GenericStanfordParser(ParserI):
         cur_trees = []
         blank = False
         for line in output_.splitlines(False):
-            if line == '':
+            if line == "":
                 if blank:
                     res.append(iter(cur_trees))
                     cur_trees = []
                     blank = False
                 elif self._DOUBLE_SPACED_OUTPUT:
-                    cur_trees.append(self._make_tree('\n'.join(cur_lines)))
+                    cur_trees.append(self._make_tree("\n".join(cur_lines)))
                     cur_lines = []
                     blank = True
                 else:
-                    res.append(iter([self._make_tree('\n'.join(cur_lines))]))
+                    res.append(iter([self._make_tree("\n".join(cur_lines))]))
                     cur_lines = []
             else:
                 cur_lines.append(line)
@@ -112,14 +125,21 @@ class GenericStanfordParser(ParserI):
         """
         cmd = [
             self._MAIN_CLASS,
-            '-model', self.model_path,
-            '-sentences', 'newline',
-            '-outputFormat', self._OUTPUT_FORMAT,
-            '-tokenized',
-            '-escaper', 'edu.stanford.nlp.process.PTBEscapingProcessor',
+            "-model",
+            self.model_path,
+            "-sentences",
+            "newline",
+            "-outputFormat",
+            self._OUTPUT_FORMAT,
+            "-tokenized",
+            "-escaper",
+            "edu.stanford.nlp.process.PTBEscapingProcessor",
         ]
-        return self._parse_trees_output(self._execute(
-            cmd, '\n'.join(' '.join(sentence) for sentence in sentences), verbose))
+        return self._parse_trees_output(
+            self._execute(
+                cmd, "\n".join(" ".join(sentence) for sentence in sentences), verbose
+            )
+        )
 
     def raw_parse(self, sentence, verbose=False):
         """
@@ -145,11 +165,16 @@ class GenericStanfordParser(ParserI):
         """
         cmd = [
             self._MAIN_CLASS,
-            '-model', self.model_path,
-            '-sentences', 'newline',
-            '-outputFormat', self._OUTPUT_FORMAT,
+            "-model",
+            self.model_path,
+            "-sentences",
+            "newline",
+            "-outputFormat",
+            self._OUTPUT_FORMAT,
         ]
-        return self._parse_trees_output(self._execute(cmd, '\n'.join(sentences), verbose))
+        return self._parse_trees_output(
+            self._execute(cmd, "\n".join(sentences), verbose)
+        )
 
     def tagged_parse(self, sentence, verbose=False):
         """
@@ -173,36 +198,50 @@ class GenericStanfordParser(ParserI):
         :type sentences: list(list(tuple(str, str)))
         :rtype: iter(iter(Tree))
         """
-        tag_separator = '/'
+        tag_separator = "/"
         cmd = [
             self._MAIN_CLASS,
-            '-model', self.model_path,
-            '-sentences', 'newline',
-            '-outputFormat', self._OUTPUT_FORMAT,
-            '-tokenized',
-            '-tagSeparator', tag_separator,
-            '-tokenizerFactory', 'edu.stanford.nlp.process.WhitespaceTokenizer',
-            '-tokenizerMethod', 'newCoreLabelTokenizerFactory',
+            "-model",
+            self.model_path,
+            "-sentences",
+            "newline",
+            "-outputFormat",
+            self._OUTPUT_FORMAT,
+            "-tokenized",
+            "-tagSeparator",
+            tag_separator,
+            "-tokenizerFactory",
+            "edu.stanford.nlp.process.WhitespaceTokenizer",
+            "-tokenizerMethod",
+            "newCoreLabelTokenizerFactory",
         ]
         # We don't need to escape slashes as "splitting is done on the last instance of the character in the token"
-        return self._parse_trees_output(self._execute(
-            cmd, '\n'.join(' '.join(tag_separator.join(tagged) for tagged in sentence) for sentence in sentences), verbose))
+        return self._parse_trees_output(
+            self._execute(
+                cmd,
+                "\n".join(
+                    " ".join(tag_separator.join(tagged) for tagged in sentence)
+                    for sentence in sentences
+                ),
+                verbose,
+            )
+        )
 
     def _execute(self, cmd, input_, verbose=False):
         encoding = self._encoding
-        cmd.extend(['-encoding', encoding])
+        cmd.extend(["-encoding", encoding])
         if self.corenlp_options:
             cmd.append(self.corenlp_options)
 
-        default_options = ' '.join(_java_options)
+        default_options = " ".join(_java_options)
 
         # Configure java.
         config_java(options=self.java_options, verbose=verbose)
 
         # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
-        with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file:
+        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as input_file:
             # Write the actual sentences to the temporary input file
-            if isinstance(input_, text_type) and encoding:
+            if isinstance(input_, str) and encoding:
                 input_ = input_.encode(encoding)
             input_file.write(input_)
             input_file.flush()
@@ -210,15 +249,21 @@ class GenericStanfordParser(ParserI):
             # Run the tagger and get the output.
             if self._USE_STDIN:
                 input_file.seek(0)
-                stdout, stderr = java(cmd, classpath=self._classpath,
-                                      stdin=input_file, stdout=PIPE, stderr=PIPE)
+                stdout, stderr = java(
+                    cmd,
+                    classpath=self._classpath,
+                    stdin=input_file,
+                    stdout=PIPE,
+                    stderr=PIPE,
+                )
             else:
                 cmd.append(input_file.name)
-                stdout, stderr = java(cmd, classpath=self._classpath,
-                                      stdout=PIPE, stderr=PIPE)
+                stdout, stderr = java(
+                    cmd, classpath=self._classpath, stdout=PIPE, stderr=PIPE
+                )
 
-            stdout = stdout.replace(b'\xc2\xa0',b' ')
-            stdout = stdout.replace(b'\x00\xa0',b' ')
+            stdout = stdout.replace(b"\xc2\xa0", b" ")
+            stdout = stdout.replace(b"\x00\xa0", b" ")
             stdout = stdout.decode(encoding)
 
         os.unlink(input_file.name)
@@ -228,6 +273,7 @@ class GenericStanfordParser(ParserI):
 
         return stdout
 
+
 class StanfordParser(GenericStanfordParser):
     """
     >>> parser=StanfordParser(
@@ -257,8 +303,8 @@ class StanfordParser(GenericStanfordParser):
     [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]),
     Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP',
     [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']),
-    Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', ['-LRB-']),
-    Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', ['-RRB-'])])])])])])]
+    Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', [Tree('', []),
+    Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', [])])])])])])])]
 
     >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents((
     ...     (
@@ -279,7 +325,17 @@ class StanfordParser(GenericStanfordParser):
     [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])]
     """
 
-    _OUTPUT_FORMAT = 'penn'
+    _OUTPUT_FORMAT = "penn"
+
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "The StanfordParser will be deprecated\n"
+            "Please use \033[91mnltk.parse.corenlp.CoreNLPParser\033[0m instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+        super(StanfordParser, self).__init__(*args, **kwargs)
 
     def _make_tree(self, result):
         return Tree.fromstring(result)
@@ -335,16 +391,26 @@ class StanfordDependencyParser(GenericStanfordParser):
 
     """
 
-    _OUTPUT_FORMAT = 'conll2007'
+    _OUTPUT_FORMAT = "conll2007"
+
+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "The StanfordDependencyParser will be deprecated\n"
+            "Please use \033[91mnltk.parse.corenlp.CoreNLPDependencyParser\033[0m instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+        super(StanfordDependencyParser, self).__init__(*args, **kwargs)
 
     def _make_tree(self, result):
-        return DependencyGraph(result, top_relation_label='root')
+        return DependencyGraph(result, top_relation_label="root")
 
 
 class StanfordNeuralDependencyParser(GenericStanfordParser):
-    '''
+    """
     >>> from nltk.parse.stanford import StanfordNeuralDependencyParser
-    >>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx3g')
+    >>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx4g')
 
     >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE
     [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy']), '.'])]
@@ -371,42 +437,52 @@ class StanfordNeuralDependencyParser(GenericStanfordParser):
     ... ))], []) # doctest: +NORMALIZE_WHITESPACE
     [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends',
     ['my', "'"]), Tree('tabby', ['-LRB-', 'the', '-RRB-'])])]
-    '''
+    """
 
-    _OUTPUT_FORMAT = 'conll'
-    _MAIN_CLASS = 'edu.stanford.nlp.pipeline.StanfordCoreNLP'
-    _JAR = r'stanford-corenlp-(\d+)(\.(\d+))+\.jar'
-    _MODEL_JAR_PATTERN = r'stanford-corenlp-(\d+)(\.(\d+))+-models\.jar'
+    _OUTPUT_FORMAT = "conll"
+    _MAIN_CLASS = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
+    _JAR = r"stanford-corenlp-(\d+)(\.(\d+))+\.jar"
+    _MODEL_JAR_PATTERN = r"stanford-corenlp-(\d+)(\.(\d+))+-models\.jar"
     _USE_STDIN = True
     _DOUBLE_SPACED_OUTPUT = True
 
     def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "The StanfordNeuralDependencyParser will be deprecated\n"
+            "Please use \033[91mnltk.parse.corenlp.CoreNLPDependencyParser\033[0m instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
         super(StanfordNeuralDependencyParser, self).__init__(*args, **kwargs)
-        self.corenlp_options += '-annotators tokenize,ssplit,pos,depparse'
+        self.corenlp_options += "-annotators tokenize,ssplit,pos,depparse"
 
     def tagged_parse_sents(self, sentences, verbose=False):
-        '''
+        """
         Currently unimplemented because the neural dependency parser (and
         the StanfordCoreNLP pipeline class) doesn't support passing in pre-
         tagged tokens.
-        '''
+        """
         raise NotImplementedError(
-            'tagged_parse[_sents] is not supported by '
-            'StanfordNeuralDependencyParser; use '
-            'parse[_sents] or raw_parse[_sents] instead.'
+            "tagged_parse[_sents] is not supported by "
+            "StanfordNeuralDependencyParser; use "
+            "parse[_sents] or raw_parse[_sents] instead."
         )
 
     def _make_tree(self, result):
-        return DependencyGraph(result, top_relation_label='ROOT')
+        return DependencyGraph(result, top_relation_label="ROOT")
 
 
+@skip("doctests from nltk.parse.stanford are skipped because it's deprecated")
 def setup_module(module):
     from nose import SkipTest
 
     try:
         StanfordParser(
-            model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz'
+            model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"
         )
         StanfordNeuralDependencyParser()
     except LookupError:
-        raise SkipTest('doctests from nltk.parse.stanford are skipped because one of the stanford parser or CoreNLP jars doesn\'t exist')
+        raise SkipTest(
+            "doctests from nltk.parse.stanford are skipped because one of the stanford parser or CoreNLP jars doesn't exist"
+        )
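
The deprecation warnings added above point at the CoreNLP web-service
bindings. A minimal migration sketch; it assumes a CoreNLP server is already
listening on localhost:9000, which this module does not start for you:

    from nltk.parse.corenlp import CoreNLPParser, CoreNLPDependencyParser

    parser = CoreNLPParser(url="http://localhost:9000")
    print(next(parser.parse("The quick brown fox jumps".split())))

    dep_parser = CoreNLPDependencyParser(url="http://localhost:9000")
    parse, = dep_parser.parse("The quick brown fox jumps".split())
    print(parse.tree())
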
diff --git a/nlp_resource_data/nltk/parse/stanford.pyc b/nlp_resource_data/nltk/parse/stanford.pyc
deleted file mode 100755 (executable)
index bf3b11b..0000000
Binary files a/nlp_resource_data/nltk/parse/stanford.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/parse/transitionparser.py b/nlp_resource_data/nltk/parse/transitionparser.py
old mode 100755 (executable)
new mode 100644 (file)
index cad2261..6615288
@@ -2,19 +2,17 @@
 #
 # Author: Long Duong <longdt219@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 import tempfile
 import pickle
 
 from os import remove
 from copy import deepcopy
 from operator import itemgetter
+
 try:
     from numpy import array
     from scipy import sparse
@@ -26,7 +24,6 @@ except ImportError:
 from nltk.parse import ParserI, DependencyGraph, DependencyEvaluator
 
 
-
 class Configuration(object):
     """
     Class for holding a configuration, i.e. the partial analysis of the input sentence.
@@ -54,8 +51,14 @@ class Configuration(object):
         self._max_address = len(self.buffer)
 
     def __str__(self):
-        return 'Stack : ' + \
-            str(self.stack) + '  Buffer : ' + str(self.buffer) + '   Arcs : ' + str(self.arcs)
+        return (
+            "Stack : "
+            + str(self.stack)
+            + "  Buffer : "
+            + str(self.buffer)
+            + "   Arcs : "
+            + str(self.arcs)
+        )
 
     def _check_informative(self, feat, flag=False):
         """
@@ -64,10 +67,10 @@ class Configuration(object):
         """
         if feat is None:
             return False
-        if feat == '':
+        if feat == "":
             return False
         if flag is False:
-            if feat == '_':
+            if feat == "_":
                 return False
         return True
 
@@ -85,28 +88,28 @@ class Configuration(object):
             # Stack 0
             stack_idx0 = self.stack[len(self.stack) - 1]
             token = self._tokens[stack_idx0]
-            if self._check_informative(token['word'], True):
-                result.append('STK_0_FORM_' + token['word'])
-            if 'lemma' in token and self._check_informative(token['lemma']):
-                result.append('STK_0_LEMMA_' + token['lemma'])
-            if self._check_informative(token['tag']):
-                result.append('STK_0_POS_' + token['tag'])
-            if 'feats' in token and self._check_informative(token['feats']):
-                feats = token['feats'].split("|")
+            if self._check_informative(token["word"], True):
+                result.append("STK_0_FORM_" + token["word"])
+            if "lemma" in token and self._check_informative(token["lemma"]):
+                result.append("STK_0_LEMMA_" + token["lemma"])
+            if self._check_informative(token["tag"]):
+                result.append("STK_0_POS_" + token["tag"])
+            if "feats" in token and self._check_informative(token["feats"]):
+                feats = token["feats"].split("|")
                 for feat in feats:
-                    result.append('STK_0_FEATS_' + feat)
+                    result.append("STK_0_FEATS_" + feat)
             # Stack 1
             if len(self.stack) > 1:
                 stack_idx1 = self.stack[len(self.stack) - 2]
                 token = self._tokens[stack_idx1]
-                if self._check_informative(token['tag']):
-                    result.append('STK_1_POS_' + token['tag'])
+                if self._check_informative(token["tag"]):
+                    result.append("STK_1_POS_" + token["tag"])
 
             # Left most, right most dependency of stack[0]
             left_most = 1000000
             right_most = -1
-            dep_left_most = ''
-            dep_right_most = ''
+            dep_left_most = ""
+            dep_right_most = ""
             for (wi, r, wj) in self.arcs:
                 if wi == stack_idx0:
                     if (wj > wi) and (wj > right_most):
@@ -116,48 +119,48 @@ class Configuration(object):
                         left_most = wj
                         dep_left_most = r
             if self._check_informative(dep_left_most):
-                result.append('STK_0_LDEP_' + dep_left_most)
+                result.append("STK_0_LDEP_" + dep_left_most)
             if self._check_informative(dep_right_most):
-                result.append('STK_0_RDEP_' + dep_right_most)
+                result.append("STK_0_RDEP_" + dep_right_most)
 
         # Check Buffered 0
         if len(self.buffer) > 0:
             # Buffer 0
             buffer_idx0 = self.buffer[0]
             token = self._tokens[buffer_idx0]
-            if self._check_informative(token['word'], True):
-                result.append('BUF_0_FORM_' + token['word'])
-            if 'lemma' in token and self._check_informative(token['lemma']):
-                result.append('BUF_0_LEMMA_' + token['lemma'])
-            if self._check_informative(token['tag']):
-                result.append('BUF_0_POS_' + token['tag'])
-            if 'feats' in token and self._check_informative(token['feats']):
-                feats = token['feats'].split("|")
+            if self._check_informative(token["word"], True):
+                result.append("BUF_0_FORM_" + token["word"])
+            if "lemma" in token and self._check_informative(token["lemma"]):
+                result.append("BUF_0_LEMMA_" + token["lemma"])
+            if self._check_informative(token["tag"]):
+                result.append("BUF_0_POS_" + token["tag"])
+            if "feats" in token and self._check_informative(token["feats"]):
+                feats = token["feats"].split("|")
                 for feat in feats:
-                    result.append('BUF_0_FEATS_' + feat)
+                    result.append("BUF_0_FEATS_" + feat)
             # Buffer 1
             if len(self.buffer) > 1:
                 buffer_idx1 = self.buffer[1]
                 token = self._tokens[buffer_idx1]
-                if self._check_informative(token['word'], True):
-                    result.append('BUF_1_FORM_' + token['word'])
-                if self._check_informative(token['tag']):
-                    result.append('BUF_1_POS_' + token['tag'])
+                if self._check_informative(token["word"], True):
+                    result.append("BUF_1_FORM_" + token["word"])
+                if self._check_informative(token["tag"]):
+                    result.append("BUF_1_POS_" + token["tag"])
             if len(self.buffer) > 2:
                 buffer_idx2 = self.buffer[2]
                 token = self._tokens[buffer_idx2]
-                if self._check_informative(token['tag']):
-                    result.append('BUF_2_POS_' + token['tag'])
+                if self._check_informative(token["tag"]):
+                    result.append("BUF_2_POS_" + token["tag"])
             if len(self.buffer) > 3:
                 buffer_idx3 = self.buffer[3]
                 token = self._tokens[buffer_idx3]
-                if self._check_informative(token['tag']):
-                    result.append('BUF_3_POS_' + token['tag'])
+                if self._check_informative(token["tag"]):
+                    result.append("BUF_3_POS_" + token["tag"])
                     # Left most, right most dependency of buffer[0]
             left_most = 1000000
             right_most = -1
-            dep_left_most = ''
-            dep_right_most = ''
+            dep_left_most = ""
+            dep_right_most = ""
             for (wi, r, wj) in self.arcs:
                 if wi == buffer_idx0:
                     if (wj > wi) and (wj > right_most):
@@ -167,9 +170,9 @@ class Configuration(object):
                         left_most = wj
                         dep_left_most = r
             if self._check_informative(dep_left_most):
-                result.append('BUF_0_LDEP_' + dep_left_most)
+                result.append("BUF_0_LDEP_" + dep_left_most)
             if self._check_informative(dep_right_most):
-                result.append('BUF_0_RDEP_' + dep_right_most)
+                result.append("BUF_0_RDEP_" + dep_right_most)
 
         return result
 
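To make the feature templates above concrete, a small sketch of what extract_features() returns for the initial configuration of a toy three-word graph (the exact output may vary across nltk versions):

    from nltk.parse import DependencyGraph
    from nltk.parse.transitionparser import Configuration

    # Four-column Malt-TAB format: word, tag, head, relation.
    gold = DependencyGraph(
        "Economic\tJJ\t2\tATT\n"
        "news\tNN\t3\tSBJ\n"
        "hit\tVB\t0\tROOT\n"
    )
    conf = Configuration(gold)
    print(conf.extract_features())
    # ['STK_0_POS_TOP', 'BUF_0_FORM_Economic', 'BUF_0_POS_JJ',
    #  'BUF_1_FORM_news', 'BUF_1_POS_NN', 'BUF_2_POS_VB']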
@@ -179,11 +182,12 @@ class Transition(object):
     This class defines the set of transitions which are applied to a configuration to obtain another configuration.
     Note that the set of transitions differs between parsing algorithms.
     """
+
     # Define set of transitions
-    LEFT_ARC = 'LEFTARC'
-    RIGHT_ARC = 'RIGHTARC'
-    SHIFT = 'SHIFT'
-    REDUCE = 'REDUCE'
+    LEFT_ARC = "LEFTARC"
+    RIGHT_ARC = "RIGHTARC"
+    SHIFT = "SHIFT"
+    REDUCE = "REDUCE"
 
     def __init__(self, alg_option):
         """
@@ -192,10 +196,13 @@ class Transition(object):
         """
         self._algo = alg_option
         if alg_option not in [
-                TransitionParser.ARC_STANDARD,
-                TransitionParser.ARC_EAGER]:
-            raise ValueError(" Currently we only support %s and %s " %
-                                        (TransitionParser.ARC_STANDARD, TransitionParser.ARC_EAGER))
+            TransitionParser.ARC_STANDARD,
+            TransitionParser.ARC_EAGER,
+        ]:
+            raise ValueError(
+                " Currently we only support %s and %s "
+                % (TransitionParser.ARC_STANDARD, TransitionParser.ARC_EAGER)
+            )
 
     def left_arc(self, conf, relation):
         """
@@ -282,17 +289,20 @@ class TransitionParser(ParserI):
     """
     Class for a transition-based parser. Implements two algorithms: "arc-standard" and "arc-eager".
     """
-    ARC_STANDARD = 'arc-standard'
-    ARC_EAGER = 'arc-eager'
+
+    ARC_STANDARD = "arc-standard"
+    ARC_EAGER = "arc-eager"
 
     def __init__(self, algorithm):
         """
         :param algorithm: the algorithm option of this parser. Currently support `arc-standard` and `arc-eager` algorithm
         :type algorithm: str
         """
-        if not(algorithm in [self.ARC_STANDARD, self.ARC_EAGER]):
-            raise ValueError(" Currently we only support %s and %s " %
-                                        (self.ARC_STANDARD, self.ARC_EAGER))
+        if not (algorithm in [self.ARC_STANDARD, self.ARC_EAGER]):
+            raise ValueError(
+                " Currently we only support %s and %s "
+                % (self.ARC_STANDARD, self.ARC_EAGER)
+            )
         self._algorithm = algorithm
 
         self._dictionary = {}
@@ -303,11 +313,11 @@ class TransitionParser(ParserI):
         p_node = depgraph.nodes[idx_parent]
         c_node = depgraph.nodes[idx_child]
 
-        if c_node['word'] is None:
+        if c_node["word"] is None:
             return None  # Root word
 
-        if c_node['head'] == p_node['address']:
-            return c_node['rel']
+        if c_node["head"] == p_node["address"]:
+            return c_node["rel"]
         else:
             return None
 
@@ -323,16 +333,18 @@ class TransitionParser(ParserI):
             unsorted_result.append(self._dictionary[feature])
 
         # Default value of each feature is 1.0
-        return ' '.join(str(featureID) + ':1.0' for featureID in sorted(unsorted_result))
+        return " ".join(
+            str(featureID) + ":1.0" for featureID in sorted(unsorted_result)
+        )
 
     def _is_projective(self, depgraph):
         arc_list = []
         for key in depgraph.nodes:
             node = depgraph.nodes[key]
 
-            if 'head' in node:
-                childIdx = node['address']
-                parentIdx = node['head']
+            if "head" in node:
+                childIdx = node["address"]
+                parentIdx = node["head"]
                 if parentIdx is not None:
                     arc_list.append((parentIdx, childIdx))
 
@@ -358,8 +370,8 @@ class TransitionParser(ParserI):
         self._transition.setdefault(key, len(self._transition) + 1)
         self._match_transition[self._transition[key]] = key
 
-        input_str = str(self._transition[key]) + ' ' + binary_features + '\n'
-        input_file.write(input_str.encode('utf-8'))
+        input_str = str(self._transition[key]) + " " + binary_features + "\n"
+        input_file.write(input_str.encode("utf-8"))
 
     def _create_training_examples_arc_std(self, depgraphs, input_file):
         """
@@ -386,7 +398,7 @@ class TransitionParser(ParserI):
                     # Left-arc operation
                     rel = self._get_dep_relation(b0, s0, depgraph)
                     if rel is not None:
-                        key = Transition.LEFT_ARC + ':' + rel
+                        key = Transition.LEFT_ARC + ":" + rel
                         self._write_to_file(key, binary_features, input_file)
                         operation.left_arc(conf, rel)
                         training_seq.append(key)
@@ -407,11 +419,8 @@ class TransitionParser(ParserI):
                                         precondition = False
 
                         if precondition:
-                            key = Transition.RIGHT_ARC + ':' + rel
-                            self._write_to_file(
-                                key,
-                                binary_features,
-                                input_file)
+                            key = Transition.RIGHT_ARC + ":" + rel
+                            self._write_to_file(key, binary_features, input_file)
                             operation.right_arc(conf, rel)
                             training_seq.append(key)
                             continue
@@ -451,7 +460,7 @@ class TransitionParser(ParserI):
                     # Left-arc operation
                     rel = self._get_dep_relation(b0, s0, depgraph)
                     if rel is not None:
-                        key = Transition.LEFT_ARC + ':' + rel
+                        key = Transition.LEFT_ARC + ":" + rel
                         self._write_to_file(key, binary_features, input_file)
                         operation.left_arc(conf, rel)
                         training_seq.append(key)
@@ -460,7 +469,7 @@ class TransitionParser(ParserI):
                     # Right-arc operation
                     rel = self._get_dep_relation(s0, b0, depgraph)
                     if rel is not None:
-                        key = Transition.RIGHT_ARC + ':' + rel
+                        key = Transition.RIGHT_ARC + ":" + rel
                         self._write_to_file(key, binary_features, input_file)
                         operation.right_arc(conf, rel)
                         training_seq.append(key)
@@ -500,9 +509,8 @@ class TransitionParser(ParserI):
 
         try:
             input_file = tempfile.NamedTemporaryFile(
-                prefix='transition_parse.train',
-                dir=tempfile.gettempdir(),
-                delete=False)
+                prefix="transition_parse.train", dir=tempfile.gettempdir(), delete=False
+            )
 
             if self._algorithm == self.ARC_STANDARD:
                 self._create_training_examples_arc_std(depgraphs, input_file)
@@ -517,17 +525,18 @@ class TransitionParser(ParserI):
             # TODO: probability=True makes training very slow, because it
             # triggers internal cross-validation; the speed here needs improvement.
             model = svm.SVC(
-                kernel='poly',
+                kernel="poly",
                 degree=2,
                 coef0=0,
                 gamma=0.2,
                 C=0.5,
                 verbose=verbose,
-                probability=True)
+                probability=True,
+            )
 
             model.fit(x_train, y_train)
             # Save the model to file name (as pickle)
-            pickle.dump(model, open(modelfile, 'wb'))
+            pickle.dump(model, open(modelfile, "wb"))
         finally:
             remove(input_file.name)
 
@@ -541,7 +550,7 @@ class TransitionParser(ParserI):
         """
         result = []
         # First load the model
-        model = pickle.load(open(modelFile, 'rb'))
+        model = pickle.load(open(modelFile, "rb"))
         operation = Transition(self._algorithm)
 
         for depgraph in depgraphs:
@@ -560,13 +569,15 @@ class TransitionParser(ParserI):
                 np_row = array(row)
                 np_data = array(data)
 
-                x_test = sparse.csr_matrix((np_data, (np_row, np_col)), shape=(1, len(self._dictionary)))
+                x_test = sparse.csr_matrix(
+                    (np_data, (np_row, np_col)), shape=(1, len(self._dictionary))
+                )
 
                 # It would be best to use the decision function as follows, BUT it is not yet supported for sparse SVM
                 # Using the decision function to build the votes array:
-                #dec_func = model.decision_function(x_test)[0]
-                #votes = {}
-                #k = 0
+                # dec_func = model.decision_function(x_test)[0]
+                # votes = {}
+                # k = 0
                 # for i in range(len(model.classes_)):
                 #    for j in range(i+1, len(model.classes_)):
                 #        #if  dec_func[k] > 0:
@@ -577,21 +588,18 @@ class TransitionParser(ParserI):
                 #           votes[j] +=1
                 #        k +=1
                 # Sort votes according to the values
-                #sorted_votes = sorted(votes.items(), key=itemgetter(1), reverse=True)
+                # sorted_votes = sorted(votes.items(), key=itemgetter(1), reverse=True)
 
                 # We will use predict_proba instead of decision_function
                 prob_dict = {}
                 pred_prob = model.predict_proba(x_test)[0]
                 for i in range(len(pred_prob)):
                     prob_dict[i] = pred_prob[i]
-                sorted_Prob = sorted(
-                    prob_dict.items(),
-                    key=itemgetter(1),
-                    reverse=True)
+                sorted_Prob = sorted(prob_dict.items(), key=itemgetter(1), reverse=True)
 
                 # Note that SHIFT is always a valid operation
                 for (y_pred_idx, confidence) in sorted_Prob:
-                    #y_pred = model.predict(x_test)[0]
+                    # y_pred = model.predict(x_test)[0]
                     # From the prediction match to the operation
                     y_pred = model.classes_[y_pred_idx]
 
@@ -600,10 +608,16 @@ class TransitionParser(ParserI):
                         baseTransition = strTransition.split(":")[0]
 
                         if baseTransition == Transition.LEFT_ARC:
-                            if operation.left_arc(conf, strTransition.split(":")[1]) != -1:
+                            if (
+                                operation.left_arc(conf, strTransition.split(":")[1])
+                                != -1
+                            ):
                                 break
                         elif baseTransition == Transition.RIGHT_ARC:
-                            if operation.right_arc(conf, strTransition.split(":")[1]) != -1:
+                            if (
+                                operation.right_arc(conf, strTransition.split(":")[1])
+                                != -1
+                            ):
                                 break
                         elif baseTransition == Transition.REDUCE:
                             if operation.reduce(conf) != -1:
@@ -612,20 +626,22 @@ class TransitionParser(ParserI):
                             if operation.shift(conf) != -1:
                                 break
                     else:
-                        raise ValueError("The predicted transition is not recognized, expected errors")
+                        raise ValueError(
+                            "The predicted transition is not recognized, expected errors"
+                        )
 
             # Finish with operations build the dependency graph from Conf.arcs
 
             new_depgraph = deepcopy(depgraph)
             for key in new_depgraph.nodes:
                 node = new_depgraph.nodes[key]
-                node['rel'] = ''
+                node["rel"] = ""
                 # With the default, all the token depend on the Root
-                node['head'] = 0
+                node["head"] = 0
             for (head, rel, child) in conf.arcs:
                 c_node = new_depgraph.nodes[child]
-                c_node['head'] = head
-                c_node['rel'] = rel
+                c_node["head"] = head
+                c_node["rel"] = rel
             result.append(new_depgraph)
 
         return result
@@ -771,4 +787,3 @@ def demo():
 
     Note that the result is very poor because there is only one training example.
     """
-
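End to end, training and parsing with the class patched above looks roughly like this (requires numpy, scipy and scikit-learn, per the guarded imports at the top of the file; with a single training graph the scores are not meaningful, as the demo docstring notes):

    from nltk.parse import DependencyGraph, DependencyEvaluator
    from nltk.parse.transitionparser import TransitionParser

    gold = DependencyGraph("Economic\tJJ\t2\tATT\nnews\tNN\t3\tSBJ\nhit\tVB\t0\tROOT\n")
    parser = TransitionParser('arc-standard')
    parser.train([gold], 'temp.arcstd.model', verbose=False)
    parsed = parser.parse([gold], 'temp.arcstd.model')
    print(DependencyEvaluator(parsed, [gold]).eval())   # (labeled, unlabeled) attachment scores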
diff --git a/nlp_resource_data/nltk/parse/transitionparser.pyc b/nlp_resource_data/nltk/parse/transitionparser.pyc
deleted file mode 100755 (executable)
index 98676f7..0000000
Binary files a/nlp_resource_data/nltk/parse/transitionparser.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/parse/util.py b/nlp_resource_data/nltk/parse/util.py
old mode 100755 (executable)
new mode 100644 (file)
index e8694b6..34630a0
@@ -2,7 +2,7 @@
 #
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
@@ -10,7 +10,6 @@
 """
 Utility functions for parsers.
 """
-from __future__ import print_function
 
 from nltk.grammar import CFG, FeatureGrammar, PCFG
 from nltk.data import load
@@ -19,9 +18,10 @@ from nltk.parse.chart import Chart, ChartParser
 from nltk.parse.pchart import InsideChartParser
 from nltk.parse.featurechart import FeatureChart, FeatureChartParser
 
-def load_parser(grammar_url, trace=0,
-                parser=None, chart_class=None,
-                beam_size=0, **load_args):
+
+def load_parser(
+    grammar_url, trace=0, parser=None, chart_class=None, beam_size=0, **load_args
+):
     """
     Load a grammar from a file, and build a parser based on that grammar.
     The parser depends on the grammar format, and might also depend
@@ -55,8 +55,7 @@ def load_parser(grammar_url, trace=0,
     """
     grammar = load(grammar_url, **load_args)
     if not isinstance(grammar, CFG):
-        raise ValueError("The grammar must be a CFG, "
-                         "or a subclass thereof.")
+        raise ValueError("The grammar must be a CFG, " "or a subclass thereof.")
     if isinstance(grammar, PCFG):
         if parser is None:
             parser = InsideChartParser
@@ -69,85 +68,89 @@ def load_parser(grammar_url, trace=0,
             chart_class = FeatureChart
         return parser(grammar, trace=trace, chart_class=chart_class)
 
-    else: # Plain CFG.
+    else:  # Plain CFG.
         if parser is None:
             parser = ChartParser
         if chart_class is None:
             chart_class = Chart
         return parser(grammar, trace=trace, chart_class=chart_class)
 
+
 def taggedsent_to_conll(sentence):
-       """
-       A module to convert a single POS tagged sentence into CONLL format.
-       
-       >>> from nltk import word_tokenize, pos_tag
-       >>> text = "This is a foobar sentence."
-       >>> for line in taggedsent_to_conll(pos_tag(word_tokenize(text))):
-       ...     print(line, end="")
+    """
+    Convert a single POS-tagged sentence into CONLL format.
+
+    >>> from nltk import word_tokenize, pos_tag
+    >>> text = "This is a foobar sentence."
+    >>> for line in taggedsent_to_conll(pos_tag(word_tokenize(text))):
+    ...     print(line, end="")
         1      This    _       DT      DT      _       0       a       _       _
         2      is      _       VBZ     VBZ     _       0       a       _       _
         3      a       _       DT      DT      _       0       a       _       _
         4      foobar  _       JJ      JJ      _       0       a       _       _
         5      sentence        _       NN      NN      _       0       a       _       _
         6      .               _       .       .       _       0       a       _       _
-       
-       :param sentence: A single input sentence to parse
-       :type sentence: list(tuple(str, str))
-       :rtype: iter(str) 
-       :return: a generator yielding a single sentence in CONLL format.
-       """
-       for (i, (word, tag)) in enumerate(sentence, start=1):
-               input_str = [str(i), word, '_', tag, tag, '_', '0', 'a', '_', '_']
-               input_str = "\t".join(input_str) + "\n"
-               yield input_str
+
+    :param sentence: A single input sentence to parse
+    :type sentence: list(tuple(str, str))
+    :rtype: iter(str)
+    :return: a generator yielding a single sentence in CONLL format.
+    """
+    for (i, (word, tag)) in enumerate(sentence, start=1):
+        input_str = [str(i), word, "_", tag, tag, "_", "0", "a", "_", "_"]
+        input_str = "\t".join(input_str) + "\n"
+        yield input_str
 
 
 def taggedsents_to_conll(sentences):
-       """
-       A module to convert the a POS tagged document stream
-       (i.e. list of list of tuples, a list of sentences) and yield lines 
-       in CONLL format. This module yields one line per word and two newlines 
-       for end of sentence. 
-
-       >>> from nltk import word_tokenize, sent_tokenize, pos_tag
-       >>> text = "This is a foobar sentence. Is that right?"
-       >>> sentences = [pos_tag(word_tokenize(sent)) for sent in sent_tokenize(text)]
-       >>> for line in taggedsents_to_conll(sentences):
-        ...     if line:
-       ...         print(line, end="")
-        1      This    _       DT      DT      _       0       a       _       _
-        2      is      _       VBZ     VBZ     _       0       a       _       _
-        3      a       _       DT      DT      _       0       a       _       _
-        4      foobar  _       JJ      JJ      _       0       a       _       _
-        5      sentence        _       NN      NN      _       0       a       _       _
-        6      .               _       .       .       _       0       a       _       _
-        <BLANKLINE>
-        <BLANKLINE>
-        1      Is      _       VBZ     VBZ     _       0       a       _       _
-        2      that    _       IN      IN      _       0       a       _       _
-        3      right   _       NN      NN      _       0       a       _       _
-        4      ?       _       .       .       _       0       a       _       _
-        <BLANKLINE>
-        <BLANKLINE>
-
-       :param sentences: Input sentences to parse
-       :type sentence: list(list(tuple(str, str)))
-       :rtype: iter(str) 
-       :return: a generator yielding sentences in CONLL format.
-       """
-       for sentence in sentences:
-               for input_str in taggedsent_to_conll(sentence):
-                       yield input_str
-               yield '\n\n'            
+    """
+    Convert a POS-tagged document stream (i.e. a list of sentences, each a
+    list of (word, tag) tuples) and yield lines in CONLL format. One line
+    is yielded per word, plus two newlines at the end of each sentence.
+
+    >>> from nltk import word_tokenize, sent_tokenize, pos_tag
+    >>> text = "This is a foobar sentence. Is that right?"
+    >>> sentences = [pos_tag(word_tokenize(sent)) for sent in sent_tokenize(text)]
+    >>> for line in taggedsents_to_conll(sentences):
+    ...     if line:
+    ...         print(line, end="")
+    1  This    _       DT      DT      _       0       a       _       _
+    2  is      _       VBZ     VBZ     _       0       a       _       _
+    3  a       _       DT      DT      _       0       a       _       _
+    4  foobar  _       JJ      JJ      _       0       a       _       _
+    5  sentence        _       NN      NN      _       0       a       _       _
+    6  .               _       .       .       _       0       a       _       _
+    <BLANKLINE>
+    <BLANKLINE>
+    1  Is      _       VBZ     VBZ     _       0       a       _       _
+    2  that    _       IN      IN      _       0       a       _       _
+    3  right   _       NN      NN      _       0       a       _       _
+    4  ?       _       .       .       _       0       a       _       _
+    <BLANKLINE>
+    <BLANKLINE>
+
+    :param sentences: Input sentences to parse
+    :type sentences: list(list(tuple(str, str)))
+    :rtype: iter(str)
+    :return: a generator yielding sentences in CONLL format.
+    """
+    for sentence in sentences:
+        for input_str in taggedsent_to_conll(sentence):
+            yield input_str
+        yield "\n\n"
+
 
 ######################################################################
-#{ Test Suites
+# { Test Suites
 ######################################################################
 
+
 class TestGrammar(object):
     """
     Unit tests for CFG.
     """
+
     def __init__(self, grammar, suite, accept=None, reject=None):
         self.test_grammar = grammar
 
@@ -156,7 +159,6 @@ class TestGrammar(object):
         self._accept = accept
         self._reject = reject
 
-
     def run(self, show_trees=False):
         """
         Sentences in the test suite are divided into two classes:
@@ -167,8 +169,8 @@ class TestGrammar(object):
         according to the grammar, then the value of ``trees`` will be None.
         """
         for test in self.suite:
-            print(test['doc'] + ":", end=' ')
-            for key in ['accept', 'reject']:
+            print(test["doc"] + ":", end=" ")
+            for key in ["accept", "reject"]:
                 for sent in test[key]:
                     tokens = sent.split()
                     trees = list(self.cp.parse(tokens))
@@ -177,7 +179,7 @@ class TestGrammar(object):
                         print(sent)
                         for tree in trees:
                             print(tree)
-                    if key == 'accept':
+                    if key == "accept":
                         if trees == []:
                             raise ValueError("Sentence '%s' failed to parse'" % sent)
                         else:
@@ -190,6 +192,7 @@ class TestGrammar(object):
             if accepted and rejected:
                 print("All tests passed!")
 
+
 def extract_test_sentences(string, comment_chars="#%;", encoding=None):
     """
     Parses a string with one test sentence per line.
@@ -209,14 +212,14 @@ def extract_test_sentences(string, comment_chars="#%;", encoding=None):
     if encoding is not None:
         string = string.decode(encoding)
     sentences = []
-    for sentence in string.split('\n'):
-        if sentence == '' or sentence[0] in comment_chars:
+    for sentence in string.split("\n"):
+        if sentence == "" or sentence[0] in comment_chars:
             continue
-        split_info = sentence.split(':', 1)
+        split_info = sentence.split(":", 1)
         result = None
         if len(split_info) == 2:
-            if split_info[0] in ['True','true','False','false']:
-                result = split_info[0] in ['True','true']
+            if split_info[0] in ["True", "true", "False", "false"]:
+                result = split_info[0] in ["True", "true"]
                 sentence = split_info[1]
             else:
                 result = int(split_info[0])
@@ -227,5 +230,6 @@ def extract_test_sentences(string, comment_chars="#%;", encoding=None):
         sentences += [(tokens, result)]
     return sentences
 
+
 # nose thinks it is a test
 extract_test_sentences.__test__ = False
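For reference, a typical load_parser() call, assuming the nltk_data grammar packages are installed (e.g. via nltk.download('book_grammars')):

    from nltk.parse.util import load_parser

    parser = load_parser('grammars/book_grammars/feat0.fcfg', trace=0)
    for tree in parser.parse('Kim likes children'.split()):
        print(tree)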
diff --git a/nlp_resource_data/nltk/parse/util.pyc b/nlp_resource_data/nltk/parse/util.pyc
deleted file mode 100755 (executable)
index 8564030..0000000
Binary files a/nlp_resource_data/nltk/parse/util.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/parse/viterbi.py b/nlp_resource_data/nltk/parse/viterbi.py
old mode 100755 (executable)
new mode 100644 (file)
index dce5979..bcb9687
@@ -1,15 +1,13 @@
 # Natural Language Toolkit: Viterbi Probabilistic Parser
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals
 
 from functools import reduce
 from nltk.tree import Tree, ProbabilisticTree
-from nltk.compat import python_2_unicode_compatible
 
 from nltk.parse.api import ParserI
 
@@ -17,7 +15,7 @@ from nltk.parse.api import ParserI
 ##  Viterbi PCFG Parser
 ##//////////////////////////////////////////////////////
 
-@python_2_unicode_compatible
+
 class ViterbiParser(ParserI):
     """
     A bottom-up ``PCFG`` parser that uses dynamic programming to find
@@ -73,6 +71,7 @@ class ViterbiParser(ParserI):
     :ivar _trace: The level of tracing output that should be generated
         when parsing a text.
     """
+
     def __init__(self, grammar, trace=0):
         """
         Create a new ``ViterbiParser`` parser, that uses ``grammar`` to
@@ -121,24 +120,27 @@ class ViterbiParser(ParserI):
 
         # Initialize the constituents dictionary with the words from
         # the text.
-        if self._trace: print(('Inserting tokens into the most likely'+
-                               ' constituents table...'))
+        if self._trace:
+            print(("Inserting tokens into the most likely" + " constituents table..."))
         for index in range(len(tokens)):
             token = tokens[index]
-            constituents[index,index+1,token] = token
+            constituents[index, index + 1, token] = token
             if self._trace > 1:
                 self._trace_lexical_insertion(token, index, len(tokens))
 
         # Consider each span of length 1, 2, ..., n; and add any trees
         # that might cover that span to the constituents dictionary.
-        for length in range(1, len(tokens)+1):
+        for length in range(1, len(tokens) + 1):
             if self._trace:
-                print(('Finding the most likely constituents'+
-                       ' spanning %d text elements...' % length))
-            for start in range(len(tokens)-length+1):
-                span = (start, start+length)
-                self._add_constituents_spanning(span, constituents,
-                                                tokens)
+                print(
+                    (
+                        "Finding the most likely constituents"
+                        + " spanning %d text elements..." % length
+                    )
+                )
+            for start in range(len(tokens) - length + 1):
+                span = (start, start + length)
+                self._add_constituents_spanning(span, constituents, tokens)
 
         # Return the tree that spans the entire text & have the right cat
         tree = constituents.get((0, len(tokens), self._grammar.start()))
@@ -195,8 +197,7 @@ class ViterbiParser(ParserI):
             # probability.
             for (production, children) in instantiations:
                 subtrees = [c for c in children if isinstance(c, Tree)]
-                p = reduce(lambda pr,t:pr*t.prob(),
-                           subtrees, production.prob())
+                p = reduce(lambda pr, t: pr * t.prob(), subtrees, production.prob())
                 node = production.lhs().symbol()
                 tree = ProbabilisticTree(node, children, prob=p)
 
@@ -206,9 +207,9 @@ class ViterbiParser(ParserI):
                 if self._trace > 1:
                     if c is None or c != tree:
                         if c is None or c.prob() < tree.prob():
-                            print('   Insert:', end=' ')
+                            print("   Insert:", end=" ")
                         else:
-                            print('  Discard:', end=' ')
+                            print("  Discard:", end=" ")
                         self._trace_production(production, p, span, len(tokens))
                 if c is None or c.prob() < tree.prob():
                     constituents[span[0], span[1], production.lhs()] = tree
@@ -243,7 +244,7 @@ class ViterbiParser(ParserI):
             childlists = self._match_rhs(production.rhs(), span, constituents)
 
             for childlist in childlists:
-                rv.append( (production, childlist) )
+                rv.append((production, childlist))
         return rv
 
     def _match_rhs(self, rhs, span, constituents):
@@ -275,16 +276,18 @@ class ViterbiParser(ParserI):
         (start, end) = span
 
         # Base case
-        if start >= end and rhs == (): return [[]]
-        if start >= end or rhs == (): return []
+        if start >= end and rhs == ():
+            return [[]]
+        if start >= end or rhs == ():
+            return []
 
         # Find everything that matches the 1st symbol of the RHS
         childlists = []
-        for split in range(start, end+1):
-            l=constituents.get((start,split,rhs[0]))
+        for split in range(start, end + 1):
+            l = constituents.get((start, split, rhs[0]))
             if l is not None:
-                rights = self._match_rhs(rhs[1:], (split,end), constituents)
-                childlists += [[l]+r for r in rights]
+                rights = self._match_rhs(rhs[1:], (split, end), constituents)
+                childlists += [[l] + r for r in rights]
 
         return childlists
 
@@ -302,27 +305,29 @@ class ViterbiParser(ParserI):
         :rtype: None
         """
 
-        str = '|' + '.' * span[0]
-        str += '=' * (span[1] - span[0])
-        str += '.' * (width - span[1]) + '| '
-        str += '%s' % production
-        if self._trace > 2: str = '%-40s %12.10f ' % (str, p)
+        str = "|" + "." * span[0]
+        str += "=" * (span[1] - span[0])
+        str += "." * (width - span[1]) + "| "
+        str += "%s" % production
+        if self._trace > 2:
+            str = "%-40s %12.10f " % (str, p)
 
         print(str)
 
     def _trace_lexical_insertion(self, token, index, width):
-        str = '   Insert: |' + '.' * index + '=' + '.' * (width-index-1) + '| '
-        str += '%s' % (token,)
+        str = "   Insert: |" + "." * index + "=" + "." * (width - index - 1) + "| "
+        str += "%s" % (token,)
         print(str)
 
     def __repr__(self):
-        return '<ViterbiParser for %r>' % self._grammar
+        return "<ViterbiParser for %r>" % self._grammar
 
 
 ##//////////////////////////////////////////////////////
 ##  Test Code
 ##//////////////////////////////////////////////////////
 
+
 def demo():
     """
     A demonstration of the probabilistic parsers.  The user is
@@ -336,21 +341,23 @@ def demo():
     from nltk.grammar import toy_pcfg1, toy_pcfg2
 
     # Define two demos.  Each demo has a sentence and a grammar.
-    demos = [('I saw the man with my telescope', toy_pcfg1),
-             ('the boy saw Jack with Bob under the table with a telescope', toy_pcfg2)]
+    demos = [
+        ("I saw the man with my telescope", toy_pcfg1),
+        ("the boy saw Jack with Bob under the table with a telescope", toy_pcfg2),
+    ]
 
     # Ask the user which demo they want to use.
     print()
     for i in range(len(demos)):
-        print('%3s: %s' % (i+1, demos[i][0]))
-        print('     %r' % demos[i][1])
+        print("%3s: %s" % (i + 1, demos[i][0]))
+        print("     %r" % demos[i][1])
         print()
-    print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
+    print("Which demo (%d-%d)? " % (1, len(demos)), end=" ")
     try:
-        snum = int(sys.stdin.readline().strip())-1
+        snum = int(sys.stdin.readline().strip()) - 1
         sent, grammar = demos[snum]
     except:
-        print('Bad sentence number')
+        print("Bad sentence number")
         return
 
     # Tokenize the sentence.
@@ -359,43 +366,47 @@ def demo():
     parser = ViterbiParser(grammar)
     all_parses = {}
 
-    print('\nsent: %s\nparser: %s\ngrammar: %s' % (sent,parser,grammar))
+    print("\nsent: %s\nparser: %s\ngrammar: %s" % (sent, parser, grammar))
     parser.trace(3)
     t = time.time()
     parses = parser.parse_all(tokens)
-    time = time.time()-t
-    average = (reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses)
-               if parses else 0)
+    time = time.time() - t
+    average = (
+        reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses) if parses else 0
+    )
     num_parses = len(parses)
     for p in parses:
         all_parses[p.freeze()] = 1
 
     # Print some summary statistics
     print()
-    print('Time (secs)   # Parses   Average P(parse)')
-    print('-----------------------------------------')
-    print('%11.4f%11d%19.14f' % (time, num_parses, average))
+    print("Time (secs)   # Parses   Average P(parse)")
+    print("-----------------------------------------")
+    print("%11.4f%11d%19.14f" % (time, num_parses, average))
     parses = all_parses.keys()
     if parses:
-        p = reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses)
-    else: p = 0
-    print('------------------------------------------')
-    print('%11s%11d%19.14f' % ('n/a', len(parses), p))
+        p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
+    else:
+        p = 0
+    print("------------------------------------------")
+    print("%11s%11d%19.14f" % ("n/a", len(parses), p))
 
     # Ask the user if we should draw the parses.
     print()
-    print('Draw parses (y/n)? ', end=' ')
-    if sys.stdin.readline().strip().lower().startswith('y'):
+    print("Draw parses (y/n)? ", end=" ")
+    if sys.stdin.readline().strip().lower().startswith("y"):
         from nltk.draw.tree import draw_trees
-        print('  please wait...')
+
+        print("  please wait...")
         draw_trees(*parses)
 
     # Ask the user if we should print the parses.
     print()
-    print('Print parses (y/n)? ', end=' ')
-    if sys.stdin.readline().strip().lower().startswith('y'):
+    print("Print parses (y/n)? ", end=" ")
+    if sys.stdin.readline().strip().lower().startswith("y"):
         for parse in parses:
             print(parse)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
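A non-interactive sketch of the parser the demo above drives, using one of the toy grammars it lists:

    from nltk.grammar import toy_pcfg1
    from nltk.parse import ViterbiParser

    parser = ViterbiParser(toy_pcfg1)
    for tree in parser.parse('I saw the man with my telescope'.split()):
        print(tree)   # the single most probable tree, annotated with its probability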
diff --git a/nlp_resource_data/nltk/probability.py b/nlp_resource_data/nltk/probability.py
old mode 100755 (executable)
new mode 100644 (file)
index 0528c2b..5a59c3f
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Probability and Statistics
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com> (additions)
 #         Trevor Cohn <tacohn@cs.mu.oz.au> (additions)
@@ -37,29 +37,25 @@ implementation of the ``ConditionalProbDistI`` interface is
 ``ConditionalProbDist``, a derived distribution.
 
 """
-from __future__ import print_function, unicode_literals, division
 
 import math
 import random
 import warnings
 import array
-from operator import itemgetter
 from collections import defaultdict, Counter
 from functools import reduce
 from abc import ABCMeta, abstractmethod
 
-from six import itervalues, text_type, add_metaclass
-
-from nltk import compat
 from nltk.internals import raise_unorderable_types
 
-_NINF = float('-1e300')
+_NINF = float("-1e300")
 
 ##//////////////////////////////////////////////////////
 ##  Frequency Distributions
 ##//////////////////////////////////////////////////////
 
-@compat.python_2_unicode_compatible
+
+
 class FreqDist(Counter):
     """
     A frequency distribution for the outcomes of an experiment.  A
@@ -171,7 +167,6 @@ class FreqDist(Counter):
         """
         return [item for item in self if self[item] == 1]
 
-
     def Nr(self, r, bins=None):
         return self.r_Nr(bins)[r]
 
@@ -246,7 +241,9 @@ class FreqDist(Counter):
         :rtype: any or None
         """
         if len(self) == 0:
-            raise ValueError('A FreqDist must have at least one sample before max is defined.')
+            raise ValueError(
+                "A FreqDist must have at least one sample before max is defined."
+            )
         return self.most_common(1)[0][0]
 
     def plot(self, *args, **kwargs):
@@ -263,35 +260,48 @@ class FreqDist(Counter):
         :type title: bool
         """
         try:
-            from matplotlib import pylab
+            import matplotlib.pyplot as plt
         except ImportError:
-            raise ValueError('The plot function requires matplotlib to be installed.'
-                         'See http://matplotlib.org/')
+            raise ValueError(
+                "The plot function requires matplotlib to be installed."
+                "See http://matplotlib.org/"
+            )
 
         if len(args) == 0:
             args = [len(self)]
         samples = [item for item, _ in self.most_common(*args)]
 
-        cumulative = _get_kwarg(kwargs, 'cumulative', False)
+        cumulative = _get_kwarg(kwargs, "cumulative", False)
+        percents = _get_kwarg(kwargs, "percents", False)
         if cumulative:
             freqs = list(self._cumulative_frequencies(samples))
             ylabel = "Cumulative Counts"
+            if percents:
+                freqs = [f / freqs[len(freqs) - 1] * 100 for f in freqs]
+                ylabel = "Cumulative Percents"
         else:
             freqs = [self[sample] for sample in samples]
             ylabel = "Counts"
         # percents = [f * 100 for f in freqs]  only in ProbDist?
 
-        pylab.grid(True, color="silver")
-        if not "linewidth" in kwargs:
+        ax = plt.gca()
+        ax.grid(True, color="silver")
+
+        if "linewidth" not in kwargs:
             kwargs["linewidth"] = 2
         if "title" in kwargs:
-            pylab.title(kwargs["title"])
+            ax.set_title(kwargs["title"])
             del kwargs["title"]
-        pylab.plot(freqs, **kwargs)
-        pylab.xticks(range(len(samples)), [text_type(s) for s in samples], rotation=90)
-        pylab.xlabel("Samples")
-        pylab.ylabel(ylabel)
-        pylab.show()
+
+        ax.plot(freqs, **kwargs)
+        ax.set_xticks(range(len(samples)))
+        ax.set_xticklabels([str(s) for s in samples], rotation=90)
+        ax.set_xlabel("Samples")
+        ax.set_ylabel(ylabel)
+
+        plt.show()
+
+        return ax
 
     def tabulate(self, *args, **kwargs):
         """
@@ -309,21 +319,21 @@ class FreqDist(Counter):
             args = [len(self)]
         samples = [item for item, _ in self.most_common(*args)]
 
-        cumulative = _get_kwarg(kwargs, 'cumulative', False)
+        cumulative = _get_kwarg(kwargs, "cumulative", False)
         if cumulative:
             freqs = list(self._cumulative_frequencies(samples))
         else:
             freqs = [self[sample] for sample in samples]
         # percents = [f * 100 for f in freqs]  only in ProbDist?
 
-        width = max(len("%s" % s) for s in samples)
+        width = max(len("{}".format(s)) for s in samples)
         width = max(width, max(len("%d" % f) for f in freqs))
 
         for i in range(len(samples)):
-            print("%*s" % (width, samples[i]), end=' ')
+            print("%*s" % (width, samples[i]), end=" ")
         print()
         for i in range(len(samples)):
-            print("%*d" % (width, freqs[i]), end=' ')
+            print("%*d" % (width, freqs[i]), end=" ")
         print()
 
     def copy(self):
@@ -377,14 +387,47 @@ class FreqDist(Counter):
         return self.__class__(super(FreqDist, self).__and__(other))
 
     def __le__(self, other):
+        """
+        Returns True if this frequency distribution is a subset of the other:
+        every sample also occurs in the other distribution, and no sample's
+        count here exceeds the count of the same sample there.
+
+        The <= operator forms a partial order, satisfying the axioms of
+        reflexivity, antisymmetry and transitivity.
+
+        >>> FreqDist('a') <= FreqDist('a')
+        True
+        >>> a = FreqDist('abc')
+        >>> b = FreqDist('aabc')
+        >>> (a <= b, b <= a)
+        (True, False)
+        >>> FreqDist('a') <= FreqDist('abcd')
+        True
+        >>> FreqDist('abc') <= FreqDist('xyz')
+        False
+        >>> FreqDist('xyz') <= FreqDist('abc')
+        False
+        >>> c = FreqDist('a')
+        >>> d = FreqDist('aa')
+        >>> e = FreqDist('aaa')
+        >>> c <= d and d <= e and c <= e
+        True
+        """
         if not isinstance(other, FreqDist):
             raise_unorderable_types("<=", self, other)
-        return set(self).issubset(other) and all(self[key] <= other[key] for key in self)
+        return set(self).issubset(other) and all(
+            self[key] <= other[key] for key in self
+        )
+
+    def __ge__(self, other):
+        if not isinstance(other, FreqDist):
+            raise_unorderable_types(">=", self, other)
+        return set(self).issuperset(other) and all(
+            self[key] >= other[key] for key in other
+        )
 
-    # @total_ordering doesn't work here, since the class inherits from a builtin class
-    __ge__ = lambda self, other: not self <= other or self == other
     __lt__ = lambda self, other: self <= other and not self == other
-    __gt__ = lambda self, other: not self <= other
+    __gt__ = lambda self, other: self >= other and not self == other
 
     def __repr__(self):
         """
@@ -412,10 +455,10 @@ class FreqDist(Counter):
         :type maxlen: int
         :rtype: string
         """
-        items = ['{0!r}: {1!r}'.format(*item) for item in self.most_common(maxlen)]
+        items = ["{0!r}: {1!r}".format(*item) for item in self.most_common(maxlen)]
         if len(self) > maxlen:
-            items.append('...')
-        return 'FreqDist({{{0}}})'.format(', '.join(items))
+            items.append("...")
+        return "FreqDist({{{0}}})".format(", ".join(items))
 
     def __str__(self):
         """
@@ -423,15 +466,24 @@ class FreqDist(Counter):
 
         :rtype: string
         """
-        return '<FreqDist with %d samples and %d outcomes>' % (len(self), self.N())
+        return "<FreqDist with %d samples and %d outcomes>" % (len(self), self.N())
+
+    def __iter__(self):
+        """
+        Return an iterator which yields tokens ordered by frequency.
+
+        :rtype: iterator
+        """
+        for token, _ in self.most_common(self.B()):
+            yield token
 
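With the new __iter__, iterating over a FreqDist yields samples from most to least frequent (ties keep insertion order in CPython) rather than in plain dict order:

    from nltk import FreqDist

    fd = FreqDist('abracadabra')
    print(list(fd))            # ['a', 'b', 'r', 'c', 'd']
    print(fd.most_common(2))   # [('a', 5), ('b', 2)]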
 
 ##//////////////////////////////////////////////////////
 ##  Probability Distributions
 ##//////////////////////////////////////////////////////
 
-@add_metaclass(ABCMeta)
-class ProbDistI(object):
+
+class ProbDistI(metaclass=ABCMeta):
     """
     A probability distribution for the outcomes of an experiment.  A
     probability distribution specifies how likely it is that an
@@ -444,6 +496,7 @@ class ProbDistI(object):
     used to model the probability distribution of the experiment used
     to generate a frequency distribution.
     """
+
     SUM_TO_ONE = True
     """True if the probabilities of the samples in this probability
        distribution will always sum to one."""
@@ -477,7 +530,7 @@ class ProbDistI(object):
         """
         # Default definition, in terms of prob()
         p = self.prob(sample)
-        return (math.log(p, 2) if p != 0 else _NINF)
+        return math.log(p, 2) if p != 0 else _NINF
 
     @abstractmethod
     def max(self):
@@ -519,24 +572,28 @@ class ProbDistI(object):
         p_init = p
         for sample in self.samples():
             p -= self.prob(sample)
-            if p <= 0: return sample
+            if p <= 0:
+                return sample
         # allow for some rounding error:
-        if p < .0001:
+        if p < 0.0001:
             return sample
         # we *should* never get here
         if self.SUM_TO_ONE:
-            warnings.warn("Probability distribution %r sums to %r; generate()"
-                          " is returning an arbitrary sample." % (self, p_init-p))
+            warnings.warn(
+                "Probability distribution %r sums to %r; generate()"
+                " is returning an arbitrary sample." % (self, p_init - p)
+            )
         return random.choice(list(self.samples()))
 
 
-@compat.python_2_unicode_compatible
+
 class UniformProbDist(ProbDistI):
     """
     A probability distribution that assigns equal probability to each
     sample in a given set; and a zero probability to all other
     samples.
     """
+
     def __init__(self, samples):
         """
         Construct a new uniform probability distribution, that assigns
@@ -548,14 +605,15 @@ class UniformProbDist(ProbDistI):
         :raise ValueError: If ``samples`` is empty.
         """
         if len(samples) == 0:
-            raise ValueError('A Uniform probability distribution must '+
-                             'have at least one sample.')
+            raise ValueError(
+                "A Uniform probability distribution must " + "have at least one sample."
+            )
         self._sampleset = set(samples)
-        self._prob = 1.0/len(self._sampleset)
+        self._prob = 1.0 / len(self._sampleset)
         self._samples = list(self._sampleset)
 
     def prob(self, sample):
-        return (self._prob if sample in self._sampleset else 0)
+        return self._prob if sample in self._sampleset else 0
 
     def max(self):
         return self._samples[0]
@@ -564,20 +622,22 @@ class UniformProbDist(ProbDistI):
         return self._samples
 
     def __repr__(self):
-        return '<UniformProbDist with %d samples>' % len(self._sampleset)
+        return "<UniformProbDist with %d samples>" % len(self._sampleset)
+
 
 
-@compat.python_2_unicode_compatible
 class RandomProbDist(ProbDistI):
     """
     Generates a random probability distribution whereby each sample's
     probability is drawn uniformly at random between 0 and 1 (a uniform
     random distribution, also called a continuous uniform distribution).
     """
+
     def __init__(self, samples):
         if len(samples) == 0:
-            raise ValueError('A probability distribution must '+
-                             'have at least one sample.')
+            raise ValueError(
+                "A probability distribution must " + "have at least one sample."
+            )
         self._probs = self.unirand(samples)
         self._samples = list(self._probs.keys())
 
@@ -593,16 +653,21 @@ class RandomProbDist(ProbDistI):
         randrow = [random.random() for i in range(len(samples))]
         total = sum(randrow)
         for i, x in enumerate(randrow):
-            randrow[i] = x/total
+            randrow[i] = x / total
 
         total = sum(randrow)
         if total != 1:
-            #this difference, if present, is so small (near NINF) that it
-            #can be subtracted from any element without risking probs not (0 1)
+            # this difference, if present, is vanishingly small, so it can be
+            # subtracted from any element without pushing a probability outside (0, 1)
             randrow[-1] -= total - 1
 
         return dict((s, randrow[i]) for i, s in enumerate(samples))
 
+    def max(self):
+        if not hasattr(self, "_max"):
+            self._max = max((p, v) for (v, p) in self._probs.items())[1]
+        return self._max
+
     def prob(self, sample):
         return self._probs.get(sample, 0)
 
@@ -610,16 +675,17 @@ class RandomProbDist(ProbDistI):
         return self._samples
 
     def __repr__(self):
-        return '<RandomUniformProbDist with %d samples>' %len(self._probs)
+        return "<RandomUniformProbDist with %d samples>" % len(self._probs)
+
 
 
-@compat.python_2_unicode_compatible
 class DictionaryProbDist(ProbDistI):
     """
     A probability distribution whose probabilities are directly
     specified by a given dictionary.  The given dictionary maps
     samples to probabilities.
     """
+
     def __init__(self, prob_dict=None, log=False, normalize=False):
         """
         Construct a new probability distribution from the given
@@ -632,18 +698,20 @@ class DictionaryProbDist(ProbDistI):
         distribution assigns zero probability to all values.
         """
 
-        self._prob_dict = (prob_dict.copy() if prob_dict is not None else {})
+        self._prob_dict = prob_dict.copy() if prob_dict is not None else {}
         self._log = log
 
         # Normalize the distribution, if requested.
         if normalize:
             if len(prob_dict) == 0:
-                raise ValueError('A DictionaryProbDist must have at least one sample ' +
-                             'before it can be normalized.')
+                raise ValueError(
+                    "A DictionaryProbDist must have at least one sample "
+                    + "before it can be normalized."
+                )
             if log:
                 value_sum = sum_logs(list(self._prob_dict.values()))
                 if value_sum <= _NINF:
-                    logp = math.log(1.0/len(prob_dict), 2)
+                    logp = math.log(1.0 / len(prob_dict), 2)
                     for x in prob_dict:
                         self._prob_dict[x] = logp
                 else:
@@ -652,17 +720,17 @@ class DictionaryProbDist(ProbDistI):
             else:
                 value_sum = sum(self._prob_dict.values())
                 if value_sum == 0:
-                    p = 1.0/len(prob_dict)
+                    p = 1.0 / len(prob_dict)
                     for x in prob_dict:
                         self._prob_dict[x] = p
                 else:
-                    norm_factor = 1.0/value_sum
+                    norm_factor = 1.0 / value_sum
                     for (x, p) in self._prob_dict.items():
                         self._prob_dict[x] *= norm_factor
 
     def prob(self, sample):
         if self._log:
-            return (2**(self._prob_dict[sample]) if sample in self._prob_dict else 0)
+            return 2 ** (self._prob_dict[sample]) if sample in self._prob_dict else 0
         else:
             return self._prob_dict.get(sample, 0)
 
@@ -670,21 +738,26 @@ class DictionaryProbDist(ProbDistI):
         if self._log:
             return self._prob_dict.get(sample, _NINF)
         else:
-            if sample not in self._prob_dict: return _NINF
-            elif self._prob_dict[sample] == 0: return _NINF
-            else: return math.log(self._prob_dict[sample], 2)
+            if sample not in self._prob_dict:
+                return _NINF
+            elif self._prob_dict[sample] == 0:
+                return _NINF
+            else:
+                return math.log(self._prob_dict[sample], 2)
 
     def max(self):
-        if not hasattr(self, '_max'):
-            self._max = max((p,v) for (v,p) in self._prob_dict.items())[1]
+        if not hasattr(self, "_max"):
+            self._max = max((p, v) for (v, p) in self._prob_dict.items())[1]
         return self._max
+
     def samples(self):
         return self._prob_dict.keys()
+
     def __repr__(self):
-        return '<ProbDist with %d samples>' % len(self._prob_dict)
+        return "<ProbDist with %d samples>" % len(self._prob_dict)
+
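A minimal usage sketch of DictionaryProbDist with normalization (illustrative values):

    pd = DictionaryProbDist({"a": 2, "b": 1, "c": 1}, normalize=True)
    pd.prob("a")   # 0.5, i.e. 2 / (2 + 1 + 1)
    pd.max()       # 'a'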
 
 
-@compat.python_2_unicode_compatible
 class MLEProbDist(ProbDistI):
     """
     The maximum likelihood estimate for the probability distribution
@@ -693,6 +766,7 @@ class MLEProbDist(ProbDistI):
     each sample as the frequency of that sample in the frequency
     distribution.
     """
+
     def __init__(self, freqdist, bins=None):
         """
         Use the maximum likelihood estimate to create a probability
@@ -727,10 +801,10 @@ class MLEProbDist(ProbDistI):
         :rtype: str
         :return: A string representation of this ``ProbDist``.
         """
-        return '<MLEProbDist based on %d samples>' % self._freqdist.N()
+        return "<MLEProbDist based on %d samples>" % self._freqdist.N()
+
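For reference, the estimate is simply the relative frequency (illustrative sketch):

    fd = FreqDist("aab")        # counts: a=2, b=1
    MLEProbDist(fd).prob("a")   # 2/3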
 
 
-@compat.python_2_unicode_compatible
 class LidstoneProbDist(ProbDistI):
     """
     The Lidstone estimate for the probability distribution of the
@@ -743,7 +817,9 @@ class LidstoneProbDist(ProbDistI):
     *gamma* to the count for each bin, and taking the maximum
     likelihood estimate of the resulting frequency distribution.
     """
+
     SUM_TO_ONE = False
+
     def __init__(self, freqdist, gamma, bins=None):
         """
         Use the Lidstone estimate to create a probability distribution
@@ -767,14 +843,17 @@ class LidstoneProbDist(ProbDistI):
         """
         if (bins == 0) or (bins is None and freqdist.N() == 0):
             name = self.__class__.__name__[:-8]
-            raise ValueError('A %s probability distribution ' % name +
-                             'must have at least one bin.')
+            raise ValueError(
+                "A %s probability distribution " % name + "must have at least one bin."
+            )
         if (bins is not None) and (bins < freqdist.B()):
             name = self.__class__.__name__[:-8]
-            raise ValueError('\nThe number of bins in a %s distribution ' % name +
-                             '(%d) must be greater than or equal to\n' % bins +
-                             'the number of bins in the FreqDist used ' +
-                             'to create it (%d).' % freqdist.B())
+            raise ValueError(
+                "\nThe number of bins in a %s distribution " % name
+                + "(%d) must be greater than or equal to\n" % bins
+                + "the number of bins in the FreqDist used "
+                + "to create it (%d)." % freqdist.B()
+            )
 
         self._freqdist = freqdist
         self._gamma = float(gamma)
@@ -823,10 +902,10 @@ class LidstoneProbDist(ProbDistI):
 
         :rtype: str
         """
-        return '<LidstoneProbDist based on %d samples>' % self._freqdist.N()
+        return "<LidstoneProbDist based on %d samples>" % self._freqdist.N()
+
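A hand-checked example of the (c + gamma) / (N + B*gamma) estimate described above (illustrative):

    fd = FreqDist("aab")                           # a=2, b=1; N=3
    lid = LidstoneProbDist(fd, gamma=0.5, bins=3)
    lid.prob("a")   # (2 + 0.5) / (3 + 3*0.5) = 0.555...
    lid.prob("c")   # (0 + 0.5) / 4.5 = 0.111... for an unseen bin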
 
 
-@compat.python_2_unicode_compatible
 class LaplaceProbDist(LidstoneProbDist):
     """
     The Laplace estimate for the probability distribution of the
@@ -837,6 +916,7 @@ class LaplaceProbDist(LidstoneProbDist):
     each bin, and taking the maximum likelihood estimate of the
     resulting frequency distribution.
     """
+
     def __init__(self, freqdist, bins=None):
         """
         Use the Laplace estimate to create a probability distribution
@@ -859,10 +939,10 @@ class LaplaceProbDist(LidstoneProbDist):
         :rtype: str
         :return: A string representation of this ``ProbDist``.
         """
-        return '<LaplaceProbDist based on %d samples>' % self._freqdist.N()
+        return "<LaplaceProbDist based on %d samples>" % self._freqdist.N()
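The same example with gamma fixed at 1 (illustrative):

    fd = FreqDist("aab")
    LaplaceProbDist(fd, bins=3).prob("a")   # (2 + 1) / (3 + 3) = 0.5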
+
 
 
-@compat.python_2_unicode_compatible
 class ELEProbDist(LidstoneProbDist):
     """
     The expected likelihood estimate for the probability distribution
@@ -873,6 +953,7 @@ class ELEProbDist(LidstoneProbDist):
     to the count for each bin, and taking the maximum likelihood
     estimate of the resulting frequency distribution.
     """
+
     def __init__(self, freqdist, bins=None):
         """
         Use the expected likelihood estimate to create a probability
@@ -896,10 +977,10 @@ class ELEProbDist(LidstoneProbDist):
 
         :rtype: str
         """
-        return '<ELEProbDist based on %d samples>' % self._freqdist.N()
+        return "<ELEProbDist based on %d samples>" % self._freqdist.N()
+
 
 
-@compat.python_2_unicode_compatible
 class HeldoutProbDist(ProbDistI):
     """
     The heldout estimate for the probability distribution of the
@@ -941,7 +1022,9 @@ class HeldoutProbDist(ProbDistI):
         in the base distribution.  ``_max_r`` is used to decide how
         large ``_estimate`` must be.
     """
+
     SUM_TO_ONE = False
+
     def __init__(self, base_fdist, heldout_fdist, bins=None):
         """
         Use the heldout estimate to create a probability distribution
@@ -969,7 +1052,7 @@ class HeldoutProbDist(ProbDistI):
         # Calculate Tr, Nr, and N.
         Tr = self._calculate_Tr()
         r_Nr = base_fdist.r_Nr(bins)
-        Nr = [r_Nr[r] for r in range(self._max_r+1)]
+        Nr = [r_Nr[r] for r in range(self._max_r + 1)]
         N = heldout_fdist.N()
 
         # Use Tr, Nr, and N to compute the probability estimate for
@@ -984,7 +1067,7 @@ class HeldoutProbDist(ProbDistI):
 
         :rtype: list(float)
         """
-        Tr = [0.0] * (self._max_r+1)
+        Tr = [0.0] * (self._max_r + 1)
         for sample in self._heldout_fdist:
             r = self._base_fdist[sample]
             Tr[r] += self._heldout_fdist[sample]
@@ -1011,9 +1094,11 @@ class HeldoutProbDist(ProbDistI):
             frequency distribution.
         """
         estimate = []
-        for r in range(self._max_r+1):
-            if Nr[r] == 0: estimate.append(None)
-            else: estimate.append(Tr[r]/(Nr[r]*N))
+        for r in range(self._max_r + 1):
+            if Nr[r] == 0:
+                estimate.append(None)
+            else:
+                estimate.append(Tr[r] / (Nr[r] * N))
         return estimate
 
     def base_fdist(self):
@@ -1056,11 +1141,11 @@ class HeldoutProbDist(ProbDistI):
         :rtype: str
         :return: A string representation of this ``ProbDist``.
         """
-        s = '<HeldoutProbDist: %d base samples; %d heldout samples>'
+        s = "<HeldoutProbDist: %d base samples; %d heldout samples>"
         return s % (self._base_fdist.N(), self._heldout_fdist.N())
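A tiny worked example of the Tr[r] / (Nr[r] * N) estimate (illustrative; hand-checked against _calculate_Tr and _estimate above):

    base = FreqDist("aaab")      # a:3, b:1
    heldout = FreqDist("aabb")   # a:2, b:2
    hd = HeldoutProbDist(base, heldout, bins=2)
    hd.prob("a")   # r=3, Tr[3]=2, Nr[3]=1, N=4  ->  2 / (1 * 4) = 0.5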
 
 
-@compat.python_2_unicode_compatible
+
 class CrossValidationProbDist(ProbDistI):
     """
     The cross-validation estimate for the probability distribution of
@@ -1069,7 +1154,9 @@ class CrossValidationProbDist(ProbDistI):
     is found by averaging the held-out estimates for the sample in
     each pair of frequency distributions.
     """
+
     SUM_TO_ONE = False
+
     def __init__(self, freqdists, bins):
         """
         Use the cross-validation estimate to create a probability
@@ -1115,7 +1202,7 @@ class CrossValidationProbDist(ProbDistI):
         prob = 0.0
         for heldout_probdist in self._heldout_probdists:
             prob += heldout_probdist.prob(sample)
-        return prob/len(self._heldout_probdists)
+        return prob / len(self._heldout_probdists)
 
     def discount(self):
         raise NotImplementedError()
@@ -1126,10 +1213,10 @@ class CrossValidationProbDist(ProbDistI):
 
         :rtype: str
         """
-        return '<CrossValidationProbDist: %d-way>' % len(self._freqdists)
+        return "<CrossValidationProbDist: %d-way>" % len(self._freqdists)
+
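An illustrative sketch: the estimate is the mean of the heldout estimates over both orderings of each pair of distributions:

    fd1, fd2 = FreqDist("aaab"), FreqDist("aabb")
    cv = CrossValidationProbDist([fd1, fd2], bins=2)
    cv.prob("a")   # (0.5 + 0.5) / 2 = 0.5 on this toy data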
 
 
-@compat.python_2_unicode_compatible
 class WittenBellProbDist(ProbDistI):
     """
     The Witten-Bell estimate of a probability distribution. This distribution
@@ -1172,8 +1259,9 @@ class WittenBellProbDist(ProbDistI):
             it's assumed to be equal to that of the ``freqdist``
         :type bins: int
         """
-        assert bins is None or bins >= freqdist.B(),\
-               'bins parameter must not be less than %d=freqdist.B()' % freqdist.B()
+        assert bins is None or bins >= freqdist.B(), (
+            "bins parameter must not be less than %d=freqdist.B()" % freqdist.B()
+        )
         if bins is None:
             bins = freqdist.B()
         self._freqdist = freqdist
@@ -1181,7 +1269,7 @@ class WittenBellProbDist(ProbDistI):
         self._Z = bins - self._freqdist.B()
         self._N = self._freqdist.N()
         # self._P0 is P(0), precalculated for efficiency:
-        if self._N==0:
+        if self._N == 0:
             # if freqdist is empty, we approximate P(0) by a UniformProbDist:
             self._P0 = 1.0 / self._Z
         else:
@@ -1190,7 +1278,7 @@ class WittenBellProbDist(ProbDistI):
     def prob(self, sample):
         # inherit docs from ProbDistI
         c = self._freqdist[sample]
-        return (c / (self._N + self._T) if c != 0 else self._P0)
+        return c / (self._N + self._T) if c != 0 else self._P0
 
     def max(self):
         return self._freqdist.max()
@@ -1210,7 +1298,7 @@ class WittenBellProbDist(ProbDistI):
 
         :rtype: str
         """
-        return '<WittenBellProbDist based on %d samples>' % self._freqdist.N()
+        return "<WittenBellProbDist based on %d samples>" % self._freqdist.N()
 
 
 ##//////////////////////////////////////////////////////
@@ -1271,7 +1359,8 @@ class WittenBellProbDist(ProbDistI):
 ##  Simple Good-Turing Probability Distributions
 ##//////////////////////////////////////////////////////
 
-@compat.python_2_unicode_compatible
+
+
 class SimpleGoodTuringProbDist(ProbDistI):
     """
     SimpleGoodTuring ProbDist approximates from frequency to frequency of
@@ -1291,7 +1380,9 @@ class SimpleGoodTuringProbDist(ProbDistI):
     - slope: b = sigma((xi - E(x))(yi - E(y))) / sigma((xi - E(x))(xi - E(x)))
     - intercept: a = E(y) - b.E(x)
     """
+
     SUM_TO_ONE = False
+
     def __init__(self, freqdist, bins=None):
         """
         :param freqdist: The frequency counts upon which to base the
@@ -1302,8 +1393,9 @@ class SimpleGoodTuringProbDist(ProbDistI):
             then it's assumed to be equal to ``freqdist``.B() + 1
         :type bins: int
         """
-        assert bins is None or bins > freqdist.B(),\
-               'bins parameter must not be less than %d=freqdist.B()+1' % (freqdist.B()+1)
+        assert (
+            bins is None or bins > freqdist.B()
+        ), "bins parameter must not be less than %d=freqdist.B()+1" % (freqdist.B() + 1)
         if bins is None:
             bins = freqdist.B() + 1
         self._freqdist = freqdist
@@ -1345,8 +1437,8 @@ class SimpleGoodTuringProbDist(ProbDistI):
 
         zr = []
         for j in range(len(r)):
-            i = (r[j-1] if j > 0 else 0)
-            k = (2 * r[j] - i if j == len(r) - 1 else r[j+1])
+            i = r[j - 1] if j > 0 else 0
+            k = 2 * r[j] - i if j == len(r) - 1 else r[j + 1]
             zr_ = 2.0 * nr[j] / (k - i)
             zr.append(zr_)
 
@@ -1358,13 +1450,15 @@ class SimpleGoodTuringProbDist(ProbDistI):
         y_mean = sum(log_zr) / len(log_zr)
         for (x, y) in zip(log_r, log_zr):
             xy_cov += (x - x_mean) * (y - y_mean)
-            x_var += (x - x_mean)**2
-        self._slope = (xy_cov / x_var if x_var != 0 else 0.0)
+            x_var += (x - x_mean) ** 2
+        self._slope = xy_cov / x_var if x_var != 0 else 0.0
         if self._slope >= -1:
-            warnings.warn('SimpleGoodTuring did not find a proper best fit '
-                          'line for smoothing probabilities of occurrences. '
-                          'The probability estimates are likely to be '
-                          'unreliable.')
+            warnings.warn(
+                "SimpleGoodTuring did not find a proper best fit "
+                "line for smoothing probabilities of occurrences. "
+                "The probability estimates are likely to be "
+                "unreliable."
+            )
         self._intercept = y_mean - self._slope * x_mean
 
     def _switch(self, r, nr):
@@ -1373,17 +1467,17 @@ class SimpleGoodTuringProbDist(ProbDistI):
         when estimating E[Nr].
         """
         for i, r_ in enumerate(r):
-            if len(r) == i + 1 or r[i+1] != r_ + 1:
+            if len(r) == i + 1 or r[i + 1] != r_ + 1:
                 # We are at the end of r, or there is a gap in r
                 self._switch_at = r_
                 break
 
             Sr = self.smoothedNr
-            smooth_r_star = (r_ + 1) * Sr(r_+1) / Sr(r_)
-            unsmooth_r_star = (r_ + 1) * nr[i+1] / nr[i]
+            smooth_r_star = (r_ + 1) * Sr(r_ + 1) / Sr(r_)
+            unsmooth_r_star = (r_ + 1) * nr[i + 1] / nr[i]
 
-            std = math.sqrt(self._variance(r_, nr[i], nr[i+1]))
-            if abs(unsmooth_r_star-smooth_r_star) <= 1.96 * std:
+            std = math.sqrt(self._variance(r_, nr[i], nr[i + 1]))
+            if abs(unsmooth_r_star - smooth_r_star) <= 1.96 * std:
                 self._switch_at = r_
                 break
 
@@ -1391,7 +1485,7 @@ class SimpleGoodTuringProbDist(ProbDistI):
         r = float(r)
         nr = float(nr)
         nr_1 = float(nr_1)
-        return (r + 1.0)**2 * (nr_1 / nr**2) * (1.0 + nr_1 / nr)
+        return (r + 1.0) ** 2 * (nr_1 / nr ** 2) * (1.0 + nr_1 / nr)
 
     def _renormalize(self, r, nr):
         """
@@ -1403,7 +1497,7 @@ class SimpleGoodTuringProbDist(ProbDistI):
         """
         prob_cov = 0.0
         for r_, nr_ in zip(r, nr):
-            prob_cov  += nr_ * self._prob_measure(r_)
+            prob_cov += nr_ * self._prob_measure(r_)
         if prob_cov:
             self._renormal = (1 - self._prob_measure(0)) / prob_cov
 
@@ -1443,16 +1537,16 @@ class SimpleGoodTuringProbDist(ProbDistI):
         return p
 
     def _prob_measure(self, count):
-        if count == 0 and self._freqdist.N() == 0 :
+        if count == 0 and self._freqdist.N() == 0:
             return 1.0
         elif count == 0 and self._freqdist.N() != 0:
             return self._freqdist.Nr(1) / self._freqdist.N()
 
         if self._switch_at > count:
-            Er_1 = self._freqdist.Nr(count+1)
+            Er_1 = self._freqdist.Nr(count + 1)
             Er = self._freqdist.Nr(count)
         else:
-            Er_1 = self.smoothedNr(count+1)
+            Er_1 = self.smoothedNr(count + 1)
             Er = self.smoothedNr(count)
 
         r_star = (count + 1) * Er_1 / Er
@@ -1460,17 +1554,17 @@ class SimpleGoodTuringProbDist(ProbDistI):
 
     def check(self):
         prob_sum = 0.0
-        for i in  range(0, len(self._Nr)):
+        for i in range(0, len(self._Nr)):
             prob_sum += self._Nr[i] * self._prob_measure(i) / self._renormal
         print("Probability Sum:", prob_sum)
-        #assert prob_sum != 1.0, "probability sum should be one!"
+        # assert prob_sum == 1.0, "probability sum should be one!"
 
     def discount(self):
         """
         This function returns the total mass of probability transfers from the
         seen samples to the unseen samples.
         """
-        return  self.smoothedNr(1) / self._freqdist.N()
+        return self.smoothedNr(1) / self._freqdist.N()
 
     def max(self):
         return self._freqdist.max()
@@ -1487,8 +1581,7 @@ class SimpleGoodTuringProbDist(ProbDistI):
 
         :rtype: str
         """
-        return '<SimpleGoodTuringProbDist based on %d samples>'\
-                % self._freqdist.N()
+        return "<SimpleGoodTuringProbDist based on %d samples>" % self._freqdist.N()
 
 
 class MutableProbDist(ProbDistI):
@@ -1522,6 +1615,10 @@ class MutableProbDist(ProbDistI):
                 self._data[i] = prob_dist.prob(samples[i])
         self._logs = store_logs
 
+    def max(self):
+        # inherit documentation; _sample_dict maps samples to indices,
+        # so compare the actual probabilities rather than the stored indices
+        return max((self.prob(v), v) for v in self._samples)[1]
+
     def samples(self):
         # inherit documentation
         return self._samples
@@ -1531,14 +1628,14 @@ class MutableProbDist(ProbDistI):
         i = self._sample_dict.get(sample)
         if i is None:
             return 0.0
-        return (2**(self._data[i]) if self._logs else self._data[i])
+        return 2 ** (self._data[i]) if self._logs else self._data[i]
 
     def logprob(self, sample):
         # inherit documentation
         i = self._sample_dict.get(sample)
         if i is None:
-            return float('-inf')
-        return (self._data[i] if self._logs else math.log(self._data[i], 2))
+            return float("-inf")
+        return self._data[i] if self._logs else math.log(self._data[i], 2)
 
     def update(self, sample, prob, log=True):
         """
@@ -1558,9 +1655,10 @@ class MutableProbDist(ProbDistI):
         i = self._sample_dict.get(sample)
         assert i is not None
         if self._logs:
-            self._data[i] = (prob if log else math.log(prob, 2))
+            self._data[i] = prob if log else math.log(prob, 2)
         else:
-            self._data[i] = (2**(prob) if log else prob)
+            self._data[i] = 2 ** (prob) if log else prob
+
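An illustrative update cycle (store_logs defaults to True, so values are kept internally as base-2 logs):

    mpd = MutableProbDist(MLEProbDist(FreqDist("aab")), ["a", "b"])
    mpd.update("a", 0.5, log=False)
    mpd.update("b", 0.5, log=False)
    mpd.prob("a")   # 0.5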
 
 ##/////////////////////////////////////////////////////
 ##  Kneser-Ney Probability Distribution
@@ -1597,7 +1695,8 @@ class MutableProbDist(ProbDistI):
 # and take advantage of storing and retrieving information in dictionaries
 # where possible.
 
-@compat.python_2_unicode_compatible
+
+
 class KneserNeyProbDist(ProbDistI):
     """
     Kneser-Ney estimate of a probability distribution. This is a version of
@@ -1607,6 +1706,7 @@ class KneserNeyProbDist(ProbDistI):
     value can be specified. The default discount is set to 0.75.
 
     """
+
     def __init__(self, freqdist, bins=None, discount=0.75):
         """
         :param freqdist: The trigram frequency distribution upon which to base
@@ -1637,15 +1737,15 @@ class KneserNeyProbDist(ProbDistI):
         self._trigrams_contain = defaultdict(float)
         self._wordtypes_before = defaultdict(float)
         for w0, w1, w2 in freqdist:
-            self._bigrams[(w0,w1)] += freqdist[(w0, w1, w2)]
-            self._wordtypes_after[(w0,w1)] += 1
+            self._bigrams[(w0, w1)] += freqdist[(w0, w1, w2)]
+            self._wordtypes_after[(w0, w1)] += 1
             self._trigrams_contain[w1] += 1
-            self._wordtypes_before[(w1,w2)] += 1
+            self._wordtypes_before[(w1, w2)] += 1
 
     def prob(self, trigram):
         # sample must be a triple
         if len(trigram) != 3:
-            raise ValueError('Expected an iterable with 3 members.')
+            raise ValueError("Expected an iterable with 3 members.")
         trigram = tuple(trigram)
         w0, w1, w2 = trigram
 
@@ -1654,20 +1754,20 @@ class KneserNeyProbDist(ProbDistI):
         else:
             # if the sample trigram was seen during training
             if trigram in self._trigrams:
-                prob = (self._trigrams[trigram]
-                        - self.discount())/self._bigrams[(w0, w1)]
+                prob = (self._trigrams[trigram] - self.discount()) / self._bigrams[
+                    (w0, w1)
+                ]
 
             # else if the 'rougher' environment was seen during training
-            elif (w0,w1) in self._bigrams and (w1,w2) in self._wordtypes_before:
+            elif (w0, w1) in self._bigrams and (w1, w2) in self._wordtypes_before:
                 aftr = self._wordtypes_after[(w0, w1)]
                 bfr = self._wordtypes_before[(w1, w2)]
 
                 # the probability left over from alphas
-                leftover_prob = ((aftr * self.discount())
-                                 / self._bigrams[(w0, w1)])
+                leftover_prob = (aftr * self.discount()) / self._bigrams[(w0, w1)]
 
                 # the beta (including normalization)
-                beta = bfr /(self._trigrams_contain[w1] - aftr)
+                beta = bfr / (self._trigrams_contain[w1] - aftr)
 
                 prob = leftover_prob * beta
 
@@ -1703,34 +1803,39 @@ class KneserNeyProbDist(ProbDistI):
         return self._trigrams.max()
 
     def __repr__(self):
-        '''
+        """
         Return a string representation of this ProbDist
 
         :rtype: str
-        '''
-        return '<KneserNeyProbDist based on {0} trigrams'.format(self._trigrams.N())
+        """
+        return "<KneserNeyProbDist based on {0} trigrams".format(self._trigrams.N())
+
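A minimal sketch with the default 0.75 discount (illustrative):

    from nltk.util import trigrams
    fd = FreqDist(trigrams("the cat sat on the mat".split()))
    kn = KneserNeyProbDist(fd)
    kn.prob(("the", "cat", "sat"))   # (1 - 0.75) / 1 = 0.25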
 
 ##//////////////////////////////////////////////////////
 ##  Probability Distribution Operations
 ##//////////////////////////////////////////////////////
 
+
 def log_likelihood(test_pdist, actual_pdist):
-    if (not isinstance(test_pdist, ProbDistI) or
-        not isinstance(actual_pdist, ProbDistI)):
-        raise ValueError('expected a ProbDist.')
+    if not isinstance(test_pdist, ProbDistI) or not isinstance(actual_pdist, ProbDistI):
+        raise ValueError("expected a ProbDist.")
     # Is this right?
-    return sum(actual_pdist.prob(s) * math.log(test_pdist.prob(s), 2)
-               for s in actual_pdist)
+    return sum(
+        actual_pdist.prob(s) * math.log(test_pdist.prob(s), 2) for s in actual_pdist
+    )
+
 
 def entropy(pdist):
     probs = (pdist.prob(s) for s in pdist.samples())
-    return -sum(p * math.log(p,2) for p in probs)
+    return -sum(p * math.log(p, 2) for p in probs)
+
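For example (illustrative):

    pd = DictionaryProbDist({"a": 0.5, "b": 0.5})
    entropy(pd)   # 1.0 bit for a fair binary choice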
 
 ##//////////////////////////////////////////////////////
 ##  Conditional Distributions
 ##//////////////////////////////////////////////////////
 
-@compat.python_2_unicode_compatible
+
+
 class ConditionalFreqDist(defaultdict):
     """
     A collection of frequency distributions for a single experiment
@@ -1778,6 +1883,7 @@ class ConditionalFreqDist(defaultdict):
     condition.
 
     """
+
     def __init__(self, cond_samples=None):
         """
         Construct a new empty conditional frequency distribution.  In
@@ -1817,7 +1923,7 @@ class ConditionalFreqDist(defaultdict):
 
         :rtype: int
         """
-        return sum(fdist.N() for fdist in itervalues(self))
+        return sum(fdist.N() for fdist in self.values())
 
     def plot(self, *args, **kwargs):
         """
@@ -1833,40 +1939,58 @@ class ConditionalFreqDist(defaultdict):
         :type conditions: list
         """
         try:
-            from matplotlib import pylab
+            import matplotlib.pyplot as plt  # import statement fix
         except ImportError:
-            raise ValueError('The plot function requires matplotlib to be installed.'
-                         'See http://matplotlib.org/')
+            raise ValueError(
+                "The plot function requires matplotlib to be installed."
+                "See http://matplotlib.org/"
+            )
 
         cumulative = _get_kwarg(kwargs, 'cumulative', False)
-        conditions = _get_kwarg(kwargs, 'conditions', sorted(self.conditions()))
+        percents = _get_kwarg(kwargs, 'percents', False)
+        conditions = [
+            c for c in _get_kwarg(kwargs, 'conditions', self.conditions()) if c in self
+        ]  # keep only conditions that are actually present
         title = _get_kwarg(kwargs, 'title', '')
-        samples = _get_kwarg(kwargs, 'samples',
-                             sorted(set(v for c in conditions for v in self[c])))  # this computation could be wasted
-        if not "linewidth" in kwargs:
+        samples = _get_kwarg(
+            kwargs, 'samples', sorted(set(v for c in conditions for v in self[c]))
+        )  # this computation could be wasted
+        if "linewidth" not in kwargs:
             kwargs["linewidth"] = 2
-
-        for condition in conditions:
-            if cumulative:
-                freqs = list(self[condition]._cumulative_frequencies(samples))
-                ylabel = "Cumulative Counts"
-                legend_loc = 'lower right'
-            else:
-                freqs = [self[condition][sample] for sample in samples]
-                ylabel = "Counts"
-                legend_loc = 'upper right'
-            # percents = [f * 100 for f in freqs] only in ConditionalProbDist?
-            kwargs['label'] = "%s" % condition
-            pylab.plot(freqs, *args, **kwargs)
-
-        pylab.legend(loc=legend_loc)
-        pylab.grid(True, color="silver")
-        pylab.xticks(range(len(samples)), [text_type(s) for s in samples], rotation=90)
-        if title:
-            pylab.title(title)
-        pylab.xlabel("Samples")
-        pylab.ylabel(ylabel)
-        pylab.show()
+        ax = plt.gca()
+        if conditions:
+            freqs = []
+            for condition in conditions:
+                if cumulative:
+                    # freqs is a list of lists; each sublist holds one condition's frequencies
+                    freqs.append(list(self[condition]._cumulative_frequencies(samples)))
+                    ylabel = "Cumulative Counts"
+                    legend_loc = 'lower right'
+                    if percents:
+                        # divide by this condition's final (total) cumulative count
+                        freqs[-1] = [f / freqs[-1][-1] * 100 for f in freqs[-1]]
+                        ylabel = "Cumulative Percents"
+                else:
+                    freqs.append([self[condition][sample] for sample in samples])
+                    ylabel = "Counts"
+                    legend_loc = 'upper right'
+                # percents = [f * 100 for f in freqs] only in ConditionalProbDist?
+
+            for condition, freq in zip(conditions, freqs):
+                kwargs['label'] = condition  # one legend label per condition
+                ax.plot(freq, *args, **kwargs)
+            ax.legend(loc=legend_loc)
+            ax.grid(True, color="silver")
+            ax.set_xticks(range(len(samples)))
+            ax.set_xticklabels([str(s) for s in samples], rotation=90)
+            if title:
+                ax.set_title(title)
+            ax.set_xlabel("Samples")
+            ax.set_ylabel(ylabel)
+        plt.show()
+
+        return ax
 
     def tabulate(self, *args, **kwargs):
         """
@@ -1880,10 +2004,13 @@ class ConditionalFreqDist(defaultdict):
         :type title: bool
         """
 
-        cumulative = _get_kwarg(kwargs, 'cumulative', False)
-        conditions = _get_kwarg(kwargs, 'conditions', sorted(self.conditions()))
-        samples = _get_kwarg(kwargs, 'samples',
-                             sorted(set(v for c in conditions for v in self[c])))  # this computation could be wasted
+        cumulative = _get_kwarg(kwargs, "cumulative", False)
+        conditions = _get_kwarg(kwargs, "conditions", sorted(self.conditions()))
+        samples = _get_kwarg(
+            kwargs,
+            "samples",
+            sorted(set(v for c in conditions if c in self for v in self[c])),
+        )  # this computation could be wasted
 
         width = max(len("%s" % s) for s in samples)
         freqs = dict()
@@ -1895,14 +2022,14 @@ class ConditionalFreqDist(defaultdict):
             width = max(width, max(len("%d" % f) for f in freqs[c]))
 
         condition_size = max(len("%s" % c) for c in conditions)
-        print(' ' * condition_size, end=' ')
+        print(" " * condition_size, end=" ")
         for s in samples:
-            print("%*s" % (width, s), end=' ')
+            print("%*s" % (width, s), end=" ")
         print()
         for c in conditions:
-            print("%*s" % (condition_size, c), end=' ')
+            print("%*s" % (condition_size, c), end=" ")
             for f in freqs[c]:
-                print("%*d" % (width, f), end=' ')
+                print("%*d" % (width, f), end=" ")
             print()
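An illustrative round trip through the class (conditions here are word lengths):

    cfd = ConditionalFreqDist(
        (len(w), w) for w in "the quick brown fox jumped".split()
    )
    cfd.N()            # 5 outcomes across all conditions
    cfd[3]["the"]      # 1
    cfd.tabulate(conditions=[3, 5])
    # cfd.plot(...) draws the same data and now returns the matplotlib axes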
 
     # Mathematical operators
@@ -1978,16 +2105,20 @@ class ConditionalFreqDist(defaultdict):
     def __le__(self, other):
         if not isinstance(other, ConditionalFreqDist):
             raise_unorderable_types("<=", self, other)
-        return set(self.conditions()).issubset(other.conditions()) \
-               and all(self[c] <= other[c] for c in self.conditions())
+        return set(self.conditions()).issubset(other.conditions()) and all(
+            self[c] <= other[c] for c in self.conditions()
+        )
+
     def __lt__(self, other):
         if not isinstance(other, ConditionalFreqDist):
             raise_unorderable_types("<", self, other)
         return self <= other and self != other
+
     def __ge__(self, other):
         if not isinstance(other, ConditionalFreqDist):
             raise_unorderable_types(">=", self, other)
         return other <= self
+
     def __gt__(self, other):
         if not isinstance(other, ConditionalFreqDist):
             raise_unorderable_types(">", self, other)
@@ -1999,12 +2130,11 @@ class ConditionalFreqDist(defaultdict):
 
         :rtype: str
         """
-        return '<ConditionalFreqDist with %d conditions>' % len(self)
+        return "<ConditionalFreqDist with %d conditions>" % len(self)
+
 
 
-@compat.python_2_unicode_compatible
-@add_metaclass(ABCMeta)
-class ConditionalProbDistI(dict):
+class ConditionalProbDistI(dict, metaclass=ABCMeta):
     """
     A collection of probability distributions for a single experiment
     run under different conditions.  Conditional probability
@@ -2017,6 +2147,7 @@ class ConditionalProbDistI(dict):
     condition to the ``ProbDist`` for the experiment under that
     condition.
     """
+
     @abstractmethod
     def __init__(self):
         """
@@ -2039,7 +2170,7 @@ class ConditionalProbDistI(dict):
 
         :rtype: str
         """
-        return '<%s with %d conditions>' % (type(self).__name__, len(self))
+        return "<%s with %d conditions>" % (type(self).__name__, len(self))
 
 
 class ConditionalProbDist(ConditionalProbDistI):
@@ -2075,8 +2206,8 @@ class ConditionalProbDist(ConditionalProbDistI):
         0.423...
 
     """
-    def __init__(self, cfdist, probdist_factory,
-                 *factory_args, **factory_kw_args):
+
+    def __init__(self, cfdist, probdist_factory, *factory_args, **factory_kw_args):
         """
         Construct a new conditional probability distribution, based on
         the given conditional frequency distribution and ``ProbDist``
@@ -2105,15 +2236,17 @@ class ConditionalProbDist(ConditionalProbDistI):
         self._factory_kw_args = factory_kw_args
 
         for condition in cfdist:
-            self[condition] = probdist_factory(cfdist[condition],
-                                               *factory_args, **factory_kw_args)
+            self[condition] = probdist_factory(
+                cfdist[condition], *factory_args, **factory_kw_args
+            )
 
     def __missing__(self, key):
-        self[key] = self._probdist_factory(FreqDist(),
-                                           *self._factory_args,
-                                           **self._factory_kw_args)
+        self[key] = self._probdist_factory(
+            FreqDist(), *self._factory_args, **self._factory_kw_args
+        )
         return self[key]
 
+
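An illustrative sketch of the factory and the __missing__ fallback above:

    cfd = ConditionalFreqDist((len(w), w) for w in "a bb bb ccc".split())
    cpd = ConditionalProbDist(cfd, MLEProbDist)
    cpd[2].prob("bb")   # 1.0: both length-2 tokens are 'bb'
    cpd[9].prob("x")    # 0: an empty MLEProbDist is built on demand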
 class DictionaryConditionalProbDist(ConditionalProbDistI):
     """
     An alternative ConditionalProbDist that simply wraps a dictionary of
@@ -2132,6 +2265,7 @@ class DictionaryConditionalProbDist(ConditionalProbDistI):
         self[key] = DictionaryProbDist()
         return self[key]
 
+
 ##//////////////////////////////////////////////////////
 ## Adding in log-space.
 ##//////////////////////////////////////////////////////
@@ -2139,6 +2273,7 @@ class DictionaryConditionalProbDist(ConditionalProbDistI):
 # If the difference is bigger than this, then just take the bigger one:
 _ADD_LOGS_MAX_DIFF = math.log(1e-30, 2)
 
+
 def add_logs(logx, logy):
     """
     Given two numbers ``logx`` = *log(x)* and ``logy`` = *log(y)*, return
@@ -2146,20 +2281,23 @@ def add_logs(logx, logy):
     ``log(2**(logx)+2**(logy))``, but the actual implementation
     avoids overflow errors that could result from direct computation.
     """
-    if (logx < logy + _ADD_LOGS_MAX_DIFF):
+    if logx < logy + _ADD_LOGS_MAX_DIFF:
         return logy
-    if (logy < logx + _ADD_LOGS_MAX_DIFF):
+    if logy < logx + _ADD_LOGS_MAX_DIFF:
         return logx
     base = min(logx, logy)
-    return base + math.log(2**(logx-base) + 2**(logy-base), 2)
+    return base + math.log(2 ** (logx - base) + 2 ** (logy - base), 2)
+
 
 def sum_logs(logs):
-    return (reduce(add_logs, logs[1:], logs[0]) if len(logs) != 0 else _NINF)
+    return reduce(add_logs, logs[1:], logs[0]) if len(logs) != 0 else _NINF
+
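A hand-checked example (illustrative):

    add_logs(-2.0, -2.0)     # -1.0, since 0.25 + 0.25 = 0.5 and log2(0.5) = -1
    sum_logs([-2.0, -2.0])   # same result, folded with reduce()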
 
 ##//////////////////////////////////////////////////////
 ##  Probabilistic Mix-in
 ##//////////////////////////////////////////////////////
 
+
 class ProbabilisticMixIn(object):
     """
     A mix-in class to associate probabilities with other classes
@@ -2185,6 +2323,7 @@ class ProbabilisticMixIn(object):
     You should generally also redefine the string representation
     methods, the comparison methods, and the hashing method.
     """
+
     def __init__(self, **kwargs):
         """
         Initialize this object's probability.  This initializer should
@@ -2197,14 +2336,13 @@ class ProbabilisticMixIn(object):
             the object.
         :type logprob: float
         """
-        if 'prob' in kwargs:
-            if 'logprob' in kwargs:
-                raise TypeError('Must specify either prob or logprob '
-                                '(not both)')
+        if "prob" in kwargs:
+            if "logprob" in kwargs:
+                raise TypeError("Must specify either prob or logprob " "(not both)")
             else:
-                ProbabilisticMixIn.set_prob(self, kwargs['prob'])
-        elif 'logprob' in kwargs:
-            ProbabilisticMixIn.set_logprob(self, kwargs['logprob'])
+                ProbabilisticMixIn.set_prob(self, kwargs["prob"])
+        elif "logprob" in kwargs:
+            ProbabilisticMixIn.set_logprob(self, kwargs["logprob"])
         else:
             self.__prob = self.__logprob = None
 
@@ -2237,8 +2375,9 @@ class ProbabilisticMixIn(object):
         :rtype: float
         """
         if self.__prob is None:
-            if self.__logprob is None: return None
-            self.__prob = 2**(self.__logprob)
+            if self.__logprob is None:
+                return None
+            self.__prob = 2 ** (self.__logprob)
         return self.__prob
 
     def logprob(self):
@@ -2249,18 +2388,23 @@ class ProbabilisticMixIn(object):
         :rtype: float
         """
         if self.__logprob is None:
-            if self.__prob is None: return None
+            if self.__prob is None:
+                return None
             self.__logprob = math.log(self.__prob, 2)
         return self.__logprob
 
+
 class ImmutableProbabilisticMixIn(ProbabilisticMixIn):
     def set_prob(self, prob):
-        raise ValueError('%s is immutable' % self.__class__.__name__)
+        raise ValueError("%s is immutable" % self.__class__.__name__)
+
     def set_logprob(self, prob):
-        raise ValueError('%s is immutable' % self.__class__.__name__)
+        raise ValueError("%s is immutable" % self.__class__.__name__)
+
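An illustrative subclass, following the pattern the ProbabilisticMixIn docstring describes (ProbabilisticToken is a made-up name):

    class ProbabilisticToken(ProbabilisticMixIn):
        def __init__(self, word, **kwargs):
            self.word = word
            ProbabilisticMixIn.__init__(self, **kwargs)

    t = ProbabilisticToken("fox", prob=0.25)
    t.logprob()   # -2.0, derived lazily from prob via log base 2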
 
 ## Helper function for processing keyword arguments
 
+
 def _get_kwarg(kwargs, key, default):
     if key in kwargs:
         arg = kwargs[key]
@@ -2269,24 +2413,28 @@ def _get_kwarg(kwargs, key, default):
         arg = default
     return arg
 
+
 ##//////////////////////////////////////////////////////
 ##  Demonstration
 ##//////////////////////////////////////////////////////
 
+
 def _create_rand_fdist(numsamples, numoutcomes):
     """
     Create a new frequency distribution, with random samples.  The
     samples are numbers from 1 to ``numsamples``, and are generated by
     summing two numbers, each of which has a uniform distribution.
     """
-    import random
+
     fdist = FreqDist()
     for x in range(numoutcomes):
-        y = (random.randint(1, (1 + numsamples) // 2) +
-             random.randint(0, numsamples // 2))
+        y = random.randint(1, (1 + numsamples) // 2) + random.randint(
+            0, numsamples // 2
+        )
         fdist[y] += 1
     return fdist
 
+
 def _create_sum_pdist(numsamples):
     """
     Return the true probability distribution for the experiment
@@ -2295,9 +2443,10 @@ def _create_sum_pdist(numsamples):
     fdist = FreqDist()
     for x in range(1, (1 + numsamples) // 2 + 1):
         for y in range(0, numsamples // 2 + 1):
-            fdist[x+y] += 1
+            fdist[x + y] += 1
     return MLEProbDist(fdist)
 
+
 def demo(numsamples=6, numoutcomes=500):
     """
     A demonstration of frequency distributions and probability
@@ -2338,64 +2487,88 @@ def demo(numsamples=6, numoutcomes=500):
 
     # Find the probability of each sample.
     vals = []
-    for n in range(1,numsamples+1):
-        vals.append(tuple([n, fdist1.freq(n)] +
-                          [pdist.prob(n) for pdist in pdists]))
+    for n in range(1, numsamples + 1):
+        vals.append(tuple([n, fdist1.freq(n)] + [pdist.prob(n) for pdist in pdists]))
 
     # Print the results in a formatted table.
-    print(('%d samples (1-%d); %d outcomes were sampled for each FreqDist' %
-           (numsamples, numsamples, numoutcomes)))
-    print('='*9*(len(pdists)+2))
-    FORMATSTR = '      FreqDist '+ '%8s '*(len(pdists)-1) + '|  Actual'
+    print(
+        (
+            "%d samples (1-%d); %d outcomes were sampled for each FreqDist"
+            % (numsamples, numsamples, numoutcomes)
+        )
+    )
+    print("=" * 9 * (len(pdists) + 2))
+    FORMATSTR = "      FreqDist " + "%8s " * (len(pdists) - 1) + "|  Actual"
     print(FORMATSTR % tuple(repr(pdist)[1:9] for pdist in pdists[:-1]))
-    print('-'*9*(len(pdists)+2))
-    FORMATSTR = '%3d   %8.6f ' + '%8.6f '*(len(pdists)-1) + '| %8.6f'
+    print("-" * 9 * (len(pdists) + 2))
+    FORMATSTR = "%3d   %8.6f " + "%8.6f " * (len(pdists) - 1) + "| %8.6f"
     for val in vals:
         print(FORMATSTR % val)
 
     # Print the totals for each column (should all be 1.0)
     zvals = list(zip(*vals))
     sums = [sum(val) for val in zvals[1:]]
-    print('-'*9*(len(pdists)+2))
-    FORMATSTR = 'Total ' + '%8.6f '*(len(pdists)) + '| %8.6f'
+    print("-" * 9 * (len(pdists) + 2))
+    FORMATSTR = "Total " + "%8.6f " * (len(pdists)) + "| %8.6f"
     print(FORMATSTR % tuple(sums))
-    print('='*9*(len(pdists)+2))
+    print("=" * 9 * (len(pdists) + 2))
 
     # Display the distributions themselves, if they're short enough.
     if len("%s" % fdist1) < 70:
-        print('  fdist1: %s' % fdist1)
-        print('  fdist2: %s' % fdist2)
-        print('  fdist3: %s' % fdist3)
+        print("  fdist1: %s" % fdist1)
+        print("  fdist2: %s" % fdist2)
+        print("  fdist3: %s" % fdist3)
     print()
 
-    print('Generating:')
+    print("Generating:")
     for pdist in pdists:
         fdist = FreqDist(pdist.generate() for i in range(5000))
-        print('%20s %s' % (pdist.__class__.__name__[:20], ("%s" % fdist)[:55]))
+        print("%20s %s" % (pdist.__class__.__name__[:20], ("%s" % fdist)[:55]))
     print()
 
+
 def gt_demo():
     from nltk import corpus
-    emma_words = corpus.gutenberg.words('austen-emma.txt')
+
+    emma_words = corpus.gutenberg.words("austen-emma.txt")
     fd = FreqDist(emma_words)
     sgt = SimpleGoodTuringProbDist(fd)
-    print('%18s %8s  %14s' \
-        % ("word", "freqency", "SimpleGoodTuring"))
-    fd_keys_sorted=(key for key, value in sorted(fd.items(), key=lambda item: item[1], reverse=True))
+    print("%18s %8s  %14s" % ("word", "freqency", "SimpleGoodTuring"))
+    fd_keys_sorted = (
+        key for key, value in sorted(fd.items(), key=lambda item: item[1], reverse=True)
+    )
     for key in fd_keys_sorted:
-        print('%18s %8d  %14e' \
-            % (key, fd[key], sgt.prob(key)))
+        print("%18s %8d  %14e" % (key, fd[key], sgt.prob(key)))
+
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo(6, 10)
     demo(5, 5000)
     gt_demo()
 
-__all__ = ['ConditionalFreqDist', 'ConditionalProbDist',
-           'ConditionalProbDistI', 'CrossValidationProbDist',
-           'DictionaryConditionalProbDist', 'DictionaryProbDist', 'ELEProbDist',
-           'FreqDist', 'SimpleGoodTuringProbDist', 'HeldoutProbDist',
-           'ImmutableProbabilisticMixIn', 'LaplaceProbDist', 'LidstoneProbDist',
-           'MLEProbDist', 'MutableProbDist', 'KneserNeyProbDist', 'ProbDistI', 'ProbabilisticMixIn',
-           'UniformProbDist', 'WittenBellProbDist', 'add_logs',
-           'log_likelihood', 'sum_logs', 'entropy']
+__all__ = [
+    "ConditionalFreqDist",
+    "ConditionalProbDist",
+    "ConditionalProbDistI",
+    "CrossValidationProbDist",
+    "DictionaryConditionalProbDist",
+    "DictionaryProbDist",
+    "ELEProbDist",
+    "FreqDist",
+    "SimpleGoodTuringProbDist",
+    "HeldoutProbDist",
+    "ImmutableProbabilisticMixIn",
+    "LaplaceProbDist",
+    "LidstoneProbDist",
+    "MLEProbDist",
+    "MutableProbDist",
+    "KneserNeyProbDist",
+    "ProbDistI",
+    "ProbabilisticMixIn",
+    "UniformProbDist",
+    "WittenBellProbDist",
+    "add_logs",
+    "log_likelihood",
+    "sum_logs",
+    "entropy",
+]
diff --git a/nlp_resource_data/nltk/probability.pyc b/nlp_resource_data/nltk/probability.pyc
deleted file mode 100755 (executable)
index 863dae5..0000000
Binary files a/nlp_resource_data/nltk/probability.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 7bad174..bc2bca4
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Semantic Interpretation
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -41,16 +41,30 @@ is then created with domain and valuation as parameters.
     >>> m = Model(dom, val)
 """
 
-from nltk.sem.util import (parse_sents, interpret_sents, evaluate_sents,
-                           root_semrep)
-from nltk.sem.evaluate import (Valuation, Assignment, Model, Undefined,
-                               is_rel, set2rel, arity, read_valuation)
-from nltk.sem.logic import (boolean_ops, binding_ops, equality_preds,
-                           read_logic, Variable, Expression,
-                           ApplicationExpression, LogicalExpressionException)
+from nltk.sem.util import parse_sents, interpret_sents, evaluate_sents, root_semrep
+from nltk.sem.evaluate import (
+    Valuation,
+    Assignment,
+    Model,
+    Undefined,
+    is_rel,
+    set2rel,
+    arity,
+    read_valuation,
+)
+from nltk.sem.logic import (
+    boolean_ops,
+    binding_ops,
+    equality_preds,
+    read_logic,
+    Variable,
+    Expression,
+    ApplicationExpression,
+    LogicalExpressionException,
+)
 from nltk.sem.skolemize import skolemize
 from nltk.sem.lfg import FStructure
-from nltk.sem.relextract import (extract_rels, rtuple, clause)
+from nltk.sem.relextract import extract_rels, rtuple, clause
 from nltk.sem.boxer import Boxer
 from nltk.sem.drt import DrtExpression, DRS
 
old mode 100755 (executable)
new mode 100644 (file)
similarity index 52%
rename from nlp_resource_data/nltk/sem/__init__.pyc
rename to nlp_resource_data/nltk/sem/__pycache__/__init__.cpython-37.pyc
index 7cb5d8b..bc69cda
Binary files a/nlp_resource_data/nltk/sem/__init__.pyc and b/nlp_resource_data/nltk/sem/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/boxer.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/boxer.cpython-37.pyc
new file mode 100644 (file)
index 0000000..d3a4ad2
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/boxer.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/chat80.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/chat80.cpython-37.pyc
new file mode 100644 (file)
index 0000000..45a022e
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/chat80.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/cooper_storage.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/cooper_storage.cpython-37.pyc
new file mode 100644 (file)
index 0000000..fd34437
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/cooper_storage.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/drt.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/drt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..fb72736
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/drt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/drt_glue_demo.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/drt_glue_demo.cpython-37.pyc
new file mode 100644 (file)
index 0000000..33f6c6e
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/drt_glue_demo.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/evaluate.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/evaluate.cpython-37.pyc
new file mode 100644 (file)
index 0000000..b212373
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/evaluate.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/glue.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/glue.cpython-37.pyc
new file mode 100644 (file)
index 0000000..c722470
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/glue.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/hole.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/hole.cpython-37.pyc
new file mode 100644 (file)
index 0000000..95c6642
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/hole.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/lfg.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/lfg.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a5b8834
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/lfg.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/linearlogic.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/linearlogic.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2cf3401
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/linearlogic.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/logic.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/logic.cpython-37.pyc
new file mode 100644 (file)
index 0000000..5089632
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/logic.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/relextract.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/relextract.cpython-37.pyc
new file mode 100644 (file)
index 0000000..4f0b2a1
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/relextract.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/skolemize.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/skolemize.cpython-37.pyc
new file mode 100644 (file)
index 0000000..95e789a
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/skolemize.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..fc5fcd4
Binary files /dev/null and b/nlp_resource_data/nltk/sem/__pycache__/util.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index a56017f..bc87dab
@@ -3,7 +3,7 @@
 #
 # Author: Dan Garrette <dhgarrette@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
@@ -24,7 +24,6 @@ Usage:
         models/
             boxer/
 """
-from __future__ import print_function, unicode_literals
 
 import os
 import re
@@ -36,14 +35,25 @@ from functools import reduce
 
 from nltk.internals import find_binary
 
-from nltk.sem.logic import (ExpectedMoreTokensException, LogicalExpressionException,
-                            UnexpectedTokenException, Variable)
+from nltk.sem.logic import (
+    ExpectedMoreTokensException,
+    LogicalExpressionException,
+    UnexpectedTokenException,
+    Variable,
+)
+
+from nltk.sem.drt import (
+    DRS,
+    DrtApplicationExpression,
+    DrtEqualityExpression,
+    DrtNegatedExpression,
+    DrtOrExpression,
+    DrtParser,
+    DrtProposition,
+    DrtTokens,
+    DrtVariableExpression,
+)
 
-from nltk.sem.drt import (DRS, DrtApplicationExpression, DrtEqualityExpression,
-                          DrtNegatedExpression, DrtOrExpression, DrtParser,
-                          DrtProposition, DrtTokens, DrtVariableExpression)
-
-from nltk.compat import python_2_unicode_compatible
 
 class Boxer(object):
     """
@@ -51,7 +61,14 @@ class Boxer(object):
     semantic parser that produces Discourse Representation Structures (DRSs).
     """
 
-    def __init__(self, boxer_drs_interpreter=None, elimeq=False, bin_dir=None, verbose=False, resolve=True):
+    def __init__(
+        self,
+        boxer_drs_interpreter=None,
+        elimeq=False,
+        bin_dir=None,
+        verbose=False,
+        resolve=True,
+    ):
         """
         :param boxer_drs_interpreter: A class that converts from the
         ``AbstractBoxerDrs`` object hierarchy to a different object.  The
@@ -60,7 +77,7 @@ class Boxer(object):
         :param elimeq: When set to true, Boxer removes all equalities from the
         DRSs and discourse referents standing in the equality relation are
         unified, but only if this can be done in a meaning-preserving manner.
-        :param resolve: When set to true, Boxer will resolve all anaphoric DRSs and perform merge-reduction. 
+        :param resolve: When set to true, Boxer will resolve all anaphoric DRSs and perform merge-reduction.
         Resolution follows Van der Sandt's theory of binding and accommodation.
         """
         if boxer_drs_interpreter is None:
@@ -73,9 +90,11 @@ class Boxer(object):
         self.set_bin_dir(bin_dir, verbose)
 
     def set_bin_dir(self, bin_dir, verbose=False):
-        self._candc_bin = self._find_binary('candc', bin_dir, verbose)
-        self._candc_models_path = os.path.normpath(os.path.join(self._candc_bin[:-5], '../models'))
-        self._boxer_bin = self._find_binary('boxer', bin_dir, verbose)
+        self._candc_bin = self._find_binary("candc", bin_dir, verbose)
+        self._candc_models_path = os.path.normpath(
+            os.path.join(self._candc_bin[:-5], "../models")
+        )
+        self._boxer_bin = self._find_binary("boxer", bin_dir, verbose)
 
     def interpret(self, input, discourse_id=None, question=False, verbose=False):
         """
@@ -86,7 +105,7 @@ class Boxer(object):
         :param discourse_id: str An identifier to be inserted to each occurrence-indexed predicate.
         :return: ``drt.DrtExpression``
         """
-        discourse_ids = ([discourse_id] if discourse_id is not None else None)
+        discourse_ids = [discourse_id] if discourse_id is not None else None
         d, = self.interpret_multi_sents([[input]], discourse_ids, question, verbose)
         if not d:
             raise Exception('Unable to interpret: "{0}"'.format(input))
@@ -101,13 +120,15 @@ class Boxer(object):
         :param discourse_id: str An identifier to be inserted to each occurrence-indexed predicate.
         :return: ``drt.DrtExpression``
         """
-        discourse_ids = ([discourse_id] if discourse_id is not None else None)
+        discourse_ids = [discourse_id] if discourse_id is not None else None
         d, = self.interpret_multi_sents([input], discourse_ids, question, verbose)
         if not d:
             raise Exception('Unable to interpret: "{0}"'.format(input))
         return d
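An illustrative call (requires the external C&C parser and Boxer binaries, located via bin_dir or the CANDC environment variable; not runnable without them):

    b = Boxer()
    drs = b.interpret("John sees a dog")   # a drt.DrtExpression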
 
-    def interpret_sents(self, inputs, discourse_ids=None, question=False, verbose=False):
+    def interpret_sents(
+        self, inputs, discourse_ids=None, question=False, verbose=False
+    ):
         """
         Use Boxer to give a first order representation.
 
@@ -116,9 +137,13 @@ class Boxer(object):
         :param discourse_ids: list of str Identifiers to be inserted to each occurrence-indexed predicate.
         :return: list of ``drt.DrtExpression``
         """
-        return self.interpret_multi_sents([[input] for input in inputs], discourse_ids, question, verbose)
+        return self.interpret_multi_sents(
+            [[input] for input in inputs], discourse_ids, question, verbose
+        )
 
-    def interpret_multi_sents(self, inputs, discourse_ids=None, question=False, verbose=False):
+    def interpret_multi_sents(
+        self, inputs, discourse_ids=None, question=False, verbose=False
+    ):
         """
         Use Boxer to give a first order representation.
 
@@ -138,8 +163,8 @@ class Boxer(object):
         candc_out = self._call_candc(inputs, discourse_ids, question, verbose=verbose)
         boxer_out = self._call_boxer(candc_out, verbose=verbose)
 
-#        if 'ERROR: input file contains no ccg/2 terms.' in boxer_out:
-#            raise UnparseableInputException('Could not parse with candc: "%s"' % input_str)
+        #        if 'ERROR: input file contains no ccg/2 terms.' in boxer_out:
+        #            raise UnparseableInputException('Could not parse with candc: "%s"' % input_str)
 
         drs_dict = self._parse_to_drs_dict(boxer_out, use_disc_id)
         return [drs_dict.get(id, None) for id in discourse_ids]
@@ -153,9 +178,26 @@ class Boxer(object):
         :param filename: str A filename for the output file
         :return: stdout
         """
-        args = ['--models', os.path.join(self._candc_models_path, ['boxer','questions'][question]),
-                '--candc-printer', 'boxer']
-        return self._call('\n'.join(sum((["<META>'{0}'".format(id)] + d for d,id in zip(inputs,discourse_ids)), [])), self._candc_bin, args, verbose)
+        args = [
+            "--models",
+            os.path.join(self._candc_models_path, ["boxer", "questions"][question]),
+            "--candc-printer",
+            "boxer",
+        ]
+        return self._call(
+            "\n".join(
+                sum(
+                    (
+                        ["<META>'{0}'".format(id)] + d
+                        for d, id in zip(inputs, discourse_ids)
+                    ),
+                    [],
+                )
+            ),
+            self._candc_bin,
+            args,
+            verbose,
+        )
 
     def _call_boxer(self, candc_out, verbose=False):
         """
@@ -166,31 +208,45 @@ class Boxer(object):
         """
         f = None
         try:
-            fd, temp_filename = tempfile.mkstemp(prefix='boxer-', suffix='.in', text=True)
-            f = os.fdopen(fd, 'w')
+            fd, temp_filename = tempfile.mkstemp(
+                prefix="boxer-", suffix=".in", text=True
+            )
+            f = os.fdopen(fd, "w")
             f.write(candc_out)
         finally:
-            if f: f.close()
-
-        args = ['--box', 'false',
-                '--semantics', 'drs',
-                #'--flat', 'false', # removed from boxer
-                '--resolve', ['false','true'][self._resolve],
-                '--elimeq', ['false','true'][self._elimeq],
-                '--format', 'prolog',
-                '--instantiate', 'true',
-                '--input', temp_filename]
+            if f:
+                f.close()
+
+        args = [
+            "--box",
+            "false",
+            "--semantics",
+            "drs",
+            #'--flat', 'false', # removed from boxer
+            "--resolve",
+            ["false", "true"][self._resolve],
+            "--elimeq",
+            ["false", "true"][self._elimeq],
+            "--format",
+            "prolog",
+            "--instantiate",
+            "true",
+            "--input",
+            temp_filename,
+        ]
         stdout = self._call(None, self._boxer_bin, args, verbose)
         os.remove(temp_filename)
         return stdout
 
     def _find_binary(self, name, bin_dir, verbose=False):
-        return find_binary(name,
+        return find_binary(
+            name,
             path_to_bin=bin_dir,
-            env_vars=['CANDC'],
-            url='http://svn.ask.it.usyd.edu.au/trac/candc/',
-            binary_names=[name, name + '.exe'],
-            verbose=verbose)
+            env_vars=["CANDC"],
+            url="http://svn.ask.it.usyd.edu.au/trac/candc/",
+            binary_names=[name, name + ".exe"],
+            verbose=verbose,
+        )
 
     def _call(self, input_str, binary, args=[], verbose=False):
         """
@@ -202,59 +258,67 @@ class Boxer(object):
         :return: stdout
         """
         if verbose:
-            print('Calling:', binary)
-            print('Args:', args)
-            print('Input:', input_str)
-            print('Command:', binary + ' ' + ' '.join(args))
+            print("Calling:", binary)
+            print("Args:", args)
+            print("Input:", input_str)
+            print("Command:", binary + " " + " ".join(args))
 
         # Call via a subprocess
         if input_str is None:
             cmd = [binary] + args
             p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         else:
-            cmd = 'echo "{0}" | {1} {2}'.format(input_str, binary, ' '.join(args))
-            p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
+            cmd = 'echo "{0}" | {1} {2}'.format(input_str, binary, " ".join(args))
+            p = subprocess.Popen(
+                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
+            )
         stdout, stderr = p.communicate()
 
         if verbose:
-            print('Return code:', p.returncode)
-            if stdout: print('stdout:\n', stdout, '\n')
-            if stderr: print('stderr:\n', stderr, '\n')
+            print("Return code:", p.returncode)
+            if stdout:
+                print("stdout:\n", stdout, "\n")
+            if stderr:
+                print("stderr:\n", stderr, "\n")
         if p.returncode != 0:
-            raise Exception('ERROR CALLING: {0} {1}\nReturncode: {2}\n{3}'.format(binary, ' '.join(args), p.returncode, stderr))
+            raise Exception(
+                "ERROR CALLING: {0} {1}\nReturncode: {2}\n{3}".format(
+                    binary, " ".join(args), p.returncode, stderr
+                )
+            )
 
         return stdout
 
     def _parse_to_drs_dict(self, boxer_out, use_disc_id):
-        lines = boxer_out.split('\n')
+        lines = boxer_out.split("\n")
         drs_dict = {}
         i = 0
         while i < len(lines):
             line = lines[i]
-            if line.startswith('id('):
-                comma_idx = line.index(',')
+            if line.startswith("id("):
+                comma_idx = line.index(",")
                 discourse_id = line[3:comma_idx]
                 if discourse_id[0] == "'" and discourse_id[-1] == "'":
                     discourse_id = discourse_id[1:-1]
-                drs_id = line[comma_idx+1:line.index(')')]
+                drs_id = line[comma_idx + 1 : line.index(")")]
                 i += 1
                 line = lines[i]
-                assert line.startswith('sem({0},'.format(drs_id))
+                assert line.startswith("sem({0},".format(drs_id))
                 if line[-4:] == "').'":
                     line = line[:-4] + ")."
-                assert line.endswith(').'), "can't parse line: {0}".format(line)
+                assert line.endswith(")."), "can't parse line: {0}".format(line)
 
-                search_start = len('sem({0},['.format(drs_id))
+                search_start = len("sem({0},[".format(drs_id))
                 brace_count = 1
                 drs_start = -1
-                for j,c in enumerate(line[search_start:]):
-                    if(c == '['):
+                for j, c in enumerate(line[search_start:]):
+                    if c == "[":
                         brace_count += 1
-                    if(c == ']'):
+                    if c == "]":
                         brace_count -= 1
-                        if(brace_count == 0):
+                        if brace_count == 0:
                             drs_start = search_start + j + 1
-                            if line[drs_start:drs_start+3] == "','":
+                            if line[drs_start : drs_start + 3] == "','":
                                 drs_start = drs_start + 3
                             else:
                                 drs_start = drs_start + 1
@@ -268,7 +332,7 @@ class Boxer(object):
         return drs_dict
 
     def _parse_drs(self, drs_string, discourse_id, use_disc_id):
-        return BoxerOutputDrsParser([None,discourse_id][use_disc_id]).parse(drs_string)
+        return BoxerOutputDrsParser([None, discourse_id][use_disc_id]).parse(drs_string)
 
 
 class BoxerOutputDrsParser(DrtParser):
@@ -286,7 +350,7 @@ class BoxerOutputDrsParser(DrtParser):
         return DrtParser.parse(self, data, signature)
 
     def get_all_symbols(self):
-        return ['(', ')', ',', '[', ']',':']
+        return ["(", ")", ",", "[", "]", ":"]
 
     def handle(self, tok, context):
         return self.handle_drs(tok)
@@ -307,11 +371,11 @@ class BoxerOutputDrsParser(DrtParser):
         return accum
 
     def handle_drs(self, tok):
-        if tok == 'drs':
+        if tok == "drs":
             return self.parse_drs()
-        elif tok in ['merge', 'smerge']:
+        elif tok in ["merge", "smerge"]:
             return self._handle_binary_expression(self._make_merge_expression)(None, [])
-        elif tok in ['alfa']:
+        elif tok in ["alfa"]:
             return self._handle_alfa(self._make_merge_expression)(None, [])
 
     def handle_condition(self, tok, indices):
@@ -321,64 +385,73 @@ class BoxerOutputDrsParser(DrtParser):
         :param indices: list of int
         :return: list of ``DrtExpression``
         """
-        if tok == 'not':
+        if tok == "not":
             return [self._handle_not()]
 
-        if tok == 'or':
+        if tok == "or":
             conds = [self._handle_binary_expression(self._make_or_expression)]
-        elif tok == 'imp':
+        elif tok == "imp":
             conds = [self._handle_binary_expression(self._make_imp_expression)]
-        elif tok == 'eq':
+        elif tok == "eq":
             conds = [self._handle_eq()]
-        elif tok == 'prop':
+        elif tok == "prop":
             conds = [self._handle_prop()]
 
-        elif tok == 'pred':
+        elif tok == "pred":
             conds = [self._handle_pred()]
-        elif tok == 'named':
+        elif tok == "named":
             conds = [self._handle_named()]
-        elif tok == 'rel':
+        elif tok == "rel":
             conds = [self._handle_rel()]
-        elif tok == 'timex':
+        elif tok == "timex":
             conds = self._handle_timex()
-        elif tok == 'card':
+        elif tok == "card":
             conds = [self._handle_card()]
 
-        elif tok == 'whq':
+        elif tok == "whq":
             conds = [self._handle_whq()]
-        elif tok == 'duplex':
-                conds = [self._handle_duplex()]
+        elif tok == "duplex":
+            conds = [self._handle_duplex()]
 
         else:
             conds = []
 
-        return sum([[cond(sent_index, word_indices) for cond in conds] for sent_index, word_indices in self._sent_and_word_indices(indices)], [])
+        return sum(
+            [
+                [cond(sent_index, word_indices) for cond in conds]
+                for sent_index, word_indices in self._sent_and_word_indices(indices)
+            ],
+            [],
+        )
 
     def _handle_not(self):
-        self.assertToken(self.token(), '(')
+        self.assertToken(self.token(), "(")
         drs = self.process_next_expression(None)
-        self.assertToken(self.token(), ')')
+        self.assertToken(self.token(), ")")
         return BoxerNot(drs)
 
     def _handle_pred(self):
-        #pred(_G3943, dog, n, 0)
-        self.assertToken(self.token(), '(')
+        # pred(_G3943, dog, n, 0)
+        self.assertToken(self.token(), "(")
         variable = self.parse_variable()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         name = self.token()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         pos = self.token()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         sense = int(self.token())
-        self.assertToken(self.token(), ')')
+        self.assertToken(self.token(), ")")
 
         def _handle_pred_f(sent_index, word_indices):
-            return BoxerPred(self.discourse_id, sent_index, word_indices, variable, name, pos, sense)
+            return BoxerPred(
+                self.discourse_id, sent_index, word_indices, variable, name, pos, sense
+            )
+
         return _handle_pred_f
 
     def _handle_duplex(self):
-        #duplex(whq, drs(...), var, drs(...))
-        self.assertToken(self.token(), '(')
+        # duplex(whq, drs(...), var, drs(...))
+        self.assertToken(self.token(), "(")
         # self.assertToken(self.token(), '[')
         ans_types = []
         # while self.token(0) != ']':
@@ -396,231 +469,298 @@ class BoxerOutputDrsParser(DrtParser):
         #     else:
         #         ans_types.append(self.token())
         # self.token() #swallow the ']'
-      
-        self.assertToken(self.token(), 'whq')
-        self.assertToken(self.token(), ',')
+
+        self.assertToken(self.token(), "whq")
+        self.assertToken(self.token(), ",")
         d1 = self.process_next_expression(None)
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         ref = self.parse_variable()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         d2 = self.process_next_expression(None)
-        self.assertToken(self.token(), ')')
-        return lambda sent_index, word_indices: BoxerWhq(self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2)
-
+        self.assertToken(self.token(), ")")
+        return lambda sent_index, word_indices: BoxerWhq(
+            self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2
+        )
 
     def _handle_named(self):
-        #named(x0, john, per, 0)
-        self.assertToken(self.token(), '(')
+        # named(x0, john, per, 0)
+        self.assertToken(self.token(), "(")
         variable = self.parse_variable()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         name = self.token()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         type = self.token()
-        self.assertToken(self.token(), ',')
-        sense = self.token() # as per boxer rev 2554
-        self.assertToken(self.token(), ')')
-        return lambda sent_index, word_indices: BoxerNamed(self.discourse_id, sent_index, word_indices, variable, name, type, sense)
+        self.assertToken(self.token(), ",")
+        sense = self.token()  # as per boxer rev 2554
+        self.assertToken(self.token(), ")")
+        return lambda sent_index, word_indices: BoxerNamed(
+            self.discourse_id, sent_index, word_indices, variable, name, type, sense
+        )
 
     def _handle_rel(self):
-        #rel(_G3993, _G3943, agent, 0)
-        self.assertToken(self.token(), '(')
+        # rel(_G3993, _G3943, agent, 0)
+        self.assertToken(self.token(), "(")
         var1 = self.parse_variable()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         var2 = self.parse_variable()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         rel = self.token()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         sense = int(self.token())
-        self.assertToken(self.token(), ')')
-        return lambda sent_index, word_indices: BoxerRel(self.discourse_id, sent_index, word_indices, var1, var2, rel, sense)
+        self.assertToken(self.token(), ")")
+        return lambda sent_index, word_indices: BoxerRel(
+            self.discourse_id, sent_index, word_indices, var1, var2, rel, sense
+        )
 
     def _handle_timex(self):
-        #timex(_G18322, date([]: (+), []:'XXXX', [1004]:'04', []:'XX'))
-        self.assertToken(self.token(), '(')
+        # timex(_G18322, date([]: (+), []:'XXXX', [1004]:'04', []:'XX'))
+        self.assertToken(self.token(), "(")
         arg = self.parse_variable()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         new_conds = self._handle_time_expression(arg)
-        self.assertToken(self.token(), ')')
+        self.assertToken(self.token(), ")")
         return new_conds
 
     def _handle_time_expression(self, arg):
-        #date([]: (+), []:'XXXX', [1004]:'04', []:'XX')
+        # date([]: (+), []:'XXXX', [1004]:'04', []:'XX')
         tok = self.token()
-        self.assertToken(self.token(), '(')
-        if tok == 'date':
+        self.assertToken(self.token(), "(")
+        if tok == "date":
             conds = self._handle_date(arg)
-        elif tok == 'time':
+        elif tok == "time":
             conds = self._handle_time(arg)
         else:
             return None
-        self.assertToken(self.token(), ')')
-        return [lambda sent_index, word_indices: BoxerPred(self.discourse_id, sent_index, word_indices, arg, tok, 'n', 0)] + \
-               [lambda sent_index, word_indices: cond for cond in conds]
+        self.assertToken(self.token(), ")")
+        return [
+            lambda sent_index, word_indices: BoxerPred(
+                self.discourse_id, sent_index, word_indices, arg, tok, "n", 0
+            )
+        ] + [
+            # bind cond per lambda; a bare closure would capture only the
+            # last cond once the comprehension finishes
+            lambda sent_index, word_indices, cond=cond: cond
+            for cond in conds
+        ]
 
     def _handle_date(self, arg):
-        #[]: (+), []:'XXXX', [1004]:'04', []:'XX'
+        # []: (+), []:'XXXX', [1004]:'04', []:'XX'
         conds = []
-        (sent_index, word_indices), = self._sent_and_word_indices(self._parse_index_list())
-        self.assertToken(self.token(), '(')
+        (sent_index, word_indices), = self._sent_and_word_indices(
+            self._parse_index_list()
+        )
+        self.assertToken(self.token(), "(")
         pol = self.token()
-        self.assertToken(self.token(), ')')
-        conds.append(BoxerPred(self.discourse_id, sent_index, word_indices, arg, 'date_pol_{0}'.format(pol), 'a', 0))
-        self.assertToken(self.token(), ',')
-
-        (sent_index, word_indices), = self._sent_and_word_indices(self._parse_index_list())
+        self.assertToken(self.token(), ")")
+        conds.append(
+            BoxerPred(
+                self.discourse_id,
+                sent_index,
+                word_indices,
+                arg,
+                "date_pol_{0}".format(pol),
+                "a",
+                0,
+            )
+        )
+        self.assertToken(self.token(), ",")
+
+        (sent_index, word_indices), = self._sent_and_word_indices(
+            self._parse_index_list()
+        )
         year = self.token()
-        if year != 'XXXX':
-            year = year.replace(':', '_')
-            conds.append(BoxerPred(self.discourse_id, sent_index, word_indices, arg, 'date_year_{0}'.format(year), 'a', 0))
-        self.assertToken(self.token(), ',')
-
-        (sent_index, word_indices), = self._sent_and_word_indices(self._parse_index_list())
+        if year != "XXXX":
+            year = year.replace(":", "_")
+            conds.append(
+                BoxerPred(
+                    self.discourse_id,
+                    sent_index,
+                    word_indices,
+                    arg,
+                    "date_year_{0}".format(year),
+                    "a",
+                    0,
+                )
+            )
+        self.assertToken(self.token(), ",")
+
+        (sent_index, word_indices), = self._sent_and_word_indices(
+            self._parse_index_list()
+        )
         month = self.token()
-        if month != 'XX':
-            conds.append(BoxerPred(self.discourse_id, sent_index, word_indices, arg, 'date_month_{0}'.format(month), 'a', 0))
-        self.assertToken(self.token(), ',')
-
-        (sent_index, word_indices), = self._sent_and_word_indices(self._parse_index_list())
+        if month != "XX":
+            conds.append(
+                BoxerPred(
+                    self.discourse_id,
+                    sent_index,
+                    word_indices,
+                    arg,
+                    "date_month_{0}".format(month),
+                    "a",
+                    0,
+                )
+            )
+        self.assertToken(self.token(), ",")
+
+        (sent_index, word_indices), = self._sent_and_word_indices(
+            self._parse_index_list()
+        )
         day = self.token()
-        if day != 'XX':
-            conds.append(BoxerPred(self.discourse_id, sent_index, word_indices, arg, 'date_day_{0}'.format(day), 'a', 0))
+        if day != "XX":
+            conds.append(
+                BoxerPred(
+                    self.discourse_id,
+                    sent_index,
+                    word_indices,
+                    arg,
+                    "date_day_{0}".format(day),
+                    "a",
+                    0,
+                )
+            )
 
         return conds
 
     def _handle_time(self, arg):
-        #time([1018]:'18', []:'XX', []:'XX')
+        # time([1018]:'18', []:'XX', []:'XX')
         conds = []
         self._parse_index_list()
         hour = self.token()
-        if hour != 'XX':
-            conds.append(self._make_atom('r_hour_2',arg,hour))
-        self.assertToken(self.token(), ',')
+        if hour != "XX":
+            conds.append(self._make_atom("r_hour_2", arg, hour))
+        self.assertToken(self.token(), ",")
 
         self._parse_index_list()
         min = self.token()
-        if min != 'XX':
-            conds.append(self._make_atom('r_min_2',arg,min))
-        self.assertToken(self.token(), ',')
+        if min != "XX":
+            conds.append(self._make_atom("r_min_2", arg, min))
+        self.assertToken(self.token(), ",")
 
         self._parse_index_list()
         sec = self.token()
-        if sec != 'XX':
-            conds.append(self._make_atom('r_sec_2',arg,sec))
+        if sec != "XX":
+            conds.append(self._make_atom("r_sec_2", arg, sec))
 
         return conds
 
     def _handle_card(self):
-        #card(_G18535, 28, ge)
-        self.assertToken(self.token(), '(')
+        # card(_G18535, 28, ge)
+        self.assertToken(self.token(), "(")
         variable = self.parse_variable()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         value = self.token()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         type = self.token()
-        self.assertToken(self.token(), ')')
-        return lambda sent_index, word_indices: BoxerCard(self.discourse_id, sent_index, word_indices, variable, value, type)
+        self.assertToken(self.token(), ")")
+        return lambda sent_index, word_indices: BoxerCard(
+            self.discourse_id, sent_index, word_indices, variable, value, type
+        )
 
     def _handle_prop(self):
-        #prop(_G15949, drs(...))
-        self.assertToken(self.token(), '(')
+        # prop(_G15949, drs(...))
+        self.assertToken(self.token(), "(")
         variable = self.parse_variable()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         drs = self.process_next_expression(None)
-        self.assertToken(self.token(), ')')
-        return lambda sent_index, word_indices: BoxerProp(self.discourse_id, sent_index, word_indices, variable, drs)
+        self.assertToken(self.token(), ")")
+        return lambda sent_index, word_indices: BoxerProp(
+            self.discourse_id, sent_index, word_indices, variable, drs
+        )
 
     def _parse_index_list(self):
-        #[1001,1002]:
+        # [1001,1002]:
         indices = []
-        self.assertToken(self.token(), '[')
-        while self.token(0) != ']':
+        self.assertToken(self.token(), "[")
+        while self.token(0) != "]":
             indices.append(self.parse_index())
-            if self.token(0) == ',':
-                self.token() #swallow ','
-        self.token() #swallow ']'
-        self.assertToken(self.token(), ':')
+            if self.token(0) == ",":
+                self.token()  # swallow ','
+        self.token()  # swallow ']'
+        self.assertToken(self.token(), ":")
         return indices
 
     def parse_drs(self):
-        #drs([[1001]:_G3943],
+        # drs([[1001]:_G3943],
         #    [[1002]:pred(_G3943, dog, n, 0)]
         #   )
-        self.assertToken(self.token(), '(')
-        self.assertToken(self.token(), '[')
+        self.assertToken(self.token(), "(")
+        self.assertToken(self.token(), "[")
         refs = set()
-        while self.token(0) != ']':
+        while self.token(0) != "]":
             indices = self._parse_index_list()
             refs.add(self.parse_variable())
-            if self.token(0) == ',':
-                self.token() #swallow ','
-        self.token() #swallow ']'
-        self.assertToken(self.token(), ',')
-        self.assertToken(self.token(), '[')
+            if self.token(0) == ",":
+                self.token()  # swallow ','
+        self.token()  # swallow ']'
+        self.assertToken(self.token(), ",")
+        self.assertToken(self.token(), "[")
         conds = []
-        while self.token(0) != ']':
+        while self.token(0) != "]":
             indices = self._parse_index_list()
             conds.extend(self.parse_condition(indices))
-            if self.token(0) == ',':
-                self.token() #swallow ','
-        self.token() #swallow ']'
-        self.assertToken(self.token(), ')')
+            if self.token(0) == ",":
+                self.token()  # swallow ','
+        self.token()  # swallow ']'
+        self.assertToken(self.token(), ")")
         return BoxerDrs(list(refs), conds)
 
     def _handle_binary_expression(self, make_callback):
-        self.assertToken(self.token(), '(')
+        self.assertToken(self.token(), "(")
         drs1 = self.process_next_expression(None)
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         drs2 = self.process_next_expression(None)
-        self.assertToken(self.token(), ')')
-        return lambda sent_index, word_indices: make_callback(sent_index, word_indices, drs1, drs2)
+        self.assertToken(self.token(), ")")
+        return lambda sent_index, word_indices: make_callback(
+            sent_index, word_indices, drs1, drs2
+        )
 
     def _handle_alfa(self, make_callback):
-        self.assertToken(self.token(), '(')
+        self.assertToken(self.token(), "(")
         type = self.token()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         drs1 = self.process_next_expression(None)
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         drs2 = self.process_next_expression(None)
-        self.assertToken(self.token(), ')')
-        return lambda sent_index, word_indices: make_callback(sent_index, word_indices, drs1, drs2)
+        self.assertToken(self.token(), ")")
+        return lambda sent_index, word_indices: make_callback(
+            sent_index, word_indices, drs1, drs2
+        )
 
     def _handle_eq(self):
-        self.assertToken(self.token(), '(')
+        self.assertToken(self.token(), "(")
         var1 = self.parse_variable()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         var2 = self.parse_variable()
-        self.assertToken(self.token(), ')')
-        return lambda sent_index, word_indices: BoxerEq(self.discourse_id, sent_index, word_indices, var1, var2)
-
+        self.assertToken(self.token(), ")")
+        return lambda sent_index, word_indices: BoxerEq(
+            self.discourse_id, sent_index, word_indices, var1, var2
+        )
 
     def _handle_whq(self):
-        self.assertToken(self.token(), '(')
-        self.assertToken(self.token(), '[')
+        self.assertToken(self.token(), "(")
+        self.assertToken(self.token(), "[")
         ans_types = []
-        while self.token(0) != ']':
+        while self.token(0) != "]":
             cat = self.token()
-            self.assertToken(self.token(), ':')
-            if cat == 'des':
+            self.assertToken(self.token(), ":")
+            if cat == "des":
                 ans_types.append(self.token())
-            elif cat == 'num':
-                ans_types.append('number')
+            elif cat == "num":
+                ans_types.append("number")
                 typ = self.token()
-                if typ == 'cou':
-                    ans_types.append('count')
+                if typ == "cou":
+                    ans_types.append("count")
                 else:
                     ans_types.append(typ)
             else:
                 ans_types.append(self.token())
-        self.token() #swallow the ']'
+        self.token()  # swallow the ']'
 
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         d1 = self.process_next_expression(None)
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         ref = self.parse_variable()
-        self.assertToken(self.token(), ',')
+        self.assertToken(self.token(), ",")
         d2 = self.process_next_expression(None)
-        self.assertToken(self.token(), ')')
-        return lambda sent_index, word_indices: BoxerWhq(self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2)
+        self.assertToken(self.token(), ")")
+        return lambda sent_index, word_indices: BoxerWhq(
+            self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2
+        )
 
     def _make_merge_expression(self, sent_index, word_indices, drs1, drs2):
         return BoxerDrs(drs1.refs + drs2.refs, drs1.conds + drs2.conds)
@@ -633,7 +773,7 @@ class BoxerOutputDrsParser(DrtParser):
 
     def parse_variable(self):
         var = self.token()
-        assert re.match('^[exps]\d+$', var), var
+        assert re.match(r"^[exps]\d+$", var), var  # raw string: "\d" is not a valid str escape
         return var
 
     def parse_index(self):
@@ -643,15 +783,17 @@ class BoxerOutputDrsParser(DrtParser):
         """
         :return: list of (sent_index, word_indices) tuples
         """
-        sent_indices = set((i / 1000)-1 for i in indices if i>=0)
+        sent_indices = set((i // 1000) - 1 for i in indices if i >= 0)  # // keeps indices ints in Python 3
         if sent_indices:
             pairs = []
             for sent_index in sent_indices:
-                word_indices = [(i % 1000)-1 for i in indices if sent_index == (i / 1000)-1]
+                word_indices = [
+                    (i % 1000) - 1 for i in indices if sent_index == (i // 1000) - 1
+                ]
                 pairs.append((sent_index, word_indices))
             return pairs
         else:
-            word_indices = [(i % 1000)-1 for i in indices]
+            word_indices = [(i % 1000) - 1 for i in indices]
             return [(None, word_indices)]
 
 
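A note on the arithmetic in _sent_and_word_indices: Boxer packs each occurrence into a single integer as sentence*1000 + word, both one-based, which is why the Python 3 port needs integer division. An illustrative decoding, not part of the patch:

    # 1002 -> zero-based (sentence 0, word 1); 2001 -> (sentence 1, word 0)
    def decode(i):
        return (i // 1000) - 1, (i % 1000) - 1

    assert decode(1002) == (0, 1)
    assert decode(2001) == (1, 0)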
@@ -659,30 +801,39 @@ class BoxerDrsParser(DrtParser):
     """
     Reparse the str form of subclasses of ``AbstractBoxerDrs``
     """
+
     def __init__(self, discourse_id=None):
         DrtParser.__init__(self)
         self.discourse_id = discourse_id
 
     def get_all_symbols(self):
-        return [DrtTokens.OPEN, DrtTokens.CLOSE, DrtTokens.COMMA, DrtTokens.OPEN_BRACKET, DrtTokens.CLOSE_BRACKET]
+        return [
+            DrtTokens.OPEN,
+            DrtTokens.CLOSE,
+            DrtTokens.COMMA,
+            DrtTokens.OPEN_BRACKET,
+            DrtTokens.CLOSE_BRACKET,
+        ]
 
     def attempt_adjuncts(self, expression, context):
         return expression
 
     def handle(self, tok, context):
         try:
-#             if tok == 'drs':
-#                 self.assertNextToken(DrtTokens.OPEN)
-#                 label = int(self.token())
-#                 self.assertNextToken(DrtTokens.COMMA)
-#                 refs = list(map(int, self.handle_refs()))
-#                 self.assertNextToken(DrtTokens.COMMA)
-#                 conds = self.handle_conds(None)
-#                 self.assertNextToken(DrtTokens.CLOSE)
-#                 return BoxerDrs(label, refs, conds)
-            if tok == 'pred':
+            #             if tok == 'drs':
+            #                 self.assertNextToken(DrtTokens.OPEN)
+            #                 label = int(self.token())
+            #                 self.assertNextToken(DrtTokens.COMMA)
+            #                 refs = list(map(int, self.handle_refs()))
+            #                 self.assertNextToken(DrtTokens.COMMA)
+            #                 conds = self.handle_conds(None)
+            #                 self.assertNextToken(DrtTokens.CLOSE)
+            #                 return BoxerDrs(label, refs, conds)
+            if tok == "pred":
                 self.assertNextToken(DrtTokens.OPEN)
-                disc_id = (self.token(), self.discourse_id)[self.discourse_id is not None]
+                disc_id = (
+                    self.discourse_id if self.discourse_id is not None else self.token()
+                )
                 self.assertNextToken(DrtTokens.COMMA)
                 sent_id = self.nullableIntToken()
                 self.assertNextToken(DrtTokens.COMMA)
@@ -697,9 +848,11 @@ class BoxerDrsParser(DrtParser):
                 sense = int(self.token())
                 self.assertNextToken(DrtTokens.CLOSE)
                 return BoxerPred(disc_id, sent_id, word_ids, variable, name, pos, sense)
-            elif tok == 'named':
+            elif tok == "named":
                 self.assertNextToken(DrtTokens.OPEN)
-                disc_id = (self.token(), self.discourse_id)[self.discourse_id is not None]
+                disc_id = (
+                    self.discourse_id if self.discourse_id is not None else self.token()
+                )
                 self.assertNextToken(DrtTokens.COMMA)
                 sent_id = int(self.token())
                 self.assertNextToken(DrtTokens.COMMA)
@@ -713,10 +866,14 @@ class BoxerDrsParser(DrtParser):
                 self.assertNextToken(DrtTokens.COMMA)
                 sense = int(self.token())
                 self.assertNextToken(DrtTokens.CLOSE)
-                return BoxerNamed(disc_id, sent_id, word_ids, variable, name, type, sense)
-            elif tok == 'rel':
+                return BoxerNamed(
+                    disc_id, sent_id, word_ids, variable, name, type, sense
+                )
+            elif tok == "rel":
                 self.assertNextToken(DrtTokens.OPEN)
-                disc_id = (self.token(), self.discourse_id)[self.discourse_id is not None]
+                disc_id = (
+                    self.discourse_id if self.discourse_id is not None else self.token()
+                )
                 self.assertNextToken(DrtTokens.COMMA)
                 sent_id = self.nullableIntToken()
                 self.assertNextToken(DrtTokens.COMMA)
@@ -731,9 +888,11 @@ class BoxerDrsParser(DrtParser):
                 sense = int(self.token())
                 self.assertNextToken(DrtTokens.CLOSE)
                 return BoxerRel(disc_id, sent_id, word_ids, var1, var2, rel, sense)
-            elif tok == 'prop':
+            elif tok == "prop":
                 self.assertNextToken(DrtTokens.OPEN)
-                disc_id = (self.token(), self.discourse_id)[self.discourse_id is not None]
+                disc_id = (
+                    self.discourse_id if self.discourse_id is not None else self.token()
+                )
                 self.assertNextToken(DrtTokens.COMMA)
                 sent_id = int(self.token())
                 self.assertNextToken(DrtTokens.COMMA)
@@ -744,21 +903,23 @@ class BoxerDrsParser(DrtParser):
                 drs = self.process_next_expression(None)
                 self.assertNextToken(DrtTokens.CLOSE)
                 return BoxerProp(disc_id, sent_id, word_ids, variable, drs)
-            elif tok == 'not':
+            elif tok == "not":
                 self.assertNextToken(DrtTokens.OPEN)
                 drs = self.process_next_expression(None)
                 self.assertNextToken(DrtTokens.CLOSE)
                 return BoxerNot(drs)
-            elif tok == 'imp':
+            elif tok == "imp":
                 self.assertNextToken(DrtTokens.OPEN)
                 drs1 = self.process_next_expression(None)
                 self.assertNextToken(DrtTokens.COMMA)
                 drs2 = self.process_next_expression(None)
                 self.assertNextToken(DrtTokens.CLOSE)
                 return BoxerDrs(drs1.refs, drs1.conds, drs2)
-            elif tok == 'or':
+            elif tok == "or":
                 self.assertNextToken(DrtTokens.OPEN)
-                disc_id = (self.token(), self.discourse_id)[self.discourse_id is not None]
+                disc_id = (
+                    self.discourse_id if self.discourse_id is not None else self.token()
+                )
                 self.assertNextToken(DrtTokens.COMMA)
                 sent_id = self.nullableIntToken()
                 self.assertNextToken(DrtTokens.COMMA)
@@ -769,9 +930,11 @@ class BoxerDrsParser(DrtParser):
                 drs2 = self.process_next_expression(None)
                 self.assertNextToken(DrtTokens.CLOSE)
                 return BoxerOr(disc_id, sent_id, word_ids, drs1, drs2)
-            elif tok == 'eq':
+            elif tok == "eq":
                 self.assertNextToken(DrtTokens.OPEN)
-                disc_id = (self.token(), self.discourse_id)[self.discourse_id is not None]
+                disc_id = (
+                    self.discourse_id if self.discourse_id is not None else self.token()
+                )
                 self.assertNextToken(DrtTokens.COMMA)
                 sent_id = self.nullableIntToken()
                 self.assertNextToken(DrtTokens.COMMA)
@@ -782,9 +945,11 @@ class BoxerDrsParser(DrtParser):
                 var2 = int(self.token())
                 self.assertNextToken(DrtTokens.CLOSE)
                 return BoxerEq(disc_id, sent_id, word_ids, var1, var2)
-            elif tok == 'card':
+            elif tok == "card":
                 self.assertNextToken(DrtTokens.OPEN)
-                disc_id = (self.token(), self.discourse_id)[self.discourse_id is not None]
+                disc_id = (
+                    self.discourse_id if self.discourse_id is not None else self.token()
+                )
                 self.assertNextToken(DrtTokens.COMMA)
                 sent_id = self.nullableIntToken()
                 self.assertNextToken(DrtTokens.COMMA)
@@ -797,9 +962,11 @@ class BoxerDrsParser(DrtParser):
                 type = self.token()
                 self.assertNextToken(DrtTokens.CLOSE)
                 return BoxerCard(disc_id, sent_id, word_ids, var, value, type)
-            elif tok == 'whq':
+            elif tok == "whq":
                 self.assertNextToken(DrtTokens.OPEN)
-                disc_id = (self.token(), self.discourse_id)[self.discourse_id is not None]
+                disc_id = (
+                    self.discourse_id if self.discourse_id is not None else self.token()
+                )
                 self.assertNextToken(DrtTokens.COMMA)
                 sent_id = self.nullableIntToken()
                 self.assertNextToken(DrtTokens.COMMA)
@@ -820,14 +987,13 @@ class BoxerDrsParser(DrtParser):
 
     def nullableIntToken(self):
         t = self.token()
-        return [None,int(t)][t != 'None']
+        return int(t) if t != "None" else None
 
     def get_next_token_variable(self, description):
         try:
             return self.token()
         except ExpectedMoreTokensException as e:
-            raise ExpectedMoreTokensException(e.index, 'Variable expected.')
-
+            raise ExpectedMoreTokensException(e.index, "Variable expected.")
 
 
 class AbstractBoxerDrs(object):
@@ -840,7 +1006,7 @@ class AbstractBoxerDrs(object):
 
     def variable_types(self):
         vartypes = {}
-        for t,vars in zip(('z','e','p'), self.variables()):
+        for t, vars in zip(("z", "e", "p"), self.variables()):
             for v in vars:
                 vartypes[v] = t
         return vartypes
@@ -858,7 +1024,7 @@ class AbstractBoxerDrs(object):
         return self
 
     def _clean_name(self, name):
-        return name.replace('-','_').replace("'", "_")
+        return name.replace("-", "_").replace("'", "_")
 
     def renumber_sentences(self, f):
         return self
@@ -867,7 +1033,6 @@ class AbstractBoxerDrs(object):
         return hash("{0}".format(self))
 
 
-@python_2_unicode_compatible
 class BoxerDrs(AbstractBoxerDrs):
     def __init__(self, refs, conds, consequent=None):
         AbstractBoxerDrs.__init__(self)
@@ -878,10 +1043,10 @@ class BoxerDrs(AbstractBoxerDrs):
     def _variables(self):
         variables = (set(), set(), set())
         for cond in self.conds:
-            for s,v in zip(variables, cond._variables()):
+            for s, v in zip(variables, cond._variables()):
                 s.update(v)
         if self.consequent is not None:
-            for s,v in zip(variables, self.consequent._variables()):
+            for s, v in zip(variables, self.consequent._variables()):
                 s.update(v)
         return variables
 
@@ -892,26 +1057,34 @@ class BoxerDrs(AbstractBoxerDrs):
         return atoms
 
     def clean(self):
-        consequent = (self.consequent.clean() if self.consequent else None)
+        consequent = self.consequent.clean() if self.consequent else None
         return BoxerDrs(self.refs, [c.clean() for c in self.conds], consequent)
 
     def renumber_sentences(self, f):
-        consequent = (self.consequent.renumber_sentences(f) if self.consequent else None)
-        return BoxerDrs(self.refs, [c.renumber_sentences(f) for c in self.conds], consequent)
+        consequent = self.consequent.renumber_sentences(f) if self.consequent else None
+        return BoxerDrs(
+            self.refs, [c.renumber_sentences(f) for c in self.conds], consequent
+        )
 
     def __repr__(self):
-        s = 'drs([%s], [%s])' % (', '.join("%s" % r for r in self.refs),
-                                 ', '.join("%s" % c for c in self.conds))
+        s = "drs([%s], [%s])" % (
+            ", ".join("%s" % r for r in self.refs),
+            ", ".join("%s" % c for c in self.conds),
+        )
         if self.consequent is not None:
-            s = 'imp(%s, %s)' % (s, self.consequent)
+            s = "imp(%s, %s)" % (s, self.consequent)
         return s
 
     def __eq__(self, other):
-        return self.__class__ == other.__class__ and \
-               self.refs == other.refs and \
-               len(self.conds) == len(other.conds) and \
-               reduce(operator.and_, (c1==c2 for c1,c2 in zip(self.conds, other.conds))) and \
-               self.consequent == other.consequent
+        return (
+            self.__class__ == other.__class__
+            and self.refs == other.refs
+            and len(self.conds) == len(other.conds)
+            and reduce(
+                operator.and_, (c1 == c2 for c1, c2 in zip(self.conds, other.conds))
+            )
+            and self.consequent == other.consequent
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -919,7 +1092,6 @@ class BoxerDrs(AbstractBoxerDrs):
     __hash__ = AbstractBoxerDrs.__hash__
 
 
-@python_2_unicode_compatible
 class BoxerNot(AbstractBoxerDrs):
     def __init__(self, drs):
         AbstractBoxerDrs.__init__(self)
@@ -938,7 +1110,7 @@ class BoxerNot(AbstractBoxerDrs):
         return BoxerNot(self.drs.renumber_sentences(f))
 
     def __repr__(self):
-        return 'not(%s)' % (self.drs)
+        return "not(%s)" % (self.drs)
 
     def __eq__(self, other):
         return self.__class__ == other.__class__ and self.drs == other.drs
@@ -948,7 +1120,7 @@ class BoxerNot(AbstractBoxerDrs):
 
     __hash__ = AbstractBoxerDrs.__hash__
 
-@python_2_unicode_compatible
+
 class BoxerIndexed(AbstractBoxerDrs):
     def __init__(self, discourse_id, sent_index, word_indices):
         AbstractBoxerDrs.__init__(self)
@@ -960,11 +1132,13 @@ class BoxerIndexed(AbstractBoxerDrs):
         return set([self])
 
     def __eq__(self, other):
-        return self.__class__ == other.__class__ and \
-               self.discourse_id == other.discourse_id and \
-               self.sent_index == other.sent_index and \
-               self.word_indices == other.word_indices and \
-               reduce(operator.and_, (s==o for s,o in zip(self, other)))
+        return (
+            self.__class__ == other.__class__
+            and self.discourse_id == other.discourse_id
+            and self.sent_index == other.sent_index
+            and self.word_indices == other.word_indices
+            and reduce(operator.and_, (s == o for s, o in zip(self, other)))
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -972,11 +1146,16 @@ class BoxerIndexed(AbstractBoxerDrs):
     __hash__ = AbstractBoxerDrs.__hash__
 
     def __repr__(self):
-        s = '%s(%s, %s, [%s]' % (self._pred(), self.discourse_id,
-                                 self.sent_index, ', '.join("%s" % wi for wi in self.word_indices))
+        s = "%s(%s, %s, [%s]" % (
+            self._pred(),
+            self.discourse_id,
+            self.sent_index,
+            ", ".join("%s" % wi for wi in self.word_indices),
+        )
         for v in self:
-            s += ', %s' % v
-        return s + ')'
+            s += ", %s" % v
+        return s + ")"
+
 
 class BoxerPred(BoxerIndexed):
     def __init__(self, discourse_id, sent_index, word_indices, var, name, pos, sense):
@@ -990,20 +1169,45 @@ class BoxerPred(BoxerIndexed):
         return (set([self.var]), set(), set())
 
     def change_var(self, var):
-        return BoxerPred(self.discourse_id, self.sent_index, self.word_indices, var, self.name, self.pos, self.sense)
+        return BoxerPred(
+            self.discourse_id,
+            self.sent_index,
+            self.word_indices,
+            var,
+            self.name,
+            self.pos,
+            self.sense,
+        )
 
     def clean(self):
-        return BoxerPred(self.discourse_id, self.sent_index, self.word_indices, self.var, self._clean_name(self.name), self.pos, self.sense)
+        return BoxerPred(
+            self.discourse_id,
+            self.sent_index,
+            self.word_indices,
+            self.var,
+            self._clean_name(self.name),
+            self.pos,
+            self.sense,
+        )
 
     def renumber_sentences(self, f):
         new_sent_index = f(self.sent_index)
-        return BoxerPred(self.discourse_id, new_sent_index, self.word_indices, self.var, self.name, self.pos, self.sense)
+        return BoxerPred(
+            self.discourse_id,
+            new_sent_index,
+            self.word_indices,
+            self.var,
+            self.name,
+            self.pos,
+            self.sense,
+        )
 
     def __iter__(self):
         return iter((self.var, self.name, self.pos, self.sense))
 
     def _pred(self):
-        return 'pred'
+        return "pred"
+
 
 class BoxerNamed(BoxerIndexed):
     def __init__(self, discourse_id, sent_index, word_indices, var, name, type, sense):
@@ -1017,19 +1221,44 @@ class BoxerNamed(BoxerIndexed):
         return (set([self.var]), set(), set())
 
     def change_var(self, var):
-        return BoxerNamed(self.discourse_id, self.sent_index, self.word_indices, var, self.name, self.type, self.sense)
+        return BoxerNamed(
+            self.discourse_id,
+            self.sent_index,
+            self.word_indices,
+            var,
+            self.name,
+            self.type,
+            self.sense,
+        )
 
     def clean(self):
-        return BoxerNamed(self.discourse_id, self.sent_index, self.word_indices, self.var, self._clean_name(self.name), self.type, self.sense)
+        return BoxerNamed(
+            self.discourse_id,
+            self.sent_index,
+            self.word_indices,
+            self.var,
+            self._clean_name(self.name),
+            self.type,
+            self.sense,
+        )
 
     def renumber_sentences(self, f):
-        return BoxerNamed(self.discourse_id, f(self.sent_index), self.word_indices, self.var, self.name, self.type, self.sense)
+        return BoxerNamed(
+            self.discourse_id,
+            f(self.sent_index),
+            self.word_indices,
+            self.var,
+            self.name,
+            self.type,
+            self.sense,
+        )
 
     def __iter__(self):
         return iter((self.var, self.name, self.type, self.sense))
 
     def _pred(self):
-        return 'named'
+        return "named"
+
 
 class BoxerRel(BoxerIndexed):
     def __init__(self, discourse_id, sent_index, word_indices, var1, var2, rel, sense):
@@ -1043,16 +1272,33 @@ class BoxerRel(BoxerIndexed):
         return (set([self.var1, self.var2]), set(), set())
 
     def clean(self):
-        return BoxerRel(self.discourse_id, self.sent_index, self.word_indices, self.var1, self.var2, self._clean_name(self.rel), self.sense)
+        return BoxerRel(
+            self.discourse_id,
+            self.sent_index,
+            self.word_indices,
+            self.var1,
+            self.var2,
+            self._clean_name(self.rel),
+            self.sense,
+        )
 
     def renumber_sentences(self, f):
-        return BoxerRel(self.discourse_id, f(self.sent_index), self.word_indices, self.var1, self.var2, self.rel, self.sense)
+        return BoxerRel(
+            self.discourse_id,
+            f(self.sent_index),
+            self.word_indices,
+            self.var1,
+            self.var2,
+            self.rel,
+            self.sense,
+        )
 
     def __iter__(self):
         return iter((self.var1, self.var2, self.rel, self.sense))
 
     def _pred(self):
-        return 'rel'
+        return "rel"
+
 
 class BoxerProp(BoxerIndexed):
     def __init__(self, discourse_id, sent_index, word_indices, var, drs):
@@ -1061,7 +1307,9 @@ class BoxerProp(BoxerIndexed):
         self.drs = drs
 
     def _variables(self):
-        return tuple(map(operator.or_, (set(), set(), set([self.var])), self.drs._variables()))
+        return tuple(
+            map(operator.or_, (set(), set(), set([self.var])), self.drs._variables())
+        )
 
     def referenced_labels(self):
         return set([self.drs])
@@ -1070,16 +1318,29 @@ class BoxerProp(BoxerIndexed):
         return self.drs.atoms()
 
     def clean(self):
-        return BoxerProp(self.discourse_id, self.sent_index, self.word_indices, self.var, self.drs.clean())
+        return BoxerProp(
+            self.discourse_id,
+            self.sent_index,
+            self.word_indices,
+            self.var,
+            self.drs.clean(),
+        )
 
     def renumber_sentences(self, f):
-        return BoxerProp(self.discourse_id, f(self.sent_index), self.word_indices, self.var, self.drs.renumber_sentences(f))
+        return BoxerProp(
+            self.discourse_id,
+            f(self.sent_index),
+            self.word_indices,
+            self.var,
+            self.drs.renumber_sentences(f),
+        )
 
     def __iter__(self):
         return iter((self.var, self.drs))
 
     def _pred(self):
-        return 'prop'
+        return "prop"
+
 
 class BoxerEq(BoxerIndexed):
     def __init__(self, discourse_id, sent_index, word_indices, var1, var2):
@@ -1094,13 +1355,20 @@ class BoxerEq(BoxerIndexed):
         return set()
 
     def renumber_sentences(self, f):
-        return BoxerEq(self.discourse_id, f(self.sent_index), self.word_indices, self.var1, self.var2)
+        return BoxerEq(
+            self.discourse_id,
+            f(self.sent_index),
+            self.word_indices,
+            self.var1,
+            self.var2,
+        )
 
     def __iter__(self):
         return iter((self.var1, self.var2))
 
     def _pred(self):
-        return 'eq'
+        return "eq"
+
 
 class BoxerCard(BoxerIndexed):
     def __init__(self, discourse_id, sent_index, word_indices, var, value, type):
@@ -1113,13 +1381,21 @@ class BoxerCard(BoxerIndexed):
         return (set([self.var]), set(), set())
 
     def renumber_sentences(self, f):
-        return BoxerCard(self.discourse_id, f(self.sent_index), self.word_indices, self.var, self.value, self.type)
+        return BoxerCard(
+            self.discourse_id,
+            f(self.sent_index),
+            self.word_indices,
+            self.var,
+            self.value,
+            self.type,
+        )
 
     def __iter__(self):
         return iter((self.var, self.value, self.type))
 
     def _pred(self):
-        return 'card'
+        return "card"
+
 
 class BoxerOr(BoxerIndexed):
     def __init__(self, discourse_id, sent_index, word_indices, drs1, drs2):
@@ -1134,19 +1410,34 @@ class BoxerOr(BoxerIndexed):
         return self.drs1.atoms() | self.drs2.atoms()
 
     def clean(self):
-        return BoxerOr(self.discourse_id, self.sent_index, self.word_indices, self.drs1.clean(), self.drs2.clean())
+        return BoxerOr(
+            self.discourse_id,
+            self.sent_index,
+            self.word_indices,
+            self.drs1.clean(),
+            self.drs2.clean(),
+        )
 
     def renumber_sentences(self, f):
-        return BoxerOr(self.discourse_id, f(self.sent_index), self.word_indices, self.drs1, self.drs2)
+        return BoxerOr(
+            self.discourse_id,
+            f(self.sent_index),
+            self.word_indices,
+            self.drs1,
+            self.drs2,
+        )
 
     def __iter__(self):
         return iter((self.drs1, self.drs2))
 
     def _pred(self):
-        return 'or'
+        return "or"
+
 
 class BoxerWhq(BoxerIndexed):
-    def __init__(self, discourse_id, sent_index, word_indices, ans_types, drs1, variable, drs2):
+    def __init__(
+        self, discourse_id, sent_index, word_indices, ans_types, drs1, variable, drs2
+    ):
         BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices)
         self.ans_types = ans_types
         self.drs1 = drs1
@@ -1154,23 +1445,47 @@ class BoxerWhq(BoxerIndexed):
         self.drs2 = drs2
 
     def _variables(self):
-        return tuple(map(operator.or_, (set([self.variable]), set(), set()), self.drs1._variables(), self.drs2._variables()))
+        return tuple(
+            map(
+                operator.or_,
+                (set([self.variable]), set(), set()),
+                self.drs1._variables(),
+                self.drs2._variables(),
+            )
+        )
 
     def atoms(self):
         return self.drs1.atoms() | self.drs2.atoms()
 
     def clean(self):
-        return BoxerWhq(self.discourse_id, self.sent_index, self.word_indices, self.ans_types, self.drs1.clean(), self.variable, self.drs2.clean())
+        return BoxerWhq(
+            self.discourse_id,
+            self.sent_index,
+            self.word_indices,
+            self.ans_types,
+            self.drs1.clean(),
+            self.variable,
+            self.drs2.clean(),
+        )
 
     def renumber_sentences(self, f):
-        return BoxerWhq(self.discourse_id, f(self.sent_index), self.word_indices, self.ans_types, self.drs1, self.variable, self.drs2)
+        return BoxerWhq(
+            self.discourse_id,
+            f(self.sent_index),
+            self.word_indices,
+            self.ans_types,
+            self.drs1,
+            self.variable,
+            self.drs2,
+        )
 
     def __iter__(self):
-        return iter(('['+','.join(self.ans_types)+']', self.drs1, self.variable, self.drs2))
+        return iter(
+            ("[" + ",".join(self.ans_types) + "]", self.drs1, self.variable, self.drs2)
+        )
 
     def _pred(self):
-        return 'whq'
-
+        return "whq"
 
 
 class PassthroughBoxerDrsInterpreter(object):
@@ -1188,28 +1503,32 @@ class NltkDrtBoxerDrsInterpreter(object):
         :return: ``DrtExpression``
         """
         if isinstance(ex, BoxerDrs):
-            drs = DRS([Variable(r) for r in ex.refs], list(map(self.interpret, ex.conds)))
+            drs = DRS(
+                [Variable(r) for r in ex.refs], list(map(self.interpret, ex.conds))
+            )
             if ex.consequent is not None:
                 drs.consequent = self.interpret(ex.consequent)
             return drs
         elif isinstance(ex, BoxerNot):
             return DrtNegatedExpression(self.interpret(ex.drs))
         elif isinstance(ex, BoxerPred):
-            pred = self._add_occur_indexing('%s_%s' % (ex.pos, ex.name), ex)
+            pred = self._add_occur_indexing("%s_%s" % (ex.pos, ex.name), ex)
             return self._make_atom(pred, ex.var)
         elif isinstance(ex, BoxerNamed):
-            pred = self._add_occur_indexing('ne_%s_%s' % (ex.type, ex.name), ex)
+            pred = self._add_occur_indexing("ne_%s_%s" % (ex.type, ex.name), ex)
             return self._make_atom(pred, ex.var)
         elif isinstance(ex, BoxerRel):
-            pred = self._add_occur_indexing('%s' % (ex.rel), ex)
+            pred = self._add_occur_indexing("%s" % (ex.rel), ex)
             return self._make_atom(pred, ex.var1, ex.var2)
         elif isinstance(ex, BoxerProp):
             return DrtProposition(Variable(ex.var), self.interpret(ex.drs))
         elif isinstance(ex, BoxerEq):
-            return DrtEqualityExpression(DrtVariableExpression(Variable(ex.var1)),
-                                         DrtVariableExpression(Variable(ex.var2)))
+            return DrtEqualityExpression(
+                DrtVariableExpression(Variable(ex.var1)),
+                DrtVariableExpression(Variable(ex.var2)),
+            )
         elif isinstance(ex, BoxerCard):
-            pred = self._add_occur_indexing('card_%s_%s' % (ex.type, ex.value), ex)
+            pred = self._add_occur_indexing("card_%s_%s" % (ex.type, ex.value), ex)
             return self._make_atom(pred, ex.var)
         elif isinstance(ex, BoxerOr):
             return DrtOrExpression(self.interpret(ex.drs1), self.interpret(ex.drs2))
@@ -1217,20 +1536,22 @@ class NltkDrtBoxerDrsInterpreter(object):
             drs1 = self.interpret(ex.drs1)
             drs2 = self.interpret(ex.drs2)
             return DRS(drs1.refs + drs2.refs, drs1.conds + drs2.conds)
-        assert False, '%s: %s' % (ex.__class__.__name__, ex)
+        assert False, "%s: %s" % (ex.__class__.__name__, ex)
 
     def _make_atom(self, pred, *args):
         accum = DrtVariableExpression(Variable(pred))
         for arg in args:
-            accum = DrtApplicationExpression(accum, DrtVariableExpression(Variable(arg)))
+            accum = DrtApplicationExpression(
+                accum, DrtVariableExpression(Variable(arg))
+            )
         return accum
 
     def _add_occur_indexing(self, base, ex):
         if self._occur_index and ex.sent_index is not None:
             if ex.discourse_id:
-                base += '_%s'  % ex.discourse_id
-            base += '_s%s' % ex.sent_index
-            base += '_w%s' % sorted(ex.word_indices)[0]
+                base += "_%s" % ex.discourse_id
+            base += "_s%s" % ex.sent_index
+            base += "_w%s" % sorted(ex.word_indices)[0]
         return base
 
 
@@ -1238,19 +1559,44 @@ class UnparseableInputException(Exception):
     pass
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     opts = OptionParser("usage: %prog TEXT [options]")
-    opts.add_option("--verbose", "-v", help="display verbose logs", action="store_true", default=False, dest="verbose")
-    opts.add_option("--fol", "-f", help="output FOL", action="store_true", default=False, dest="fol")
-    opts.add_option("--question", "-q", help="input is a question", action="store_true", default=False, dest="question")
-    opts.add_option("--occur", "-o", help="occurrence index", action="store_true", default=False, dest="occur_index")
+    opts.add_option(
+        "--verbose",
+        "-v",
+        help="display verbose logs",
+        action="store_true",
+        default=False,
+        dest="verbose",
+    )
+    opts.add_option(
+        "--fol", "-f", help="output FOL", action="store_true", default=False, dest="fol"
+    )
+    opts.add_option(
+        "--question",
+        "-q",
+        help="input is a question",
+        action="store_true",
+        default=False,
+        dest="question",
+    )
+    opts.add_option(
+        "--occur",
+        "-o",
+        help="occurrence index",
+        action="store_true",
+        default=False,
+        dest="occur_index",
+    )
     (options, args) = opts.parse_args()
 
     if len(args) != 1:
         opts.error("incorrect number of arguments")
 
     interpreter = NltkDrtBoxerDrsInterpreter(occur_index=options.occur_index)
-    drs = Boxer(interpreter).interpret_multi(args[0].split(r'\n'), question=options.question, verbose=options.verbose)
+    drs = Boxer(interpreter).interpret_multi(
+        args[0].split(r"\n"), question=options.question, verbose=options.verbose
+    )
     if drs is None:
         print(None)
     else:
diff --git a/nlp_resource_data/nltk/sem/boxer.pyc b/nlp_resource_data/nltk/sem/boxer.pyc
deleted file mode 100755 (executable)
index c209430..0000000
Binary files a/nlp_resource_data/nltk/sem/boxer.pyc and /dev/null differ
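One subtlety survives the reformatting of boxer.py's __main__ block above: interpret_multi is fed args[0].split(r"\n"), and the raw string r"\n" is the two-character sequence backslash + n, not a newline. A minimal standalone sketch of that splitting convention (the sample text is hypothetical):

    # The single TEXT argument separates discourses with a literal
    # backslash-n; an actual newline character would NOT split.
    text = "John met Mary.\\nShe greeted him."  # contains backslash + n
    print(text.split(r"\n"))
    # -> ['John met Mary.', 'She greeted him.']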
diff --git a/nlp_resource_data/nltk/sem/chat80.py b/nlp_resource_data/nltk/sem/chat80.py
old mode 100755 (executable)
new mode 100644 (file)
index 180c50a..2597177
@@ -1,7 +1,7 @@
 # Natural Language Toolkit: Chat-80 KB Reader
 # See http://www.w3.org/TR/swbp-skos-core-guide/
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>,
 # URL: <http://nltk.sourceforge.net>
 # For license information, see LICENSE.TXT
@@ -122,104 +122,133 @@ The set of rules is written to the file ``chat_pnames.cfg`` in the
 current directory.
 
 """
-from __future__ import print_function, unicode_literals
 
 import re
 import shelve
 import os
 import sys
 
-from six import string_types
-
 import nltk.data
-from nltk.compat import python_2_unicode_compatible
 
 ###########################################################################
 # Chat-80 relation metadata bundles needed to build the valuation
 ###########################################################################
 
-borders = {'rel_name': 'borders',
-           'closures': ['symmetric'],
-           'schema': ['region', 'border'],
-           'filename': 'borders.pl'}
-
-contains = {'rel_name': 'contains0',
-            'closures': ['transitive'],
-            'schema': ['region', 'contain'],
-            'filename': 'contain.pl'}
-
-city = {'rel_name': 'city',
-        'closures': [],
-        'schema': ['city', 'country', 'population'],
-        'filename': 'cities.pl'}
-
-country = {'rel_name': 'country',
-           'closures': [],
-           'schema': ['country', 'region', 'latitude', 'longitude',
-                      'area', 'population', 'capital', 'currency'],
-           'filename': 'countries.pl'}
-
-circle_of_lat = {'rel_name': 'circle_of_latitude',
-                 'closures': [],
-                 'schema': ['circle_of_latitude', 'degrees'],
-                 'filename': 'world1.pl'}
-
-circle_of_long = {'rel_name': 'circle_of_longitude',
-                 'closures': [],
-                 'schema': ['circle_of_longitude', 'degrees'],
-                 'filename': 'world1.pl'}
-
-continent = {'rel_name': 'continent',
-             'closures': [],
-             'schema': ['continent'],
-             'filename': 'world1.pl'}
-
-region = {'rel_name': 'in_continent',
-          'closures': [],
-          'schema': ['region', 'continent'],
-          'filename': 'world1.pl'}
-
-ocean = {'rel_name': 'ocean',
-         'closures': [],
-         'schema': ['ocean'],
-         'filename': 'world1.pl'}
-
-sea = {'rel_name': 'sea',
-       'closures': [],
-       'schema': ['sea'],
-       'filename': 'world1.pl'}
-
-
-
-items = ['borders', 'contains', 'city', 'country', 'circle_of_lat',
-         'circle_of_long', 'continent', 'region', 'ocean', 'sea']
+borders = {
+    "rel_name": "borders",
+    "closures": ["symmetric"],
+    "schema": ["region", "border"],
+    "filename": "borders.pl",
+}
+
+contains = {
+    "rel_name": "contains0",
+    "closures": ["transitive"],
+    "schema": ["region", "contain"],
+    "filename": "contain.pl",
+}
+
+city = {
+    "rel_name": "city",
+    "closures": [],
+    "schema": ["city", "country", "population"],
+    "filename": "cities.pl",
+}
+
+country = {
+    "rel_name": "country",
+    "closures": [],
+    "schema": [
+        "country",
+        "region",
+        "latitude",
+        "longitude",
+        "area",
+        "population",
+        "capital",
+        "currency",
+    ],
+    "filename": "countries.pl",
+}
+
+circle_of_lat = {
+    "rel_name": "circle_of_latitude",
+    "closures": [],
+    "schema": ["circle_of_latitude", "degrees"],
+    "filename": "world1.pl",
+}
+
+circle_of_long = {
+    "rel_name": "circle_of_longitude",
+    "closures": [],
+    "schema": ["circle_of_longitude", "degrees"],
+    "filename": "world1.pl",
+}
+
+continent = {
+    "rel_name": "continent",
+    "closures": [],
+    "schema": ["continent"],
+    "filename": "world1.pl",
+}
+
+region = {
+    "rel_name": "in_continent",
+    "closures": [],
+    "schema": ["region", "continent"],
+    "filename": "world1.pl",
+}
+
+ocean = {
+    "rel_name": "ocean",
+    "closures": [],
+    "schema": ["ocean"],
+    "filename": "world1.pl",
+}
+
+sea = {"rel_name": "sea", "closures": [], "schema": ["sea"], "filename": "world1.pl"}
+
+
+items = [
+    "borders",
+    "contains",
+    "city",
+    "country",
+    "circle_of_lat",
+    "circle_of_long",
+    "continent",
+    "region",
+    "ocean",
+    "sea",
+]
 items = tuple(sorted(items))
 
 item_metadata = {
-    'borders': borders,
-    'contains': contains,
-    'city': city,
-    'country': country,
-    'circle_of_lat': circle_of_lat,
-    'circle_of_long': circle_of_long,
-    'continent': continent,
-    'region': region,
-    'ocean': ocean,
-    'sea': sea
-    }
+    "borders": borders,
+    "contains": contains,
+    "city": city,
+    "country": country,
+    "circle_of_lat": circle_of_lat,
+    "circle_of_long": circle_of_long,
+    "continent": continent,
+    "region": region,
+    "ocean": ocean,
+    "sea": sea,
+}
 
 rels = item_metadata.values()
 
-not_unary = ['borders.pl', 'contain.pl']
+not_unary = ["borders.pl", "contain.pl"]
 
 ###########################################################################
 
-@python_2_unicode_compatible
+
 class Concept(object):
     """
     A Concept class, loosely based on SKOS
     (http://www.w3.org/TR/swbp-skos-core-guide/).
     """
+
     def __init__(self, prefLabel, arity, altLabels=[], closures=[], extension=set()):
         """
         :param prefLabel: the preferred label for the concept
@@ -238,21 +267,24 @@ class Concept(object):
         self.arity = arity
         self.altLabels = altLabels
         self.closures = closures
-        #keep _extension internally as a set
+        # keep _extension internally as a set
         self._extension = extension
-        #public access is via a list (for slicing)
+        # public access is via a list (for slicing)
         self.extension = sorted(list(extension))
 
     def __str__(self):
-        #_extension = ''
-        #for element in sorted(self.extension):
-            #if isinstance(element, tuple):
-                #element = '(%s, %s)' % (element)
-            #_extension += element + ', '
-        #_extension = _extension[:-1]
-
-        return "Label = '%s'\nArity = %s\nExtension = %s" % \
-               (self.prefLabel, self.arity, self.extension)
+        # _extension = ''
+        # for element in sorted(self.extension):
+        # if isinstance(element, tuple):
+        # element = '(%s, %s)' % (element)
+        # _extension += element + ', '
+        # _extension = _extension[:-1]
+
+        return "Label = '%s'\nArity = %s\nExtension = %s" % (
+            self.prefLabel,
+            self.arity,
+            self.extension,
+        )
 
     def __repr__(self):
         return "Concept('%s')" % self.prefLabel
@@ -270,7 +302,6 @@ class Concept(object):
         self.extension = sorted(list(self._extension))
         return self._extension
 
-
     def _make_graph(self, s):
         """
         Convert a set of pairs into an adjacency linked list encoding of a graph.
@@ -306,7 +337,6 @@ class Concept(object):
                 pairs.append((node, adjacent))
         return set(pairs)
 
-
     def close(self):
         """
         Close a binary relation in the ``Concept``'s extension set.
@@ -315,18 +345,18 @@ class Concept(object):
                  relation is closed under a given property
         """
         from nltk.sem import is_rel
+
         assert is_rel(self._extension)
-        if 'symmetric' in self.closures:
+        if "symmetric" in self.closures:
             pairs = []
             for (x, y) in self._extension:
                 pairs.append((y, x))
             sym = set(pairs)
             self._extension = self._extension.union(sym)
-        if 'transitive' in self.closures:
-            all =  self._make_graph(self._extension)
-            closed =  self._transclose(all)
+        if "transitive" in self.closures:
+            all = self._make_graph(self._extension)
+            closed = self._transclose(all)
             trans = self._make_pairs(closed)
-            #print sorted(trans)
             self._extension = self._extension.union(trans)
         self.extension = sorted(list(self._extension))
 
@@ -370,6 +400,7 @@ def clause2concepts(filename, rel_name, schema, closures=[]):
 
     return concepts
 
+
 def cities2table(filename, rel_name, dbname, verbose=False, setup=False):
     """
     Convert a file of Prolog clauses into a database table.
@@ -389,16 +420,19 @@ def cities2table(filename, rel_name, dbname, verbose=False, setup=False):
     :type schema: str
     """
     import sqlite3
+
     records = _str2records(filename, rel_name)
-    connection =  sqlite3.connect(dbname)
+    connection = sqlite3.connect(dbname)
     cur = connection.cursor()
     if setup:
-        cur.execute('''CREATE TABLE city_table
-        (City text, Country text, Population int)''')
+        cur.execute(
+            """CREATE TABLE city_table
+        (City text, Country text, Population int)"""
+        )
 
     table_name = "city_table"
     for t in records:
-        cur.execute('insert into %s values (?,?,?)' % table_name, t)
+        cur.execute("insert into %s values (?,?,?)" % table_name, t)
         if verbose:
             print("inserting values into %s: " % table_name, t)
     connection.commit()
@@ -406,6 +440,7 @@ def cities2table(filename, rel_name, dbname, verbose=False, setup=False):
         print("Committing update to %s" % dbname)
     cur.close()
 
+
 def sql_query(dbname, query):
     """
     Execute an SQL query over a database.
@@ -415,16 +450,21 @@ def sql_query(dbname, query):
     :type rel_name: str
     """
     import sqlite3
+
     try:
         path = nltk.data.find(dbname)
-        connection =  sqlite3.connect(str(path))
+        connection = sqlite3.connect(str(path))
         cur = connection.cursor()
         return cur.execute(query)
     except (ValueError, sqlite3.OperationalError):
         import warnings
-        warnings.warn("Make sure the database file %s is installed and uncompressed." % dbname)
+
+        warnings.warn(
+            "Make sure the database file %s is installed and uncompressed." % dbname
+        )
         raise
 
+
 def _str2records(filename, rel):
     """
     Read a file into memory and convert each relation clause into a list.
@@ -433,12 +473,13 @@ def _str2records(filename, rel):
     contents = nltk.data.load("corpora/chat80/%s" % filename, format="text")
     for line in contents.splitlines():
         if line.startswith(rel):
-            line = re.sub(rel+r'\(', '', line)
-            line = re.sub(r'\)\.$', '', line)
-            record = line.split(',')
+            line = re.sub(rel + r"\(", "", line)
+            line = re.sub(r"\)\.$", "", line)
+            record = line.split(",")
             recs.append(record)
     return recs
 
+
 def unary_concept(label, subj, records):
     """
     Make a unary concept out of the primary key in a record.
@@ -461,6 +502,7 @@ def unary_concept(label, subj, records):
         c.augment(record[subj])
     return c
 
+
 def binary_concept(label, closures, subj, obj, records):
     """
     Make a binary concept out of the primary key and another field in a record.
@@ -489,8 +531,8 @@ def binary_concept(label, closures, subj, obj, records):
     :return: ``Concept`` of arity 2
     :rtype: Concept
     """
-    if not label == 'border' and not label == 'contain':
-        label = label + '_of'
+    if not label == "border" and not label == "contain":
+        label = label + "_of"
     c = Concept(label, arity=2, closures=closures, extension=set())
     for record in records:
         c.augment((record[subj], record[obj]))
@@ -511,15 +553,15 @@ def process_bundle(rels):
     """
     concepts = {}
     for rel in rels:
-        rel_name = rel['rel_name']
-        closures = rel['closures']
-        schema = rel['schema']
-        filename = rel['filename']
+        rel_name = rel["rel_name"]
+        closures = rel["closures"]
+        schema = rel["schema"]
+        filename = rel["filename"]
 
         concept_list = clause2concepts(filename, rel_name, schema, closures)
         for c in concept_list:
             label = c.prefLabel
-            if (label in concepts):
+            if label in concepts:
                 for data in c.extension:
                     concepts[label].augment(data)
                 concepts[label].close()
@@ -543,9 +585,11 @@ def make_valuation(concepts, read=False, lexicon=False):
 
     for c in concepts:
         vals.append((c.prefLabel, c.extension))
-    if lexicon: read = True
+    if lexicon:
+        read = True
     if read:
         from nltk.sem import Valuation
+
         val = Valuation({})
         val.update(vals)
         # add labels for individuals
@@ -568,7 +612,7 @@ def val_dump(rels, db):
     """
     concepts = process_bundle(rels).values()
     valuation = make_valuation(concepts, read=True)
-    db_out = shelve.open(db, 'n')
+    db_out = shelve.open(db, "n")
 
     db_out.update(valuation)
 
@@ -583,33 +627,34 @@ def val_load(db):
                The suffix '.db' should be omitted from the name.
     :type db: str
     """
-    dbname = db+".db"
+    dbname = db + ".db"
 
     if not os.access(dbname, os.R_OK):
         sys.exit("Cannot read file: %s" % dbname)
     else:
         db_in = shelve.open(db)
         from nltk.sem import Valuation
+
         val = Valuation(db_in)
-#        val.read(db_in.items())
+        #        val.read(db_in.items())
         return val
 
 
-#def alpha(str):
-    #"""
-    #Utility to filter out non-alphabetic constants.
+# def alpha(str):
+"""
+Utility to filter out non-alphabetic constants.
 
-    #:param str: candidate constant
-    #:type str: string
-    #:rtype: bool
-    #"""
-    #try:
-        #int(str)
-        #return False
-    #except ValueError:
-        ## some unknown values in records are labeled '?'
-        #if not str == '?':
-            #return True
+#     :param str: candidate constant
+#     :type str: string
+#     :rtype: bool
+#     """
+#     try:
+#         int(str)
+#         return False
+#     except ValueError:
+#         # some unknown values in records are labeled '?'
+#         if not str == '?':
+#             return True
 
 
 def label_indivs(valuation, lexicon=False):
@@ -629,12 +674,13 @@ def label_indivs(valuation, lexicon=False):
     pairs = [(e, e) for e in domain]
     if lexicon:
         lex = make_lex(domain)
-        with open("chat_pnames.cfg", 'w') as outfile:
+        with open("chat_pnames.cfg", "w") as outfile:
             outfile.writelines(lex)
     # read the pairs into the valuation
     valuation.update(pairs)
     return valuation
 
+
 def make_lex(symbols):
     """
     Create lexical CFG rules for each individual symbol.
@@ -657,9 +703,9 @@ def make_lex(symbols):
     template = "PropN[num=sg, sem=<\P.(P %s)>] -> '%s'\n"
 
     for s in symbols:
-        parts = s.split('_')
+        parts = s.split("_")
         caps = [p.capitalize() for p in parts]
-        pname = '_'.join(caps)
+        pname = "_".join(caps)
         rule = template % (s, pname)
         lex.append(rule)
     return lex
@@ -669,7 +715,8 @@ def make_lex(symbols):
 # Interface function to emulate other corpus readers
 ###########################################################################
 
-def concepts(items = items):
+
+def concepts(items=items):
     """
     Build a list of concepts corresponding to the relation names in ``items``.
 
@@ -678,7 +725,8 @@ def concepts(items = items):
     :return: the ``Concept`` objects which are extracted from the relations
     :rtype: list(Concept)
     """
-    if isinstance(items, string_types): items = (items,)
+    if isinstance(items, str):
+        items = (items,)
 
     rels = [item_metadata[r] for r in items]
 
@@ -686,53 +734,80 @@ def concepts(items = items):
     return concept_map.values()
 
 
-
-
 ###########################################################################
 
 
 def main():
     import sys
     from optparse import OptionParser
-    description = \
-    """
+
+    description = """
 Extract data from the Chat-80 Prolog files and convert them into a
 Valuation object for use in the NLTK semantics package.
     """
 
     opts = OptionParser(description=description)
     opts.set_defaults(verbose=True, lex=False, vocab=False)
-    opts.add_option("-s", "--store", dest="outdb",
-                    help="store a valuation in DB", metavar="DB")
-    opts.add_option("-l", "--load", dest="indb",
-                    help="load a stored valuation from DB", metavar="DB")
-    opts.add_option("-c", "--concepts", action="store_true",
-                    help="print concepts instead of a valuation")
-    opts.add_option("-r", "--relation", dest="label",
-                    help="print concept with label REL (check possible labels with '-v' option)", metavar="REL")
-    opts.add_option("-q", "--quiet", action="store_false", dest="verbose",
-                    help="don't print out progress info")
-    opts.add_option("-x", "--lex", action="store_true", dest="lex",
-                    help="write a file of lexical entries for country names, then exit")
-    opts.add_option("-v", "--vocab", action="store_true", dest="vocab",
-                        help="print out the vocabulary of concept labels and their arity, then exit")
+    opts.add_option(
+        "-s", "--store", dest="outdb", help="store a valuation in DB", metavar="DB"
+    )
+    opts.add_option(
+        "-l",
+        "--load",
+        dest="indb",
+        help="load a stored valuation from DB",
+        metavar="DB",
+    )
+    opts.add_option(
+        "-c",
+        "--concepts",
+        action="store_true",
+        help="print concepts instead of a valuation",
+    )
+    opts.add_option(
+        "-r",
+        "--relation",
+        dest="label",
+        help="print concept with label REL (check possible labels with '-v' option)",
+        metavar="REL",
+    )
+    opts.add_option(
+        "-q",
+        "--quiet",
+        action="store_false",
+        dest="verbose",
+        help="don't print out progress info",
+    )
+    opts.add_option(
+        "-x",
+        "--lex",
+        action="store_true",
+        dest="lex",
+        help="write a file of lexical entries for country names, then exit",
+    )
+    opts.add_option(
+        "-v",
+        "--vocab",
+        action="store_true",
+        dest="vocab",
+        help="print out the vocabulary of concept labels and their arity, then exit",
+    )
 
     (options, args) = opts.parse_args()
     if options.outdb and options.indb:
         opts.error("Options --store and --load are mutually exclusive")
 
-
     if options.outdb:
         # write the valuation to a persistent database
         if options.verbose:
-            outdb = options.outdb+".db"
+            outdb = options.outdb + ".db"
             print("Dumping a valuation to %s" % outdb)
         val_dump(rels, options.outdb)
         sys.exit(0)
     else:
         # try to read in a valuation from a database
         if options.indb is not None:
-            dbname = options.indb+".db"
+            dbname = options.indb + ".db"
             if not os.access(dbname, os.R_OK):
                 sys.exit("Cannot read file: %s" % dbname)
             else:
@@ -773,10 +848,10 @@ def sql_demo():
     """
     print()
     print("Using SQL to extract rows from 'city.db' RDB.")
-    for row in sql_query('corpora/city_database/city.db', "SELECT * FROM city_table"):
+    for row in sql_query("corpora/city_database/city.db", "SELECT * FROM city_table"):
         print(row)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
     sql_demo()
diff --git a/nlp_resource_data/nltk/sem/chat80.pyc b/nlp_resource_data/nltk/sem/chat80.pyc
deleted file mode 100755 (executable)
index 6b771b9..0000000
Binary files a/nlp_resource_data/nltk/sem/chat80.pyc and /dev/null differ
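The concepts() change above (isinstance(items, str) replacing the six string_types check) is the pattern applied throughout this commit wherever six was imported. A minimal sketch of the before/after behaviour under Python 3 (the helper name is illustrative, not part of chat80.py):

    def as_tuple(items):
        # was: if isinstance(items, string_types): ...  (six, Python 2 era)
        if isinstance(items, str):  # Python 3: str covers all text strings
            items = (items,)
        return items

    print(as_tuple("city"))           # -> ('city',)
    print(as_tuple(("city", "sea")))  # -> ('city', 'sea')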
diff --git a/nlp_resource_data/nltk/sem/cooper_storage.py b/nlp_resource_data/nltk/sem/cooper_storage.py
old mode 100755 (executable)
new mode 100644 (file)
index f1a7aab..830c3e4
@@ -1,19 +1,20 @@
 # Natural Language Toolkit: Cooper storage for Quantifier Ambiguity
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function
 
 from nltk.sem.logic import LambdaExpression, ApplicationExpression, Variable
 from nltk.parse import load_parser
 from nltk.parse.featurechart import InstantiateVarsChart
 
+
 class CooperStore(object):
     """
     A container for handling quantifier ambiguity via Cooper storage.
     """
+
     def __init__(self, featstruct):
         """
         :param featstruct: The value of the ``sem`` node in a tree from
@@ -24,8 +25,8 @@ class CooperStore(object):
         self.featstruct = featstruct
         self.readings = []
         try:
-            self.core = featstruct['CORE']
-            self.store = featstruct['STORE']
+            self.core = featstruct["CORE"]
+            self.store = featstruct["STORE"]
         except KeyError:
             print("%s is not a Cooper storage structure" % featstruct)
 
@@ -35,12 +36,13 @@ class CooperStore(object):
         :type lst: list
         :rtype: iter
         """
-        remove = lambda lst0, index: lst0[:index] + lst0[index+1:]
+        remove = lambda lst0, index: lst0[:index] + lst0[index + 1 :]
         if lst:
             for index, x in enumerate(lst):
                 for y in self._permute(remove(lst, index)):
-                    yield (x,)+y
-        else: yield ()
+                    yield (x,) + y
+        else:
+            yield ()
 
     def s_retrieve(self, trace=False):
         """
@@ -59,14 +61,16 @@ class CooperStore(object):
         """
         for perm, store_perm in enumerate(self._permute(self.store)):
             if trace:
-                print("Permutation %s" % (perm+1))
+                print("Permutation %s" % (perm + 1))
             term = self.core
             for bindop in store_perm:
                 # we just want the arguments that are wrapped by the 'bo' predicate
                 quant, varex = tuple(bindop.args)
                 # use var to make an abstraction over the current term and then
                 # apply the quantifier to it
-                term = ApplicationExpression(quant, LambdaExpression(varex.variable, term))
+                term = ApplicationExpression(
+                    quant, LambdaExpression(varex.variable, term)
+                )
                 if trace:
                     print("  ", term)
                 term = term.simplify()
@@ -78,7 +82,7 @@ def parse_with_bindops(sentence, grammar=None, trace=0):
     Use a grammar with Binding Operators to parse a sentence.
     """
     if not grammar:
-        grammar = 'grammars/book_grammars/storage.fcfg'
+        grammar = "grammars/book_grammars/storage.fcfg"
     parser = load_parser(grammar, trace=trace, chart_class=InstantiateVarsChart)
     # Parse the sentence.
     tokens = sentence.split()
@@ -87,14 +91,15 @@ def parse_with_bindops(sentence, grammar=None, trace=0):
 
 def demo():
     from nltk.sem import cooper_storage as cs
+
     sentence = "every girl chases a dog"
-    #sentence = "a man gives a bone to every dog"
+    # sentence = "a man gives a bone to every dog"
     print()
     print("Analyis of sentence '%s'" % sentence)
     print("=" * 50)
     trees = cs.parse_with_bindops(sentence, trace=0)
     for tree in trees:
-        semrep = cs.CooperStore(tree.label()['SEM'])
+        semrep = cs.CooperStore(tree.label()["SEM"])
         print()
         print("Binding operators:")
         print("-" * 15)
@@ -112,7 +117,8 @@ def demo():
         print("-" * 15)
 
         for i, reading in enumerate(semrep.readings):
-            print("%s: %s" % (i+1, reading))
+            print("%s: %s" % (i + 1, reading))
+
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/sem/cooper_storage.pyc b/nlp_resource_data/nltk/sem/cooper_storage.pyc
deleted file mode 100755 (executable)
index 9d4f8a9..0000000
Binary files a/nlp_resource_data/nltk/sem/cooper_storage.pyc and /dev/null differ
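The quantifier-scope machinery in cooper_storage.py rests on the small _permute generator reformatted above: every ordering of the binding operators in the store is retrieved in turn, and each ordering yields one candidate reading. Restated as a self-contained sketch (equivalent logic, recursive form):

    def permute(lst):
        # Yield every ordering of lst, one tuple per permutation.
        if not lst:
            yield ()
            return
        for index, x in enumerate(lst):
            for rest in permute(lst[:index] + lst[index + 1:]):
                yield (x,) + rest

    # Two binding operators -> two orderings -> two quantifier scopings,
    # matching the "every girl chases a dog" demo above.
    print(list(permute(["every girl", "a dog"])))
    # -> [('every girl', 'a dog'), ('a dog', 'every girl')]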
diff --git a/nlp_resource_data/nltk/sem/drt.py b/nlp_resource_data/nltk/sem/drt.py
old mode 100755 (executable)
new mode 100644 (file)
index bd64839..57e26fb
@@ -2,44 +2,60 @@
 #
 # Author: Dan Garrette <dhgarrette@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals
 
 import operator
 from functools import reduce
 from itertools import chain
 
-from six import string_types
-
-from nltk.compat import python_2_unicode_compatible
-from nltk.sem.logic import (APP, AbstractVariableExpression, AllExpression,
-                            AndExpression, ApplicationExpression, BinaryExpression,
-                            BooleanExpression, ConstantExpression, EqualityExpression,
-                            EventVariableExpression, ExistsExpression, Expression,
-                            FunctionVariableExpression, ImpExpression,
-                            IndividualVariableExpression, LambdaExpression, Tokens,
-                            LogicParser, NegatedExpression, OrExpression, Variable,
-                            is_eventvar, is_funcvar, is_indvar, unique_variable)
+from nltk.sem.logic import (
+    APP,
+    AbstractVariableExpression,
+    AllExpression,
+    AndExpression,
+    ApplicationExpression,
+    BinaryExpression,
+    BooleanExpression,
+    ConstantExpression,
+    EqualityExpression,
+    EventVariableExpression,
+    ExistsExpression,
+    Expression,
+    FunctionVariableExpression,
+    ImpExpression,
+    IndividualVariableExpression,
+    LambdaExpression,
+    Tokens,
+    LogicParser,
+    NegatedExpression,
+    OrExpression,
+    Variable,
+    is_eventvar,
+    is_funcvar,
+    is_indvar,
+    unique_variable,
+)
 
 # Import Tkinter-based modules if they are available
 try:
-    from six.moves.tkinter import Canvas, Tk
-    from six.moves.tkinter_font import Font
+    from tkinter import Canvas, Tk
+    from tkinter.font import Font
     from nltk.util import in_idle
 
 except ImportError:
     # No need to print a warning here, nltk.draw has already printed one.
     pass
 
+
 class DrtTokens(Tokens):
-    DRS = 'DRS'
-    DRS_CONC = '+'
-    PRONOUN = 'PRO'
-    OPEN_BRACKET = '['
-    CLOSE_BRACKET = ']'
-    COLON = ':'
+    DRS = "DRS"
+    DRS_CONC = "+"
+    PRONOUN = "PRO"
+    OPEN_BRACKET = "["
+    CLOSE_BRACKET = "]"
+    COLON = ":"
 
     PUNCT = [DRS_CONC, OPEN_BRACKET, CLOSE_BRACKET, COLON]
 
@@ -50,19 +66,21 @@ class DrtTokens(Tokens):
 
 class DrtParser(LogicParser):
     """A lambda calculus expression parser."""
+
     def __init__(self):
         LogicParser.__init__(self)
 
         self.operator_precedence = dict(
-                               [(x,1) for x in DrtTokens.LAMBDA_LIST]             + \
-                               [(x,2) for x in DrtTokens.NOT_LIST]                + \
-                               [(APP,3)]                                          + \
-                               [(x,4) for x in DrtTokens.EQ_LIST+Tokens.NEQ_LIST] + \
-                               [(DrtTokens.COLON,5)]                              + \
-                               [(DrtTokens.DRS_CONC,6)]                           + \
-                               [(x,7) for x in DrtTokens.OR_LIST]                 + \
-                               [(x,8) for x in DrtTokens.IMP_LIST]                + \
-                               [(None,9)])
+            [(x, 1) for x in DrtTokens.LAMBDA_LIST]
+            + [(x, 2) for x in DrtTokens.NOT_LIST]
+            + [(APP, 3)]
+            + [(x, 4) for x in DrtTokens.EQ_LIST + Tokens.NEQ_LIST]
+            + [(DrtTokens.COLON, 5)]
+            + [(DrtTokens.DRS_CONC, 6)]
+            + [(x, 7) for x in DrtTokens.OR_LIST]
+            + [(x, 8) for x in DrtTokens.IMP_LIST]
+            + [(None, 9)]
+        )
 
     def get_all_symbols(self):
         """This method exists to be overridden"""
@@ -102,8 +120,10 @@ class DrtParser(LogicParser):
     def handle_DRS(self, tok, context):
         # a DRS
         refs = self.handle_refs()
-        if self.inRange(0) and self.token(0) == DrtTokens.COMMA: #if there is a comma (it's optional)
-            self.token() # swallow the comma
+        if (
+            self.inRange(0) and self.token(0) == DrtTokens.COMMA
+        ):  # if there is a comma (it's optional)
+            self.token()  # swallow the comma
         conds = self.handle_conds(context)
         self.assertNextToken(DrtTokens.CLOSE)
         return DRS(refs, conds, None)
@@ -112,10 +132,10 @@ class DrtParser(LogicParser):
         self.assertNextToken(DrtTokens.OPEN_BRACKET)
         refs = []
         while self.inRange(0) and self.token(0) != DrtTokens.CLOSE_BRACKET:
-        # Support expressions like: DRS([x y],C) == DRS([x,y],C)
+            # Support expressions like: DRS([x y],C) == DRS([x,y],C)
             if refs and self.token(0) == DrtTokens.COMMA:
-                self.token() # swallow the comma
-            refs.append(self.get_next_token_variable('quantified'))
+                self.token()  # swallow the comma
+            refs.append(self.get_next_token_variable("quantified"))
         self.assertNextToken(DrtTokens.CLOSE_BRACKET)
         return refs
 
@@ -125,14 +145,14 @@ class DrtParser(LogicParser):
         while self.inRange(0) and self.token(0) != DrtTokens.CLOSE_BRACKET:
             # Support expressions like: DRS([x y],C) == DRS([x, y],C)
             if conds and self.token(0) == DrtTokens.COMMA:
-                self.token() # swallow the comma
+                self.token()  # swallow the comma
             conds.append(self.process_next_expression(context))
         self.assertNextToken(DrtTokens.CLOSE_BRACKET)
         return conds
 
     def handle_prop(self, tok, context):
         variable = self.make_VariableExpression(tok)
-        self.assertNextToken(':')
+        self.assertNextToken(":")
         drs = self.process_next_expression(DrtTokens.COLON)
         return DrtProposition(variable, drs)
 
@@ -149,12 +169,14 @@ class DrtParser(LogicParser):
         elif tok in DrtTokens.OR_LIST:
             return DrtOrExpression
         elif tok in DrtTokens.IMP_LIST:
+
             def make_imp_expression(first, second):
                 if isinstance(first, DRS):
                     return DRS(first.refs, first.conds, second)
                 if isinstance(first, DrtConcatenation):
                     return DrtConcatenation(first.first, first.second, second)
-                raise Exception('Antecedent of implication must be a DRS')
+                raise Exception("Antecedent of implication must be a DRS")
+
             return make_imp_expression
         else:
             return None
@@ -203,7 +225,7 @@ class DrtExpression(object):
             return DRS(self.refs, self.conds, other)
         if isinstance(self, DrtConcatenation):
             return DrtConcatenation(self.first, self.second, other)
-        raise Exception('Antecedent of implication must be a DRS')
+        raise Exception("Antecedent of implication must be a DRS")
 
     def equiv(self, other, prover=None):
         """
@@ -216,14 +238,15 @@ class DrtExpression(object):
         """
         assert isinstance(other, DrtExpression)
 
-        f1 = self.simplify().fol();
-        f2 = other.simplify().fol();
+        f1 = self.simplify().fol()
+        f2 = other.simplify().fol()
         return f1.equiv(f2, prover)
 
     @property
     def type(self):
-        raise AttributeError("'%s' object has no attribute 'type'" %
-                             self.__class__.__name__)
+        raise AttributeError(
+            "'%s' object has no attribute 'type'" % self.__class__.__name__
+        )
 
     def typecheck(self, signature=None):
         raise NotImplementedError()
@@ -241,10 +264,12 @@ class DrtExpression(object):
 
     def is_pronoun_function(self):
         """ Is self of the form "PRO(x)"? """
-        return isinstance(self, DrtApplicationExpression) and \
-               isinstance(self.function, DrtAbstractVariableExpression) and \
-               self.function.variable.name == DrtTokens.PRONOUN and \
-               isinstance(self.argument, DrtIndividualVariableExpression)
+        return (
+            isinstance(self, DrtApplicationExpression)
+            and isinstance(self.function, DrtAbstractVariableExpression)
+            and self.function.variable.name == DrtTokens.PRONOUN
+            and isinstance(self.argument, DrtIndividualVariableExpression)
+        )
 
     def make_EqualityExpression(self, first, second):
         return DrtEqualityExpression(first, second)
@@ -256,15 +281,14 @@ class DrtExpression(object):
         return resolve_anaphora(self)
 
     def eliminate_equality(self):
-        return self.visit_structured(lambda e: e.eliminate_equality(),
-                                     self.__class__)
+        return self.visit_structured(lambda e: e.eliminate_equality(), self.__class__)
 
     def pretty_format(self):
         """
         Draw the DRS
         :return: the pretty print string
         """
-        return '\n'.join(self._pretty())
+        return "\n".join(self._pretty())
 
     def pretty_print(self):
         print(self.pretty_format())
@@ -273,9 +297,9 @@ class DrtExpression(object):
         DrsDrawer(self).draw()
 
 
-@python_2_unicode_compatible
 class DRS(DrtExpression, Expression):
     """A Discourse Representation Structure."""
+
     def __init__(self, refs, conds, consequent=None):
         """
         :param refs: list of ``DrtIndividualVariableExpression`` for the
@@ -290,45 +314,63 @@ class DRS(DrtExpression, Expression):
         """Replace all instances of variable v with expression E in self,
         where v is free in self."""
         if variable in self.refs:
-            #if a bound variable is the thing being replaced
+            # if a bound variable is the thing being replaced
             if not replace_bound:
                 return self
             else:
                 i = self.refs.index(variable)
                 if self.consequent:
-                    consequent = self.consequent.replace(variable, expression, True, alpha_convert)
+                    consequent = self.consequent.replace(
+                        variable, expression, True, alpha_convert
+                    )
                 else:
                     consequent = None
-                return DRS(self.refs[:i]+[expression.variable]+self.refs[i+1:],
-                           [cond.replace(variable, expression, True, alpha_convert)
-                            for cond in self.conds],
-                           consequent)
+                return DRS(
+                    self.refs[:i] + [expression.variable] + self.refs[i + 1 :],
+                    [
+                        cond.replace(variable, expression, True, alpha_convert)
+                        for cond in self.conds
+                    ],
+                    consequent,
+                )
         else:
             if alpha_convert:
                 # any bound variable that appears in the expression must
                 # be alpha converted to avoid a conflict
-                for ref in (set(self.refs) & expression.free()):
+                for ref in set(self.refs) & expression.free():
                     newvar = unique_variable(ref)
                     newvarex = DrtVariableExpression(newvar)
                     i = self.refs.index(ref)
                     if self.consequent:
-                        consequent = self.consequent.replace(ref, newvarex, True, alpha_convert)
+                        consequent = self.consequent.replace(
+                            ref, newvarex, True, alpha_convert
+                        )
                     else:
                         consequent = None
-                    self = DRS(self.refs[:i]+[newvar]+self.refs[i+1:],
-                               [cond.replace(ref, newvarex, True, alpha_convert)
-                                for cond in self.conds],
-                               consequent)
-
-            #replace in the conditions
+                    self = DRS(
+                        self.refs[:i] + [newvar] + self.refs[i + 1 :],
+                        [
+                            cond.replace(ref, newvarex, True, alpha_convert)
+                            for cond in self.conds
+                        ],
+                        consequent,
+                    )
+
+            # replace in the conditions
             if self.consequent:
-                consequent = self.consequent.replace(variable, expression, replace_bound, alpha_convert)
+                consequent = self.consequent.replace(
+                    variable, expression, replace_bound, alpha_convert
+                )
             else:
                 consequent = None
-            return DRS(self.refs,
-                       [cond.replace(variable, expression, replace_bound, alpha_convert)
-                        for cond in self.conds],
-                       consequent)
+            return DRS(
+                self.refs,
+                [
+                    cond.replace(variable, expression, replace_bound, alpha_convert)
+                    for cond in self.conds
+                ],
+                consequent,
+            )
 
     def free(self):
         """:see: Expression.free()"""
@@ -340,7 +382,9 @@ class DRS(DrtExpression, Expression):
     def get_refs(self, recursive=False):
         """:see: AbstractExpression.get_refs()"""
         if recursive:
-            conds_refs = self.refs + list(chain(*(c.get_refs(True) for c in self.conds)))
+            conds_refs = self.refs + list(
+                chain(*(c.get_refs(True) for c in self.conds))
+            )
             if self.consequent:
                 conds_refs.extend(self.consequent.get_refs(True))
             return conds_refs
@@ -356,7 +400,7 @@ class DRS(DrtExpression, Expression):
 
     def visit_structured(self, function, combinator):
         """:see: Expression.visit_structured()"""
-        consequent = (function(self.consequent) if self.consequent else None)
+        consequent = function(self.consequent) if self.consequent else None
         return combinator(self.refs, list(map(function, self.conds)), consequent)
 
     def eliminate_equality(self):
@@ -364,12 +408,16 @@ class DRS(DrtExpression, Expression):
         i = 0
         while i < len(drs.conds):
             cond = drs.conds[i]
-            if isinstance(cond, EqualityExpression) and \
-               isinstance(cond.first, AbstractVariableExpression) and \
-               isinstance(cond.second, AbstractVariableExpression):
-                drs = DRS(list(set(drs.refs)-set([cond.second.variable])),
-                          drs.conds[:i]+drs.conds[i+1:],
-                          drs.consequent)
+            if (
+                isinstance(cond, EqualityExpression)
+                and isinstance(cond.first, AbstractVariableExpression)
+                and isinstance(cond.second, AbstractVariableExpression)
+            ):
+                drs = DRS(
+                    list(set(drs.refs) - set([cond.second.variable])),
+                    drs.conds[:i] + drs.conds[i + 1 :],
+                    drs.consequent,
+                )
                 if cond.second.variable != cond.first.variable:
                     drs = drs.replace(cond.second.variable, cond.first, False, False)
                     i = 0
@@ -380,12 +428,15 @@ class DRS(DrtExpression, Expression):
         for cond in drs.conds:
             new_cond = cond.eliminate_equality()
             new_cond_simp = new_cond.simplify()
-            if not isinstance(new_cond_simp, DRS) or \
-               new_cond_simp.refs or new_cond_simp.conds or \
-               new_cond_simp.consequent:
+            if (
+                not isinstance(new_cond_simp, DRS)
+                or new_cond_simp.refs
+                or new_cond_simp.conds
+                or new_cond_simp.consequent
+            ):
                 conds.append(new_cond)
 
-        consequent = (drs.consequent.eliminate_equality() if drs.consequent else None)
+        consequent = drs.consequent.eliminate_equality() if drs.consequent else None
         return DRS(drs.refs, conds, consequent)
 
     def fol(self):
@@ -413,20 +464,29 @@ class DRS(DrtExpression, Expression):
             return accum
 
     def _pretty(self):
-        refs_line = ' '.join(self._order_ref_strings(self.refs))
+        refs_line = " ".join(self._order_ref_strings(self.refs))
 
-        cond_lines = [cond for cond_line in [filter(lambda s: s.strip(), cond._pretty())
-                                             for cond in self.conds]
-                      for cond in cond_line]
+        cond_lines = [
+            cond
+            for cond_line in [
+                filter(lambda s: s.strip(), cond._pretty()) for cond in self.conds
+            ]
+            for cond in cond_line
+        ]
         length = max([len(refs_line)] + list(map(len, cond_lines)))
-        drs = ([' _' + '_' * length            + '_ ',
-                '| ' + refs_line.ljust(length) + ' |',
-                '|-' + '-' * length            + '-|'] +
-               ['| ' + line.ljust(length)      + ' |' for line in cond_lines] +
-               ['|_' + '_' * length            + '_|'])
+        drs = (
+            [
+                " _" + "_" * length + "_ ",
+                "| " + refs_line.ljust(length) + " |",
+                "|-" + "-" * length + "-|",
+            ]
+            + ["| " + line.ljust(length) + " |" for line in cond_lines]
+            + ["|_" + "_" * length + "_|"]
+        )
         if self.consequent:
-            return DrtBinaryExpression._assemble_pretty(drs, DrtTokens.IMP,
-                                                        self.consequent._pretty())
+            return DrtBinaryExpression._assemble_pretty(
+                drs, DrtTokens.IMP, self.consequent._pretty()
+            )
         return drs
 
     def _order_ref_strings(self, refs):
@@ -444,10 +504,12 @@ class DRS(DrtExpression, Expression):
                 event_vars.append(s)
             else:
                 other_vars.append(s)
-        return sorted(other_vars) + \
-               sorted(event_vars, key=lambda v: int([v[2:],-1][len(v[2:]) == 0])) + \
-               sorted(func_vars, key=lambda v: (v[0], int([v[1:],-1][len(v[1:])==0]))) + \
-               sorted(ind_vars, key=lambda v: (v[0], int([v[1:],-1][len(v[1:])==0])))
+        return (
+            sorted(other_vars)
+            + sorted(event_vars, key=lambda v: int([v[2:], -1][len(v[2:]) == 0]))
+            + sorted(func_vars, key=lambda v: (v[0], int([v[1:], -1][len(v[1:]) == 0])))
+            + sorted(ind_vars, key=lambda v: (v[0], int([v[1:], -1][len(v[1:]) == 0])))
+        )
 
     def __eq__(self, other):
         r"""Defines equality modulo alphabetic variance.
@@ -458,8 +520,9 @@ class DRS(DrtExpression, Expression):
                 for (r1, r2) in zip(self.refs, converted_other.refs):
                     varex = self.make_VariableExpression(r1)
                     converted_other = converted_other.replace(r2, varex, True)
-                if self.consequent == converted_other.consequent and \
-                   len(self.conds) == len(converted_other.conds):
+                if self.consequent == converted_other.consequent and len(
+                    self.conds
+                ) == len(converted_other.conds):
                     for c1, c2 in zip(self.conds, converted_other.conds):
                         if not (c1 == c2):
                             return False
@@ -472,11 +535,20 @@ class DRS(DrtExpression, Expression):
     __hash__ = Expression.__hash__
 
     def __str__(self):
-        drs = '([%s],[%s])' % (','.join(self._order_ref_strings(self.refs)),
-                               ', '.join("%s" % cond for cond in self.conds)) # map(str, self.conds)))
+        drs = "([%s],[%s])" % (
+            ",".join(self._order_ref_strings(self.refs)),
+            ", ".join("%s" % cond for cond in self.conds),
+        )  # map(str, self.conds)))
         if self.consequent:
-            return DrtTokens.OPEN + drs + ' ' + DrtTokens.IMP + ' ' + \
-                   "%s" % self.consequent + DrtTokens.CLOSE
+            return (
+                DrtTokens.OPEN
+                + drs
+                + " "
+                + DrtTokens.IMP
+                + " "
+                + "%s" % self.consequent
+                + DrtTokens.CLOSE
+            )
         return drs
 
 
@@ -505,26 +577,35 @@ class DrtAbstractVariableExpression(DrtExpression, AbstractVariableExpression):
 
     def _pretty(self):
         s = "%s" % self
-        blank = ' '*len(s)
+        blank = " " * len(s)
         return [blank, blank, s, blank]
 
     def eliminate_equality(self):
         return self
 
-class DrtIndividualVariableExpression(DrtAbstractVariableExpression, IndividualVariableExpression):
+
+class DrtIndividualVariableExpression(
+    DrtAbstractVariableExpression, IndividualVariableExpression
+):
     pass
 
-class DrtFunctionVariableExpression(DrtAbstractVariableExpression, FunctionVariableExpression):
+
+class DrtFunctionVariableExpression(
+    DrtAbstractVariableExpression, FunctionVariableExpression
+):
     pass
 
-class DrtEventVariableExpression(DrtIndividualVariableExpression, EventVariableExpression):
+
+class DrtEventVariableExpression(
+    DrtIndividualVariableExpression, EventVariableExpression
+):
     pass
 
+
 class DrtConstantExpression(DrtAbstractVariableExpression, ConstantExpression):
     pass
 
 
-@python_2_unicode_compatible
 class DrtProposition(DrtExpression, Expression):
     def __init__(self, variable, drs):
         self.variable = variable
@@ -532,21 +613,31 @@ class DrtProposition(DrtExpression, Expression):
 
     def replace(self, variable, expression, replace_bound=False, alpha_convert=True):
         if self.variable == variable:
-            assert isinstance(expression, DrtAbstractVariableExpression), "Can only replace a proposition label with a variable"
-            return DrtProposition(expression.variable, self.drs.replace(variable, expression, replace_bound, alpha_convert))
+            assert isinstance(
+                expression, DrtAbstractVariableExpression
+            ), "Can only replace a proposition label with a variable"
+            return DrtProposition(
+                expression.variable,
+                self.drs.replace(variable, expression, replace_bound, alpha_convert),
+            )
         else:
-            return DrtProposition(self.variable, self.drs.replace(variable, expression, replace_bound, alpha_convert))
+            return DrtProposition(
+                self.variable,
+                self.drs.replace(variable, expression, replace_bound, alpha_convert),
+            )
 
     def eliminate_equality(self):
         return DrtProposition(self.variable, self.drs.eliminate_equality())
 
     def get_refs(self, recursive=False):
-        return (self.drs.get_refs(True) if recursive else [])
+        return self.drs.get_refs(True) if recursive else []
 
     def __eq__(self, other):
-        return self.__class__ == other.__class__ and \
-               self.variable == other.variable and \
-               self.drs == other.drs
+        return (
+            self.__class__ == other.__class__
+            and self.variable == other.variable
+            and self.drs == other.drs
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -558,10 +649,12 @@ class DrtProposition(DrtExpression, Expression):
 
     def _pretty(self):
         drs_s = self.drs._pretty()
-        blank = ' ' * len("%s" % self.variable)
-        return ([blank                + ' ' + line for line in drs_s[:1]] +
-                ["%s" % self.variable + ':' + line for line in drs_s[1:2]] +
-                [blank                + ' ' + line for line in drs_s[2:]])
+        blank = " " * len("%s" % self.variable)
+        return (
+            [blank + " " + line for line in drs_s[:1]]
+            + ["%s" % self.variable + ":" + line for line in drs_s[1:2]]
+            + [blank + " " + line for line in drs_s[2:]]
+        )
 
     def visit(self, function, combinator):
         """:see: Expression.visit()"""
@@ -572,7 +665,7 @@ class DrtProposition(DrtExpression, Expression):
         return combinator(self.variable, function(self.drs))
 
     def __str__(self):
-        return 'prop(%s, %s)' % (self.variable, self.drs)
+        return "prop(%s, %s)" % (self.variable, self.drs)
 
 
 class DrtNegatedExpression(DrtExpression, NegatedExpression):
@@ -585,10 +678,13 @@ class DrtNegatedExpression(DrtExpression, NegatedExpression):
 
     def _pretty(self):
         term_lines = self.term._pretty()
-        return (['    ' + line for line in term_lines[:2]] +
-                ['__  ' + line for line in term_lines[2:3]] +
-                ['  | ' + line for line in term_lines[3:4]] +
-                ['    ' + line for line in term_lines[4:]])
+        return (
+            ["    " + line for line in term_lines[:2]]
+            + ["__  " + line for line in term_lines[2:3]]
+            + ["  | " + line for line in term_lines[3:4]]
+            + ["    " + line for line in term_lines[4:]]
+        )
+
 
 class DrtLambdaExpression(DrtExpression, LambdaExpression):
     def alpha_convert(self, newvar):
@@ -596,8 +692,10 @@ class DrtLambdaExpression(DrtExpression, LambdaExpression):
         binder in the expression to ``newvar``.
         :param newvar: ``Variable``, for the new variable
         """
-        return self.__class__(newvar, self.term.replace(self.variable,
-                          DrtVariableExpression(newvar), True))
+        return self.__class__(
+            newvar,
+            self.term.replace(self.variable, DrtVariableExpression(newvar), True),
+        )
 
     def fol(self):
         return LambdaExpression(self.variable, self.term.fol())
@@ -608,39 +706,61 @@ class DrtLambdaExpression(DrtExpression, LambdaExpression):
         while term.__class__ == self.__class__:
             variables.append(term.variable)
             term = term.term
-        var_string = ' '.join("%s" % v for v in variables) + DrtTokens.DOT
+        var_string = " ".join("%s" % v for v in variables) + DrtTokens.DOT
         term_lines = term._pretty()
-        blank = ' ' * len(var_string)
-        return (['    ' + blank      + line for line in term_lines[:1]] +
-                [' \  ' + blank      + line for line in term_lines[1:2]] +
-                [' /\ ' + var_string + line for line in term_lines[2:3]] +
-                ['    ' + blank      + line for line in term_lines[3:]])
+        blank = " " * len(var_string)
+        return (
+            ["    " + blank + line for line in term_lines[:1]]
+            + [" \  " + blank + line for line in term_lines[1:2]]
+            + [" /\ " + var_string + line for line in term_lines[2:3]]
+            + ["    " + blank + line for line in term_lines[3:]]
+        )
+
 
 class DrtBinaryExpression(DrtExpression, BinaryExpression):
     def get_refs(self, recursive=False):
         """:see: AbstractExpression.get_refs()"""
-        return self.first.get_refs(True) + self.second.get_refs(True) if recursive else []
+        return (
+            self.first.get_refs(True) + self.second.get_refs(True) if recursive else []
+        )
 
     def _pretty(self):
-        return DrtBinaryExpression._assemble_pretty(self._pretty_subex(self.first), self.getOp(), self._pretty_subex(self.second))
+        return DrtBinaryExpression._assemble_pretty(
+            self._pretty_subex(self.first),
+            self.getOp(),
+            self._pretty_subex(self.second),
+        )
 
     @staticmethod
     def _assemble_pretty(first_lines, op, second_lines):
         max_lines = max(len(first_lines), len(second_lines))
         first_lines = _pad_vertically(first_lines, max_lines)
         second_lines = _pad_vertically(second_lines, max_lines)
-        blank = ' ' * len(op)
+        blank = " " * len(op)
         first_second_lines = list(zip(first_lines, second_lines))
-        return ([' ' + first_line + ' ' + blank + ' ' + second_line + ' ' for first_line, second_line in first_second_lines[:2]] +
-                ['(' + first_line + ' ' + op    + ' ' + second_line + ')' for first_line, second_line in first_second_lines[2:3]] +
-                [' ' + first_line + ' ' + blank + ' ' + second_line + ' ' for first_line, second_line in first_second_lines[3:]])
+        return (
+            [
+                " " + first_line + " " + blank + " " + second_line + " "
+                for first_line, second_line in first_second_lines[:2]
+            ]
+            + [
+                "(" + first_line + " " + op + " " + second_line + ")"
+                for first_line, second_line in first_second_lines[2:3]
+            ]
+            + [
+                " " + first_line + " " + blank + " " + second_line + " "
+                for first_line, second_line in first_second_lines[3:]
+            ]
+        )
 
     def _pretty_subex(self, subex):
         return subex._pretty()
 
+
 class DrtBooleanExpression(DrtBinaryExpression, BooleanExpression):
     pass
 
+
 class DrtOrExpression(DrtBooleanExpression, OrExpression):
     def fol(self):
         return OrExpression(self.first.fol(), self.second.fol())
@@ -650,13 +770,15 @@ class DrtOrExpression(DrtBooleanExpression, OrExpression):
             return [line[1:-1] for line in subex._pretty()]
         return DrtBooleanExpression._pretty_subex(self, subex)
 
+
 class DrtEqualityExpression(DrtBinaryExpression, EqualityExpression):
     def fol(self):
         return EqualityExpression(self.first.fol(), self.second.fol())
 
-@python_2_unicode_compatible
+
 class DrtConcatenation(DrtBooleanExpression):
     """DRS of the form '(DRS + DRS)'"""
+
     def __init__(self, first, second, consequent=None):
         DrtBooleanExpression.__init__(self, first, second)
         self.consequent = consequent
@@ -671,29 +793,37 @@ class DrtConcatenation(DrtBooleanExpression):
         # If variable is bound
         if variable in self.get_refs():
             if replace_bound:
-                first  = first.replace(variable, expression, replace_bound, alpha_convert)
-                second = second.replace(variable, expression, replace_bound, alpha_convert)
+                first = first.replace(
+                    variable, expression, replace_bound, alpha_convert
+                )
+                second = second.replace(
+                    variable, expression, replace_bound, alpha_convert
+                )
                 if consequent:
-                    consequent = consequent.replace(variable, expression, replace_bound, alpha_convert)
+                    consequent = consequent.replace(
+                        variable, expression, replace_bound, alpha_convert
+                    )
         else:
             if alpha_convert:
                 # alpha convert every ref that is free in 'expression'
-                for ref in (set(self.get_refs(True)) & expression.free()):
+                for ref in set(self.get_refs(True)) & expression.free():
                     v = DrtVariableExpression(unique_variable(ref))
-                    first  = first.replace(ref, v, True, alpha_convert)
+                    first = first.replace(ref, v, True, alpha_convert)
                     second = second.replace(ref, v, True, alpha_convert)
                     if consequent:
                         consequent = consequent.replace(ref, v, True, alpha_convert)
 
-            first  = first.replace(variable, expression, replace_bound, alpha_convert)
+            first = first.replace(variable, expression, replace_bound, alpha_convert)
             second = second.replace(variable, expression, replace_bound, alpha_convert)
             if consequent:
-                consequent = consequent.replace(variable, expression, replace_bound, alpha_convert)
+                consequent = consequent.replace(
+                    variable, expression, replace_bound, alpha_convert
+                )
 
         return self.__class__(first, second, consequent)
 
     def eliminate_equality(self):
-        #TODO: at some point.  for now, simplify.
+        # TODO: at some point.  for now, simplify.
         drs = self.simplify()
         assert not isinstance(drs, DrtConcatenation)
         return drs.eliminate_equality()
@@ -701,11 +831,11 @@ class DrtConcatenation(DrtBooleanExpression):
     def simplify(self):
         first = self.first.simplify()
         second = self.second.simplify()
-        consequent = (self.consequent.simplify() if self.consequent else None)
+        consequent = self.consequent.simplify() if self.consequent else None
 
         if isinstance(first, DRS) and isinstance(second, DRS):
             # For any ref that is in both 'first' and 'second'
-            for ref in (set(first.get_refs(True)) & set(second.get_refs(True))):
+            for ref in set(first.get_refs(True)) & set(second.get_refs(True)):
                 # alpha convert the ref in 'second' to prevent collision
                 newvar = DrtVariableExpression(unique_variable(ref))
                 second = second.replace(ref, newvar, True)
@@ -732,12 +862,14 @@ class DrtConcatenation(DrtBooleanExpression):
             other_refs = other.get_refs()
             if len(self_refs) == len(other_refs):
                 converted_other = other
-                for (r1,r2) in zip(self_refs, other_refs):
+                for (r1, r2) in zip(self_refs, other_refs):
                     varex = self.make_VariableExpression(r1)
                     converted_other = converted_other.replace(r2, varex, True)
-                return self.first == converted_other.first and \
-                        self.second == converted_other.second and \
-                        self.consequent == converted_other.consequent
+                return (
+                    self.first == converted_other.first
+                    and self.second == converted_other.second
+                    and self.consequent == converted_other.consequent
+                )
         return False
 
     def __ne__(self, other):
@@ -752,12 +884,15 @@ class DrtConcatenation(DrtBooleanExpression):
         return e
 
     def _pretty(self):
-        drs = DrtBinaryExpression._assemble_pretty(self._pretty_subex(self.first),
-                                                   self.getOp(),
-                                                   self._pretty_subex(self.second))
+        drs = DrtBinaryExpression._assemble_pretty(
+            self._pretty_subex(self.first),
+            self.getOp(),
+            self._pretty_subex(self.second),
+        )
         if self.consequent:
-            drs = DrtBinaryExpression._assemble_pretty(drs, DrtTokens.IMP,
-                                                       self._pretty(self.consequent))
+            drs = DrtBinaryExpression._assemble_pretty(
+                drs, DrtTokens.IMP, self._pretty(self.consequent)
+            )
         return drs
 
     def _pretty_subex(self, subex):
@@ -765,22 +900,29 @@ class DrtConcatenation(DrtBooleanExpression):
             return [line[1:-1] for line in subex._pretty()]
         return DrtBooleanExpression._pretty_subex(self, subex)
 
-
     def visit(self, function, combinator):
         """:see: Expression.visit()"""
         if self.consequent:
-            return combinator([function(self.first), function(self.second), function(self.consequent)])
+            return combinator(
+                [function(self.first), function(self.second), function(self.consequent)]
+            )
         else:
             return combinator([function(self.first), function(self.second)])
 
     def __str__(self):
         first = self._str_subex(self.first)
         second = self._str_subex(self.second)
-        drs = Tokens.OPEN + first + ' ' + self.getOp() \
-                + ' ' + second + Tokens.CLOSE
+        drs = Tokens.OPEN + first + " " + self.getOp() + " " + second + Tokens.CLOSE
         if self.consequent:
-            return DrtTokens.OPEN + drs + ' ' + DrtTokens.IMP + ' ' + \
-                   "%s" % self.consequent + DrtTokens.CLOSE
+            return (
+                DrtTokens.OPEN
+                + drs
+                + " "
+                + DrtTokens.IMP
+                + " "
+                + "%s" % self.consequent
+                + DrtTokens.CLOSE
+            )
         return drs
 
     def _str_subex(self, subex):
@@ -796,8 +938,11 @@ class DrtApplicationExpression(DrtExpression, ApplicationExpression):
 
     def get_refs(self, recursive=False):
         """:see: AbstractExpression.get_refs()"""
-        return (self.function.get_refs(True) + self.argument.get_refs(True)
-                if recursive else [])
+        return (
+            self.function.get_refs(True) + self.argument.get_refs(True)
+            if recursive
+            else []
+        )
 
     def _pretty(self):
         function, args = self.uncurry()
@@ -807,17 +952,27 @@ class DrtApplicationExpression(DrtExpression, ApplicationExpression):
         function_lines = _pad_vertically(function_lines, max_lines)
         args_lines = [_pad_vertically(arg_lines, max_lines) for arg_lines in args_lines]
         func_args_lines = list(zip(function_lines, list(zip(*args_lines))))
-        return ([func_line + ' ' + ' '.join(args_line) + ' ' for func_line, args_line in func_args_lines[:2]] +
-                [func_line + '(' + ','.join(args_line) + ')' for func_line, args_line in func_args_lines[2:3]] +
-                [func_line + ' ' + ' '.join(args_line) + ' ' for func_line, args_line in func_args_lines[3:]])
+        return (
+            [
+                func_line + " " + " ".join(args_line) + " "
+                for func_line, args_line in func_args_lines[:2]
+            ]
+            + [
+                func_line + "(" + ",".join(args_line) + ")"
+                for func_line, args_line in func_args_lines[2:3]
+            ]
+            + [
+                func_line + " " + " ".join(args_line) + " "
+                for func_line, args_line in func_args_lines[3:]
+            ]
+        )
 
 
 def _pad_vertically(lines, max_lines):
-    pad_line = [' ' * len(lines[0])]
+    pad_line = [" " * len(lines[0])]
     return lines + pad_line * (max_lines - len(lines))
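
A minimal standalone sketch of the row-alignment convention the reflowed
pretty-printer relies on: every _pretty() returns a list of equal-width
strings, _pad_vertically() fills shorter operands with blank rows, and the
operator is drawn only on row index 2, the visual midline of a DRS box.
(Box contents below are invented for illustration.)

    def _pad_vertically(lines, max_lines):
        # pad a block of equal-width rows to max_lines with blank rows
        pad_line = [" " * len(lines[0])]
        return lines + pad_line * (max_lines - len(lines))

    left = ["____", "| x ", "|---", "| P "]   # hypothetical 4-row DRS box
    right = ["____", "| y "]                  # hypothetical 2-row box
    height = max(len(left), len(right))
    left = _pad_vertically(left, height)
    right = _pad_vertically(right, height)

    op = "+"
    blank = " " * len(op)
    rows = [
        # operator only on row 2; a same-width blank everywhere else
        l + " " + (op if i == 2 else blank) + " " + r
        for i, (l, r) in enumerate(zip(left, right))
    ]
    print("\n".join(rows))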
 
 
-@python_2_unicode_compatible
 class PossibleAntecedents(list, DrtExpression, Expression):
     def free(self):
         """Set of free variables."""
@@ -836,11 +991,11 @@ class PossibleAntecedents(list, DrtExpression, Expression):
 
     def _pretty(self):
         s = "%s" % self
-        blank = ' ' * len(s)
+        blank = " " * len(s)
         return [blank, blank, s]
 
     def __str__(self):
-        return '[' + ','.join("%s" % it for it in self) + ']'
+        return "[" + ",".join("%s" % it for it in self) + "]"
 
 
 class AnaphoraResolutionException(Exception):
@@ -855,11 +1010,12 @@ def resolve_anaphora(expression, trail=[]):
                 for ref in ancestor.get_refs():
                     refex = expression.make_VariableExpression(ref)
 
-                    #==========================================================
+                    # ==========================================================
                     # Don't allow resolution to itself or other types
-                    #==========================================================
-                    if refex.__class__ == expression.argument.__class__ and \
-                       not (refex == expression.argument):
+                    # ==========================================================
+                    if refex.__class__ == expression.argument.__class__ and not (
+                        refex == expression.argument
+                    ):
                         possible_antecedents.append(refex)
 
             if len(possible_antecedents) == 1:
@@ -880,14 +1036,16 @@ def resolve_anaphora(expression, trail=[]):
             # if the condition is of the form '(x = [])' then raise exception
             if isinstance(r_cond, EqualityExpression):
                 if isinstance(r_cond.first, PossibleAntecedents):
-                    #Reverse the order so that the variable is on the left
+                    # Reverse the order so that the variable is on the left
                     temp = r_cond.first
                     r_cond.first = r_cond.second
                     r_cond.second = temp
                 if isinstance(r_cond.second, PossibleAntecedents):
                     if not r_cond.second:
-                        raise AnaphoraResolutionException("Variable '%s' does not "
-                                "resolve to anything." % r_cond.first)
+                        raise AnaphoraResolutionException(
+                            "Variable '%s' does not "
+                            "resolve to anything." % r_cond.first
+                        )
 
             r_conds.append(r_cond)
         if expression.consequent:
@@ -900,29 +1058,37 @@ def resolve_anaphora(expression, trail=[]):
         return expression
 
     elif isinstance(expression, NegatedExpression):
-        return expression.__class__(resolve_anaphora(expression.term, trail + [expression]))
+        return expression.__class__(
+            resolve_anaphora(expression.term, trail + [expression])
+        )
 
     elif isinstance(expression, DrtConcatenation):
         if expression.consequent:
             consequent = resolve_anaphora(expression.consequent, trail + [expression])
         else:
             consequent = None
-        return expression.__class__(resolve_anaphora(expression.first, trail + [expression]),
-                                    resolve_anaphora(expression.second, trail + [expression]),
-                                    consequent)
+        return expression.__class__(
+            resolve_anaphora(expression.first, trail + [expression]),
+            resolve_anaphora(expression.second, trail + [expression]),
+            consequent,
+        )
 
     elif isinstance(expression, BinaryExpression):
-        return expression.__class__(resolve_anaphora(expression.first, trail + [expression]),
-                                    resolve_anaphora(expression.second, trail + [expression]))
+        return expression.__class__(
+            resolve_anaphora(expression.first, trail + [expression]),
+            resolve_anaphora(expression.second, trail + [expression]),
+        )
 
     elif isinstance(expression, LambdaExpression):
-        return expression.__class__(expression.variable, resolve_anaphora(expression.term, trail + [expression]))
+        return expression.__class__(
+            expression.variable, resolve_anaphora(expression.term, trail + [expression])
+        )
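
For reference, resolve_anaphora() threads a trail of ancestor expressions
through the recursion: each PRO-style application collects same-class
referents from ancestor DRSs as PossibleAntecedents, substitutes directly
when exactly one candidate exists, and otherwise records an equality
condition. A usage sketch (import path assumed from this file's location;
the output comment is paraphrased, not captured):

    from nltk.sem.drt import DrtExpression, resolve_anaphora

    dexpr = DrtExpression.fromstring
    drs = dexpr(r"([x,y,z],[dog(x), cat(y), walks(z), PRO(z)])")
    print(resolve_anaphora(drs))
    # with two live candidates (x and y), PRO(z) should come back as an
    # equality over a PossibleAntecedents list, roughly: (z = [x,y])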
 
 
 class DrsDrawer(object):
-    BUFFER = 3     #Space between elements
-    TOPSPACE = 10  #Space above whole DRS
-    OUTERSPACE = 6 #Space to the left, right, and bottom of the whle DRS
+    BUFFER = 3  # Space between elements
+    TOPSPACE = 10  # Space above whole DRS
+    OUTERSPACE = 6  # Space to the left, right, and bottom of the whole DRS
 
     def __init__(self, drs, size_canvas=True, canvas=None):
         """
@@ -935,7 +1101,7 @@ class DrsDrawer(object):
             master = Tk()
             master.title("DRT")
 
-            font = Font(family='helvetica', size=12)
+            font = Font(family="helvetica", size=12)
 
             if size_canvas:
                 canvas = Canvas(master, width=0, height=0)
@@ -943,9 +1109,9 @@ class DrsDrawer(object):
                 self.canvas = canvas
                 (right, bottom) = self._visit(drs, self.OUTERSPACE, self.TOPSPACE)
 
-                width = max(right+self.OUTERSPACE, 100)
-                height = bottom+self.OUTERSPACE
-                canvas = Canvas(master, width=width, height=height)#, bg='white')
+                width = max(right + self.OUTERSPACE, 100)
+                height = bottom + self.OUTERSPACE
+                canvas = Canvas(master, width=width, height=height)  # , bg='white')
             else:
                 canvas = Canvas(master, width=300, height=300)
 
@@ -989,13 +1155,15 @@ class DrsDrawer(object):
         :param y: the left side of the current drawing area
         :return: the bottom-rightmost point
         """
-        if isinstance(item, string_types):
-            self.canvas.create_text(x, y, anchor='nw', font=self.canvas.font, text=item)
+        if isinstance(item, str):
+            self.canvas.create_text(x, y, anchor="nw", font=self.canvas.font, text=item)
         elif isinstance(item, tuple):
             # item is the lower-right of a box
             (right, bottom) = item
             self.canvas.create_rectangle(x, y, right, bottom)
-            horiz_line_y = y + self._get_text_height() + (self.BUFFER * 2) #the line separating refs from conds
+            horiz_line_y = (
+                y + self._get_text_height() + (self.BUFFER * 2)
+            )  # the line separating refs from conds
             self.canvas.create_line(x, horiz_line_y, right, horiz_line_y)
 
         return self._visit_command(item, x, y)
@@ -1009,7 +1177,7 @@ class DrsDrawer(object):
         :param y: the left side of the current drawing area
         :return: the bottom-rightmost point
         """
-        if isinstance(item, string_types):
+        if isinstance(item, str):
             return (x + self.canvas.font.measure(item), y + self._get_text_height())
         elif isinstance(item, tuple):
             return item
@@ -1023,14 +1191,14 @@ class DrsDrawer(object):
         :return: the bottom-rightmost point
         """
         if command == self._visit_command:
-            #if we don't need to draw the item, then we can use the cached values
+            # if we don't need to draw the item, then we can use the cached values
             try:
-                #attempt to retrieve cached values
+                # attempt to retrieve cached values
                 right = expression._drawing_width + x
                 bottom = expression._drawing_height + y
                 return (right, bottom)
             except AttributeError:
-                #the values have not been cached yet, so compute them
+                # the values have not been cached yet, so compute them
                 pass
 
         if isinstance(expression, DrtAbstractVariableExpression):
@@ -1054,7 +1222,7 @@ class DrsDrawer(object):
 
         (right, bottom) = factory(expression, command, x, y)
 
-        #cache the values
+        # cache the values
         expression._drawing_width = right - x
         expression._drawing_height = bottom - y
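
The try/except above caches each subexpression's rendered size on the
expression object itself, so the sizing pass (_visit_command) and the
drawing pass share one layout computation. The same pattern in isolation
(names hypothetical):

    class Node:
        pass

    def measure(node):
        try:
            # reuse dimensions cached by an earlier pass
            return node._drawing_width, node._drawing_height
        except AttributeError:
            pass
        w, h = expensive_layout(node)  # stand-in for real font metrics
        node._drawing_width, node._drawing_height = w, h
        return w, h

    def expensive_layout(node):
        return 42, 17

    n = Node()
    assert measure(n) == measure(n) == (42, 17)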
 
@@ -1071,21 +1239,25 @@ class DrsDrawer(object):
         (right, bottom) = self._handle(expression.term, command, right, y)
 
         # Handle variables now that we know the y-coordinate
-        command(DrtTokens.NOT, x, self._get_centered_top(y, bottom - y, self._get_text_height()))
+        command(
+            DrtTokens.NOT,
+            x,
+            self._get_centered_top(y, bottom - y, self._get_text_height()),
+        )
 
         return (right, bottom)
 
     def _handle_DRS(self, expression, command, x, y):
-        left = x + self.BUFFER #indent the left side
-        bottom = y + self.BUFFER #indent the top
+        left = x + self.BUFFER  # indent the left side
+        bottom = y + self.BUFFER  # indent the top
 
         # Handle Discourse Referents
         if expression.refs:
-            refs = ' '.join("%s"%r for r in expression.refs)
+            refs = " ".join("%s" % r for r in expression.refs)
         else:
-            refs = '     '
+            refs = "     "
         (max_right, bottom) = command(refs, left, bottom)
-        bottom += (self.BUFFER * 2)
+        bottom += self.BUFFER * 2
 
         # Handle Conditions
         if expression.conds:
@@ -1103,32 +1275,40 @@ class DrsDrawer(object):
     def _handle_ApplicationExpression(self, expression, command, x, y):
         function, args = expression.uncurry()
         if not isinstance(function, DrtAbstractVariableExpression):
-            #It's not a predicate expression ("P(x,y)"), so leave arguments curried
+            # It's not a predicate expression ("P(x,y)"), so leave arguments curried
             function = expression.function
             args = [expression.argument]
 
         # Get the max bottom of any element on the line
         function_bottom = self._visit(function, x, y)[1]
-        max_bottom = max([function_bottom] + [self._visit(arg, x, y)[1] for arg in args])
+        max_bottom = max(
+            [function_bottom] + [self._visit(arg, x, y)[1] for arg in args]
+        )
 
         line_height = max_bottom - y
 
         # Handle 'function'
-        function_drawing_top = self._get_centered_top(y, line_height, function._drawing_height)
+        function_drawing_top = self._get_centered_top(
+            y, line_height, function._drawing_height
+        )
         right = self._handle(function, command, x, function_drawing_top)[0]
 
         # Handle open paren
-        centred_string_top = self._get_centered_top(y, line_height, self._get_text_height())
+        centred_string_top = self._get_centered_top(
+            y, line_height, self._get_text_height()
+        )
         right = command(DrtTokens.OPEN, right, centred_string_top)[0]
 
         # Handle each arg
-        for (i,arg) in enumerate(args):
-            arg_drawing_top = self._get_centered_top(y, line_height, arg._drawing_height)
+        for (i, arg) in enumerate(args):
+            arg_drawing_top = self._get_centered_top(
+                y, line_height, arg._drawing_height
+            )
             right = self._handle(arg, command, right, arg_drawing_top)[0]
 
-            if i+1 < len(args):
-                #since it's not the last arg, add a comma
-                right = command(DrtTokens.COMMA + ' ', right, centred_string_top)[0]
+            if i + 1 < len(args):
+                # since it's not the last arg, add a comma
+                right = command(DrtTokens.COMMA + " ", right, centred_string_top)[0]
 
         # Handle close paren
         right = command(DrtTokens.CLOSE, right, centred_string_top)[0]
@@ -1144,7 +1324,9 @@ class DrsDrawer(object):
         (right, bottom) = self._handle(expression.term, command, right, y)
 
         # Handle variables now that we know the y-coordinate
-        command(variables, x, self._get_centered_top(y, bottom - y, self._get_text_height()))
+        command(
+            variables, x, self._get_centered_top(y, bottom - y, self._get_text_height())
+        )
 
         return (right, bottom)
 
@@ -1155,19 +1337,31 @@ class DrsDrawer(object):
         line_height = max(first_height, second_height)
 
         # Handle open paren
-        centred_string_top = self._get_centered_top(y, line_height, self._get_text_height())
+        centred_string_top = self._get_centered_top(
+            y, line_height, self._get_text_height()
+        )
         right = command(DrtTokens.OPEN, x, centred_string_top)[0]
 
         # Handle the first operand
         first_height = expression.first._drawing_height
-        (right, first_bottom) = self._handle(expression.first, command, right, self._get_centered_top(y, line_height, first_height))
+        (right, first_bottom) = self._handle(
+            expression.first,
+            command,
+            right,
+            self._get_centered_top(y, line_height, first_height),
+        )
 
         # Handle the operator
-        right = command(' %s ' % expression.getOp(), right, centred_string_top)[0]
+        right = command(" %s " % expression.getOp(), right, centred_string_top)[0]
 
         # Handle the second operand
         second_height = expression.second._drawing_height
-        (right, second_bottom) = self._handle(expression.second, command, right, self._get_centered_top(y, line_height, second_height))
+        (right, second_bottom) = self._handle(
+            expression.second,
+            command,
+            right,
+            self._get_centered_top(y, line_height, second_height),
+        )
 
         # Handle close paren
         right = command(DrtTokens.CLOSE, right, centred_string_top)[0]
@@ -1191,36 +1385,40 @@ class DrsDrawer(object):
 
 
 def demo():
-    print('='*20 + 'TEST PARSE' + '='*20)
+    print("=" * 20 + "TEST PARSE" + "=" * 20)
     dexpr = DrtExpression.fromstring
-    print(dexpr(r'([x,y],[sees(x,y)])'))
-    print(dexpr(r'([x],[man(x), walks(x)])'))
-    print(dexpr(r'\x.\y.([],[sees(x,y)])'))
-    print(dexpr(r'\x.([],[walks(x)])(john)'))
-    print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))'))
-    print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))'))
-    print(dexpr(r'([x],[PRO(x), sees(John,x)])'))
-    print(dexpr(r'([x],[man(x), -([],[walks(x)])])'))
-    print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])'))
-
-    print('='*20 + 'Test fol()' + '='*20)
-    print(dexpr(r'([x,y],[sees(x,y)])').fol())
-
-    print('='*20 + 'Test alpha conversion and lambda expression equality' + '='*20)
-    e1 = dexpr(r'\x.([],[P(x)])')
+    print(dexpr(r"([x,y],[sees(x,y)])"))
+    print(dexpr(r"([x],[man(x), walks(x)])"))
+    print(dexpr(r"\x.\y.([],[sees(x,y)])"))
+    print(dexpr(r"\x.([],[walks(x)])(john)"))
+    print(dexpr(r"(([x],[walks(x)]) + ([y],[runs(y)]))"))
+    print(dexpr(r"(([],[walks(x)]) -> ([],[runs(x)]))"))
+    print(dexpr(r"([x],[PRO(x), sees(John,x)])"))
+    print(dexpr(r"([x],[man(x), -([],[walks(x)])])"))
+    print(dexpr(r"([],[(([x],[man(x)]) -> ([],[walks(x)]))])"))
+
+    print("=" * 20 + "Test fol()" + "=" * 20)
+    print(dexpr(r"([x,y],[sees(x,y)])").fol())
+
+    print("=" * 20 + "Test alpha conversion and lambda expression equality" + "=" * 20)
+    e1 = dexpr(r"\x.([],[P(x)])")
     print(e1)
-    e2 = e1.alpha_convert(Variable('z'))
+    e2 = e1.alpha_convert(Variable("z"))
     print(e2)
     print(e1 == e2)
 
-    print('='*20 + 'Test resolve_anaphora()' + '='*20)
-    print(resolve_anaphora(dexpr(r'([x,y,z],[dog(x), cat(y), walks(z), PRO(z)])')))
-    print(resolve_anaphora(dexpr(r'([],[(([x],[dog(x)]) -> ([y],[walks(y), PRO(y)]))])')))
-    print(resolve_anaphora(dexpr(r'(([x,y],[]) + ([],[PRO(x)]))')))
+    print("=" * 20 + "Test resolve_anaphora()" + "=" * 20)
+    print(resolve_anaphora(dexpr(r"([x,y,z],[dog(x), cat(y), walks(z), PRO(z)])")))
+    print(
+        resolve_anaphora(dexpr(r"([],[(([x],[dog(x)]) -> ([y],[walks(y), PRO(y)]))])"))
+    )
+    print(resolve_anaphora(dexpr(r"(([x,y],[]) + ([],[PRO(x)]))")))
 
-    print('='*20 + 'Test pretty_print()' + '='*20)
+    print("=" * 20 + "Test pretty_print()" + "=" * 20)
     dexpr(r"([],[])").pretty_print()
-    dexpr(r"([],[([x],[big(x), dog(x)]) -> ([],[bark(x)]) -([x],[walk(x)])])").pretty_print()
+    dexpr(
+        r"([],[([x],[big(x), dog(x)]) -> ([],[bark(x)]) -([x],[walk(x)])])"
+    ).pretty_print()
     dexpr(r"([x,y],[x=y]) + ([z],[dog(z), walk(z)])").pretty_print()
     dexpr(r"([],[([x],[]) | ([y],[]) | ([z],[dog(z), walk(z)])])").pretty_print()
     dexpr(r"\P.\Q.(([x],[]) + P(x) + Q(x))(\x.([],[dog(x)]))").pretty_print()
@@ -1228,31 +1426,30 @@ def demo():
 
 def test_draw():
     try:
-        from six.moves.tkinter import Tk
+        from tkinter import Tk
     except ImportError:
         from nose import SkipTest
+
         raise SkipTest("tkinter is required, but it's not available.")
 
     expressions = [
-            r'x',
-            r'([],[])',
-            r'([x],[])',
-            r'([x],[man(x)])',
-
-            r'([x,y],[sees(x,y)])',
-            r'([x],[man(x), walks(x)])',
-            r'\x.([],[man(x), walks(x)])',
-            r'\x y.([],[sees(x,y)])',
-            r'([],[(([],[walks(x)]) + ([],[runs(x)]))])',
-
-            r'([x],[man(x), -([],[walks(x)])])',
-            r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])'
-            ]
+        r"x",
+        r"([],[])",
+        r"([x],[])",
+        r"([x],[man(x)])",
+        r"([x,y],[sees(x,y)])",
+        r"([x],[man(x), walks(x)])",
+        r"\x.([],[man(x), walks(x)])",
+        r"\x y.([],[sees(x,y)])",
+        r"([],[(([],[walks(x)]) + ([],[runs(x)]))])",
+        r"([x],[man(x), -([],[walks(x)])])",
+        r"([],[(([x],[man(x)]) -> ([],[walks(x)]))])",
+    ]
 
     for e in expressions:
         d = DrtExpression.fromstring(e)
         d.draw()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
diff --git a/nlp_resource_data/nltk/sem/drt.pyc b/nlp_resource_data/nltk/sem/drt.pyc
deleted file mode 100755 (executable)
index 9961ce7..0000000
Binary files a/nlp_resource_data/nltk/sem/drt.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sem/drt_glue_demo.py b/nlp_resource_data/nltk/sem/drt_glue_demo.py
old mode 100755 (executable)
new mode 100644 (file)
index 4fe4a47..61a4f5b
@@ -3,14 +3,22 @@
 #
 # Author: Dan Garrette <dhgarrette@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 try:
-    from six.moves.tkinter import (Button, Frame, IntVar, Label, Listbox, Menu,
-                                   Scrollbar, Tk)
-    from six.moves.tkinter_font import Font
+    from tkinter import (
+        Button,
+        Frame,
+        IntVar,
+        Label,
+        Listbox,
+        Menu,
+        Scrollbar,
+        Tk,
+    )
+    from tkinter.font import Font
     from nltk.draw.util import CanvasFrame, ShowText
 
 except ImportError:
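
The hunk above is the heart of the py2-to-py3 migration: the six.moves
aliases simply resolve to the renamed stdlib modules. If dual-version
support were still needed (it is not in this py3-only tree), the
equivalent hand-written shim would be (hypothetical, for reference only):

    try:
        from tkinter import Tk                 # Python 3 names
        from tkinter.font import Font
        from tkinter.messagebox import Message
    except ImportError:                        # Python 2 fallback
        from Tkinter import Tk
        from tkFont import Font
        from tkMessageBox import Message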
@@ -28,7 +36,7 @@ class DrtGlueDemo(object):
     def __init__(self, examples):
         # Set up the main window.
         self._top = Tk()
-        self._top.title('DRT Glue Demo')
+        self._top.title("DRT Glue Demo")
 
         # Set up key bindings.
         self._init_bindings()
@@ -60,7 +68,7 @@ class DrtGlueDemo(object):
         self._init_canvas(self._top)
 
         # Resize callback
-        self._canvas.bind('<Configure>', self._configure)
+        self._canvas.bind("<Configure>", self._configure)
 
     #########################################
     ##  Initialization Helpers
@@ -68,15 +76,20 @@ class DrtGlueDemo(object):
 
     def _init_glue(self):
         tagger = RegexpTagger(
-            [('^(David|Mary|John)$', 'NNP'),
-             ('^(walks|sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$', 'VB'),
-             ('^(go|order|vanish|find|approach)$', 'VB'),
-             ('^(a)$', 'ex_quant'),
-             ('^(every)$', 'univ_quant'),
-             ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'),
-             ('^(big|gray|former)$', 'JJ'),
-             ('^(him|himself)$', 'PRP')
-        ])
+            [
+                ("^(David|Mary|John)$", "NNP"),
+                (
+                    "^(walks|sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$",
+                    "VB",
+                ),
+                ("^(go|order|vanish|find|approach)$", "VB"),
+                ("^(a)$", "ex_quant"),
+                ("^(every)$", "univ_quant"),
+                ("^(sandwich|man|dog|pizza|unicorn|cat|senator)$", "NN"),
+                ("^(big|gray|former)$", "JJ"),
+                ("^(him|himself)$", "PRP"),
+            ]
+        )
 
         depparser = MaltParser(tagger=tagger)
         self._glue = DrtGlue(depparser=depparser, remove_duplicates=False)
@@ -88,113 +101,134 @@ class DrtGlueDemo(object):
 
         # What's our font size (default=same as sysfont)
         self._size = IntVar(root)
-        self._size.set(self._sysfont.cget('size'))
+        self._size.set(self._sysfont.cget("size"))
 
-        self._boldfont = Font(family='helvetica', weight='bold',
-                                    size=self._size.get())
-        self._font = Font(family='helvetica',
-                                    size=self._size.get())
-        if self._size.get() < 0: big = self._size.get()-2
-        else: big = self._size.get()+2
-        self._bigfont = Font(family='helvetica', weight='bold',
-                                    size=big)
+        self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get())
+        self._font = Font(family="helvetica", size=self._size.get())
+        if self._size.get() < 0:
+            big = self._size.get() - 2
+        else:
+            big = self._size.get() + 2
+        self._bigfont = Font(family="helvetica", weight="bold", size=big)
 
     def _init_exampleListbox(self, parent):
         self._exampleFrame = listframe = Frame(parent)
-        self._exampleFrame.pack(fill='both', side='left', padx=2)
-        self._exampleList_label = Label(self._exampleFrame, font=self._boldfont,
-                                     text='Examples')
+        self._exampleFrame.pack(fill="both", side="left", padx=2)
+        self._exampleList_label = Label(
+            self._exampleFrame, font=self._boldfont, text="Examples"
+        )
         self._exampleList_label.pack()
-        self._exampleList = Listbox(self._exampleFrame, selectmode='single',
-                                 relief='groove', background='white',
-                                 foreground='#909090', font=self._font,
-                                 selectforeground='#004040',
-                                 selectbackground='#c0f0c0')
-
-        self._exampleList.pack(side='right', fill='both', expand=1)
+        self._exampleList = Listbox(
+            self._exampleFrame,
+            selectmode="single",
+            relief="groove",
+            background="white",
+            foreground="#909090",
+            font=self._font,
+            selectforeground="#004040",
+            selectbackground="#c0f0c0",
+        )
+
+        self._exampleList.pack(side="right", fill="both", expand=1)
 
         for example in self._examples:
-            self._exampleList.insert('end', ('  %s' % example))
+            self._exampleList.insert("end", ("  %s" % example))
         self._exampleList.config(height=min(len(self._examples), 25), width=40)
 
         # Add a scrollbar if there are more than 25 examples.
         if len(self._examples) > 25:
-            listscroll = Scrollbar(self._exampleFrame,
-                                   orient='vertical')
-            self._exampleList.config(yscrollcommand = listscroll.set)
+            listscroll = Scrollbar(self._exampleFrame, orient="vertical")
+            self._exampleList.config(yscrollcommand=listscroll.set)
             listscroll.config(command=self._exampleList.yview)
-            listscroll.pack(side='left', fill='y')
+            listscroll.pack(side="left", fill="y")
 
         # If they select a example, apply it.
-        self._exampleList.bind('<<ListboxSelect>>', self._exampleList_select)
+        self._exampleList.bind("<<ListboxSelect>>", self._exampleList_select)
 
     def _init_readingListbox(self, parent):
         self._readingFrame = listframe = Frame(parent)
-        self._readingFrame.pack(fill='both', side='left', padx=2)
-        self._readingList_label = Label(self._readingFrame, font=self._boldfont,
-                                     text='Readings')
+        self._readingFrame.pack(fill="both", side="left", padx=2)
+        self._readingList_label = Label(
+            self._readingFrame, font=self._boldfont, text="Readings"
+        )
         self._readingList_label.pack()
-        self._readingList = Listbox(self._readingFrame, selectmode='single',
-                                 relief='groove', background='white',
-                                 foreground='#909090', font=self._font,
-                                 selectforeground='#004040',
-                                 selectbackground='#c0f0c0')
-
-        self._readingList.pack(side='right', fill='both', expand=1)
+        self._readingList = Listbox(
+            self._readingFrame,
+            selectmode="single",
+            relief="groove",
+            background="white",
+            foreground="#909090",
+            font=self._font,
+            selectforeground="#004040",
+            selectbackground="#c0f0c0",
+        )
+
+        self._readingList.pack(side="right", fill="both", expand=1)
 
         # Add a scrollbar if there are more than 25 examples.
-        listscroll = Scrollbar(self._readingFrame,
-                               orient='vertical')
-        self._readingList.config(yscrollcommand = listscroll.set)
+        listscroll = Scrollbar(self._readingFrame, orient="vertical")
+        self._readingList.config(yscrollcommand=listscroll.set)
         listscroll.config(command=self._readingList.yview)
-        listscroll.pack(side='right', fill='y')
+        listscroll.pack(side="right", fill="y")
 
         self._populate_readingListbox()
 
     def _populate_readingListbox(self):
         # Populate the listbox with integers
-        self._readingList.delete(0, 'end')
+        self._readingList.delete(0, "end")
         for i in range(len(self._readings)):
-            self._readingList.insert('end', ('  %s' % (i+1)))
+            self._readingList.insert("end", ("  %s" % (i + 1)))
         self._readingList.config(height=min(len(self._readings), 25), width=5)
 
         # If they select a example, apply it.
-        self._readingList.bind('<<ListboxSelect>>', self._readingList_select)
+        self._readingList.bind("<<ListboxSelect>>", self._readingList_select)
 
     def _init_bindings(self):
         # Key bindings are a good thing.
-        self._top.bind('<Control-q>', self.destroy)
-        self._top.bind('<Control-x>', self.destroy)
-        self._top.bind('<Escape>', self.destroy)
-        self._top.bind('n', self.next)
-        self._top.bind('<space>', self.next)
-        self._top.bind('p', self.prev)
-        self._top.bind('<BackSpace>', self.prev)
+        self._top.bind("<Control-q>", self.destroy)
+        self._top.bind("<Control-x>", self.destroy)
+        self._top.bind("<Escape>", self.destroy)
+        self._top.bind("n", self.next)
+        self._top.bind("<space>", self.next)
+        self._top.bind("p", self.prev)
+        self._top.bind("<BackSpace>", self.prev)
 
     def _init_buttons(self, parent):
         # Set up the frames.
         self._buttonframe = buttonframe = Frame(parent)
-        buttonframe.pack(fill='none', side='bottom', padx=3, pady=2)
-        Button(buttonframe, text='Prev',
-               background='#90c0d0', foreground='black',
-               command=self.prev,).pack(side='left')
-        Button(buttonframe, text='Next',
-               background='#90c0d0', foreground='black',
-               command=self.next,).pack(side='left')
+        buttonframe.pack(fill="none", side="bottom", padx=3, pady=2)
+        Button(
+            buttonframe,
+            text="Prev",
+            background="#90c0d0",
+            foreground="black",
+            command=self.prev,
+        ).pack(side="left")
+        Button(
+            buttonframe,
+            text="Next",
+            background="#90c0d0",
+            foreground="black",
+            command=self.next,
+        ).pack(side="left")
 
     def _configure(self, event):
         self._autostep = 0
         (x1, y1, x2, y2) = self._cframe.scrollregion()
         y2 = event.height - 6
-        self._canvas['scrollregion'] = '%d %d %d %d' % (x1,y1,x2,y2)
+        self._canvas["scrollregion"] = "%d %d %d %d" % (x1, y1, x2, y2)
         self._redraw()
 
     def _init_canvas(self, parent):
-        self._cframe = CanvasFrame(parent, background='white',
-                                   #width=525, height=250,
-                                   closeenough=10,
-                                   border=2, relief='sunken')
-        self._cframe.pack(expand=1, fill='both', side='top', pady=2)
+        self._cframe = CanvasFrame(
+            parent,
+            background="white",
+            # width=525, height=250,
+            closeenough=10,
+            border=2,
+            relief="sunken",
+        )
+        self._cframe.pack(expand=1, fill="both", side="top", pady=2)
         canvas = self._canvas = self._cframe.canvas()
 
         # Initially, there's no tree or text
@@ -206,41 +240,71 @@ class DrtGlueDemo(object):
         menubar = Menu(parent)
 
         filemenu = Menu(menubar, tearoff=0)
-        filemenu.add_command(label='Exit', underline=1,
-                             command=self.destroy, accelerator='q')
-        menubar.add_cascade(label='File', underline=0, menu=filemenu)
+        filemenu.add_command(
+            label="Exit", underline=1, command=self.destroy, accelerator="q"
+        )
+        menubar.add_cascade(label="File", underline=0, menu=filemenu)
 
         actionmenu = Menu(menubar, tearoff=0)
-        actionmenu.add_command(label='Next', underline=0,
-                               command=self.next, accelerator='n, Space')
-        actionmenu.add_command(label='Previous', underline=0,
-                               command=self.prev, accelerator='p, Backspace')
-        menubar.add_cascade(label='Action', underline=0, menu=actionmenu)
+        actionmenu.add_command(
+            label="Next", underline=0, command=self.next, accelerator="n, Space"
+        )
+        actionmenu.add_command(
+            label="Previous", underline=0, command=self.prev, accelerator="p, Backspace"
+        )
+        menubar.add_cascade(label="Action", underline=0, menu=actionmenu)
 
         optionmenu = Menu(menubar, tearoff=0)
-        optionmenu.add_checkbutton(label='Remove Duplicates', underline=0,
-                                   variable=self._glue.remove_duplicates,
-                                   command=self._toggle_remove_duplicates,
-                                   accelerator='r')
-        menubar.add_cascade(label='Options', underline=0, menu=optionmenu)
+        optionmenu.add_checkbutton(
+            label="Remove Duplicates",
+            underline=0,
+            variable=self._glue.remove_duplicates,
+            command=self._toggle_remove_duplicates,
+            accelerator="r",
+        )
+        menubar.add_cascade(label="Options", underline=0, menu=optionmenu)
 
         viewmenu = Menu(menubar, tearoff=0)
-        viewmenu.add_radiobutton(label='Tiny', variable=self._size,
-                                 underline=0, value=10, command=self.resize)
-        viewmenu.add_radiobutton(label='Small', variable=self._size,
-                                 underline=0, value=12, command=self.resize)
-        viewmenu.add_radiobutton(label='Medium', variable=self._size,
-                                 underline=0, value=14, command=self.resize)
-        viewmenu.add_radiobutton(label='Large', variable=self._size,
-                                 underline=0, value=18, command=self.resize)
-        viewmenu.add_radiobutton(label='Huge', variable=self._size,
-                                 underline=0, value=24, command=self.resize)
-        menubar.add_cascade(label='View', underline=0, menu=viewmenu)
+        viewmenu.add_radiobutton(
+            label="Tiny",
+            variable=self._size,
+            underline=0,
+            value=10,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Small",
+            variable=self._size,
+            underline=0,
+            value=12,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Medium",
+            variable=self._size,
+            underline=0,
+            value=14,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Large",
+            variable=self._size,
+            underline=0,
+            value=18,
+            command=self.resize,
+        )
+        viewmenu.add_radiobutton(
+            label="Huge",
+            variable=self._size,
+            underline=0,
+            value=24,
+            command=self.resize,
+        )
+        menubar.add_cascade(label="View", underline=0, menu=viewmenu)
 
         helpmenu = Menu(menubar, tearoff=0)
-        helpmenu.add_command(label='About', underline=0,
-                             command=self.about)
-        menubar.add_cascade(label='Help', underline=0, menu=helpmenu)
+        helpmenu.add_command(label="About", underline=0, command=self.about)
+        menubar.add_cascade(label="Help", underline=0, menu=helpmenu)
 
         parent.config(menu=menubar)
 
@@ -256,11 +320,11 @@ class DrtGlueDemo(object):
             self._drsWidget.clear()
 
         if self._drs:
-            self._drsWidget = DrsWidget( self._canvas, self._drs )
+            self._drsWidget = DrsWidget(self._canvas, self._drs)
             self._drsWidget.draw()
 
         if self._error:
-            self._drsWidget = DrsWidget( self._canvas, self._error )
+            self._drsWidget = DrsWidget(self._canvas, self._error)
             self._drsWidget.draw()
 
     #########################################
@@ -269,7 +333,8 @@ class DrtGlueDemo(object):
 
     def destroy(self, *e):
         self._autostep = 0
-        if self._top is None: return
+        if self._top is None:
+            return
         self._top.destroy()
         self._top = None
 
@@ -287,23 +352,22 @@ class DrtGlueDemo(object):
                 if index <= 0:
                     self._select_previous_example()
                 else:
-                    self._readingList_store_selection(index-1)
+                    self._readingList_store_selection(index - 1)
 
             else:
-                #select its first reading
-                self._readingList_store_selection(readingListSize-1)
+                # select its first reading
+                self._readingList_store_selection(readingListSize - 1)
 
         else:
             self._select_previous_example()
 
-
     def _select_previous_example(self):
-        #if the current example is not the first example
+        # if the current example is not the first example
         if self._curExample > 0:
-            self._exampleList_store_selection(self._curExample-1)
+            self._exampleList_store_selection(self._curExample - 1)
         else:
-            #go to the last example
-            self._exampleList_store_selection(len(self._examples)-1)
+            # go to the last example
+            self._exampleList_store_selection(len(self._examples) - 1)
 
     def next(self, *e):
         selection = self._readingList.curselection()
@@ -316,33 +380,35 @@ class DrtGlueDemo(object):
                 index = int(selection[0])
 
                 # if it's on (or past) the last item
-                if index >= (readingListSize-1):
+                if index >= (readingListSize - 1):
                     self._select_next_example()
                 else:
-                    self._readingList_store_selection(index+1)
+                    self._readingList_store_selection(index + 1)
 
             else:
-                #select its first reading
+                # select its first reading
                 self._readingList_store_selection(0)
 
         else:
             self._select_next_example()
 
     def _select_next_example(self):
-        #if the current example is not the last example
-        if self._curExample < len(self._examples)-1:
-            self._exampleList_store_selection(self._curExample+1)
+        # if the current example is not the last example
+        if self._curExample < len(self._examples) - 1:
+            self._exampleList_store_selection(self._curExample + 1)
         else:
-            #go to the first example
+            # go to the first example
             self._exampleList_store_selection(0)
 
-
     def about(self, *e):
-        ABOUT = ("NLTK Discourse Representation Theory (DRT) Glue Semantics Demo\n"+
-                 "Written by Daniel H. Garrette")
-        TITLE = 'About: NLTK DRT Glue Demo'
+        ABOUT = (
+            "NLTK Discourse Representation Theory (DRT) Glue Semantics Demo\n"
+            + "Written by Daniel H. Garrette"
+        )
+        TITLE = "About: NLTK DRT Glue Demo"
         try:
-            from six.moves.tkinter_messagebox import Message
+            from tkinter.messagebox import Message
+
             Message(message=ABOUT, title=TITLE).show()
         except:
             ShowText(self._top, TITLE, ABOUT)
@@ -358,22 +424,24 @@ class DrtGlueDemo(object):
         from a script); otherwise, the demo will close as soon as
         the script completes.
         """
-        if in_idle(): return
+        if in_idle():
+            return
         self._top.mainloop(*args, **kwargs)
 
     def resize(self, size=None):
-        if size is not None: self._size.set(size)
+        if size is not None:
+            self._size.set(size)
         size = self._size.get()
         self._font.configure(size=-(abs(size)))
         self._boldfont.configure(size=-(abs(size)))
         self._sysfont.configure(size=-(abs(size)))
-        self._bigfont.configure(size=-(abs(size+2)))
+        self._bigfont.configure(size=-(abs(size + 2)))
         self._redraw()
 
     def _toggle_remove_duplicates(self):
         self._glue.remove_duplicates = not self._glue.remove_duplicates
 
-        self._exampleList.selection_clear(0, 'end')
+        self._exampleList.selection_clear(0, "end")
         self._readings = []
         self._populate_readingListbox()
         self._readingCache = [None for ex in self._examples]
@@ -383,17 +451,17 @@ class DrtGlueDemo(object):
         self._drs = None
         self._redraw()
 
-
     def _exampleList_select(self, event):
         selection = self._exampleList.curselection()
-        if len(selection) != 1: return
+        if len(selection) != 1:
+            return
         self._exampleList_store_selection(int(selection[0]))
 
     def _exampleList_store_selection(self, index):
         self._curExample = index
         example = self._examples[index]
 
-        self._exampleList.selection_clear(0, 'end')
+        self._exampleList.selection_clear(0, "end")
         if example:
             cache = self._readingCache[index]
             if cache:
@@ -410,13 +478,15 @@ class DrtGlueDemo(object):
                     self._readingCache[index] = self._readings
                 except Exception as e:
                     self._readings = []
-                    self._error = DrtVariableExpression(Variable('Error: ' + str(e)))
+                    self._error = DrtVariableExpression(Variable("Error: " + str(e)))
                     self._readingCache[index] = self._error
 
-                    #add a star to the end of the example
+                    # add a star to the end of the example
                     self._exampleList.delete(index)
-                    self._exampleList.insert(index, ('  %s *' % example))
-                    self._exampleList.config(height=min(len(self._examples), 25), width=40)
+                    self._exampleList.insert(index, ("  %s *" % example))
+                    self._exampleList.config(
+                        height=min(len(self._examples), 25), width=40
+                    )
 
             self._populate_readingListbox()
 
@@ -425,16 +495,16 @@ class DrtGlueDemo(object):
             self._drs = None
             self._redraw()
 
-
     def _readingList_select(self, event):
         selection = self._readingList.curselection()
-        if len(selection) != 1: return
+        if len(selection) != 1:
+            return
         self._readingList_store_selection(int(selection[0]))
 
     def _readingList_store_selection(self, index):
         reading = self._readings[index]
 
-        self._readingList.selection_clear(0, 'end')
+        self._readingList.selection_clear(0, "end")
         if reading:
             self._readingList.selection_set(index)
 
@@ -447,39 +517,45 @@ class DrsWidget(object):
     def __init__(self, canvas, drs, **attribs):
         self._drs = drs
         self._canvas = canvas
-        canvas.font = Font(font=canvas.itemcget(canvas.create_text(0, 0, text=''), 'font'))
+        canvas.font = Font(
+            font=canvas.itemcget(canvas.create_text(0, 0, text=""), "font")
+        )
         canvas._BUFFER = 3
         self.bbox = (0, 0, 0, 0)
 
     def draw(self):
         (right, bottom) = DrsDrawer(self._drs, canvas=self._canvas).draw()
-        self.bbox = (0, 0, right+1, bottom+1)
+        self.bbox = (0, 0, right + 1, bottom + 1)
 
     def clear(self):
-        self._canvas.create_rectangle(self.bbox, fill="white", width="0" )
+        self._canvas.create_rectangle(self.bbox, fill="white", width="0")
+
 
 def demo():
-    examples = ['John walks',
-                'David sees Mary',
-                'David eats a sandwich',
-                'every man chases a dog',
-#                'every man believes a dog yawns',
-#                'John gives David a sandwich',
-                'John chases himself',
-#                'John persuades David to order a pizza',
-#                'John tries to go',
-#                'John tries to find a unicorn',
-#                'John seems to vanish',
-#                'a unicorn seems to approach',
-#                'every big cat leaves',
-#                'every gray cat leaves',
-#                'every big gray cat leaves',
-#                'a former senator leaves',
-#                'John likes a cat',
-#                'John likes every cat',
-#                'he walks',
-#                'John walks and he leaves'
-                ]
+    examples = [
+        "John walks",
+        "David sees Mary",
+        "David eats a sandwich",
+        "every man chases a dog",
+        #                'every man believes a dog yawns',
+        #                'John gives David a sandwich',
+        "John chases himself",
+        #                'John persuades David to order a pizza',
+        #                'John tries to go',
+        #                'John tries to find a unicorn',
+        #                'John seems to vanish',
+        #                'a unicorn seems to approach',
+        #                'every big cat leaves',
+        #                'every gray cat leaves',
+        #                'every big gray cat leaves',
+        #                'a former senator leaves',
+        #                'John likes a cat',
+        #                'John likes every cat',
+        #                'he walks',
+        #                'John walks and he leaves'
+    ]
     DrtGlueDemo(examples).mainloop()
 
-if __name__ == '__main__': demo()
+
+if __name__ == "__main__":
+    demo()
diff --git a/nlp_resource_data/nltk/sem/drt_glue_demo.pyc b/nlp_resource_data/nltk/sem/drt_glue_demo.pyc
deleted file mode 100755 (executable)
index d26406a..0000000
Binary files a/nlp_resource_data/nltk/sem/drt_glue_demo.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sem/evaluate.py b/nlp_resource_data/nltk/sem/evaluate.py
old mode 100755 (executable)
new mode 100644 (file)
index f9cdb90..3a1eab0
@@ -1,19 +1,18 @@
 # Natural Language Toolkit: Models for first-order languages with lambda
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>,
 # URL: <http://nltk.sourceforge.net>
 # For license information, see LICENSE.TXT
 
-#TODO:
-    #- fix tracing
-    #- fix iterator-based approach to existentials
+# TODO:
+# - fix tracing
+# - fix iterator-based approach to existentials
 
 """
 This module provides data structures for representing first-order
 models.
 """
-from __future__ import print_function, unicode_literals
 
 from pprint import pformat
 import inspect
@@ -21,35 +20,45 @@ import textwrap
 import re
 import sys
 
-from six import string_types
+from nltk.decorators import decorator  # this is used in code that is commented out
 
-from nltk.decorators import decorator # this used in code that is commented out
-from nltk.compat import python_2_unicode_compatible
+from nltk.sem.logic import (
+    AbstractVariableExpression,
+    AllExpression,
+    Expression,
+    AndExpression,
+    ApplicationExpression,
+    EqualityExpression,
+    ExistsExpression,
+    IffExpression,
+    ImpExpression,
+    IndividualVariableExpression,
+    LambdaExpression,
+    NegatedExpression,
+    OrExpression,
+    Variable,
+    is_indvar,
+)
 
-from nltk.sem.logic import (AbstractVariableExpression, AllExpression, Expression,
-                            AndExpression, ApplicationExpression, EqualityExpression,
-                            ExistsExpression, IffExpression, ImpExpression,
-                            IndividualVariableExpression, LambdaExpression,
-                            NegatedExpression, OrExpression,
-                            Variable, is_indvar)
 
+class Error(Exception):
+    pass
 
-class Error(Exception): pass
 
-class Undefined(Error):  pass
+class Undefined(Error):
+    pass
+
 
 def trace(f, *args, **kw):
-    if sys.version_info[0] >= 3:
-        argspec = inspect.getfullargspec(f)
-    else:
-        argspec = inspect.getargspec(f)
+    argspec = inspect.getfullargspec(f)
     d = dict(zip(argspec[0], args))
-    if d.pop('trace', None):
+    if d.pop("trace", None):
         print()
         for item in d.items():
             print("%s => %s" % item)
     return f(*args, **kw)
 
+
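
Dropping the version branch is safe because inspect.getfullargspec() exists
on every Python 3 release, while inspect.getargspec() was deprecated and
finally removed in Python 3.11; trace() only needs the positional-argument
names. A quick illustration (function invented):

    import inspect

    def f(x, y=1, *, trace=False):
        return x + y

    spec = inspect.getfullargspec(f)    # py3 replacement for getargspec
    print(spec.args, spec.kwonlyargs)   # ['x', 'y'] ['trace']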
 def is_rel(s):
     """
     Check whether a set represents a relation (of any arity).
@@ -62,11 +71,12 @@ def is_rel(s):
     if len(s) == 0:
         return True
     # all the elements are tuples of the same length
-    elif all(isinstance(el, tuple) for el in s) and len(max(s))==len(min(s)):
+    elif all(isinstance(el, tuple) for el in s) and len(max(s)) == len(min(s)):
         return True
     else:
         raise ValueError("Set %r contains sequences of different lengths" % s)
 
+
 def set2rel(s):
     """
     Convert a set containing individuals (strings or numbers) into a set of
@@ -82,14 +92,15 @@ def set2rel(s):
     """
     new = set()
     for elem in s:
-        if isinstance(elem, string_types):
+        if isinstance(elem, str):
             new.add((elem,))
         elif isinstance(elem, int):
-            new.add((str(elem,)))
+            new.add((str(elem),))  # wrap as a 1-tuple, matching the string branch above
         else:
             new.add(elem)
     return new
 
+
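
A quick sanity-check sketch for the two helpers above (import path assumed;
domain values invented):

    from nltk.sem.evaluate import is_rel, set2rel

    assert set2rel({"d1", "d2"}) == {("d1",), ("d2",)}  # individuals -> unary tuples
    assert is_rel({("d1", "d2"), ("d2", "d1")})         # uniform arity -> True
    assert is_rel(set())                                # the empty relation is fine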
 def arity(rel):
     """
     Check the arity of a relation.
@@ -101,7 +112,6 @@ def arity(rel):
     return len(list(rel)[0])
 
 
-@python_2_unicode_compatible
 class Valuation(dict):
     """
     A dictionary which represents a model-theoretic Valuation of non-logical constants.
@@ -113,19 +123,23 @@ class Valuation(dict):
     just behave like a standard  dictionary) if indexed with an expression that
     is not in its list of symbols.
     """
+
     def __init__(self, xs):
         """
         :param xs: a list of (symbol, value) pairs.
         """
         super(Valuation, self).__init__()
         for (sym, val) in xs:
-            if isinstance(val, string_types) or isinstance(val, bool):
+            if isinstance(val, str) or isinstance(val, bool):
                 self[sym] = val
             elif isinstance(val, set):
                 self[sym] = set2rel(val)
             else:
-                msg = textwrap.fill("Error in initializing Valuation. "
-                                    "Unrecognized value for symbol '%s':\n%s" % (sym, val), width=66)
+                msg = textwrap.fill(
+                    "Error in initializing Valuation. "
+                    "Unrecognized value for symbol '%s':\n%s" % (sym, val),
+                    width=66,
+                )
 
                 raise ValueError(msg)
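+
+    # Example (illustrative): Valuation([('adam', 'b1'), ('girl', {'g1', 'g2'})])
+    # maps 'adam' to 'b1' and stores the set as {('g1',), ('g2',)} via set2rel.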
 
@@ -143,10 +157,12 @@ class Valuation(dict):
         """Set-theoretic domain of the value-space of a Valuation."""
         dom = []
         for val in self.values():
-            if isinstance(val, string_types):
+            if isinstance(val, str):
                 dom.append(val)
             elif not isinstance(val, bool):
-                dom.extend([elem for tuple_ in val for elem in tuple_ if elem is not None])
+                dom.extend(
+                    [elem for tuple_ in val for elem in tuple_ if elem is not None]
+                )
         return set(dom)
 
     @property
@@ -162,11 +178,15 @@ class Valuation(dict):
 ##########################################
 # REs used by the _read_valuation function
 ##########################################
-_VAL_SPLIT_RE = re.compile(r'\s*=+>\s*')
-_ELEMENT_SPLIT_RE = re.compile(r'\s*,\s*')
-_TUPLES_RE = re.compile(r"""\s*
+_VAL_SPLIT_RE = re.compile(r"\s*=+>\s*")
+_ELEMENT_SPLIT_RE = re.compile(r"\s*,\s*")
+_TUPLES_RE = re.compile(
+    r"""\s*
                                 (\([^)]+\))  # tuple-expression
-                                \s*""", re.VERBOSE)
+                                \s*""",
+    re.VERBOSE,
+)
+
 
 def _read_valuation_line(s):
     """
@@ -187,7 +207,7 @@ def _read_valuation_line(s):
     symbol = pieces[0]
     value = pieces[1]
     # check whether the value is meant to be a set
-    if value.startswith('{'):
+    if value.startswith("{"):
         value = value[1:-1]
         tuple_strings = _TUPLES_RE.findall(value)
         # are the set elements tuples?
@@ -202,6 +222,7 @@ def _read_valuation_line(s):
         value = set(set_elements)
     return symbol, value
 
+
 def read_valuation(s, encoding=None):
     """
     Convert a valuation string into a valuation.
@@ -218,15 +239,15 @@ def read_valuation(s, encoding=None):
     statements = []
     for linenum, line in enumerate(s.splitlines()):
         line = line.strip()
-        if line.startswith('#') or line=='': continue
+        if line.startswith("#") or line == "":
+            continue
         try:
             statements.append(_read_valuation_line(line))
         except ValueError:
-            raise ValueError('Unable to parse line %s: %s' % (linenum, line))
+            raise ValueError("Unable to parse line %s: %s" % (linenum, line))
     return Valuation(statements)
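+
+# Input sketch for read_valuation (per the REs above): one 'symbol => value'
+# pair per line, e.g.
+#     adam => b1
+#     girl => {g1, g2}
+#     love => {(b1, g1), (b2, g2)}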
 
 
-@python_2_unicode_compatible
 class Assignment(dict):
     """
     A dictionary which represents an assignment of values to variables.
@@ -285,10 +306,13 @@ class Assignment(dict):
         self.domain = domain
         if assign:
             for (var, val) in assign:
-                assert val in self.domain,\
-                       "'%s' is not in the domain: %s" % (val, self.domain)
-                assert is_indvar(var),\
-                       "Wrong format for an Individual Variable: '%s'" % var
+                assert val in self.domain, "'%s' is not in the domain: %s" % (
+                    val,
+                    self.domain,
+                )
+                assert is_indvar(var), (
+                    "Wrong format for an Individual Variable: '%s'" % var
+                )
                 self[var] = val
         self.variant = None
         self._addvariant()
@@ -346,16 +370,13 @@ class Assignment(dict):
         ``self.variant``.
 
         """
-        assert val in self.domain,\
-               "%s is not in the domain %s" % (val, self.domain)
-        assert is_indvar(var),\
-               "Wrong format for an Individual Variable: '%s'" % var
+        assert val in self.domain, "%s is not in the domain %s" % (val, self.domain)
+        assert is_indvar(var), "Wrong format for an Individual Variable: '%s'" % var
         self[var] = val
         self._addvariant()
         return self
 
 
-@python_2_unicode_compatible
 class Model(object):
     """
     A first order model is a domain *D* of discourse and a valuation *V*.
@@ -379,8 +400,10 @@ class Model(object):
         self.domain = domain
         self.valuation = valuation
         if not domain.issuperset(valuation.domain):
-            raise Error("The valuation domain, %s, must be a subset of the model's domain, %s"\
-                  % (valuation.domain, domain))
+            raise Error(
+                "The valuation domain, %s, must be a subset of the model's domain, %s"
+                % (valuation.domain, domain)
+            )
 
     def __repr__(self):
         return "(%r, %r)" % (self.domain, self.valuation)
@@ -402,14 +425,13 @@ class Model(object):
             value = self.satisfy(parsed, g, trace=trace)
             if trace:
                 print()
-                print("'%s' evaluates to %s under M, %s" %  (expr, value, g))
+                print("'%s' evaluates to %s under M, %s" % (expr, value, g))
             return value
         except Undefined:
             if trace:
                 print()
-                print("'%s' is undefined under M, %s" %  (expr, g))
-            return 'Undefined'
-
+                print("'%s' is undefined under M, %s" % (expr, g))
+            return "Undefined"
 
     def satisfy(self, parsed, g, trace=None):
         """
@@ -430,32 +452,27 @@ class Model(object):
         if isinstance(parsed, ApplicationExpression):
             function, arguments = parsed.uncurry()
             if isinstance(function, AbstractVariableExpression):
-                #It's a predicate expression ("P(x,y)"), so used uncurried arguments
+                # It's a predicate expression ("P(x,y)"), so use uncurried arguments
                 funval = self.satisfy(function, g)
                 argvals = tuple(self.satisfy(arg, g) for arg in arguments)
                 return argvals in funval
             else:
-                #It must be a lambda expression, so use curried form
+                # It must be a lambda expression, so use curried form
                 funval = self.satisfy(parsed.function, g)
                 argval = self.satisfy(parsed.argument, g)
                 return funval[argval]
         elif isinstance(parsed, NegatedExpression):
             return not self.satisfy(parsed.term, g)
         elif isinstance(parsed, AndExpression):
-            return self.satisfy(parsed.first, g) and \
-                   self.satisfy(parsed.second, g)
+            return self.satisfy(parsed.first, g) and self.satisfy(parsed.second, g)
         elif isinstance(parsed, OrExpression):
-            return self.satisfy(parsed.first, g) or \
-                   self.satisfy(parsed.second, g)
+            return self.satisfy(parsed.first, g) or self.satisfy(parsed.second, g)
         elif isinstance(parsed, ImpExpression):
-            return (not self.satisfy(parsed.first, g)) or \
-                   self.satisfy(parsed.second, g)
+            return (not self.satisfy(parsed.first, g)) or self.satisfy(parsed.second, g)
         elif isinstance(parsed, IffExpression):
-            return self.satisfy(parsed.first, g) == \
-                   self.satisfy(parsed.second, g)
+            return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g)
         elif isinstance(parsed, EqualityExpression):
-            return self.satisfy(parsed.first, g) == \
-                   self.satisfy(parsed.second, g)
+            return self.satisfy(parsed.first, g) == self.satisfy(parsed.second, g)
         elif isinstance(parsed, AllExpression):
             new_g = g.copy()
             for u in self.domain:
@@ -484,7 +501,7 @@ class Model(object):
         else:
             return self.i(parsed, g, trace)
 
-    #@decorator(trace_eval)
+    # @decorator(trace_eval)
     def i(self, parsed, g, trace=False):
         """
         An interpretation function.
@@ -524,11 +541,11 @@ class Model(object):
         :return: a set of the entities that satisfy ``parsed``.
         """
 
-        spacer = '   '
+        spacer = "   "
         indent = spacer + (spacer * nesting)
         candidates = []
 
-        if isinstance(varex, string_types):
+        if isinstance(varex, str):
             var = Variable(varex)
         else:
             var = varex
@@ -536,12 +553,15 @@ class Model(object):
         if var in parsed.free():
             if trace:
                 print()
-                print((spacer * nesting) + "Open formula is '%s' with assignment %s" % (parsed, g))
+                print(
+                    (spacer * nesting)
+                    + "Open formula is '%s' with assignment %s" % (parsed, g)
+                )
             for u in self.domain:
                 new_g = g.copy()
                 new_g.add(var.name, u)
                 if trace and trace > 1:
-                    lowtrace = trace-1
+                    lowtrace = trace - 1
                 else:
                     lowtrace = 0
                 value = self.satisfy(parsed, new_g, lowtrace)
@@ -552,13 +572,18 @@ class Model(object):
                 # parsed == False under g[u/var]?
                 if value == False:
                     if trace:
-                        print(indent + "value of '%s' under %s is False" % (parsed, new_g))
+                        print(
+                            indent + "value of '%s' under %s is False" % (parsed, new_g)
+                        )
 
                 # so g[u/var] is a satisfying assignment
                 else:
                     candidates.append(u)
                     if trace:
-                        print(indent + "value of '%s' under %s is %s" % (parsed, new_g, value))
+                        print(
+                            indent
+                            + "value of '%s' under %s is %s" % (parsed, new_g, value)
+                        )
 
             result = set(c for c in candidates)
         # var isn't free in parsed
@@ -568,12 +593,9 @@ class Model(object):
         return result
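+
+    # Example from satdemo below: m2.satisfiers(Expression.fromstring('boy(x)'),
+    # 'x', g2) returns the set of domain members u for which boy(u) holds.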
 
 
-
-
-
-#//////////////////////////////////////////////////////////////////////
+# //////////////////////////////////////////////////////////////////////
 # Demo..
-#//////////////////////////////////////////////////////////////////////
+# //////////////////////////////////////////////////////////////////////
 # number of spacer chars
 mult = 30
 
@@ -583,37 +605,37 @@ def propdemo(trace=None):
     """Example of a propositional model."""
 
     global val1, dom1, m1, g1
-    val1 = Valuation([('P', True), ('Q', True), ('R', False)])
+    val1 = Valuation([("P", True), ("Q", True), ("R", False)])
     dom1 = set([])
     m1 = Model(dom1, val1)
     g1 = Assignment(dom1)
 
     print()
-    print('*' * mult)
+    print("*" * mult)
     print("Propositional Formulas Demo")
-    print('*' * mult)
-    print('(Propositional constants treated as nullary predicates)')
+    print("*" * mult)
+    print("(Propositional constants treated as nullary predicates)")
     print()
     print("Model m1:\n", m1)
-    print('*' * mult)
+    print("*" * mult)
     sentences = [
-    '(P & Q)',
-    '(P & R)',
-    '- P',
-    '- R',
-    '- - P',
-    '- (P & R)',
-    '(P | R)',
-    '(R | P)',
-    '(R | R)',
-    '(- P | R)',
-    '(P | - P)',
-    '(P -> Q)',
-    '(P -> R)',
-    '(R -> P)',
-    '(P <-> P)',
-    '(R <-> R)',
-    '(P <-> R)',
+        "(P & Q)",
+        "(P & R)",
+        "- P",
+        "- R",
+        "- - P",
+        "- (P & R)",
+        "(P | R)",
+        "(R | P)",
+        "(R | R)",
+        "(- P | R)",
+        "(P | - P)",
+        "(P -> Q)",
+        "(P -> R)",
+        "(R -> P)",
+        "(P <-> P)",
+        "(R <-> R)",
+        "(P <-> R)",
     ]
 
     for sent in sentences:
@@ -623,42 +645,57 @@ def propdemo(trace=None):
         else:
             print("The value of '%s' is: %s" % (sent, m1.evaluate(sent, g1)))
 
+
 # Demo 2: FOL Model
 #############
 
+
 def folmodel(quiet=False, trace=None):
     """Example of a first-order model."""
 
     global val2, v2, dom2, m2, g2
 
-    v2 = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\
-         ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])),
-         ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
+    v2 = [
+        ("adam", "b1"),
+        ("betty", "g1"),
+        ("fido", "d1"),
+        ("girl", set(["g1", "g2"])),
+        ("boy", set(["b1", "b2"])),
+        ("dog", set(["d1"])),
+        ("love", set([("b1", "g1"), ("b2", "g2"), ("g1", "b1"), ("g2", "b1")])),
+    ]
     val2 = Valuation(v2)
     dom2 = val2.domain
     m2 = Model(dom2, val2)
-    g2 = Assignment(dom2, [('x', 'b1'), ('y', 'g2')])
+    g2 = Assignment(dom2, [("x", "b1"), ("y", "g2")])
 
     if not quiet:
         print()
-        print('*' * mult)
+        print("*" * mult)
         print("Models Demo")
         print("*" * mult)
-        print("Model m2:\n", "-" * 14,"\n", m2)
+        print("Model m2:\n", "-" * 14, "\n", m2)
         print("Variable assignment = ", g2)
 
-        exprs = ['adam', 'boy', 'love', 'walks', 'x', 'y', 'z']
+        exprs = ["adam", "boy", "love", "walks", "x", "y", "z"]
         parsed_exprs = [Expression.fromstring(e) for e in exprs]
 
         print()
         for parsed in parsed_exprs:
             try:
-                print("The interpretation of '%s' in m2 is %s" % (parsed, m2.i(parsed, g2)))
+                print(
+                    "The interpretation of '%s' in m2 is %s"
+                    % (parsed, m2.i(parsed, g2))
+                )
             except Undefined:
                 print("The interpretation of '%s' in m2 is Undefined" % parsed)
 
-
-        applications = [('boy', ('adam')), ('walks', ('adam',)), ('love', ('adam', 'y')), ('love', ('y', 'adam'))]
+        applications = [
+            ("boy", ("adam")),
+            ("walks", ("adam",)),
+            ("love", ("adam", "y")),
+            ("love", ("y", "adam")),
+        ]
 
         for (fun, args) in applications:
             try:
@@ -668,9 +705,11 @@ def folmodel(quiet=False, trace=None):
             except Undefined:
                 print("%s(%s) evaluates to Undefined" % (fun, args))
 
+
 # Demo 3: FOL
 #########
 
+
 def foldemo(trace=None):
     """
     Interpretation of closed expressions in a first-order model.
@@ -678,32 +717,31 @@ def foldemo(trace=None):
     folmodel(quiet=True)
 
     print()
-    print('*' * mult)
+    print("*" * mult)
     print("FOL Formulas Demo")
-    print('*' * mult)
+    print("*" * mult)
 
     formulas = [
-    'love (adam, betty)',
-    '(adam = mia)',
-    '\\x. (boy(x) | girl(x))',
-    '\\x. boy(x)(adam)',
-    '\\x y. love(x, y)',
-    '\\x y. love(x, y)(adam)(betty)',
-    '\\x y. love(x, y)(adam, betty)',
-    '\\x y. (boy(x) & love(x, y))',
-    '\\x. exists y. (boy(x) & love(x, y))',
-    'exists z1. boy(z1)',
-    'exists x. (boy(x) &  -(x = adam))',
-    'exists x. (boy(x) & all y. love(y, x))',
-    'all x. (boy(x) | girl(x))',
-    'all x. (girl(x) -> exists y. boy(y) & love(x, y))',    #Every girl loves exists boy.
-    'exists x. (boy(x) & all y. (girl(y) -> love(y, x)))',  #There is exists boy that every girl loves.
-    'exists x. (boy(x) & all y. (girl(y) -> love(x, y)))',  #exists boy loves every girl.
-    'all x. (dog(x) -> - girl(x))',
-    'exists x. exists y. (love(x, y) & love(x, y))'
+        "love (adam, betty)",
+        "(adam = mia)",
+        "\\x. (boy(x) | girl(x))",
+        "\\x. boy(x)(adam)",
+        "\\x y. love(x, y)",
+        "\\x y. love(x, y)(adam)(betty)",
+        "\\x y. love(x, y)(adam, betty)",
+        "\\x y. (boy(x) & love(x, y))",
+        "\\x. exists y. (boy(x) & love(x, y))",
+        "exists z1. boy(z1)",
+        "exists x. (boy(x) &  -(x = adam))",
+        "exists x. (boy(x) & all y. love(y, x))",
+        "all x. (boy(x) | girl(x))",
+        "all x. (girl(x) -> exists y. boy(y) & love(x, y))",  # Every girl loves exists boy.
+        "exists x. (boy(x) & all y. (girl(y) -> love(y, x)))",  # There is exists boy that every girl loves.
+        "exists x. (boy(x) & all y. (girl(y) -> love(x, y)))",  # exists boy loves every girl.
+        "all x. (dog(x) -> - girl(x))",
+        "exists x. exists y. (love(x, y) & love(x, y))",
     ]
 
-
     for fmla in formulas:
         g2.purge()
         if trace:
@@ -715,37 +753,38 @@ def foldemo(trace=None):
 # Demo 4: Satisfaction
 #############
 
+
 def satdemo(trace=None):
     """Satisfiers of an open formula in a first order model."""
 
     print()
-    print('*' * mult)
+    print("*" * mult)
     print("Satisfiers Demo")
-    print('*' * mult)
+    print("*" * mult)
 
     folmodel(quiet=True)
 
     formulas = [
-               'boy(x)',
-               '(x = x)',
-               '(boy(x) | girl(x))',
-               '(boy(x) & girl(x))',
-               'love(adam, x)',
-               'love(x, adam)',
-               '-(x = adam)',
-               'exists z22. love(x, z22)',
-               'exists y. love(y, x)',
-               'all y. (girl(y) -> love(x, y))',
-               'all y. (girl(y) -> love(y, x))',
-               'all y. (girl(y) -> (boy(x) & love(y, x)))',
-               '(boy(x) & all y. (girl(y) -> love(x, y)))',
-               '(boy(x) & all y. (girl(y) -> love(y, x)))',
-               '(boy(x) & exists y. (girl(y) & love(y, x)))',
-               '(girl(x) -> dog(x))',
-               'all y. (dog(y) -> (x = y))',
-               'exists y. love(y, x)',
-               'exists y. (love(adam, y) & love(y, x))'
-                ]
+        "boy(x)",
+        "(x = x)",
+        "(boy(x) | girl(x))",
+        "(boy(x) & girl(x))",
+        "love(adam, x)",
+        "love(x, adam)",
+        "-(x = adam)",
+        "exists z22. love(x, z22)",
+        "exists y. love(y, x)",
+        "all y. (girl(y) -> love(x, y))",
+        "all y. (girl(y) -> love(y, x))",
+        "all y. (girl(y) -> (boy(x) & love(y, x)))",
+        "(boy(x) & all y. (girl(y) -> love(x, y)))",
+        "(boy(x) & all y. (girl(y) -> love(y, x)))",
+        "(boy(x) & exists y. (girl(y) & love(y, x)))",
+        "(girl(x) -> dog(x))",
+        "all y. (dog(y) -> (x = y))",
+        "exists y. love(y, x)",
+        "exists y. (love(adam, y) & love(y, x))",
+    ]
 
     if trace:
         print(m2)
@@ -758,7 +797,7 @@ def satdemo(trace=None):
 
     for p in parsed:
         g2.purge()
-        print("The satisfiers of '%s' are: %s" % (p, m2.satisfiers(p, 'x', g2, trace)))
+        print("The satisfiers of '%s' are: %s" % (p, m2.satisfiers(p, "x", g2, trace)))
 
 
 def demo(num=0, trace=None):
@@ -773,11 +812,7 @@ def demo(num=0, trace=None):
 
     :param trace: trace = 1, or trace = 2 for more verbose tracing
     """
-    demos = {
-        1: propdemo,
-        2: folmodel,
-        3: foldemo,
-        4: satdemo}
+    demos = {1: propdemo, 2: folmodel, 3: foldemo, 4: satdemo}
 
     try:
         demos[num](trace=trace)
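
The recurring mechanical change in these hunks: six.string_types is simply
(str,) on Python 3, so the compatibility import drops out and every check of
the form below (an illustrative before/after sketch) collapses to the plain
built-in:

    # Python 2, via six
    from six import string_types
    if isinstance(val, string_types):
        ...

    # Python 3
    if isinstance(val, str):
        ...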
diff --git a/nlp_resource_data/nltk/sem/evaluate.pyc b/nlp_resource_data/nltk/sem/evaluate.pyc
deleted file mode 100755 (executable)
index d580411..0000000
Binary files a/nlp_resource_data/nltk/sem/evaluate.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sem/glue.py b/nlp_resource_data/nltk/sem/glue.py
old mode 100755 (executable)
new mode 100644 (file)
index 765ff3f..684c90c
@@ -2,53 +2,62 @@
 #
 # Author: Dan Garrette <dhgarrette@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, division, unicode_literals
 
 import os
 from itertools import chain
 
-from six import string_types
-
 import nltk
 from nltk.internals import Counter
 from nltk.tag import UnigramTagger, BigramTagger, TrigramTagger, RegexpTagger
-from nltk.sem.logic import (Expression, Variable, VariableExpression,
-                            LambdaExpression, AbstractVariableExpression)
-from nltk.compat import python_2_unicode_compatible
+from nltk.sem.logic import (
+    Expression,
+    Variable,
+    VariableExpression,
+    LambdaExpression,
+    AbstractVariableExpression,
+)
 from nltk.sem import drt
 from nltk.sem import linearlogic
 
-SPEC_SEMTYPES = {'a'       : 'ex_quant',
-                 'an'      : 'ex_quant',
-                 'every'   : 'univ_quant',
-                 'the'     : 'def_art',
-                 'no'      : 'no_quant',
-                 'default' : 'ex_quant'}
+SPEC_SEMTYPES = {
+    "a": "ex_quant",
+    "an": "ex_quant",
+    "every": "univ_quant",
+    "the": "def_art",
+    "no": "no_quant",
+    "default": "ex_quant",
+}
+
+OPTIONAL_RELATIONSHIPS = ["nmod", "vmod", "punct"]
 
-OPTIONAL_RELATIONSHIPS = ['nmod', 'vmod', 'punct']
 
-@python_2_unicode_compatible
 class GlueFormula(object):
     def __init__(self, meaning, glue, indices=None):
         if not indices:
             indices = set()
 
-        if isinstance(meaning, string_types):
+        if isinstance(meaning, str):
             self.meaning = Expression.fromstring(meaning)
         elif isinstance(meaning, Expression):
             self.meaning = meaning
         else:
-            raise RuntimeError('Meaning term neither string or expression: %s, %s' % (meaning, meaning.__class__))
+            raise RuntimeError(
+                "Meaning term neither string or expression: %s, %s"
+                % (meaning, meaning.__class__)
+            )
 
-        if isinstance(glue, string_types):
+        if isinstance(glue, str):
             self.glue = linearlogic.LinearLogicParser().parse(glue)
         elif isinstance(glue, linearlogic.Expression):
             self.glue = glue
         else:
-            raise RuntimeError('Glue term neither string or expression: %s, %s' % (glue, glue.__class__))
+            raise RuntimeError(
+                "Glue term neither string or expression: %s, %s"
+                % (glue, glue.__class__)
+            )
 
         self.indices = indices
 
@@ -57,21 +66,30 @@ class GlueFormula(object):
             arg  = (john        ,  subj)
             returns ((walk john),          f)
         """
-        if self.indices & arg.indices: # if the sets are NOT disjoint
-            raise linearlogic.LinearLogicApplicationException("'%s' applied to '%s'.  Indices are not disjoint." % (self, arg))
-        else: # if the sets ARE disjoint
-            return_indices = (self.indices | arg.indices)
+        if self.indices & arg.indices:  # if the sets are NOT disjoint
+            raise linearlogic.LinearLogicApplicationException(
+                "'%s' applied to '%s'.  Indices are not disjoint." % (self, arg)
+            )
+        else:  # if the sets ARE disjoint
+            return_indices = self.indices | arg.indices
 
         try:
-            return_glue = linearlogic.ApplicationExpression(self.glue, arg.glue, arg.indices)
+            return_glue = linearlogic.ApplicationExpression(
+                self.glue, arg.glue, arg.indices
+            )
         except linearlogic.LinearLogicApplicationException:
-            raise linearlogic.LinearLogicApplicationException("'%s' applied to '%s'" % (self.simplify(), arg.simplify()))
+            raise linearlogic.LinearLogicApplicationException(
+                "'%s' applied to '%s'" % (self.simplify(), arg.simplify())
+            )
 
         arg_meaning_abstracted = arg.meaning
         if return_indices:
-            for dep in self.glue.simplify().antecedent.dependencies[::-1]: # if self.glue is (A -o B), dep is in A.dependencies
-                arg_meaning_abstracted = self.make_LambdaExpression(Variable('v%s' % dep),
-                                                                    arg_meaning_abstracted)
+            for dep in self.glue.simplify().antecedent.dependencies[
+                ::-1
+            ]:  # if self.glue is (A -o B), dep is in A.dependencies
+                arg_meaning_abstracted = self.make_LambdaExpression(
+                    Variable("v%s" % dep), arg_meaning_abstracted
+                )
         return_meaning = self.meaning.applyto(arg_meaning_abstracted)
 
         return self.__class__(return_meaning, return_glue, return_indices)
@@ -85,22 +103,33 @@ class GlueFormula(object):
     def lambda_abstract(self, other):
         assert isinstance(other, GlueFormula)
         assert isinstance(other.meaning, AbstractVariableExpression)
-        return self.__class__(self.make_LambdaExpression(other.meaning.variable,
-                                                         self.meaning),
-                              linearlogic.ImpExpression(other.glue, self.glue))
+        return self.__class__(
+            self.make_LambdaExpression(other.meaning.variable, self.meaning),
+            linearlogic.ImpExpression(other.glue, self.glue),
+        )
 
     def compile(self, counter=None):
         """From Iddo Lev's PhD Dissertation p108-109"""
         if not counter:
             counter = Counter()
-        (compiled_glue, new_forms) = self.glue.simplify().compile_pos(counter, self.__class__)
-        return new_forms + [self.__class__(self.meaning, compiled_glue, set([counter.get()]))]
+        (compiled_glue, new_forms) = self.glue.simplify().compile_pos(
+            counter, self.__class__
+        )
+        return new_forms + [
+            self.__class__(self.meaning, compiled_glue, set([counter.get()]))
+        ]
 
     def simplify(self):
-        return self.__class__(self.meaning.simplify(), self.glue.simplify(), self.indices)
+        return self.__class__(
+            self.meaning.simplify(), self.glue.simplify(), self.indices
+        )
 
     def __eq__(self, other):
-        return self.__class__ == other.__class__ and self.meaning == other.meaning and self.glue == other.glue
+        return (
+            self.__class__ == other.__class__
+            and self.meaning == other.meaning
+            and self.glue == other.glue
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -111,15 +140,15 @@ class GlueFormula(object):
 
     def __str__(self):
         assert isinstance(self.indices, set)
-        accum = '%s : %s' % (self.meaning, self.glue)
+        accum = "%s : %s" % (self.meaning, self.glue)
         if self.indices:
-            accum += ' : {' + ', '.join(str(index) for index in self.indices) + '}'
+            accum += " : {" + ", ".join(str(index) for index in self.indices) + "}"
         return accum
 
     def __repr__(self):
         return "%s" % self
 
-@python_2_unicode_compatible
+
 class GlueDict(dict):
     def __init__(self, filename, encoding=None):
         self.filename = filename
@@ -131,22 +160,30 @@ class GlueDict(dict):
             self.clear()
 
         try:
-            contents = nltk.data.load(self.filename, format='text', encoding=self.file_encoding)
+            contents = nltk.data.load(
+                self.filename, format="text", encoding=self.file_encoding
+            )
             # TODO: the above can't handle zip files, but this should anyway be fixed in nltk.data.load()
         except LookupError as e:
             try:
-                contents = nltk.data.load('file:' + self.filename, format='text', encoding=self.file_encoding)
+                contents = nltk.data.load(
+                    "file:" + self.filename, format="text", encoding=self.file_encoding
+                )
             except LookupError:
                 raise e
         lines = contents.splitlines()
 
-        for line in lines:                          # example: 'n : (\\x.(<word> x), (v-or))'
-                                                    #     lambdacalc -^  linear logic -^
-            line = line.strip()                     # remove trailing newline
-            if not len(line): continue              # skip empty lines
-            if line[0] == '#': continue             # skip commented out lines
+        for line in lines:  # example: 'n : (\\x.(<word> x), (v-or))'
+            #     lambdacalc -^  linear logic -^
+            line = line.strip()  # remove trailing newline
+            if not len(line):
+                continue  # skip empty lines
+            if line[0] == "#":
+                continue  # skip commented out lines
 
-            parts = line.split(' : ', 2)            # ['verb', '(\\x.(<word> x), ( subj -o f ))', '[subj]']
+            parts = line.split(
+                " : ", 2
+            )  # ['verb', '(\\x.(<word> x), ( subj -o f ))', '[subj]']
 
             glue_formulas = []
             paren_count = 0
@@ -157,37 +194,49 @@ class GlueDict(dict):
 
             if len(parts) > 1:
                 for (i, c) in enumerate(parts[1]):
-                    if c == '(':
-                        if paren_count == 0:             # if it's the first '(' of a tuple
-                            tuple_start = i+1           # then save the index
+                    if c == "(":
+                        if paren_count == 0:  # if it's the first '(' of a tuple
+                            tuple_start = i + 1  # then save the index
                         paren_count += 1
-                    elif c == ')':
+                    elif c == ")":
                         paren_count -= 1
-                        if paren_count == 0:             # if it's the last ')' of a tuple
-                            meaning_term =  parts[1][tuple_start:tuple_comma]   # '\\x.(<word> x)'
-                            glue_term =     parts[1][tuple_comma+1:i]           # '(v-r)'
-                            glue_formulas.append([meaning_term, glue_term])     # add the GlueFormula to the list
-                    elif c == ',':
-                        if paren_count == 1:             # if it's a comma separating the parts of the tuple
-                            tuple_comma = i             # then save the index
-                    elif c == '#':                      # skip comments at the ends of lines
-                        if paren_count != 0:             # if the line hasn't parsed correctly so far
-                            raise RuntimeError('Formula syntax is incorrect for entry ' + line)
-                        break                           # break to the next line
-
-            if len(parts) > 2:                      #if there is a relationship entry at the end
-                rel_start = parts[2].index('[')+1
-                rel_end   = parts[2].index(']')
+                        if paren_count == 0:  # if it's the last ')' of a tuple
+                            meaning_term = parts[1][
+                                tuple_start:tuple_comma
+                            ]  # '\\x.(<word> x)'
+                            glue_term = parts[1][tuple_comma + 1 : i]  # '(v-r)'
+                            glue_formulas.append(
+                                [meaning_term, glue_term]
+                            )  # add the GlueFormula to the list
+                    elif c == ",":
+                        if (
+                            paren_count == 1
+                        ):  # if it's a comma separating the parts of the tuple
+                            tuple_comma = i  # then save the index
+                    elif c == "#":  # skip comments at the ends of lines
+                        if (
+                            paren_count != 0
+                        ):  # if the line hasn't parsed correctly so far
+                            raise RuntimeError(
+                                "Formula syntax is incorrect for entry " + line
+                            )
+                        break  # break to the next line
+
+            if len(parts) > 2:  # if there is a relationship entry at the end
+                rel_start = parts[2].index("[") + 1
+                rel_end = parts[2].index("]")
                 if rel_start == rel_end:
                     relationships = frozenset()
                 else:
-                    relationships = frozenset(r.strip() for r in parts[2][rel_start:rel_end].split(','))
+                    relationships = frozenset(
+                        r.strip() for r in parts[2][rel_start:rel_end].split(",")
+                    )
 
             try:
-                start_inheritance = parts[0].index('(')
-                end_inheritance = parts[0].index(')')
+                start_inheritance = parts[0].index("(")
+                end_inheritance = parts[0].index(")")
                 sem = parts[0][:start_inheritance].strip()
-                supertype = parts[0][start_inheritance+1:end_inheritance]
+                supertype = parts[0][start_inheritance + 1 : end_inheritance]
             except:
                 sem = parts[0].strip()
                 supertype = None
@@ -195,41 +244,49 @@ class GlueDict(dict):
             if sem not in self:
                 self[sem] = {}
 
-            if relationships is None: #if not specified for a specific relationship set
-                #add all relationship entries for parents
+            if (
+                relationships is None
+            ):  # if not specified for a specific relationship set
+                # add all relationship entries for parents
                 if supertype:
                     for rels in self[supertype]:
                         if rels not in self[sem]:
                             self[sem][rels] = []
                         glue = self[supertype][rels]
                         self[sem][rels].extend(glue)
-                        self[sem][rels].extend(glue_formulas) # add the glue formulas to every rel entry
+                        self[sem][rels].extend(
+                            glue_formulas
+                        )  # add the glue formulas to every rel entry
                 else:
                     if None not in self[sem]:
                         self[sem][None] = []
-                    self[sem][None].extend(glue_formulas) # add the glue formulas to every rel entry
+                    self[sem][None].extend(
+                        glue_formulas
+                    )  # add the glue formulas to every rel entry
             else:
                 if relationships not in self[sem]:
                     self[sem][relationships] = []
                 if supertype:
                     self[sem][relationships].extend(self[supertype][relationships])
-                self[sem][relationships].extend(glue_formulas) # add the glue entry to the dictionary
+                self[sem][relationships].extend(
+                    glue_formulas
+                )  # add the glue entry to the dictionary
 
     def __str__(self):
-        accum = ''
+        accum = ""
         for pos in self:
             str_pos = "%s" % pos
             for relset in self[pos]:
                 i = 1
                 for gf in self[pos][relset]:
                     if i == 1:
-                        accum += str_pos + ': '
+                        accum += str_pos + ": "
                     else:
-                        accum += ' '*(len(str_pos)+2)
+                        accum += " " * (len(str_pos) + 2)
                     accum += "%s" % gf
                     if relset and i == len(self[pos][relset]):
-                        accum += ' : %s' % relset
-                    accum += '\n'
+                        accum += " : %s" % relset
+                    accum += "\n"
                     i += 1
         return accum
 
@@ -237,15 +294,17 @@ class GlueDict(dict):
         if node is None:
             # TODO: should it be depgraph.root? Is this code tested?
             top = depgraph.nodes[0]
-            depList = list(chain(*top['deps'].values()))
+            depList = list(chain(*top["deps"].values()))
             root = depgraph.nodes[depList[0]]
 
             return self.to_glueformula_list(depgraph, root, Counter(), verbose)
 
         glueformulas = self.lookup(node, depgraph, counter)
-        for dep_idx in chain(*node['deps'].values()):
+        for dep_idx in chain(*node["deps"].values()):
             dep = depgraph.nodes[dep_idx]
-            glueformulas.extend(self.to_glueformula_list(depgraph, dep, counter, verbose))
+            glueformulas.extend(
+                self.to_glueformula_list(depgraph, dep, counter, verbose)
+            )
         return glueformulas
 
     def lookup(self, node, depgraph, counter):
@@ -267,28 +326,29 @@ class GlueDict(dict):
         if not len(lookup):
             raise KeyError(
                 "There is no GlueDict entry for sem type of '%s' "
-                "with tag '%s', and rel '%s'" %
-                (node['word'], node['tag'], node['rel'])
-                )
+                "with tag '%s', and rel '%s'" % (node["word"], node["tag"], node["rel"])
+            )
 
-        return self.get_glueformulas_from_semtype_entry(lookup, node['word'], node, depgraph, counter)
+        return self.get_glueformulas_from_semtype_entry(
+            lookup, node["word"], node, depgraph, counter
+        )
 
     def add_missing_dependencies(self, node, depgraph):
-        rel = node['rel'].lower()
+        rel = node["rel"].lower()
 
-        if rel == 'main':
-            headnode = depgraph.nodes[node['head']]
-            subj = self.lookup_unique('subj', headnode, depgraph)
-            relation = subj['rel']
-            node['deps'].setdefault(relation,[])
-            node['deps'][relation].append(subj['address'])
-            #node['deps'].append(subj['address'])
+        if rel == "main":
+            headnode = depgraph.nodes[node["head"]]
+            subj = self.lookup_unique("subj", headnode, depgraph)
+            relation = subj["rel"]
+            node["deps"].setdefault(relation, [])
+            node["deps"][relation].append(subj["address"])
+            # node['deps'].append(subj['address'])
 
     def _lookup_semtype_option(self, semtype, node, depgraph):
         relationships = frozenset(
-            depgraph.nodes[dep]['rel'].lower()
-            for dep in chain(*node['deps'].values())
-            if depgraph.nodes[dep]['rel'].lower() not in OPTIONAL_RELATIONSHIPS
+            depgraph.nodes[dep]["rel"].lower()
+            for dep in chain(*node["deps"].values())
+            if depgraph.nodes[dep]["rel"].lower() not in OPTIONAL_RELATIONSHIPS
         )
 
         try:
@@ -299,9 +359,11 @@ class GlueDict(dict):
             # most relations of any possible relationship set that is a subset
             # of the actual depgraph
             best_match = frozenset()
-            for relset_option in set(semtype)-set([None]):
-                if len(relset_option) > len(best_match) and \
-                   relset_option < relationships:
+            for relset_option in set(semtype) - set([None]):
+                if (
+                    len(relset_option) > len(best_match)
+                    and relset_option < relationships
+                ):
                     best_match = relset_option
             if not best_match:
                 if None in semtype:
@@ -317,20 +379,22 @@ class GlueDict(dict):
         Based on the node, return a list of plausible semtypes in order of
         plausibility.
         """
-        rel = node['rel'].lower()
-        word = node['word'].lower()
+        rel = node["rel"].lower()
+        word = node["word"].lower()
 
-        if rel == 'spec':
+        if rel == "spec":
             if word in SPEC_SEMTYPES:
                 return [SPEC_SEMTYPES[word]]
             else:
-                return [SPEC_SEMTYPES['default']]
-        elif rel in ['nmod', 'vmod']:
-            return [node['tag'], rel]
+                return [SPEC_SEMTYPES["default"]]
+        elif rel in ["nmod", "vmod"]:
+            return [node["tag"], rel]
         else:
-            return [node['tag']]
+            return [node["tag"]]
 
-    def get_glueformulas_from_semtype_entry(self, lookup, word, node, depgraph, counter):
+    def get_glueformulas_from_semtype_entry(
+        self, lookup, word, node, depgraph, counter
+    ):
         glueformulas = []
 
         glueFormulaFactory = self.get_GlueFormula_factory()
@@ -339,7 +403,7 @@ class GlueDict(dict):
             if not len(glueformulas):
                 gf.word = word
             else:
-                gf.word = '%s%s' % (word, len(glueformulas)+1)
+                gf.word = "%s%s" % (word, len(glueformulas) + 1)
 
             gf.glue = self.initialize_labels(gf.glue, node, depgraph, counter.get())
 
@@ -352,8 +416,8 @@ class GlueDict(dict):
         parameter "<word>"
         :param word: The actual word to be replace "<word>"
         """
-        word = word.replace('.', '')
-        return generic.replace('<word>', word)
+        word = word.replace(".", "")
+        return generic.replace("<word>", word)
 
     def initialize_labels(self, expr, node, depgraph, unique_index):
         if isinstance(expr, linearlogic.AtomicExpression):
@@ -365,35 +429,42 @@ class GlueDict(dict):
         else:
             return linearlogic.ImpExpression(
                 self.initialize_labels(expr.antecedent, node, depgraph, unique_index),
-                self.initialize_labels(expr.consequent, node, depgraph, unique_index)
+                self.initialize_labels(expr.consequent, node, depgraph, unique_index),
             )
 
     def find_label_name(self, name, node, depgraph, unique_index):
         try:
-            dot = name.index('.')
+            dot = name.index(".")
 
             before_dot = name[:dot]
-            after_dot = name[dot+1:]
-            if before_dot == 'super':
-                return self.find_label_name(after_dot, depgraph.nodes[node['head']], depgraph, unique_index)
+            after_dot = name[dot + 1 :]
+            if before_dot == "super":
+                return self.find_label_name(
+                    after_dot, depgraph.nodes[node["head"]], depgraph, unique_index
+                )
             else:
-                return self.find_label_name(after_dot, self.lookup_unique(before_dot, node, depgraph), depgraph, unique_index)
+                return self.find_label_name(
+                    after_dot,
+                    self.lookup_unique(before_dot, node, depgraph),
+                    depgraph,
+                    unique_index,
+                )
         except ValueError:
             lbl = self.get_label(node)
-            if name == 'f':
+            if name == "f":
                 return lbl
-            elif name == 'v':
-                return '%sv' % lbl
-            elif name == 'r':
-                return '%sr' % lbl
-            elif name == 'super':
-                return self.get_label(depgraph.nodes[node['head']])
-            elif name == 'var':
-                return '%s%s' % (lbl.upper(), unique_index)
-            elif name == 'a':
-                return self.get_label(self.lookup_unique('conja', node, depgraph))
-            elif name == 'b':
-                return self.get_label(self.lookup_unique('conjb', node, depgraph))
+            elif name == "v":
+                return "%sv" % lbl
+            elif name == "r":
+                return "%sr" % lbl
+            elif name == "super":
+                return self.get_label(depgraph.nodes[node["head"]])
+            elif name == "var":
+                return "%s%s" % (lbl.upper(), unique_index)
+            elif name == "a":
+                return self.get_label(self.lookup_unique("conja", node, depgraph))
+            elif name == "b":
+                return self.get_label(self.lookup_unique("conjb", node, depgraph))
             else:
                 return self.get_label(self.lookup_unique(name, node, depgraph))
 
@@ -404,10 +475,36 @@ class GlueDict(dict):
         :param value: where to index into the list of characters
         :type value: int
         """
-        value = node['address']
-
-        letter = ['f','g','h','i','j','k','l','m','n','o','p','q','r','s',
-                  't','u','v','w','x','y','z','a','b','c','d','e'][value-1]
+        value = node["address"]
+
+        letter = [
+            "f",
+            "g",
+            "h",
+            "i",
+            "j",
+            "k",
+            "l",
+            "m",
+            "n",
+            "o",
+            "p",
+            "q",
+            "r",
+            "s",
+            "t",
+            "u",
+            "v",
+            "w",
+            "x",
+            "y",
+            "z",
+            "a",
+            "b",
+            "c",
+            "d",
+            "e",
+        ][value - 1]
         num = int(value) // 26
         if num > 0:
             return letter + str(num)
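+        # e.g. address 1 labels a node 'f', address 2 'g', and so on through
+        # the list above, with num appending a suffix once a cycle completes.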
@@ -420,14 +517,16 @@ class GlueDict(dict):
         """
         deps = [
             depgraph.nodes[dep]
-            for dep in chain(*node['deps'].values())
-            if depgraph.nodes[dep]['rel'].lower() == rel.lower()
+            for dep in chain(*node["deps"].values())
+            if depgraph.nodes[dep]["rel"].lower() == rel.lower()
         ]
 
         if len(deps) == 0:
-            raise KeyError("'%s' doesn't contain a feature '%s'" % (node['word'], rel))
+            raise KeyError("'%s' doesn't contain a feature '%s'" % (node["word"], rel))
         elif len(deps) > 1:
-            raise KeyError("'%s' should only have one feature '%s'" % (node['word'], rel))
+            raise KeyError(
+                "'%s' should only have one feature '%s'" % (node["word"], rel)
+            )
         else:
             return deps[0]
 
@@ -436,27 +535,33 @@ class GlueDict(dict):
 
 
 class Glue(object):
-    def __init__(self, semtype_file=None, remove_duplicates=False,
-                 depparser=None, verbose=False):
+    def __init__(
+        self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False
+    ):
         self.verbose = verbose
         self.remove_duplicates = remove_duplicates
         self.depparser = depparser
 
         from nltk import Prover9
+
         self.prover = Prover9()
 
         if semtype_file:
             self.semtype_file = semtype_file
         else:
-            self.semtype_file = os.path.join('grammars', 'sample_grammars','glue.semtype')
+            self.semtype_file = os.path.join(
+                "grammars", "sample_grammars", "glue.semtype"
+            )
 
     def train_depparser(self, depgraphs=None):
         if depgraphs:
             self.depparser.train(depgraphs)
         else:
-            self.depparser.train_from_file(nltk.data.find(
-                os.path.join('grammars', 'sample_grammars',
-                             'glue_train.conll')))
+            self.depparser.train_from_file(
+                nltk.data.find(
+                    os.path.join("grammars", "sample_grammars", "glue_train.conll")
+                )
+            )
 
     def parse_to_meaning(self, sentence):
         readings = []
@@ -469,10 +574,12 @@ class Glue(object):
         agenda_length = len(agenda)
         atomics = dict()
         nonatomics = dict()
-        while agenda: # is not empty
+        while agenda:  # is not empty
             cur = agenda.pop()
             glue_simp = cur.glue.simplify()
-            if isinstance(glue_simp, linearlogic.ImpExpression): # if cur.glue is non-atomic
+            if isinstance(
+                glue_simp, linearlogic.ImpExpression
+            ):  # if cur.glue is non-atomic
                 for key in atomics:
                     try:
                         if isinstance(cur.glue, linearlogic.ApplicationExpression):
@@ -481,7 +588,9 @@ class Glue(object):
                             bindings = linearlogic.BindingDict()
                         glue_simp.antecedent.unify(key, bindings)
                         for atomic in atomics[key]:
-                            if not (cur.indices & atomic.indices): # if the sets of indices are disjoint
+                            if not (
+                                cur.indices & atomic.indices
+                            ):  # if the sets of indices are disjoint
                                 try:
                                     agenda.append(cur.applyto(atomic))
                                 except linearlogic.LinearLogicApplicationException:
@@ -493,16 +602,20 @@ class Glue(object):
                 except KeyError:
                     nonatomics[glue_simp.antecedent] = [cur]
 
-            else: # else cur.glue is atomic
+            else:  # else cur.glue is atomic
                 for key in nonatomics:
                     for nonatomic in nonatomics[key]:
                         try:
-                            if isinstance(nonatomic.glue, linearlogic.ApplicationExpression):
+                            if isinstance(
+                                nonatomic.glue, linearlogic.ApplicationExpression
+                            ):
                                 bindings = nonatomic.glue.bindings
                             else:
                                 bindings = linearlogic.BindingDict()
                             glue_simp.unify(key, bindings)
-                            if not (cur.indices & nonatomic.indices): # if the sets of indices are disjoint
+                            if not (
+                                cur.indices & nonatomic.indices
+                            ):  # if the sets of indices are disjoint
                                 try:
                                     agenda.append(nonatomic.applyto(cur))
                                 except linearlogic.LinearLogicApplicationException:
@@ -533,11 +646,11 @@ class Glue(object):
                         add_reading = False
                         break
                 except Exception as e:
-                    #if there is an exception, the syntax of the formula
-                    #may not be understandable by the prover, so don't
-                    #throw out the reading.
-                    print('Error when checking logical equality of statements', e)
-                    pass
+                    # if there is an exception, the syntax of the formula
+                    # may not be understandable by the prover, so don't
+                    # throw out the reading.
+                    print("Error when checking logical equality of statements", e)
+
         if add_reading:
             reading_list.append(glueformula.meaning)
 
@@ -554,9 +667,10 @@ class Glue(object):
         :rtype: DependencyGraph
         """
 
-        #Lazy-initialize the depparser
+        # Lazy-initialize the depparser
         if self.depparser is None:
             from nltk.parse import MaltParser
+
             self.depparser = MaltParser(tagger=self.get_pos_tagger())
         if not self.depparser._trained:
             self.train_depparser()
@@ -575,7 +689,7 @@ class Glue(object):
             return_list.extend(gf.compile(index_counter))
 
         if self.verbose:
-            print('Compiled Glue Premises:')
+            print("Compiled Glue Premises:")
             for cgf in return_list:
                 print(cgf)
 
@@ -583,27 +697,30 @@ class Glue(object):
 
     def get_pos_tagger(self):
         from nltk.corpus import brown
+
         regexp_tagger = RegexpTagger(
-            [(r'^-?[0-9]+(.[0-9]+)?$', 'CD'),   # cardinal numbers
-             (r'(The|the|A|a|An|an)$', 'AT'),   # articles
-             (r'.*able$', 'JJ'),                # adjectives
-             (r'.*ness$', 'NN'),                # nouns formed from adjectives
-             (r'.*ly$', 'RB'),                  # adverbs
-             (r'.*s$', 'NNS'),                  # plural nouns
-             (r'.*ing$', 'VBG'),                # gerunds
-             (r'.*ed$', 'VBD'),                 # past tense verbs
-             (r'.*', 'NN')                      # nouns (default)
-        ])
-        brown_train = brown.tagged_sents(categories='news')
+            [
+                (r"^-?[0-9]+(.[0-9]+)?$", "CD"),  # cardinal numbers
+                (r"(The|the|A|a|An|an)$", "AT"),  # articles
+                (r".*able$", "JJ"),  # adjectives
+                (r".*ness$", "NN"),  # nouns formed from adjectives
+                (r".*ly$", "RB"),  # adverbs
+                (r".*s$", "NNS"),  # plural nouns
+                (r".*ing$", "VBG"),  # gerunds
+                (r".*ed$", "VBD"),  # past tense verbs
+                (r".*", "NN"),  # nouns (default)
+            ]
+        )
+        brown_train = brown.tagged_sents(categories="news")
         unigram_tagger = UnigramTagger(brown_train, backoff=regexp_tagger)
         bigram_tagger = BigramTagger(brown_train, backoff=unigram_tagger)
         trigram_tagger = TrigramTagger(brown_train, backoff=bigram_tagger)
 
-        #Override particular words
+        # Override particular words
         main_tagger = RegexpTagger(
-            [(r'(A|a|An|an)$', 'ex_quant'),
-             (r'(Every|every|All|all)$', 'univ_quant')
-        ], backoff=trigram_tagger)
+            [(r"(A|a|An|an)$", "ex_quant"), (r"(Every|every|All|all)$", "univ_quant")],
+            backoff=trigram_tagger,
+        )
 
         return main_tagger
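+
+        # Usage sketch: main_tagger.tag('every man chases a dog'.split()) tags
+        # 'every' as 'univ_quant' and 'a' as 'ex_quant' via the overrides and
+        # defers the remaining tokens to the Brown-trained backoff chain.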
 
@@ -613,19 +730,25 @@ class DrtGlueFormula(GlueFormula):
         if not indices:
             indices = set()
 
-        if isinstance(meaning, string_types):
+        if isinstance(meaning, str):
             self.meaning = drt.DrtExpression.fromstring(meaning)
         elif isinstance(meaning, drt.DrtExpression):
             self.meaning = meaning
         else:
-            raise RuntimeError('Meaning term neither string or expression: %s, %s' % (meaning, meaning.__class__))
+            raise RuntimeError(
+                "Meaning term neither string or expression: %s, %s"
+                % (meaning, meaning.__class__)
+            )
 
-        if isinstance(glue, string_types):
+        if isinstance(glue, str):
             self.glue = linearlogic.LinearLogicParser().parse(glue)
         elif isinstance(glue, linearlogic.Expression):
             self.glue = glue
         else:
-            raise RuntimeError('Glue term neither string or expression: %s, %s' % (glue, glue.__class__))
+            raise RuntimeError(
+                "Glue term neither string or expression: %s, %s"
+                % (glue, glue.__class__)
+            )
 
         self.indices = indices
 
@@ -635,15 +758,20 @@ class DrtGlueFormula(GlueFormula):
     def make_LambdaExpression(self, variable, term):
         return drt.DrtLambdaExpression(variable, term)
 
+
 class DrtGlueDict(GlueDict):
     def get_GlueFormula_factory(self):
         return DrtGlueFormula
 
+
 class DrtGlue(Glue):
-    def __init__(self, semtype_file=None, remove_duplicates=False,
-                 depparser=None, verbose=False):
+    def __init__(
+        self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False
+    ):
         if not semtype_file:
-            semtype_file = os.path.join('grammars', 'sample_grammars','drt_glue.semtype')
+            semtype_file = os.path.join(
+                "grammars", "sample_grammars", "drt_glue.semtype"
+            )
         Glue.__init__(self, semtype_file, remove_duplicates, depparser, verbose)
 
     def get_glue_dict(self):
@@ -652,45 +780,53 @@ class DrtGlue(Glue):
 
 def demo(show_example=-1):
     from nltk.parse import MaltParser
-    examples = ['David sees Mary',
-                'David eats a sandwich',
-                'every man chases a dog',
-                'every man believes a dog sleeps',
-                'John gives David a sandwich',
-                'John chases himself']
-#                'John persuades David to order a pizza',
-#                'John tries to go',
-#                'John tries to find a unicorn',
-#                'John seems to vanish',
-#                'a unicorn seems to approach',
-#                'every big cat leaves',
-#                'every gray cat leaves',
-#                'every big gray cat leaves',
-#                'a former senator leaves',
-
-    print('============== DEMO ==============')
+
+    examples = [
+        "David sees Mary",
+        "David eats a sandwich",
+        "every man chases a dog",
+        "every man believes a dog sleeps",
+        "John gives David a sandwich",
+        "John chases himself",
+    ]
+    #                'John persuades David to order a pizza',
+    #                'John tries to go',
+    #                'John tries to find a unicorn',
+    #                'John seems to vanish',
+    #                'a unicorn seems to approach',
+    #                'every big cat leaves',
+    #                'every gray cat leaves',
+    #                'every big gray cat leaves',
+    #                'a former senator leaves',
+
+    print("============== DEMO ==============")
 
     tagger = RegexpTagger(
-        [('^(David|Mary|John)$', 'NNP'),
-         ('^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$', 'VB'),
-         ('^(go|order|vanish|find|approach)$', 'VB'),
-         ('^(a)$', 'ex_quant'),
-         ('^(every)$', 'univ_quant'),
-         ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'),
-         ('^(big|gray|former)$', 'JJ'),
-         ('^(him|himself)$', 'PRP')
-    ])
+        [
+            ("^(David|Mary|John)$", "NNP"),
+            (
+                "^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$",
+                "VB",
+            ),
+            ("^(go|order|vanish|find|approach)$", "VB"),
+            ("^(a)$", "ex_quant"),
+            ("^(every)$", "univ_quant"),
+            ("^(sandwich|man|dog|pizza|unicorn|cat|senator)$", "NN"),
+            ("^(big|gray|former)$", "JJ"),
+            ("^(him|himself)$", "PRP"),
+        ]
+    )
 
     depparser = MaltParser(tagger=tagger)
     glue = Glue(depparser=depparser, verbose=False)
 
     for (i, sentence) in enumerate(examples):
-        if i==show_example or show_example==-1:
-            print('[[[Example %s]]]  %s' % (i, sentence))
+        if i == show_example or show_example == -1:
+            print("[[[Example %s]]]  %s" % (i, sentence))
             for reading in glue.parse_to_meaning(sentence.split()):
                 print(reading.simplify())
-            print('')
+            print("")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
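
As a quick sanity check of the port (plain str is now accepted where six.string_types was required), a minimal Python 3 sketch in the style of the module's demo and the NLTK glue-semantics doctests; it assumes the vendored package is importable as nltk and that nltk.sem.glue works as shown in the hunks above:

    from nltk.sem.glue import GlueFormula

    # str meaning/glue terms are parsed directly by the new code path
    john = GlueFormula('john', 'g')
    walks = GlueFormula(r'\x.walks(x)', '(g -o f)')
    print(walks.applyto(john).meaning.simplify())  # walks(john)
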
diff --git a/nlp_resource_data/nltk/sem/glue.pyc b/nlp_resource_data/nltk/sem/glue.pyc
deleted file mode 100755 (executable)
index 0206c35..0000000
Binary files a/nlp_resource_data/nltk/sem/glue.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sem/hole.py b/nlp_resource_data/nltk/sem/hole.py
old mode 100755 (executable)
new mode 100644 (file)
index fe39369..bcd6dbf
@@ -3,7 +3,7 @@
 # Author:     Peter Wang
 # Updated by: Dan Garrette <dhgarrette@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
@@ -19,19 +19,23 @@ After parsing, the semantic representation is in the form of an underspecified
 representation that is not easy to read.  We use a "plugging" algorithm to
 convert that representation into first-order logic formulas.
 """
-from __future__ import print_function, unicode_literals
 
 from functools import reduce
 
-from six import itervalues
-
-from nltk import compat
 from nltk.parse import load_parser
 
 from nltk.sem.skolemize import skolemize
-from nltk.sem.logic import (AllExpression, AndExpression, ApplicationExpression,
-                            ExistsExpression, IffExpression, ImpExpression,
-                            LambdaExpression, NegatedExpression, OrExpression)
+from nltk.sem.logic import (
+    AllExpression,
+    AndExpression,
+    ApplicationExpression,
+    ExistsExpression,
+    IffExpression,
+    ImpExpression,
+    LambdaExpression,
+    NegatedExpression,
+    OrExpression,
+)
 
 
 # Note that in this code there may be multiple types of trees being referred to:
@@ -42,27 +46,30 @@ from nltk.sem.logic import (AllExpression, AndExpression, ApplicationExpression,
 # 4. the search space when plugging (search tree)
 #
 
+
 class Constants(object):
-    ALL = 'ALL'
-    EXISTS = 'EXISTS'
-    NOT = 'NOT'
-    AND = 'AND'
-    OR = 'OR'
-    IMP = 'IMP'
-    IFF = 'IFF'
-    PRED = 'PRED'
-    LEQ = 'LEQ'
-    HOLE = 'HOLE'
-    LABEL = 'LABEL'
-
-    MAP = {ALL: lambda v, e: AllExpression(v.variable, e),
-           EXISTS: lambda v, e: ExistsExpression(v.variable, e),
-           NOT: NegatedExpression,
-           AND: AndExpression,
-           OR: OrExpression,
-           IMP: ImpExpression,
-           IFF: IffExpression,
-           PRED: ApplicationExpression}
+    ALL = "ALL"
+    EXISTS = "EXISTS"
+    NOT = "NOT"
+    AND = "AND"
+    OR = "OR"
+    IMP = "IMP"
+    IFF = "IFF"
+    PRED = "PRED"
+    LEQ = "LEQ"
+    HOLE = "HOLE"
+    LABEL = "LABEL"
+
+    MAP = {
+        ALL: lambda v, e: AllExpression(v.variable, e),
+        EXISTS: lambda v, e: ExistsExpression(v.variable, e),
+        NOT: NegatedExpression,
+        AND: AndExpression,
+        OR: OrExpression,
+        IMP: ImpExpression,
+        IFF: IffExpression,
+        PRED: ApplicationExpression,
+    }
 
 
 class HoleSemantics(object):
@@ -73,6 +80,7 @@ class HoleSemantics(object):
     then provides some operations on the semantics dealing with holes, labels
     and finding legal ways to plug holes with labels.
     """
+
     def __init__(self, usr):
         """
         Constructor.  `usr' is a ``sem.Expression`` representing an
@@ -131,7 +139,7 @@ class HoleSemantics(object):
 
     def _find_top_nodes(self, node_list):
         top_nodes = node_list.copy()
-        for f in itervalues(self.fragments):
+        for f in self.fragments.values():
             # the label is the first argument of the predicate
             args = f[1]
             for arg in args:
@@ -184,7 +192,9 @@ class HoleSemantics(object):
             (node, ancestors) = queue[0]
             if node in self.holes:
                 # The node is a hole, try to plug it.
-                self._plug_hole(node, ancestors, queue[1:], potential_labels, plug_acc, record)
+                self._plug_hole(
+                    node, ancestors, queue[1:], potential_labels, plug_acc, record
+                )
             else:
                 assert node in self.labels
                 # The node is a label.  Replace it in the queue by the holes and
@@ -193,10 +203,9 @@ class HoleSemantics(object):
                 head = [(a, ancestors) for a in args if self.is_node(a)]
                 self._plug_nodes(head + queue[1:], potential_labels, plug_acc, record)
         else:
-            raise Exception('queue empty')
+            raise Exception("queue empty")
 
-    def _plug_hole(self, hole, ancestors0, queue, potential_labels0,
-                   plug_acc0, record):
+    def _plug_hole(self, hole, ancestors0, queue, potential_labels0, plug_acc0, record):
         """
         Try all possible ways of plugging a single hole.
         See _plug_nodes for the meanings of the parameters.
@@ -235,7 +244,9 @@ class HoleSemantics(object):
                 # before filling level i+1.
                 # A depth-first search would work as well since the trees must
                 # be finite but the bookkeeping would be harder.
-                self._plug_nodes(queue + [(l, ancestors)], potential_labels, plug_acc, record)
+                self._plug_nodes(
+                    queue + [(l, ancestors)], potential_labels, plug_acc, record
+                )
 
     def _violates_constraints(self, label, ancestors):
         """
@@ -287,12 +298,12 @@ class HoleSemantics(object):
             return node
 
 
-@compat.python_2_unicode_compatible
 class Constraint(object):
     """
     This class represents a constraint of the form (L =< N),
     where L is a label and N is a node (a label or a hole).
     """
+
     def __init__(self, lhs, rhs):
         self.lhs = lhs
         self.rhs = rhs
@@ -310,15 +321,15 @@ class Constraint(object):
         return hash(repr(self))
 
     def __repr__(self):
-        return '(%s < %s)' % (self.lhs, self.rhs)
+        return "(%s < %s)" % (self.lhs, self.rhs)
 
 
 def hole_readings(sentence, grammar_filename=None, verbose=False):
     if not grammar_filename:
-        grammar_filename = 'grammars/sample_grammars/hole.fcfg'
+        grammar_filename = "grammars/sample_grammars/hole.fcfg"
 
     if verbose:
-        print('Reading grammar file', grammar_filename)
+        print("Reading grammar file", grammar_filename)
 
     parser = load_parser(grammar_filename)
 
@@ -326,16 +337,16 @@ def hole_readings(sentence, grammar_filename=None, verbose=False):
     tokens = sentence.split()
     trees = list(parser.parse(tokens))
     if verbose:
-        print('Got %d different parses' % len(trees))
+        print("Got %d different parses" % len(trees))
 
     all_readings = []
     for tree in trees:
         # Get the semantic feature from the top of the parse tree.
-        sem = tree.label()['SEM'].simplify()
+        sem = tree.label()["SEM"].simplify()
 
         # Print the raw semantic representation.
         if verbose:
-            print('Raw:       ', sem)
+            print("Raw:       ", sem)
 
         # Skolemize away all quantifiers.  All variables become unique.
         while isinstance(sem, LambdaExpression):
@@ -343,7 +354,7 @@ def hole_readings(sentence, grammar_filename=None, verbose=False):
         skolemized = skolemize(sem)
 
         if verbose:
-            print('Skolemized:', skolemized)
+            print("Skolemized:", skolemized)
 
         # Break the hole semantics representation down into its components
         # i.e. holes, labels, formula fragments and constraints.
@@ -351,14 +362,14 @@ def hole_readings(sentence, grammar_filename=None, verbose=False):
 
         # Maybe show the details of the semantic representation.
         if verbose:
-            print('Holes:       ', hole_sem.holes)
-            print('Labels:      ', hole_sem.labels)
-            print('Constraints: ', hole_sem.constraints)
-            print('Top hole:    ', hole_sem.top_hole)
-            print('Top labels:  ', hole_sem.top_most_labels)
-            print('Fragments:')
+            print("Holes:       ", hole_sem.holes)
+            print("Labels:      ", hole_sem.labels)
+            print("Constraints: ", hole_sem.constraints)
+            print("Top hole:    ", hole_sem.top_hole)
+            print("Top labels:  ", hole_sem.top_most_labels)
+            print("Fragments:")
             for l, f in hole_sem.fragments.items():
-                print('\t%s: %s' % (l, f))
+                print("\t%s: %s" % (l, f))
 
         # Find all the possible ways to plug the formulas together.
         pluggings = hole_sem.pluggings()
@@ -370,7 +381,7 @@ def hole_readings(sentence, grammar_filename=None, verbose=False):
         if verbose:
             for i, r in enumerate(readings):
                 print()
-                print('%d. %s' % (i, r))
+                print("%d. %s" % (i, r))
             print()
 
         all_readings.extend(readings)
@@ -378,9 +389,9 @@ def hole_readings(sentence, grammar_filename=None, verbose=False):
     return all_readings
 
 
-if __name__ == '__main__':
-    for r in hole_readings('a dog barks'):
+if __name__ == "__main__":
+    for r in hole_readings("a dog barks"):
         print(r)
     print()
-    for r in hole_readings('every girl chases a dog'):
+    for r in hole_readings("every girl chases a dog"):
         print(r)
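
A hedged usage sketch for the plugging pipeline described in the module docstring, mirroring the __main__ block above; it assumes the sample hole.fcfg grammar is available through nltk_data:

    from nltk.sem.hole import hole_readings

    # one first-order formula per legal way of plugging holes with labels
    for reading in hole_readings('every girl chases a dog'):
        print(reading)
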
diff --git a/nlp_resource_data/nltk/sem/hole.pyc b/nlp_resource_data/nltk/sem/hole.pyc
deleted file mode 100755 (executable)
index 6de7c4e..0000000
Binary files a/nlp_resource_data/nltk/sem/hole.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sem/lfg.py b/nlp_resource_data/nltk/sem/lfg.py
old mode 100755 (executable)
new mode 100644 (file)
index 85b3353..d4decf0
@@ -2,18 +2,15 @@
 #
 # Author: Dan Garrette <dhgarrette@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, division, unicode_literals
 
 from itertools import chain
 
 from nltk.internals import Counter
-from nltk.compat import python_2_unicode_compatible
 
 
-@python_2_unicode_compatible
 class FStructure(dict):
     def safeappend(self, key, item):
         """
@@ -39,18 +36,19 @@ class FStructure(dict):
 
     def to_depgraph(self, rel=None):
         from nltk.parse.dependencygraph import DependencyGraph
+
         depgraph = DependencyGraph()
         nodes = depgraph.nodes
 
-        self._to_depgraph(nodes, 0, 'ROOT')
+        self._to_depgraph(nodes, 0, "ROOT")
 
         # Add all the dependencies for all the nodes
         for address, node in nodes.items():
-            for n2 in (n for n in nodes.values() if n['rel'] != 'TOP'):
-                if n2['head'] == address:
-                    relation = n2['rel']
-                    node['deps'].setdefault(relation,[])
-                    node['deps'][relation].append(n2['address'])
+            for n2 in (n for n in nodes.values() if n["rel"] != "TOP"):
+                if n2["head"] == address:
+                    relation = n2["rel"]
+                    node["deps"].setdefault(relation, [])
+                    node["deps"][relation].append(n2["address"])
 
         depgraph.root = nodes[1]
 
@@ -61,11 +59,11 @@ class FStructure(dict):
 
         nodes[index].update(
             {
-                'address': index,
-                'word': self.pred[0],
-                'tag': self.pred[1],
-                'head': head,
-                'rel': rel,
+                "address": index,
+                "word": self.pred[0],
+                "tag": self.pred[1],
+                "head": head,
+                "rel": rel,
             }
         )
 
@@ -77,18 +75,20 @@ class FStructure(dict):
                     new_index = len(nodes)
                     nodes[new_index].update(
                         {
-                            'address': new_index,
-                            'word': item[0],
-                            'tag': item[1],
-                            'head': index,
-                            'rel': feature,
+                            "address": new_index,
+                            "word": item[0],
+                            "tag": item[1],
+                            "head": index,
+                            "rel": feature,
                         }
                     )
                 elif isinstance(item, list):
                     for n in item:
                         n._to_depgraph(nodes, index, feature)
                 else:
-                    raise Exception('feature %s is not an FStruct, a list, or a tuple' % feature)
+                    raise Exception(
+                        "feature %s is not an FStruct, a list, or a tuple" % feature
+                    )
 
     @staticmethod
     def read_depgraph(depgraph):
@@ -99,9 +99,9 @@ class FStructure(dict):
         if not label_counter:
             label_counter = Counter()
 
-        if node['rel'].lower() in ['spec', 'punct']:
+        if node["rel"].lower() in ["spec", "punct"]:
             # the value of a 'spec' entry is a word, not an FStructure
-            return (node['word'], node['tag'])
+            return (node["word"], node["tag"])
 
         else:
             fstruct = FStructure()
@@ -110,18 +110,21 @@ class FStructure(dict):
 
             fstruct.parent = parent
 
-            word, tag = node['word'], node['tag']
-            if tag[:2] == 'VB':
-                if tag[2:3] == 'D':
-                    fstruct.safeappend('tense', ('PAST', 'tense'))
+            word, tag = node["word"], node["tag"]
+            if tag[:2] == "VB":
+                if tag[2:3] == "D":
+                    fstruct.safeappend("tense", ("PAST", "tense"))
                 fstruct.pred = (word, tag[:2])
 
             if not fstruct.pred:
                 fstruct.pred = (word, tag)
 
-            children = [depgraph.nodes[idx] for idx in chain(*node['deps'].values())]
+            children = [depgraph.nodes[idx] for idx in chain(*node["deps"].values())]
             for child in children:
-                fstruct.safeappend(child['rel'], FStructure._read_depgraph(child, depgraph, label_counter, fstruct))
+                fstruct.safeappend(
+                    child["rel"],
+                    FStructure._read_depgraph(child, depgraph, label_counter, fstruct),
+                )
 
             return fstruct
 
@@ -133,8 +136,34 @@ class FStructure(dict):
         :param value: where to index into the list of characters
         :type value: int
         """
-        letter = ['f','g','h','i','j','k','l','m','n','o','p','q','r','s',
-                  't','u','v','w','x','y','z','a','b','c','d','e'][value-1]
+        letter = [
+            "f",
+            "g",
+            "h",
+            "i",
+            "j",
+            "k",
+            "l",
+            "m",
+            "n",
+            "o",
+            "p",
+            "q",
+            "r",
+            "s",
+            "t",
+            "u",
+            "v",
+            "w",
+            "x",
+            "y",
+            "z",
+            "a",
+            "b",
+            "c",
+            "d",
+            "e",
+        ][value - 1]
         num = int(value) // 26
         if num > 0:
             return letter + str(num)
@@ -142,39 +171,50 @@ class FStructure(dict):
             return letter
 
     def __repr__(self):
-        return self.__unicode__().replace('\n', '')
+        return self.__str__().replace("\n", "")
 
     def __str__(self):
         return self.pretty_format()
 
     def pretty_format(self, indent=3):
         try:
-            accum = '%s:[' % self.label
+            accum = "%s:[" % self.label
         except NameError:
-            accum = '['
+            accum = "["
         try:
-            accum += 'pred \'%s\'' % (self.pred[0])
+            accum += "pred '%s'" % (self.pred[0])
         except NameError:
             pass
 
         for feature in sorted(self):
             for item in self[feature]:
                 if isinstance(item, FStructure):
-                    next_indent = indent+len(feature)+3+len(self.label)
-                    accum += '\n%s%s %s' % (' '*(indent), feature, item.pretty_format(next_indent))
+                    next_indent = indent + len(feature) + 3 + len(self.label)
+                    accum += "\n%s%s %s" % (
+                        " " * (indent),
+                        feature,
+                        item.pretty_format(next_indent),
+                    )
                 elif isinstance(item, tuple):
-                    accum += '\n%s%s \'%s\'' % (' '*(indent), feature, item[0])
+                    accum += "\n%s%s '%s'" % (" " * (indent), feature, item[0])
                 elif isinstance(item, list):
-                    accum += '\n%s%s {%s}' % (' '*(indent), feature, ('\n%s' % (' '*(indent+len(feature)+2))).join(item))
-                else: # ERROR
-                    raise Exception('feature %s is not an FStruct, a list, or a tuple' % feature)
-        return accum+']'
-
+                    accum += "\n%s%s {%s}" % (
+                        " " * (indent),
+                        feature,
+                        ("\n%s" % (" " * (indent + len(feature) + 2))).join(item),
+                    )
+                else:  # ERROR
+                    raise Exception(
+                        "feature %s is not an FStruct, a list, or a tuple" % feature
+                    )
+        return accum + "]"
 
 
 def demo_read_depgraph():
     from nltk.parse.dependencygraph import DependencyGraph
-    dg1 = DependencyGraph("""\
+
+    dg1 = DependencyGraph(
+        """\
 Esso       NNP     2       SUB
 said       VBD     0       ROOT
 the        DT      5       NMOD
@@ -183,28 +223,36 @@ field      NN      6       SUB
 started    VBD     2       VMOD
 production NN      6       OBJ
 Tuesday    NNP     6       VMOD
-""")
-    dg2 = DependencyGraph("""\
+"""
+    )
+    dg2 = DependencyGraph(
+        """\
 John    NNP     2       SUB
 sees    VBP     0       ROOT
 Mary    NNP     2       OBJ
-""")
-    dg3 = DependencyGraph("""\
+"""
+    )
+    dg3 = DependencyGraph(
+        """\
 a       DT      2       SPEC
 man     NN      3       SUBJ
 walks   VB      0       ROOT
-""")
-    dg4 = DependencyGraph("""\
+"""
+    )
+    dg4 = DependencyGraph(
+        """\
 every   DT      2       SPEC
 girl    NN      3       SUBJ
 chases  VB      0       ROOT
 a       DT      5       SPEC
 dog     NN      3       OBJ
-""")
+"""
+    )
 
-    depgraphs = [dg1,dg2,dg3,dg4]
+    depgraphs = [dg1, dg2, dg3, dg4]
     for dg in depgraphs:
         print(FStructure.read_depgraph(dg))
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo_read_depgraph()
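
A minimal sketch exercising the ported module on one of the demo graphs above; DependencyGraph splits whitespace-separated columns, so the indentation inside the string literal is harmless:

    from nltk.parse.dependencygraph import DependencyGraph
    from nltk.sem.lfg import FStructure

    dg = DependencyGraph("""\
    John    NNP     2       SUB
    sees    VBP     0       ROOT
    Mary    NNP     2       OBJ
    """)
    print(FStructure.read_depgraph(dg))  # f-structure with pred 'sees'
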
diff --git a/nlp_resource_data/nltk/sem/lfg.pyc b/nlp_resource_data/nltk/sem/lfg.pyc
deleted file mode 100755 (executable)
index 111b96c..0000000
Binary files a/nlp_resource_data/nltk/sem/lfg.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sem/linearlogic.py b/nlp_resource_data/nltk/sem/linearlogic.py
old mode 100755 (executable)
new mode 100644 (file)
index 38457a7..abd9d19
@@ -2,32 +2,31 @@
 #
 # Author: Dan Garrette <dhgarrette@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function, unicode_literals
-
-from six import string_types
 
 from nltk.internals import Counter
-from nltk.compat import python_2_unicode_compatible
 from nltk.sem.logic import LogicParser, APP
 
 _counter = Counter()
 
+
 class Tokens(object):
-    #Punctuation
-    OPEN = '('
-    CLOSE = ')'
+    # Punctuation
+    OPEN = "("
+    CLOSE = ")"
 
-    #Operations
-    IMP = '-o'
+    # Operations
+    IMP = "-o"
 
     PUNCT = [OPEN, CLOSE]
     TOKENS = PUNCT + [IMP]
 
+
 class LinearLogicParser(LogicParser):
     """A linear logic expression parser."""
+
     def __init__(self):
         LogicParser.__init__(self)
 
@@ -59,7 +58,7 @@ class LinearLogicParser(LogicParser):
         argument expression."""
         if self.has_priority(APP, context):
             if self.inRange(0) and self.token(0) == Tokens.OPEN:
-                self.token() #swallow then open paren
+                self.token()  # swallow the open paren
                 argument = self.process_next_expression(APP)
                 self.assertNextToken(Tokens.CLOSE)
                 expression = ApplicationExpression(expression, argument, None)
@@ -72,7 +71,6 @@ class LinearLogicParser(LogicParser):
             return ConstantExpression(name)
 
 
-@python_2_unicode_compatible
 class Expression(object):
 
     _linear_logic_parser = LinearLogicParser()
@@ -88,17 +86,16 @@ class Expression(object):
         return self.applyto(other)
 
     def __repr__(self):
-        return '<%s %s>' % (self.__class__.__name__, self)
+        return "<%s %s>" % (self.__class__.__name__, self)
 
 
-@python_2_unicode_compatible
 class AtomicExpression(Expression):
     def __init__(self, name, dependencies=None):
         """
         :param name: str for the constant name
         :param dependencies: list of int for the indices on which this atom is dependent
         """
-        assert isinstance(name, string_types)
+        assert isinstance(name, str)
         self.name = name
 
         if not dependencies:
@@ -158,6 +155,7 @@ class AtomicExpression(Expression):
     def __hash__(self):
         return hash(self.name)
 
+
 class ConstantExpression(AtomicExpression):
     def unify(self, other, bindings):
         """
@@ -179,6 +177,7 @@ class ConstantExpression(AtomicExpression):
             return bindings
         raise UnificationException(self, other, bindings)
 
+
 class VariableExpression(AtomicExpression):
     def unify(self, other, bindings):
         """
@@ -198,7 +197,7 @@ class VariableExpression(AtomicExpression):
         except VariableBindingException:
             raise UnificationException(self, other, bindings)
 
-@python_2_unicode_compatible
+
 class ImpExpression(Expression):
     def __init__(self, antecedent, consequent):
         """
@@ -211,7 +210,9 @@ class ImpExpression(Expression):
         self.consequent = consequent
 
     def simplify(self, bindings=None):
-        return self.__class__(self.antecedent.simplify(bindings), self.consequent.simplify(bindings))
+        return self.__class__(
+            self.antecedent.simplify(bindings), self.consequent.simplify(bindings)
+        )
 
     def unify(self, other, bindings):
         """
@@ -224,7 +225,11 @@ class ImpExpression(Expression):
         """
         assert isinstance(other, ImpExpression)
         try:
-            return bindings + self.antecedent.unify(other.antecedent, bindings) + self.consequent.unify(other.consequent, bindings)
+            return (
+                bindings
+                + self.antecedent.unify(other.antecedent, bindings)
+                + self.consequent.unify(other.consequent, bindings)
+            )
         except VariableBindingException:
             raise UnificationException(self, other, bindings)
 
@@ -238,7 +243,7 @@ class ImpExpression(Expression):
         """
         (a, a_new) = self.antecedent.compile_neg(index_counter, glueFormulaFactory)
         (c, c_new) = self.consequent.compile_pos(index_counter, glueFormulaFactory)
-        return (ImpExpression(a,c), a_new + c_new)
+        return (ImpExpression(a, c), a_new + c_new)
 
     def compile_neg(self, index_counter, glueFormulaFactory):
         """
@@ -252,7 +257,7 @@ class ImpExpression(Expression):
         (c, c_new) = self.consequent.compile_neg(index_counter, glueFormulaFactory)
         fresh_index = index_counter.get()
         c.dependencies.append(fresh_index)
-        new_v = glueFormulaFactory('v%s' % fresh_index, a, set([fresh_index]))
+        new_v = glueFormulaFactory("v%s" % fresh_index, a, set([fresh_index]))
         return (c, a_new + c_new + [new_v])
 
     def initialize_labels(self, fstruct):
@@ -260,20 +265,30 @@ class ImpExpression(Expression):
         self.consequent.initialize_labels(fstruct)
 
     def __eq__(self, other):
-        return self.__class__ == other.__class__ and \
-                self.antecedent == other.antecedent and self.consequent == other.consequent
+        return (
+            self.__class__ == other.__class__
+            and self.antecedent == other.antecedent
+            and self.consequent == other.consequent
+        )
 
     def __ne__(self, other):
         return not self == other
 
     def __str__(self):
         return "%s%s %s %s%s" % (
-            Tokens.OPEN, self.antecedent, Tokens.IMP, self.consequent, Tokens.CLOSE)
+            Tokens.OPEN,
+            self.antecedent,
+            Tokens.IMP,
+            self.consequent,
+            Tokens.CLOSE,
+        )
 
     def __hash__(self):
-        return hash('%s%s%s' % (hash(self.antecedent), Tokens.IMP, hash(self.consequent)))
+        return hash(
+            "%s%s%s" % (hash(self.antecedent), Tokens.IMP, hash(self.consequent))
+        )
+
 
-@python_2_unicode_compatible
 class ApplicationExpression(Expression):
     def __init__(self, function, argument, argument_indices=None):
         """
@@ -297,15 +312,23 @@ class ApplicationExpression(Expression):
                 bindings += argument.bindings
             bindings += function_simp.antecedent.unify(argument_simp, bindings)
         except UnificationException as e:
-            raise LinearLogicApplicationException('Cannot apply %s to %s. %s' % (function_simp, argument_simp, e))
+            raise LinearLogicApplicationException(
+                "Cannot apply %s to %s. %s" % (function_simp, argument_simp, e)
+            )
 
         # If you are running it on compiled premises, more conditions apply
         if argument_indices:
             # A.dependencies of (A -o (B -o C)) must be a proper subset of argument_indices
             if not set(function_simp.antecedent.dependencies) < argument_indices:
-                raise LinearLogicApplicationException('Dependencies unfulfilled when attempting to apply Linear Logic formula %s to %s' % (function_simp, argument_simp))
+                raise LinearLogicApplicationException(
+                    "Dependencies unfulfilled when attempting to apply Linear Logic formula %s to %s"
+                    % (function_simp, argument_simp)
+                )
             if set(function_simp.antecedent.dependencies) == argument_indices:
-                raise LinearLogicApplicationException('Dependencies not a proper subset of indices when attempting to apply Linear Logic formula %s to %s' % (function_simp, argument_simp))
+                raise LinearLogicApplicationException(
+                    "Dependencies not a proper subset of indices when attempting to apply Linear Logic formula %s to %s"
+                    % (function_simp, argument_simp)
+                )
 
         self.function = function
         self.argument = argument
@@ -326,8 +349,11 @@ class ApplicationExpression(Expression):
         return self.function.simplify(bindings).consequent
 
     def __eq__(self, other):
-        return self.__class__ == other.__class__ and \
-                self.function == other.function and self.argument == other.argument
+        return (
+            self.__class__ == other.__class__
+            and self.function == other.function
+            and self.argument == other.argument
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -336,9 +362,11 @@ class ApplicationExpression(Expression):
         return "%s" % self.function + Tokens.OPEN + "%s" % self.argument + Tokens.CLOSE
 
     def __hash__(self):
-        return hash('%s%s%s' % (hash(self.antecedent), Tokens.OPEN, hash(self.consequent)))
+        return hash(
+            "%s%s%s" % (hash(self.antecedent), Tokens.OPEN, hash(self.consequent))
+        )
+
 
-@python_2_unicode_compatible
 class BindingDict(object):
     def __init__(self, bindings=None):
         """
@@ -374,7 +402,9 @@ class BindingDict(object):
         if not existing or binding == existing:
             self.d[variable] = binding
         else:
-            raise VariableBindingException('Variable %s already bound to another value' % (variable))
+            raise VariableBindingException(
+                "Variable %s already bound to another value" % (variable)
+            )
 
     def __getitem__(self, variable):
         """
@@ -406,8 +436,10 @@ class BindingDict(object):
                 combined[v] = other.d[v]
             return combined
         except VariableBindingException:
-            raise VariableBindingException('Attempting to add two contradicting'\
-                        ' VariableBindingsLists: %s, %s' % (self, other))
+            raise VariableBindingException(
+                "Attempting to add two contradicting"
+                " VariableBindingsLists: %s, %s" % (self, other)
+            )
 
     def __ne__(self, other):
         return not self == other
@@ -418,17 +450,20 @@ class BindingDict(object):
         return self.d == other.d
 
     def __str__(self):
-        return '{' + ', '.join('%s: %s' % (v, self.d[v]) for v in self.d) + '}'
+        return "{" + ", ".join("%s: %s" % (v, self.d[v]) for v in self.d) + "}"
 
     def __repr__(self):
-        return 'BindingDict: %s' % self
+        return "BindingDict: %s" % self
+
 
 class VariableBindingException(Exception):
     pass
 
+
 class UnificationException(Exception):
     def __init__(self, a, b, bindings):
-        Exception.__init__(self, 'Cannot unify %s with %s given %s' % (a, b, bindings))
+        Exception.__init__(self, "Cannot unify %s with %s given %s" % (a, b, bindings))
+
 
 class LinearLogicApplicationException(Exception):
     pass
@@ -437,15 +472,15 @@ class LinearLogicApplicationException(Exception):
 def demo():
     lexpr = Expression.fromstring
 
-    print(lexpr(r'f'))
-    print(lexpr(r'(g -o f)'))
-    print(lexpr(r'((g -o G) -o G)'))
-    print(lexpr(r'g -o h -o f'))
-    print(lexpr(r'(g -o f)(g)').simplify())
-    print(lexpr(r'(H -o f)(g)').simplify())
-    print(lexpr(r'((g -o G) -o G)((g -o f))').simplify())
-    print(lexpr(r'(H -o H)((g -o f))').simplify())
+    print(lexpr(r"f"))
+    print(lexpr(r"(g -o f)"))
+    print(lexpr(r"((g -o G) -o G)"))
+    print(lexpr(r"g -o h -o f"))
+    print(lexpr(r"(g -o f)(g)").simplify())
+    print(lexpr(r"(H -o f)(g)").simplify())
+    print(lexpr(r"((g -o G) -o G)((g -o f))").simplify())
+    print(lexpr(r"(H -o H)((g -o f))").simplify())
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
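
And a hedged check of the linear-logic parser itself, mirroring demo() above:

    from nltk.sem.linearlogic import Expression

    lexpr = Expression.fromstring
    print(lexpr(r'(g -o f)(g)').simplify())                # f
    print(lexpr(r'((g -o G) -o G)((g -o f))').simplify())  # f, with G bound to f
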
diff --git a/nlp_resource_data/nltk/sem/linearlogic.pyc b/nlp_resource_data/nltk/sem/linearlogic.pyc
deleted file mode 100755 (executable)
index 01b0ec1..0000000
Binary files a/nlp_resource_data/nltk/sem/linearlogic.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sem/logic.py b/nlp_resource_data/nltk/sem/logic.py
old mode 100755 (executable)
new mode 100644 (file)
index 1053802..c203e1f
@@ -2,7 +2,7 @@
 #
 # Author: Dan Garrette <dhgarrette@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
 A version of first order predicate logic, built on
 top of the typed lambda calculus.
 """
-from __future__ import print_function, unicode_literals
 
 import re
 import operator
 from collections import defaultdict
 from functools import reduce, total_ordering
 
-from six import string_types
-
 from nltk.util import Trie
 from nltk.internals import Counter
-from nltk.compat import python_2_unicode_compatible
 
-APP = 'APP'
+APP = "APP"
 
 _counter = Counter()
 
+
 class Tokens(object):
-    LAMBDA = '\\';     LAMBDA_LIST = ['\\']
-
-    #Quantifiers
-    EXISTS = 'exists'; EXISTS_LIST = ['some', 'exists', 'exist']
-    ALL = 'all';       ALL_LIST = ['all', 'forall']
-
-    #Punctuation
-    DOT = '.'
-    OPEN = '('
-    CLOSE = ')'
-    COMMA = ','
-
-    #Operations
-    NOT = '-';         NOT_LIST = ['not', '-', '!']
-    AND = '&';         AND_LIST = ['and', '&', '^']
-    OR = '|';          OR_LIST = ['or', '|']
-    IMP = '->';        IMP_LIST = ['implies', '->', '=>']
-    IFF = '<->';       IFF_LIST = ['iff', '<->', '<=>']
-    EQ = '=';          EQ_LIST = ['=', '==']
-    NEQ = '!=';        NEQ_LIST = ['!=']
-
-    #Collections of tokens
+    LAMBDA = "\\"
+    LAMBDA_LIST = ["\\"]
+
+    # Quantifiers
+    EXISTS = "exists"
+    EXISTS_LIST = ["some", "exists", "exist"]
+    ALL = "all"
+    ALL_LIST = ["all", "forall"]
+
+    # Punctuation
+    DOT = "."
+    OPEN = "("
+    CLOSE = ")"
+    COMMA = ","
+
+    # Operations
+    NOT = "-"
+    NOT_LIST = ["not", "-", "!"]
+    AND = "&"
+    AND_LIST = ["and", "&", "^"]
+    OR = "|"
+    OR_LIST = ["or", "|"]
+    IMP = "->"
+    IMP_LIST = ["implies", "->", "=>"]
+    IFF = "<->"
+    IFF_LIST = ["iff", "<->", "<=>"]
+    EQ = "="
+    EQ_LIST = ["=", "=="]
+    NEQ = "!="
+    NEQ_LIST = ["!="]
+
+    # Collections of tokens
     BINOPS = AND_LIST + OR_LIST + IMP_LIST + IFF_LIST
     QUANTS = EXISTS_LIST + ALL_LIST
     PUNCT = [DOT, OPEN, CLOSE, COMMA]
 
     TOKENS = BINOPS + EQ_LIST + NEQ_LIST + QUANTS + LAMBDA_LIST + PUNCT + NOT_LIST
 
-    #Special
-    SYMBOLS = [x for x in TOKENS if re.match(r'^[-\\.(),!&^|>=<]*$', x)]
+    # Special
+    SYMBOLS = [x for x in TOKENS if re.match(r"^[-\\.(),!&^|>=<]*$", x)]
 
 
 def boolean_ops():
     """
     Boolean operators
     """
-    names =  ["negation", "conjunction", "disjunction", "implication", "equivalence"]
+    names = ["negation", "conjunction", "disjunction", "implication", "equivalence"]
     for pair in zip(names, [Tokens.NOT, Tokens.AND, Tokens.OR, Tokens.IMP, Tokens.IFF]):
-        print("%-15s\t%s" %  pair)
+        print("%-15s\t%s" % pair)
+
 
 def equality_preds():
     """
     Equality predicates
     """
-    names =  ["equality", "inequality"]
+    names = ["equality", "inequality"]
     for pair in zip(names, [Tokens.EQ, Tokens.NEQ]):
-        print("%-15s\t%s" %  pair)
+        print("%-15s\t%s" % pair)
+
 
 def binding_ops():
     """
     Binding operators
     """
-    names =  ["existential", "universal", "lambda"]
+    names = ["existential", "universal", "lambda"]
     for pair in zip(names, [Tokens.EXISTS, Tokens.ALL, Tokens.LAMBDA]):
-        print("%-15s\t%s" %  pair)
+        print("%-15s\t%s" % pair)
 
 
-@python_2_unicode_compatible
 class LogicParser(object):
     """A lambda calculus expression parser."""
 
@@ -112,16 +120,17 @@ class LogicParser(object):
         self.quote_chars = []
 
         self.operator_precedence = dict(
-                           [(x,1) for x in Tokens.LAMBDA_LIST]             + \
-                           [(x,2) for x in Tokens.NOT_LIST]                + \
-                           [(APP,3)]                                       + \
-                           [(x,4) for x in Tokens.EQ_LIST+Tokens.NEQ_LIST] + \
-                           [(x,5) for x in Tokens.QUANTS]                  + \
-                           [(x,6) for x in Tokens.AND_LIST]                + \
-                           [(x,7) for x in Tokens.OR_LIST]                 + \
-                           [(x,8) for x in Tokens.IMP_LIST]                + \
-                           [(x,9) for x in Tokens.IFF_LIST]                + \
-                           [(None,10)])
+            [(x, 1) for x in Tokens.LAMBDA_LIST]
+            + [(x, 2) for x in Tokens.NOT_LIST]
+            + [(APP, 3)]
+            + [(x, 4) for x in Tokens.EQ_LIST + Tokens.NEQ_LIST]
+            + [(x, 5) for x in Tokens.QUANTS]
+            + [(x, 6) for x in Tokens.AND_LIST]
+            + [(x, 7) for x in Tokens.OR_LIST]
+            + [(x, 8) for x in Tokens.IMP_LIST]
+            + [(x, 9) for x in Tokens.IFF_LIST]
+            + [(None, 10)]
+        )
         self.right_associated_operations = [APP]
 
     def parse(self, data, signature=None):
@@ -141,9 +150,9 @@ class LogicParser(object):
         try:
             result = self.process_next_expression(None)
             if self.inRange(0):
-                raise UnexpectedTokenException(self._currentIndex+1, self.token(0))
+                raise UnexpectedTokenException(self._currentIndex + 1, self.token(0))
         except LogicalExpressionException as e:
-            msg = '%s\n%s\n%s^' % (e, data, ' '*mapping[e.index-1])
+            msg = "%s\n%s\n%s^" % (e, data, " " * mapping[e.index - 1])
             raise LogicalExpressionException(None, msg)
 
         if self.type_check:
@@ -156,7 +165,7 @@ class LogicParser(object):
         out = []
         mapping = {}
         tokenTrie = Trie(self.get_all_symbols())
-        token = ''
+        token = ""
         data_idx = 0
         token_start_idx = data_idx
         while data_idx < len(data):
@@ -170,29 +179,29 @@ class LogicParser(object):
 
             st = tokenTrie
             c = data[data_idx]
-            symbol = ''
+            symbol = ""
             while c in st:
                 symbol += c
                 st = st[c]
-                if len(data)-data_idx > len(symbol):
-                    c = data[data_idx+len(symbol)]
+                if len(data) - data_idx > len(symbol):
+                    c = data[data_idx + len(symbol)]
                 else:
                     break
             if Trie.LEAF in st:
-                #token is a complete symbol
+                # token is a complete symbol
                 if token:
                     mapping[len(out)] = token_start_idx
                     out.append(token)
-                    token = ''
+                    token = ""
                 mapping[len(out)] = data_idx
                 out.append(symbol)
                 data_idx += len(symbol)
             else:
-                if data[data_idx] in ' \t\n': #any whitespace
+                if data[data_idx] in " \t\n":  # any whitespace
                     if token:
                         mapping[len(out)] = token_start_idx
                         out.append(token)
-                        token = ''
+                        token = ""
                 else:
                     if not token:
                         token_start_idx = data_idx
@@ -202,11 +211,11 @@ class LogicParser(object):
             mapping[len(out)] = token_start_idx
             out.append(token)
         mapping[len(out)] = len(data)
-        mapping[len(out)+1] = len(data)+1
+        mapping[len(out) + 1] = len(data) + 1
         return out, mapping
 
     def process_quoted_token(self, data_idx, data):
-        token = ''
+        token = ""
         c = data[data_idx]
         i = data_idx
         for start, end, escape, incl_quotes in self.quote_chars:
@@ -219,22 +228,25 @@ class LogicParser(object):
                         if incl_quotes:
                             token += data[i]
                         i += 1
-                        if len(data) == i: #if there are no more chars
-                            raise LogicalExpressionException(None, "End of input reached.  "
-                                    "Escape character [%s] found at end."
-                                    % escape)
+                        if len(data) == i:  # if there are no more chars
+                            raise LogicalExpressionException(
+                                None,
+                                "End of input reached.  "
+                                "Escape character [%s] found at end." % escape,
+                            )
                         token += data[i]
                     else:
                         token += data[i]
                     i += 1
                     if len(data) == i:
-                        raise LogicalExpressionException(None, "End of input reached.  "
-                                             "Expected: [%s]" % end)
+                        raise LogicalExpressionException(
+                            None, "End of input reached.  " "Expected: [%s]" % end
+                        )
                 if incl_quotes:
                     token += data[i]
                 i += 1
                 if not token:
-                    raise LogicalExpressionException(None, 'Empty quoted token found')
+                    raise LogicalExpressionException(None, "Empty quoted token found")
                 break
         return token, i
 
@@ -244,7 +256,7 @@ class LogicParser(object):
 
     def inRange(self, location):
         """Return TRUE if the given location is within the buffer"""
-        return self._currentIndex+location < len(self._buffer)
+        return self._currentIndex + location < len(self._buffer)
 
     def token(self, location=None):
         """Get the next waiting token.  If a location is given, then
@@ -255,10 +267,10 @@ class LogicParser(object):
                 tok = self._buffer[self._currentIndex]
                 self._currentIndex += 1
             else:
-                tok = self._buffer[self._currentIndex+location]
+                tok = self._buffer[self._currentIndex + location]
             return tok
         except IndexError:
-            raise ExpectedMoreTokensException(self._currentIndex+1)
+            raise ExpectedMoreTokensException(self._currentIndex + 1)
 
     def isvariable(self, tok):
         return tok not in Tokens.TOKENS
@@ -268,12 +280,16 @@ class LogicParser(object):
         try:
             tok = self.token()
         except ExpectedMoreTokensException:
-            raise ExpectedMoreTokensException(self._currentIndex+1, message='Expression expected.')
+            raise ExpectedMoreTokensException(
+                self._currentIndex + 1, message="Expression expected."
+            )
 
         accum = self.handle(tok, context)
 
         if not accum:
-            raise UnexpectedTokenException(self._currentIndex, tok, message='Expression expected.')
+            raise UnexpectedTokenException(
+                self._currentIndex, tok, message="Expression expected."
+            )
 
         return self.attempt_adjuncts(accum, context)
 
@@ -297,7 +313,7 @@ class LogicParser(object):
 
     def attempt_adjuncts(self, expression, context):
         cur_idx = None
-        while cur_idx != self._currentIndex: #while adjuncts are added
+        while cur_idx != self._currentIndex:  # while adjuncts are added
             cur_idx = self._currentIndex
             expression = self.attempt_EqualityExpression(expression, context)
             expression = self.attempt_ApplicationExpression(expression, context)
@@ -311,25 +327,32 @@ class LogicParser(object):
         return NegatedExpression(expression)
 
     def handle_variable(self, tok, context):
-        #It's either: 1) a predicate expression: sees(x,y)
+        # It's either: 1) a predicate expression: sees(x,y)
         #             2) an application expression: P(x)
         #             3) a solo variable: john OR x
         accum = self.make_VariableExpression(tok)
         if self.inRange(0) and self.token(0) == Tokens.OPEN:
-            #The predicate has arguments
-            if not isinstance(accum, FunctionVariableExpression) and \
-               not isinstance(accum, ConstantExpression):
-                raise LogicalExpressionException(self._currentIndex,
-                                     "'%s' is an illegal predicate name.  "
-                                     "Individual variables may not be used as "
-                                     "predicates." % tok)
-            self.token() #swallow the Open Paren
-
-            #curry the arguments
-            accum = self.make_ApplicationExpression(accum, self.process_next_expression(APP))
+            # The predicate has arguments
+            if not isinstance(accum, FunctionVariableExpression) and not isinstance(
+                accum, ConstantExpression
+            ):
+                raise LogicalExpressionException(
+                    self._currentIndex,
+                    "'%s' is an illegal predicate name.  "
+                    "Individual variables may not be used as "
+                    "predicates." % tok,
+                )
+            self.token()  # swallow the Open Paren
+
+            # curry the arguments
+            accum = self.make_ApplicationExpression(
+                accum, self.process_next_expression(APP)
+            )
             while self.inRange(0) and self.token(0) == Tokens.COMMA:
-                self.token() #swallow the comma
-                accum = self.make_ApplicationExpression(accum, self.process_next_expression(APP))
+                self.token()  # swallow the comma
+                accum = self.make_ApplicationExpression(
+                    accum, self.process_next_expression(APP)
+                )
             self.assertNextToken(Tokens.CLOSE)
         return accum
 
@@ -337,28 +360,36 @@ class LogicParser(object):
         try:
             tok = self.token()
         except ExpectedMoreTokensException as e:
-            raise ExpectedMoreTokensException(e.index, 'Variable expected.')
+            raise ExpectedMoreTokensException(e.index, "Variable expected.")
         if isinstance(self.make_VariableExpression(tok), ConstantExpression):
-            raise LogicalExpressionException(self._currentIndex,
-                                 "'%s' is an illegal variable name.  "
-                                 "Constants may not be %s." % (tok, description))
+            raise LogicalExpressionException(
+                self._currentIndex,
+                "'%s' is an illegal variable name.  "
+                "Constants may not be %s." % (tok, description),
+            )
         return Variable(tok)
 
     def handle_lambda(self, tok, context):
         # Expression is a lambda expression
         if not self.inRange(0):
-            raise ExpectedMoreTokensException(self._currentIndex+2,
-                                              message="Variable and Expression expected following lambda operator.")
-        vars = [self.get_next_token_variable('abstracted')]
+            raise ExpectedMoreTokensException(
+                self._currentIndex + 2,
+                message="Variable and Expression expected following lambda operator.",
+            )
+        vars = [self.get_next_token_variable("abstracted")]
         while True:
-            if not self.inRange(0) or (self.token(0) == Tokens.DOT and not self.inRange(1)):
-                raise ExpectedMoreTokensException(self._currentIndex+2, message="Expression expected.")
+            if not self.inRange(0) or (
+                self.token(0) == Tokens.DOT and not self.inRange(1)
+            ):
+                raise ExpectedMoreTokensException(
+                    self._currentIndex + 2, message="Expression expected."
+                )
             if not self.isvariable(self.token(0)):
                 break
             # Support expressions like: \x y.M == \x.\y.M
-            vars.append(self.get_next_token_variable('abstracted'))
+            vars.append(self.get_next_token_variable("abstracted"))
         if self.inRange(0) and self.token(0) == Tokens.DOT:
-            self.token() #swallow the dot
+            self.token()  # swallow the dot
 
         accum = self.process_next_expression(tok)
         while vars:
@@ -370,18 +401,25 @@ class LogicParser(object):
         factory = self.get_QuantifiedExpression_factory(tok)
 
         if not self.inRange(0):
-            raise ExpectedMoreTokensException(self._currentIndex+2,
-                                              message="Variable and Expression expected following quantifier '%s'." % tok)
-        vars = [self.get_next_token_variable('quantified')]
+            raise ExpectedMoreTokensException(
+                self._currentIndex + 2,
+                message="Variable and Expression expected following quantifier '%s'."
+                % tok,
+            )
+        vars = [self.get_next_token_variable("quantified")]
         while True:
-            if not self.inRange(0) or (self.token(0) == Tokens.DOT and not self.inRange(1)):
-                raise ExpectedMoreTokensException(self._currentIndex+2, message="Expression expected.")
+            if not self.inRange(0) or (
+                self.token(0) == Tokens.DOT and not self.inRange(1)
+            ):
+                raise ExpectedMoreTokensException(
+                    self._currentIndex + 2, message="Expression expected."
+                )
             if not self.isvariable(self.token(0)):
                 break
             # Support expressions like: some x y.M == some x.some y.M
-            vars.append(self.get_next_token_variable('quantified'))
+            vars.append(self.get_next_token_variable("quantified"))
         if self.inRange(0) and self.token(0) == Tokens.DOT:
-            self.token() #swallow the dot
+            self.token()  # swallow the dot
 
         accum = self.process_next_expression(tok)
         while vars:
@@ -402,7 +440,7 @@ class LogicParser(object):
         return factory(variable, term)
 
     def handle_open(self, tok, context):
-        #Expression is in parens
+        # Expression is in parens
         accum = self.process_next_expression(None)
         self.assertNextToken(Tokens.CLOSE)
         return accum
@@ -413,9 +451,13 @@ class LogicParser(object):
         Otherwise, the parameter will be returned."""
         if self.inRange(0):
             tok = self.token(0)
-            if tok in Tokens.EQ_LIST + Tokens.NEQ_LIST and self.has_priority(tok, context):
-                self.token() #swallow the "=" or "!="
-                expression = self.make_EqualityExpression(expression, self.process_next_expression(tok))
+            if tok in Tokens.EQ_LIST + Tokens.NEQ_LIST and self.has_priority(
+                tok, context
+            ):
+                self.token()  # swallow the "=" or "!="
+                expression = self.make_EqualityExpression(
+                    expression, self.process_next_expression(tok)
+                )
                 if tok in Tokens.NEQ_LIST:
                     expression = self.make_NegatedExpression(expression)
         return expression
@@ -433,9 +475,10 @@ class LogicParser(object):
             tok = self.token(0)
             factory = self.get_BooleanExpression_factory(tok)
             if factory and self.has_priority(tok, context):
-                self.token() #swallow the operator
-                expression = self.make_BooleanExpression(factory, expression,
-                                                         self.process_next_expression(tok))
+                self.token()  # swallow the operator
+                expression = self.make_BooleanExpression(
+                    factory, expression, self.process_next_expression(tok)
+                )
             else:
                 break
         return expression
@@ -464,22 +507,30 @@ class LogicParser(object):
         argument expression."""
         if self.has_priority(APP, context):
             if self.inRange(0) and self.token(0) == Tokens.OPEN:
-                if not isinstance(expression, LambdaExpression) and \
-                   not isinstance(expression, ApplicationExpression) and \
-                   not isinstance(expression, FunctionVariableExpression) and \
-                   not isinstance(expression, ConstantExpression):
-                    raise LogicalExpressionException(self._currentIndex,
-                                         ("The function '%s" % expression) +
-                                         "' is not a Lambda Expression, an "
-                                         "Application Expression, or a "
-                                         "functional predicate, so it may "
-                                         "not take arguments.")
-                self.token() #swallow then open paren
-                #curry the arguments
-                accum = self.make_ApplicationExpression(expression, self.process_next_expression(APP))
+                if (
+                    not isinstance(expression, LambdaExpression)
+                    and not isinstance(expression, ApplicationExpression)
+                    and not isinstance(expression, FunctionVariableExpression)
+                    and not isinstance(expression, ConstantExpression)
+                ):
+                    raise LogicalExpressionException(
+                        self._currentIndex,
+                        ("The function '%s" % expression)
+                        + "' is not a Lambda Expression, an "
+                        "Application Expression, or a "
+                        "functional predicate, so it may "
+                        "not take arguments.",
+                    )
+                self.token()  # swallow the open paren
+                # curry the arguments
+                accum = self.make_ApplicationExpression(
+                    expression, self.process_next_expression(APP)
+                )
                 while self.inRange(0) and self.token(0) == Tokens.COMMA:
-                    self.token() #swallow the comma
-                    accum = self.make_ApplicationExpression(accum, self.process_next_expression(APP))
+                    self.token()  # swallow the comma
+                    accum = self.make_ApplicationExpression(
+                        accum, self.process_next_expression(APP)
+                    )
                 self.assertNextToken(Tokens.CLOSE)
                 return accum
         return expression
@@ -494,15 +545,20 @@ class LogicParser(object):
         return LambdaExpression(variable, term)
 
     def has_priority(self, operation, context):
-        return self.operator_precedence[operation] < self.operator_precedence[context] or \
-               (operation in self.right_associated_operations and \
-                self.operator_precedence[operation] == self.operator_precedence[context])
+        return self.operator_precedence[operation] < self.operator_precedence[
+            context
+        ] or (
+            operation in self.right_associated_operations
+            and self.operator_precedence[operation] == self.operator_precedence[context]
+        )
 
     def assertNextToken(self, expected):
         try:
             tok = self.token()
         except ExpectedMoreTokensException as e:
-            raise ExpectedMoreTokensException(e.index, message="Expected token '%s'." % expected)
+            raise ExpectedMoreTokensException(
+                e.index, message="Expected token '%s'." % expected
+            )
 
         if isinstance(expected, list):
             if tok not in expected:
@@ -521,10 +577,10 @@ class LogicParser(object):
 
     def __repr__(self):
         if self.inRange(0):
-            msg = 'Next token: ' + self.token(0)
+            msg = "Next token: " + self.token(0)
         else:
-            msg = 'No more tokens'
-        return '<' + self.__class__.__name__ + ': ' + msg + '>'
+            msg = "No more tokens"
+        return "<" + self.__class__.__name__ + ": " + msg + ">"
 
 
 def read_logic(s, logic_parser=None, encoding=None):
@@ -548,22 +604,22 @@ def read_logic(s, logic_parser=None, encoding=None):
     statements = []
     for linenum, line in enumerate(s.splitlines()):
         line = line.strip()
-        if line.startswith('#') or line=='': continue
+        if line.startswith("#") or line == "":
+            continue
         try:
             statements.append(logic_parser.parse(line))
         except LogicalExpressionException:
-            raise ValueError('Unable to parse line %s: %s' % (linenum, line))
+            raise ValueError("Unable to parse line %s: %s" % (linenum, line))
     return statements
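
read_logic() as reformatted above parses one statement per line, skipping blank lines and '#' comments, and turns a parse failure into a ValueError naming the offending line. A small sketch, assuming the default LogicParser:

from nltk.sem.logic import read_logic

stmts = read_logic("""
# comment lines and blank lines are skipped
man(socrates)
all x.(man(x) -> mortal(x))
""")
print(len(stmts))  # 2
print(stmts[1])    # all x.(man(x) -> mortal(x))
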
 
 
 @total_ordering
-@python_2_unicode_compatible
 class Variable(object):
     def __init__(self, name):
         """
         :param name: the name of the variable
         """
-        assert isinstance(name, string_types), "%s is not a string" % name
+        assert isinstance(name, str), "%s is not a string" % name
         self.name = name
 
     def __eq__(self, other):
@@ -602,34 +658,34 @@ def unique_variable(pattern=None, ignore=None):
     """
     if pattern is not None:
         if is_indvar(pattern.name):
-            prefix = 'z'
+            prefix = "z"
         elif is_funcvar(pattern.name):
-            prefix = 'F'
+            prefix = "F"
         elif is_eventvar(pattern.name):
-            prefix = 'e0'
+            prefix = "e0"
         else:
             assert False, "Cannot generate a unique constant"
     else:
-        prefix = 'z'
+        prefix = "z"
 
     v = Variable("%s%s" % (prefix, _counter.get()))
     while ignore is not None and v in ignore:
         v = Variable("%s%s" % (prefix, _counter.get()))
     return v
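
unique_variable() picks its prefix from the pattern variable ('z' for individuals, 'F' for function variables, 'e0' for events) and draws numbers from a module-global counter, so the exact names in this sketch vary between runs:

from nltk.sem.logic import Variable, unique_variable

v = unique_variable(pattern=Variable('x'))              # e.g. Variable('z1')
f = unique_variable(pattern=Variable('F'))              # e.g. Variable('F2')
w = unique_variable(pattern=Variable('x'), ignore={v})  # guaranteed distinct from v
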
 
+
 def skolem_function(univ_scope=None):
     """
     Return a skolem function over the variables in univ_scope
     :param univ_scope: the universally scoped variables the skolem function ranges over
     """
-    skolem = VariableExpression(Variable('F%s' % _counter.get()))
+    skolem = VariableExpression(Variable("F%s" % _counter.get()))
     if univ_scope:
         for v in list(univ_scope):
             skolem = skolem(VariableExpression(v))
     return skolem
 
 
-@python_2_unicode_compatible
 class Type(object):
     def __repr__(self):
         return "%s" % self
@@ -641,18 +697,20 @@ class Type(object):
     def fromstring(cls, s):
         return read_type(s)
 
-@python_2_unicode_compatible
+
 class ComplexType(Type):
     def __init__(self, first, second):
-        assert(isinstance(first, Type)), "%s is not a Type" % first
-        assert(isinstance(second, Type)), "%s is not a Type" % second
+        assert isinstance(first, Type), "%s is not a Type" % first
+        assert isinstance(second, Type), "%s is not a Type" % second
         self.first = first
         self.second = second
 
     def __eq__(self, other):
-        return isinstance(other, ComplexType) and \
-               self.first == other.first and \
-               self.second == other.second
+        return (
+            isinstance(other, ComplexType)
+            and self.first == other.first
+            and self.second == other.second
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -661,8 +719,7 @@ class ComplexType(Type):
 
     def matches(self, other):
         if isinstance(other, ComplexType):
-            return self.first.matches(other.first) and \
-                   self.second.matches(other.second)
+            return self.first.matches(other.first) and self.second.matches(other.second)
         else:
             return self == ANY_TYPE
 
@@ -673,7 +730,7 @@ class ComplexType(Type):
             f = self.first.resolve(other.first)
             s = self.second.resolve(other.second)
             if f and s:
-                return ComplexType(f,s)
+                return ComplexType(f, s)
             else:
                 return None
         elif self == ANY_TYPE:
@@ -685,13 +742,14 @@ class ComplexType(Type):
         if self == ANY_TYPE:
             return "%s" % ANY_TYPE
         else:
-            return '<%s,%s>' % (self.first, self.second)
+            return "<%s,%s>" % (self.first, self.second)
 
     def str(self):
         if self == ANY_TYPE:
             return ANY_TYPE.str()
         else:
-            return '(%s -> %s)' % (self.first.str(), self.second.str())
+            return "(%s -> %s)" % (self.first.str(), self.second.str())
+
 
 class BasicType(Type):
     def __eq__(self, other):
@@ -711,40 +769,42 @@ class BasicType(Type):
         else:
             return None
 
-@python_2_unicode_compatible
+
 class EntityType(BasicType):
     def __str__(self):
-        return 'e'
+        return "e"
 
     def str(self):
-        return 'IND'
+        return "IND"
+
 
-@python_2_unicode_compatible
 class TruthValueType(BasicType):
     def __str__(self):
-        return 't'
+        return "t"
 
     def str(self):
-        return 'BOOL'
+        return "BOOL"
+
 
-@python_2_unicode_compatible
 class EventType(BasicType):
     def __str__(self):
-        return 'v'
+        return "v"
 
     def str(self):
-        return 'EVENT'
+        return "EVENT"
+
 
-@python_2_unicode_compatible
 class AnyType(BasicType, ComplexType):
     def __init__(self):
         pass
 
     @property
-    def first(self): return self
+    def first(self):
+        return self
 
     @property
-    def second(self): return self
+    def second(self):
+        return self
 
     def __eq__(self, other):
         return isinstance(other, AnyType) or other.__eq__(self)
@@ -761,10 +821,10 @@ class AnyType(BasicType, ComplexType):
         return other
 
     def __str__(self):
-        return '?'
+        return "?"
 
     def str(self):
-        return 'ANY'
+        return "ANY"
 
 
 TRUTH_TYPE = TruthValueType()
@@ -774,23 +834,24 @@ ANY_TYPE = AnyType()
 
 
 def read_type(type_string):
-    assert isinstance(type_string, string_types)
-    type_string = type_string.replace(' ', '') #remove spaces
+    assert isinstance(type_string, str)
+    type_string = type_string.replace(" ", "")  # remove spaces
 
-    if type_string[0] == '<':
-        assert type_string[-1] == '>'
+    if type_string[0] == "<":
+        assert type_string[-1] == ">"
         paren_count = 0
-        for i,char in enumerate(type_string):
-            if char == '<':
+        for i, char in enumerate(type_string):
+            if char == "<":
                 paren_count += 1
-            elif char == '>':
+            elif char == ">":
                 paren_count -= 1
                 assert paren_count > 0
-            elif char == ',':
+            elif char == ",":
                 if paren_count == 1:
                     break
-        return ComplexType(read_type(type_string[1  :i ]),
-                           read_type(type_string[i+1:-1]))
+        return ComplexType(
+            read_type(type_string[1:i]), read_type(type_string[i + 1 : -1])
+        )
     elif type_string[0] == "%s" % ENTITY_TYPE:
         return ENTITY_TYPE
     elif type_string[0] == "%s" % TRUTH_TYPE:
@@ -798,35 +859,46 @@ def read_type(type_string):
     elif type_string[0] == "%s" % ANY_TYPE:
         return ANY_TYPE
     else:
-        raise LogicalExpressionException("Unexpected character: '%s'." % type_string[0])
+        raise LogicalExpressionException(
+            None, "Unexpected character: '%s'." % type_string[0]
+        )
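
read_type() as cleaned up above accepts the angle-bracket type strings used throughout this module. A short sketch of the two notations a type answers to:

from nltk.sem.logic import read_type, ComplexType, ENTITY_TYPE, TRUTH_TYPE

t = read_type('<e,t>')   # a property: entity -> truth value
assert t == ComplexType(ENTITY_TYPE, TRUTH_TYPE)
print(t)                       # <e,t>
print(t.str())                 # (IND -> BOOL)
print(read_type('<<e,t>,t>'))  # <<e,t>,t> -- nesting parses recursively
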
 
 
 class TypeException(Exception):
     def __init__(self, msg):
         super(TypeException, self).__init__(msg)
 
+
 class InconsistentTypeHierarchyException(TypeException):
     def __init__(self, variable, expression=None):
         if expression:
-            msg = "The variable '%s' was found in multiple places with different"\
+            msg = (
+                "The variable '%s' was found in multiple places with different"
                 " types in '%s'." % (variable, expression)
+            )
         else:
-            msg = "The variable '%s' was found in multiple places with different"\
+            msg = (
+                "The variable '%s' was found in multiple places with different"
                 " types." % (variable)
+            )
         super(InconsistentTypeHierarchyException, self).__init__(msg)
 
+
 class TypeResolutionException(TypeException):
     def __init__(self, expression, other_type):
         super(TypeResolutionException, self).__init__(
-            "The type of '%s', '%s', cannot be resolved with type '%s'" %
-            (expression, expression.type, other_type))
+            "The type of '%s', '%s', cannot be resolved with type '%s'"
+            % (expression, expression.type, other_type)
+        )
+
 
 class IllegalTypeException(TypeException):
     def __init__(self, expression, other_type, allowed_type):
         super(IllegalTypeException, self).__init__(
-            "Cannot set type of %s '%s' to '%s'; must match type '%s'." %
-            (expression.__class__.__name__, expression, other_type,
-            allowed_type))
+            "Cannot set type of %s '%s' to '%s'; must match type '%s'."
+            % (expression.__class__.__name__, expression, other_type, allowed_type)
+        )
+
 
 def typecheck(expressions, signature=None):
     """
@@ -835,10 +907,10 @@ def typecheck(expressions, signature=None):
     :param signature: dict that maps variable names to types (or string
     representations of types)
     """
-    #typecheck and create master signature
+    # typecheck and create master signature
     for expression in expressions:
         signature = expression.typecheck(signature)
-    #apply master signature to all expressions
+    # apply master signature to all expressions
     for expression in expressions[:-1]:
         expression.typecheck(signature)
     return signature
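
typecheck() threads one signature through every expression and then re-checks them all against the merged result; inconsistent uses of a name raise InconsistentTypeHierarchyException. A sketch (the signature maps each name to the occurrences that fixed its type):

from nltk.sem.logic import Expression, typecheck

a = Expression.fromstring('man(x)')
b = Expression.fromstring('walks(x)')
sig = typecheck([a, b])  # 'x', 'man' and 'walks' must type consistently
print(sorted(sig))       # ['man', 'walks', 'x']

# Inconsistent use of the same name raises an exception:
# typecheck([Expression.fromstring('man(x) & man(x,y)')])  # InconsistentTypeHierarchyException
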
@@ -849,6 +921,7 @@ class SubstituteBindingsI(object):
     An interface for classes that can perform substitutions for
     variables.
     """
+
     def substitute_bindings(self, bindings):
         """
         :return: The object that is obtained by replacing
@@ -865,7 +938,6 @@ class SubstituteBindingsI(object):
         raise NotImplementedError()
 
 
-@python_2_unicode_compatible
 class Expression(SubstituteBindingsI):
     """This is the base abstract object for all logical expressions"""
 
@@ -936,6 +1008,7 @@ class Expression(SubstituteBindingsI):
 
         if prover is None:
             from nltk.inference import Prover9
+
             prover = Prover9()
         bicond = IffExpression(self.simplify(), other.simplify())
         return prover.prove(bicond)
@@ -951,8 +1024,10 @@ class Expression(SubstituteBindingsI):
                 if isinstance(val, Variable):
                     val = self.make_VariableExpression(val)
                 elif not isinstance(val, Expression):
-                    raise ValueError('Can not substitute a non-expression '
-                                     'value into an expression: %r' % (val,))
+                    raise ValueError(
+                        "Can not substitute a non-expression "
+                        "value into an expression: %r" % (val,)
+                    )
                 # Substitute bindings in the target value.
                 val = val.substitute_bindings(bindings)
                 # Replace var w/ the target value.
@@ -1010,29 +1085,34 @@ class Expression(SubstituteBindingsI):
         :param alpha_convert: bool Alpha convert automatically to avoid name clashes?
         """
         assert isinstance(variable, Variable), "%s is not a Variable" % variable
-        assert isinstance(expression, Expression), "%s is not an Expression" % expression
+        assert isinstance(expression, Expression), (
+            "%s is not an Expression" % expression
+        )
 
-        return self.visit_structured(lambda e: e.replace(variable, expression,
-                                                         replace_bound, alpha_convert),
-                                     self.__class__)
+        return self.visit_structured(
+            lambda e: e.replace(variable, expression, replace_bound, alpha_convert),
+            self.__class__,
+        )
 
     def normalize(self, newvars=None):
         """Rename auto-generated unique variables"""
+
         def get_indiv_vars(e):
             if isinstance(e, IndividualVariableExpression):
                 return set([e])
             elif isinstance(e, AbstractVariableExpression):
                 return set()
             else:
-                return e.visit(get_indiv_vars,
-                               lambda parts: reduce(operator.or_, parts, set()))
+                return e.visit(
+                    get_indiv_vars, lambda parts: reduce(operator.or_, parts, set())
+                )
 
         result = self
-        for i,e in enumerate(sorted(get_indiv_vars(self), key=lambda e: e.variable)):
-            if isinstance(e,EventVariableExpression):
-                newVar = e.__class__(Variable('e0%s' % (i+1)))
-            elif isinstance(e,IndividualVariableExpression):
-                newVar = e.__class__(Variable('z%s' % (i+1)))
+        for i, e in enumerate(sorted(get_indiv_vars(self), key=lambda e: e.variable)):
+            if isinstance(e, EventVariableExpression):
+                newVar = e.__class__(Variable("e0%s" % (i + 1)))
+            elif isinstance(e, IndividualVariableExpression):
+                newVar = e.__class__(Variable("z%s" % (i + 1)))
             else:
                 newVar = e
             result = result.replace(e.variable, newVar, True)
@@ -1071,7 +1151,7 @@ class Expression(SubstituteBindingsI):
         return self.visit(function, lambda parts: combinator(*parts))
 
     def __repr__(self):
-        return '<%s %s>' % (self.__class__.__name__, self)
+        return "<%s %s>" % (self.__class__.__name__, self)
 
     def __str__(self):
         return self.str()
@@ -1083,8 +1163,9 @@ class Expression(SubstituteBindingsI):
         variables and any variable starting with '?' or '@'.
         :return: set of ``Variable`` objects
         """
-        return self.free() | set(p for p in self.predicates()|self.constants()
-                                 if re.match('^[?@]', p.name))
+        return self.free() | set(
+            p for p in self.predicates() | self.constants() if re.match("^[?@]", p.name)
+        )
 
     def free(self):
         """
@@ -1092,24 +1173,27 @@ class Expression(SubstituteBindingsI):
         both individual and predicate variables, but not constants.
         :return: set of ``Variable`` objects
         """
-        return self.visit(lambda e: e.free(),
-                          lambda parts: reduce(operator.or_, parts, set()))
+        return self.visit(
+            lambda e: e.free(), lambda parts: reduce(operator.or_, parts, set())
+        )
 
     def constants(self):
         """
         Return a set of individual constants (non-predicates).
         :return: set of ``Variable`` objects
         """
-        return self.visit(lambda e: e.constants(),
-                          lambda parts: reduce(operator.or_, parts, set()))
+        return self.visit(
+            lambda e: e.constants(), lambda parts: reduce(operator.or_, parts, set())
+        )
 
     def predicates(self):
         """
         Return a set of predicates (constants, not variables).
         :return: set of ``Variable`` objects
         """
-        return self.visit(lambda e: e.predicates(),
-                          lambda parts: reduce(operator.or_, parts, set()))
+        return self.visit(
+            lambda e: e.predicates(), lambda parts: reduce(operator.or_, parts, set())
+        )
 
     def simplify(self):
         """
@@ -1121,7 +1205,6 @@ class Expression(SubstituteBindingsI):
         return VariableExpression(variable)
 
 
-@python_2_unicode_compatible
 class ApplicationExpression(Expression):
     r"""
     This class is used to represent two related types of logical expressions.
@@ -1150,6 +1233,7 @@ class ApplicationExpression(Expression):
     ``AbstractVariableExpression``).  This means that the example from above
     will be returned as "(\x y.see(x,y)(john))(mary)".
     """
+
     def __init__(self, function, argument):
         """
         :param function: ``Expression``, for the function expression
@@ -1184,13 +1268,21 @@ class ApplicationExpression(Expression):
 
         self.argument._set_type(ANY_TYPE, signature)
         try:
-            self.function._set_type(ComplexType(self.argument.type, other_type), signature)
+            self.function._set_type(
+                ComplexType(self.argument.type, other_type), signature
+            )
         except TypeResolutionException:
             raise TypeException(
-                    "The function '%s' is of type '%s' and cannot be applied "
-                    "to '%s' of type '%s'.  Its argument must match type '%s'."
-                    % (self.function, self.function.type, self.argument,
-                       self.argument.type, self.function.type.first))
+                "The function '%s' is of type '%s' and cannot be applied "
+                "to '%s' of type '%s'.  Its argument must match type '%s'."
+                % (
+                    self.function,
+                    self.function.type,
+                    self.argument,
+                    self.argument.type,
+                    self.function.type.first,
+                )
+            )
 
     def findtype(self, variable):
         """:see Expression.findtype()"""
@@ -1198,11 +1290,11 @@ class ApplicationExpression(Expression):
         if self.is_atom():
             function, args = self.uncurry()
         else:
-            #It's not a predicate expression ("P(x,y)"), so leave args curried
+            # It's not a predicate expression ("P(x,y)"), so leave args curried
             function = self.function
             args = [self.argument]
 
-        found = [arg.findtype(variable) for arg in [function]+args]
+        found = [arg.findtype(variable) for arg in [function] + args]
 
         unique = []
         for f in found:
@@ -1240,9 +1332,11 @@ class ApplicationExpression(Expression):
         return combinator([function(self.function), function(self.argument)])
 
     def __eq__(self, other):
-        return isinstance(other, ApplicationExpression) and \
-                self.function == other.function and \
-                self.argument == other.argument
+        return (
+            isinstance(other, ApplicationExpression)
+            and self.function == other.function
+            and self.argument == other.argument
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -1253,9 +1347,9 @@ class ApplicationExpression(Expression):
         # uncurry the arguments and find the base function
         if self.is_atom():
             function, args = self.uncurry()
-            arg_str = ','.join("%s" % arg for arg in args)
+            arg_str = ",".join("%s" % arg for arg in args)
         else:
-            #Leave arguments curried
+            # Leave arguments curried
             function = self.function
             arg_str = "%s" % self.argument
 
@@ -1263,8 +1357,7 @@ class ApplicationExpression(Expression):
         parenthesize_function = False
         if isinstance(function, LambdaExpression):
             if isinstance(function.term, ApplicationExpression):
-                if not isinstance(function.term.function,
-                                  AbstractVariableExpression):
+                if not isinstance(function.term.function, AbstractVariableExpression):
                     parenthesize_function = True
             elif not isinstance(function.term, BooleanExpression):
                 parenthesize_function = True
@@ -1285,7 +1378,7 @@ class ApplicationExpression(Expression):
         function = self.function
         args = [self.argument]
         while isinstance(function, ApplicationExpression):
-            #(\x.\y.sees(x,y)(john))(mary)
+            # (\x.\y.sees(x,y)(john))(mary)
             args.insert(0, function.argument)
             function = function.function
         return (function, args)
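
uncurry() is the inverse view of the currying shown earlier: it walks down nested applications and recovers the base function plus a flat argument list, which is what is_atom() and __str__ rely on. Sketch:

from nltk.sem.logic import Expression

e = Expression.fromstring('sees(john,mary)')
pred, args = e.uncurry()
print(pred)         # sees
print(len(args))    # 2 -- [john, mary], in application order
print(e.is_atom())  # True: the base function is a constant, so the atom prints flat
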
@@ -1315,9 +1408,9 @@ class ApplicationExpression(Expression):
 
 
 @total_ordering
-@python_2_unicode_compatible
 class AbstractVariableExpression(Expression):
     """This class represents a variable to be used as a predicate or entity"""
+
     def __init__(self, variable):
         """
         :param variable: ``Variable``, for the variable
@@ -1331,7 +1424,9 @@ class AbstractVariableExpression(Expression):
     def replace(self, variable, expression, replace_bound=False, alpha_convert=True):
         """:see: Expression.replace()"""
         assert isinstance(variable, Variable), "%s is not an Variable" % variable
-        assert isinstance(expression, Expression), "%s is not an Expression" % expression
+        assert isinstance(expression, Expression), (
+            "%s is not an Expression" % expression
+        )
         if self.variable == variable:
             return expression
         else:
@@ -1369,8 +1464,10 @@ class AbstractVariableExpression(Expression):
     def __eq__(self, other):
         """Allow equality between instances of ``AbstractVariableExpression``
         subtypes."""
-        return isinstance(other, AbstractVariableExpression) and \
-               self.variable == other.variable
+        return (
+            isinstance(other, AbstractVariableExpression)
+            and self.variable == other.variable
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -1385,9 +1482,11 @@ class AbstractVariableExpression(Expression):
     def __str__(self):
         return "%s" % self.variable
 
+
 class IndividualVariableExpression(AbstractVariableExpression):
     """This class represents variables that take the form of a single lowercase
     character (other than 'e') followed by zero or more digits."""
+
     def _set_type(self, other_type=ANY_TYPE, signature=None):
         """:see Expression._set_type()"""
         assert isinstance(other_type, Type)
@@ -1400,7 +1499,9 @@ class IndividualVariableExpression(AbstractVariableExpression):
 
         signature[self.variable.name].append(self)
 
-    def _get_type(self): return ENTITY_TYPE
+    def _get_type(self):
+        return ENTITY_TYPE
+
     type = property(_get_type, _set_type)
 
     def free(self):
@@ -1411,9 +1512,11 @@ class IndividualVariableExpression(AbstractVariableExpression):
         """:see: Expression.constants()"""
         return set()
 
+
 class FunctionVariableExpression(AbstractVariableExpression):
     """This class represents variables that take the form of a single uppercase
     character followed by zero or more digits."""
+
     type = ANY_TYPE
 
     def free(self):
@@ -1424,14 +1527,18 @@ class FunctionVariableExpression(AbstractVariableExpression):
         """:see: Expression.constants()"""
         return set()
 
+
 class EventVariableExpression(IndividualVariableExpression):
     """This class represents variables that take the form of a single lowercase
     'e' character followed by zero or more digits."""
+
     type = EVENT_TYPE
 
+
 class ConstantExpression(AbstractVariableExpression):
     """This class represents variables that do not take the form of a single
     character followed by zero or more digits."""
+
     type = ENTITY_TYPE
 
     def _set_type(self, other_type=ANY_TYPE, signature=None):
@@ -1442,7 +1549,7 @@ class ConstantExpression(AbstractVariableExpression):
             signature = defaultdict(list)
 
         if other_type == ANY_TYPE:
-            #entity type by default, for individuals
+            # entity type by default, for individuals
             resolution = ENTITY_TYPE
         else:
             resolution = other_type
@@ -1486,6 +1593,7 @@ def VariableExpression(variable):
 class VariableBinderExpression(Expression):
     """This an abstract class for any Expression that binds a variable in an
     Expression.  This includes LambdaExpressions and Quantified Expressions"""
+
     def __init__(self, variable, term):
         """
         :param variable: ``Variable``, for the variable
@@ -1499,14 +1607,19 @@ class VariableBinderExpression(Expression):
     def replace(self, variable, expression, replace_bound=False, alpha_convert=True):
         """:see: Expression.replace()"""
         assert isinstance(variable, Variable), "%s is not a Variable" % variable
-        assert isinstance(expression, Expression), "%s is not an Expression" % expression
-        #if the bound variable is the thing being replaced
+        assert isinstance(expression, Expression), (
+            "%s is not an Expression" % expression
+        )
+        # if the bound variable is the thing being replaced
         if self.variable == variable:
             if replace_bound:
-                assert isinstance(expression, AbstractVariableExpression),\
-                       "%s is not a AbstractVariableExpression" % expression
-                return self.__class__(expression.variable,
-                                      self.term.replace(variable, expression, True, alpha_convert))
+                assert isinstance(expression, AbstractVariableExpression), (
+                    "%s is not a AbstractVariableExpression" % expression
+                )
+                return self.__class__(
+                    expression.variable,
+                    self.term.replace(variable, expression, True, alpha_convert),
+                )
             else:
                 return self
         else:
@@ -1515,9 +1628,11 @@ class VariableBinderExpression(Expression):
             if alpha_convert and self.variable in expression.free():
                 self = self.alpha_convert(unique_variable(pattern=self.variable))
 
-            #replace in the term
-            return self.__class__(self.variable,
-                                  self.term.replace(variable, expression, replace_bound, alpha_convert))
+            # replace in the term
+            return self.__class__(
+                self.variable,
+                self.term.replace(variable, expression, replace_bound, alpha_convert),
+            )
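
replace() on a binder only touches free occurrences, and (with alpha_convert=True, the default) renames the bound variable first whenever the incoming expression would otherwise be captured. Sketch (the fresh name is counter-dependent):

from nltk.sem.logic import Expression, Variable

e = Expression.fromstring('exists x.P(x,y)')
print(e.replace(Variable('y'), Expression.fromstring('john')))
# exists x.P(x,john)  -- the bound x is untouched
print(e.replace(Variable('y'), Expression.fromstring('x')))
# exists z1.P(z1,x)   -- alpha conversion avoided capturing the new free x
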
 
     def alpha_convert(self, newvar):
         """Rename all occurrences of the variable introduced by this variable
@@ -1525,10 +1640,9 @@ class VariableBinderExpression(Expression):
         :param newvar: ``Variable``, for the new variable
         """
         assert isinstance(newvar, Variable), "%s is not a Variable" % newvar
-        return self.__class__(newvar,
-                              self.term.replace(self.variable,
-                                                VariableExpression(newvar),
-                                                True))
+        return self.__class__(
+            newvar, self.term.replace(self.variable, VariableExpression(newvar), True)
+        )
 
     def free(self):
         """:see: Expression.free()"""
@@ -1553,8 +1667,7 @@ class VariableBinderExpression(Expression):
     def __eq__(self, other):
         r"""Defines equality modulo alphabetic variance.  If we are comparing
         \x.M  and \y.N, then check equality of M and N[x/y]."""
-        if isinstance(self, other.__class__) or \
-           isinstance(other, self.__class__):
+        if isinstance(self, other.__class__) or isinstance(other, self.__class__):
             if self.variable == other.variable:
                 return self.term == other.term
             else:
@@ -1570,12 +1683,10 @@ class VariableBinderExpression(Expression):
     __hash__ = Expression.__hash__
 
 
-@python_2_unicode_compatible
 class LambdaExpression(VariableBinderExpression):
     @property
     def type(self):
-        return ComplexType(self.term.findtype(self.variable),
-                           self.term.type)
+        return ComplexType(self.term.findtype(self.variable), self.term.type)
 
     def _set_type(self, other_type=ANY_TYPE, signature=None):
         """:see Expression._set_type()"""
@@ -1594,14 +1705,18 @@ class LambdaExpression(VariableBinderExpression):
         while term.__class__ == self.__class__:
             variables.append(term.variable)
             term = term.term
-        return Tokens.LAMBDA + ' '.join("%s" % v for v in variables) + \
-               Tokens.DOT + "%s" % term
+        return (
+            Tokens.LAMBDA
+            + " ".join("%s" % v for v in variables)
+            + Tokens.DOT
+            + "%s" % term
+        )
 
 
-@python_2_unicode_compatible
 class QuantifiedExpression(VariableBinderExpression):
     @property
-    def type(self): return TRUTH_TYPE
+    def type(self):
+        return TRUTH_TYPE
 
     def _set_type(self, other_type=ANY_TYPE, signature=None):
         """:see Expression._set_type()"""
@@ -1620,26 +1735,33 @@ class QuantifiedExpression(VariableBinderExpression):
         while term.__class__ == self.__class__:
             variables.append(term.variable)
             term = term.term
-        return self.getQuantifier() + ' ' + ' '.join("%s" % v for v in variables) + \
-               Tokens.DOT + "%s" % term
+        return (
+            self.getQuantifier()
+            + " "
+            + " ".join("%s" % v for v in variables)
+            + Tokens.DOT
+            + "%s" % term
+        )
+
 
 class ExistsExpression(QuantifiedExpression):
     def getQuantifier(self):
         return Tokens.EXISTS
 
+
 class AllExpression(QuantifiedExpression):
     def getQuantifier(self):
         return Tokens.ALL
 
 
-@python_2_unicode_compatible
 class NegatedExpression(Expression):
     def __init__(self, term):
         assert isinstance(term, Expression), "%s is not an Expression" % term
         self.term = term
 
     @property
-    def type(self): return TRUTH_TYPE
+    def type(self):
+        return TRUTH_TYPE
 
     def _set_type(self, other_type=ANY_TYPE, signature=None):
         """:see Expression._set_type()"""
@@ -1676,7 +1798,6 @@ class NegatedExpression(Expression):
         return Tokens.NOT + "%s" % self.term
 
 
-@python_2_unicode_compatible
 class BinaryExpression(Expression):
     def __init__(self, first, second):
         assert isinstance(first, Expression), "%s is not an Expression" % first
@@ -1685,7 +1806,8 @@ class BinaryExpression(Expression):
         self.second = second
 
     @property
-    def type(self): return TRUTH_TYPE
+    def type(self):
+        return TRUTH_TYPE
 
     def findtype(self, variable):
         """:see Expression.findtype()"""
@@ -1704,9 +1826,11 @@ class BinaryExpression(Expression):
         return combinator([function(self.first), function(self.second)])
 
     def __eq__(self, other):
-        return (isinstance(self, other.__class__) or \
-                isinstance(other, self.__class__)) and \
-               self.first == other.first and self.second == other.second
+        return (
+            (isinstance(self, other.__class__) or isinstance(other, self.__class__))
+            and self.first == other.first
+            and self.second == other.second
+        )
 
     def __ne__(self, other):
         return not self == other
@@ -1716,8 +1840,7 @@ class BinaryExpression(Expression):
     def __str__(self):
         first = self._str_subex(self.first)
         second = self._str_subex(self.second)
-        return Tokens.OPEN + first + ' ' + self.getOp() \
-                + ' ' + second + Tokens.CLOSE
+        return Tokens.OPEN + first + " " + self.getOp() + " " + second + Tokens.CLOSE
 
     def _str_subex(self, subex):
         return "%s" % subex
@@ -1736,8 +1859,10 @@ class BooleanExpression(BinaryExpression):
         self.first._set_type(TRUTH_TYPE, signature)
         self.second._set_type(TRUTH_TYPE, signature)
 
+
 class AndExpression(BooleanExpression):
     """This class represents conjunctions"""
+
     def getOp(self):
         return Tokens.AND
 
@@ -1747,8 +1872,10 @@ class AndExpression(BooleanExpression):
             return s[1:-1]
         return s
 
+
 class OrExpression(BooleanExpression):
     """This class represents disjunctions"""
+
     def getOp(self):
         return Tokens.OR
 
@@ -1758,19 +1885,24 @@ class OrExpression(BooleanExpression):
             return s[1:-1]
         return s
 
+
 class ImpExpression(BooleanExpression):
     """This class represents implications"""
+
     def getOp(self):
         return Tokens.IMP
 
+
 class IffExpression(BooleanExpression):
     """This class represents biconditionals"""
+
     def getOp(self):
         return Tokens.IFF
 
 
 class EqualityExpression(BinaryExpression):
     """This class represents equality expressions like "(x = y)"."""
+
     def _set_type(self, other_type=ANY_TYPE, signature=None):
         """:see Expression._set_type()"""
         assert isinstance(other_type, Type)
@@ -1789,29 +1921,36 @@ class EqualityExpression(BinaryExpression):
 
 ### Utilities
 
+
 class LogicalExpressionException(Exception):
     def __init__(self, index, message):
         self.index = index
         Exception.__init__(self, message)
 
+
 class UnexpectedTokenException(LogicalExpressionException):
     def __init__(self, index, unexpected=None, expected=None, message=None):
         if unexpected and expected:
-            msg = "Unexpected token: '%s'.  " \
-                  "Expected token '%s'." % (unexpected, expected)
+            msg = "Unexpected token: '%s'.  " "Expected token '%s'." % (
+                unexpected,
+                expected,
+            )
         elif unexpected:
             msg = "Unexpected token: '%s'." % unexpected
             if message:
-                msg += '  '+message
+                msg += "  " + message
         else:
             msg = "Expected token '%s'." % expected
         LogicalExpressionException.__init__(self, index, msg)
 
+
 class ExpectedMoreTokensException(LogicalExpressionException):
     def __init__(self, index, message=None):
         if not message:
-            message = 'More tokens expected.'
-        LogicalExpressionException.__init__(self, index, 'End of input found.  ' + message)
+            message = "More tokens expected."
+        LogicalExpressionException.__init__(
+            self, index, "End of input found.  " + message
+        )
 
 
 def is_indvar(expr):
@@ -1822,8 +1961,9 @@ def is_indvar(expr):
     :param expr: str
     :return: bool True if expr is of the correct form
     """
-    assert isinstance(expr, string_types), "%s is not a string" % expr
-    return re.match(r'^[a-df-z]\d*$', expr) is not None
+    assert isinstance(expr, str), "%s is not a string" % expr
+    return re.match(r"^[a-df-z]\d*$", expr) is not None
+
 
 def is_funcvar(expr):
     """
@@ -1833,8 +1973,9 @@ def is_funcvar(expr):
     :param expr: str
     :return: bool True if expr is of the correct form
     """
-    assert isinstance(expr, string_types), "%s is not a string" % expr
-    return re.match(r'^[A-Z]\d*$', expr) is not None
+    assert isinstance(expr, str), "%s is not a string" % expr
+    return re.match(r"^[A-Z]\d*$", expr) is not None
+
 
 def is_eventvar(expr):
     """
@@ -1844,57 +1985,59 @@ def is_eventvar(expr):
     :param expr: str
     :return: bool True if expr is of the correct form
     """
-    assert isinstance(expr, string_types), "%s is not a string" % expr
-    return re.match(r'^e\d*$', expr) is not None
+    assert isinstance(expr, str), "%s is not a string" % expr
+    return re.match(r"^e\d*$", expr) is not None
 
 
 def demo():
     lexpr = Expression.fromstring
-    print('='*20 + 'Test reader' + '='*20)
-    print(lexpr(r'john'))
-    print(lexpr(r'man(x)'))
-    print(lexpr(r'-man(x)'))
-    print(lexpr(r'(man(x) & tall(x) & walks(x))'))
-    print(lexpr(r'exists x.(man(x) & tall(x) & walks(x))'))
-    print(lexpr(r'\x.man(x)'))
-    print(lexpr(r'\x.man(x)(john)'))
-    print(lexpr(r'\x y.sees(x,y)'))
-    print(lexpr(r'\x y.sees(x,y)(a,b)'))
-    print(lexpr(r'(\x.exists y.walks(x,y))(x)'))
-    print(lexpr(r'exists x.x = y'))
-    print(lexpr(r'exists x.(x = y)'))
-    print(lexpr('P(x) & x=y & P(y)'))
-    print(lexpr(r'\P Q.exists x.(P(x) & Q(x))'))
-    print(lexpr(r'man(x) <-> tall(x)'))
-
-    print('='*20 + 'Test simplify' + '='*20)
-    print(lexpr(r'\x.\y.sees(x,y)(john)(mary)').simplify())
-    print(lexpr(r'\x.\y.sees(x,y)(john, mary)').simplify())
-    print(lexpr(r'all x.(man(x) & (\x.exists y.walks(x,y))(x))').simplify())
-    print(lexpr(r'(\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x))(\x.bark(x))').simplify())
-
-    print('='*20 + 'Test alpha conversion and binder expression equality' + '='*20)
-    e1 = lexpr('exists x.P(x)')
+    print("=" * 20 + "Test reader" + "=" * 20)
+    print(lexpr(r"john"))
+    print(lexpr(r"man(x)"))
+    print(lexpr(r"-man(x)"))
+    print(lexpr(r"(man(x) & tall(x) & walks(x))"))
+    print(lexpr(r"exists x.(man(x) & tall(x) & walks(x))"))
+    print(lexpr(r"\x.man(x)"))
+    print(lexpr(r"\x.man(x)(john)"))
+    print(lexpr(r"\x y.sees(x,y)"))
+    print(lexpr(r"\x y.sees(x,y)(a,b)"))
+    print(lexpr(r"(\x.exists y.walks(x,y))(x)"))
+    print(lexpr(r"exists x.x = y"))
+    print(lexpr(r"exists x.(x = y)"))
+    print(lexpr("P(x) & x=y & P(y)"))
+    print(lexpr(r"\P Q.exists x.(P(x) & Q(x))"))
+    print(lexpr(r"man(x) <-> tall(x)"))
+
+    print("=" * 20 + "Test simplify" + "=" * 20)
+    print(lexpr(r"\x.\y.sees(x,y)(john)(mary)").simplify())
+    print(lexpr(r"\x.\y.sees(x,y)(john, mary)").simplify())
+    print(lexpr(r"all x.(man(x) & (\x.exists y.walks(x,y))(x))").simplify())
+    print(lexpr(r"(\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x))(\x.bark(x))").simplify())
+
+    print("=" * 20 + "Test alpha conversion and binder expression equality" + "=" * 20)
+    e1 = lexpr("exists x.P(x)")
     print(e1)
-    e2 = e1.alpha_convert(Variable('z'))
+    e2 = e1.alpha_convert(Variable("z"))
     print(e2)
     print(e1 == e2)
 
+
 def demo_errors():
-    print('='*20 + 'Test reader errors' + '='*20)
-    demoException('(P(x) & Q(x)')
-    demoException('((P(x) &) & Q(x))')
-    demoException('P(x) -> ')
-    demoException('P(x')
-    demoException('P(x,')
-    demoException('P(x,)')
-    demoException('exists')
-    demoException('exists x.')
-    demoException('\\')
-    demoException('\\ x y.')
-    demoException('P(x)Q(x)')
-    demoException('(P(x)Q(x)')
-    demoException('exists x -> y')
+    print("=" * 20 + "Test reader errors" + "=" * 20)
+    demoException("(P(x) & Q(x)")
+    demoException("((P(x) &) & Q(x))")
+    demoException("P(x) -> ")
+    demoException("P(x")
+    demoException("P(x,")
+    demoException("P(x,)")
+    demoException("exists")
+    demoException("exists x.")
+    demoException("\\")
+    demoException("\\ x y.")
+    demoException("P(x)Q(x)")
+    demoException("(P(x)Q(x)")
+    demoException("exists x -> y")
+
 
 def demoException(s):
     try:
@@ -1902,9 +2045,11 @@ def demoException(s):
     except LogicalExpressionException as e:
         print("%s: %s" % (e.__class__.__name__, e))
 
+
 def printtype(ex):
     print("%s : %s" % (ex.str(), ex.type))
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
 #    demo_errors()
diff --git a/nlp_resource_data/nltk/sem/logic.pyc b/nlp_resource_data/nltk/sem/logic.pyc
deleted file mode 100755 (executable)
index e52d381..0000000
Binary files a/nlp_resource_data/nltk/sem/logic.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sem/relextract.py b/nlp_resource_data/nltk/sem/relextract.py
old mode 100755 (executable)
new mode 100644 (file)
index a54b5aa..1d1ec76
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Relation Extraction
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -19,27 +19,45 @@ The two serialization outputs are "rtuple" and "clause".
 - A clause is an atom of the form ``relsym(subjsym, objsym)``,
   where the relation, subject and object have been canonicalized to single strings.
 """
-from __future__ import print_function
 
 # todo: get a more general solution to canonicalized symbols for clauses -- maybe use xmlcharrefs?
 
 from collections import defaultdict
+import html
 import re
 
-from six.moves import html_entities
-
 # Dictionary that associates corpora with NE classes
 NE_CLASSES = {
-    'ieer': ['LOCATION', 'ORGANIZATION', 'PERSON', 'DURATION',
-            'DATE', 'CARDINAL', 'PERCENT', 'MONEY', 'MEASURE'],
-    'conll2002': ['LOC', 'PER', 'ORG'],
-    'ace': ['LOCATION', 'ORGANIZATION', 'PERSON', 'DURATION',
-            'DATE', 'CARDINAL', 'PERCENT', 'MONEY', 'MEASURE', 'FACILITY', 'GPE'],
-    }
+    "ieer": [
+        "LOCATION",
+        "ORGANIZATION",
+        "PERSON",
+        "DURATION",
+        "DATE",
+        "CARDINAL",
+        "PERCENT",
+        "MONEY",
+        "MEASURE",
+    ],
+    "conll2002": ["LOC", "PER", "ORG"],
+    "ace": [
+        "LOCATION",
+        "ORGANIZATION",
+        "PERSON",
+        "DURATION",
+        "DATE",
+        "CARDINAL",
+        "PERCENT",
+        "MONEY",
+        "MEASURE",
+        "FACILITY",
+        "GPE",
+    ],
+}
 
 # Allow abbreviated class labels
-short2long = dict(LOC = 'LOCATION', ORG = 'ORGANIZATION', PER = 'PERSON')
-long2short = dict(LOCATION ='LOC', ORGANIZATION = 'ORG', PERSON = 'PER')
+short2long = dict(LOC="LOCATION", ORG="ORGANIZATION", PER="PERSON")
+long2short = dict(LOCATION="LOC", ORGANIZATION="ORG", PERSON="PER")
 
 
 def _expand(type):
@@ -53,6 +71,7 @@ def _expand(type):
     except KeyError:
         return type
 
+
 def class_abbrev(type):
     """
     Abbreviate an NE class name.
@@ -65,7 +84,7 @@ def class_abbrev(type):
         return type
 
 
-def _join(lst, sep=' ', untag=False):
+def _join(lst, sep=" ", untag=False):
     """
     Join a list into a string, turning tags tuples into tag strings or just words.
     :param untag: if ``True``, omit the tag from tagged input strings.
@@ -78,26 +97,23 @@ def _join(lst, sep=' ', untag=False):
         if untag:
             return sep.join(tup[0] for tup in lst)
         from nltk.tag import tuple2str
+
         return sep.join(tuple2str(tup) for tup in lst)
 
-def descape_entity(m, defs=html_entities.entitydefs):
+
+def descape_entity(m, defs=html.entities.entitydefs):
     """
     Translate one entity to its ISO Latin value.
     Inspired by an example from effbot.org.
     """
-    #s = 'mcglashan_&amp;_sarrail'
-    #l = ['mcglashan', '&amp;', 'sarrail']
-    #pattern = re.compile("&(\w+?);")
-    #new = list2sym(l)
-    #s = pattern.sub(descape_entity, s)
-    #print s, new
     try:
         return defs[m.group(1)]
 
     except KeyError:
-        return m.group(0) # use as is
+        return m.group(0)  # use as is
+
 
 def list2sym(lst):
     """
@@ -106,13 +122,14 @@ def list2sym(lst):
     :return: a Unicode string without whitespace
     :rtype: str
     """
-    sym = _join(lst, '_', untag=True)
+    sym = _join(lst, "_", untag=True)
     sym = sym.lower()
     ENT = re.compile(r"&(\w+?);")  # raw string avoids Py3's invalid-escape warning
     sym = ENT.sub(descape_entity, sym)
-    sym = sym.replace('.', '')
+    sym = sym.replace(".", "")
     return sym
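
list2sym() reproduces the worked example that used to live in the descape_entity comment above: tagged tokens go in, and a lowercase underscore-joined symbol with HTML entities descaped comes out.

from nltk.sem.relextract import list2sym

list2sym([('McGlashan', 'NNP'), ('&amp;', 'CC'), ('Sarrail', 'NNP')])
# -> 'mcglashan_&_sarrail'
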
 
+
 def tree2semi_rel(tree):
     """
     Group a chunk structure into a list of 'semi-relations' of the form (list(str), ``Tree``).
@@ -158,23 +175,31 @@ def semi_rel2reldict(pairs, window=5, trace=False):
     result = []
     while len(pairs) > 2:
         reldict = defaultdict(str)
-        reldict['lcon'] = _join(pairs[0][0][-window:])
-        reldict['subjclass'] = pairs[0][1].label()
-        reldict['subjtext'] = _join(pairs[0][1].leaves())
-        reldict['subjsym'] = list2sym(pairs[0][1].leaves())
-        reldict['filler'] = _join(pairs[1][0])
-        reldict['untagged_filler'] = _join(pairs[1][0], untag=True)
-        reldict['objclass'] = pairs[1][1].label()
-        reldict['objtext'] = _join(pairs[1][1].leaves())
-        reldict['objsym'] = list2sym(pairs[1][1].leaves())
-        reldict['rcon'] = _join(pairs[2][0][:window])
+        reldict["lcon"] = _join(pairs[0][0][-window:])
+        reldict["subjclass"] = pairs[0][1].label()
+        reldict["subjtext"] = _join(pairs[0][1].leaves())
+        reldict["subjsym"] = list2sym(pairs[0][1].leaves())
+        reldict["filler"] = _join(pairs[1][0])
+        reldict["untagged_filler"] = _join(pairs[1][0], untag=True)
+        reldict["objclass"] = pairs[1][1].label()
+        reldict["objtext"] = _join(pairs[1][1].leaves())
+        reldict["objsym"] = list2sym(pairs[1][1].leaves())
+        reldict["rcon"] = _join(pairs[2][0][:window])
         if trace:
-            print("(%s(%s, %s)" % (reldict['untagged_filler'], reldict['subjclass'], reldict['objclass']))
+            print(
+                "(%s(%s, %s)"
+                % (
+                    reldict["untagged_filler"],
+                    reldict["subjclass"],
+                    reldict["objclass"],
+                )
+            )
         result.append(reldict)
         pairs = pairs[1:]
     return result
 
-def extract_rels(subjclass, objclass, doc, corpus='ace', pattern=None, window=10):
+
+def extract_rels(subjclass, objclass, doc, corpus="ace", pattern=None, window=10):
     """
     Filter the output of ``semi_rel2reldict`` according to specified NE classes and a filler pattern.
 
@@ -204,26 +229,33 @@ def extract_rels(subjclass, objclass, doc, corpus='ace', pattern=None, window=10
         if _expand(subjclass) in NE_CLASSES[corpus]:
             subjclass = _expand(subjclass)
         else:
-            raise ValueError("your value for the subject type has not been recognized: %s" % subjclass)
+            raise ValueError(
+                "your value for the subject type has not been recognized: %s"
+                % subjclass
+            )
     if objclass and objclass not in NE_CLASSES[corpus]:
         if _expand(objclass) in NE_CLASSES[corpus]:
             objclass = _expand(objclass)
         else:
-            raise ValueError("your value for the object type has not been recognized: %s" % objclass)
+            raise ValueError(
+                "your value for the object type has not been recognized: %s" % objclass
+            )
 
-    if corpus == 'ace' or corpus == 'conll2002':
+    if corpus == "ace" or corpus == "conll2002":
         pairs = tree2semi_rel(doc)
-    elif corpus == 'ieer':
+    elif corpus == "ieer":
         pairs = tree2semi_rel(doc.text) + tree2semi_rel(doc.headline)
     else:
         raise ValueError("corpus type not recognized")
 
     reldicts = semi_rel2reldict(pairs)
 
-    relfilter = lambda x: (x['subjclass'] == subjclass and
-                           len(x['filler'].split()) <= window and
-                           pattern.match(x['filler']) and
-                           x['objclass'] == objclass)
+    relfilter = lambda x: (
+        x["subjclass"] == subjclass
+        and len(x["filler"].split()) <= window
+        and pattern.match(x["filler"])
+        and x["objclass"] == objclass
+    )
 
     return list(filter(relfilter, reldicts))
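
End to end, extract_rels() is typically driven the way the demos below drive it. A compact sketch over the IEER corpus (requires nltk.download('ieer'); the sample output line is illustrative):

import re
from nltk.corpus import ieer
from nltk.sem.relextract import extract_rels, rtuple

IN = re.compile(r'.*\bin\b(?!\b.+ing)')  # 'in' as a locative filler, not '...ing in'
for fileid in ieer.fileids():
    for doc in ieer.parsed_docs(fileid):
        for rel in extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern=IN):
            print(rtuple(rel))
# e.g. [ORG: 'WHYY'] 'in' [LOC: 'Philadelphia']
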
 
@@ -234,17 +266,24 @@ def rtuple(reldict, lcon=False, rcon=False):
     :param reldict: a relation dictionary
     :type reldict: defaultdict
     """
-    items = [class_abbrev(reldict['subjclass']), reldict['subjtext'], reldict['filler'], class_abbrev(reldict['objclass']), reldict['objtext']]
-    format = '[%s: %r] %r [%s: %r]'
+    items = [
+        class_abbrev(reldict["subjclass"]),
+        reldict["subjtext"],
+        reldict["filler"],
+        class_abbrev(reldict["objclass"]),
+        reldict["objtext"],
+    ]
+    format = "[%s: %r] %r [%s: %r]"
     if lcon:
-        items = [reldict['lcon']] + items
-        format = '...%r)' + format
+        items = [reldict["lcon"]] + items
+        format = "...%r)" + format
     if rcon:
-        items.append(reldict['rcon'])
-        format = format + '(%r...'
+        items.append(reldict["rcon"])
+        format = format + "(%r..."
     printargs = tuple(items)
     return format % printargs
 
+
 def clause(reldict, relsym):
     """
     Print the relation in clausal form.
@@ -253,7 +292,7 @@ def clause(reldict, relsym):
     :param relsym: a label for the relation
     :type relsym: str
     """
-    items = (relsym, reldict['subjsym'], reldict['objsym'])
+    items = (relsym, reldict["subjsym"], reldict["objsym"])
     return "%s(%r, %r)" % items
 
 
@@ -274,20 +313,24 @@ def in_demo(trace=0, sql=True):
     query.
     """
     from nltk.corpus import ieer
+
     if sql:
         try:
             import sqlite3
-            connection =  sqlite3.connect(":memory:")
+
+            connection = sqlite3.connect(":memory:")
             connection.text_factory = sqlite3.OptimizedUnicode
             cur = connection.cursor()
-            cur.execute("""create table Locations
-            (OrgName text, LocationName text, DocID text)""")
+            cur.execute(
+                """create table Locations
+            (OrgName text, LocationName text, DocID text)"""
+            )
         except ImportError:
             import warnings
-            warnings.warn("Cannot import sqlite; sql flag will be ignored.")
 
+            warnings.warn("Cannot import sqlite; sql flag will be ignored.")
 
-    IN = re.compile(r'.*\bin\b(?!\b.+ing)')
+    IN = re.compile(r".*\bin\b(?!\b.+ing)")
 
     print()
     print("IEER: in(ORG, LOC) -- just the clauses:")
@@ -298,21 +341,26 @@ def in_demo(trace=0, sql=True):
             if trace:
                 print(doc.docno)
                 print("=" * 15)
-            for rel in extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern=IN):
-                print(clause(rel, relsym='IN'))
+            for rel in extract_rels("ORG", "LOC", doc, corpus="ieer", pattern=IN):
+                print(clause(rel, relsym="IN"))
                 if sql:
                     try:
-                        rtuple = (rel['subjtext'], rel['objtext'], doc.docno)
-                        cur.execute("""insert into Locations
-                                    values (?, ?, ?)""", rtuple)
+                        rtuple = (rel["subjtext"], rel["objtext"], doc.docno)
+                        cur.execute(
+                            """insert into Locations
+                                    values (?, ?, ?)""",
+                            rtuple,
+                        )
                         connection.commit()
                     except NameError:
                         pass
 
     if sql:
         try:
-            cur.execute("""select OrgName from Locations
-                        where LocationName = 'Atlanta'""")
+            cur.execute(
+                """select OrgName from Locations
+                        where LocationName = 'Atlanta'"""
+            )
             print()
             print("Extract data from SQL table: ORGs in Atlanta")
             print("-" * 15)
@@ -326,8 +374,10 @@ def in_demo(trace=0, sql=True):
 # Example of has_role(PER, LOC)
 ############################################
 
+
 def roles_demo(trace=0):
     from nltk.corpus import ieer
+
     roles = """
     (.*(                   # assorted roles
     analyst|
@@ -367,7 +417,7 @@ def roles_demo(trace=0):
                 print(doc.docno)
                 print("=" * 15)
                 lcon = rcon = True
-            for rel in extract_rels('PER', 'ORG', doc, corpus='ieer', pattern=ROLES):
+            for rel in extract_rels("PER", "ORG", doc, corpus="ieer", pattern=ROLES):
                 print(rtuple(rel, lcon=lcon, rcon=rcon))
 
 
@@ -384,17 +434,21 @@ def ieer_headlines():
     print("IEER: First 20 Headlines")
     print("=" * 45)
 
-    trees = [(doc.docno, doc.headline) for file in ieer.fileids() for doc in ieer.parsed_docs(file)]
+    trees = [
+        (doc.docno, doc.headline)
+        for file in ieer.fileids()
+        for doc in ieer.parsed_docs(file)
+    ]
     for tree in trees[:20]:
         print()
         print("%s:\n%s" % tree)
 
 
-
 #############################################
 ## Dutch CONLL2002: take_on_role(PER, ORG
 #############################################
 
+
 def conllned(trace=1):
     """
     Find the copula+'van' relation ('of') in the Dutch tagged training corpus
@@ -419,18 +473,21 @@ def conllned(trace=1):
     print("Dutch CoNLL2002: van(PER, ORG) -- raw rtuples with context:")
     print("=" * 45)
 
-
-    for doc in conll2002.chunked_sents('ned.train'):
+    for doc in conll2002.chunked_sents("ned.train"):
         lcon = rcon = False
         if trace:
-                lcon = rcon = True
-        for rel in extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN, window=10):
+            lcon = rcon = True
+        for rel in extract_rels(
+            "PER", "ORG", doc, corpus="conll2002", pattern=VAN, window=10
+        ):
             print(rtuple(rel, lcon=lcon, rcon=rcon))
 
+
 #############################################
 ## Spanish CONLL2002: (PER, ORG)
 #############################################
 
+
 def conllesp():
     from nltk.corpus import conll2002
 
@@ -446,9 +503,13 @@ def conllesp():
     print()
     print("Spanish CoNLL2002: de(ORG, LOC) -- just the first 10 clauses:")
     print("=" * 45)
-    rels = [rel for doc in conll2002.chunked_sents('esp.train')
-            for rel in extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern = DE)]
-    for r in rels[:10]: print(clause(r, relsym='DE'))
+    rels = [
+        rel
+        for doc in conll2002.chunked_sents("esp.train")
+        for rel in extract_rels("ORG", "LOC", doc, corpus="conll2002", pattern=DE)
+    ]
+    for r in rels[:10]:
+        print(clause(r, relsym="DE"))
     print()
 
 
@@ -456,18 +517,21 @@ def ne_chunked():
     print()
     print("1500 Sentences from Penn Treebank, as processed by NLTK NE Chunker")
     print("=" * 45)
-    ROLE = re.compile(r'.*(chairman|president|trader|scientist|economist|analyst|partner).*')
+    ROLE = re.compile(
+        r".*(chairman|president|trader|scientist|economist|analyst|partner).*"
+    )
     rels = []
     for i, sent in enumerate(nltk.corpus.treebank.tagged_sents()[:1500]):
         sent = nltk.ne_chunk(sent)
-        rels = extract_rels('PER', 'ORG', sent, corpus='ace', pattern=ROLE, window=7)
+        rels = extract_rels("PER", "ORG", sent, corpus="ace", pattern=ROLE, window=7)
         for rel in rels:
-            print('{0:<5}{1}'.format(i, rtuple(rel)))
+            print("{0:<5}{1}".format(i, rtuple(rel)))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     import nltk
     from nltk.sem import relextract
+
     in_demo(trace=0)
     roles_demo(trace=0)
     conllned()
diff --git a/nlp_resource_data/nltk/sem/relextract.pyc b/nlp_resource_data/nltk/sem/relextract.pyc
deleted file mode 100755 (executable)
index 98fa9b2..0000000
Binary files a/nlp_resource_data/nltk/sem/relextract.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sem/skolemize.py b/nlp_resource_data/nltk/sem/skolemize.py
old mode 100755 (executable)
new mode 100644 (file)
index 1c5c03f..3070480
@@ -2,14 +2,25 @@
 #
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from nltk.sem.logic import (AllExpression, AndExpression, ApplicationExpression,
-                            EqualityExpression, ExistsExpression, IffExpression,
-                            ImpExpression, NegatedExpression, OrExpression,
-                            VariableExpression, skolem_function, unique_variable)
+from nltk.sem.logic import (
+    AllExpression,
+    AndExpression,
+    ApplicationExpression,
+    EqualityExpression,
+    ExistsExpression,
+    IffExpression,
+    ImpExpression,
+    NegatedExpression,
+    OrExpression,
+    VariableExpression,
+    skolem_function,
+    unique_variable,
+)
+
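
A sketch of what the skolemize() defined just below computes end to end, assuming the ExistsExpression case handled further down in this module (fresh 'z'/'F' names come from logic's global counter, so the exact digits vary):

from nltk.sem.logic import Expression
from nltk.sem.skolemize import skolemize

print(skolemize(Expression.fromstring('exists x.man(x)')))
# man(z1)           -- no enclosing universal: a fresh skolem constant
print(skolemize(Expression.fromstring('all x.exists y.loves(x,y)')))
# loves(z2,F1(z2))  -- under a universal: a skolem function of that variable
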
 
 def skolemize(expression, univ_scope=None, used_variables=None):
     """
@@ -21,60 +32,95 @@ def skolemize(expression, univ_scope=None, used_variables=None):
         used_variables = set()
 
     if isinstance(expression, AllExpression):
-        term = skolemize(expression.term, univ_scope|set([expression.variable]), used_variables|set([expression.variable]))
-        return term.replace(expression.variable, VariableExpression(unique_variable(ignore=used_variables)))
+        term = skolemize(
+            expression.term,
+            univ_scope | set([expression.variable]),
+            used_variables | set([expression.variable]),
+        )
+        return term.replace(
+            expression.variable,
+            VariableExpression(unique_variable(ignore=used_variables)),
+        )
     elif isinstance(expression, AndExpression):
-        return skolemize(expression.first, univ_scope, used_variables) &\
-               skolemize(expression.second, univ_scope, used_variables)
+        return skolemize(expression.first, univ_scope, used_variables) & skolemize(
+            expression.second, univ_scope, used_variables
+        )
     elif isinstance(expression, OrExpression):
-        return to_cnf(skolemize(expression.first, univ_scope, used_variables),
-                      skolemize(expression.second, univ_scope, used_variables))
+        return to_cnf(
+            skolemize(expression.first, univ_scope, used_variables),
+            skolemize(expression.second, univ_scope, used_variables),
+        )
     elif isinstance(expression, ImpExpression):
-        return to_cnf(skolemize(-expression.first, univ_scope, used_variables),
-                      skolemize(expression.second, univ_scope, used_variables))
+        return to_cnf(
+            skolemize(-expression.first, univ_scope, used_variables),
+            skolemize(expression.second, univ_scope, used_variables),
+        )
     elif isinstance(expression, IffExpression):
-        return to_cnf(skolemize(-expression.first, univ_scope, used_variables),
-                      skolemize(expression.second, univ_scope, used_variables)) &\
-               to_cnf(skolemize(expression.first, univ_scope, used_variables),
-                      skolemize(-expression.second, univ_scope, used_variables))
+        return to_cnf(
+            skolemize(-expression.first, univ_scope, used_variables),
+            skolemize(expression.second, univ_scope, used_variables),
+        ) & to_cnf(
+            skolemize(expression.first, univ_scope, used_variables),
+            skolemize(-expression.second, univ_scope, used_variables),
+        )
     elif isinstance(expression, EqualityExpression):
         return expression
     elif isinstance(expression, NegatedExpression):
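+        # Push the negation inward case by case (toward negation normal form).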
         negated = expression.term
         if isinstance(negated, AllExpression):
-            term = skolemize(-negated.term, univ_scope, used_variables|set([negated.variable]))
+            term = skolemize(
+                -negated.term, univ_scope, used_variables | set([negated.variable])
+            )
             if univ_scope:
                 return term.replace(negated.variable, skolem_function(univ_scope))
             else:
-                skolem_constant = VariableExpression(unique_variable(ignore=used_variables))
+                skolem_constant = VariableExpression(
+                    unique_variable(ignore=used_variables)
+                )
                 return term.replace(negated.variable, skolem_constant)
         elif isinstance(negated, AndExpression):
-            return to_cnf(skolemize(-negated.first, univ_scope, used_variables),
-                          skolemize(-negated.second, univ_scope, used_variables))
+            return to_cnf(
+                skolemize(-negated.first, univ_scope, used_variables),
+                skolemize(-negated.second, univ_scope, used_variables),
+            )
         elif isinstance(negated, OrExpression):
-            return skolemize(-negated.first, univ_scope, used_variables) &\
-                   skolemize(-negated.second, univ_scope, used_variables)
+            return skolemize(-negated.first, univ_scope, used_variables) & skolemize(
+                -negated.second, univ_scope, used_variables
+            )
         elif isinstance(negated, ImpExpression):
-            return skolemize(negated.first, univ_scope, used_variables) &\
-                   skolemize(-negated.second, univ_scope, used_variables)
+            return skolemize(negated.first, univ_scope, used_variables) & skolemize(
+                -negated.second, univ_scope, used_variables
+            )
         elif isinstance(negated, IffExpression):
-            return to_cnf(skolemize(-negated.first, univ_scope, used_variables),
-                          skolemize(-negated.second, univ_scope, used_variables)) &\
-                   to_cnf(skolemize(negated.first, univ_scope, used_variables),
-                          skolemize(negated.second, univ_scope, used_variables))
+            return to_cnf(
+                skolemize(-negated.first, univ_scope, used_variables),
+                skolemize(-negated.second, univ_scope, used_variables),
+            ) & to_cnf(
+                skolemize(negated.first, univ_scope, used_variables),
+                skolemize(negated.second, univ_scope, used_variables),
+            )
         elif isinstance(negated, EqualityExpression):
             return expression
         elif isinstance(negated, NegatedExpression):
             return skolemize(negated.term, univ_scope, used_variables)
         elif isinstance(negated, ExistsExpression):
-            term = skolemize(-negated.term, univ_scope|set([negated.variable]), used_variables|set([negated.variable]))
-            return term.replace(negated.variable, VariableExpression(unique_variable(ignore=used_variables)))
+            term = skolemize(
+                -negated.term,
+                univ_scope | set([negated.variable]),
+                used_variables | set([negated.variable]),
+            )
+            return term.replace(
+                negated.variable,
+                VariableExpression(unique_variable(ignore=used_variables)),
+            )
         elif isinstance(negated, ApplicationExpression):
             return expression
         else:
-            raise Exception('\'%s\' cannot be skolemized' % expression)
+            raise Exception("'%s' cannot be skolemized" % expression)
     elif isinstance(expression, ExistsExpression):
-        term = skolemize(expression.term, univ_scope, used_variables|set([expression.variable]))
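+        # An existential under universal quantifiers becomes a Skolem function of
+        # those variables; with no enclosing universals, a fresh Skolem constant.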
+        term = skolemize(
+            expression.term, univ_scope, used_variables | set([expression.variable])
+        )
         if univ_scope:
             return term.replace(expression.variable, skolem_function(univ_scope))
         else:
@@ -83,7 +129,8 @@ def skolemize(expression, univ_scope=None, used_variables=None):
     elif isinstance(expression, ApplicationExpression):
         return expression
     else:
-        raise Exception('\'%s\' cannot be skolemized' % expression)
+        raise Exception("'%s' cannot be skolemized" % expression)
+
 
 def to_cnf(first, second):
     """
diff --git a/nlp_resource_data/nltk/sem/skolemize.pyc b/nlp_resource_data/nltk/sem/skolemize.pyc
deleted file mode 100755 (executable)
index 04bcb22..0000000
Binary files a/nlp_resource_data/nltk/sem/skolemize.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sem/util.py b/nlp_resource_data/nltk/sem/util.py
old mode 100755 (executable)
new mode 100644 (file)
index edfcb0f..a36442b
--- a/nlp_resource_data/nltk/sem/util.py
+++ b/nlp_resource_data/nltk/sem/util.py
@@ -2,7 +2,7 @@
 #
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
@@ -12,7 +12,6 @@ extraction of the semantic representation of the root node of the the
 syntax tree, followed by evaluation of the semantic representation in
 a first-order model.
 """
-from __future__ import print_function, unicode_literals
 
 import codecs
 from nltk.sem import evaluate
@@ -22,6 +21,7 @@ from nltk.sem import evaluate
 ## Utility functions for connecting parse output to semantics
 ##############################################################
 
+
 def parse_sents(inputs, grammar, trace=0):
     """
     Convert input sentences into syntactic trees.
@@ -43,12 +43,13 @@ def parse_sents(inputs, grammar, trace=0):
         cp = load_parser(grammar, trace=trace)
     parses = []
     for sent in inputs:
-        tokens = sent.split() # use a tokenizer?
+        tokens = sent.split()  # use a tokenizer?
         syntrees = list(cp.parse(tokens))
         parses.append(syntrees)
     return parses
 
-def root_semrep(syntree, semkey='SEM'):
+
+def root_semrep(syntree, semkey="SEM"):
     """
     Find the semantic representation at the root of a tree.
 
@@ -64,11 +65,12 @@ def root_semrep(syntree, semkey='SEM'):
     try:
         return node[semkey]
     except KeyError:
-        print(node, end=' ')
+        print(node, end=" ")
         print("has no specification for the feature %s" % semkey)
     raise
 
-def interpret_sents(inputs, grammar, semkey='SEM', trace=0):
+
+def interpret_sents(inputs, grammar, semkey="SEM", trace=0):
     """
     Add the semantic representation to each syntactic parse tree
     of each input sentence.
@@ -80,8 +82,11 @@ def interpret_sents(inputs, grammar, semkey='SEM', trace=0):
     :return: a mapping from sentences to lists of pairs (parse-tree, semantic-representations)
     :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression)))
     """
-    return [[(syn, root_semrep(syn, semkey)) for syn in syntrees]
-            for syntrees in parse_sents(inputs, grammar, trace=trace)]
+    return [
+        [(syn, root_semrep(syn, semkey)) for syn in syntrees]
+        for syntrees in parse_sents(inputs, grammar, trace=trace)
+    ]
+
 
 def evaluate_sents(inputs, grammar, model, assignment, trace=0):
     """
@@ -95,49 +100,58 @@ def evaluate_sents(inputs, grammar, model, assignment, trace=0):
     :return: a mapping from sentences to lists of triples (parse-tree, semantic-representations, evaluation-in-model)
     :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression, bool or dict(str): bool)))
     """
-    return [[(syn, sem, model.evaluate("%s" % sem, assignment, trace=trace))
-            for (syn, sem) in interpretations]
-            for interpretations in interpret_sents(inputs, grammar)]
+    return [
+        [
+            (syn, sem, model.evaluate("%s" % sem, assignment, trace=trace))
+            for (syn, sem) in interpretations
+        ]
+        for interpretations in interpret_sents(inputs, grammar)
+    ]
 
 
 def demo_model0():
     global m0, g0
-    #Initialize a valuation of non-logical constants."""
-    v = [('john', 'b1'),
-        ('mary', 'g1'),
-        ('suzie', 'g2'),
-        ('fido', 'd1'),
-        ('tess', 'd2'),
-        ('noosa', 'n'),
-        ('girl', set(['g1', 'g2'])),
-        ('boy', set(['b1', 'b2'])),
-        ('dog', set(['d1', 'd2'])),
-        ('bark', set(['d1', 'd2'])),
-        ('walk', set(['b1', 'g2', 'd1'])),
-        ('chase', set([('b1', 'g1'), ('b2', 'g1'), ('g1', 'd1'), ('g2', 'd2')])),
-        ('see', set([('b1', 'g1'), ('b2', 'd2'), ('g1', 'b1'),('d2', 'b1'), ('g2', 'n')])),
-        ('in', set([('b1', 'n'), ('b2', 'n'), ('d2', 'n')])),
-        ('with', set([('b1', 'g1'), ('g1', 'b1'), ('d1', 'b1'), ('b1', 'd1')]))
-     ]
-    #Read in the data from ``v``
+    # Initialize a valuation of non-logical constants.
+    v = [
+        ("john", "b1"),
+        ("mary", "g1"),
+        ("suzie", "g2"),
+        ("fido", "d1"),
+        ("tess", "d2"),
+        ("noosa", "n"),
+        ("girl", set(["g1", "g2"])),
+        ("boy", set(["b1", "b2"])),
+        ("dog", set(["d1", "d2"])),
+        ("bark", set(["d1", "d2"])),
+        ("walk", set(["b1", "g2", "d1"])),
+        ("chase", set([("b1", "g1"), ("b2", "g1"), ("g1", "d1"), ("g2", "d2")])),
+        (
+            "see",
+            set([("b1", "g1"), ("b2", "d2"), ("g1", "b1"), ("d2", "b1"), ("g2", "n")]),
+        ),
+        ("in", set([("b1", "n"), ("b2", "n"), ("d2", "n")])),
+        ("with", set([("b1", "g1"), ("g1", "b1"), ("d1", "b1"), ("b1", "d1")])),
+    ]
+    # Read in the data from ``v``
     val = evaluate.Valuation(v)
-    #Bind ``dom`` to the ``domain`` property of ``val``
+    # Bind ``dom`` to the ``domain`` property of ``val``
     dom = val.domain
-    #Initialize a model with parameters ``dom`` and ``val``.
+    # Initialize a model with parameters ``dom`` and ``val``.
     m0 = evaluate.Model(dom, val)
-    #Initialize a variable assignment with parameter ``dom``
+    # Initialize a variable assignment with parameter ``dom``
     g0 = evaluate.Assignment(dom)
 
 
-def read_sents(filename, encoding='utf8'):
-    with codecs.open(filename, 'r', encoding) as fp:
+def read_sents(filename, encoding="utf8"):
+    with codecs.open(filename, "r", encoding) as fp:
         sents = [l.rstrip() for l in fp]
 
     # get rid of blank lines
     sents = [l for l in sents if len(l) > 0]
-    sents = [l for l in sents if not l[0] == '#']
+    sents = [l for l in sents if not l[0] == "#"]
     return sents
 
+
 def demo_legacy_grammar():
     """
     Check that interpret_sents() is compatible with legacy grammars that use
@@ -148,62 +162,109 @@ def demo_legacy_grammar():
     """
     from nltk.grammar import FeatureGrammar
 
-    g = FeatureGrammar.fromstring("""
+    g = FeatureGrammar.fromstring(
+        """
     % start S
     S[sem=<hello>] -> 'hello'
-    """)
+    """
+    )
     print("Reading grammar: %s" % g)
     print("*" * 20)
-    for reading in interpret_sents(['hello'], g, semkey='sem'):
+    for reading in interpret_sents(["hello"], g, semkey="sem"):
         syn, sem = reading[0]
         print()
         print("output: ", sem)
 
+
 def demo():
     import sys
     from optparse import OptionParser
-    description = \
-    """
+
+    description = """
     Parse and evaluate some sentences.
     """
 
     opts = OptionParser(description=description)
 
-    opts.set_defaults(evaluate=True, beta=True, syntrace=0,
-                      semtrace=0, demo='default', grammar='', sentences='')
-
-    opts.add_option("-d", "--demo", dest="demo",
-                    help="choose demo D; omit this for the default demo, or specify 'chat80'", metavar="D")
-    opts.add_option("-g", "--gram", dest="grammar",
-                    help="read in grammar G", metavar="G")
-    opts.add_option("-m", "--model", dest="model",
-                        help="import model M (omit '.py' suffix)", metavar="M")
-    opts.add_option("-s", "--sentences", dest="sentences",
-                        help="read in a file of test sentences S", metavar="S")
-    opts.add_option("-e", "--no-eval", action="store_false", dest="evaluate",
-                    help="just do a syntactic analysis")
-    opts.add_option("-b", "--no-beta-reduction", action="store_false",
-                    dest="beta", help="don't carry out beta-reduction")
-    opts.add_option("-t", "--syntrace", action="count", dest="syntrace",
-                    help="set syntactic tracing on; requires '-e' option")
-    opts.add_option("-T", "--semtrace", action="count", dest="semtrace",
-                    help="set semantic tracing on")
+    opts.set_defaults(
+        evaluate=True,
+        beta=True,
+        syntrace=0,
+        semtrace=0,
+        demo="default",
+        grammar="",
+        sentences="",
+    )
+
+    opts.add_option(
+        "-d",
+        "--demo",
+        dest="demo",
+        help="choose demo D; omit this for the default demo, or specify 'chat80'",
+        metavar="D",
+    )
+    opts.add_option(
+        "-g", "--gram", dest="grammar", help="read in grammar G", metavar="G"
+    )
+    opts.add_option(
+        "-m",
+        "--model",
+        dest="model",
+        help="import model M (omit '.py' suffix)",
+        metavar="M",
+    )
+    opts.add_option(
+        "-s",
+        "--sentences",
+        dest="sentences",
+        help="read in a file of test sentences S",
+        metavar="S",
+    )
+    opts.add_option(
+        "-e",
+        "--no-eval",
+        action="store_false",
+        dest="evaluate",
+        help="just do a syntactic analysis",
+    )
+    opts.add_option(
+        "-b",
+        "--no-beta-reduction",
+        action="store_false",
+        dest="beta",
+        help="don't carry out beta-reduction",
+    )
+    opts.add_option(
+        "-t",
+        "--syntrace",
+        action="count",
+        dest="syntrace",
+        help="set syntactic tracing on; requires '-e' option",
+    )
+    opts.add_option(
+        "-T",
+        "--semtrace",
+        action="count",
+        dest="semtrace",
+        help="set semantic tracing on",
+    )
 
     (options, args) = opts.parse_args()
 
-    SPACER = '-' * 30
+    SPACER = "-" * 30
 
     demo_model0()
 
     sents = [
-    'Fido sees a boy with Mary',
-    'John sees Mary',
-    'every girl chases a dog',
-    'every boy chases a girl',
-    'John walks with a girl in Noosa',
-    'who walks']
+        "Fido sees a boy with Mary",
+        "John sees Mary",
+        "every girl chases a dog",
+        "every boy chases a girl",
+        "John walks with a girl in Noosa",
+        "who walks",
+    ]
 
-    gramfile = 'grammars/sample_grammars/sem2.fcfg'
+    gramfile = "grammars/sample_grammars/sem2.fcfg"
 
     if options.sentences:
         sentsfile = options.sentences
@@ -220,30 +281,29 @@ def demo():
     g = g0
 
     if options.evaluate:
-        evaluations = \
-            evaluate_sents(sents, gramfile, model, g, trace=options.semtrace)
+        evaluations = evaluate_sents(sents, gramfile, model, g, trace=options.semtrace)
     else:
-        semreps = \
-            interpret_sents(sents, gramfile, trace=options.syntrace)
+        semreps = interpret_sents(sents, gramfile, trace=options.syntrace)
 
     for i, sent in enumerate(sents):
         n = 1
-        print('\nSentence: %s' % sent)
+        print("\nSentence: %s" % sent)
         print(SPACER)
         if options.evaluate:
 
             for (syntree, semrep, value) in evaluations[i]:
                 if isinstance(value, dict):
                     value = set(value.keys())
-                print('%d:  %s' % (n, semrep))
+                print("%d:  %s" % (n, semrep))
                 print(value)
                 n += 1
         else:
 
             for (syntree, semrep) in semreps[i]:
-                print('%d:  %s' % (n, semrep))
+                print("%d:  %s" % (n, semrep))
                 n += 1
 
+
 if __name__ == "__main__":
     demo()
     demo_legacy_grammar()
diff --git a/nlp_resource_data/nltk/sem/util.pyc b/nlp_resource_data/nltk/sem/util.pyc
deleted file mode 100755 (executable)
index 76eedc9..0000000
Binary files a/nlp_resource_data/nltk/sem/util.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sentiment/__init__.py b/nlp_resource_data/nltk/sentiment/__init__.py
old mode 100755 (executable)
new mode 100644 (file)
index 6f879bf..f31e472
--- a/nlp_resource_data/nltk/sentiment/__init__.py
+++ b/nlp_resource_data/nltk/sentiment/__init__.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Sentiment Analysis
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
diff --git a/nlp_resource_data/nltk/sentiment/__init__.pyc b/nlp_resource_data/nltk/sentiment/__init__.pyc
deleted file mode 100755 (executable)
index 3bf1123..0000000
Binary files a/nlp_resource_data/nltk/sentiment/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sentiment/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/sentiment/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..c945f36
Binary files /dev/null and b/nlp_resource_data/nltk/sentiment/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sentiment/__pycache__/sentiment_analyzer.cpython-37.pyc b/nlp_resource_data/nltk/sentiment/__pycache__/sentiment_analyzer.cpython-37.pyc
new file mode 100644 (file)
index 0000000..dbf8c53
Binary files /dev/null and b/nlp_resource_data/nltk/sentiment/__pycache__/sentiment_analyzer.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sentiment/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/sentiment/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..5500f3a
Binary files /dev/null and b/nlp_resource_data/nltk/sentiment/__pycache__/util.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sentiment/__pycache__/vader.cpython-37.pyc b/nlp_resource_data/nltk/sentiment/__pycache__/vader.cpython-37.pyc
new file mode 100644 (file)
index 0000000..281428c
Binary files /dev/null and b/nlp_resource_data/nltk/sentiment/__pycache__/vader.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sentiment/sentiment_analyzer.py b/nlp_resource_data/nltk/sentiment/sentiment_analyzer.py
old mode 100755 (executable)
new mode 100644 (file)
index 4fd18fb..9befdd8
--- a/nlp_resource_data/nltk/sentiment/sentiment_analyzer.py
+++ b/nlp_resource_data/nltk/sentiment/sentiment_analyzer.py
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: Sentiment Analyzer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -13,22 +13,26 @@ using NLTK features and classifiers, especially for teaching and demonstrative
 purposes.
 """
 
-from __future__ import print_function
+import pickle
+import sys
 from collections import defaultdict
 
 from nltk.classify.util import apply_features, accuracy as eval_accuracy
 from nltk.collocations import BigramCollocationFinder
-from nltk.metrics import (BigramAssocMeasures, precision as eval_precision,
-    recall as eval_recall, f_measure as eval_f_measure)
+from nltk.metrics import (
+    BigramAssocMeasures,
+    precision as eval_precision,
+    recall as eval_recall,
+    f_measure as eval_f_measure,
+)
 
 from nltk.probability import FreqDist
 
-from nltk.sentiment.util import save_file, timer
 
 class SentimentAnalyzer(object):
     """
     A Sentiment Analysis tool based on machine learning approaches.
     """
+
     def __init__(self, classifier=None):
         self.feat_extractors = defaultdict(list)
         self.classifier = classifier
@@ -82,11 +86,15 @@ class SentimentAnalyzer(object):
         """
         # Stopwords are not removed
         unigram_feats_freqs = FreqDist(word for word in words)
-        return [w for w, f in unigram_feats_freqs.most_common(top_n)
-                if unigram_feats_freqs[w] > min_freq]
+        return [
+            w
+            for w, f in unigram_feats_freqs.most_common(top_n)
+            if unigram_feats_freqs[w] > min_freq
+        ]
 
-    def bigram_collocation_feats(self, documents, top_n=None, min_freq=3,
-                                 assoc_measure=BigramAssocMeasures.pmi):
+    def bigram_collocation_feats(
+        self, documents, top_n=None, min_freq=3, assoc_measure=BigramAssocMeasures.pmi
+    ):
         """
         Return `top_n` bigram features (using `assoc_measure`).
         Note that this method is based on bigram collocations measures, and not
@@ -167,17 +175,34 @@ class SentimentAnalyzer(object):
         :param kwargs: additional parameters that will be passed as arguments to
             the classifier `train` function.
         :return: A classifier instance trained on the training set.
-        :rtype: 
+        :rtype:
         """
         print("Training classifier")
         self.classifier = trainer(training_set, **kwargs)
         if save_classifier:
-            save_file(self.classifier, save_classifier)
+            self.save_file(self.classifier, save_classifier)
 
         return self.classifier
 
-    def evaluate(self, test_set, classifier=None, accuracy=True, f_measure=True,
-                 precision=True, recall=True, verbose=False):
+    def save_file(self, content, filename):
+        """
+        Store `content` in `filename`. Can be used to store a SentimentAnalyzer.
+        """
+        print("Saving", filename, file=sys.stderr)
+        with open(filename, "wb") as storage_file:
+            # The protocol=2 parameter is for python2 compatibility
+            pickle.dump(content, storage_file, protocol=2)
+
+    def evaluate(
+        self,
+        test_set,
+        classifier=None,
+        accuracy=True,
+        f_measure=True,
+        precision=True,
+        recall=True,
+        verbose=False,
+    ):
         """
         Evaluate and print classifier performance on the test set.
 
@@ -196,7 +221,7 @@ class SentimentAnalyzer(object):
         metrics_results = {}
         if accuracy == True:
             accuracy_score = eval_accuracy(classifier, test_set)
-            metrics_results['Accuracy'] = accuracy_score
+            metrics_results["Accuracy"] = accuracy_score
 
         gold_results = defaultdict(set)
         test_results = defaultdict(set)
@@ -209,21 +234,22 @@ class SentimentAnalyzer(object):
 
         for label in labels:
             if precision == True:
-                precision_score = eval_precision(gold_results[label],
-                    test_results[label])
-                metrics_results['Precision [{0}]'.format(label)] = precision_score
+                precision_score = eval_precision(
+                    gold_results[label], test_results[label]
+                )
+                metrics_results["Precision [{0}]".format(label)] = precision_score
             if recall == True:
-                recall_score = eval_recall(gold_results[label],
-                    test_results[label])
-                metrics_results['Recall [{0}]'.format(label)] = recall_score
+                recall_score = eval_recall(gold_results[label], test_results[label])
+                metrics_results["Recall [{0}]".format(label)] = recall_score
             if f_measure == True:
-                f_measure_score = eval_f_measure(gold_results[label],
-                    test_results[label])
-                metrics_results['F-measure [{0}]'.format(label)] = f_measure_score
+                f_measure_score = eval_f_measure(
+                    gold_results[label], test_results[label]
+                )
+                metrics_results["F-measure [{0}]".format(label)] = f_measure_score
 
         # Print evaluation results (in alphabetical order)
         if verbose == True:
             for result in sorted(metrics_results):
-                print('{0}: {1}'.format(result, metrics_results[result]))
+                print("{0}: {1}".format(result, metrics_results[result]))
 
         return metrics_results
diff --git a/nlp_resource_data/nltk/sentiment/sentiment_analyzer.pyc b/nlp_resource_data/nltk/sentiment/sentiment_analyzer.pyc
deleted file mode 100755 (executable)
index aeb5ee6..0000000
Binary files a/nlp_resource_data/nltk/sentiment/sentiment_analyzer.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/sentiment/util.py b/nlp_resource_data/nltk/sentiment/util.py
old mode 100755 (executable)
new mode 100644 (file)
index b8e3fbe..e2bf22a
--- a/nlp_resource_data/nltk/sentiment/util.py
+++ b/nlp_resource_data/nltk/sentiment/util.py
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: Sentiment Analyzer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Pierpaolo Pantone <24alsecondo@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -10,7 +10,6 @@
 """
 Utility methods for Sentiment Analysis.
 """
-from __future__ import division
 
 import codecs
 import csv
@@ -21,17 +20,15 @@ import re
 import sys
 import time
 from copy import deepcopy
-from itertools import tee
 
 import nltk
 from nltk.corpus import CategorizedPlaintextCorpusReader
 from nltk.data import load
 from nltk.tokenize.casual import EMOTICON_RE
-from nltk.twitter.common import outf_writer_compat, extract_fields
 
-#////////////////////////////////////////////////////////////
-#{ Regular expressions
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+# { Regular expressions
+# ////////////////////////////////////////////////////////////
 
 # Regular expression for negation by Christopher Potts
 NEGATION = r"""
@@ -46,30 +43,99 @@ NEGATION = r"""
 
 NEGATION_RE = re.compile(NEGATION, re.VERBOSE)
 
-CLAUSE_PUNCT = r'^[.:;!?]$'
+CLAUSE_PUNCT = r"^[.:;!?]$"
 CLAUSE_PUNCT_RE = re.compile(CLAUSE_PUNCT)
 
 # Happy and sad emoticons
 
-HAPPY = set([
-    ':-)', ':)', ';)', ':o)', ':]', ':3', ':c)', ':>', '=]', '8)', '=)', ':}',
-    ':^)', ':-D', ':D', '8-D', '8D', 'x-D', 'xD', 'X-D', 'XD', '=-D', '=D',
-    '=-3', '=3', ':-))', ":'-)", ":')", ':*', ':^*', '>:P', ':-P', ':P', 'X-P',
-    'x-p', 'xp', 'XP', ':-p', ':p', '=p', ':-b', ':b', '>:)', '>;)', '>:-)',
-    '<3'
-    ])
-
-SAD = set([
-    ':L', ':-/', '>:/', ':S', '>:[', ':@', ':-(', ':[', ':-||', '=L', ':<',
-    ':-[', ':-<', '=\\', '=/', '>:(', ':(', '>.<', ":'-(", ":'(", ':\\', ':-c',
-    ':c', ':{', '>:\\', ';('
-    ])
+HAPPY = set(
+    [
+        ":-)",
+        ":)",
+        ";)",
+        ":o)",
+        ":]",
+        ":3",
+        ":c)",
+        ":>",
+        "=]",
+        "8)",
+        "=)",
+        ":}",
+        ":^)",
+        ":-D",
+        ":D",
+        "8-D",
+        "8D",
+        "x-D",
+        "xD",
+        "X-D",
+        "XD",
+        "=-D",
+        "=D",
+        "=-3",
+        "=3",
+        ":-))",
+        ":'-)",
+        ":')",
+        ":*",
+        ":^*",
+        ">:P",
+        ":-P",
+        ":P",
+        "X-P",
+        "x-p",
+        "xp",
+        "XP",
+        ":-p",
+        ":p",
+        "=p",
+        ":-b",
+        ":b",
+        ">:)",
+        ">;)",
+        ">:-)",
+        "<3",
+    ]
+)
+
+SAD = set(
+    [
+        ":L",
+        ":-/",
+        ">:/",
+        ":S",
+        ">:[",
+        ":@",
+        ":-(",
+        ":[",
+        ":-||",
+        "=L",
+        ":<",
+        ":-[",
+        ":-<",
+        "=\\",
+        "=/",
+        ">:(",
+        ":(",
+        ">.<",
+        ":'-(",
+        ":'(",
+        ":\\",
+        ":-c",
+        ":c",
+        ":{",
+        ">:\\",
+        ";(",
+    ]
+)
 
 
 def timer(method):
     """
     A timer decorator to measure execution performance of methods.
     """
+
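+    # Time a single call to the wrapped method and print the elapsed wall-clock time.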
     def timed(*args, **kw):
         start = time.time()
         result = method(*args, **kw)
@@ -80,28 +146,28 @@ def timer(method):
         # in Python 2.x round() will return a float, so we convert it to int
         secs = int(round(tot_time % 60))
         if hours == 0 and mins == 0 and secs < 10:
-            print('[TIMER] {0}(): {:.3f} seconds'.format(method.__name__, tot_time))
+            print("[TIMER] {0}(): {:.3f} seconds".format(method.__name__, tot_time))
         else:
-            print('[TIMER] {0}(): {1}h {2}m {3}s'.format(method.__name__, hours, mins, secs))
+            print(
+                "[TIMER] {0}(): {1}h {2}m {3}s".format(
+                    method.__name__, hours, mins, secs
+                )
+            )
         return result
-    return timed
 
+    return timed
 
-def pairwise(iterable):
-    """s -> (s0,s1), (s1,s2), (s2, s3), ..."""
-    a, b = tee(iterable)
-    next(b, None)
-    return zip(a, b)
 
-#////////////////////////////////////////////////////////////
-#{ Feature extractor functions
-#////////////////////////////////////////////////////////////
+# ////////////////////////////////////////////////////////////
+# { Feature extractor functions
+# ////////////////////////////////////////////////////////////
 """
 Feature extractor functions are declared outside the SentimentAnalyzer class.
 Users should have the possibility to create their own feature extractors
 without modifying SentimentAnalyzer.
 """
 
+
 def extract_unigram_feats(document, unigrams, handle_negation=False):
     """
     Populate a dictionary of unigram features, reflecting the presence/absence in
@@ -123,9 +189,10 @@ def extract_unigram_feats(document, unigrams, handle_negation=False):
     if handle_negation:
         document = mark_negation(document)
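+    # One boolean feature per unigram, keyed "contains(<word>)".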
     for word in unigrams:
-        features['contains({0})'.format(word)] = word in set(document)
+        features["contains({0})".format(word)] = word in set(document)
     return features
 
+
 def extract_bigram_feats(document, bigrams):
     """
     Populate a dictionary of bigram features, reflecting the presence/absence in
@@ -145,12 +212,16 @@ def extract_bigram_feats(document, bigrams):
     """
     features = {}
     for bigr in bigrams:
-        features['contains({0} - {1})'.format(bigr[0], bigr[1])] = bigr in nltk.bigrams(document)
+        features["contains({0} - {1})".format(bigr[0], bigr[1])] = bigr in nltk.bigrams(
+            document
+        )
     return features
 
-#////////////////////////////////////////////////////////////
-#{ Helper Functions
-#////////////////////////////////////////////////////////////
+
+# ////////////////////////////////////////////////////////////
+# { Helper Functions
+# ////////////////////////////////////////////////////////////
+
 
 def mark_negation(document, double_neg_flip=False, shallow=False):
     """
@@ -184,43 +255,36 @@ def mark_negation(document, double_neg_flip=False, shallow=False):
                 neg_scope = not neg_scope
                 continue
             else:
-                doc[i] += '_NEG'
+                doc[i] += "_NEG"
         elif neg_scope and CLAUSE_PUNCT_RE.search(word):
             neg_scope = not neg_scope
         elif neg_scope and not CLAUSE_PUNCT_RE.search(word):
-            doc[i] += '_NEG'
+            doc[i] += "_NEG"
 
     return document
 
+
 def output_markdown(filename, **kwargs):
     """
     Write the output of an analysis to a file.
     """
-    with codecs.open(filename, 'at') as outfile:
-        text = '\n*** \n\n'
-        text += '{0} \n\n'.format(time.strftime("%d/%m/%Y, %H:%M"))
+    with codecs.open(filename, "at") as outfile:
+        text = "\n*** \n\n"
+        text += "{0} \n\n".format(time.strftime("%d/%m/%Y, %H:%M"))
         for k in sorted(kwargs):
             if isinstance(kwargs[k], dict):
                 dictionary = kwargs[k]
-                text += '  - **{0}:**\n'.format(k)
+                text += "  - **{0}:**\n".format(k)
                 for entry in sorted(dictionary):
-                    text += '    - {0}: {1} \n'.format(entry, dictionary[entry])
+                    text += "    - {0}: {1} \n".format(entry, dictionary[entry])
             elif isinstance(kwargs[k], list):
-                text += '  - **{0}:**\n'.format(k)
+                text += "  - **{0}:**\n".format(k)
                 for entry in kwargs[k]:
-                    text += '    - {0}\n'.format(entry)
+                    text += "    - {0}\n".format(entry)
             else:
-                text += '  - **{0}:** {1} \n'.format(k, kwargs[k])
+                text += "  - **{0}:** {1} \n".format(k, kwargs[k])
         outfile.write(text)
 
-def save_file(content, filename):
-    """
-    Store `content` in `filename`. Can be used to store a SentimentAnalyzer.
-    """
-    print("Saving", filename)
-    with codecs.open(filename, 'wb') as storage_file:
-        # The protocol=2 parameter is for python2 compatibility
-        pickle.dump(content, storage_file, protocol=2)
 
 def split_train_test(all_instances, n=None):
     """
@@ -236,40 +300,55 @@ def split_train_test(all_instances, n=None):
     random.shuffle(all_instances)
     if not n or n > len(all_instances):
         n = len(all_instances)
-    train_set = all_instances[:int(.8*n)]
-    test_set = all_instances[int(.8*n):n]
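+    # First 80% of the n shuffled instances for training, the remainder for testing.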
+    train_set = all_instances[: int(0.8 * n)]
+    test_set = all_instances[int(0.8 * n) : n]
 
     return train_set, test_set
 
+
 def _show_plot(x_values, y_values, x_labels=None, y_labels=None):
     try:
         import matplotlib.pyplot as plt
     except ImportError:
-        raise ImportError('The plot function requires matplotlib to be installed.'
-                         'See http://matplotlib.org/')
+        raise ImportError(
+            "The plot function requires matplotlib to be installed."
+            "See http://matplotlib.org/"
+        )
 
-    plt.locator_params(axis='y', nbins=3)
+    plt.locator_params(axis="y", nbins=3)
     axes = plt.axes()
     axes.yaxis.grid()
-    plt.plot(x_values, y_values, 'ro', color='red')
+    plt.plot(x_values, y_values, "ro", color="red")
     plt.ylim(ymin=-1.2, ymax=1.2)
     plt.tight_layout(pad=5)
     if x_labels:
-        plt.xticks(x_values, x_labels, rotation='vertical')
+        plt.xticks(x_values, x_labels, rotation="vertical")
     if y_labels:
-        plt.yticks([-1, 0, 1], y_labels, rotation='horizontal')
+        plt.yticks([-1, 0, 1], y_labels, rotation="horizontal")
     # Pad margins so that markers are not clipped by the axes
     plt.margins(0.2)
     plt.show()
 
-#////////////////////////////////////////////////////////////
-#{ Parsing and conversion functions
-#////////////////////////////////////////////////////////////
 
-def json2csv_preprocess(json_file, outfile, fields, encoding='utf8', errors='replace',
-            gzip_compress=False, skip_retweets=True, skip_tongue_tweets=True,
-            skip_ambiguous_tweets=True, strip_off_emoticons=True, remove_duplicates=True,
-            limit=None):
+# ////////////////////////////////////////////////////////////
+# { Parsing and conversion functions
+# ////////////////////////////////////////////////////////////
+
+
+def json2csv_preprocess(
+    json_file,
+    outfile,
+    fields,
+    encoding="utf8",
+    errors="replace",
+    gzip_compress=False,
+    skip_retweets=True,
+    skip_tongue_tweets=True,
+    skip_ambiguous_tweets=True,
+    strip_off_emoticons=True,
+    remove_duplicates=True,
+    limit=None,
+):
     """
     Convert json file to csv file, preprocessing each row to obtain a suitable
     dataset for tweets Semantic Analysis.
@@ -294,7 +373,7 @@ def json2csv_preprocess(json_file, outfile, fields, encoding='utf8', errors='rep
         subsets of the original tweets json data.
     """
     with codecs.open(json_file, encoding=encoding) as fp:
-        (writer, outf) = outf_writer_compat(outfile, encoding, errors, gzip_compress)
+        # Bind the twitter csv helpers before first use: without this local import,
+        # calling json2csv_preprocess() from library code raises NameError, since the
+        # matching import otherwise happens only under __main__.
+        from nltk.twitter.common import _outf_writer, extract_fields
+        (writer, outf) = _outf_writer(outfile, encoding, errors, gzip_compress)
         # write the list of fields as header
         writer.writerow(fields)
 
@@ -305,14 +384,14 @@ def json2csv_preprocess(json_file, outfile, fields, encoding='utf8', errors='rep
             tweet = json.loads(line)
             row = extract_fields(tweet, fields)
             try:
-                text = row[fields.index('text')]
+                text = row[fields.index("text")]
                 # Remove retweets
                 if skip_retweets == True:
-                    if re.search(r'\bRT\b', text):
+                    if re.search(r"\bRT\b", text):
                         continue
                 # Remove tweets containing ":P" and ":-P" emoticons
                 if skip_tongue_tweets == True:
-                    if re.search(r'\:\-?P\b', text):
+                    if re.search(r"\:\-?P\b", text):
                         continue
                 # Remove tweets containing both happy and sad emoticons
                 if skip_ambiguous_tweets == True:
@@ -322,13 +401,15 @@ def json2csv_preprocess(json_file, outfile, fields, encoding='utf8', errors='rep
                             continue
                 # Strip off emoticons from all tweets
                 if strip_off_emoticons == True:
-                    row[fields.index('text')] = re.sub(r'(?!\n)\s+', ' ', EMOTICON_RE.sub('', text))
+                    row[fields.index("text")] = re.sub(
+                        r"(?!\n)\s+", " ", EMOTICON_RE.sub("", text)
+                    )
                 # Remove duplicate tweets
                 if remove_duplicates == True:
-                    if row[fields.index('text')] in tweets_cache:
+                    if row[fields.index("text")] in tweets_cache:
                         continue
                     else:
-                        tweets_cache.append(row[fields.index('text')])
+                        tweets_cache.append(row[fields.index("text")])
             except ValueError:
                 pass
             writer.writerow(row)
@@ -337,8 +418,10 @@ def json2csv_preprocess(json_file, outfile, fields, encoding='utf8', errors='rep
                 break
         outf.close()
 
-def parse_tweets_set(filename, label, word_tokenizer=None, sent_tokenizer=None,
-                     skip_header=True):
+
+def parse_tweets_set(
+    filename, label, word_tokenizer=None, sent_tokenizer=None, skip_header=True
+):
     """
     Parse csv file containing tweets and output data a list of (text, label) tuples.
 
@@ -356,51 +439,36 @@ def parse_tweets_set(filename, label, word_tokenizer=None, sent_tokenizer=None,
     """
     tweets = []
     if not sent_tokenizer:
-        sent_tokenizer = load('tokenizers/punkt/english.pickle')
-
-    # If we use Python3.x we can proceed using the 'rt' flag
-    if sys.version_info[0] == 3:
-        with codecs.open(filename, 'rt') as csvfile:
-            reader = csv.reader(csvfile)
-            if skip_header == True:
-                next(reader, None) # skip the header
-            i = 0
-            for tweet_id, text in reader:
-                # text = text[1]
-                i += 1
-                sys.stdout.write('Loaded {0} tweets\r'.format(i))
-                # Apply sentence and word tokenizer to text
-                if word_tokenizer:
-                    tweet = [w for sent in sent_tokenizer.tokenize(text)
-                                       for w in word_tokenizer.tokenize(sent)]
-                else:
-                    tweet = text
-                tweets.append((tweet, label))
-    # If we use Python2.x we need to handle encoding problems
-    elif sys.version_info[0] < 3:
-        with codecs.open(filename) as csvfile:
-            reader = csv.reader(csvfile)
-            if skip_header == True:
-                next(reader, None) # skip the header
-            i = 0
-            for row in reader:
-                unicode_row = [x.decode('utf8') for x in row]
-                text = unicode_row[1]
-                i += 1
-                sys.stdout.write('Loaded {0} tweets\r'.format(i))
-                # Apply sentence and word tokenizer to text
-                if word_tokenizer:
-                    tweet = [w.encode('utf8') for sent in sent_tokenizer.tokenize(text)
-                                       for w in word_tokenizer.tokenize(sent)]
-                else:
-                    tweet = text
-                tweets.append((tweet, label))
+        sent_tokenizer = load("tokenizers/punkt/english.pickle")
+
+    with codecs.open(filename, "rt") as csvfile:
+        reader = csv.reader(csvfile)
+        if skip_header == True:
+            next(reader, None)  # skip the header
+        i = 0
+        for tweet_id, text in reader:
+            # text = text[1]
+            i += 1
+            sys.stdout.write("Loaded {0} tweets\r".format(i))
+            # Apply sentence and word tokenizer to text
+            if word_tokenizer:
+                tweet = [
+                    w
+                    for sent in sent_tokenizer.tokenize(text)
+                    for w in word_tokenizer.tokenize(sent)
+                ]
+            else:
+                tweet = text
+            tweets.append((tweet, label))
+
     print("Loaded {0} tweets".format(i))
     return tweets
 
-#////////////////////////////////////////////////////////////
-#{ Demos
-#////////////////////////////////////////////////////////////
+
+# ////////////////////////////////////////////////////////////
+# { Demos
+# ////////////////////////////////////////////////////////////
+
 
 def demo_tweets(trainer, n_instances=None, output=None):
     """
@@ -426,27 +494,27 @@ def demo_tweets(trainer, n_instances=None, output=None):
     # tokenizer = TweetTokenizer(reduce_len=True, strip_handles=True)
 
     if n_instances is not None:
-        n_instances = int(n_instances/2)
+        n_instances = int(n_instances / 2)
 
-    fields = ['id', 'text']
+    fields = ["id", "text"]
     positive_json = twitter_samples.abspath("positive_tweets.json")
-    positive_csv = 'positive_tweets.csv'
+    positive_csv = "positive_tweets.csv"
     json2csv_preprocess(positive_json, positive_csv, fields, limit=n_instances)
 
     negative_json = twitter_samples.abspath("negative_tweets.json")
-    negative_csv = 'negative_tweets.csv'
+    negative_csv = "negative_tweets.csv"
     json2csv_preprocess(negative_json, negative_csv, fields, limit=n_instances)
 
-    neg_docs = parse_tweets_set(negative_csv, label='neg', word_tokenizer=tokenizer)
-    pos_docs = parse_tweets_set(positive_csv, label='pos', word_tokenizer=tokenizer)
+    neg_docs = parse_tweets_set(negative_csv, label="neg", word_tokenizer=tokenizer)
+    pos_docs = parse_tweets_set(positive_csv, label="pos", word_tokenizer=tokenizer)
 
     # We separately split subjective and objective instances to keep a balanced
     # uniform class distribution in both train and test sets.
     train_pos_docs, test_pos_docs = split_train_test(pos_docs)
     train_neg_docs, test_neg_docs = split_train_test(neg_docs)
 
-    training_tweets = train_pos_docs+train_neg_docs
-    testing_tweets = test_pos_docs+test_neg_docs
+    training_tweets = train_pos_docs + train_neg_docs
+    testing_tweets = test_pos_docs + test_neg_docs
 
     sentim_analyzer = SentimentAnalyzer()
     # stopwords = stopwords.words('english')
@@ -458,9 +526,12 @@ def demo_tweets(trainer, n_instances=None, output=None):
     sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
 
     # Add bigram collocation features
-    bigram_collocs_feats = sentim_analyzer.bigram_collocation_feats([tweet[0] for tweet in training_tweets],
-        top_n=100, min_freq=12)
-    sentim_analyzer.add_feat_extractor(extract_bigram_feats, bigrams=bigram_collocs_feats)
+    bigram_collocs_feats = sentim_analyzer.bigram_collocation_feats(
+        [tweet[0] for tweet in training_tweets], top_n=100, min_freq=12
+    )
+    sentim_analyzer.add_feat_extractor(
+        extract_bigram_feats, bigrams=bigram_collocs_feats
+    )
 
     training_set = sentim_analyzer.apply_features(training_tweets)
     test_set = sentim_analyzer.apply_features(testing_tweets)
@@ -470,14 +541,23 @@ def demo_tweets(trainer, n_instances=None, output=None):
     try:
         classifier.show_most_informative_features()
     except AttributeError:
-        print('Your classifier does not provide a show_most_informative_features() method.')
+        print(
+            "Your classifier does not provide a show_most_informative_features() method."
+        )
     results = sentim_analyzer.evaluate(test_set)
 
     if output:
         extr = [f.__name__ for f in sentim_analyzer.feat_extractors]
-        output_markdown(output, Dataset='labeled_tweets', Classifier=type(classifier).__name__,
-                        Tokenizer=tokenizer.__class__.__name__, Feats=extr,
-                        Results=results, Instances=n_instances)
+        output_markdown(
+            output,
+            Dataset="labeled_tweets",
+            Classifier=type(classifier).__name__,
+            Tokenizer=tokenizer.__class__.__name__,
+            Feats=extr,
+            Results=results,
+            Instances=n_instances,
+        )
+
 
 def demo_movie_reviews(trainer, n_instances=None, output=None):
     """
@@ -497,17 +577,23 @@ def demo_movie_reviews(trainer, n_instances=None, output=None):
     from nltk.sentiment import SentimentAnalyzer
 
     if n_instances is not None:
-        n_instances = int(n_instances/2)
-
-    pos_docs = [(list(movie_reviews.words(pos_id)), 'pos') for pos_id in movie_reviews.fileids('pos')[:n_instances]]
-    neg_docs = [(list(movie_reviews.words(neg_id)), 'neg') for neg_id in movie_reviews.fileids('neg')[:n_instances]]
+        n_instances = int(n_instances / 2)
+
+    pos_docs = [
+        (list(movie_reviews.words(pos_id)), "pos")
+        for pos_id in movie_reviews.fileids("pos")[:n_instances]
+    ]
+    neg_docs = [
+        (list(movie_reviews.words(neg_id)), "neg")
+        for neg_id in movie_reviews.fileids("neg")[:n_instances]
+    ]
     # We separately split positive and negative instances to keep a balanced
     # uniform class distribution in both train and test sets.
     train_pos_docs, test_pos_docs = split_train_test(pos_docs)
     train_neg_docs, test_neg_docs = split_train_test(neg_docs)
 
-    training_docs = train_pos_docs+train_neg_docs
-    testing_docs = test_pos_docs+test_neg_docs
+    training_docs = train_pos_docs + train_neg_docs
+    testing_docs = test_pos_docs + test_neg_docs
 
     sentim_analyzer = SentimentAnalyzer()
     all_words = sentim_analyzer.all_words(training_docs)
@@ -523,14 +609,23 @@ def demo_movie_reviews(trainer, n_instances=None, output=None):
     try:
         classifier.show_most_informative_features()
     except AttributeError:
-        print('Your classifier does not provide a show_most_informative_features() method.')
+        print(
+            "Your classifier does not provide a show_most_informative_features() method."
+        )
     results = sentim_analyzer.evaluate(test_set)
 
     if output:
         extr = [f.__name__ for f in sentim_analyzer.feat_extractors]
-        output_markdown(output, Dataset='Movie_reviews', Classifier=type(classifier).__name__,
-                        Tokenizer='WordPunctTokenizer', Feats=extr, Results=results,
-                        Instances=n_instances)
+        output_markdown(
+            output,
+            Dataset="Movie_reviews",
+            Classifier=type(classifier).__name__,
+            Tokenizer="WordPunctTokenizer",
+            Feats=extr,
+            Results=results,
+            Instances=n_instances,
+        )
+
 
 def demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=None):
     """
@@ -550,21 +645,27 @@ def demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=Non
     from nltk.corpus import subjectivity
 
     if n_instances is not None:
-        n_instances = int(n_instances/2)
+        n_instances = int(n_instances / 2)
 
-    subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
-    obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]
+    subj_docs = [
+        (sent, "subj") for sent in subjectivity.sents(categories="subj")[:n_instances]
+    ]
+    obj_docs = [
+        (sent, "obj") for sent in subjectivity.sents(categories="obj")[:n_instances]
+    ]
 
     # We separately split subjective and objective instances to keep a balanced
     # uniform class distribution in both train and test sets.
     train_subj_docs, test_subj_docs = split_train_test(subj_docs)
     train_obj_docs, test_obj_docs = split_train_test(obj_docs)
 
-    training_docs = train_subj_docs+train_obj_docs
-    testing_docs = test_subj_docs+test_obj_docs
+    training_docs = train_subj_docs + train_obj_docs
+    testing_docs = test_subj_docs + test_obj_docs
 
     sentim_analyzer = SentimentAnalyzer()
-    all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])
+    all_words_neg = sentim_analyzer.all_words(
+        [mark_negation(doc) for doc in training_docs]
+    )
 
     # Add simple unigram word features handling negation
     unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
@@ -578,20 +679,29 @@ def demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=Non
     try:
         classifier.show_most_informative_features()
     except AttributeError:
-        print('Your classifier does not provide a show_most_informative_features() method.')
+        print(
+            "Your classifier does not provide a show_most_informative_features() method."
+        )
     results = sentim_analyzer.evaluate(test_set)
 
     if save_analyzer == True:
-        save_file(sentim_analyzer, 'sa_subjectivity.pickle')
+        save_file(sentim_analyzer, "sa_subjectivity.pickle")
 
     if output:
         extr = [f.__name__ for f in sentim_analyzer.feat_extractors]
-        output_markdown(output, Dataset='subjectivity', Classifier=type(classifier).__name__,
-                        Tokenizer='WhitespaceTokenizer', Feats=extr,
-                        Instances=n_instances, Results=results)
+        output_markdown(
+            output,
+            Dataset="subjectivity",
+            Classifier=type(classifier).__name__,
+            Tokenizer="WhitespaceTokenizer",
+            Feats=extr,
+            Instances=n_instances,
+            Results=results,
+        )
 
     return sentim_analyzer
 
+
 def demo_sent_subjectivity(text):
     """
     Classify a single sentence as subjective or objective using a stored
@@ -601,18 +711,20 @@ def demo_sent_subjectivity(text):
     """
     from nltk.classify import NaiveBayesClassifier
     from nltk.tokenize import regexp
+
     word_tokenizer = regexp.WhitespaceTokenizer()
     try:
-        sentim_analyzer = load('sa_subjectivity.pickle')
+        sentim_analyzer = load("sa_subjectivity.pickle")
     except LookupError:
-        print('Cannot find the sentiment analyzer you want to load.')
-        print('Training a new one using NaiveBayesClassifier.')
+        print("Cannot find the sentiment analyzer you want to load.")
+        print("Training a new one using NaiveBayesClassifier.")
         sentim_analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)
 
     # Tokenize and convert to lower case
     tokenized_text = [word.lower() for word in word_tokenizer.tokenize(text)]
     print(sentim_analyzer.classify(tokenized_text))
 
+
 def demo_liu_hu_lexicon(sentence, plot=False):
     """
     Basic example of sentiment classification using Liu and Hu opinion lexicon.
@@ -631,28 +743,31 @@ def demo_liu_hu_lexicon(sentence, plot=False):
     neg_words = 0
     tokenized_sent = [word.lower() for word in tokenizer.tokenize(sentence)]
 
-    x = list(range(len(tokenized_sent))) # x axis for the plot
+    x = list(range(len(tokenized_sent)))  # x axis for the plot
     y = []
 
     for word in tokenized_sent:
         if word in opinion_lexicon.positive():
             pos_words += 1
-            y.append(1) # positive
+            y.append(1)  # positive
         elif word in opinion_lexicon.negative():
             neg_words += 1
-            y.append(-1) # negative
+            y.append(-1)  # negative
         else:
-            y.append(0) # neutral
+            y.append(0)  # neutral
 
     if pos_words > neg_words:
-        print('Positive')
+        print("Positive")
     elif pos_words < neg_words:
-        print('Negative')
+        print("Negative")
     elif pos_words == neg_words:
-        print('Neutral')
+        print("Neutral")
 
     if plot == True:
-        _show_plot(x, y, x_labels=tokenized_sent, y_labels=['Negative', 'Neutral', 'Positive'])
+        _show_plot(
+            x, y, x_labels=tokenized_sent, y_labels=["Negative", "Neutral", "Positive"]
+        )
+
 
 def demo_vader_instance(text):
     """
@@ -661,9 +776,11 @@ def demo_vader_instance(text):
     :param text: a text whose polarity has to be evaluated.
     """
     from nltk.sentiment import SentimentIntensityAnalyzer
+
     vader_analyzer = SentimentIntensityAnalyzer()
     print(vader_analyzer.polarity_scores(text))
 
+
 def demo_vader_tweets(n_instances=None, output=None):
     """
     Classify 10000 positive and negative tweets using Vader approach.
@@ -674,33 +791,47 @@ def demo_vader_tweets(n_instances=None, output=None):
     from collections import defaultdict
     from nltk.corpus import twitter_samples
     from nltk.sentiment import SentimentIntensityAnalyzer
-    from nltk.metrics import (accuracy as eval_accuracy, precision as eval_precision,
-        recall as eval_recall, f_measure as eval_f_measure)
+    from nltk.metrics import (
+        accuracy as eval_accuracy,
+        precision as eval_precision,
+        recall as eval_recall,
+        f_measure as eval_f_measure,
+    )
 
     if n_instances is not None:
-        n_instances = int(n_instances/2)
+        n_instances = int(n_instances / 2)
 
-    fields = ['id', 'text']
+    fields = ["id", "text"]
     positive_json = twitter_samples.abspath("positive_tweets.json")
-    positive_csv = 'positive_tweets.csv'
-    json2csv_preprocess(positive_json, positive_csv, fields, strip_off_emoticons=False,
-                        limit=n_instances)
+    positive_csv = "positive_tweets.csv"
+    json2csv_preprocess(
+        positive_json,
+        positive_csv,
+        fields,
+        strip_off_emoticons=False,
+        limit=n_instances,
+    )
 
     negative_json = twitter_samples.abspath("negative_tweets.json")
-    negative_csv = 'negative_tweets.csv'
-    json2csv_preprocess(negative_json, negative_csv, fields, strip_off_emoticons=False,
-                        limit=n_instances)
+    negative_csv = "negative_tweets.csv"
+    json2csv_preprocess(
+        negative_json,
+        negative_csv,
+        fields,
+        strip_off_emoticons=False,
+        limit=n_instances,
+    )
 
-    pos_docs = parse_tweets_set(positive_csv, label='pos')
-    neg_docs = parse_tweets_set(negative_csv, label='neg')
+    pos_docs = parse_tweets_set(positive_csv, label="pos")
+    neg_docs = parse_tweets_set(negative_csv, label="neg")
 
     # We separately split subjective and objective instances to keep a balanced
     # uniform class distribution in both train and test sets.
     train_pos_docs, test_pos_docs = split_train_test(pos_docs)
     train_neg_docs, test_neg_docs = split_train_test(neg_docs)
 
-    training_tweets = train_pos_docs+train_neg_docs
-    testing_tweets = test_pos_docs+test_neg_docs
+    training_tweets = train_pos_docs + train_neg_docs
+    testing_tweets = test_pos_docs + test_neg_docs
 
     vader_analyzer = SentimentIntensityAnalyzer()
 
@@ -714,40 +845,43 @@ def demo_vader_tweets(n_instances=None, output=None):
         labels.add(label)
         gold_results[label].add(i)
         acc_gold_results.append(label)
-        score = vader_analyzer.polarity_scores(text)['compound']
+        score = vader_analyzer.polarity_scores(text)["compound"]
         if score > 0:
-            observed = 'pos'
+            observed = "pos"
         else:
-            observed = 'neg'
+            observed = "neg"
         num += 1
         acc_test_results.append(observed)
         test_results[observed].add(i)
     metrics_results = {}
     for label in labels:
-        accuracy_score = eval_accuracy(acc_gold_results,
-            acc_test_results)
-        metrics_results['Accuracy'] = accuracy_score
-        precision_score = eval_precision(gold_results[label],
-            test_results[label])
-        metrics_results['Precision [{0}]'.format(label)] = precision_score
-        recall_score = eval_recall(gold_results[label],
-            test_results[label])
-        metrics_results['Recall [{0}]'.format(label)] = recall_score
-        f_measure_score = eval_f_measure(gold_results[label],
-            test_results[label])
-        metrics_results['F-measure [{0}]'.format(label)] = f_measure_score
+        accuracy_score = eval_accuracy(acc_gold_results, acc_test_results)
+        metrics_results["Accuracy"] = accuracy_score
+        precision_score = eval_precision(gold_results[label], test_results[label])
+        metrics_results["Precision [{0}]".format(label)] = precision_score
+        recall_score = eval_recall(gold_results[label], test_results[label])
+        metrics_results["Recall [{0}]".format(label)] = recall_score
+        f_measure_score = eval_f_measure(gold_results[label], test_results[label])
+        metrics_results["F-measure [{0}]".format(label)] = f_measure_score
 
     for result in sorted(metrics_results):
-            print('{0}: {1}'.format(result, metrics_results[result]))
+        print("{0}: {1}".format(result, metrics_results[result]))
 
     if output:
-        output_markdown(output, Approach='Vader', Dataset='labeled_tweets',
-            Instances=n_instances, Results=metrics_results)
+        output_markdown(
+            output,
+            Approach="Vader",
+            Dataset="labeled_tweets",
+            Instances=n_instances,
+            Results=metrics_results,
+        )
+
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     from nltk.classify import NaiveBayesClassifier, MaxentClassifier
     from nltk.classify.scikitlearn import SklearnClassifier
     from sklearn.svm import LinearSVC
+    from nltk.twitter.common import _outf_writer, extract_fields
 
     naive_bayes = NaiveBayesClassifier.train
     svm = SklearnClassifier(LinearSVC()).train
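
For reference, the reflowed demo entry points above can be exercised as follows. This is a minimal usage sketch, assuming an NLTK installation with the vader_lexicon and twitter_samples corpora already downloaded; the sample sentence is illustrative and not part of the source.

from nltk.sentiment.util import demo_vader_instance, demo_vader_tweets

# Prints the four-field polarity dict ('neg', 'neu', 'pos', 'compound')
# for a single text.
demo_vader_instance("VADER is smart, handsome, and funny!")

# Classifies a balanced subset of the labeled tweets and prints
# Accuracy/Precision/Recall/F-measure per label, as computed above.
demo_vader_tweets(n_instances=80)
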
diff --git a/nlp_resource_data/nltk/sentiment/util.pyc b/nlp_resource_data/nltk/sentiment/util.pyc
deleted file mode 100755 (executable)
index 7c5908b..0000000
Binary files a/nlp_resource_data/nltk/sentiment/util.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 2d232ba..7ba4251
@@ -1,11 +1,12 @@
 # coding: utf-8
 # Natural Language Toolkit: vader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: C.J. Hutto <Clayton.Hutto@gtri.gatech.edu>
 #         Ewan Klein <ewan@inf.ed.ac.uk> (modifications)
 #         Pierpaolo Pantone <24alsecondo@gmail.com> (modifications)
 #         George Berry <geb97@cornell.edu> (modifications)
+#         Malavika Suresh <malavika.suresh0794@gmail.com> (modifications)
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 #
@@ -21,142 +22,259 @@ Sentiment Analysis of Social Media Text. Eighth International Conference on
 Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
 """
 
-import codecs
 import math
 import re
 import string
 from itertools import product
+
 import nltk.data
-from .util import pairwise
-
-##Constants##
-
-# (empirically derived mean sentiment intensity rating increase for booster words)
-B_INCR = 0.293
-B_DECR = -0.293
-
-# (empirically derived mean sentiment intensity rating increase for using
-# ALLCAPs to emphasize a word)
-C_INCR = 0.733
-
-N_SCALAR = -0.74
-
-# for removing punctuation
-REGEX_REMOVE_PUNCTUATION = re.compile('[{0}]'.format(re.escape(string.punctuation)))
-
-PUNC_LIST = [".", "!", "?", ",", ";", ":", "-", "'", "\"",
-             "!!", "!!!", "??", "???", "?!?", "!?!", "?!?!", "!?!?"]
-NEGATE = {"aint", "arent", "cannot", "cant", "couldnt", "darent", "didnt", "doesnt",
- "ain't", "aren't", "can't", "couldn't", "daren't", "didn't", "doesn't",
- "dont", "hadnt", "hasnt", "havent", "isnt", "mightnt", "mustnt", "neither",
- "don't", "hadn't", "hasn't", "haven't", "isn't", "mightn't", "mustn't",
- "neednt", "needn't", "never", "none", "nope", "nor", "not", "nothing", "nowhere",
- "oughtnt", "shant", "shouldnt", "uhuh", "wasnt", "werent",
- "oughtn't", "shan't", "shouldn't", "uh-uh", "wasn't", "weren't",
- "without", "wont", "wouldnt", "won't", "wouldn't", "rarely", "seldom", "despite"}
-
-# booster/dampener 'intensifiers' or 'degree adverbs'
-# http://en.wiktionary.org/wiki/Category:English_degree_adverbs
-
-BOOSTER_DICT = \
-{"absolutely": B_INCR, "amazingly": B_INCR, "awfully": B_INCR, "completely": B_INCR, "considerably": B_INCR,
- "decidedly": B_INCR, "deeply": B_INCR, "effing": B_INCR, "enormously": B_INCR,
- "entirely": B_INCR, "especially": B_INCR, "exceptionally": B_INCR, "extremely": B_INCR,
- "fabulously": B_INCR, "flipping": B_INCR, "flippin": B_INCR,
- "fricking": B_INCR, "frickin": B_INCR, "frigging": B_INCR, "friggin": B_INCR, "fully": B_INCR, "fucking": B_INCR,
- "greatly": B_INCR, "hella": B_INCR, "highly": B_INCR, "hugely": B_INCR, "incredibly": B_INCR,
- "intensely": B_INCR, "majorly": B_INCR, "more": B_INCR, "most": B_INCR, "particularly": B_INCR,
- "purely": B_INCR, "quite": B_INCR, "really": B_INCR, "remarkably": B_INCR,
- "so": B_INCR, "substantially": B_INCR,
- "thoroughly": B_INCR, "totally": B_INCR, "tremendously": B_INCR,
- "uber": B_INCR, "unbelievably": B_INCR, "unusually": B_INCR, "utterly": B_INCR,
- "very": B_INCR,
- "almost": B_DECR, "barely": B_DECR, "hardly": B_DECR, "just enough": B_DECR,
- "kind of": B_DECR, "kinda": B_DECR, "kindof": B_DECR, "kind-of": B_DECR,
- "less": B_DECR, "little": B_DECR, "marginally": B_DECR, "occasionally": B_DECR, "partly": B_DECR,
- "scarcely": B_DECR, "slightly": B_DECR, "somewhat": B_DECR,
- "sort of": B_DECR, "sorta": B_DECR, "sortof": B_DECR, "sort-of": B_DECR}
-
-# check for special case idioms using a sentiment-laden keyword known to SAGE
-SPECIAL_CASE_IDIOMS = {"the shit": 3, "the bomb": 3, "bad ass": 1.5, "yeah right": -2,
-                       "cut the mustard": 2, "kiss of death": -1.5, "hand to mouth": -2}
-
-
-##Static methods##
-
-def negated(input_words, include_nt=True):
+from nltk.util import pairwise
+
+class VaderConstants:
     """
-    Determine if input contains negation words
+    A class to keep the Vader lists and constants.
     """
-    neg_words = NEGATE
-    if any(word.lower() in neg_words for word in input_words):
-        return True
-    if include_nt:
-        if any("n't" in word.lower() for word in input_words):
-            return True
-    for first, second in pairwise(input_words):
-        if second.lower() == "least" and first.lower() != 'at':
+    ##Constants##
+    # (empirically derived mean sentiment intensity rating increase for booster words)
+    B_INCR = 0.293
+    B_DECR = -0.293
+
+    # (empirically derived mean sentiment intensity rating increase for using
+    # ALLCAPs to emphasize a word)
+    C_INCR = 0.733
+
+    N_SCALAR = -0.74
+
+    NEGATE = {
+        "aint",
+        "arent",
+        "cannot",
+        "cant",
+        "couldnt",
+        "darent",
+        "didnt",
+        "doesnt",
+        "ain't",
+        "aren't",
+        "can't",
+        "couldn't",
+        "daren't",
+        "didn't",
+        "doesn't",
+        "dont",
+        "hadnt",
+        "hasnt",
+        "havent",
+        "isnt",
+        "mightnt",
+        "mustnt",
+        "neither",
+        "don't",
+        "hadn't",
+        "hasn't",
+        "haven't",
+        "isn't",
+        "mightn't",
+        "mustn't",
+        "neednt",
+        "needn't",
+        "never",
+        "none",
+        "nope",
+        "nor",
+        "not",
+        "nothing",
+        "nowhere",
+        "oughtnt",
+        "shant",
+        "shouldnt",
+        "uhuh",
+        "wasnt",
+        "werent",
+        "oughtn't",
+        "shan't",
+        "shouldn't",
+        "uh-uh",
+        "wasn't",
+        "weren't",
+        "without",
+        "wont",
+        "wouldnt",
+        "won't",
+        "wouldn't",
+        "rarely",
+        "seldom",
+        "despite",
+    }
+
+    # booster/dampener 'intensifiers' or 'degree adverbs'
+    # http://en.wiktionary.org/wiki/Category:English_degree_adverbs
+
+    BOOSTER_DICT = {
+        "absolutely": B_INCR,
+        "amazingly": B_INCR,
+        "awfully": B_INCR,
+        "completely": B_INCR,
+        "considerably": B_INCR,
+        "decidedly": B_INCR,
+        "deeply": B_INCR,
+        "effing": B_INCR,
+        "enormously": B_INCR,
+        "entirely": B_INCR,
+        "especially": B_INCR,
+        "exceptionally": B_INCR,
+        "extremely": B_INCR,
+        "fabulously": B_INCR,
+        "flipping": B_INCR,
+        "flippin": B_INCR,
+        "fricking": B_INCR,
+        "frickin": B_INCR,
+        "frigging": B_INCR,
+        "friggin": B_INCR,
+        "fully": B_INCR,
+        "fucking": B_INCR,
+        "greatly": B_INCR,
+        "hella": B_INCR,
+        "highly": B_INCR,
+        "hugely": B_INCR,
+        "incredibly": B_INCR,
+        "intensely": B_INCR,
+        "majorly": B_INCR,
+        "more": B_INCR,
+        "most": B_INCR,
+        "particularly": B_INCR,
+        "purely": B_INCR,
+        "quite": B_INCR,
+        "really": B_INCR,
+        "remarkably": B_INCR,
+        "so": B_INCR,
+        "substantially": B_INCR,
+        "thoroughly": B_INCR,
+        "totally": B_INCR,
+        "tremendously": B_INCR,
+        "uber": B_INCR,
+        "unbelievably": B_INCR,
+        "unusually": B_INCR,
+        "utterly": B_INCR,
+        "very": B_INCR,
+        "almost": B_DECR,
+        "barely": B_DECR,
+        "hardly": B_DECR,
+        "just enough": B_DECR,
+        "kind of": B_DECR,
+        "kinda": B_DECR,
+        "kindof": B_DECR,
+        "kind-of": B_DECR,
+        "less": B_DECR,
+        "little": B_DECR,
+        "marginally": B_DECR,
+        "occasionally": B_DECR,
+        "partly": B_DECR,
+        "scarcely": B_DECR,
+        "slightly": B_DECR,
+        "somewhat": B_DECR,
+        "sort of": B_DECR,
+        "sorta": B_DECR,
+        "sortof": B_DECR,
+        "sort-of": B_DECR,
+    }
+
+    # check for special case idioms using a sentiment-laden keyword known to SAGE
+    SPECIAL_CASE_IDIOMS = {
+        "the shit": 3,
+        "the bomb": 3,
+        "bad ass": 1.5,
+        "yeah right": -2,
+        "cut the mustard": 2,
+        "kiss of death": -1.5,
+        "hand to mouth": -2,
+    }
+
+    # for removing punctuation
+    REGEX_REMOVE_PUNCTUATION = re.compile("[{0}]".format(re.escape(string.punctuation)))
+
+    PUNC_LIST = [
+        ".",
+        "!",
+        "?",
+        ",",
+        ";",
+        ":",
+        "-",
+        "'",
+        '"',
+        "!!",
+        "!!!",
+        "??",
+        "???",
+        "?!?",
+        "!?!",
+        "?!?!",
+        "!?!?",
+    ]
+
+    def __init__(self):
+        pass
+
+    def negated(self, input_words, include_nt=True):
+        """
+        Determine if input contains negation words
+        """
+        neg_words = self.NEGATE
+        if any(word.lower() in neg_words for word in input_words):
             return True
-    return False
-
+        if include_nt:
+            if any("n't" in word.lower() for word in input_words):
+                return True
+        for first, second in pairwise(input_words):
+            if second.lower() == "least" and first.lower() != "at":
+                return True
+        return False
+
+    def normalize(self, score, alpha=15):
+        """
+        Normalize the score to be between -1 and 1 using an alpha that
+        approximates the max expected value
+        """
+        norm_score = score / math.sqrt((score * score) + alpha)
+        return norm_score
 
-def normalize(score, alpha=15):
-    """
-    Normalize the score to be between -1 and 1 using an alpha that
-    approximates the max expected value
-    """
-    norm_score = score/math.sqrt((score*score) + alpha)
-    return norm_score
 
+    def scalar_inc_dec(self, word, valence, is_cap_diff):
+        """
+        Check if the preceding words increase, decrease, or negate/nullify the
+        valence
+        """
+        scalar = 0.0
+        word_lower = word.lower()
+        if word_lower in self.BOOSTER_DICT:
+            scalar = self.BOOSTER_DICT[word_lower]
+            if valence < 0:
+                scalar *= -1
+            # check if booster/dampener word is in ALLCAPS (while others aren't)
+            if word.isupper() and is_cap_diff:
+                if valence > 0:
+                    scalar += self.C_INCR
+                else:
+                    scalar -= self.C_INCR
+        return scalar
 
-def allcap_differential(words):
-    """
-    Check whether just some words in the input are ALL CAPS
 
-    :param list words: The words to inspect
-    :returns: `True` if some but not all items in `words` are ALL CAPS
-    """
-    is_different = False
-    allcap_words = 0
-    for word in words:
-        if word.isupper():
-            allcap_words += 1
-    cap_differential = len(words) - allcap_words
-    if cap_differential > 0 and cap_differential < len(words):
-        is_different = True
-    return is_different
-
-
-def scalar_inc_dec(word, valence, is_cap_diff):
-    """
-    Check if the preceding words increase, decrease, or negate/nullify the
-    valence
-    """
-    scalar = 0.0
-    word_lower = word.lower()
-    if word_lower in BOOSTER_DICT:
-        scalar = BOOSTER_DICT[word_lower]
-        if valence < 0:
-            scalar *= -1
-        #check if booster/dampener word is in ALLCAPS (while others aren't)
-        if word.isupper() and is_cap_diff:
-            if valence > 0:
-                scalar += C_INCR
-            else: scalar -= C_INCR
-    return scalar
-
-class SentiText(object):
+class SentiText:
     """
     Identify sentiment-relevant string-level properties of input text.
     """
-    def __init__(self, text):
+
+    def __init__(self, text, punc_list, regex_remove_punctuation):
         if not isinstance(text, str):
-            text = str(text.encode('utf-8'))
+            text = str(text.encode("utf-8"))
         self.text = text
+        self.PUNC_LIST = punc_list
+        self.REGEX_REMOVE_PUNCTUATION = regex_remove_punctuation
         self.words_and_emoticons = self._words_and_emoticons()
-        # doesn't separate words from\
+        # doesn't separate words from
         # adjacent punctuation (keeps emoticons & contractions)
-        self.is_cap_diff = allcap_differential(self.words_and_emoticons)
+        self.is_cap_diff = self.allcap_differential(self.words_and_emoticons)
 
     def _words_plus_punc(self):
         """
@@ -166,14 +284,14 @@ class SentiText(object):
             ',cat': 'cat',
         }
         """
-        no_punc_text = REGEX_REMOVE_PUNCTUATION.sub('', self.text)
+        no_punc_text = self.REGEX_REMOVE_PUNCTUATION.sub("", self.text)
         # removes punctuation (but loses emoticons & contractions)
         words_only = no_punc_text.split()
         # remove singletons
-        words_only = set( w for w in words_only if len(w) > 1 )
+        words_only = set(w for w in words_only if len(w) > 1)
         # the product gives ('cat', ',') and (',', 'cat')
-        punc_before = {''.join(p): p[1] for p in product(PUNC_LIST, words_only)}
-        punc_after = {''.join(p): p[0] for p in product(words_only, PUNC_LIST)}
+        punc_before = {"".join(p): p[1] for p in product(self.PUNC_LIST, words_only)}
+        punc_after = {"".join(p): p[0] for p in product(words_only, self.PUNC_LIST)}
         words_punc_dict = punc_before
         words_punc_dict.update(punc_after)
         return words_punc_dict
@@ -192,21 +310,43 @@ class SentiText(object):
                 wes[i] = words_punc_dict[we]
         return wes
 
-class SentimentIntensityAnalyzer(object):
+    def allcap_differential(self, words):
+        """
+        Check whether just some words in the input are ALL CAPS
+
+        :param list words: The words to inspect
+        :returns: `True` if some but not all items in `words` are ALL CAPS
+        """
+        is_different = False
+        allcap_words = 0
+        for word in words:
+            if word.isupper():
+                allcap_words += 1
+        cap_differential = len(words) - allcap_words
+        if 0 < cap_differential < len(words):
+            is_different = True
+        return is_different
+
+
+class SentimentIntensityAnalyzer:
     """
     Give a sentiment intensity score to sentences.
     """
-    def __init__(self, lexicon_file="sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt"):
+
+    def __init__(
+        self, lexicon_file="sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt",
+    ):
         self.lexicon_file = nltk.data.load(lexicon_file)
         self.lexicon = self.make_lex_dict()
+        self.constants = VaderConstants()
 
     def make_lex_dict(self):
         """
         Convert lexicon file to a dictionary
         """
         lex_dict = {}
-        for line in self.lexicon_file.split('\n'):
-            (word, measure) = line.strip().split('\t')[0:2]
+        for line in self.lexicon_file.split("\n"):
+            (word, measure) = line.strip().split("\t")[0:2]
             lex_dict[word] = float(measure)
         return lex_dict
 
@@ -216,17 +356,19 @@ class SentimentIntensityAnalyzer(object):
         Positive values are positive valence, negative value are negative
         valence.
         """
-        sentitext = SentiText(text)
-        #text, words_and_emoticons, is_cap_diff = self.preprocess(text)
-
+        # text, words_and_emoticons, is_cap_diff = self.preprocess(text)
+        sentitext = SentiText(text, self.constants.PUNC_LIST,
+                              self.constants.REGEX_REMOVE_PUNCTUATION)
         sentiments = []
         words_and_emoticons = sentitext.words_and_emoticons
         for item in words_and_emoticons:
             valence = 0
             i = words_and_emoticons.index(item)
-            if (i < len(words_and_emoticons) - 1 and item.lower() == "kind" and \
-                words_and_emoticons[i+1].lower() == "of") or \
-                item.lower() in BOOSTER_DICT:
+            if (
+                i < len(words_and_emoticons) - 1
+                and item.lower() == "kind"
+                and words_and_emoticons[i + 1].lower() == "of"
+            ) or item.lower() in self.constants.BOOSTER_DICT:
                 sentiments.append(valence)
                 continue
 
@@ -241,28 +383,36 @@ class SentimentIntensityAnalyzer(object):
         words_and_emoticons = sentitext.words_and_emoticons
         item_lowercase = item.lower()
         if item_lowercase in self.lexicon:
-            #get the sentiment valence
+            # get the sentiment valence
             valence = self.lexicon[item_lowercase]
 
-            #check if sentiment laden word is in ALL CAPS (while others aren't)
+            # check if sentiment laden word is in ALL CAPS (while others aren't)
             if item.isupper() and is_cap_diff:
                 if valence > 0:
-                    valence += C_INCR
+                    valence += self.constants.C_INCR
                 else:
-                    valence -= C_INCR
-
-            for start_i in range(0,3):
-                if i > start_i and words_and_emoticons[i-(start_i+1)].lower() not in self.lexicon:
+                    valence -= self.constants.C_INCR
+
+            for start_i in range(0, 3):
+                if (
+                    i > start_i
+                    and words_and_emoticons[i - (start_i + 1)].lower()
+                    not in self.lexicon
+                ):
                     # dampen the scalar modifier of preceding words and emoticons
                    # (excluding the ones that immediately precede the item) based
                     # on their distance from the current item.
-                    s = scalar_inc_dec(words_and_emoticons[i-(start_i+1)], valence, is_cap_diff)
+                    s = self.constants.scalar_inc_dec(
+                        words_and_emoticons[i - (start_i + 1)], valence, is_cap_diff
+                    )
                     if start_i == 1 and s != 0:
-                        s = s*0.95
+                        s = s * 0.95
                     if start_i == 2 and s != 0:
-                        s = s*0.9
-                    valence = valence+s
-                    valence = self._never_check(valence, words_and_emoticons, start_i, i)
+                        s = s * 0.9
+                    valence = valence + s
+                    valence = self._never_check(
+                        valence, words_and_emoticons, start_i, i
+                    )
                     if start_i == 2:
                         valence = self._idioms_check(valence, words_and_emoticons, i)
 
@@ -280,91 +430,119 @@ class SentimentIntensityAnalyzer(object):
 
     def _least_check(self, valence, words_and_emoticons, i):
         # check for negation case using "least"
-        if i > 1 and words_and_emoticons[i-1].lower() not in self.lexicon \
-           and words_and_emoticons[i-1].lower() == "least":
-            if words_and_emoticons[i-2].lower() != "at" and words_and_emoticons[i-2].lower() != "very":
-                valence = valence*N_SCALAR
-        elif i > 0 and words_and_emoticons[i-1].lower() not in self.lexicon \
-             and words_and_emoticons[i-1].lower() == "least":
-            valence = valence*N_SCALAR
+        if (
+            i > 1
+            and words_and_emoticons[i - 1].lower() not in self.lexicon
+            and words_and_emoticons[i - 1].lower() == "least"
+        ):
+            if (
+                words_and_emoticons[i - 2].lower() != "at"
+                and words_and_emoticons[i - 2].lower() != "very"
+            ):
+                valence = valence * self.constants.N_SCALAR
+        elif (
+            i > 0
+            and words_and_emoticons[i - 1].lower() not in self.lexicon
+            and words_and_emoticons[i - 1].lower() == "least"
+        ):
+            valence = valence * self.constants.N_SCALAR
         return valence
 
     def _but_check(self, words_and_emoticons, sentiments):
-        # check for modification in sentiment due to contrastive conjunction 'but'
-        if 'but' in words_and_emoticons or 'BUT' in words_and_emoticons:
-            try:
-                bi = words_and_emoticons.index('but')
-            except ValueError:
-                bi = words_and_emoticons.index('BUT')
-            for sentiment in sentiments:
-                si = sentiments.index(sentiment)
-                if si < bi:
-                    sentiments.pop(si)
-                    sentiments.insert(si, sentiment*0.5)
-                elif si > bi:
-                    sentiments.pop(si)
-                    sentiments.insert(si, sentiment*1.5)
+        but = {"but", "BUT"} & set(words_and_emoticons)
+        if but:
+            bi = words_and_emoticons.index(next(iter(but)))
+            for sidx, sentiment in enumerate(sentiments):
+                if sidx < bi:
+                    sentiments[sidx] = sentiment * 0.5
+                elif sidx > bi:
+                    sentiments[sidx] = sentiment * 1.5
         return sentiments
 
     def _idioms_check(self, valence, words_and_emoticons, i):
-        onezero = "{0} {1}".format(words_and_emoticons[i-1], words_and_emoticons[i])
+        onezero = "{0} {1}".format(words_and_emoticons[i - 1], words_and_emoticons[i])
 
-        twoonezero = "{0} {1} {2}".format(words_and_emoticons[i-2],
-                                       words_and_emoticons[i-1], words_and_emoticons[i])
+        twoonezero = "{0} {1} {2}".format(
+            words_and_emoticons[i - 2],
+            words_and_emoticons[i - 1],
+            words_and_emoticons[i],
+        )
 
-        twoone = "{0} {1}".format(words_and_emoticons[i-2], words_and_emoticons[i-1])
+        twoone = "{0} {1}".format(
+            words_and_emoticons[i - 2], words_and_emoticons[i - 1]
+        )
 
-        threetwoone = "{0} {1} {2}".format(words_and_emoticons[i-3],
-                                        words_and_emoticons[i-2], words_and_emoticons[i-1])
+        threetwoone = "{0} {1} {2}".format(
+            words_and_emoticons[i - 3],
+            words_and_emoticons[i - 2],
+            words_and_emoticons[i - 1],
+        )
 
-        threetwo = "{0} {1}".format(words_and_emoticons[i-3], words_and_emoticons[i-2])
+        threetwo = "{0} {1}".format(
+            words_and_emoticons[i - 3], words_and_emoticons[i - 2]
+        )
 
         sequences = [onezero, twoonezero, twoone, threetwoone, threetwo]
 
         for seq in sequences:
-            if seq in SPECIAL_CASE_IDIOMS:
-                valence = SPECIAL_CASE_IDIOMS[seq]
+            if seq in self.constants.SPECIAL_CASE_IDIOMS:
+                valence = self.constants.SPECIAL_CASE_IDIOMS[seq]
                 break
 
-        if len(words_and_emoticons)-1 > i:
-            zeroone = "{0} {1}".format(words_and_emoticons[i], words_and_emoticons[i+1])
-            if zeroone in SPECIAL_CASE_IDIOMS:
-                valence = SPECIAL_CASE_IDIOMS[zeroone]
-        if len(words_and_emoticons)-1 > i+1:
-            zeroonetwo = "{0} {1} {2}".format(words_and_emoticons[i], words_and_emoticons[i+1], words_and_emoticons[i+2])
-            if zeroonetwo in SPECIAL_CASE_IDIOMS:
-                valence = SPECIAL_CASE_IDIOMS[zeroonetwo]
+        if len(words_and_emoticons) - 1 > i:
+            zeroone = "{0} {1}".format(
+                words_and_emoticons[i], words_and_emoticons[i + 1]
+            )
+            if zeroone in self.constants.SPECIAL_CASE_IDIOMS:
+                valence = self.constants.SPECIAL_CASE_IDIOMS[zeroone]
+        if len(words_and_emoticons) - 1 > i + 1:
+            zeroonetwo = "{0} {1} {2}".format(
+                words_and_emoticons[i],
+                words_and_emoticons[i + 1],
+                words_and_emoticons[i + 2],
+            )
+            if zeroonetwo in self.constants.SPECIAL_CASE_IDIOMS:
+                valence = self.constants.SPECIAL_CASE_IDIOMS[zeroonetwo]
 
         # check for booster/dampener bi-grams such as 'sort of' or 'kind of'
-        if threetwo in BOOSTER_DICT or twoone in BOOSTER_DICT:
-            valence = valence+B_DECR
+        if threetwo in self.constants.BOOSTER_DICT or twoone in self.constants.BOOSTER_DICT:
+            valence = valence + self.constants.B_DECR
         return valence
 
     def _never_check(self, valence, words_and_emoticons, start_i, i):
         if start_i == 0:
-            if negated([words_and_emoticons[i-1]]):
-                    valence = valence*N_SCALAR
+            if self.constants.negated([words_and_emoticons[i - 1]]):
+                valence = valence * self.constants.N_SCALAR
         if start_i == 1:
-            if words_and_emoticons[i-2] == "never" and\
-               (words_and_emoticons[i-1] == "so" or
-                words_and_emoticons[i-1] == "this"):
-                valence = valence*1.5
-            elif negated([words_and_emoticons[i-(start_i+1)]]):
-                valence = valence*N_SCALAR
+            if words_and_emoticons[i - 2] == "never" and (
+                words_and_emoticons[i - 1] == "so"
+                or words_and_emoticons[i - 1] == "this"
+            ):
+                valence = valence * 1.5
+            elif self.constants.negated([words_and_emoticons[i - (start_i + 1)]]):
+                valence = valence * self.constants.N_SCALAR
         if start_i == 2:
-            if words_and_emoticons[i-3] == "never" and \
-               (words_and_emoticons[i-2] == "so" or words_and_emoticons[i-2] == "this") or \
-               (words_and_emoticons[i-1] == "so" or words_and_emoticons[i-1] == "this"):
-                valence = valence*1.25
-            elif negated([words_and_emoticons[i-(start_i+1)]]):
-                valence = valence*N_SCALAR
+            if (
+                words_and_emoticons[i - 3] == "never"
+                and (
+                    words_and_emoticons[i - 2] == "so"
+                    or words_and_emoticons[i - 2] == "this"
+                )
+                or (
+                    words_and_emoticons[i - 1] == "so"
+                    or words_and_emoticons[i - 1] == "this"
+                )
+            ):
+                valence = valence * 1.25
+            elif self.constants.negated([words_and_emoticons[i - (start_i + 1)]]):
+                valence = valence * self.constants.N_SCALAR
         return valence
 
     def _punctuation_emphasis(self, sum_s, text):
         # add emphasis from exclamation points and question marks
         ep_amplifier = self._amplify_ep(text)
         qm_amplifier = self._amplify_qm(text)
-        punct_emph_amplifier = ep_amplifier+qm_amplifier
+        punct_emph_amplifier = ep_amplifier + qm_amplifier
         return punct_emph_amplifier
 
     def _amplify_ep(self, text):
@@ -374,7 +552,7 @@ class SentimentIntensityAnalyzer(object):
             ep_count = 4
         # (empirically derived mean sentiment intensity rating increase for
         # exclamation points)
-        ep_amplifier = ep_count*0.292
+        ep_amplifier = ep_count * 0.292
         return ep_amplifier
 
     def _amplify_qm(self, text):
@@ -385,7 +563,7 @@ class SentimentIntensityAnalyzer(object):
             if qm_count <= 3:
                 # (empirically derived mean sentiment intensity rating increase for
                 # question marks)
-                qm_amplifier = qm_count*0.18
+                qm_amplifier = qm_count * 0.18
             else:
                 qm_amplifier = 0.96
         return qm_amplifier
@@ -397,9 +575,13 @@ class SentimentIntensityAnalyzer(object):
         neu_count = 0
         for sentiment_score in sentiments:
             if sentiment_score > 0:
-                pos_sum += (float(sentiment_score) +1) # compensates for neutral words that are counted as 1
+                pos_sum += (
+                    float(sentiment_score) + 1
+                )  # compensates for neutral words that are counted as 1
             if sentiment_score < 0:
-                neg_sum += (float(sentiment_score) -1) # when used with math.fabs(), compensates for neutrals
+                neg_sum += (
+                    float(sentiment_score) - 1
+                )  # when used with math.fabs(), compensates for neutrals
             if sentiment_score == 0:
                 neu_count += 1
         return pos_sum, neg_sum, neu_count
@@ -411,17 +593,17 @@ class SentimentIntensityAnalyzer(object):
             punct_emph_amplifier = self._punctuation_emphasis(sum_s, text)
             if sum_s > 0:
                 sum_s += punct_emph_amplifier
-            elif  sum_s < 0:
+            elif sum_s < 0:
                 sum_s -= punct_emph_amplifier
 
-            compound = normalize(sum_s)
+            compound = self.constants.normalize(sum_s)
             # discriminate between positive, negative and neutral sentiment scores
             pos_sum, neg_sum, neu_count = self._sift_sentiment_scores(sentiments)
 
             if pos_sum > math.fabs(neg_sum):
-                pos_sum += (punct_emph_amplifier)
+                pos_sum += punct_emph_amplifier
             elif pos_sum < math.fabs(neg_sum):
-                neg_sum -= (punct_emph_amplifier)
+                neg_sum -= punct_emph_amplifier
 
             total = pos_sum + math.fabs(neg_sum) + neu_count
             pos = math.fabs(pos_sum / total)
@@ -434,10 +616,11 @@ class SentimentIntensityAnalyzer(object):
             neg = 0.0
             neu = 0.0
 
-        sentiment_dict = \
-            {"neg" : round(neg, 3),
-             "neu" : round(neu, 3),
-             "pos" : round(pos, 3),
-             "compound" : round(compound, 4)}
+        sentiment_dict = {
+            "neg": round(neg, 3),
+            "neu": round(neu, 3),
+            "pos": round(pos, 3),
+            "compound": round(compound, 4),
+        }
 
         return sentiment_dict
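
The compound score returned above comes from VaderConstants.normalize, which maps an unbounded valence sum into (-1, 1) via score / sqrt(score * score + alpha). A minimal sketch of the formula and of the refactored analyzer end to end; the sample sentence is illustrative.

import math
from nltk.sentiment.vader import SentimentIntensityAnalyzer

def normalize(score, alpha=15):
    # Same formula as VaderConstants.normalize: alpha approximates the
    # maximum expected value of score * score.
    return score / math.sqrt((score * score) + alpha)

print(round(normalize(4.0), 4))  # 0.7184

# The analyzer builds its own VaderConstants instance, so the public
# entry point is unchanged by the refactoring.
sia = SentimentIntensityAnalyzer()
print(sia.polarity_scores("The plot was good, but the ending was terrible!"))
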
diff --git a/nlp_resource_data/nltk/sentiment/vader.pyc b/nlp_resource_data/nltk/sentiment/vader.pyc
deleted file mode 100755 (executable)
index 04a58fb..0000000
Binary files a/nlp_resource_data/nltk/sentiment/vader.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 6886f7b..04efb34
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 #         Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
@@ -29,3 +29,4 @@ from nltk.stem.porter import PorterStemmer
 from nltk.stem.snowball import SnowballStemmer
 from nltk.stem.wordnet import WordNetLemmatizer
 from nltk.stem.rslp import RSLPStemmer
+from nltk.stem.cistem import Cistem
diff --git a/nlp_resource_data/nltk/stem/__init__.pyc b/nlp_resource_data/nltk/stem/__init__.pyc
deleted file mode 100755 (executable)
index 02f7067..0000000
Binary files a/nlp_resource_data/nltk/stem/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8bf7162
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..79cd0fc
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/arlstem.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/arlstem.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7732a26
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/arlstem.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/cistem.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/cistem.cpython-37.pyc
new file mode 100644 (file)
index 0000000..25ab911
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/cistem.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/isri.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/isri.cpython-37.pyc
new file mode 100644 (file)
index 0000000..30f813a
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/isri.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/lancaster.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/lancaster.cpython-37.pyc
new file mode 100644 (file)
index 0000000..4682904
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/lancaster.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/porter.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/porter.cpython-37.pyc
new file mode 100644 (file)
index 0000000..80bc252
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/porter.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/regexp.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/regexp.cpython-37.pyc
new file mode 100644 (file)
index 0000000..135d65a
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/regexp.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/rslp.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/rslp.cpython-37.pyc
new file mode 100644 (file)
index 0000000..3e18d37
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/rslp.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/snowball.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/snowball.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a61a849
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/snowball.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..4243018
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/util.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/stem/__pycache__/wordnet.cpython-37.pyc b/nlp_resource_data/nltk/stem/__pycache__/wordnet.cpython-37.pyc
new file mode 100644 (file)
index 0000000..6c440a1
Binary files /dev/null and b/nlp_resource_data/nltk/stem/__pycache__/wordnet.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index 92ab73d..dfa5c27
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmer Interface
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 #         Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
@@ -8,16 +8,15 @@
 # For license information, see LICENSE.TXT
 
 from abc import ABCMeta, abstractmethod
-from six import add_metaclass
 
 
-@add_metaclass(ABCMeta)
-class StemmerI(object):
+class StemmerI(metaclass=ABCMeta):
     """
     A processing interface for removing morphological affixes from
     words.  This process is known as stemming.
 
     """
+
     @abstractmethod
     def stem(self, token):
         """
diff --git a/nlp_resource_data/nltk/stem/api.pyc b/nlp_resource_data/nltk/stem/api.pyc
deleted file mode 100755 (executable)
index 40f1466..0000000
Binary files a/nlp_resource_data/nltk/stem/api.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 81de360..86cec73
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: ARLSTem Stemmer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 #
 # Author: Kheireddine Abainia (x-programer) <k.abainia@gmail.com>
 # Algorithms: Kheireddine Abainia <k.abainia@gmail.com>
@@ -25,81 +25,78 @@ index, over-stemming index and stemming weight), and the results showed that
 ARLSTem is promising and producing high performances. This stemmer is not
 based on any dictionary and can be used on-line effectively.
 """
-from __future__ import unicode_literals
 import re
 
 from nltk.stem.api import StemmerI
 
 
 class ARLSTem(StemmerI):
-    '''
+    """
     ARLSTem stemmer : a light Arabic Stemming algorithm without any dictionary.
     Department of Telecommunication & Information Processing. USTHB University,
     Algiers, Algeria.
     ARLSTem.stem(token) returns the Arabic stem for the input token.
     The ARLSTem Stemmer requires that all tokens are encoded using Unicode
     encoding.
-    '''
+    """
 
     def __init__(self):
         # different Alif with hamza
-        self.re_hamzated_alif = re.compile(r'[\u0622\u0623\u0625]')
-        self.re_alifMaqsura = re.compile(r'[\u0649]')
-        self.re_diacritics = re.compile(r'[\u064B-\u065F]')
+        self.re_hamzated_alif = re.compile(r"[\u0622\u0623\u0625]")
+        self.re_alifMaqsura = re.compile(r"[\u0649]")
+        self.re_diacritics = re.compile(r"[\u064B-\u065F]")
 
         # Alif Laam, Laam Laam, Fa Laam, Fa Ba
-        self.pr2 = [
-            '\u0627\u0644', '\u0644\u0644',
-            '\u0641\u0644', '\u0641\u0628'
-            ]
+        self.pr2 = ["\u0627\u0644", "\u0644\u0644", "\u0641\u0644", "\u0641\u0628"]
         # Ba Alif Laam, Kaaf Alif Laam, Waaw Alif Laam
-        self.pr3 = [
-            '\u0628\u0627\u0644',
-            '\u0643\u0627\u0644',
-            '\u0648\u0627\u0644'
-            ]
+        self.pr3 = ["\u0628\u0627\u0644", "\u0643\u0627\u0644", "\u0648\u0627\u0644"]
         # Fa Laam Laam, Waaw Laam Laam
-        self.pr32 = ['\u0641\u0644\u0644', '\u0648\u0644\u0644']
+        self.pr32 = ["\u0641\u0644\u0644", "\u0648\u0644\u0644"]
         # Fa Ba Alif Laam, Waaw Ba Alif Laam, Fa Kaaf Alif Laam
         self.pr4 = [
-            '\u0641\u0628\u0627\u0644',
-            '\u0648\u0628\u0627\u0644',
-            '\u0641\u0643\u0627\u0644'
-            ]
+            "\u0641\u0628\u0627\u0644",
+            "\u0648\u0628\u0627\u0644",
+            "\u0641\u0643\u0627\u0644",
+        ]
 
         # Kaf Yaa, Kaf Miim
-        self.su2 = [
-            '\u0643\u064A',
-            '\u0643\u0645'
-            ]
+        self.su2 = ["\u0643\u064A", "\u0643\u0645"]
         # Ha Alif, Ha Miim
-        self.su22 = ['\u0647\u0627', '\u0647\u0645']
+        self.su22 = ["\u0647\u0627", "\u0647\u0645"]
         # Kaf Miim Alif, Kaf Noon Shadda
-        self.su3 = ['\u0643\u0645\u0627', '\u0643\u0646\u0651']
+        self.su3 = ["\u0643\u0645\u0627", "\u0643\u0646\u0651"]
         # Ha Miim Alif, Ha Noon Shadda
-        self.su32 = ['\u0647\u0645\u0627', '\u0647\u0646\u0651']
+        self.su32 = ["\u0647\u0645\u0627", "\u0647\u0646\u0651"]
 
         # Alif Noon, Ya Noon, Waaw Noon
-        self.pl_si2 = ['\u0627\u0646', '\u064A\u0646', '\u0648\u0646']
+        self.pl_si2 = ["\u0627\u0646", "\u064A\u0646", "\u0648\u0646"]
         # Taa Alif Noon, Taa Ya Noon
-        self.pl_si3 = ['\u062A\u0627\u0646', '\u062A\u064A\u0646']
+        self.pl_si3 = ["\u062A\u0627\u0646", "\u062A\u064A\u0646"]
 
         # Alif Noon, Waaw Noon
-        self.verb_su2 = ['\u0627\u0646', '\u0648\u0646']
+        self.verb_su2 = ["\u0627\u0646", "\u0648\u0646"]
         # Siin Taa, Siin Yaa
-        self.verb_pr2 = ['\u0633\u062A', '\u0633\u064A']
+        self.verb_pr2 = ["\u0633\u062A", "\u0633\u064A"]
         # Siin Alif, Siin Noon
-        self.verb_pr22 = ['\u0633\u0627', '\u0633\u0646']
-
+        self.verb_pr22 = ["\u0633\u0627", "\u0633\u0646"]
+        # Lam Noon, Lam Taa, Lam Yaa, Lam Hamza
+        self.verb_pr33 = [
+            "\u0644\u0646",
+            "\u0644\u062A",
+            "\u0644\u064A",
+            "\u0644\u0623",
+        ]
         # Taa Miim Alif, Taa Noon Shadda
-        self.verb_suf3 = ['\u062A\u0645\u0627', '\u062A\u0646\u0651']
+        self.verb_suf3 = ["\u062A\u0645\u0627", "\u062A\u0646\u0651"]
         # Noon Alif, Taa Miim, Taa Alif, Waaw Alif
         self.verb_suf2 = [
-            '\u0646\u0627', '\u062A\u0645',
-            '\u062A\u0627', '\u0648\u0627'
-            ]
+            "\u0646\u0627",
+            "\u062A\u0645",
+            "\u062A\u0627",
+            "\u0648\u0627",
+        ]
         # Taa, Alif, Noon
-        self.verb_suf1 = ['\u062A', '\u0627', '\u0646']
+        self.verb_suf1 = ["\u062A", "\u0627", "\u0646"]
 
     def stem(self, token):
         """
@@ -107,8 +104,10 @@ class ARLSTem(StemmerI):
         """
         try:
             if token is None:
-                raise ValueError("The word could not be stemmed, because \
-                                 it is empty !")
+                raise ValueError(
+                    "The word could not be stemmed, because \
+                                 it is empty !"
+                )
             # remove Arabic diacritics and replace some letters with others
             token = self.norm(token)
             # strip common prefixes of the nouns
@@ -141,14 +140,14 @@ class ARLSTem(StemmerI):
             beginning.
         """
         # strip Arabic diacritics
-        token = self.re_diacritics.sub('', token)
+        token = self.re_diacritics.sub("", token)
         # replace Hamzated Alif with Alif bare
-        token = self.re_hamzated_alif.sub('\u0627', token)
+        token = self.re_hamzated_alif.sub("\u0627", token)
         # replace alifMaqsura with Yaa
-        token = self.re_alifMaqsura.sub('\u064A', token)
+        token = self.re_alifMaqsura.sub("\u064A", token)
         # strip the Waaw from the word beginning if the remaining is 3 letters
         # at least
-        if token.startswith('\u0648') and len(token) > 3:
+        if token.startswith("\u0648") and len(token) > 3:
             token = token[1:]
         return token
 
@@ -177,7 +176,7 @@ class ARLSTem(StemmerI):
         """
             remove suffixes from the word's end.
         """
-        if token.endswith('\u0643') and len(token) > 3:
+        if token.endswith("\u0643") and len(token) > 3:
             return token[:-1]
         if len(token) > 4:
             for s2 in self.su2:
@@ -187,7 +186,7 @@ class ARLSTem(StemmerI):
             for s3 in self.su3:
                 if token.endswith(s3):
                     return token[:-3]
-        if token.endswith('\u0647') and len(token) > 3:
+        if token.endswith("\u0647") and len(token) > 3:
             token = token[:-1]
             return token
         if len(token) > 4:
@@ -198,7 +197,7 @@ class ARLSTem(StemmerI):
             for s3 in self.su32:
                 if token.endswith(s3):
                     return token[:-3]
-        if token.endswith('\u0646\u0627') and len(token) > 4:
+        if token.endswith("\u0646\u0627") and len(token) > 4:
             return token[:-2]
         return token
 
@@ -206,7 +205,7 @@ class ARLSTem(StemmerI):
         """
             transform the word from the feminine form to the masculine form.
         """
-        if token.endswith('\u0629') and len(token) > 3:
+        if token.endswith("\u0629") and len(token) > 3:
             return token[:-1]
 
     def plur2sing(self, token):
@@ -221,13 +220,11 @@ class ARLSTem(StemmerI):
             for ps3 in self.pl_si3:
                 if token.endswith(ps3):
                     return token[:-3]
-        if len(token) > 3 and token.endswith('\u0627\u062A'):
+        if len(token) > 3 and token.endswith("\u0627\u062A"):
             return token[:-2]
-        if (len(token) > 3 and token.startswith('\u0627')
-           and token[2] == '\u0627'):
+        if len(token) > 3 and token.startswith("\u0627") and token[2] == "\u0627":
             return token[:2] + token[3:]
-        if (len(token) > 4 and token.startswith('\u0627')
-           and token[-2] == '\u0627'):
+        if len(token) > 4 and token.startswith("\u0627") and token[-2] == "\u0627":
             return token[1:-2] + token[-1]
 
     def verb(self, token):
@@ -246,42 +243,41 @@ class ARLSTem(StemmerI):
         vb = self.verb_t4(token)
         if vb is not None:
             return vb
-        return self.verb_t5(token)
+        vb = self.verb_t5(token)
+        if vb is not None:
+            return vb
+        return self.verb_t6(token)
 
     def verb_t1(self, token):
         """
             stem the present prefixes and suffixes
         """
-        if len(token) > 5 and token.startswith('\u062A'):  # Taa
+        if len(token) > 5 and token.startswith("\u062A"):  # Taa
             for s2 in self.pl_si2:
                 if token.endswith(s2):
                     return token[1:-2]
-        if len(token) > 5 and token.startswith('\u064A'):  # Yaa
+        if len(token) > 5 and token.startswith("\u064A"):  # Yaa
             for s2 in self.verb_su2:
                 if token.endswith(s2):
                     return token[1:-2]
-        if len(token) > 4 and token.startswith('\u0627'):  # Alif
+        if len(token) > 4 and token.startswith("\u0627"):  # Alif
             # Waaw Alif
-            if len(token) > 5 and token.endswith('\u0648\u0627'):
+            if len(token) > 5 and token.endswith("\u0648\u0627"):
                 return token[1:-2]
             # Yaa
-            if token.endswith('\u064A'):
+            if token.endswith("\u064A"):
                 return token[1:-1]
             # Alif
-            if token.endswith('\u0627'):
+            if token.endswith("\u0627"):
                 return token[1:-1]
             # Noon
-            if token.endswith('\u0646'):
+            if token.endswith("\u0646"):
                 return token[1:-1]
         # ^Yaa, Noon$
-        if (len(token) > 4
-           and token.startswith('\u064A')
-           and token.endswith('\u0646')):
+        if len(token) > 4 and token.startswith("\u064A") and token.endswith("\u0646"):
             return token[1:-1]
         # ^Taa, Noon$
-        if (len(token) > 4
-           and token.startswith('\u062A')
-           and token.endswith('\u0646')):
+        if len(token) > 4 and token.startswith("\u062A") and token.endswith("\u0646"):
             return token[1:-1]
 
     def verb_t2(self, token):
@@ -291,26 +287,27 @@ class ARLSTem(StemmerI):
         if len(token) > 6:
             for s2 in self.pl_si2:
                 # ^Siin Taa
-                if (token.startswith(self.verb_pr2[0])
-                   and token.endswith(s2)):
+                if token.startswith(self.verb_pr2[0]) and token.endswith(s2):
                     return token[2:-2]
             # ^Siin Yaa, Alif Noon$
-            if (token.startswith(self.verb_pr2[1])
-               and token.endswith(self.pl_si2[0])):
+            if token.startswith(self.verb_pr2[1]) and token.endswith(self.pl_si2[0]):
                 return token[2:-2]
             # ^Siin Yaa, Waaw Noon$
-            if (token.startswith(self.verb_pr2[1])
-               and token.endswith(self.pl_si2[2])):
+            if token.startswith(self.verb_pr2[1]) and token.endswith(self.pl_si2[2]):
                 return token[2:-2]
         # ^Siin Taa, Noon$
-        if (len(token) > 5
-           and token.startswith(self.verb_pr2[0])
-           and token.endswith('\u0646')):
+        if (
+            len(token) > 5
+            and token.startswith(self.verb_pr2[0])
+            and token.endswith("\u0646")
+        ):
             return token[2:-1]
         # ^Siin Yaa, Noon$
-        if (len(token) > 5
-           and token.startswith(self.verb_pr2[1])
-           and token.endswith('\u0646')):
+        if (
+            len(token) > 5
+            and token.startswith(self.verb_pr2[1])
+            and token.endswith("\u0646")
+        ):
             return token[2:-1]
 
     def verb_t3(self, token):
@@ -319,7 +316,7 @@ class ARLSTem(StemmerI):
         """
         if len(token) > 5:
             for su3 in self.verb_suf3:
-                if(token.endswith(su3)):
+                if token.endswith(su3):
                     return token[:-3]
         if len(token) > 4:
             for su2 in self.verb_suf2:
@@ -338,7 +335,7 @@ class ARLSTem(StemmerI):
             for pr1 in self.verb_suf1:
                 if token.startswith(pr1):
                     return token[1:]
-            if token.startswith('\u064A'):
+            if token.startswith("\u064A"):
                 return token[1:]
 
     def verb_t5(self, token):
@@ -353,3 +350,13 @@ class ARLSTem(StemmerI):
                 if token.startswith(pr2):
                     return token[2:]
         return token
+
+    def verb_t6(self, token):
+        """
+            stem the order prefixes
+        """
+        if len(token) > 4:
+            for pr3 in self.verb_pr33:
+                if token.startswith(pr3):
+                    return token[2:]
+        return token
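
The new verb_t6 rule extends the verb-stemming chain (verb_t1 through verb_t5) to the Lam prefixes listed in verb_pr33. A minimal sketch of the rule in isolation; the sample token is illustrative:

from nltk.stem.arlstem import ARLSTem

stemmer = ARLSTem()
# verb_t6 strips the two-letter Lam prefixes (Lam Noon, Lam Taa,
# Lam Yaa, Lam Hamza) when the token is longer than four letters.
print(stemmer.verb_t6("لنكتب"))  # -> "كتب" under the rule above
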
diff --git a/nlp_resource_data/nltk/stem/arlstem.pyc b/nlp_resource_data/nltk/stem/arlstem.pyc
deleted file mode 100755 (executable)
index 169b8fd..0000000
Binary files a/nlp_resource_data/nltk/stem/arlstem.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/stem/cistem.py b/nlp_resource_data/nltk/stem/cistem.py
new file mode 100644 (file)
index 0000000..ef1cc50
--- /dev/null
@@ -0,0 +1,218 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: CISTEM Stemmer for German
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Leonie Weissweiler <l.weissweiler@outlook.de>
+# Algorithm: Leonie Weissweiler <l.weissweiler@outlook.de>
+#            Alexander Fraser <fraser@cis.lmu.de>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+import re
+from nltk.stem.api import StemmerI
+
+
+class Cistem(StemmerI):
+    """
+    CISTEM Stemmer for German
+
+    This is the official Python implementation of the CISTEM stemmer.
+    It is based on the paper
+    Leonie Weissweiler, Alexander Fraser (2017). Developing a Stemmer for German
+    Based on a Comparative Analysis of Publicly Available Stemmers.
+    In Proceedings of the German Society for Computational Linguistics and Language
+    Technology (GSCL)
+    which can be read here:
+    http://www.cis.lmu.de/~weissweiler/cistem/
+
+    In the paper, we conducted an analysis of publicly available stemmers,
+    developed two gold standards for German stemming and evaluated the stemmers
+    based on the two gold standards. We then proposed the stemmer implemented here
+    and show that it achieves slightly better f-measure than the other stemmers and
+    is thrice as fast as the Snowball stemmer for German while being about as fast
+    as most other stemmers.
+
+    case_insensitive is a boolean specifying whether case-insensitive stemming
+    should be used. Case insensitivity improves performance only if words in the
+    text may be incorrectly upper case. For all-lowercase and correctly cased
+    text, best performance is achieved by setting case_insensitive to False.
+
+    :param case_insensitive: if True, the stemming is case insensitive. False by default.
+    :type case_insensitive: bool
+    """
+
+    strip_ge = re.compile(r"^ge(.{4,})")
+    repl_xx = re.compile(r"(.)\1")
+    strip_emr = re.compile(r"e[mr]$")
+    strip_nd = re.compile(r"nd$")
+    strip_t = re.compile(r"t$")
+    strip_esn = re.compile(r"[esn]$")
+    repl_xx_back = re.compile(r"(.)\*")
+
+    def __init__(self, case_insensitive=False):
+        self._case_insensitive = case_insensitive
+
+    @staticmethod
+    def replace_to(word):
+        word = word.replace("sch", "$")
+        word = word.replace("ei", "%")
+        word = word.replace("ie", "&")
+        word = Cistem.repl_xx.sub(r"\1*", word)
+
+        return word
+
+    @staticmethod
+    def replace_back(word):
+        word = Cistem.repl_xx_back.sub(r"\1\1", word)
+        word = word.replace("%", "ei")
+        word = word.replace("&", "ie")
+        word = word.replace("$", "sch")
+
+        return word
+
+    def stem(self, word):
+        """
+        This method takes the word to be stemmed and returns the stemmed word.
+
+        :param word: the word that is to be stemmed
+        :type word: unicode
+        :return word: the stemmed word
+        :rtype: unicode
+
+        >>> from nltk.stem.cistem import Cistem
+        >>> stemmer = Cistem()
+        >>> s1 = "Speicherbehältern"
+        >>> stemmer.stem(s1)
+        'speicherbehalt'
+        >>> s2 = "Grenzpostens"
+        >>> stemmer.stem(s2)
+        'grenzpost'
+        >>> s3 = "Ausgefeiltere"
+        >>> stemmer.stem(s3)
+        'ausgefeilt'
+        >>> stemmer = Cistem(True)
+        >>> stemmer.stem(s1)
+        'speicherbehal'
+        >>> stemmer.stem(s2)
+        'grenzpo'
+        >>> stemmer.stem(s3)
+        'ausgefeil'
+        """
+        if len(word) == 0:
+            return word
+
+        upper = word[0].isupper()
+        word = word.lower()
+
+        word = word.replace("ü", "u")
+        word = word.replace("ö", "o")
+        word = word.replace("ä", "a")
+        word = word.replace("ß", "ss")
+
+        word = Cistem.strip_ge.sub(r"\1", word)
+        word = Cistem.replace_to(word)
+
+        while len(word) > 3:
+            if len(word) > 5:
+                (word, success) = Cistem.strip_emr.subn("", word)
+                if success != 0:
+                    continue
+
+                (word, success) = Cistem.strip_nd.subn("", word)
+                if success != 0:
+                    continue
+
+            if not upper or self._case_insensitive:
+                (word, success) = Cistem.strip_t.subn("", word)
+                if success != 0:
+                    continue
+
+            (word, success) = Cistem.strip_esn.subn("", word)
+            if success != 0:
+                continue
+            else:
+                break
+
+        word = Cistem.replace_back(word)
+
+        return word
+
+    def segment(self, word):
+        """
+        This method works very similarly to stem (:func:`cistem.stem`). The difference is that in
+        addition to returning the stem, it also returns the rest that was removed at
+        the end. To be able to return the stem unchanged, so that the stem and the rest
+        can be concatenated to form the original word, all substitutions that altered
+        the stem in any way other than by removing letters at the end were left out.
+
+        :param word: the word that is to be stemmed
+        :type word: unicode
+        :return: the stemmed word and the suffix that was removed
+        :rtype: tuple(unicode, unicode)
+
+        >>> from nltk.stem.cistem import Cistem
+        >>> stemmer = Cistem()
+        >>> s1 = "Speicherbehältern"
+        >>> print("('" + stemmer.segment(s1)[0] + "', '" + stemmer.segment(s1)[1] + "')")
+        ('speicherbehält', 'ern')
+        >>> s2 = "Grenzpostens"
+        >>> stemmer.segment(s2)
+        ('grenzpost', 'ens')
+        >>> s3 = "Ausgefeiltere"
+        >>> stemmer.segment(s3)
+        ('ausgefeilt', 'ere')
+        >>> stemmer = Cistem(True)
+        >>> print("('" + stemmer.segment(s1)[0] + "', '" + stemmer.segment(s1)[1] + "')")
+        ('speicherbehäl', 'tern')
+        >>> stemmer.segment(s2)
+        ('grenzpo', 'stens')
+        >>> stemmer.segment(s3)
+        ('ausgefeil', 'tere')
+        """
+
+        rest_length = 0
+
+        if len(word) == 0:
+            return ("", "")
+
+        upper = word[0].isupper()
+        word = word.lower()
+
+        original = word[:]
+
+        word = Cistem.replace_to(word)
+
+        while len(word) > 3:
+            if len(word) > 5:
+                (word, success) = Cistem.strip_emr.subn("", word)
+                if success != 0:
+                    rest_length += 2
+                    continue
+
+                (word, success) = Cistem.strip_nd.subn("", word)
+                if success != 0:
+                    rest_length += 2
+                    continue
+
+            if not upper or self._case_insensitive:
+                (word, success) = Cistem.strip_t.subn("", word)
+                if success != 0:
+                    rest_length += 1
+                    continue
+
+            (word, success) = Cistem.strip_esn.subn("", word)
+            if success != 0:
+                rest_length += 1
+                continue
+            else:
+                break
+
+        word = Cistem.replace_back(word)
+
+        if rest_length:
+            rest = original[-rest_length:]
+        else:
+            rest = ""
+
+        return (word, rest)
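
For quick reference, a minimal usage sketch of the class added above; the expected
values are taken directly from the doctests in this file:

    from nltk.stem.cistem import Cistem

    stemmer = Cistem()
    stem, rest = stemmer.segment("Grenzpostens")
    assert (stem, rest) == ("grenzpost", "ens")
    # segment() only removes letters at the end of the word, so the
    # two pieces concatenate back to the lower-cased input:
    assert stem + rest == "grenzpostens"
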
old mode 100755 (executable)
new mode 100644 (file)
index 44c187a..695e5fa
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: The ISRI Arabic Stemmer
 #
-# Copyright (C) 2001-2017 NLTK Proejct
+# Copyright (C) 2001-2020 NLTK Project
 # Algorithm: Kazem Taghva, Rania Elkhoury, and Jeffrey Coombs (2005)
 # Author: Hosam Algasaier <hosam_hme@yahoo.com>
 # URL: <http://nltk.org/>
@@ -29,14 +29,13 @@ Additional adjustments were made to improve the algorithm:
 increases the word ambiguities and changes the original root.
 
 """
-from __future__ import unicode_literals
 import re
 
 from nltk.stem.api import StemmerI
 
 
 class ISRIStemmer(StemmerI):
-    '''
+    """
     ISRI Arabic stemmer based on algorithm: Arabic Stemming without a root dictionary.
     Information Science Research Institute. University of Nevada, Las Vegas, USA.
 
@@ -48,125 +47,169 @@ class ISRIStemmer(StemmerI):
     The ISRI Stemmer requires that all tokens have Unicode string types.
     If you use Python IDLE on Arabic Windows you have to decode text first
     using Arabic '1256' coding.
-    '''
+    """
 
     def __init__(self):
         # length three prefixes
-        self.p3 = ['\u0643\u0627\u0644', '\u0628\u0627\u0644',
-                   '\u0648\u0644\u0644', '\u0648\u0627\u0644']
+        self.p3 = [
+            "\u0643\u0627\u0644",
+            "\u0628\u0627\u0644",
+            "\u0648\u0644\u0644",
+            "\u0648\u0627\u0644",
+        ]
 
         # length two prefixes
-        self.p2 = ['\u0627\u0644', '\u0644\u0644']
+        self.p2 = ["\u0627\u0644", "\u0644\u0644"]
 
         # length one prefixes
-        self.p1 = ['\u0644', '\u0628', '\u0641', '\u0633', '\u0648',
-                   '\u064a', '\u062a', '\u0646', '\u0627']
+        self.p1 = [
+            "\u0644",
+            "\u0628",
+            "\u0641",
+            "\u0633",
+            "\u0648",
+            "\u064a",
+            "\u062a",
+            "\u0646",
+            "\u0627",
+        ]
 
         # length three suffixes
-        self.s3 = ['\u062a\u0645\u0644', '\u0647\u0645\u0644',
-                   '\u062a\u0627\u0646', '\u062a\u064a\u0646',
-                   '\u0643\u0645\u0644']
+        self.s3 = [
+            "\u062a\u0645\u0644",
+            "\u0647\u0645\u0644",
+            "\u062a\u0627\u0646",
+            "\u062a\u064a\u0646",
+            "\u0643\u0645\u0644",
+        ]
 
         # length two suffixes
-        self.s2 = ['\u0648\u0646', '\u0627\u062a', '\u0627\u0646',
-                   '\u064a\u0646', '\u062a\u0646', '\u0643\u0645',
-                   '\u0647\u0646', '\u0646\u0627', '\u064a\u0627',
-                   '\u0647\u0627', '\u062a\u0645', '\u0643\u0646',
-                   '\u0646\u064a', '\u0648\u0627', '\u0645\u0627',
-                   '\u0647\u0645']
+        self.s2 = [
+            "\u0648\u0646",
+            "\u0627\u062a",
+            "\u0627\u0646",
+            "\u064a\u0646",
+            "\u062a\u0646",
+            "\u0643\u0645",
+            "\u0647\u0646",
+            "\u0646\u0627",
+            "\u064a\u0627",
+            "\u0647\u0627",
+            "\u062a\u0645",
+            "\u0643\u0646",
+            "\u0646\u064a",
+            "\u0648\u0627",
+            "\u0645\u0627",
+            "\u0647\u0645",
+        ]
 
         # length one suffixes
-        self.s1 = ['\u0629', '\u0647', '\u064a', '\u0643', '\u062a',
-                   '\u0627', '\u0646']
+        self.s1 = ["\u0629", "\u0647", "\u064a", "\u0643", "\u062a", "\u0627", "\u0646"]
 
         # groups of length four patterns
-        self.pr4 = {0: ['\u0645'], 1: ['\u0627'],
-                    2: ['\u0627', '\u0648', '\u064A'], 3: ['\u0629']}
+        self.pr4 = {
+            0: ["\u0645"],
+            1: ["\u0627"],
+            2: ["\u0627", "\u0648", "\u064A"],
+            3: ["\u0629"],
+        }
 
         # Groups of length five patterns and length three roots
-        self.pr53 = {0: ['\u0627', '\u062a'],
-                     1: ['\u0627', '\u064a', '\u0648'],
-                     2: ['\u0627', '\u062a', '\u0645'],
-                     3: ['\u0645', '\u064a', '\u062a'],
-                     4: ['\u0645', '\u062a'],
-                     5: ['\u0627', '\u0648'],
-                     6: ['\u0627', '\u0645']}
-
-        self.re_short_vowels = re.compile(r'[\u064B-\u0652]')
-        self.re_hamza = re.compile(r'[\u0621\u0624\u0626]')
-        self.re_initial_hamza = re.compile(r'^[\u0622\u0623\u0625]')
-
-        self.stop_words = ['\u064a\u0643\u0648\u0646',
-                           '\u0648\u0644\u064a\u0633',
-                           '\u0648\u0643\u0627\u0646',
-                           '\u0643\u0630\u0644\u0643',
-                           '\u0627\u0644\u062a\u064a',
-                           '\u0648\u0628\u064a\u0646',
-                           '\u0639\u0644\u064a\u0647\u0627',
-                           '\u0645\u0633\u0627\u0621',
-                           '\u0627\u0644\u0630\u064a',
-                           '\u0648\u0643\u0627\u0646\u062a',
-                           '\u0648\u0644\u0643\u0646',
-                           '\u0648\u0627\u0644\u062a\u064a',
-                           '\u062a\u0643\u0648\u0646',
-                           '\u0627\u0644\u064a\u0648\u0645',
-                           '\u0627\u0644\u0644\u0630\u064a\u0646',
-                           '\u0639\u0644\u064a\u0647',
-                           '\u0643\u0627\u0646\u062a',
-                           '\u0644\u0630\u0644\u0643',
-                           '\u0623\u0645\u0627\u0645',
-                           '\u0647\u0646\u0627\u0643',
-                           '\u0645\u0646\u0647\u0627',
-                           '\u0645\u0627\u0632\u0627\u0644',
-                           '\u0644\u0627\u0632\u0627\u0644',
-                           '\u0644\u0627\u064a\u0632\u0627\u0644',
-                           '\u0645\u0627\u064a\u0632\u0627\u0644',
-                           '\u0627\u0635\u0628\u062d',
-                           '\u0623\u0635\u0628\u062d',
-                           '\u0623\u0645\u0633\u0649',
-                           '\u0627\u0645\u0633\u0649',
-                           '\u0623\u0636\u062d\u0649',
-                           '\u0627\u0636\u062d\u0649',
-                           '\u0645\u0627\u0628\u0631\u062d',
-                           '\u0645\u0627\u0641\u062a\u0626',
-                           '\u0645\u0627\u0627\u0646\u0641\u0643',
-                           '\u0644\u0627\u0633\u064a\u0645\u0627',
-                           '\u0648\u0644\u0627\u064a\u0632\u0627\u0644',
-                           '\u0627\u0644\u062d\u0627\u0644\u064a',
-                           '\u0627\u0644\u064a\u0647\u0627',
-                           '\u0627\u0644\u0630\u064a\u0646',
-                           '\u0641\u0627\u0646\u0647',
-                           '\u0648\u0627\u0644\u0630\u064a',
-                           '\u0648\u0647\u0630\u0627',
-                           '\u0644\u0647\u0630\u0627',
-                           '\u0641\u0643\u0627\u0646',
-                           '\u0633\u062a\u0643\u0648\u0646',
-                           '\u0627\u0644\u064a\u0647',
-                           '\u064a\u0645\u0643\u0646',
-                           '\u0628\u0647\u0630\u0627',
-                           '\u0627\u0644\u0630\u0649']
+        self.pr53 = {
+            0: ["\u0627", "\u062a"],
+            1: ["\u0627", "\u064a", "\u0648"],
+            2: ["\u0627", "\u062a", "\u0645"],
+            3: ["\u0645", "\u064a", "\u062a"],
+            4: ["\u0645", "\u062a"],
+            5: ["\u0627", "\u0648"],
+            6: ["\u0627", "\u0645"],
+        }
+
+        self.re_short_vowels = re.compile(r"[\u064B-\u0652]")
+        self.re_hamza = re.compile(r"[\u0621\u0624\u0626]")
+        self.re_initial_hamza = re.compile(r"^[\u0622\u0623\u0625]")
+
+        self.stop_words = [
+            "\u064a\u0643\u0648\u0646",
+            "\u0648\u0644\u064a\u0633",
+            "\u0648\u0643\u0627\u0646",
+            "\u0643\u0630\u0644\u0643",
+            "\u0627\u0644\u062a\u064a",
+            "\u0648\u0628\u064a\u0646",
+            "\u0639\u0644\u064a\u0647\u0627",
+            "\u0645\u0633\u0627\u0621",
+            "\u0627\u0644\u0630\u064a",
+            "\u0648\u0643\u0627\u0646\u062a",
+            "\u0648\u0644\u0643\u0646",
+            "\u0648\u0627\u0644\u062a\u064a",
+            "\u062a\u0643\u0648\u0646",
+            "\u0627\u0644\u064a\u0648\u0645",
+            "\u0627\u0644\u0644\u0630\u064a\u0646",
+            "\u0639\u0644\u064a\u0647",
+            "\u0643\u0627\u0646\u062a",
+            "\u0644\u0630\u0644\u0643",
+            "\u0623\u0645\u0627\u0645",
+            "\u0647\u0646\u0627\u0643",
+            "\u0645\u0646\u0647\u0627",
+            "\u0645\u0627\u0632\u0627\u0644",
+            "\u0644\u0627\u0632\u0627\u0644",
+            "\u0644\u0627\u064a\u0632\u0627\u0644",
+            "\u0645\u0627\u064a\u0632\u0627\u0644",
+            "\u0627\u0635\u0628\u062d",
+            "\u0623\u0635\u0628\u062d",
+            "\u0623\u0645\u0633\u0649",
+            "\u0627\u0645\u0633\u0649",
+            "\u0623\u0636\u062d\u0649",
+            "\u0627\u0636\u062d\u0649",
+            "\u0645\u0627\u0628\u0631\u062d",
+            "\u0645\u0627\u0641\u062a\u0626",
+            "\u0645\u0627\u0627\u0646\u0641\u0643",
+            "\u0644\u0627\u0633\u064a\u0645\u0627",
+            "\u0648\u0644\u0627\u064a\u0632\u0627\u0644",
+            "\u0627\u0644\u062d\u0627\u0644\u064a",
+            "\u0627\u0644\u064a\u0647\u0627",
+            "\u0627\u0644\u0630\u064a\u0646",
+            "\u0641\u0627\u0646\u0647",
+            "\u0648\u0627\u0644\u0630\u064a",
+            "\u0648\u0647\u0630\u0627",
+            "\u0644\u0647\u0630\u0627",
+            "\u0641\u0643\u0627\u0646",
+            "\u0633\u062a\u0643\u0648\u0646",
+            "\u0627\u0644\u064a\u0647",
+            "\u064a\u0645\u0643\u0646",
+            "\u0628\u0647\u0630\u0627",
+            "\u0627\u0644\u0630\u0649",
+        ]
 
     def stem(self, token):
         """
         Stemming a word token using the ISRI stemmer.
         """
-        token = self.norm(token, 1)   # remove diacritics which representing Arabic short vowels
+        token = self.norm(
+            token, 1
+        )  # remove diacritics that represent Arabic short vowels
         if token in self.stop_words:
-            return token              # exclude stop words from being processed
-        token = self.pre32(token)     # remove length three and length two prefixes in this order
-        token = self.suf32(token)     # remove length three and length two suffixes in this order
-        token = self.waw(token)       # remove connective ‘و’ if it precedes a word beginning with ‘و’
-        token = self.norm(token, 2)   # normalize initial hamza to bare alif
+            return token  # exclude stop words from being processed
+        token = self.pre32(
+            token
+        )  # remove length three and length two prefixes in this order
+        token = self.suf32(
+            token
+        )  # remove length three and length two suffixes in this order
+        token = self.waw(
+            token
+        )  # remove connective ‘و’ if it precedes a word beginning with ‘و’
+        token = self.norm(token, 2)  # normalize initial hamza to bare alif
         # if 4 <= word length <= 7, then stem; otherwise, no stemming
-        if len(token) == 4:           # length 4 word
+        if len(token) == 4:  # length 4 word
             token = self.pro_w4(token)
-        elif len(token) == 5:         # length 5 word
+        elif len(token) == 5:  # length 5 word
             token = self.pro_w53(token)
             token = self.end_w5(token)
-        elif len(token) == 6:         # length 6 word
+        elif len(token) == 6:  # length 6 word
             token = self.pro_w6(token)
             token = self.end_w6(token)
-        elif len(token) == 7:         # length 7 word
+        elif len(token) == 7:  # length 7 word
             token = self.suf1(token)
             if len(token) == 7:
                 token = self.pre1(token)
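
A hedged sketch of the pipeline above on a sample word (the word is illustrative,
not taken from this file; the expected root follows from tracing pre32() and
pro_w4() by hand):

    from nltk.stem.isri import ISRIStemmer

    st = ISRIStemmer()
    # "الكتاب" (the book): pre32() strips the length-two prefix "ال",
    # leaving a length-4 token, and pro_w4() extracts the root "كتب".
    print(st.stem("\u0627\u0644\u0643\u062a\u0627\u0628"))  # كتب
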
@@ -183,12 +226,12 @@ class ISRIStemmer(StemmerI):
         num=3  both 1&2
         """
         if num == 1:
-            word = self.re_short_vowels.sub('', word)
+            word = self.re_short_vowels.sub("", word)
         elif num == 2:
-            word = self.re_initial_hamza.sub('\u0627', word)
+            word = self.re_initial_hamza.sub("\u0627", word)
         elif num == 3:
-            word = self.re_short_vowels.sub('', word)
-            word = self.re_initial_hamza.sub('\u0627', word)
+            word = self.re_short_vowels.sub("", word)
+            word = self.re_initial_hamza.sub("\u0627", word)
         return word
 
     def pre32(self, word):
@@ -217,60 +260,60 @@ class ISRIStemmer(StemmerI):
 
     def waw(self, word):
         """remove connective ‘و’ if it precedes a word beginning with ‘و’ """
-        if len(word) >= 4 and word[:2] == '\u0648\u0648':
+        if len(word) >= 4 and word[:2] == "\u0648\u0648":
             word = word[1:]
         return word
 
     def pro_w4(self, word):
         """process length four patterns and extract length three roots"""
-        if word[0] in self.pr4[0]:      # مفعل
+        if word[0] in self.pr4[0]:  # مفعل
             word = word[1:]
-        elif word[1] in self.pr4[1]:    # فاعل
+        elif word[1] in self.pr4[1]:  # فاعل
             word = word[:1] + word[2:]
-        elif word[2] in self.pr4[2]:    # فعال - فعول - فعيل
+        elif word[2] in self.pr4[2]:  # فعال - فعول - فعيل
             word = word[:2] + word[3]
-        elif word[3] in self.pr4[3]:    # فعلة
+        elif word[3] in self.pr4[3]:  # فعلة
             word = word[:-1]
         else:
-            word = self.suf1(word)      # do - normalize short sufix
+            word = self.suf1(word)  # do - normalize short suffix
             if len(word) == 4:
                 word = self.pre1(word)  # do - normalize short prefix
         return word
 
     def pro_w53(self, word):
         """process length five patterns and extract length three roots"""
-        if word[2] in self.pr53[0] and word[0] == '\u0627':    # افتعل - افاعل
+        if word[2] in self.pr53[0] and word[0] == "\u0627":  # افتعل - افاعل
             word = word[1] + word[3:]
-        elif word[3] in self.pr53[1] and word[0] == '\u0645':  # مفعول - مفعال - مفعيل
+        elif word[3] in self.pr53[1] and word[0] == "\u0645":  # مفعول - مفعال - مفعيل
             word = word[1:3] + word[4]
-        elif word[0] in self.pr53[2] and word[4] == '\u0629':  # مفعلة - تفعلة - افعلة
+        elif word[0] in self.pr53[2] and word[4] == "\u0629":  # مفعلة - تفعلة - افعلة
             word = word[1:4]
-        elif word[0] in self.pr53[3] and word[2] == '\u062a':  # مفتعل - يفتعل - تفتعل
+        elif word[0] in self.pr53[3] and word[2] == "\u062a":  # مفتعل - يفتعل - تفتعل
             word = word[1] + word[3:]
-        elif word[0] in self.pr53[4] and word[2] == '\u0627':  # مفاعل - تفاعل
+        elif word[0] in self.pr53[4] and word[2] == "\u0627":  # مفاعل - تفاعل
             word = word[1] + word[3:]
-        elif word[2] in self.pr53[5] and word[4] == '\u0629':  # فعولة - فعالة
+        elif word[2] in self.pr53[5] and word[4] == "\u0629":  # فعولة - فعالة
             word = word[:2] + word[3]
-        elif word[0] in self.pr53[6] and word[1] == '\u0646':  # انفعل - منفعل
+        elif word[0] in self.pr53[6] and word[1] == "\u0646":  # انفعل - منفعل
             word = word[2:]
-        elif word[3] == '\u0627' and word[0] == '\u0627':      # افعال
+        elif word[3] == "\u0627" and word[0] == "\u0627":  # افعال
             word = word[1:3] + word[4]
-        elif word[4] == '\u0646' and word[3] == '\u0627':      # فعلان
+        elif word[4] == "\u0646" and word[3] == "\u0627":  # فعلان
             word = word[:3]
-        elif word[3] == '\u064a' and word[0] == '\u062a':      # تفعيل
+        elif word[3] == "\u064a" and word[0] == "\u062a":  # تفعيل
             word = word[1:3] + word[4]
-        elif word[3] == '\u0648' and word[1] == '\u0627':      # فاعول
+        elif word[3] == "\u0648" and word[1] == "\u0627":  # فاعول
             word = word[0] + word[2] + word[4]
-        elif word[2] == '\u0627' and word[1] == '\u0648':      # فواعل
+        elif word[2] == "\u0627" and word[1] == "\u0648":  # فواعل
             word = word[0] + word[3:]
-        elif word[3] == '\u0626' and word[2] == '\u0627':      # فعائل
+        elif word[3] == "\u0626" and word[2] == "\u0627":  # فعائل
             word = word[:2] + word[4]
-        elif word[4] == '\u0629' and word[1] == '\u0627':      # فاعلة
+        elif word[4] == "\u0629" and word[1] == "\u0627":  # فاعلة
             word = word[0] + word[2:4]
-        elif word[4] == '\u064a' and word[2] == '\u0627':      # فعالي
+        elif word[4] == "\u064a" and word[2] == "\u0627":  # فعالي
             word = word[:2] + word[3]
         else:
-            word = self.suf1(word)      # do - normalize short sufix
+            word = self.suf1(word)  # do - normalize short suffix
             if len(word) == 5:
                 word = self.pre1(word)  # do - normalize short prefix
         return word
@@ -279,9 +322,9 @@ class ISRIStemmer(StemmerI):
         """process length five patterns and extract length four roots"""
         if word[0] in self.pr53[2]:  # تفعلل - افعلل - مفعلل
             word = word[1:]
-        elif word[4] == '\u0629':    # فعللة
+        elif word[4] == "\u0629":  # فعللة
             word = word[:4]
-        elif word[2] == '\u0627':    # فعالل
+        elif word[2] == "\u0627":  # فعالل
             word = word[:2] + word[3:]
         return word
 
@@ -295,27 +338,37 @@ class ISRIStemmer(StemmerI):
 
     def pro_w6(self, word):
         """process length six patterns and extract length three roots"""
-        if word.startswith('\u0627\u0633\u062a') or word.startswith('\u0645\u0633\u062a'):  # مستفعل - استفعل
+        if word.startswith("\u0627\u0633\u062a") or word.startswith(
+            "\u0645\u0633\u062a"
+        ):  # مستفعل - استفعل
             word = word[3:]
-        elif word[0] == '\u0645' and word[3] == '\u0627' and word[5] == '\u0629':           # مفعالة
+        elif (
+            word[0] == "\u0645" and word[3] == "\u0627" and word[5] == "\u0629"
+        ):  # مفعالة
             word = word[1:3] + word[4]
-        elif word[0] == '\u0627' and word[2] == '\u062a' and word[4] == '\u0627':           # افتعال
+        elif (
+            word[0] == "\u0627" and word[2] == "\u062a" and word[4] == "\u0627"
+        ):  # افتعال
             word = word[1] + word[3] + word[5]
-        elif word[0] == '\u0627' and word[3] == '\u0648' and word[2] == word[4]:            # افعوعل
+        elif (
+            word[0] == "\u0627" and word[3] == "\u0648" and word[2] == word[4]
+        ):  # افعوعل
             word = word[1] + word[4:]
-        elif word[0] == '\u062a' and word[2] == '\u0627' and word[4] == '\u064a':           # تفاعيل   new pattern
+        elif (
+            word[0] == "\u062a" and word[2] == "\u0627" and word[4] == "\u064a"
+        ):  # تفاعيل   new pattern
             word = word[1] + word[3] + word[5]
         else:
-            word = self.suf1(word)      # do - normalize short sufix
+            word = self.suf1(word)  # do - normalize short suffix
             if len(word) == 6:
                 word = self.pre1(word)  # do - normalize short prefix
         return word
 
     def pro_w64(self, word):
         """process length six patterns and extract length four roots"""
-        if word[0] == '\u0627' and word[4] == '\u0627':  # افعلال
+        if word[0] == "\u0627" and word[4] == "\u0627":  # افعلال
             word = word[1:4] + word[5]
-        elif word.startswith('\u0645\u062a'):            # متفعلل
+        elif word.startswith("\u0645\u062a"):  # متفعلل
             word = word[2:]
         return word
 
@@ -341,5 +394,3 @@ class ISRIStemmer(StemmerI):
             if word.startswith(sp1):
                 return word[1:]
         return word
-
-
diff --git a/nlp_resource_data/nltk/stem/isri.pyc b/nlp_resource_data/nltk/stem/isri.pyc
deleted file mode 100755 (executable)
index 64a9bb7..0000000
Binary files a/nlp_resource_data/nltk/stem/isri.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index e7e3b47..ef5eaa4
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Stemmers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Tomcavage <stomcava@law.upenn.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -9,13 +9,11 @@
 A word stemmer based on the Lancaster (Paice/Husk) stemming algorithm.
 Paice, Chris D. "Another Stemmer." ACM SIGIR Forum 24.3 (1990): 56-61.
 """
-from __future__ import unicode_literals
 import re
 
 from nltk.stem.api import StemmerI
-from nltk.compat import python_2_unicode_compatible
 
-@python_2_unicode_compatible
+
 class LancasterStemmer(StemmerI):
     """
     Lancaster Stemmer
@@ -54,121 +52,121 @@ class LancasterStemmer(StemmerI):
 
     # The rule list is static since it doesn't change between instances
     default_rule_tuple = (
-        "ai*2.",     # -ia > -   if intact
-        "a*1.",      # -a > -    if intact
-        "bb1.",      # -bb > -b
-        "city3s.",   # -ytic > -ys
-        "ci2>",      # -ic > -
-        "cn1t>",     # -nc > -nt
-        "dd1.",      # -dd > -d
-        "dei3y>",    # -ied > -y
+        "ai*2.",  # -ia > -   if intact
+        "a*1.",  # -a > -    if intact
+        "bb1.",  # -bb > -b
+        "city3s.",  # -ytic > -ys
+        "ci2>",  # -ic > -
+        "cn1t>",  # -nc > -nt
+        "dd1.",  # -dd > -d
+        "dei3y>",  # -ied > -y
         "deec2ss.",  # -ceed >", -cess
-        "dee1.",     # -eed > -ee
-        "de2>",      # -ed > -
-        "dooh4>",    # -hood > -
-        "e1>",       # -e > -
-        "feil1v.",   # -lief > -liev
-        "fi2>",      # -if > -
-        "gni3>",     # -ing > -
-        "gai3y.",    # -iag > -y
-        "ga2>",      # -ag > -
-        "gg1.",      # -gg > -g
-        "ht*2.",     # -th > -   if intact
-        "hsiug5ct.", # -guish > -ct
-        "hsi3>",     # -ish > -
-        "i*1.",      # -i > -    if intact
-        "i1y>",      # -i > -y
-        "ji1d.",     # -ij > -id   --  see nois4j> & vis3j>
-        "juf1s.",    # -fuj > -fus
-        "ju1d.",     # -uj > -ud
-        "jo1d.",     # -oj > -od
-        "jeh1r.",    # -hej > -her
-        "jrev1t.",   # -verj > -vert
-        "jsim2t.",   # -misj > -mit
-        "jn1d.",     # -nj > -nd
-        "j1s.",      # -j > -s
+        "dee1.",  # -eed > -ee
+        "de2>",  # -ed > -
+        "dooh4>",  # -hood > -
+        "e1>",  # -e > -
+        "feil1v.",  # -lief > -liev
+        "fi2>",  # -if > -
+        "gni3>",  # -ing > -
+        "gai3y.",  # -iag > -y
+        "ga2>",  # -ag > -
+        "gg1.",  # -gg > -g
+        "ht*2.",  # -th > -   if intact
+        "hsiug5ct.",  # -guish > -ct
+        "hsi3>",  # -ish > -
+        "i*1.",  # -i > -    if intact
+        "i1y>",  # -i > -y
+        "ji1d.",  # -ij > -id   --  see nois4j> & vis3j>
+        "juf1s.",  # -fuj > -fus
+        "ju1d.",  # -uj > -ud
+        "jo1d.",  # -oj > -od
+        "jeh1r.",  # -hej > -her
+        "jrev1t.",  # -verj > -vert
+        "jsim2t.",  # -misj > -mit
+        "jn1d.",  # -nj > -nd
+        "j1s.",  # -j > -s
         "lbaifi6.",  # -ifiabl > -
-        "lbai4y.",   # -iabl > -y
-        "lba3>",     # -abl > -
-        "lbi3.",     # -ibl > -
-        "lib2l>",    # -bil > -bl
-        "lc1.",      # -cl > c
-        "lufi4y.",   # -iful > -y
-        "luf3>",     # -ful > -
-        "lu2.",      # -ul > -
-        "lai3>",     # -ial > -
-        "lau3>",     # -ual > -
-        "la2>",      # -al > -
-        "ll1.",      # -ll > -l
-        "mui3.",     # -ium > -
-        "mu*2.",     # -um > -   if intact
-        "msi3>",     # -ism > -
-        "mm1.",      # -mm > -m
-        "nois4j>",   # -sion > -j
+        "lbai4y.",  # -iabl > -y
+        "lba3>",  # -abl > -
+        "lbi3.",  # -ibl > -
+        "lib2l>",  # -bil > -bl
+        "lc1.",  # -cl > c
+        "lufi4y.",  # -iful > -y
+        "luf3>",  # -ful > -
+        "lu2.",  # -ul > -
+        "lai3>",  # -ial > -
+        "lau3>",  # -ual > -
+        "la2>",  # -al > -
+        "ll1.",  # -ll > -l
+        "mui3.",  # -ium > -
+        "mu*2.",  # -um > -   if intact
+        "msi3>",  # -ism > -
+        "mm1.",  # -mm > -m
+        "nois4j>",  # -sion > -j
         "noix4ct.",  # -xion > -ct
-        "noi3>",     # -ion > -
-        "nai3>",     # -ian > -
-        "na2>",      # -an > -
-        "nee0.",     # protect  -een
-        "ne2>",      # -en > -
-        "nn1.",      # -nn > -n
-        "pihs4>",    # -ship > -
-        "pp1.",      # -pp > -p
-        "re2>",      # -er > -
-        "rae0.",     # protect  -ear
-        "ra2.",      # -ar > -
-        "ro2>",      # -or > -
-        "ru2>",      # -ur > -
-        "rr1.",      # -rr > -r
-        "rt1>",      # -tr > -t
-        "rei3y>",    # -ier > -y
-        "sei3y>",    # -ies > -y
-        "sis2.",     # -sis > -s
-        "si2>",      # -is > -
-        "ssen4>",    # -ness > -
-        "ss0.",      # protect  -ss
-        "suo3>",     # -ous > -
-        "su*2.",     # -us > -   if intact
-        "s*1>",      # -s > -    if intact
-        "s0.",       # -s > -s
-        "tacilp4y.", # -plicat > -ply
-        "ta2>",      # -at > -
-        "tnem4>",    # -ment > -
-        "tne3>",     # -ent > -
-        "tna3>",     # -ant > -
-        "tpir2b.",   # -ript > -rib
-        "tpro2b.",   # -orpt > -orb
-        "tcud1.",    # -duct > -duc
-        "tpmus2.",   # -sumpt > -sum
+        "noi3>",  # -ion > -
+        "nai3>",  # -ian > -
+        "na2>",  # -an > -
+        "nee0.",  # protect  -een
+        "ne2>",  # -en > -
+        "nn1.",  # -nn > -n
+        "pihs4>",  # -ship > -
+        "pp1.",  # -pp > -p
+        "re2>",  # -er > -
+        "rae0.",  # protect  -ear
+        "ra2.",  # -ar > -
+        "ro2>",  # -or > -
+        "ru2>",  # -ur > -
+        "rr1.",  # -rr > -r
+        "rt1>",  # -tr > -t
+        "rei3y>",  # -ier > -y
+        "sei3y>",  # -ies > -y
+        "sis2.",  # -sis > -s
+        "si2>",  # -is > -
+        "ssen4>",  # -ness > -
+        "ss0.",  # protect  -ss
+        "suo3>",  # -ous > -
+        "su*2.",  # -us > -   if intact
+        "s*1>",  # -s > -    if intact
+        "s0.",  # -s > -s
+        "tacilp4y.",  # -plicat > -ply
+        "ta2>",  # -at > -
+        "tnem4>",  # -ment > -
+        "tne3>",  # -ent > -
+        "tna3>",  # -ant > -
+        "tpir2b.",  # -ript > -rib
+        "tpro2b.",  # -orpt > -orb
+        "tcud1.",  # -duct > -duc
+        "tpmus2.",  # -sumpt > -sum
         "tpec2iv.",  # -cept > -ceiv
-        "tulo2v.",   # -olut > -olv
-        "tsis0.",    # protect  -sist
-        "tsi3>",     # -ist > -
-        "tt1.",      # -tt > -t
-        "uqi3.",     # -iqu > -
-        "ugo1.",     # -ogu > -og
-        "vis3j>",    # -siv > -j
-        "vie0.",     # protect  -eiv
-        "vi2>",      # -iv > -
-        "ylb1>",     # -bly > -bl
-        "yli3y>",    # -ily > -y
-        "ylp0.",     # protect  -ply
-        "yl2>",      # -ly > -
-        "ygo1.",     # -ogy > -og
-        "yhp1.",     # -phy > -ph
-        "ymo1.",     # -omy > -om
-        "ypo1.",     # -opy > -op
-        "yti3>",     # -ity > -
-        "yte3>",     # -ety > -
-        "ytl2.",     # -lty > -l
-        "yrtsi5.",   # -istry > -
-        "yra3>",     # -ary > -
-        "yro3>",     # -ory > -
-        "yfi3.",     # -ify > -
-        "ycn2t>",    # -ncy > -nt
-        "yca3>",     # -acy > -
-        "zi2>",      # -iz > -
-        "zy1s."      # -yz > -ys
+        "tulo2v.",  # -olut > -olv
+        "tsis0.",  # protect  -sist
+        "tsi3>",  # -ist > -
+        "tt1.",  # -tt > -t
+        "uqi3.",  # -iqu > -
+        "ugo1.",  # -ogu > -og
+        "vis3j>",  # -siv > -j
+        "vie0.",  # protect  -eiv
+        "vi2>",  # -iv > -
+        "ylb1>",  # -bly > -bl
+        "yli3y>",  # -ily > -y
+        "ylp0.",  # protect  -ply
+        "yl2>",  # -ly > -
+        "ygo1.",  # -ogy > -og
+        "yhp1.",  # -phy > -ph
+        "ymo1.",  # -omy > -om
+        "ypo1.",  # -opy > -op
+        "yti3>",  # -ity > -
+        "yte3>",  # -ety > -
+        "ytl2.",  # -lty > -l
+        "yrtsi5.",  # -istry > -
+        "yra3>",  # -ary > -
+        "yro3>",  # -ory > -
+        "yfi3.",  # -ify > -
+        "ycn2t>",  # -ncy > -nt
+        "yca3>",  # -acy > -
+        "zi2>",  # -iz > -
+        "zy1s.",  # -yz > -ys
     )
 
     def __init__(self, rule_tuple=None, strip_prefix_flag=False):
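
Each rule string above packs five fields: the affected ending spelled backwards,
an optional '*' (the rule applies only while the word is still intact), the number
of characters to strip, letters to append, and '>' (keep stemming) or '.' (stop).
A decoding sketch; the regex here is a reconstruction matching the five groups
unpacked later in this diff, not a line copied from the file:

    import re

    # Groups: ending (reversed), intact flag, strip count, append, continue
    valid_rule = re.compile(r"^([a-z]+)(\*?)(\d)([a-z]*)([>.]?)$")

    ending, intact, remove, append, cont = valid_rule.match("dei3y>").groups()
    print(ending[::-1], remove, append, cont)  # ied 3 y >
    # i.e. the "-ied > -y" rule from the table, with stemming continuing
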
@@ -234,7 +232,10 @@ class LancasterStemmer(StemmerI):
             last_letter_position = self.__getLastLetter(word)
 
             # Only stem the word if it has a last letter and a rule matching that last letter
-            if last_letter_position < 0 or word[last_letter_position] not in self.rule_dictionary:
+            if (
+                last_letter_position < 0
+                or word[last_letter_position] not in self.rule_dictionary
+            ):
                 proceed = False
 
             else:
@@ -244,11 +245,13 @@ class LancasterStemmer(StemmerI):
                 for rule in self.rule_dictionary[word[last_letter_position]]:
                     rule_match = valid_rule.match(rule)
                     if rule_match:
-                        (ending_string,
-                         intact_flag,
-                         remove_total,
-                         append_string,
-                         cont_flag) = rule_match.groups()
+                        (
+                            ending_string,
+                            intact_flag,
+                            remove_total,
+                            append_string,
+                            cont_flag,
+                        ) = rule_match.groups()
 
                         # Convert the number of chars to remove when stemming
                         # from a string to an integer
@@ -257,21 +260,22 @@ class LancasterStemmer(StemmerI):
                         # Proceed if word's ending matches rule's word ending
                         if word.endswith(ending_string[::-1]):
                             if intact_flag:
-                                if (word == intact_word and
-                                    self.__isAcceptable(word, remove_total)):
-                                    word = self.__applyRule(word,
-                                                            remove_total,
-                                                            append_string)
+                                if word == intact_word and self.__isAcceptable(
+                                    word, remove_total
+                                ):
+                                    word = self.__applyRule(
+                                        word, remove_total, append_string
+                                    )
                                     rule_was_applied = True
-                                    if cont_flag == '.':
+                                    if cont_flag == ".":
                                         proceed = False
                                     break
                             elif self.__isAcceptable(word, remove_total):
-                                word = self.__applyRule(word,
-                                                        remove_total,
-                                                        append_string)
+                                word = self.__applyRule(
+                                    word, remove_total, append_string
+                                )
                                 rule_was_applied = True
-                                if cont_flag == '.':
+                                if cont_flag == ".":
                                     proceed = False
                                 break
                 # If no rules apply, the word doesn't need any more stemming
@@ -297,18 +301,17 @@ class LancasterStemmer(StemmerI):
         # If the word starts with a vowel, it must be at least 2
         # characters long to be stemmed
         if word[0] in "aeiouy":
-            if (len(word) - remove_total >= 2):
+            if len(word) - remove_total >= 2:
                 word_is_acceptable = True
         # If the word starts with a consonant, it must be at least 3
         # characters long (including one vowel) to be stemmed
-        elif (len(word) - remove_total >= 3):
+        elif len(word) - remove_total >= 3:
             if word[1] in "aeiouy":
                 word_is_acceptable = True
             elif word[2] in "aeiouy":
                 word_is_acceptable = True
         return word_is_acceptable
 
-
     def __applyRule(self, word, remove_total, append_string):
         """Apply the stemming rule to the word
         """
@@ -327,11 +330,20 @@ class LancasterStemmer(StemmerI):
         This function originally taken from Whoosh.
 
         """
-        for prefix in ("kilo", "micro", "milli", "intra", "ultra", "mega",
-                       "nano", "pico", "pseudo"):
+        for prefix in (
+            "kilo",
+            "micro",
+            "milli",
+            "intra",
+            "ultra",
+            "mega",
+            "nano",
+            "pico",
+            "pseudo",
+        ):
             if word.startswith(prefix):
-                return word[len(prefix):]
+                return word[len(prefix) :]
         return word
 
     def __repr__(self):
-        return '<LancasterStemmer>'
+        return "<LancasterStemmer>"
diff --git a/nlp_resource_data/nltk/stem/lancaster.pyc b/nlp_resource_data/nltk/stem/lancaster.pyc
deleted file mode 100755 (executable)
index cff52b8..0000000
Binary files a/nlp_resource_data/nltk/stem/lancaster.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index db68050..cb04f52
@@ -18,26 +18,23 @@ which includes another Python implementation and other implementations
 in many languages.
 """
 
-from __future__ import print_function, unicode_literals
-
-__docformat__ = 'plaintext'
+__docformat__ = "plaintext"
 
 import re
 
 from nltk.stem.api import StemmerI
-from nltk.compat import python_2_unicode_compatible
 
-@python_2_unicode_compatible
+
 class PorterStemmer(StemmerI):
     """
     A word stemmer based on the Porter stemming algorithm.
 
         Porter, M. "An algorithm for suffix stripping."
         Program 14.3 (1980): 130-137.
-        
+
     See http://www.tartarus.org/~martin/PorterStemmer/ for the homepage
     of the algorithm.
-        
+
     Martin Porter has endorsed several modifications to the Porter
     algorithm since writing his original paper, and those extensions are
     included in the implementations on his website. Additionally, others
@@ -48,72 +45,72 @@ class PorterStemmer(StemmerI):
 
         PorterStemmer.ORIGINAL_ALGORITHM
         - Implementation that is faithful to the original paper.
-        
+
           Note that Martin Porter has deprecated this version of the
           algorithm. Martin distributes implementations of the Porter
           Stemmer in many languages, hosted at:
-          
+
             http://www.tartarus.org/~martin/PorterStemmer/
-            
+
           and all of these implementations include his extensions. He
           strongly recommends against using the original, published
           version of the algorithm; only use this mode if you clearly
           understand why you are choosing to do so.
-        
+
         PorterStemmer.MARTIN_EXTENSIONS
         - Implementation that only uses the modifications to the
           algorithm that are included in the implementations on Martin
           Porter's website. He has declared Porter frozen, so the
           behaviour of those implementations should never change.
-          
+
         PorterStemmer.NLTK_EXTENSIONS (default)
         - Implementation that includes further improvements devised by
           NLTK contributors or taken from other modified implementations
           found on the web.
-          
+
     For the best stemming, you should use the default NLTK_EXTENSIONS
     version. However, if you need to get the same results as either the
     original algorithm or one of Martin Porter's hosted versions for
-    compability with an existing implementation or dataset, you can use
+    compatibility with an existing implementation or dataset, you can use
     one of the other modes instead.
     """
-    
+
     # Modes the Stemmer can be instantiated in
-    NLTK_EXTENSIONS = 'NLTK_EXTENSIONS'
-    MARTIN_EXTENSIONS = 'MARTIN_EXTENSIONS'
-    ORIGINAL_ALGORITHM = 'ORIGINAL_ALGORITHM'
+    NLTK_EXTENSIONS = "NLTK_EXTENSIONS"
+    MARTIN_EXTENSIONS = "MARTIN_EXTENSIONS"
+    ORIGINAL_ALGORITHM = "ORIGINAL_ALGORITHM"
 
     def __init__(self, mode=NLTK_EXTENSIONS):
         if mode not in (
             self.NLTK_EXTENSIONS,
             self.MARTIN_EXTENSIONS,
-            self.ORIGINAL_ALGORITHM
+            self.ORIGINAL_ALGORITHM,
         ):
             raise ValueError(
                 "Mode must be one of PorterStemmer.NLTK_EXTENSIONS, "
                 "PorterStemmer.MARTIN_EXTENSIONS, or "
                 "PorterStemmer.ORIGINAL_ALGORITHM"
             )
-        
+
         self.mode = mode
-        
+
         if self.mode == self.NLTK_EXTENSIONS:
             # This is a table of irregular forms. It is quite short,
             # but still reflects the errors actually drawn to Martin
             # Porter's attention over a 20 year period!
             irregular_forms = {
-                "sky" :     ["sky", "skies"],
-                "die" :     ["dying"],
-                "lie" :     ["lying"],
-                "tie" :     ["tying"],
-                "news" :    ["news"],
-                "inning" :  ["innings", "inning"],
-                "outing" :  ["outings", "outing"],
-                "canning" : ["cannings", "canning"],
-                "howe" :    ["howe"],
-                "proceed" : ["proceed"],
-                "exceed"  : ["exceed"],
-                "succeed" : ["succeed"],
+                "sky": ["sky", "skies"],
+                "die": ["dying"],
+                "lie": ["lying"],
+                "tie": ["tying"],
+                "news": ["news"],
+                "inning": ["innings", "inning"],
+                "outing": ["outings", "outing"],
+                "canning": ["cannings", "canning"],
+                "howe": ["howe"],
+                "proceed": ["proceed"],
+                "exceed": ["exceed"],
+                "succeed": ["succeed"],
             }
 
             self.pool = {}
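
A brief sketch of mode selection; per the constructor check above, any other
value raises ValueError:

    from nltk.stem.porter import PorterStemmer

    p_default = PorterStemmer()  # NLTK_EXTENSIONS
    p_martin = PorterStemmer(mode=PorterStemmer.MARTIN_EXTENSIONS)
    p_1980 = PorterStemmer(mode=PorterStemmer.ORIGINAL_ALGORITHM)
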
@@ -121,13 +118,13 @@ class PorterStemmer(StemmerI):
                 for val in irregular_forms[key]:
                     self.pool[val] = key
 
-        self.vowels = frozenset(['a', 'e', 'i', 'o', 'u'])
+        self.vowels = frozenset(["a", "e", "i", "o", "u"])
 
     def _is_consonant(self, word, i):
         """Returns True if word[i] is a consonant, False otherwise
-        
+
         A consonant is defined in the paper as follows:
-        
+
             A consonant in a word is a letter other than A, E, I, O or
             U, and other than Y preceded by a consonant. (The fact that
             the term `consonant' is defined to some extent in terms of
@@ -137,18 +134,18 @@ class PorterStemmer(StemmerI):
         """
         if word[i] in self.vowels:
             return False
-        if word[i] == 'y':
+        if word[i] == "y":
             if i == 0:
                 return True
             else:
-                return (not self._is_consonant(word, i - 1))
+                return not self._is_consonant(word, i - 1)
         return True
-        
+
     def _measure(self, stem):
         """Returns the 'measure' of stem, per definition in the paper
-        
+
         From the paper:
-        
+
             A consonant will be denoted by c, a vowel by v. A list
             ccc... of length greater than 0 will be denoted by C, and a
             list vvv... of length greater than 0 will be denoted by V.
@@ -159,11 +156,11 @@ class PorterStemmer(StemmerI):
                 CVCV ... V
                 VCVC ... C
                 VCVC ... V
-                
+
             These may all be represented by the single form
-            
+
                 [C]VCVC ... [V]
-                
+
             where the square brackets denote arbitrary presence of their
             contents. Using (VC){m} to denote VC repeated m times, this
             may again be written as
@@ -178,23 +175,23 @@ class PorterStemmer(StemmerI):
                 m=1    TROUBLE,  OATS,  TREES,  IVY.
                 m=2    TROUBLES,  PRIVATE,  OATEN,  ORRERY.
         """
-        cv_sequence = ''
-        
+        cv_sequence = ""
+
         # Construct a string of 'c's and 'v's representing whether each
         # character in `stem` is a consonant or a vowel.
         # e.g. 'falafel' becomes 'cvcvcvc',
         #      'architecture' becomes 'vcccvcvccvcv'
         for i in range(len(stem)):
             if self._is_consonant(stem, i):
-                cv_sequence += 'c'
+                cv_sequence += "c"
             else:
-                cv_sequence += 'v'
-                
+                cv_sequence += "v"
+
         # Count the number of 'vc' occurrences, which is equivalent to
         # the number of 'VC' occurrences in Porter's reduced form in the
         # docstring above, which is in turn equivalent to `m`
-        return cv_sequence.count('vc')
-        
+        return cv_sequence.count("vc")
+
     def _has_positive_measure(self, stem):
         return self._measure(stem) > 0
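
A worked check of the measure for 'troubles' (m=2 in the docstring table above);
this simplified classifier ignores the context-dependent 'y' rule, which does not
matter for a word containing no 'y':

    word = "troubles"
    cv = "".join("v" if ch in "aeiou" else "c" for ch in word)
    print(cv, cv.count("vc"))  # ccvvccvc 2  -> m = 2, matching the table
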
 
@@ -204,50 +201,50 @@ class PorterStemmer(StemmerI):
             if not self._is_consonant(stem, i):
                 return True
         return False
-        
+
     def _ends_double_consonant(self, word):
         """Implements condition *d from the paper
-        
+
         Returns True if word ends with a double consonant
         """
         return (
-            len(word) >= 2 and
-            word[-1] == word[-2] and
-            self._is_consonant(word, len(word)-1)
+            len(word) >= 2
+            and word[-1] == word[-2]
+            and self._is_consonant(word, len(word) - 1)
         )
 
     def _ends_cvc(self, word):
         """Implements condition *o from the paper
-        
+
         From the paper:
-        
+
             *o  - the stem ends cvc, where the second c is not W, X or Y
                   (e.g. -WIL, -HOP).
         """
         return (
-            len(word) >= 3 and
-            self._is_consonant(word, len(word) - 3) and
-            not self._is_consonant(word, len(word) - 2) and
-            self._is_consonant(word, len(word) - 1) and
-            word[-1] not in ('w', 'x', 'y')
+            len(word) >= 3
+            and self._is_consonant(word, len(word) - 3)
+            and not self._is_consonant(word, len(word) - 2)
+            and self._is_consonant(word, len(word) - 1)
+            and word[-1] not in ("w", "x", "y")
         ) or (
-            self.mode == self.NLTK_EXTENSIONS and
-            len(word) == 2 and
-            not self._is_consonant(word, 0) and
-            self._is_consonant(word, 1)
+            self.mode == self.NLTK_EXTENSIONS
+            and len(word) == 2
+            and not self._is_consonant(word, 0)
+            and self._is_consonant(word, 1)
         )
-        
+
     def _replace_suffix(self, word, suffix, replacement):
         """Replaces `suffix` of `word` with `replacement"""
         assert word.endswith(suffix), "Given word doesn't end with given suffix"
-        if suffix == '':
+        if suffix == "":
             return word + replacement
         else:
-            return word[:-len(suffix)] + replacement
-                
+            return word[: -len(suffix)] + replacement
+
     def _apply_rule_list(self, word, rules):
         """Applies the first applicable suffix-removal rule to the word
-        
+
         Takes a word and a list of suffix-removal rules represented as
         3-tuples, with the first element being the suffix to remove,
         the second element being the string to replace it with, and the
@@ -256,7 +253,7 @@ class PorterStemmer(StemmerI):
         """
         for rule in rules:
             suffix, replacement, condition = rule
-            if suffix == '*d' and self._ends_double_consonant(word):
+            if suffix == "*d" and self._ends_double_consonant(word):
                 stem = word[:-2]
                 if condition is None or condition(stem):
                     return stem + replacement
@@ -264,20 +261,20 @@ class PorterStemmer(StemmerI):
                     # Don't try any further rules
                     return word
             if word.endswith(suffix):
-                stem = self._replace_suffix(word, suffix, '')
+                stem = self._replace_suffix(word, suffix, "")
                 if condition is None or condition(stem):
                     return stem + replacement
                 else:
                     # Don't try any further rules
                     return word
-                
+
         return word
-        
+
     def _step1a(self, word):
         """Implements Step 1a from "An algorithm for suffix stripping"
-        
+
         From the paper:
-            
+
             SSES -> SS                         caresses  ->  caress
             IES  -> I                          ponies    ->  poni
                                                ties      ->  ti
@@ -287,28 +284,31 @@ class PorterStemmer(StemmerI):
         # this NLTK-only rule extends the original algorithm, so
         # that 'flies'->'fli' but 'dies'->'die' etc
         if self.mode == self.NLTK_EXTENSIONS:
-            if word.endswith('ies') and len(word) == 4:
-                return self._replace_suffix(word, 'ies', 'ie')
-            
-        return self._apply_rule_list(word, [
-            ('sses', 'ss', None), # SSES -> SS
-            ('ies', 'i', None),   # IES  -> I
-            ('ss', 'ss', None),   # SS   -> SS
-            ('s', '', None),      # S    ->
-        ])
-        
+            if word.endswith("ies") and len(word) == 4:
+                return self._replace_suffix(word, "ies", "ie")
+
+        return self._apply_rule_list(
+            word,
+            [
+                ("sses", "ss", None),  # SSES -> SS
+                ("ies", "i", None),  # IES  -> I
+                ("ss", "ss", None),  # SS   -> SS
+                ("s", "", None),  # S    ->
+            ],
+        )
+
     def _step1b(self, word):
         """Implements Step 1b from "An algorithm for suffix stripping"
-        
+
         From the paper:
-        
+
             (m>0) EED -> EE                    feed      ->  feed
                                                agreed    ->  agree
             (*v*) ED  ->                       plastered ->  plaster
                                                bled      ->  bled
             (*v*) ING ->                       motoring  ->  motor
                                                sing      ->  sing
-                                               
+
         If the second or third of the rules in Step 1b is successful,
         the following is done:
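
The NLTK-only blocks in Steps 1a and 1b (see the 'flies'/'dies' and
'spied'/'died' comments in the surrounding hunks) are visible through the public
stem() entry point:

    from nltk.stem.porter import PorterStemmer

    p = PorterStemmer()  # NLTK_EXTENSIONS is the default mode
    print(p.stem("flies"), p.stem("dies"))  # fli die
    print(p.stem("spied"), p.stem("died"))  # spi die
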
 
@@ -333,62 +333,65 @@ class PorterStemmer(StemmerI):
         # this NLTK-only block extends the original algorithm, so that
         # 'spied'->'spi' but 'died'->'die' etc
         if self.mode == self.NLTK_EXTENSIONS:
-            if word.endswith('ied'):
+            if word.endswith("ied"):
                 if len(word) == 4:
-                    return self._replace_suffix(word, 'ied', 'ie')
+                    return self._replace_suffix(word, "ied", "ie")
                 else:
-                    return self._replace_suffix(word, 'ied', 'i')
-        
+                    return self._replace_suffix(word, "ied", "i")
+
         # (m>0) EED -> EE
-        if word.endswith('eed'):
-            stem = self._replace_suffix(word, 'eed', '')
+        if word.endswith("eed"):
+            stem = self._replace_suffix(word, "eed", "")
             if self._measure(stem) > 0:
-                return stem + 'ee'
+                return stem + "ee"
             else:
                 return word
-            
+
         rule_2_or_3_succeeded = False
-        
-        for suffix in ['ed', 'ing']:
+
+        for suffix in ["ed", "ing"]:
             if word.endswith(suffix):
-                intermediate_stem = self._replace_suffix(word, suffix, '')
+                intermediate_stem = self._replace_suffix(word, suffix, "")
                 if self._contains_vowel(intermediate_stem):
                     rule_2_or_3_succeeded = True
                     break
-                
+
         if not rule_2_or_3_succeeded:
             return word
 
-        return self._apply_rule_list(intermediate_stem, [
-            ('at', 'ate', None), # AT -> ATE
-            ('bl', 'ble', None), # BL -> BLE
-            ('iz', 'ize', None), # IZ -> IZE
-            # (*d and not (*L or *S or *Z))
-            # -> single letter
-            (
-                '*d',
-                intermediate_stem[-1],
-                lambda stem: intermediate_stem[-1] not in ('l', 's', 'z')
-            ),
-            # (m=1 and *o) -> E
-            (
-                '',
-                'e',
-                lambda stem: (self._measure(stem) == 1 and
-                              self._ends_cvc(stem))
-            ),
-        ])
-    
+        return self._apply_rule_list(
+            intermediate_stem,
+            [
+                ("at", "ate", None),  # AT -> ATE
+                ("bl", "ble", None),  # BL -> BLE
+                ("iz", "ize", None),  # IZ -> IZE
+                # (*d and not (*L or *S or *Z))
+                # -> single letter
+                (
+                    "*d",
+                    intermediate_stem[-1],
+                    lambda stem: intermediate_stem[-1] not in ("l", "s", "z"),
+                ),
+                # (m=1 and *o) -> E
+                (
+                    "",
+                    "e",
+                    lambda stem: (self._measure(stem) == 1 and self._ends_cvc(stem)),
+                ),
+            ],
+        )
+
     def _step1c(self, word):
         """Implements Step 1c from "An algorithm for suffix stripping"
-        
+
         From the paper:
-        
+
         Step 1c
 
             (*v*) Y -> I                    happy        ->  happi
                                             sky          ->  sky
         """
+
         def nltk_condition(stem):
             """
             This has been modified from the original Porter algorithm so
@@ -409,24 +412,28 @@ class PorterStemmer(StemmerI):
             conflate with 'spied', 'tried', 'flies' ...
             """
             return len(stem) > 1 and self._is_consonant(stem, len(stem) - 1)
-        
+
         def original_condition(stem):
             return self._contains_vowel(stem)
-        
-        return self._apply_rule_list(word, [
-            (
-                'y',
-                'i',
-                nltk_condition if self.mode == self.NLTK_EXTENSIONS
-                               else original_condition
-            )
-        ])
+
+        return self._apply_rule_list(
+            word,
+            [
+                (
+                    "y",
+                    "i",
+                    nltk_condition
+                    if self.mode == self.NLTK_EXTENSIONS
+                    else original_condition,
+                )
+            ],
+        )
 
     def _step2(self, word):
         """Implements Step 2 from "An algorithm for suffix stripping"
-        
+
         From the paper:
-        
+
         Step 2
 
             (m>0) ATIONAL ->  ATE       relational     ->  relate
@@ -456,70 +463,57 @@ class PorterStemmer(StemmerI):
             # Instead of applying the ALLI -> AL rule after '(a)bli' per
             # the published algorithm, instead we apply it first, and,
             # if it succeeds, run the result through step2 again.
-            if (
-                word.endswith('alli') and
-                self._has_positive_measure(
-                    self._replace_suffix(word, 'alli', '')
-                )
+            if word.endswith("alli") and self._has_positive_measure(
+                self._replace_suffix(word, "alli", "")
             ):
-                return self._step2(
-                    self._replace_suffix(word, 'alli', 'al')
-                )
-        
-        bli_rule = ('bli', 'ble', self._has_positive_measure)
-        abli_rule = ('abli', 'able', self._has_positive_measure)
-        
+                return self._step2(self._replace_suffix(word, "alli", "al"))
+
+        bli_rule = ("bli", "ble", self._has_positive_measure)
+        abli_rule = ("abli", "able", self._has_positive_measure)
+
         rules = [
-            ('ational', 'ate', self._has_positive_measure),
-            ('tional', 'tion', self._has_positive_measure),
-            ('enci', 'ence', self._has_positive_measure),
-            ('anci', 'ance', self._has_positive_measure),
-            ('izer', 'ize', self._has_positive_measure),
-            
+            ("ational", "ate", self._has_positive_measure),
+            ("tional", "tion", self._has_positive_measure),
+            ("enci", "ence", self._has_positive_measure),
+            ("anci", "ance", self._has_positive_measure),
+            ("izer", "ize", self._has_positive_measure),
             abli_rule if self.mode == self.ORIGINAL_ALGORITHM else bli_rule,
-            
-            ('alli', 'al', self._has_positive_measure),
-            ('entli', 'ent', self._has_positive_measure),
-            ('eli', 'e', self._has_positive_measure),
-            ('ousli', 'ous', self._has_positive_measure),
-            ('ization', 'ize', self._has_positive_measure),
-            ('ation', 'ate', self._has_positive_measure),
-            ('ator', 'ate', self._has_positive_measure),
-            ('alism', 'al', self._has_positive_measure),
-            ('iveness', 'ive', self._has_positive_measure),
-            ('fulness', 'ful', self._has_positive_measure),
-            ('ousness', 'ous', self._has_positive_measure),
-            ('aliti', 'al', self._has_positive_measure),
-            ('iviti', 'ive', self._has_positive_measure),
-            ('biliti', 'ble', self._has_positive_measure),
+            ("alli", "al", self._has_positive_measure),
+            ("entli", "ent", self._has_positive_measure),
+            ("eli", "e", self._has_positive_measure),
+            ("ousli", "ous", self._has_positive_measure),
+            ("ization", "ize", self._has_positive_measure),
+            ("ation", "ate", self._has_positive_measure),
+            ("ator", "ate", self._has_positive_measure),
+            ("alism", "al", self._has_positive_measure),
+            ("iveness", "ive", self._has_positive_measure),
+            ("fulness", "ful", self._has_positive_measure),
+            ("ousness", "ous", self._has_positive_measure),
+            ("aliti", "al", self._has_positive_measure),
+            ("iviti", "ive", self._has_positive_measure),
+            ("biliti", "ble", self._has_positive_measure),
         ]
-        
+
         if self.mode == self.NLTK_EXTENSIONS:
-            rules.append(
-                ('fulli', 'ful', self._has_positive_measure)
-            )
-            
+            rules.append(("fulli", "ful", self._has_positive_measure))
+
             # The 'l' of the 'logi' -> 'log' rule is put with the stem,
             # so that short stems like 'geo' 'theo' etc work like
             # 'archaeo' 'philo' etc.
-            rules.append((
-                "logi",
-                "log",
-                lambda stem: self._has_positive_measure(word[:-3])
-            ))
-
-        if self.mode == self.MARTIN_EXTENSIONS:
             rules.append(
-                ("logi", "log", self._has_positive_measure)
+                ("logi", "log", lambda stem: self._has_positive_measure(word[:-3]))
             )
-        
+
+        if self.mode == self.MARTIN_EXTENSIONS:
+            rules.append(("logi", "log", self._has_positive_measure))
+
         return self._apply_rule_list(word, rules)
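Every condition in the rule table above except the 'logi' lambdas is _has_positive_measure, i.e. Porter's measure m > 0. The NLTK-extensions 'logi' rule is the one condition that ignores its stem argument: it closes over the enclosing word and measures word[:-3], deliberately counting the trailing 'l' with the stem. A self-contained sketch (this simplified measure treats 'y' as a consonant throughout; the real _measure is more careful):

    import re

    def measure(stem):
        # Porter's m: count of vowel-run/consonant-run (VC) sequences.
        skeleton = "".join("v" if ch in "aeiou" else "c" for ch in stem)
        return len(re.findall(r"v+c+", skeleton))

    word = "geologi"
    print(measure(word[:-3]))             # measure("geol") == 1: rule fires
    print(measure(word[: -len("logi")]))  # measure("geo")  == 0: would not
    # Keeping the 'l' with the stem is what lets short stems like 'geo' and
    # 'theo' behave like 'archaeo' and 'philo', per the comment above.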
 
     def _step3(self, word):
         """Implements Step 3 from "An algorithm for suffix stripping"
-        
+
         From the paper:
-        
+
         Step 3
 
             (m>0) ICATE ->  IC              triplicate     ->  triplic
@@ -530,19 +524,22 @@ class PorterStemmer(StemmerI):
             (m>0) FUL   ->                  hopeful        ->  hope
             (m>0) NESS  ->                  goodness       ->  good
         """
-        return self._apply_rule_list(word, [
-            ('icate', 'ic', self._has_positive_measure),
-            ('ative', '', self._has_positive_measure),
-            ('alize', 'al', self._has_positive_measure),
-            ('iciti', 'ic', self._has_positive_measure),
-            ('ical', 'ic', self._has_positive_measure),
-            ('ful', '', self._has_positive_measure),
-            ('ness', '', self._has_positive_measure),
-        ])
+        return self._apply_rule_list(
+            word,
+            [
+                ("icate", "ic", self._has_positive_measure),
+                ("ative", "", self._has_positive_measure),
+                ("alize", "al", self._has_positive_measure),
+                ("iciti", "ic", self._has_positive_measure),
+                ("ical", "ic", self._has_positive_measure),
+                ("ful", "", self._has_positive_measure),
+                ("ness", "", self._has_positive_measure),
+            ],
+        )
 
     def _step4(self, word):
         """Implements Step 4 from "An algorithm for suffix stripping"
-        
+
         Step 4
 
             (m>1) AL    ->                  revival        ->  reviv
@@ -569,41 +566,42 @@ class PorterStemmer(StemmerI):
         tidying up.
         """
         measure_gt_1 = lambda stem: self._measure(stem) > 1
-        
-        return self._apply_rule_list(word, [
-            ('al', '', measure_gt_1),
-            ('ance', '', measure_gt_1),
-            ('ence', '', measure_gt_1),
-            ('er', '', measure_gt_1),
-            ('ic', '', measure_gt_1),
-            ('able', '', measure_gt_1),
-            ('ible', '', measure_gt_1),
-            ('ant', '', measure_gt_1),
-            ('ement', '', measure_gt_1),
-            ('ment', '', measure_gt_1),
-            ('ent', '', measure_gt_1),
-            
-            # (m>1 and (*S or *T)) ION -> 
-            (
-                'ion',
-                '',
-                lambda stem: self._measure(stem) > 1 and stem[-1] in ('s', 't')
-            ),
-            
-            ('ou', '', measure_gt_1),
-            ('ism', '', measure_gt_1),
-            ('ate', '', measure_gt_1),
-            ('iti', '', measure_gt_1),
-            ('ous', '', measure_gt_1),
-            ('ive', '', measure_gt_1),
-            ('ize', '', measure_gt_1),
-        ])
-        
+
+        return self._apply_rule_list(
+            word,
+            [
+                ("al", "", measure_gt_1),
+                ("ance", "", measure_gt_1),
+                ("ence", "", measure_gt_1),
+                ("er", "", measure_gt_1),
+                ("ic", "", measure_gt_1),
+                ("able", "", measure_gt_1),
+                ("ible", "", measure_gt_1),
+                ("ant", "", measure_gt_1),
+                ("ement", "", measure_gt_1),
+                ("ment", "", measure_gt_1),
+                ("ent", "", measure_gt_1),
+                # (m>1 and (*S or *T)) ION ->
+                (
+                    "ion",
+                    "",
+                    lambda stem: self._measure(stem) > 1 and stem[-1] in ("s", "t"),
+                ),
+                ("ou", "", measure_gt_1),
+                ("ism", "", measure_gt_1),
+                ("ate", "", measure_gt_1),
+                ("iti", "", measure_gt_1),
+                ("ous", "", measure_gt_1),
+                ("ive", "", measure_gt_1),
+                ("ize", "", measure_gt_1),
+            ],
+        )
+
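All of Step 4's rules above share measure_gt_1 except ION, whose lambda additionally requires the stem to end in 's' or 't', matching the paper's (m>1 and (*S or *T)) ION -> rule. Continuing with the same simplified measure sketch:

    import re

    def measure(stem):  # same simplified VC-count sketch as above
        skeleton = "".join("v" if ch in "aeiou" else "c" for ch in stem)
        return len(re.findall(r"v+c+", skeleton))

    ion_ok = lambda stem: measure(stem) > 1 and stem[-1] in ("s", "t")

    print(ion_ok("adopt"))  # True:  "adoption" -> "adopt"
    print(ion_ok("opin"))   # False: "opinion" keeps "ion" at this step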
     def _step5a(self, word):
         """Implements Step 5a from "An algorithm for suffix stripping"
-        
+
         From the paper:
-        
+
         Step 5a
 
             (m>1) E     ->                  probate        ->  probat
@@ -627,8 +625,8 @@ class PorterStemmer(StemmerI):
         # no explicit mention of the inconsistency; you have to infer it
         # from the examples.
         # For this reason, we can't use _apply_rule_list here.
-        if word.endswith('e'):
-            stem = self._replace_suffix(word, 'e', '')
+        if word.endswith("e"):
+            stem = self._replace_suffix(word, "e", "")
             if self._measure(stem) > 1:
                 return stem
             if self._measure(stem) == 1 and not self._ends_cvc(stem):
@@ -637,22 +635,22 @@ class PorterStemmer(StemmerI):
 
     def _step5b(self, word):
         """Implements Step 5a from "An algorithm for suffix stripping"
-        
+
         From the paper:
-        
+
         Step 5b
 
             (m > 1 and *d and *L) -> single letter
                                     controll       ->  control
                                     roll           ->  roll
         """
-        return self._apply_rule_list(word, [
-            ('ll', 'l', lambda stem: self._measure(word[:-1]) > 1)
-        ])
+        return self._apply_rule_list(
+            word, [("ll", "l", lambda stem: self._measure(word[:-1]) > 1)]
+        )
 
     def stem(self, word):
         stem = word.lower()
-        
+
         if self.mode == self.NLTK_EXTENSIONS and word in self.pool:
             return self.pool[word]
 
@@ -670,11 +668,12 @@ class PorterStemmer(StemmerI):
         stem = self._step4(stem)
         stem = self._step5a(stem)
         stem = self._step5b(stem)
-        
+
         return stem
 
     def __repr__(self):
-        return '<PorterStemmer>'
+        return "<PorterStemmer>"
+
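With the class complete, a quick smoke test of the public API (standard NLTK usage; in current NLTK the constructor takes an optional mode argument defaulting to NLTK_EXTENSIONS, so the outputs below assume that mode):

    from nltk.stem.porter import PorterStemmer

    stemmer = PorterStemmer()
    for word in ["caresses", "ponies", "relational", "hopeful", "probate"]:
        print(word, "->", stemmer.stem(word))
    # caresses -> caress, ponies -> poni, relational -> relat,
    # hopeful -> hope, probate -> probat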
 
 def demo():
     """
@@ -695,16 +694,16 @@ def demo():
             stemmed.append(stemmer.stem(word))
 
     # Convert the results to a string, and word-wrap them.
-    results = ' '.join(stemmed)
-    results = re.sub(r"(.{,70})\s", r'\1\n', results+' ').rstrip()
+    results = " ".join(stemmed)
+    results = re.sub(r"(.{,70})\s", r"\1\n", results + " ").rstrip()
 
     # Convert the original to a string, and word wrap it.
-    original = ' '.join(orig)
-    original = re.sub(r"(.{,70})\s", r'\1\n', original+' ').rstrip()
+    original = " ".join(orig)
+    original = re.sub(r"(.{,70})\s", r"\1\n", original + " ").rstrip()
 
     # Print the results.
-    print('-Original-'.center(70).replace(' ', '*').replace('-', ' '))
+    print("-Original-".center(70).replace(" ", "*").replace("-", " "))
     print(original)
-    print('-Results-'.center(70).replace(' ', '*').replace('-', ' '))
+    print("-Results-".center(70).replace(" ", "*").replace("-", " "))
     print(results)
-    print('*'*70)
+    print("*" * 70)
diff --git a/nlp_resource_data/nltk/stem/porter.pyc b/nlp_resource_data/nltk/stem/porter.pyc
deleted file mode 100755 (executable)
index ae3db42..0000000
Binary files a/nlp_resource_data/nltk/stem/porter.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/stem/regexp.py b/nlp_resource_data/nltk/stem/regexp.py
old mode 100755 (executable)
new mode 100644 (file)
index 9053571..e00f232
@@ -1,18 +1,16 @@
 # Natural Language Toolkit: Stemmers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Trevor Cohn <tacohn@cs.mu.oz.au>
 #         Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import unicode_literals
 import re
 
 from nltk.stem.api import StemmerI
-from nltk.compat import python_2_unicode_compatible
 
-@python_2_unicode_compatible
+
 class RegexpStemmer(StemmerI):
     """
     A stemmer that uses regular expressions to identify morphological
@@ -40,9 +38,10 @@ class RegexpStemmer(StemmerI):
     :type min: int
     :param min: The minimum length of string to stem
     """
+
     def __init__(self, regexp, min=0):
 
-        if not hasattr(regexp, 'pattern'):
+        if not hasattr(regexp, "pattern"):
             regexp = re.compile(regexp)
         self._regexp = regexp
         self._min = min
@@ -51,11 +50,7 @@ class RegexpStemmer(StemmerI):
         if len(word) < self._min:
             return word
         else:
-            return self._regexp.sub('', word)
+            return self._regexp.sub("", word)
 
     def __repr__(self):
-        return '<RegexpStemmer: {!r}>'.format(self._regexp.pattern)
-
-
-
-
+        return "<RegexpStemmer: {!r}>".format(self._regexp.pattern)
diff --git a/nlp_resource_data/nltk/stem/regexp.pyc b/nlp_resource_data/nltk/stem/regexp.pyc
deleted file mode 100755 (executable)
index ed20601..0000000
Binary files a/nlp_resource_data/nltk/stem/regexp.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/stem/rslp.py b/nlp_resource_data/nltk/stem/rslp.py
old mode 100755 (executable)
new mode 100644 (file)
index ebf190d..10f5de5
@@ -2,7 +2,7 @@
 
 # Natural Language Toolkit: RSLP Stemmer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Tiago Tresoldi <tresoldi@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 # comments, including on the development of a different and/or better
 # stemmer for Portuguese. I also suggest using the NLTK discussion list
 # for Portuguese for any debate.
-from __future__ import print_function, unicode_literals
+
 from nltk.data import load
 
 from nltk.stem.api import StemmerI
 
+
 class RSLPStemmer(StemmerI):
     """
     A stemmer for Portuguese.
@@ -52,22 +53,22 @@ class RSLPStemmer(StemmerI):
         uma cas de port e janel , em cim dum coxilh .
     """
 
-    def __init__ (self):
+    def __init__(self):
         self._model = []
 
-        self._model.append( self.read_rule("step0.pt") )
-        self._model.append( self.read_rule("step1.pt") )
-        self._model.append( self.read_rule("step2.pt") )
-        self._model.append( self.read_rule("step3.pt") )
-        self._model.append( self.read_rule("step4.pt") )
-        self._model.append( self.read_rule("step5.pt") )
-        self._model.append( self.read_rule("step6.pt") )
+        self._model.append(self.read_rule("step0.pt"))
+        self._model.append(self.read_rule("step1.pt"))
+        self._model.append(self.read_rule("step2.pt"))
+        self._model.append(self.read_rule("step3.pt"))
+        self._model.append(self.read_rule("step4.pt"))
+        self._model.append(self.read_rule("step5.pt"))
+        self._model.append(self.read_rule("step6.pt"))
 
-    def read_rule (self, filename):
-        rules = load('nltk:stemmers/rslp/' + filename, format='raw').decode("utf8")
+    def read_rule(self, filename):
+        rules = load("nltk:stemmers/rslp/" + filename, format="raw").decode("utf8")
         lines = rules.split("\n")
 
-        lines = [line for line in lines if line != ""]     # remove blank lines
+        lines = [line for line in lines if line != ""]  # remove blank lines
         lines = [line for line in lines if line[0] != "#"]  # remove comments
 
         # NOTE: a simple but ugly hack to make this parser happy with double '\t's
@@ -80,16 +81,16 @@ class RSLPStemmer(StemmerI):
             tokens = line.split("\t")
 
             # text to be searched for at the end of the string
-            rule.append( tokens[0][1:-1] ) # remove quotes
+            rule.append(tokens[0][1:-1])  # remove quotes
 
             # minimum stem size to perform the replacement
-            rule.append( int(tokens[1]) )
+            rule.append(int(tokens[1]))
 
             # text to be replaced into
-            rule.append( tokens[2][1:-1] ) # remove quotes
+            rule.append(tokens[2][1:-1])  # remove quotes
 
             # exceptions to this rule
-            rule.append( [token[1:-1] for token in tokens[3].split(",")] )
+            rule.append([token[1:-1] for token in tokens[3].split(",")])
 
             # append to the results
             rules.append(rule)
@@ -130,13 +131,10 @@ class RSLPStemmer(StemmerI):
         rules = self._model[rule_index]
         for rule in rules:
             suffix_length = len(rule[0])
-            if word[-suffix_length:] == rule[0]:       # if suffix matches
-                if len(word) >= suffix_length + rule[1]: # if we have minimum size
-                    if word not in rule[3]:                # if not an exception
+            if word[-suffix_length:] == rule[0]:  # if suffix matches
+                if len(word) >= suffix_length + rule[1]:  # if we have minimum size
+                    if word not in rule[3]:  # if not an exception
                         word = word[:-suffix_length] + rule[2]
                         break
 
         return word
-
-
-
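Each parsed rule above is a four-item list, [suffix, min_stem_size, replacement, exceptions], and apply_rule walks one step's rules until the first one fully fires. A standalone rendering of that loop (the rule data below is invented for illustration; the real rules come from the step*.pt files):

    def apply_rule(word, rules):
        for suffix, min_size, replacement, exceptions in rules:
            if word.endswith(suffix):                    # suffix matches
                if len(word) >= len(suffix) + min_size:  # stem long enough
                    if word not in exceptions:           # not an exception
                        return word[: -len(suffix)] + replacement
        return word

    # Invented plural rule: strip a final "s" if >= 2 characters remain.
    print(apply_rule("casas", [["s", 2, "", []]]))  # casa

Note that RSLPStemmer itself needs the rslp data package, typically fetched with nltk.download('rslp').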
diff --git a/nlp_resource_data/nltk/stem/rslp.pyc b/nlp_resource_data/nltk/stem/rslp.pyc
deleted file mode 100755 (executable)
index 7807e0a..0000000
Binary files a/nlp_resource_data/nltk/stem/rslp.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/stem/snowball.py b/nlp_resource_data/nltk/stem/snowball.py
old mode 100755 (executable)
new mode 100644 (file)
index 00b511c..aede6a4
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: Snowball Stemmer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Peter Michael Stahl <pemistahl@gmail.com>
 #         Peter Ljunglof <peter.ljunglof@heatherleaf.se> (revisions)
 #         Lakhdar Benzahia <lakhdar.benzahia@gmail.com>  (co-writer)
@@ -23,12 +23,9 @@ developed by Martin Porter.
 There is also a demo function: `snowball.demo()`.
 
 """
-from __future__ import unicode_literals, print_function
 
-from six.moves import input
 import re
 
-from nltk import compat
 from nltk.corpus import stopwords
 from nltk.stem import porter
 from nltk.stem.util import suffix_replace, prefix_replace
@@ -87,9 +84,24 @@ class SnowballStemmer(StemmerI):
                            language, a ValueError is raised.
     """
 
-    languages = ("arabic", "danish", "dutch", "english", "finnish", "french", "german",
-                 "hungarian", "italian", "norwegian", "porter", "portuguese",
-                 "romanian", "russian", "spanish", "swedish")
+    languages = (
+        "arabic",
+        "danish",
+        "dutch",
+        "english",
+        "finnish",
+        "french",
+        "german",
+        "hungarian",
+        "italian",
+        "norwegian",
+        "porter",
+        "portuguese",
+        "romanian",
+        "russian",
+        "spanish",
+        "swedish",
+    )
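For reference, typical usage, where the language must be one of the entries in this tuple (the German example matches NLTK's documented doctest):

    from nltk.stem.snowball import SnowballStemmer

    print(SnowballStemmer("german").stem("Autobahnen"))  # autobahn
    # SnowballStemmer("klingon") raises ValueError, per the docstring above.
    # ignore_stopwords=True additionally requires nltk.download("stopwords").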
 
     def __init__(self, language, ignore_stopwords=False):
         if language not in self.languages:
@@ -98,12 +110,11 @@ class SnowballStemmer(StemmerI):
         self.stemmer = stemmerclass(ignore_stopwords)
         self.stem = self.stemmer.stem
         self.stopwords = self.stemmer.stopwords
-    
+
     def stem(self, token):
         # In practice shadowed by the bound method assigned in __init__.
         return self.stemmer.stem(token)
 
 
-@compat.python_2_unicode_compatible
 class _LanguageSpecificStemmer(StemmerI):
 
     """
@@ -131,8 +142,10 @@ class _LanguageSpecificStemmer(StemmerI):
                 for word in stopwords.words(language):
                     self.stopwords.add(word)
             except IOError:
-                raise ValueError("{!r} has no list of stopwords. Please set"
-                                 " 'ignore_stopwords' to 'False'.".format(self))
+                raise ValueError(
+                    "{!r} has no list of stopwords. Please set"
+                    " 'ignore_stopwords' to 'False'.".format(self)
+                )
 
     def __repr__(self):
         """
@@ -154,6 +167,7 @@ class PorterStemmer(_LanguageSpecificStemmer, porter.PorterStemmer):
     nltk.stem.porter for more information.
 
     """
+
     def __init__(self, ignore_stopwords=False):
         _LanguageSpecificStemmer.__init__(self, ignore_stopwords)
         porter.PorterStemmer.__init__(self)
@@ -190,11 +204,11 @@ class _ScandinavianStemmer(_LanguageSpecificStemmer):
         """
         r1 = ""
         for i in range(1, len(word)):
-            if word[i] not in vowels and word[i-1] in vowels:
-                if len(word[:i+1]) < 3 and len(word[:i+1]) > 0:
+            if word[i] not in vowels and word[i - 1] in vowels:
+                if 3 > len(word[: i + 1]) > 0:
                     r1 = word[3:]
-                elif len(word[:i+1]) >= 3:
-                    r1 = word[i+1:]
+                elif len(word[: i + 1]) >= 3:
+                    r1 = word[i + 1 :]
                 else:
                     return word
                 break
@@ -241,19 +255,17 @@ class _StandardStemmer(_LanguageSpecificStemmer):
         r1 = ""
         r2 = ""
         for i in range(1, len(word)):
-            if word[i] not in vowels and word[i-1] in vowels:
-                r1 = word[i+1:]
+            if word[i] not in vowels and word[i - 1] in vowels:
+                r1 = word[i + 1 :]
                 break
 
         for i in range(1, len(r1)):
-            if r1[i] not in vowels and r1[i-1] in vowels:
-                r2 = r1[i+1:]
+            if r1[i] not in vowels and r1[i - 1] in vowels:
+                r2 = r1[i + 1 :]
                 break
 
         return (r1, r2)
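R1 is the region after the first consonant that follows a vowel, and R2 is the same definition applied again inside R1 (the standard Snowball regions). The method above can be exercised standalone; a copy with the textbook example:

    def r1r2_standard(word, vowels="aeiouy"):  # English vowel set
        r1 = r2 = ""
        for i in range(1, len(word)):
            if word[i] not in vowels and word[i - 1] in vowels:
                r1 = word[i + 1 :]
                break
        for i in range(1, len(r1)):
            if r1[i] not in vowels and r1[i - 1] in vowels:
                r2 = r1[i + 1 :]
                break
        return r1, r2

    print(r1r2_standard("beautiful"))  # ('iful', 'ul')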
 
-
-
     def _rv_standard(self, word, vowels):
         """
         Return the standard interpretation of the string region RV.
@@ -281,20 +293,21 @@ class _StandardStemmer(_LanguageSpecificStemmer):
             if word[1] not in vowels:
                 for i in range(2, len(word)):
                     if word[i] in vowels:
-                        rv = word[i+1:]
+                        rv = word[i + 1 :]
                         break
 
             elif word[0] in vowels and word[1] in vowels:
                 for i in range(2, len(word)):
                     if word[i] not in vowels:
-                        rv = word[i+1:]
+                        rv = word[i + 1 :]
                         break
             else:
                 rv = word[3:]
 
         return rv
 
-class ArabicStemmer(_LanguageSpecificStemmer):
+
+class ArabicStemmer(_StandardStemmer):
     """
         https://github.com/snowballstem/snowball/blob/master/algorithms/arabic/stem_Unicode.sbl (Original Algorithm)
         The Snowball Arabic light Stemmer
@@ -303,127 +316,199 @@ class ArabicStemmer(_LanguageSpecificStemmer):
                    Lakhdar Benzahia
        NLTK version author: Lakhdar Benzahia
     """
+
     # Normalize_pre steps
-    __vocalization = re.compile(r'[\u064b-\u064c-\u064d-\u064e-\u064f-\u0650-\u0651-\u0652]') # ً، ٌ، ٍ، َ، ُ، ِ، ّ، ْ
+    __vocalization = re.compile(
+        r"[\u064b-\u064c-\u064d-\u064e-\u064f-\u0650-\u0651-\u0652]"
+    )  # ً، ٌ، ٍ، َ، ُ، ِ، ّ، ْ
 
-    __kasheeda = re.compile(r'[\u0640]') # ـ tatweel/kasheeda
+    __kasheeda = re.compile(r"[\u0640]")  # ـ tatweel/kasheeda
 
-    __arabic_punctuation_marks = re.compile(r'[\u060C-\u061B-\u061F]') #  ؛ ، ؟
+    __arabic_punctuation_marks = re.compile(r"[\u060C-\u061B-\u061F]")  #  ؛ ، ؟
 
     # Normalize_post
-    __last_hamzat = ('\u0623', '\u0625', '\u0622', '\u0624', '\u0626') # أ، إ، آ، ؤ، ئ
+    __last_hamzat = ("\u0623", "\u0625", "\u0622", "\u0624", "\u0626")  # أ، إ، آ، ؤ، ئ
 
     # normalize other hamza's
-    __initial_hamzat = re.compile(r'^[\u0622\u0623\u0625]') #  أ، إ، آ
+    __initial_hamzat = re.compile(r"^[\u0622\u0623\u0625]")  #  أ، إ، آ
 
-    __waw_hamza = re.compile(r'[\u0624]') # ؤ
+    __waw_hamza = re.compile(r"[\u0624]")  # ؤ
 
-    __yeh_hamza = re.compile(r'[\u0626]') # ئ
+    __yeh_hamza = re.compile(r"[\u0626]")  # ئ
 
-    __alefat = re.compile(r'[\u0623\u0622\u0625]') #  أ، إ، آ
+    __alefat = re.compile(r"[\u0623\u0622\u0625]")  #  أ، إ، آ
 
     # Checks
-    __checks1 = ('\u0643\u0627\u0644', '\u0628\u0627\u0644',  # بال، كال
-                 '\u0627\u0644', '\u0644\u0644' # لل، ال
-                 )
+    __checks1 = (
+        "\u0643\u0627\u0644",
+        "\u0628\u0627\u0644",  # بال، كال
+        "\u0627\u0644",
+        "\u0644\u0644",  # لل، ال
+    )
 
-    __checks2 = ('\u0629', # ة
-                 '\u0627\u062a'  #  female plural ات
-                 )
+    __checks2 = ("\u0629", "\u0627\u062a")  # ة  #  female plural ات
 
     # Suffixes
-    __suffix_noun_step1a = ('\u064a', '\u0643', '\u0647', # ي، ك، ه
-                            '\u0646\u0627', '\u0643\u0645', '\u0647\u0627', '\u0647\u0646', '\u0647\u0645', # نا، كم، ها، هن، هم
-                            '\u0643\u0645\u0627', '\u0647\u0645\u0627' # كما، هما
-                            )
-
-    __suffix_noun_step1b = ('\u0646') # ن
-
-    __suffix_noun_step2a = ('\u0627', '\u064a', '\u0648') # ا، ي، و
-
-    __suffix_noun_step2b = ('\u0627\u062a') # ات
-
-    __suffix_noun_step2c1 = ('\u062a') # ت
-
-    __suffix_noun_step2c2 = ('\u0629') # ة
-
-    __suffix_noun_step3 = ('\u064a') # ي
-
-    __suffix_verb_step1 = ('\u0647', '\u0643', # ه، ك
-                           '\u0646\u064a', '\u0646\u0627', '\u0647\u0627', '\u0647\u0645', # ني، نا، ها، هم
-                           '\u0647\u0646', '\u0643\u0645', '\u0643\u0646', # هن، كم، كن
-                           '\u0647\u0645\u0627', '\u0643\u0645\u0627', '\u0643\u0645\u0648' # هما، كما، كمو
-                          )
-
-    __suffix_verb_step2a = ( '\u062a', '\u0627', '\u0646' , '\u064a', # ت، ا، ن، ي
-                             '\u0646\u0627', '\u062a\u0627', '\u062a\u0646', # نا، تا، تن Past
-                             '\u0627\u0646', '\u0648\u0646', '\u064a\u0646', # ان، هن، ين Present
-                             '\u062a\u0645\u0627' # تما
-                           )
-
-    __suffix_verb_step2b = ('\u0648\u0627','\u062a\u0645') # وا، تم
-
-    __suffix_verb_step2c = ('\u0648', # و
-                            '\u062a\u0645\u0648' # تمو
-                           )
-
-    __suffix_all_alef_maqsura = ('\u0649') # ى
+    __suffix_noun_step1a = (
+        "\u064a",
+        "\u0643",
+        "\u0647",  # ي، ك، ه
+        "\u0646\u0627",
+        "\u0643\u0645",
+        "\u0647\u0627",
+        "\u0647\u0646",
+        "\u0647\u0645",  # نا، كم، ها، هن، هم
+        "\u0643\u0645\u0627",
+        "\u0647\u0645\u0627",  # كما، هما
+    )
+
+    __suffix_noun_step1b = "\u0646"  # ن
+
+    __suffix_noun_step2a = ("\u0627", "\u064a", "\u0648")  # ا، ي، و
+
+    __suffix_noun_step2b = "\u0627\u062a"  # ات
+
+    __suffix_noun_step2c1 = "\u062a"  # ت
+
+    __suffix_noun_step2c2 = "\u0629"  # ة
+
+    __suffix_noun_step3 = "\u064a"  # ي
+
+    __suffix_verb_step1 = (
+        "\u0647",
+        "\u0643",  # ه، ك
+        "\u0646\u064a",
+        "\u0646\u0627",
+        "\u0647\u0627",
+        "\u0647\u0645",  # ني، نا، ها، هم
+        "\u0647\u0646",
+        "\u0643\u0645",
+        "\u0643\u0646",  # هن، كم، كن
+        "\u0647\u0645\u0627",
+        "\u0643\u0645\u0627",
+        "\u0643\u0645\u0648",  # هما، كما، كمو
+    )
+
+    __suffix_verb_step2a = (
+        "\u062a",
+        "\u0627",
+        "\u0646",
+        "\u064a",  # ت، ا، ن، ي
+        "\u0646\u0627",
+        "\u062a\u0627",
+        "\u062a\u0646",  # نا، تا، تن Past
+        "\u0627\u0646",
+        "\u0648\u0646",
+        "\u064a\u0646",  # ان، هن، ين Present
+        "\u062a\u0645\u0627",  # تما
+    )
+
+    __suffix_verb_step2b = ("\u0648\u0627", "\u062a\u0645")  # وا، تم
+
+    __suffix_verb_step2c = ("\u0648", "\u062a\u0645\u0648")  # و  # تمو
+
+    __suffix_all_alef_maqsura = "\u0649"  # ى
 
     # Prefixes
-    __prefix_step1 = ('\u0623', # أ
-                      '\u0623\u0623', '\u0623\u0622', '\u0623\u0624', '\u0623\u0627', '\u0623\u0625', # أأ، أآ، أؤ، أا، أإ
-                      )
-
-    __prefix_step2a = ('\u0641\u0627\u0644', '\u0648\u0627\u0644') # فال، وال
-
-    __prefix_step2b = ('\u0641', '\u0648') # ف، و
-
-    __prefix_step3a_noun = ('\u0627\u0644', '\u0644\u0644', # لل، ال
-                            '\u0643\u0627\u0644', '\u0628\u0627\u0644', # بال، كال
-                            )
-
-    __prefix_step3b_noun = ('\u0628', '\u0643', '\u0644', # ب، ك، ل
-                            '\u0628\u0628', '\u0643\u0643' # بب، كك
-                           )
-
-    __prefix_step3_verb = ('\u0633\u064a', '\u0633\u062a', '\u0633\u0646', '\u0633\u0623') # سي، ست، سن، سأ
-
-    __prefix_step4_verb = ('\u064a\u0633\u062a', '\u0646\u0633\u062a', '\u062a\u0633\u062a') # يست، نست، تست
+    __prefix_step1 = (
+        "\u0623",  # أ
+        "\u0623\u0623",
+        "\u0623\u0622",
+        "\u0623\u0624",
+        "\u0623\u0627",
+        "\u0623\u0625",  # أأ، أآ، أؤ، أا، أإ
+    )
+
+    __prefix_step2a = ("\u0641\u0627\u0644", "\u0648\u0627\u0644")  # فال، وال
+
+    __prefix_step2b = ("\u0641", "\u0648")  # ف، و
+
+    __prefix_step3a_noun = (
+        "\u0627\u0644",
+        "\u0644\u0644",  # لل، ال
+        "\u0643\u0627\u0644",
+        "\u0628\u0627\u0644",  # بال، كال
+    )
+
+    __prefix_step3b_noun = (
+        "\u0628",
+        "\u0643",
+        "\u0644",  # ب، ك، ل
+        "\u0628\u0628",
+        "\u0643\u0643",  # بب، كك
+    )
+
+    __prefix_step3_verb = (
+        "\u0633\u064a",
+        "\u0633\u062a",
+        "\u0633\u0646",
+        "\u0633\u0623",
+    )  # سي، ست، سن، سأ
+
+    __prefix_step4_verb = (
+        "\u064a\u0633\u062a",
+        "\u0646\u0633\u062a",
+        "\u062a\u0633\u062a",
+    )  # يست، نست، تست
 
     # Suffixes added due to verb conjugation
-    __conjugation_suffix_verb_1 = ('\u0647', '\u0643') # ه، ك
-
-    __conjugation_suffix_verb_2 = ('\u0646\u064a', '\u0646\u0627','\u0647\u0627', # ني، نا، ها
-                                   '\u0647\u0645', '\u0647\u0646', '\u0643\u0645', # هم، هن، كم
-                                   '\u0643\u0646' # كن
-                                   )
-    __conjugation_suffix_verb_3 = ('\u0647\u0645\u0627', '\u0643\u0645\u0627', '\u0643\u0645\u0648') # هما، كما، كمو
-
-    __conjugation_suffix_verb_4 = ('\u0627', '\u0646', '\u064a') # ا، ن، ي
-
-    __conjugation_suffix_verb_past = ('\u0646\u0627', '\u062a\u0627', '\u062a\u0646') # نا، تا، تن
-
-    __conjugation_suffix_verb_presnet = ('\u0627\u0646', '\u0648\u0646', '\u064a\u0646') # ان، ون، ين
+    __conjugation_suffix_verb_1 = ("\u0647", "\u0643")  # ه، ك
+
+    __conjugation_suffix_verb_2 = (
+        "\u0646\u064a",
+        "\u0646\u0627",
+        "\u0647\u0627",  # ني، نا، ها
+        "\u0647\u0645",
+        "\u0647\u0646",
+        "\u0643\u0645",  # هم، هن، كم
+        "\u0643\u0646",  # كن
+    )
+    __conjugation_suffix_verb_3 = (
+        "\u0647\u0645\u0627",
+        "\u0643\u0645\u0627",
+        "\u0643\u0645\u0648",
+    )  # هما، كما، كمو
+
+    __conjugation_suffix_verb_4 = ("\u0627", "\u0646", "\u064a")  # ا، ن، ي
+
+    __conjugation_suffix_verb_past = (
+        "\u0646\u0627",
+        "\u062a\u0627",
+        "\u062a\u0646",
+    )  # نا، تا، تن
+
+    __conjugation_suffix_verb_present = (
+        "\u0627\u0646",
+        "\u0648\u0646",
+        "\u064a\u0646",
+    )  # ان، ون، ين
 
     # Suffixes added due to noun derivation
-    __conjugation_suffix_noun_1 = ('\u064a', '\u0643', '\u0647') # ي، ك، ه
+    __conjugation_suffix_noun_1 = ("\u064a", "\u0643", "\u0647")  # ي، ك، ه
 
-    __conjugation_suffix_noun_2 = ('\u0646\u0627', '\u0643\u0645', # نا، كم
-                                   '\u0647\u0627', '\u0647\u0646', '\u0647\u0645' # ها، هن، هم
-                                   )
+    __conjugation_suffix_noun_2 = (
+        "\u0646\u0627",
+        "\u0643\u0645",  # نا، كم
+        "\u0647\u0627",
+        "\u0647\u0646",
+        "\u0647\u0645",  # ها، هن، هم
+    )
 
-    __conjugation_suffix_noun_3 = ('\u0643\u0645\u0627', '\u0647\u0645\u0627') # كما، هما
+    __conjugation_suffix_noun_3 = (
+        "\u0643\u0645\u0627",
+        "\u0647\u0645\u0627",
+    )  # كما، هما
 
     # Prefixes added due to noun derivation
-    __prefixes1 = ('\u0648\u0627', '\u0641\u0627') # فا، وا
+    __prefixes1 = ("\u0648\u0627", "\u0641\u0627")  # فا، وا
 
-    __articles_3len = ('\u0643\u0627\u0644', '\u0628\u0627\u0644')  # بال كال
+    __articles_3len = ("\u0643\u0627\u0644", "\u0628\u0627\u0644")  # بال كال
 
-    __articles_2len = ('\u0627\u0644', '\u0644\u0644')  # ال لل
+    __articles_2len = ("\u0627\u0644", "\u0644\u0644")  # ال لل
 
     # Preposition letters
-    __prepositions1 = ('\u0643', '\u0644') # ك، ل
-    __prepositions2 = ('\u0628\u0628', '\u0643\u0643') # بب، كك
+    __prepositions1 = ("\u0643", "\u0644")  # ك، ل
+    __prepositions2 = ("\u0628\u0628", "\u0643\u0643")  # بب، كك
 
     is_verb = True
     is_noun = True
@@ -447,36 +532,36 @@ class ArabicStemmer(_LanguageSpecificStemmer):
         :return: normalized token type string
         """
         # strip diacritics
-        token = self.__vocalization.sub('', token)
-        #strip kasheeda
-        token = self.__kasheeda.sub('', token)
+        token = self.__vocalization.sub("", token)
+        # strip kasheeda
+        token = self.__kasheeda.sub("", token)
         # strip punctuation marks
-        token = self.__arabic_punctuation_marks.sub('', token)
+        token = self.__arabic_punctuation_marks.sub("", token)
         return token
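__normalize_pre is three re.sub passes. Incidentally, the hyphen-separated character class above ([\u064b-\u064c-\u064d-...]) appears to parse as a union of two-codepoint ranges plus a literal '-', which still covers the whole U+064B..U+0652 block. A standalone equivalent (the function name is illustrative):

    import re

    def normalize_pre(token):
        token = re.sub("[\u064b-\u0652]", "", token)  # harakat, shadda, sukun
        token = re.sub("\u0640", "", token)           # tatweel/kasheeda
        token = re.sub("[\u060c\u061b\u061f]", "", token)  # ، ؛ ؟
        return token

    # U+0645 U+064F U+062D U+064E U+0645 U+0651 U+064E U+062F -> محمد
    print(normalize_pre("\u0645\u064f\u062d\u064e\u0645\u0651\u064e\u062f"))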
 
     def __normalize_post(self, token):
         # normalize last hamza
         for hamza in self.__last_hamzat:
             if token.endswith(hamza):
-                token = suffix_replace(token, hamza, '\u0621')
+                token = suffix_replace(token, hamza, "\u0621")
                 break
         # normalize other hamzat
-        token = self.__initial_hamzat.sub('\u0627', token)
-        token = self.__waw_hamza.sub('\u0648', token)
-        token = self.__yeh_hamza.sub('\u064a', token)
-        token = self.__alefat.sub('\u0627', token)
-        return  token
+        token = self.__initial_hamzat.sub("\u0627", token)
+        token = self.__waw_hamza.sub("\u0648", token)
+        token = self.__yeh_hamza.sub("\u064a", token)
+        token = self.__alefat.sub("\u0627", token)
+        return token
 
     def __checks_1(self, token):
-        for prefix in self.__checks1 :
+        for prefix in self.__checks1:
             if token.startswith(prefix):
-                if prefix in self.__articles_3len and len(token) > 4 :
+                if prefix in self.__articles_3len and len(token) > 4:
                     self.is_noun = True
                     self.is_verb = False
                     self.is_defined = True
                     break
 
-                if prefix in self.__articles_2len and len(token) > 3 :
+                if prefix in self.__articles_2len and len(token) > 3:
                     self.is_noun = True
                     self.is_verb = False
                     self.is_defined = True
@@ -485,12 +570,12 @@ class ArabicStemmer(_LanguageSpecificStemmer):
     def __checks_2(self, token):
         for suffix in self.__checks2:
             if token.endswith(suffix):
-                if suffix == '\u0629' and len(token) > 2:
+                if suffix == "\u0629" and len(token) > 2:
                     self.is_noun = True
                     self.is_verb = False
                     break
 
-                if suffix == '\u0627\u062a' and len(token) > 3:
+                if suffix == "\u0627\u062a" and len(token) > 3:
                     self.is_noun = True
                     self.is_verb = False
                     break
@@ -516,8 +601,8 @@ class ArabicStemmer(_LanguageSpecificStemmer):
 
     def __Suffix_Verb_Step2a(self, token):
         for suffix in self.__suffix_verb_step2a:
-            if token.endswith(suffix):
-                if suffix == '\u062a' and len(token) >= 4:
+            if token.endswith(suffix) and len(token) > 3:
+                if suffix == "\u062a" and len(token) >= 4:
                     token = token[:-1]
                     self.suffix_verb_step2a_success = True
                     break
@@ -537,20 +622,20 @@ class ArabicStemmer(_LanguageSpecificStemmer):
                     self.suffix_verb_step2a_success = True
                     break
 
-                if suffix == '\u062a\u0645\u0627' and len(token) >= 6:
+                if suffix == "\u062a\u0645\u0627" and len(token) >= 6:
                     token = token[:-3]
                     self.suffix_verb_step2a_success = True
                     break
-        return  token
+        return token
 
     def __Suffix_Verb_Step2c(self, token):
         for suffix in self.__suffix_verb_step2c:
             if token.endswith(suffix):
-                if suffix == '\u062a\u0645\u0648' and len(token) >= 6:
+                if suffix == "\u062a\u0645\u0648" and len(token) >= 6:
                     token = token[:-3]
                     break
 
-                if suffix == '\u0648' and len(token) >= 4:
+                if suffix == "\u0648" and len(token) >= 4:
                     token = token[:-1]
                     break
         return token
@@ -561,7 +646,7 @@ class ArabicStemmer(_LanguageSpecificStemmer):
                 token = token[:-2]
                 self.suffix_verb_step2b_success = True
                 break
-        return  token
+        return token
 
     def __Suffix_Noun_Step2c2(self, token):
         for suffix in self.__suffix_noun_step2c2:
@@ -604,7 +689,7 @@ class ArabicStemmer(_LanguageSpecificStemmer):
                 token = token[:-2]
                 self.suffix_noun_step2b_success = True
                 break
-        return  token
+        return token
 
     def __Suffix_Noun_Step2c1(self, token):
         for suffix in self.__suffix_noun_step2c1:
@@ -631,46 +716,46 @@ class ArabicStemmer(_LanguageSpecificStemmer):
     def __Suffix_All_alef_maqsura(self, token):
         for suffix in self.__suffix_all_alef_maqsura:
             if token.endswith(suffix):
-                token = suffix_replace(token, suffix, '\u064a')
-        return  token
+                token = suffix_replace(token, suffix, "\u064a")
+        return token
 
     def __Prefix_Step1(self, token):
         for prefix in self.__prefix_step1:
             if token.startswith(prefix) and len(token) > 3:
-                if prefix == '\u0623\u0623':
-                    token = prefix_replace(token, prefix, '\u0623')
+                if prefix == "\u0623\u0623":
+                    token = prefix_replace(token, prefix, "\u0623")
                     break
 
-                elif prefix == '\u0623\u0622':
-                    token = prefix_replace(token, prefix, '\u0622')
+                elif prefix == "\u0623\u0622":
+                    token = prefix_replace(token, prefix, "\u0622")
                     break
 
-                elif prefix == '\u0623\u0624':
-                    token = prefix_replace(token, prefix, '\u0624')
+                elif prefix == "\u0623\u0624":
+                    token = prefix_replace(token, prefix, "\u0624")
                     break
 
-                elif prefix == '\u0623\u0627' :
-                    token = prefix_replace(token, prefix, '\u0627')
+                elif prefix == "\u0623\u0627":
+                    token = prefix_replace(token, prefix, "\u0627")
                     break
 
-                elif prefix == '\u0623\u0625' :
-                    token = prefix_replace(token, prefix, '\u0625')
+                elif prefix == "\u0623\u0625":
+                    token = prefix_replace(token, prefix, "\u0625")
                     break
         return token
 
     def __Prefix_Step2a(self, token):
         for prefix in self.__prefix_step2a:
             if token.startswith(prefix) and len(token) > 5:
-                token = token[len(prefix):]
+                token = token[len(prefix) :]
                 self.prefix_step2a_success = True
                 break
-        return  token
+        return token
 
     def __Prefix_Step2b(self, token):
         for prefix in self.__prefix_step2b:
-            if token.startswith(prefix) and len(token) > 3 :
+            if token.startswith(prefix) and len(token) > 3:
                 if token[:2] not in self.__prefixes1:
-                    token = token[len(prefix):]
+                    token = token[len(prefix) :]
                     break
         return token
 
@@ -678,11 +763,11 @@ class ArabicStemmer(_LanguageSpecificStemmer):
         for prefix in self.__prefix_step3a_noun:
             if token.startswith(prefix):
                 if prefix in self.__articles_2len and len(token) > 4:
-                    token =  token[len(prefix):]
+                    token = token[len(prefix) :]
                     self.prefix_step3a_noun_success = True
                     break
-                if prefix in self.__articles_3len  and len(token) > 5:
-                    token = token[len(prefix):]
+                if prefix in self.__articles_3len and len(token) > 5:
+                    token = token[len(prefix) :]
                     break
         return token
 
@@ -690,8 +775,8 @@ class ArabicStemmer(_LanguageSpecificStemmer):
         for prefix in self.__prefix_step3b_noun:
             if token.startswith(prefix):
                 if len(token) > 3:
-                    if prefix == '\u0628':
-                        token = token[len(prefix):]
+                    if prefix == "\u0628":
+                        token = token[len(prefix) :]
                         self.prefix_step3b_noun_success = True
                         break
 
@@ -701,7 +786,7 @@ class ArabicStemmer(_LanguageSpecificStemmer):
                         break
 
                 if prefix in self.__prepositions1 and len(token) > 4:
-                    token = token[len(prefix):]  # BUG: cause confusion
+                    token = token[len(prefix) :]  # BUG: causes confusion
                     self.prefix_step3b_noun_success = True
                     break
         return token
@@ -716,7 +801,7 @@ class ArabicStemmer(_LanguageSpecificStemmer):
     def __Prefix_Step4_Verb(self, token):
         for prefix in self.__prefix_step4_verb:
             if token.startswith(prefix) and len(token) > 4:
-                token = prefix_replace(token, prefix, '\u0627\u0633\u062a')
+                token = prefix_replace(token, prefix, "\u0627\u0633\u062a")
                 self.is_verb = True
                 self.is_noun = False
                 break
@@ -750,14 +835,19 @@ class ArabicStemmer(_LanguageSpecificStemmer):
         self.__checks_1(modified_word)
         # checks2
         self.__checks_2(modified_word)
+        # Pre_Normalization
         modified_word = self.__normalize_pre(modified_word)
+        # Avoid stopwords
+        if modified_word in self.stopwords or len(modified_word) <= 2:
+            return modified_word
+        # Start stemming
         if self.is_verb:
             modified_word = self.__Suffix_Verb_Step1(modified_word)
-            if  self.suffixes_verb_step1_success:
+            if self.suffixes_verb_step1_success:
                 modified_word = self.__Suffix_Verb_Step2a(modified_word)
-                if not self.suffix_verb_step2a_success :
+                if not self.suffix_verb_step2a_success:
                     modified_word = self.__Suffix_Verb_Step2c(modified_word)
-                #or next
+                # TODO: decide how to handle the "or next" instruction here
             else:
                 modified_word = self.__Suffix_Verb_Step2b(modified_word)
                 if not self.suffix_verb_step2b_success:
@@ -767,20 +857,26 @@ class ArabicStemmer(_LanguageSpecificStemmer):
             if not self.suffix_noun_step2c2_success:
                 if not self.is_defined:
                     modified_word = self.__Suffix_Noun_Step1a(modified_word)
-                    #if self.suffix_noun_step1a_success:
+                    # if self.suffix_noun_step1a_success:
                     modified_word = self.__Suffix_Noun_Step2a(modified_word)
                     if not self.suffix_noun_step2a_success:
-                         modified_word = self.__Suffix_Noun_Step2b(modified_word)
-                    if not self.suffix_noun_step2b_success and not self.suffix_noun_step2a_success:
+                        modified_word = self.__Suffix_Noun_Step2b(modified_word)
+                    if (
+                        not self.suffix_noun_step2b_success
+                        and not self.suffix_noun_step2a_success
+                    ):
                         modified_word = self.__Suffix_Noun_Step2c1(modified_word)
                     # TODO: decide how to handle the "or next" instruction here
                 else:
-                    modified_word =  self.__Suffix_Noun_Step1b(modified_word)
+                    modified_word = self.__Suffix_Noun_Step1b(modified_word)
                     if self.suffixe_noun_step1b_success:
                         modified_word = self.__Suffix_Noun_Step2a(modified_word)
                         if not self.suffix_noun_step2a_success:
                             modified_word = self.__Suffix_Noun_Step2b(modified_word)
-                        if not self.suffix_noun_step2b_success and not self.suffix_noun_step2a_success:
+                        if (
+                            not self.suffix_noun_step2b_success
+                            and not self.suffix_noun_step2a_success
+                        ):
                             modified_word = self.__Suffix_Noun_Step2c1(modified_word)
                     else:
                         if not self.is_defined:
@@ -808,6 +904,7 @@ class ArabicStemmer(_LanguageSpecificStemmer):
         stemmed_word = modified_word
         return stemmed_word
 
+
 class DanishStemmer(_ScandinavianStemmer):
 
     """
@@ -836,20 +933,66 @@ class DanishStemmer(_ScandinavianStemmer):
     # The language's vowels and other important characters are defined.
     __vowels = "aeiouy\xE6\xE5\xF8"
     __consonants = "bcdfghjklmnpqrstvwxz"
-    __double_consonants = ("bb", "cc", "dd", "ff", "gg", "hh", "jj",
-                           "kk", "ll", "mm", "nn", "pp", "qq", "rr",
-                           "ss", "tt", "vv", "ww", "xx", "zz")
+    __double_consonants = (
+        "bb",
+        "cc",
+        "dd",
+        "ff",
+        "gg",
+        "hh",
+        "jj",
+        "kk",
+        "ll",
+        "mm",
+        "nn",
+        "pp",
+        "qq",
+        "rr",
+        "ss",
+        "tt",
+        "vv",
+        "ww",
+        "xx",
+        "zz",
+    )
     __s_ending = "abcdfghjklmnoprtvyz\xE5"
 
     # The different suffixes, divided into the algorithm's steps
     # and organized by length, are listed in tuples.
-    __step1_suffixes = ("erendes", "erende", "hedens", "ethed",
-                        "erede", "heden", "heder", "endes",
-                        "ernes", "erens", "erets", "ered",
-                        "ende", "erne", "eren", "erer", "heds",
-                        "enes", "eres", "eret", "hed", "ene", "ere",
-                        "ens", "ers", "ets", "en", "er", "es", "et",
-                        "e", "s")
+    __step1_suffixes = (
+        "erendes",
+        "erende",
+        "hedens",
+        "ethed",
+        "erede",
+        "heden",
+        "heder",
+        "endes",
+        "ernes",
+        "erens",
+        "erets",
+        "ered",
+        "ende",
+        "erne",
+        "eren",
+        "erer",
+        "heds",
+        "enes",
+        "eres",
+        "eret",
+        "hed",
+        "ene",
+        "ere",
+        "ens",
+        "ers",
+        "ets",
+        "en",
+        "er",
+        "es",
+        "et",
+        "e",
+        "s",
+    )
     __step2_suffixes = ("gd", "dt", "gt", "kt")
     __step3_suffixes = ("elig", "l\xF8st", "lig", "els", "ig")
 
@@ -885,8 +1028,8 @@ class DanishStemmer(_ScandinavianStemmer):
                         word = word[:-1]
                         r1 = r1[:-1]
                 else:
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
                 break
 
         # STEP 2
@@ -907,8 +1050,8 @@ class DanishStemmer(_ScandinavianStemmer):
                     word = word[:-1]
                     r1 = r1[:-1]
                 else:
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
 
                     if r1.endswith(self.__step2_suffixes):
                         word = word[:-1]
@@ -921,7 +1064,6 @@ class DanishStemmer(_ScandinavianStemmer):
                 word = word[:-1]
                 break
 
-
         return word
 
 
@@ -964,11 +1106,18 @@ class DutchStemmer(_StandardStemmer):
         step2_success = False
 
         # Vowel accents are removed.
-        word = (word.replace("\xE4", "a").replace("\xE1", "a")
-                    .replace("\xEB", "e").replace("\xE9", "e")
-                    .replace("\xED", "i").replace("\xEF", "i")
-                    .replace("\xF6", "o").replace("\xF3", "o")
-                    .replace("\xFC", "u").replace("\xFA", "u"))
+        word = (
+            word.replace("\xE4", "a")
+            .replace("\xE1", "a")
+            .replace("\xEB", "e")
+            .replace("\xE9", "e")
+            .replace("\xED", "i")
+            .replace("\xEF", "i")
+            .replace("\xF6", "o")
+            .replace("\xF3", "o")
+            .replace("\xFC", "u")
+            .replace("\xFA", "u")
+        )
 
         # An initial 'y', a 'y' after a vowel,
         # and an 'i' between self.__vowels is put into upper case.
@@ -977,23 +1126,26 @@ class DutchStemmer(_StandardStemmer):
             word = "".join(("Y", word[1:]))
 
         for i in range(1, len(word)):
-            if word[i-1] in self.__vowels and word[i] == "y":
-                word = "".join((word[:i], "Y", word[i+1:]))
+            if word[i - 1] in self.__vowels and word[i] == "y":
+                word = "".join((word[:i], "Y", word[i + 1 :]))
 
-        for i in range(1, len(word)-1):
-            if (word[i-1] in self.__vowels and word[i] == "i" and
-               word[i+1] in self.__vowels):
-                word = "".join((word[:i], "I", word[i+1:]))
+        for i in range(1, len(word) - 1):
+            if (
+                word[i - 1] in self.__vowels
+                and word[i] == "i"
+                and word[i + 1] in self.__vowels
+            ):
+                word = "".join((word[:i], "I", word[i + 1 :]))
 
         r1, r2 = self._r1r2_standard(word, self.__vowels)
 
         # R1 is adjusted so that the region before it
         # contains at least 3 letters.
         for i in range(1, len(word)):
-            if word[i] not in self.__vowels and word[i-1] in self.__vowels:
-                if len(word[:i+1]) < 3 and len(word[:i+1]) > 0:
+            if word[i] not in self.__vowels and word[i - 1] in self.__vowels:
+                if 3 > len(word[: i + 1]) > 0:
                     r1 = word[3:]
-                elif len(word[:i+1]) == 0:
+                elif len(word[: i + 1]) == 0:
                     return word
                 break
 
@@ -1006,24 +1158,28 @@ class DutchStemmer(_StandardStemmer):
                     if r2.endswith("heden"):
                         r2 = suffix_replace(r2, suffix, "heid")
 
-                elif (suffix in ("ene", "en") and
-                      not word.endswith("heden") and
-                      word[-len(suffix)-1] not in self.__vowels and
-                      word[-len(suffix)-3:-len(suffix)] != "gem"):
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
+                elif (
+                    suffix in ("ene", "en")
+                    and not word.endswith("heden")
+                    and word[-len(suffix) - 1] not in self.__vowels
+                    and word[-len(suffix) - 3 : -len(suffix)] != "gem"
+                ):
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
                     if word.endswith(("kk", "dd", "tt")):
                         word = word[:-1]
                         r1 = r1[:-1]
                         r2 = r2[:-1]
 
-                elif (suffix in ("se", "s") and
-                      word[-len(suffix)-1] not in self.__vowels and
-                      word[-len(suffix)-1] != "j"):
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
+                elif (
+                    suffix in ("se", "s")
+                    and word[-len(suffix) - 1] not in self.__vowels
+                    and word[-len(suffix) - 1] != "j"
+                ):
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
                 break
 
         # STEP 2
@@ -1044,8 +1200,11 @@ class DutchStemmer(_StandardStemmer):
             r1 = r1[:-4]
             r2 = r2[:-4]
 
-            if (r1.endswith("en") and word[-3] not in self.__vowels and
-                word[-5:-2] != "gem"):
+            if (
+                r1.endswith("en")
+                and word[-3] not in self.__vowels
+                and word[-5:-2] != "gem"
+            ):
                 word = word[:-2]
                 r1 = r1[:-2]
                 r2 = r2[:-2]
@@ -1097,11 +1256,9 @@ class DutchStemmer(_StandardStemmer):
         # All occurrences of 'I' and 'Y' are put back into lower case.
         word = word.replace("I", "i").replace("Y", "y")
 
-
         return word
 
 
-
 class EnglishStemmer(_StandardStemmer):
 
     """
@@ -1136,63 +1293,111 @@ class EnglishStemmer(_StandardStemmer):
     """
 
     __vowels = "aeiouy"
-    __double_consonants = ("bb", "dd", "ff", "gg", "mm", "nn",
-                           "pp", "rr", "tt")
+    __double_consonants = ("bb", "dd", "ff", "gg", "mm", "nn", "pp", "rr", "tt")
     __li_ending = "cdeghkmnrt"
     __step0_suffixes = ("'s'", "'s", "'")
     __step1a_suffixes = ("sses", "ied", "ies", "us", "ss", "s")
     __step1b_suffixes = ("eedly", "ingly", "edly", "eed", "ing", "ed")
-    __step2_suffixes = ('ization', 'ational', 'fulness', 'ousness',
-                        'iveness', 'tional', 'biliti', 'lessli',
-                        'entli', 'ation', 'alism', 'aliti', 'ousli',
-                        'iviti', 'fulli', 'enci', 'anci', 'abli',
-                        'izer', 'ator', 'alli', 'bli', 'ogi', 'li')
-    __step3_suffixes = ('ational', 'tional', 'alize', 'icate', 'iciti',
-                        'ative', 'ical', 'ness', 'ful')
-    __step4_suffixes = ('ement', 'ance', 'ence', 'able', 'ible', 'ment',
-                        'ant', 'ent', 'ism', 'ate', 'iti', 'ous',
-                        'ive', 'ize', 'ion', 'al', 'er', 'ic')
+    __step2_suffixes = (
+        "ization",
+        "ational",
+        "fulness",
+        "ousness",
+        "iveness",
+        "tional",
+        "biliti",
+        "lessli",
+        "entli",
+        "ation",
+        "alism",
+        "aliti",
+        "ousli",
+        "iviti",
+        "fulli",
+        "enci",
+        "anci",
+        "abli",
+        "izer",
+        "ator",
+        "alli",
+        "bli",
+        "ogi",
+        "li",
+    )
+    __step3_suffixes = (
+        "ational",
+        "tional",
+        "alize",
+        "icate",
+        "iciti",
+        "ative",
+        "ical",
+        "ness",
+        "ful",
+    )
+    __step4_suffixes = (
+        "ement",
+        "ance",
+        "ence",
+        "able",
+        "ible",
+        "ment",
+        "ant",
+        "ent",
+        "ism",
+        "ate",
+        "iti",
+        "ous",
+        "ive",
+        "ize",
+        "ion",
+        "al",
+        "er",
+        "ic",
+    )
     __step5_suffixes = ("e", "l")
-    __special_words = {"skis" : "ski",
-                       "skies" : "sky",
-                       "dying" : "die",
-                       "lying" : "lie",
-                       "tying" : "tie",
-                       "idly" : "idl",
-                       "gently" : "gentl",
-                       "ugly" : "ugli",
-                       "early" : "earli",
-                       "only" : "onli",
-                       "singly" : "singl",
-                       "sky" : "sky",
-                       "news" : "news",
-                       "howe" : "howe",
-                       "atlas" : "atlas",
-                       "cosmos" : "cosmos",
-                       "bias" : "bias",
-                       "andes" : "andes",
-                       "inning" : "inning",
-                       "innings" : "inning",
-                       "outing" : "outing",
-                       "outings" : "outing",
-                       "canning" : "canning",
-                       "cannings" : "canning",
-                       "herring" : "herring",
-                       "herrings" : "herring",
-                       "earring" : "earring",
-                       "earrings" : "earring",
-                       "proceed" : "proceed",
-                       "proceeds" : "proceed",
-                       "proceeded" : "proceed",
-                       "proceeding" : "proceed",
-                       "exceed" : "exceed",
-                       "exceeds" : "exceed",
-                       "exceeded" : "exceed",
-                       "exceeding" : "exceed",
-                       "succeed" : "succeed",
-                       "succeeds" : "succeed",
-                       "succeeded" : "succeed",
-                       "succeeding" : "succeed"}
+    __special_words = {
+        "skis": "ski",
+        "skies": "sky",
+        "dying": "die",
+        "lying": "lie",
+        "tying": "tie",
+        "idly": "idl",
+        "gently": "gentl",
+        "ugly": "ugli",
+        "early": "earli",
+        "only": "onli",
+        "singly": "singl",
+        "sky": "sky",
+        "news": "news",
+        "howe": "howe",
+        "atlas": "atlas",
+        "cosmos": "cosmos",
+        "bias": "bias",
+        "andes": "andes",
+        "inning": "inning",
+        "innings": "inning",
+        "outing": "outing",
+        "outings": "outing",
+        "canning": "canning",
+        "cannings": "canning",
+        "herring": "herring",
+        "herrings": "herring",
+        "earring": "earring",
+        "earrings": "earring",
+        "proceed": "proceed",
+        "proceeds": "proceed",
+        "proceeded": "proceed",
+        "proceeding": "proceed",
+        "exceed": "exceed",
+        "exceeds": "exceed",
+        "exceeded": "exceed",
+        "exceeding": "exceed",
+        "succeed": "succeed",
+        "succeeds": "succeed",
+        "succeeded": "succeed",
+        "succeeding": "succeed",
+    }
 
     def stem(self, word):
 
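The reformatted tables above (the step suffix tuples plus the special-word map) are the whole rule set for the English Porter2 stemmer. A minimal usage sketch, assuming the vendored module still exposes the standard nltk.stem.snowball entry point (illustrative only, not part of this patch):

    from nltk.stem.snowball import EnglishStemmer

    stemmer = EnglishStemmer()
    stemmer.stem("dying")    # special-word map above -> "die"
    stemmer.stem("skies")    # special-word map above -> "sky"
    stemmer.stem("running")  # regular suffix pipeline -> "run"
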
@@ -1214,9 +1419,11 @@ class EnglishStemmer(_StandardStemmer):
             return self.__special_words[word]
 
         # Map the different apostrophe characters to a single consistent one
-        word = (word.replace("\u2019", "\x27")
-                    .replace("\u2018", "\x27")
-                    .replace("\u201B", "\x27"))
+        word = (
+            word.replace("\u2019", "\x27")
+            .replace("\u2018", "\x27")
+            .replace("\u201B", "\x27")
+        )
 
         if word.startswith("\x27"):
             word = word[1:]
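A hedged standalone rendering of the apostrophe handling just above, handy for testing outside the class (the function name is ours, not the module's):

    def normalize_apostrophes(word):
        # U+2019, U+2018 and U+201B all collapse to the ASCII apostrophe.
        for ch in ("\u2019", "\u2018", "\u201B"):
            word = word.replace(ch, "\x27")
        # A single leading apostrophe is then dropped.
        if word.startswith("\x27"):
            word = word[1:]
        return word

    assert normalize_apostrophes("\u2019twas") == "twas"
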
@@ -1225,8 +1432,8 @@ class EnglishStemmer(_StandardStemmer):
             word = "".join(("Y", word[1:]))
 
         for i in range(1, len(word)):
-            if word[i-1] in self.__vowels and word[i] == "y":
-                word = "".join((word[:i], "Y", word[i+1:]))
+            if word[i - 1] in self.__vowels and word[i] == "y":
+                word = "".join((word[:i], "Y", word[i + 1 :]))
 
         step1a_vowel_found = False
         step1b_vowel_found = False
@@ -1241,19 +1448,18 @@ class EnglishStemmer(_StandardStemmer):
                 r1 = word[6:]
 
             for i in range(1, len(r1)):
-                if r1[i] not in self.__vowels and r1[i-1] in self.__vowels:
-                    r2 = r1[i+1:]
+                if r1[i] not in self.__vowels and r1[i - 1] in self.__vowels:
+                    r2 = r1[i + 1 :]
                     break
         else:
             r1, r2 = self._r1r2_standard(word, self.__vowels)
 
-
         # STEP 0
         for suffix in self.__step0_suffixes:
             if word.endswith(suffix):
-                word = word[:-len(suffix)]
-                r1 = r1[:-len(suffix)]
-                r2 = r2[:-len(suffix)]
+                word = word[: -len(suffix)]
+                r1 = r1[: -len(suffix)]
+                r2 = r2[: -len(suffix)]
                 break
 
         # STEP 1a
@@ -1266,7 +1472,7 @@ class EnglishStemmer(_StandardStemmer):
                     r2 = r2[:-2]
 
                 elif suffix in ("ied", "ies"):
-                    if len(word[:-len(suffix)]) > 1:
+                    if len(word[: -len(suffix)]) > 1:
                         word = word[:-2]
                         r1 = r1[:-2]
                         r2 = r2[:-2]
@@ -1305,21 +1511,21 @@ class EnglishStemmer(_StandardStemmer):
                         else:
                             r2 = ""
                 else:
-                    for letter in word[:-len(suffix)]:
+                    for letter in word[: -len(suffix)]:
                         if letter in self.__vowels:
                             step1b_vowel_found = True
                             break
 
                     if step1b_vowel_found:
-                        word = word[:-len(suffix)]
-                        r1 = r1[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        r1 = r1[: -len(suffix)]
+                        r2 = r2[: -len(suffix)]
 
                         if word.endswith(("at", "bl", "iz")):
                             word = "".join((word, "e"))
                             r1 = "".join((r1, "e"))
 
-                            if len(word) > 5 or len(r1) >=3:
+                            if len(word) > 5 or len(r1) >= 3:
                                 r2 = "".join((r2, "e"))
 
                         elif word.endswith(self.__double_consonants):
@@ -1327,15 +1533,19 @@ class EnglishStemmer(_StandardStemmer):
                             r1 = r1[:-1]
                             r2 = r2[:-1]
 
-                        elif ((r1 == "" and len(word) >= 3 and
-                               word[-1] not in self.__vowels and
-                               word[-1] not in "wxY" and
-                               word[-2] in self.__vowels and
-                               word[-3] not in self.__vowels)
-                              or
-                              (r1 == "" and len(word) == 2 and
-                               word[0] in self.__vowels and
-                               word[1] not in self.__vowels)):
+                        elif (
+                            r1 == ""
+                            and len(word) >= 3
+                            and word[-1] not in self.__vowels
+                            and word[-1] not in "wxY"
+                            and word[-2] in self.__vowels
+                            and word[-3] not in self.__vowels
+                        ) or (
+                            r1 == ""
+                            and len(word) == 2
+                            and word[0] in self.__vowels
+                            and word[1] not in self.__vowels
+                        ):
 
                             word = "".join((word, "e"))
 
@@ -1526,9 +1736,9 @@ class EnglishStemmer(_StandardStemmer):
                             r2 = ""
 
                     elif suffix in ("ful", "ness"):
-                        word = word[:-len(suffix)]
-                        r1 = r1[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        r1 = r1[: -len(suffix)]
+                        r2 = r2[: -len(suffix)]
 
                     elif suffix == "ative" and r2.endswith(suffix):
                         word = word[:-5]
@@ -1546,9 +1756,9 @@ class EnglishStemmer(_StandardStemmer):
                             r1 = r1[:-3]
                             r2 = r2[:-3]
                     else:
-                        word = word[:-len(suffix)]
-                        r1 = r1[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        r1 = r1[: -len(suffix)]
+                        r2 = r2[: -len(suffix)]
                 break
 
         # STEP 5
@@ -1557,20 +1767,19 @@ class EnglishStemmer(_StandardStemmer):
         elif r2.endswith("e"):
             word = word[:-1]
         elif r1.endswith("e"):
-            if len(word) >= 4 and (word[-2] in self.__vowels or
-                                   word[-2] in "wxY" or
-                                   word[-3] not in self.__vowels or
-                                   word[-4] in self.__vowels):
+            if len(word) >= 4 and (
+                word[-2] in self.__vowels
+                or word[-2] in "wxY"
+                or word[-3] not in self.__vowels
+                or word[-4] in self.__vowels
+            ):
                 word = word[:-1]
 
-
         word = word.replace("Y", "y")
 
-
         return word
 
 
-
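Throughout the steps above, one idiom recurs: a matched suffix is trimmed from the word and from the R1/R2 regions in lock-step, so the regions stay aligned with the shrinking word. A hedged helper showing the effect (helper name is ours):

    def strip_suffix(word, r1, r2, suffix):
        n = len(suffix)
        return word[:-n], r1[:-n], r2[:-n]

    # For "goodness", the standard regions are R1 = "ness" and R2 = "s",
    # so stripping "ness" empties both regions:
    strip_suffix("goodness", "ness", "s", "ness")  # -> ("good", "", "")
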
 class FinnishStemmer(_StandardStemmer):
 
     """
@@ -1601,26 +1810,91 @@ class FinnishStemmer(_StandardStemmer):
 
     __vowels = "aeiouy\xE4\xF6"
     __restricted_vowels = "aeiou\xE4\xF6"
-    __long_vowels = ("aa", "ee", "ii", "oo", "uu", "\xE4\xE4",
-                     "\xF6\xF6")
+    __long_vowels = ("aa", "ee", "ii", "oo", "uu", "\xE4\xE4", "\xF6\xF6")
     __consonants = "bcdfghjklmnpqrstvwxz"
-    __double_consonants = ("bb", "cc", "dd", "ff", "gg", "hh", "jj",
-                           "kk", "ll", "mm", "nn", "pp", "qq", "rr",
-                           "ss", "tt", "vv", "ww", "xx", "zz")
-    __step1_suffixes = ('kaan', 'k\xE4\xE4n', 'sti', 'kin', 'han',
-                        'h\xE4n', 'ko', 'k\xF6', 'pa', 'p\xE4')
-    __step2_suffixes = ('nsa', 'ns\xE4', 'mme', 'nne', 'si', 'ni',
-                        'an', '\xE4n', 'en')
-    __step3_suffixes = ('siin', 'tten', 'seen', 'han', 'hen', 'hin',
-                        'hon', 'h\xE4n', 'h\xF6n', 'den', 'tta',
-                        'tt\xE4', 'ssa', 'ss\xE4', 'sta',
-                        'st\xE4', 'lla', 'll\xE4', 'lta',
-                        'lt\xE4', 'lle', 'ksi', 'ine', 'ta',
-                        't\xE4', 'na', 'n\xE4', 'a', '\xE4',
-                        'n')
-    __step4_suffixes = ('impi', 'impa', 'imp\xE4', 'immi', 'imma',
-                        'imm\xE4', 'mpi', 'mpa', 'mp\xE4', 'mmi',
-                        'mma', 'mm\xE4', 'eja', 'ej\xE4')
+    __double_consonants = (
+        "bb",
+        "cc",
+        "dd",
+        "ff",
+        "gg",
+        "hh",
+        "jj",
+        "kk",
+        "ll",
+        "mm",
+        "nn",
+        "pp",
+        "qq",
+        "rr",
+        "ss",
+        "tt",
+        "vv",
+        "ww",
+        "xx",
+        "zz",
+    )
+    __step1_suffixes = (
+        "kaan",
+        "k\xE4\xE4n",
+        "sti",
+        "kin",
+        "han",
+        "h\xE4n",
+        "ko",
+        "k\xF6",
+        "pa",
+        "p\xE4",
+    )
+    __step2_suffixes = ("nsa", "ns\xE4", "mme", "nne", "si", "ni", "an", "\xE4n", "en")
+    __step3_suffixes = (
+        "siin",
+        "tten",
+        "seen",
+        "han",
+        "hen",
+        "hin",
+        "hon",
+        "h\xE4n",
+        "h\xF6n",
+        "den",
+        "tta",
+        "tt\xE4",
+        "ssa",
+        "ss\xE4",
+        "sta",
+        "st\xE4",
+        "lla",
+        "ll\xE4",
+        "lta",
+        "lt\xE4",
+        "lle",
+        "ksi",
+        "ine",
+        "ta",
+        "t\xE4",
+        "na",
+        "n\xE4",
+        "a",
+        "\xE4",
+        "n",
+    )
+    __step4_suffixes = (
+        "impi",
+        "impa",
+        "imp\xE4",
+        "immi",
+        "imma",
+        "imm\xE4",
+        "mpi",
+        "mpa",
+        "mp\xE4",
+        "mmi",
+        "mma",
+        "mm\xE4",
+        "eja",
+        "ej\xE4",
+    )
 
     def stem(self, word):
         """
@@ -1650,10 +1924,10 @@ class FinnishStemmer(_StandardStemmer):
                         r1 = r1[:-3]
                         r2 = r2[:-3]
                 else:
-                    if word[-len(suffix)-1] in "ntaeiouy\xE4\xF6":
-                        word = word[:-len(suffix)]
-                        r1 = r1[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
+                    if word[-len(suffix) - 1] in "ntaeiouy\xE4\xF6":
+                        word = word[: -len(suffix)]
+                        r1 = r1[: -len(suffix)]
+                        r2 = r2[: -len(suffix)]
                 break
 
         # STEP 2: Possessives
@@ -1679,16 +1953,23 @@ class FinnishStemmer(_StandardStemmer):
                         r2 = suffix_replace(r2, "kse", "ksi")
 
                 elif suffix == "an":
-                    if (word[-4:-2] in ("ta", "na") or
-                        word[-5:-2] in ("ssa", "sta", "lla", "lta")):
+                    if word[-4:-2] in ("ta", "na") or word[-5:-2] in (
+                        "ssa",
+                        "sta",
+                        "lla",
+                        "lta",
+                    ):
                         word = word[:-2]
                         r1 = r1[:-2]
                         r2 = r2[:-2]
 
                 elif suffix == "\xE4n":
-                    if (word[-4:-2] in ("t\xE4", "n\xE4") or
-                        word[-5:-2] in ("ss\xE4", "st\xE4",
-                                        "ll\xE4", "lt\xE4")):
+                    if word[-4:-2] in ("t\xE4", "n\xE4") or word[-5:-2] in (
+                        "ss\xE4",
+                        "st\xE4",
+                        "ll\xE4",
+                        "lt\xE4",
+                    ):
                         word = word[:-2]
                         r1 = r1[:-2]
                         r2 = r2[:-2]
@@ -1707,25 +1988,28 @@ class FinnishStemmer(_StandardStemmer):
         # STEP 3: Cases
         for suffix in self.__step3_suffixes:
             if r1.endswith(suffix):
-                if suffix in ("han", "hen", "hin", "hon", "h\xE4n",
-                              "h\xF6n"):
-                    if ((suffix == "han" and word[-4] == "a") or
-                        (suffix == "hen" and word[-4] == "e") or
-                        (suffix == "hin" and word[-4] == "i") or
-                        (suffix == "hon" and word[-4] == "o") or
-                        (suffix == "h\xE4n" and word[-4] == "\xE4") or
-                        (suffix == "h\xF6n" and word[-4] == "\xF6")):
+                if suffix in ("han", "hen", "hin", "hon", "h\xE4n", "h\xF6n"):
+                    if (
+                        (suffix == "han" and word[-4] == "a")
+                        or (suffix == "hen" and word[-4] == "e")
+                        or (suffix == "hin" and word[-4] == "i")
+                        or (suffix == "hon" and word[-4] == "o")
+                        or (suffix == "h\xE4n" and word[-4] == "\xE4")
+                        or (suffix == "h\xF6n" and word[-4] == "\xF6")
+                    ):
                         word = word[:-3]
                         r1 = r1[:-3]
                         r2 = r2[:-3]
                         step3_success = True
 
                 elif suffix in ("siin", "den", "tten"):
-                    if (word[-len(suffix)-1] == "i" and
-                        word[-len(suffix)-2] in self.__restricted_vowels):
-                        word = word[:-len(suffix)]
-                        r1 = r1[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
+                    if (
+                        word[-len(suffix) - 1] == "i"
+                        and word[-len(suffix) - 2] in self.__restricted_vowels
+                    ):
+                        word = word[: -len(suffix)]
+                        r1 = r1[: -len(suffix)]
+                        r2 = r2[: -len(suffix)]
                         step3_success = True
                     else:
                         continue
@@ -1764,25 +2048,24 @@ class FinnishStemmer(_StandardStemmer):
                         r1 = r1[:-1]
                         r2 = r2[:-1]
                 else:
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
                     step3_success = True
                 break
 
         # STEP 4: Other endings
         for suffix in self.__step4_suffixes:
             if r2.endswith(suffix):
-                if suffix in ("mpi", "mpa", "mp\xE4", "mmi", "mma",
-                              "mm\xE4"):
+                if suffix in ("mpi", "mpa", "mp\xE4", "mmi", "mma", "mm\xE4"):
                     if word[-5:-3] != "po":
                         word = word[:-3]
                         r1 = r1[:-3]
                         r2 = r2[:-3]
                 else:
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
                 break
 
         # STEP 5: Plurals
@@ -1790,8 +2073,12 @@ class FinnishStemmer(_StandardStemmer):
             word = word[:-1]
             r1 = r1[:-1]
 
-        elif (not step3_success and len(r1) >= 2 and
-              r1[-1] == "t" and r1[-2] in self.__vowels):
+        elif (
+            not step3_success
+            and len(r1) >= 2
+            and r1[-1] == "t"
+            and r1[-2] in self.__vowels
+        ):
             word = word[:-1]
             r1 = r1[:-1]
             r2 = r2[:-1]
@@ -1807,8 +2094,7 @@ class FinnishStemmer(_StandardStemmer):
             word = word[:-1]
             r1 = r1[:-1]
 
-        if (len(r1) >= 2 and r1[-2] in self.__consonants and
-            r1[-1] in "a\xE4ei"):
+        if len(r1) >= 2 and r1[-2] in self.__consonants and r1[-1] in "a\xE4ei":
             word = word[:-1]
             r1 = r1[:-1]
 
@@ -1827,18 +2113,16 @@ class FinnishStemmer(_StandardStemmer):
                 continue
             else:
                 if i == 1:
-                    if word[-i-1:] in self.__double_consonants:
+                    if word[-i - 1 :] in self.__double_consonants:
                         word = word[:-1]
                 else:
-                    if word[-i-1:-i+1] in self.__double_consonants:
-                        word = "".join((word[:-i], word[-i+1:]))
+                    if word[-i - 1 : -i + 1] in self.__double_consonants:
+                        word = "".join((word[:-i], word[-i + 1 :]))
                 break
 
-
         return word
 
 
-
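The loop that closes FinnishStemmer.stem() above walks backwards past trailing vowels, then drops one letter of a double consonant. A hedged standalone sketch with an abbreviated doubles table (names and examples are ours):

    def undouble_finnish(word, vowels="aeiouy\xE4\xF6",
                         doubles=("kk", "ll", "nn", "pp", "ss", "tt")):
        for i in range(1, len(word)):
            if word[-i] in vowels:
                continue
            if i == 1:
                if word[-2:] in doubles:
                    word = word[:-1]
            elif word[-i - 1 : -i + 1] in doubles:
                word = "".join((word[:-i], word[-i + 1 :]))
            break
        return word

    assert undouble_finnish("takk") == "tak"
    assert undouble_finnish("takka") == "taka"  # double before a final vowel
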
 class FrenchStemmer(_StandardStemmer):
 
     """
@@ -1860,33 +2144,129 @@ class FrenchStemmer(_StandardStemmer):
     """
 
     __vowels = "aeiouy\xE2\xE0\xEB\xE9\xEA\xE8\xEF\xEE\xF4\xFB\xF9"
-    __step1_suffixes = ('issements', 'issement', 'atrices', 'atrice',
-                        'ateurs', 'ations', 'logies', 'usions',
-                        'utions', 'ements', 'amment', 'emment',
-                        'ances', 'iqUes', 'ismes', 'ables', 'istes',
-                        'ateur', 'ation', 'logie', 'usion', 'ution',
-                        'ences', 'ement', 'euses', 'ments', 'ance',
-                        'iqUe', 'isme', 'able', 'iste', 'ence',
-                        'it\xE9s', 'ives', 'eaux', 'euse', 'ment',
-                        'eux', 'it\xE9', 'ive', 'ifs', 'aux', 'if')
-    __step2a_suffixes = ('issaIent', 'issantes', 'iraIent', 'issante',
-                         'issants', 'issions', 'irions', 'issais',
-                         'issait', 'issant', 'issent', 'issiez', 'issons',
-                         'irais', 'irait', 'irent', 'iriez', 'irons',
-                         'iront', 'isses', 'issez', '\xEEmes',
-                         '\xEEtes', 'irai', 'iras', 'irez', 'isse',
-                         'ies', 'ira', '\xEEt', 'ie', 'ir', 'is',
-                         'it', 'i')
-    __step2b_suffixes = ('eraIent', 'assions', 'erions', 'assent',
-                         'assiez', '\xE8rent', 'erais', 'erait',
-                         'eriez', 'erons', 'eront', 'aIent', 'antes',
-                         'asses', 'ions', 'erai', 'eras', 'erez',
-                         '\xE2mes', '\xE2tes', 'ante', 'ants',
-                         'asse', '\xE9es', 'era', 'iez', 'ais',
-                         'ait', 'ant', '\xE9e', '\xE9s', 'er',
-                         'ez', '\xE2t', 'ai', 'as', '\xE9', 'a')
-    __step4_suffixes = ('i\xE8re', 'I\xE8re', 'ion', 'ier', 'Ier',
-                        'e', '\xEB')
+    __step1_suffixes = (
+        "issements",
+        "issement",
+        "atrices",
+        "atrice",
+        "ateurs",
+        "ations",
+        "logies",
+        "usions",
+        "utions",
+        "ements",
+        "amment",
+        "emment",
+        "ances",
+        "iqUes",
+        "ismes",
+        "ables",
+        "istes",
+        "ateur",
+        "ation",
+        "logie",
+        "usion",
+        "ution",
+        "ences",
+        "ement",
+        "euses",
+        "ments",
+        "ance",
+        "iqUe",
+        "isme",
+        "able",
+        "iste",
+        "ence",
+        "it\xE9s",
+        "ives",
+        "eaux",
+        "euse",
+        "ment",
+        "eux",
+        "it\xE9",
+        "ive",
+        "ifs",
+        "aux",
+        "if",
+    )
+    __step2a_suffixes = (
+        "issaIent",
+        "issantes",
+        "iraIent",
+        "issante",
+        "issants",
+        "issions",
+        "irions",
+        "issais",
+        "issait",
+        "issant",
+        "issent",
+        "issiez",
+        "issons",
+        "irais",
+        "irait",
+        "irent",
+        "iriez",
+        "irons",
+        "iront",
+        "isses",
+        "issez",
+        "\xEEmes",
+        "\xEEtes",
+        "irai",
+        "iras",
+        "irez",
+        "isse",
+        "ies",
+        "ira",
+        "\xEEt",
+        "ie",
+        "ir",
+        "is",
+        "it",
+        "i",
+    )
+    __step2b_suffixes = (
+        "eraIent",
+        "assions",
+        "erions",
+        "assent",
+        "assiez",
+        "\xE8rent",
+        "erais",
+        "erait",
+        "eriez",
+        "erons",
+        "eront",
+        "aIent",
+        "antes",
+        "asses",
+        "ions",
+        "erai",
+        "eras",
+        "erez",
+        "\xE2mes",
+        "\xE2tes",
+        "ante",
+        "ants",
+        "asse",
+        "\xE9es",
+        "era",
+        "iez",
+        "ais",
+        "ait",
+        "ant",
+        "\xE9e",
+        "\xE9s",
+        "er",
+        "ez",
+        "\xE2t",
+        "ai",
+        "as",
+        "\xE9",
+        "a",
+    )
+    __step4_suffixes = ("i\xE8re", "I\xE8re", "ion", "ier", "Ier", "e", "\xEB")
 
     def stem(self, word):
         """
@@ -1910,24 +2290,24 @@ class FrenchStemmer(_StandardStemmer):
 
         # Every occurrence of 'u' after 'q' is put into upper case.
         for i in range(1, len(word)):
-            if word[i-1] == "q" and word[i] == "u":
-                word = "".join((word[:i], "U", word[i+1:]))
+            if word[i - 1] == "q" and word[i] == "u":
+                word = "".join((word[:i], "U", word[i + 1 :]))
 
         # Every occurrence of 'u' and 'i'
         # between vowels is put into upper case.
         # Every occurrence of 'y' preceded or
         # followed by a vowel is also put into upper case.
-        for i in range(1, len(word)-1):
-            if word[i-1] in self.__vowels and word[i+1] in self.__vowels:
+        for i in range(1, len(word) - 1):
+            if word[i - 1] in self.__vowels and word[i + 1] in self.__vowels:
                 if word[i] == "u":
-                    word = "".join((word[:i], "U", word[i+1:]))
+                    word = "".join((word[:i], "U", word[i + 1 :]))
 
                 elif word[i] == "i":
-                    word = "".join((word[:i], "I", word[i+1:]))
+                    word = "".join((word[:i], "I", word[i + 1 :]))
 
-            if word[i-1] in self.__vowels or word[i+1] in self.__vowels:
+            if word[i - 1] in self.__vowels or word[i + 1] in self.__vowels:
                 if word[i] == "y":
-                    word = "".join((word[:i], "Y", word[i+1:]))
+                    word = "".join((word[:i], "Y", word[i + 1 :]))
 
         r1, r2 = self._r1r2_standard(word, self.__vowels)
         rv = self.__rv_french(word, self.__vowels)
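To experiment with the marking pass above in isolation, a hedged copy as a free function (vowel string taken from __vowels; the function name is ours):

    def mark_french(word, vowels="aeiouy\xE2\xE0\xEB\xE9\xEA\xE8\xEF\xEE\xF4\xFB\xF9"):
        for i in range(1, len(word)):
            if word[i - 1] == "q" and word[i] == "u":
                word = "".join((word[:i], "U", word[i + 1 :]))
        for i in range(1, len(word) - 1):
            if word[i - 1] in vowels and word[i + 1] in vowels:
                if word[i] == "u":
                    word = "".join((word[:i], "U", word[i + 1 :]))
                elif word[i] == "i":
                    word = "".join((word[:i], "I", word[i + 1 :]))
            if word[i - 1] in vowels or word[i + 1] in vowels:
                if word[i] == "y":
                    word = "".join((word[:i], "Y", word[i + 1 :]))
        return word

    assert mark_french("quand") == "qUand"
    assert mark_french("payer") == "paYer"
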
@@ -1941,7 +2321,7 @@ class FrenchStemmer(_StandardStemmer):
 
                 elif suffix in ("euse", "euses"):
                     if suffix in r2:
-                        word = word[:-len(suffix)]
+                        word = word[: -len(suffix)]
                         step1_success = True
 
                     elif suffix in r1:
@@ -1949,7 +2329,7 @@ class FrenchStemmer(_StandardStemmer):
                         step1_success = True
 
                 elif suffix in ("ement", "ements") and suffix in rv:
-                    word = word[:-len(suffix)]
+                    word = word[: -len(suffix)]
                     step1_success = True
 
                     if word[-2:] == "iv" and "iv" in r2:
@@ -1981,31 +2361,54 @@ class FrenchStemmer(_StandardStemmer):
                     word = suffix_replace(word, "emment", "ent")
                     rv_ending_found = True
 
-                elif (suffix in ("ment", "ments") and suffix in rv and
-                      not rv.startswith(suffix) and
-                      rv[rv.rindex(suffix)-1] in self.__vowels):
-                    word = word[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                elif (
+                    suffix in ("ment", "ments")
+                    and suffix in rv
+                    and not rv.startswith(suffix)
+                    and rv[rv.rindex(suffix) - 1] in self.__vowels
+                ):
+                    word = word[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
                     rv_ending_found = True
 
                 elif suffix == "aux" and suffix in r1:
                     word = "".join((word[:-2], "l"))
                     step1_success = True
 
-                elif (suffix in ("issement", "issements") and suffix in r1
-                      and word[-len(suffix)-1] not in self.__vowels):
-                    word = word[:-len(suffix)]
+                elif (
+                    suffix in ("issement", "issements")
+                    and suffix in r1
+                    and word[-len(suffix) - 1] not in self.__vowels
+                ):
+                    word = word[: -len(suffix)]
                     step1_success = True
 
-                elif suffix in ("ance", "iqUe", "isme", "able", "iste",
-                              "eux", "ances", "iqUes", "ismes",
-                              "ables", "istes") and suffix in r2:
-                    word = word[:-len(suffix)]
+                elif (
+                    suffix
+                    in (
+                        "ance",
+                        "iqUe",
+                        "isme",
+                        "able",
+                        "iste",
+                        "eux",
+                        "ances",
+                        "iqUes",
+                        "ismes",
+                        "ables",
+                        "istes",
+                    )
+                    and suffix in r2
+                ):
+                    word = word[: -len(suffix)]
                     step1_success = True
 
-                elif suffix in ("atrice", "ateur", "ation", "atrices",
-                                "ateurs", "ations") and suffix in r2:
-                    word = word[:-len(suffix)]
+                elif (
+                    suffix
+                    in ("atrice", "ateur", "ation", "atrices", "ateurs", "ations")
+                    and suffix in r2
+                ):
+                    word = word[: -len(suffix)]
                     step1_success = True
 
                     if word[-2:] == "ic":
@@ -2018,8 +2421,7 @@ class FrenchStemmer(_StandardStemmer):
                     word = suffix_replace(word, suffix, "log")
                     step1_success = True
 
-                elif (suffix in ("usion", "ution", "usions", "utions") and
-                      suffix in r2):
+                elif suffix in ("usion", "ution", "usions", "utions") and suffix in r2:
                     word = suffix_replace(word, suffix, "u")
                     step1_success = True
 
@@ -2028,7 +2430,7 @@ class FrenchStemmer(_StandardStemmer):
                     step1_success = True
 
                 elif suffix in ("it\xE9", "it\xE9s") and suffix in r2:
-                    word = word[:-len(suffix)]
+                    word = word[: -len(suffix)]
                     step1_success = True
 
                     if word[-4:] == "abil":
@@ -2047,9 +2449,8 @@ class FrenchStemmer(_StandardStemmer):
                         if "iv" in r2:
                             word = word[:-2]
 
-                elif (suffix in ("if", "ive", "ifs", "ives") and
-                      suffix in r2):
-                    word = word[:-len(suffix)]
+                elif suffix in ("if", "ive", "ifs", "ives") and suffix in r2:
+                    word = word[: -len(suffix)]
                     step1_success = True
 
                     if word[-2:] == "at" and "at" in r2:
@@ -2066,13 +2467,16 @@ class FrenchStemmer(_StandardStemmer):
         if not step1_success or rv_ending_found:
             for suffix in self.__step2a_suffixes:
                 if word.endswith(suffix):
-                    if (suffix in rv and len(rv) > len(suffix) and
-                        rv[rv.rindex(suffix)-1] not in self.__vowels):
-                        word = word[:-len(suffix)]
+                    if (
+                        suffix in rv
+                        and len(rv) > len(suffix)
+                        and rv[rv.rindex(suffix) - 1] not in self.__vowels
+                    ):
+                        word = word[: -len(suffix)]
                         step2a_success = True
                     break
 
-        # STEP 2b: Other verb suffixes
+            # STEP 2b: Other verb suffixes
             if not step2a_success:
                 for suffix in self.__step2b_suffixes:
                     if rv.endswith(suffix):
@@ -2080,23 +2484,52 @@ class FrenchStemmer(_StandardStemmer):
                             word = word[:-4]
                             step2b_success = True
 
-                        elif suffix in ('eraIent', 'erions', '\xE8rent',
-                                        'erais', 'erait', 'eriez',
-                                        'erons', 'eront', 'erai', 'eras',
-                                        'erez', '\xE9es', 'era', 'iez',
-                                        '\xE9e', '\xE9s', 'er', 'ez',
-                                        '\xE9'):
-                            word = word[:-len(suffix)]
+                        elif suffix in (
+                            "eraIent",
+                            "erions",
+                            "\xE8rent",
+                            "erais",
+                            "erait",
+                            "eriez",
+                            "erons",
+                            "eront",
+                            "erai",
+                            "eras",
+                            "erez",
+                            "\xE9es",
+                            "era",
+                            "iez",
+                            "\xE9e",
+                            "\xE9s",
+                            "er",
+                            "ez",
+                            "\xE9",
+                        ):
+                            word = word[: -len(suffix)]
                             step2b_success = True
 
-                        elif suffix in ('assions', 'assent', 'assiez',
-                                        'aIent', 'antes', 'asses',
-                                        '\xE2mes', '\xE2tes', 'ante',
-                                        'ants', 'asse', 'ais', 'ait',
-                                        'ant', '\xE2t', 'ai', 'as',
-                                        'a'):
-                            word = word[:-len(suffix)]
-                            rv = rv[:-len(suffix)]
+                        elif suffix in (
+                            "assions",
+                            "assent",
+                            "assiez",
+                            "aIent",
+                            "antes",
+                            "asses",
+                            "\xE2mes",
+                            "\xE2tes",
+                            "ante",
+                            "ants",
+                            "asse",
+                            "ais",
+                            "ait",
+                            "ant",
+                            "\xE2t",
+                            "ai",
+                            "as",
+                            "a",
+                        ):
+                            word = word[: -len(suffix)]
+                            rv = rv[: -len(suffix)]
                             step2b_success = True
                             if rv.endswith("e"):
                                 word = word[:-1]
@@ -2111,19 +2544,16 @@ class FrenchStemmer(_StandardStemmer):
 
         # STEP 4: Residual suffixes
         else:
-            if (len(word) >= 2 and word[-1] == "s" and
-                word[-2] not in "aiou\xE8s"):
+            if len(word) >= 2 and word[-1] == "s" and word[-2] not in "aiou\xE8s":
                 word = word[:-1]
 
             for suffix in self.__step4_suffixes:
                 if word.endswith(suffix):
                     if suffix in rv:
-                        if (suffix == "ion" and suffix in r2 and
-                            rv[-4] in "st"):
+                        if suffix == "ion" and suffix in r2 and rv[-4] in "st":
                             word = word[:-3]
 
-                        elif suffix in ("ier", "i\xE8re", "Ier",
-                                        "I\xE8re"):
+                        elif suffix in ("ier", "i\xE8re", "Ier", "I\xE8re"):
                             word = suffix_replace(word, suffix, "i")
 
                         elif suffix == "e":
@@ -2143,18 +2573,13 @@ class FrenchStemmer(_StandardStemmer):
                 i += 1
             else:
                 if i != 1 and word[-i] in ("\xE9", "\xE8"):
-                    word = "".join((word[:-i], "e", word[-i+1:]))
+                    word = "".join((word[:-i], "e", word[-i + 1 :]))
                 break
 
-        word = (word.replace("I", "i")
-                    .replace("U", "u")
-                    .replace("Y", "y"))
-
+        word = word.replace("I", "i").replace("U", "u").replace("Y", "y")
 
         return word
 
-
-
     def __rv_french(self, word, vowels):
         """
         Return the region RV that is used by the French stemmer.
@@ -2179,19 +2604,19 @@ class FrenchStemmer(_StandardStemmer):
         """
         rv = ""
         if len(word) >= 2:
-            if (word.startswith(("par", "col", "tap")) or
-                (word[0] in vowels and word[1] in vowels)):
+            if word.startswith(("par", "col", "tap")) or (
+                word[0] in vowels and word[1] in vowels
+            ):
                 rv = word[3:]
             else:
                 for i in range(1, len(word)):
                     if word[i] in vowels:
-                        rv = word[i+1:]
+                        rv = word[i + 1 :]
                         break
 
         return rv
 
 
-
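__rv_french above implements the Snowball RV rule: the region after the third letter when the word begins with two vowels or with "par"/"col"/"tap", otherwise everything after the first vowel found past position 0. A hedged standalone rendering with checkable examples:

    def rv_french(word, vowels="aeiouy\xE2\xE0\xEB\xE9\xEA\xE8\xEF\xEE\xF4\xFB\xF9"):
        if len(word) < 2:
            return ""
        if word.startswith(("par", "col", "tap")) or (
            word[0] in vowels and word[1] in vowels
        ):
            return word[3:]
        for i in range(1, len(word)):
            if word[i] in vowels:
                return word[i + 1 :]
        return ""

    assert rv_french("aimer") == "er"    # two leading vowels
    assert rv_french("parler") == "ler"  # "par" prefix
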
 class GermanStemmer(_StandardStemmer):
 
     """
@@ -2221,8 +2646,7 @@ class GermanStemmer(_StandardStemmer):
 
     __step1_suffixes = ("ern", "em", "er", "en", "es", "e", "s")
     __step2_suffixes = ("est", "en", "er", "st")
-    __step3_suffixes = ("isch", "lich", "heit", "keit",
-                          "end", "ung", "ig", "ik")
+    __step3_suffixes = ("isch", "lich", "heit", "keit", "end", "ung", "ig", "ik")
 
     def stem(self, word):
         """
@@ -2243,34 +2667,36 @@ class GermanStemmer(_StandardStemmer):
 
         # Every occurrence of 'u' and 'y'
         # between vowels is put into upper case.
-        for i in range(1, len(word)-1):
-            if word[i-1] in self.__vowels and word[i+1] in self.__vowels:
+        for i in range(1, len(word) - 1):
+            if word[i - 1] in self.__vowels and word[i + 1] in self.__vowels:
                 if word[i] == "u":
-                    word = "".join((word[:i], "U", word[i+1:]))
+                    word = "".join((word[:i], "U", word[i + 1 :]))
 
                 elif word[i] == "y":
-                    word = "".join((word[:i], "Y", word[i+1:]))
+                    word = "".join((word[:i], "Y", word[i + 1 :]))
 
         r1, r2 = self._r1r2_standard(word, self.__vowels)
 
         # R1 is adjusted so that the region before it
         # contains at least 3 letters.
         for i in range(1, len(word)):
-            if word[i] not in self.__vowels and word[i-1] in self.__vowels:
-                if len(word[:i+1]) < 3 and len(word[:i+1]) > 0:
+            if word[i] not in self.__vowels and word[i - 1] in self.__vowels:
+                if 3 > len(word[: i + 1]) > 0:
                     r1 = word[3:]
-                elif len(word[:i+1]) == 0:
+                elif len(word[: i + 1]) == 0:
                     return word
                 break
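The German variant tightens R1 so that at least three letters precede it. A hedged standalone version of the adjustment above (vowel set assumed from upstream NLTK, since its definition falls outside this hunk):

    def german_r1(word, vowels="aeiouy\xE4\xF6\xFC"):
        r1 = ""
        for i in range(1, len(word)):
            if word[i] not in vowels and word[i - 1] in vowels:
                r1 = word[i + 1 :]          # standard R1
                if 3 > len(word[: i + 1]) > 0:
                    r1 = word[3:]           # pushed past the third letter
                break
        return r1

    assert german_r1("opas") == "s"  # plain R1 would be "as"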
 
         # STEP 1
         for suffix in self.__step1_suffixes:
             if r1.endswith(suffix):
-                if (suffix in ("en", "es", "e") and
-                    word[-len(suffix)-4:-len(suffix)] == "niss"):
-                    word = word[:-len(suffix)-1]
-                    r1 = r1[:-len(suffix)-1]
-                    r2 = r2[:-len(suffix)-1]
+                if (
+                    suffix in ("en", "es", "e")
+                    and word[-len(suffix) - 4 : -len(suffix)] == "niss"
+                ):
+                    word = word[: -len(suffix) - 1]
+                    r1 = r1[: -len(suffix) - 1]
+                    r2 = r2[: -len(suffix) - 1]
 
                 elif suffix == "s":
                     if word[-2] in self.__s_ending:
@@ -2278,9 +2704,9 @@ class GermanStemmer(_StandardStemmer):
                         r1 = r1[:-1]
                         r2 = r2[:-1]
                 else:
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
                 break
 
         # STEP 2
@@ -2292,53 +2718,61 @@ class GermanStemmer(_StandardStemmer):
                         r1 = r1[:-2]
                         r2 = r2[:-2]
                 else:
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
                 break
 
         # STEP 3: Derivational suffixes
         for suffix in self.__step3_suffixes:
             if r2.endswith(suffix):
                 if suffix in ("end", "ung"):
-                    if ("ig" in r2[-len(suffix)-2:-len(suffix)] and
-                        "e" not in r2[-len(suffix)-3:-len(suffix)-2]):
-                        word = word[:-len(suffix)-2]
+                    if (
+                        "ig" in r2[-len(suffix) - 2 : -len(suffix)]
+                        and "e" not in r2[-len(suffix) - 3 : -len(suffix) - 2]
+                    ):
+                        word = word[: -len(suffix) - 2]
                     else:
-                        word = word[:-len(suffix)]
+                        word = word[: -len(suffix)]
 
-                elif (suffix in ("ig", "ik", "isch") and
-                      "e" not in r2[-len(suffix)-1:-len(suffix)]):
-                    word = word[:-len(suffix)]
+                elif (
+                    suffix in ("ig", "ik", "isch")
+                    and "e" not in r2[-len(suffix) - 1 : -len(suffix)]
+                ):
+                    word = word[: -len(suffix)]
 
                 elif suffix in ("lich", "heit"):
-                    if ("er" in r1[-len(suffix)-2:-len(suffix)] or
-                        "en" in r1[-len(suffix)-2:-len(suffix)]):
-                        word = word[:-len(suffix)-2]
+                    if (
+                        "er" in r1[-len(suffix) - 2 : -len(suffix)]
+                        or "en" in r1[-len(suffix) - 2 : -len(suffix)]
+                    ):
+                        word = word[: -len(suffix) - 2]
                     else:
-                        word = word[:-len(suffix)]
+                        word = word[: -len(suffix)]
 
                 elif suffix == "keit":
-                    if "lich" in r2[-len(suffix)-4:-len(suffix)]:
-                        word = word[:-len(suffix)-4]
+                    if "lich" in r2[-len(suffix) - 4 : -len(suffix)]:
+                        word = word[: -len(suffix) - 4]
 
-                    elif "ig" in r2[-len(suffix)-2:-len(suffix)]:
-                        word = word[:-len(suffix)-2]
+                    elif "ig" in r2[-len(suffix) - 2 : -len(suffix)]:
+                        word = word[: -len(suffix) - 2]
                     else:
-                        word = word[:-len(suffix)]
+                        word = word[: -len(suffix)]
                 break
 
         # Umlaut accents are removed and
         # 'u' and 'y' are put back into lower case.
-        word = (word.replace("\xE4", "a").replace("\xF6", "o")
-                    .replace("\xFC", "u").replace("U", "u")
-                    .replace("Y", "y"))
-
+        word = (
+            word.replace("\xE4", "a")
+            .replace("\xF6", "o")
+            .replace("\xFC", "u")
+            .replace("U", "u")
+            .replace("Y", "y")
+        )
 
         return word
 
 
-
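The closing normalization above can be exercised on its own; a hedged equivalent (function name is ours):

    def fold_german(word):
        for src, dst in (("\xE4", "a"), ("\xF6", "o"), ("\xFC", "u"),
                         ("U", "u"), ("Y", "y")):
            word = word.replace(src, dst)
        return word

    assert fold_german("h\xE4User") == "hauser"
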
 class HungarianStemmer(_LanguageSpecificStemmer):
 
     """
@@ -2376,48 +2810,181 @@ class HungarianStemmer(_LanguageSpecificStemmer):
 
     __vowels = "aeiou\xF6\xFC\xE1\xE9\xED\xF3\xF5\xFA\xFB"
     __digraphs = ("cs", "dz", "dzs", "gy", "ly", "ny", "ty", "zs")
-    __double_consonants = ("bb", "cc", "ccs", "dd", "ff", "gg",
-                             "ggy", "jj", "kk", "ll", "lly", "mm",
-                             "nn", "nny", "pp", "rr", "ss", "ssz",
-                             "tt", "tty", "vv", "zz", "zzs")
+    __double_consonants = (
+        "bb",
+        "cc",
+        "ccs",
+        "dd",
+        "ff",
+        "gg",
+        "ggy",
+        "jj",
+        "kk",
+        "ll",
+        "lly",
+        "mm",
+        "nn",
+        "nny",
+        "pp",
+        "rr",
+        "ss",
+        "ssz",
+        "tt",
+        "tty",
+        "vv",
+        "zz",
+        "zzs",
+    )
 
     __step1_suffixes = ("al", "el")
-    __step2_suffixes = ('k\xE9ppen', 'onk\xE9nt', 'enk\xE9nt',
-                        'ank\xE9nt', 'k\xE9pp', 'k\xE9nt', 'ban',
-                        'ben', 'nak', 'nek', 'val', 'vel', 't\xF3l',
-                        't\xF5l', 'r\xF3l', 'r\xF5l', 'b\xF3l',
-                        'b\xF5l', 'hoz', 'hez', 'h\xF6z',
-                        'n\xE1l', 'n\xE9l', '\xE9rt', 'kor',
-                        'ba', 'be', 'ra', 're', 'ig', 'at', 'et',
-                        'ot', '\xF6t', 'ul', '\xFCl', 'v\xE1',
-                        'v\xE9', 'en', 'on', 'an', '\xF6n',
-                        'n', 't')
+    __step2_suffixes = (
+        "k\xE9ppen",
+        "onk\xE9nt",
+        "enk\xE9nt",
+        "ank\xE9nt",
+        "k\xE9pp",
+        "k\xE9nt",
+        "ban",
+        "ben",
+        "nak",
+        "nek",
+        "val",
+        "vel",
+        "t\xF3l",
+        "t\xF5l",
+        "r\xF3l",
+        "r\xF5l",
+        "b\xF3l",
+        "b\xF5l",
+        "hoz",
+        "hez",
+        "h\xF6z",
+        "n\xE1l",
+        "n\xE9l",
+        "\xE9rt",
+        "kor",
+        "ba",
+        "be",
+        "ra",
+        "re",
+        "ig",
+        "at",
+        "et",
+        "ot",
+        "\xF6t",
+        "ul",
+        "\xFCl",
+        "v\xE1",
+        "v\xE9",
+        "en",
+        "on",
+        "an",
+        "\xF6n",
+        "n",
+        "t",
+    )
     __step3_suffixes = ("\xE1nk\xE9nt", "\xE1n", "\xE9n")
-    __step4_suffixes = ('astul', 'est\xFCl', '\xE1stul',
-                        '\xE9st\xFCl', 'stul', 'st\xFCl')
+    __step4_suffixes = (
+        "astul",
+        "est\xFCl",
+        "\xE1stul",
+        "\xE9st\xFCl",
+        "stul",
+        "st\xFCl",
+    )
     __step5_suffixes = ("\xE1", "\xE9")
-    __step6_suffixes = ('ok\xE9', '\xF6k\xE9', 'ak\xE9',
-                        'ek\xE9', '\xE1k\xE9', '\xE1\xE9i',
-                        '\xE9k\xE9', '\xE9\xE9i', 'k\xE9',
-                        '\xE9i', '\xE9\xE9', '\xE9')
-    __step7_suffixes = ('\xE1juk', '\xE9j\xFCk', '\xFCnk',
-                        'unk', 'juk', 'j\xFCk', '\xE1nk',
-                        '\xE9nk', 'nk', 'uk', '\xFCk', 'em',
-                        'om', 'am', 'od', 'ed', 'ad', '\xF6d',
-                        'ja', 'je', '\xE1m', '\xE1d', '\xE9m',
-                        '\xE9d', 'm', 'd', 'a', 'e', 'o',
-                        '\xE1', '\xE9')
-    __step8_suffixes = ('jaitok', 'jeitek', 'jaink', 'jeink', 'aitok',
-                        'eitek', '\xE1itok', '\xE9itek', 'jaim',
-                        'jeim', 'jaid', 'jeid', 'eink', 'aink',
-                        'itek', 'jeik', 'jaik', '\xE1ink',
-                        '\xE9ink', 'aim', 'eim', 'aid', 'eid',
-                        'jai', 'jei', 'ink', 'aik', 'eik',
-                        '\xE1im', '\xE1id', '\xE1ik', '\xE9im',
-                        '\xE9id', '\xE9ik', 'im', 'id', 'ai',
-                        'ei', 'ik', '\xE1i', '\xE9i', 'i')
-    __step9_suffixes = ("\xE1k", "\xE9k", "\xF6k", "ok",
-                        "ek", "ak", "k")
+    __step6_suffixes = (
+        "ok\xE9",
+        "\xF6k\xE9",
+        "ak\xE9",
+        "ek\xE9",
+        "\xE1k\xE9",
+        "\xE1\xE9i",
+        "\xE9k\xE9",
+        "\xE9\xE9i",
+        "k\xE9",
+        "\xE9i",
+        "\xE9\xE9",
+        "\xE9",
+    )
+    __step7_suffixes = (
+        "\xE1juk",
+        "\xE9j\xFCk",
+        "\xFCnk",
+        "unk",
+        "juk",
+        "j\xFCk",
+        "\xE1nk",
+        "\xE9nk",
+        "nk",
+        "uk",
+        "\xFCk",
+        "em",
+        "om",
+        "am",
+        "od",
+        "ed",
+        "ad",
+        "\xF6d",
+        "ja",
+        "je",
+        "\xE1m",
+        "\xE1d",
+        "\xE9m",
+        "\xE9d",
+        "m",
+        "d",
+        "a",
+        "e",
+        "o",
+        "\xE1",
+        "\xE9",
+    )
+    __step8_suffixes = (
+        "jaitok",
+        "jeitek",
+        "jaink",
+        "jeink",
+        "aitok",
+        "eitek",
+        "\xE1itok",
+        "\xE9itek",
+        "jaim",
+        "jeim",
+        "jaid",
+        "jeid",
+        "eink",
+        "aink",
+        "itek",
+        "jeik",
+        "jaik",
+        "\xE1ink",
+        "\xE9ink",
+        "aim",
+        "eim",
+        "aid",
+        "eid",
+        "jai",
+        "jei",
+        "ink",
+        "aik",
+        "eik",
+        "\xE1im",
+        "\xE1id",
+        "\xE1ik",
+        "\xE9im",
+        "\xE9id",
+        "\xE9ik",
+        "im",
+        "id",
+        "ai",
+        "ei",
+        "ik",
+        "\xE1i",
+        "\xE9i",
+        "i",
+    )
+    __step9_suffixes = ("\xE1k", "\xE9k", "\xF6k", "ok", "ek", "ak", "k")
 
     def stem(self, word):
         """
@@ -2439,10 +3006,10 @@ class HungarianStemmer(_LanguageSpecificStemmer):
         # STEP 1: Remove instrumental case
         if r1.endswith(self.__step1_suffixes):
             for double_cons in self.__double_consonants:
-                if word[-2-len(double_cons):-2] == double_cons:
+                if word[-2 - len(double_cons) : -2] == double_cons:
                     word = "".join((word[:-4], word[-3]))
 
-                    if r1[-2-len(double_cons):-2] == double_cons:
+                    if r1[-2 - len(double_cons) : -2] == double_cons:
                         r1 = "".join((r1[:-4], r1[-3]))
                     break
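STEP 1 above removes the instrumental ending and undoes the consonant doubling it causes in a single move: it slices off the last four characters and re-appends the surviving single consonant, word[-3]. A hedged sketch with an abbreviated doubles table (the Hungarian examples are ours and illustrative):

    def strip_instrumental(word, doubles=("bb", "ss", "ssz", "tt", "zz")):
        for dc in doubles:
            if word[-2 - len(dc) : -2] == dc:
                return "".join((word[:-4], word[-3]))
        return word

    assert strip_instrumental("bottal") == "bot"    # bot + -val -> bottal
    assert strip_instrumental("busszal") == "busz"  # busz + -val -> busszal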
 
@@ -2450,8 +3017,8 @@ class HungarianStemmer(_LanguageSpecificStemmer):
         for suffix in self.__step2_suffixes:
             if word.endswith(suffix):
                 if r1.endswith(suffix):
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
 
                     if r1.endswith("\xE1"):
                         word = "".join((word[:-1], "a"))
@@ -2484,18 +3051,18 @@ class HungarianStemmer(_LanguageSpecificStemmer):
                     word = suffix_replace(word, suffix, "e")
                     r1 = suffix_replace(r1, suffix, "e")
                 else:
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
                 break
 
         # STEP 5: Remove factive case
         for suffix in self.__step5_suffixes:
             if r1.endswith(suffix):
                 for double_cons in self.__double_consonants:
-                    if word[-1-len(double_cons):-1] == double_cons:
+                    if word[-1 - len(double_cons) : -1] == double_cons:
                         word = "".join((word[:-3], word[-2]))
 
-                        if r1[-1-len(double_cons):-1] == double_cons:
+                        if r1[-1 - len(double_cons) : -1] == double_cons:
                             r1 = "".join((r1[:-3], r1[-2]))
                         break
 
@@ -2506,49 +3073,58 @@ class HungarianStemmer(_LanguageSpecificStemmer):
                     word = suffix_replace(word, suffix, "a")
                     r1 = suffix_replace(r1, suffix, "a")
 
-                elif suffix in ("\xE9k\xE9", "\xE9\xE9i",
-                                "\xE9\xE9"):
+                elif suffix in ("\xE9k\xE9", "\xE9\xE9i", "\xE9\xE9"):
                     word = suffix_replace(word, suffix, "e")
                     r1 = suffix_replace(r1, suffix, "e")
                 else:
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
                 break
 
         # STEP 7: Remove singular owner suffixes
         for suffix in self.__step7_suffixes:
             if word.endswith(suffix):
                 if r1.endswith(suffix):
-                    if suffix in ("\xE1nk", "\xE1juk", "\xE1m",
-                                  "\xE1d", "\xE1"):
+                    if suffix in ("\xE1nk", "\xE1juk", "\xE1m", "\xE1d", "\xE1"):
                         word = suffix_replace(word, suffix, "a")
                         r1 = suffix_replace(r1, suffix, "a")
 
-                    elif suffix in ("\xE9nk", "\xE9j\xFCk",
-                                    "\xE9m", "\xE9d", "\xE9"):
+                    elif suffix in ("\xE9nk", "\xE9j\xFCk", "\xE9m", "\xE9d", "\xE9"):
                         word = suffix_replace(word, suffix, "e")
                         r1 = suffix_replace(r1, suffix, "e")
                     else:
-                        word = word[:-len(suffix)]
-                        r1 = r1[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        r1 = r1[: -len(suffix)]
                 break
 
         # STEP 8: Remove plural owner suffixes
         for suffix in self.__step8_suffixes:
             if word.endswith(suffix):
                 if r1.endswith(suffix):
-                    if suffix in ("\xE1im", "\xE1id", "\xE1i",
-                                  "\xE1ink", "\xE1itok", "\xE1ik"):
+                    if suffix in (
+                        "\xE1im",
+                        "\xE1id",
+                        "\xE1i",
+                        "\xE1ink",
+                        "\xE1itok",
+                        "\xE1ik",
+                    ):
                         word = suffix_replace(word, suffix, "a")
                         r1 = suffix_replace(r1, suffix, "a")
 
-                    elif suffix in ("\xE9im", "\xE9id", "\xE9i",
-                                    "\xE9ink", "\xE9itek", "\xE9ik"):
+                    elif suffix in (
+                        "\xE9im",
+                        "\xE9id",
+                        "\xE9i",
+                        "\xE9ink",
+                        "\xE9itek",
+                        "\xE9ik",
+                    ):
                         word = suffix_replace(word, suffix, "e")
                         r1 = suffix_replace(r1, suffix, "e")
                     else:
-                        word = word[:-len(suffix)]
-                        r1 = r1[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        r1 = r1[: -len(suffix)]
                 break
 
         # STEP 9: Remove plural suffixes
@@ -2560,14 +3136,11 @@ class HungarianStemmer(_LanguageSpecificStemmer):
                     elif suffix == "\xE9k":
                         word = suffix_replace(word, suffix, "e")
                     else:
-                        word = word[:-len(suffix)]
+                        word = word[: -len(suffix)]
                 break
 
-
         return word
 
-
-
     def __r1_hungarian(self, word, vowels, digraphs):
         """
         Return the region R1 that is used by the Hungarian stemmer.
@@ -2597,23 +3170,22 @@ class HungarianStemmer(_LanguageSpecificStemmer):
         if word[0] in vowels:
             for digraph in digraphs:
                 if digraph in word[1:]:
-                    r1 = word[word.index(digraph[-1])+1:]
+                    r1 = word[word.index(digraph[-1]) + 1 :]
                     return r1
 
             for i in range(1, len(word)):
                 if word[i] not in vowels:
-                    r1 = word[i+1:]
+                    r1 = word[i + 1 :]
                     break
         else:
             for i in range(1, len(word)):
                 if word[i] in vowels:
-                    r1 = word[i+1:]
+                    r1 = word[i + 1 :]
                     break
 
         return r1
 
 
-
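__r1_hungarian above anchors R1 after the first consonant (or digraph) of a vowel-initial word, and after the first vowel otherwise. A hedged simplification that ignores the digraph table (vowel string copied from __vowels above; name is ours):

    def r1_hungarian_simplified(word, vowels="aeiou\xF6\xFC\xE1\xE9\xED\xF3\xF5\xFA\xFB"):
        if not word:
            return ""
        if word[0] in vowels:
            for i in range(1, len(word)):
                if word[i] not in vowels:
                    return word[i + 1 :]
        else:
            for i in range(1, len(word)):
                if word[i] in vowels:
                    return word[i + 1 :]
        return ""

    assert r1_hungarian_simplified("alma") == "ma"
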
 class ItalianStemmer(_StandardStemmer):
 
     """
@@ -2634,42 +3206,187 @@ class ItalianStemmer(_StandardStemmer):
     """
 
     __vowels = "aeiou\xE0\xE8\xEC\xF2\xF9"
-    __step0_suffixes = ('gliela', 'gliele', 'glieli', 'glielo',
-                        'gliene', 'sene', 'mela', 'mele', 'meli',
-                        'melo', 'mene', 'tela', 'tele', 'teli',
-                        'telo', 'tene', 'cela', 'cele', 'celi',
-                        'celo', 'cene', 'vela', 'vele', 'veli',
-                        'velo', 'vene', 'gli', 'ci', 'la', 'le',
-                        'li', 'lo', 'mi', 'ne', 'si', 'ti', 'vi')
-    __step1_suffixes = ('atrice', 'atrici', 'azione', 'azioni',
-                        'uzione', 'uzioni', 'usione', 'usioni',
-                        'amento', 'amenti', 'imento', 'imenti',
-                        'amente', 'abile', 'abili', 'ibile', 'ibili',
-                        'mente', 'atore', 'atori', 'logia', 'logie',
-                        'anza', 'anze', 'iche', 'ichi', 'ismo',
-                        'ismi', 'ista', 'iste', 'isti', 'ist\xE0',
-                        'ist\xE8', 'ist\xEC', 'ante', 'anti',
-                        'enza', 'enze', 'ico', 'ici', 'ica', 'ice',
-                        'oso', 'osi', 'osa', 'ose', 'it\xE0',
-                        'ivo', 'ivi', 'iva', 'ive')
-    __step2_suffixes = ('erebbero', 'irebbero', 'assero', 'assimo',
-                        'eranno', 'erebbe', 'eremmo', 'ereste',
-                        'eresti', 'essero', 'iranno', 'irebbe',
-                        'iremmo', 'ireste', 'iresti', 'iscano',
-                        'iscono', 'issero', 'arono', 'avamo', 'avano',
-                        'avate', 'eremo', 'erete', 'erono', 'evamo',
-                        'evano', 'evate', 'iremo', 'irete', 'irono',
-                        'ivamo', 'ivano', 'ivate', 'ammo', 'ando',
-                        'asse', 'assi', 'emmo', 'enda', 'ende',
-                        'endi', 'endo', 'erai', 'erei', 'Yamo',
-                        'iamo', 'immo', 'irai', 'irei', 'isca',
-                        'isce', 'isci', 'isco', 'ano', 'are', 'ata',
-                        'ate', 'ati', 'ato', 'ava', 'avi', 'avo',
-                        'er\xE0', 'ere', 'er\xF2', 'ete', 'eva',
-                        'evi', 'evo', 'ir\xE0', 'ire', 'ir\xF2',
-                        'ita', 'ite', 'iti', 'ito', 'iva', 'ivi',
-                        'ivo', 'ono', 'uta', 'ute', 'uti', 'uto',
-                        'ar', 'ir')
+    __step0_suffixes = (
+        "gliela",
+        "gliele",
+        "glieli",
+        "glielo",
+        "gliene",
+        "sene",
+        "mela",
+        "mele",
+        "meli",
+        "melo",
+        "mene",
+        "tela",
+        "tele",
+        "teli",
+        "telo",
+        "tene",
+        "cela",
+        "cele",
+        "celi",
+        "celo",
+        "cene",
+        "vela",
+        "vele",
+        "veli",
+        "velo",
+        "vene",
+        "gli",
+        "ci",
+        "la",
+        "le",
+        "li",
+        "lo",
+        "mi",
+        "ne",
+        "si",
+        "ti",
+        "vi",
+    )
+    __step1_suffixes = (
+        "atrice",
+        "atrici",
+        "azione",
+        "azioni",
+        "uzione",
+        "uzioni",
+        "usione",
+        "usioni",
+        "amento",
+        "amenti",
+        "imento",
+        "imenti",
+        "amente",
+        "abile",
+        "abili",
+        "ibile",
+        "ibili",
+        "mente",
+        "atore",
+        "atori",
+        "logia",
+        "logie",
+        "anza",
+        "anze",
+        "iche",
+        "ichi",
+        "ismo",
+        "ismi",
+        "ista",
+        "iste",
+        "isti",
+        "ist\xE0",
+        "ist\xE8",
+        "ist\xEC",
+        "ante",
+        "anti",
+        "enza",
+        "enze",
+        "ico",
+        "ici",
+        "ica",
+        "ice",
+        "oso",
+        "osi",
+        "osa",
+        "ose",
+        "it\xE0",
+        "ivo",
+        "ivi",
+        "iva",
+        "ive",
+    )
+    __step2_suffixes = (
+        "erebbero",
+        "irebbero",
+        "assero",
+        "assimo",
+        "eranno",
+        "erebbe",
+        "eremmo",
+        "ereste",
+        "eresti",
+        "essero",
+        "iranno",
+        "irebbe",
+        "iremmo",
+        "ireste",
+        "iresti",
+        "iscano",
+        "iscono",
+        "issero",
+        "arono",
+        "avamo",
+        "avano",
+        "avate",
+        "eremo",
+        "erete",
+        "erono",
+        "evamo",
+        "evano",
+        "evate",
+        "iremo",
+        "irete",
+        "irono",
+        "ivamo",
+        "ivano",
+        "ivate",
+        "ammo",
+        "ando",
+        "asse",
+        "assi",
+        "emmo",
+        "enda",
+        "ende",
+        "endi",
+        "endo",
+        "erai",
+        "erei",
+        "Yamo",
+        "iamo",
+        "immo",
+        "irai",
+        "irei",
+        "isca",
+        "isce",
+        "isci",
+        "isco",
+        "ano",
+        "are",
+        "ata",
+        "ate",
+        "ati",
+        "ato",
+        "ava",
+        "avi",
+        "avo",
+        "er\xE0",
+        "ere",
+        "er\xF2",
+        "ete",
+        "eva",
+        "evi",
+        "evo",
+        "ir\xE0",
+        "ire",
+        "ir\xF2",
+        "ita",
+        "ite",
+        "iti",
+        "ito",
+        "iva",
+        "ivi",
+        "ivo",
+        "ono",
+        "uta",
+        "ute",
+        "uti",
+        "uto",
+        "ar",
+        "ir",
+    )
 
     def stem(self, word):
         """
@@ -2689,27 +3406,29 @@ class ItalianStemmer(_StandardStemmer):
         step1_success = False
 
         # All acute accents are replaced by grave accents.
-        word = (word.replace("\xE1", "\xE0")
-                    .replace("\xE9", "\xE8")
-                    .replace("\xED", "\xEC")
-                    .replace("\xF3", "\xF2")
-                    .replace("\xFA", "\xF9"))
+        word = (
+            word.replace("\xE1", "\xE0")
+            .replace("\xE9", "\xE8")
+            .replace("\xED", "\xEC")
+            .replace("\xF3", "\xF2")
+            .replace("\xFA", "\xF9")
+        )
 
         # Every occurrence of 'u' after 'q'
         # is put into upper case.
         for i in range(1, len(word)):
-            if word[i-1] == "q" and word[i] == "u":
-                word = "".join((word[:i], "U", word[i+1:]))
+            if word[i - 1] == "q" and word[i] == "u":
+                word = "".join((word[:i], "U", word[i + 1 :]))
 
         # Every occurrence of 'u' and 'i'
         # between vowels is put into upper case.
-        for i in range(1, len(word)-1):
-            if word[i-1] in self.__vowels and word[i+1] in self.__vowels:
+        for i in range(1, len(word) - 1):
+            if word[i - 1] in self.__vowels and word[i + 1] in self.__vowels:
                 if word[i] == "u":
-                    word = "".join((word[:i], "U", word[i+1:]))
+                    word = "".join((word[:i], "U", word[i + 1 :]))
 
-                elif word [i] == "i":
-                    word = "".join((word[:i], "I", word[i+1:]))
+                elif word[i] == "i":
+                    word = "".join((word[:i], "I", word[i + 1 :]))
 
         r1, r2 = self._r1r2_standard(word, self.__vowels)
         rv = self._rv_standard(word, self.__vowels)
@@ -2717,14 +3436,13 @@ class ItalianStemmer(_StandardStemmer):
         # STEP 0: Attached pronoun
         for suffix in self.__step0_suffixes:
             if rv.endswith(suffix):
-                if rv[-len(suffix)-4:-len(suffix)] in ("ando", "endo"):
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
-
-                elif (rv[-len(suffix)-2:-len(suffix)] in
-                      ("ar", "er", "ir")):
+                if rv[-len(suffix) - 4 : -len(suffix)] in ("ando", "endo"):
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
+
+                elif rv[-len(suffix) - 2 : -len(suffix)] in ("ar", "er", "ir"):
                     word = suffix_replace(word, suffix, "e")
                     r1 = suffix_replace(r1, suffix, "e")
                     r2 = suffix_replace(r2, suffix, "e")
@@ -2753,13 +3471,13 @@ class ItalianStemmer(_StandardStemmer):
                         word = word[:-2]
                         rv = rv[:-2]
 
-                    elif r2 .endswith("abil"):
+                    elif r2.endswith("abil"):
                         word = word[:-4]
                         rv = rv[:-4]
 
-                elif (suffix in ("amento", "amenti",
-                                 "imento", "imenti") and
-                      rv.endswith(suffix)):
+                elif suffix in ("amento", "amenti", "imento", "imenti") and rv.endswith(
+                    suffix
+                ):
                     step1_success = True
                     word = word[:-6]
                     rv = rv[:-6]
@@ -2767,9 +3485,9 @@ class ItalianStemmer(_StandardStemmer):
                 elif r2.endswith(suffix):
                     step1_success = True
                     if suffix in ("azione", "azioni", "atore", "atori"):
-                        word = word[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        r2 = r2[: -len(suffix)]
+                        rv = rv[: -len(suffix)]
 
                         if r2.endswith("ic"):
                             word = word[:-2]
@@ -2779,8 +3497,7 @@ class ItalianStemmer(_StandardStemmer):
                         word = word[:-2]
                         rv = word[:-2]
 
-                    elif suffix in ("uzione", "uzioni",
-                                    "usione", "usioni"):
+                    elif suffix in ("uzione", "uzioni", "usione", "usioni"):
                         word = word[:-5]
                         rv = rv[:-5]
 
@@ -2815,21 +3532,20 @@ class ItalianStemmer(_StandardStemmer):
                                 word = word[:-2]
                                 rv = rv[:-2]
                     else:
-                        word = word[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        rv = rv[: -len(suffix)]
                 break
 
         # STEP 2: Verb suffixes
         if not step1_success:
             for suffix in self.__step2_suffixes:
                 if rv.endswith(suffix):
-                    word = word[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
                     break
 
         # STEP 3a
-        if rv.endswith(("a", "e", "i", "o", "\xE0", "\xE8",
-                        "\xEC", "\xF2")):
+        if rv.endswith(("a", "e", "i", "o", "\xE0", "\xE8", "\xEC", "\xF2")):
             word = word[:-1]
             rv = rv[:-1]
 
@@ -2843,11 +3559,9 @@ class ItalianStemmer(_StandardStemmer):
 
         word = word.replace("I", "i").replace("U", "u")
 
-
         return word
 
 
-
 class NorwegianStemmer(_ScandinavianStemmer):
 
     """
@@ -2871,16 +3585,53 @@ class NorwegianStemmer(_ScandinavianStemmer):
 
     __vowels = "aeiouy\xE6\xE5\xF8"
     __s_ending = "bcdfghjlmnoprtvyz"
-    __step1_suffixes = ("hetenes", "hetene", "hetens", "heter",
-                        "heten", "endes", "ande", "ende", "edes",
-                        "enes", "erte", "ede", "ane", "ene", "ens",
-                        "ers", "ets", "het", "ast", "ert", "en",
-                        "ar", "er", "as", "es", "et", "a", "e", "s")
+    __step1_suffixes = (
+        "hetenes",
+        "hetene",
+        "hetens",
+        "heter",
+        "heten",
+        "endes",
+        "ande",
+        "ende",
+        "edes",
+        "enes",
+        "erte",
+        "ede",
+        "ane",
+        "ene",
+        "ens",
+        "ers",
+        "ets",
+        "het",
+        "ast",
+        "ert",
+        "en",
+        "ar",
+        "er",
+        "as",
+        "es",
+        "et",
+        "a",
+        "e",
+        "s",
+    )
 
     __step2_suffixes = ("dt", "vt")
 
-    __step3_suffixes = ("hetslov", "eleg", "elig", "elov", "slov",
-                          "leg", "eig", "lig", "els", "lov", "ig")
+    __step3_suffixes = (
+        "hetslov",
+        "eleg",
+        "elig",
+        "elov",
+        "slov",
+        "leg",
+        "eig",
+        "lig",
+        "els",
+        "lov",
+        "ig",
+    )
 
     def stem(self, word):
         """
@@ -2907,13 +3658,14 @@ class NorwegianStemmer(_ScandinavianStemmer):
                     r1 = suffix_replace(r1, suffix, "er")
 
                 elif suffix == "s":
-                    if (word[-2] in self.__s_ending or
-                        (word[-2] == "k" and word[-3] not in self.__vowels)):
+                    if word[-2] in self.__s_ending or (
+                        word[-2] == "k" and word[-3] not in self.__vowels
+                    ):
                         word = word[:-1]
                         r1 = r1[:-1]
                 else:
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
                 break
 
         # STEP 2
@@ -2926,14 +3678,12 @@ class NorwegianStemmer(_ScandinavianStemmer):
         # STEP 3
         for suffix in self.__step3_suffixes:
             if r1.endswith(suffix):
-                word = word[:-len(suffix)]
+                word = word[: -len(suffix)]
                 break
 
-
         return word
 
 
-
 class PortugueseStemmer(_StandardStemmer):
 
     """
@@ -2954,46 +3704,178 @@ class PortugueseStemmer(_StandardStemmer):
     """
 
     __vowels = "aeiou\xE1\xE9\xED\xF3\xFA\xE2\xEA\xF4"
-    __step1_suffixes = ('amentos', 'imentos', 'uço~es', 'amento',
-                        'imento', 'adoras', 'adores', 'a\xE7o~es',
-                        'logias', '\xEAncias', 'amente',
-                        'idades', 'an\xE7as', 'ismos', 'istas', 'adora',
-                        'a\xE7a~o', 'antes', '\xE2ncia',
-                        'logia', 'uça~o', '\xEAncia',
-                        'mente', 'idade', 'an\xE7a', 'ezas', 'icos', 'icas',
-                        'ismo', '\xE1vel', '\xEDvel', 'ista',
-                        'osos', 'osas', 'ador', 'ante', 'ivas',
-                        'ivos', 'iras', 'eza', 'ico', 'ica',
-                        'oso', 'osa', 'iva', 'ivo', 'ira')
-    __step2_suffixes = ('ar\xEDamos', 'er\xEDamos', 'ir\xEDamos',
-                        '\xE1ssemos', '\xEAssemos', '\xEDssemos',
-                        'ar\xEDeis', 'er\xEDeis', 'ir\xEDeis',
-                        '\xE1sseis', '\xE9sseis', '\xEDsseis',
-                        '\xE1ramos', '\xE9ramos', '\xEDramos',
-                        '\xE1vamos', 'aremos', 'eremos', 'iremos',
-                        'ariam', 'eriam', 'iriam', 'assem', 'essem',
-                        'issem', 'ara~o', 'era~o', 'ira~o', 'arias',
-                        'erias', 'irias', 'ardes', 'erdes', 'irdes',
-                        'asses', 'esses', 'isses', 'astes', 'estes',
-                        'istes', '\xE1reis', 'areis', '\xE9reis',
-                        'ereis', '\xEDreis', 'ireis', '\xE1veis',
-                        '\xEDamos', 'armos', 'ermos', 'irmos',
-                        'aria', 'eria', 'iria', 'asse', 'esse',
-                        'isse', 'aste', 'este', 'iste', 'arei',
-                        'erei', 'irei', 'aram', 'eram', 'iram',
-                        'avam', 'arem', 'erem', 'irem',
-                        'ando', 'endo', 'indo', 'adas', 'idas',
-                        'ar\xE1s', 'aras', 'er\xE1s', 'eras',
-                        'ir\xE1s', 'avas', 'ares', 'eres', 'ires',
-                        '\xEDeis', 'ados', 'idos', '\xE1mos',
-                        'amos', 'emos', 'imos', 'iras', 'ada', 'ida',
-                        'ar\xE1', 'ara', 'er\xE1', 'era',
-                        'ir\xE1', 'ava', 'iam', 'ado', 'ido',
-                        'ias', 'ais', 'eis', 'ira', 'ia', 'ei', 'am',
-                        'em', 'ar', 'er', 'ir', 'as',
-                        'es', 'is', 'eu', 'iu', 'ou')
-    __step4_suffixes = ("os", "a", "i", "o", "\xE1",
-                        "\xED", "\xF3")
+    __step1_suffixes = (
+        "amentos",
+        "imentos",
+        "uço~es",
+        "amento",
+        "imento",
+        "adoras",
+        "adores",
+        "a\xE7o~es",
+        "logias",
+        "\xEAncias",
+        "amente",
+        "idades",
+        "an\xE7as",
+        "ismos",
+        "istas",
+        "adora",
+        "a\xE7a~o",
+        "antes",
+        "\xE2ncia",
+        "logia",
+        "uça~o",
+        "\xEAncia",
+        "mente",
+        "idade",
+        "an\xE7a",
+        "ezas",
+        "icos",
+        "icas",
+        "ismo",
+        "\xE1vel",
+        "\xEDvel",
+        "ista",
+        "osos",
+        "osas",
+        "ador",
+        "ante",
+        "ivas",
+        "ivos",
+        "iras",
+        "eza",
+        "ico",
+        "ica",
+        "oso",
+        "osa",
+        "iva",
+        "ivo",
+        "ira",
+    )
+    __step2_suffixes = (
+        "ar\xEDamos",
+        "er\xEDamos",
+        "ir\xEDamos",
+        "\xE1ssemos",
+        "\xEAssemos",
+        "\xEDssemos",
+        "ar\xEDeis",
+        "er\xEDeis",
+        "ir\xEDeis",
+        "\xE1sseis",
+        "\xE9sseis",
+        "\xEDsseis",
+        "\xE1ramos",
+        "\xE9ramos",
+        "\xEDramos",
+        "\xE1vamos",
+        "aremos",
+        "eremos",
+        "iremos",
+        "ariam",
+        "eriam",
+        "iriam",
+        "assem",
+        "essem",
+        "issem",
+        "ara~o",
+        "era~o",
+        "ira~o",
+        "arias",
+        "erias",
+        "irias",
+        "ardes",
+        "erdes",
+        "irdes",
+        "asses",
+        "esses",
+        "isses",
+        "astes",
+        "estes",
+        "istes",
+        "\xE1reis",
+        "areis",
+        "\xE9reis",
+        "ereis",
+        "\xEDreis",
+        "ireis",
+        "\xE1veis",
+        "\xEDamos",
+        "armos",
+        "ermos",
+        "irmos",
+        "aria",
+        "eria",
+        "iria",
+        "asse",
+        "esse",
+        "isse",
+        "aste",
+        "este",
+        "iste",
+        "arei",
+        "erei",
+        "irei",
+        "aram",
+        "eram",
+        "iram",
+        "avam",
+        "arem",
+        "erem",
+        "irem",
+        "ando",
+        "endo",
+        "indo",
+        "adas",
+        "idas",
+        "ar\xE1s",
+        "aras",
+        "er\xE1s",
+        "eras",
+        "ir\xE1s",
+        "avas",
+        "ares",
+        "eres",
+        "ires",
+        "\xEDeis",
+        "ados",
+        "idos",
+        "\xE1mos",
+        "amos",
+        "emos",
+        "imos",
+        "iras",
+        "ada",
+        "ida",
+        "ar\xE1",
+        "ara",
+        "er\xE1",
+        "era",
+        "ir\xE1",
+        "ava",
+        "iam",
+        "ado",
+        "ido",
+        "ias",
+        "ais",
+        "eis",
+        "ira",
+        "ia",
+        "ei",
+        "am",
+        "em",
+        "ar",
+        "er",
+        "ir",
+        "as",
+        "es",
+        "is",
+        "eu",
+        "iu",
+        "ou",
+    )
+    __step4_suffixes = ("os", "a", "i", "o", "\xE1", "\xED", "\xF3")
 
     def stem(self, word):
         """
@@ -3013,10 +3895,12 @@ class PortugueseStemmer(_StandardStemmer):
         step1_success = False
         step2_success = False
 
-        word = (word.replace("\xE3", "a~")
-                    .replace("\xF5", "o~")
-                    .replace("q\xFC", "qu")
-                    .replace("g\xFC", "gu"))
+        word = (
+            word.replace("\xE3", "a~")
+            .replace("\xF5", "o~")
+            .replace("q\xFC", "qu")
+            .replace("g\xFC", "gu")
+        )
 
         r1, r2 = self._r1r2_standard(word, self.__vowels)
         rv = self._rv_standard(word, self.__vowels)
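The replace() chain above rewrites the nasal vowels ã and õ to the two-character placeholders a~ and o~ so that the suffix tuples spelled with them (e.g. "a\xE7o~es", "ira~o") can match; stem() undoes the substitution at the end (see the replace("a~", "\xE3") further down). A standalone sketch of the round trip, not taken from the patch:

    word = "cora\xE7\xF5es"                                  # 'corações'
    word = word.replace("\xE3", "a~").replace("\xF5", "o~")
    print(word.endswith("a\xE7o~es"))                        # True: 'aço~es' now matches
    print(word.replace("a~", "\xE3").replace("o~", "\xF5"))  # back to 'corações'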
@@ -3044,8 +3928,11 @@ class PortugueseStemmer(_StandardStemmer):
                         word = word[:-2]
                         rv = rv[:-2]
 
-                elif (suffix in ("ira", "iras") and rv.endswith(suffix) and
-                      word[-len(suffix)-1:-len(suffix)] == "e"):
+                elif (
+                    suffix in ("ira", "iras")
+                    and rv.endswith(suffix)
+                    and word[-len(suffix) - 1 : -len(suffix)] == "e"
+                ):
                     step1_success = True
 
                     word = suffix_replace(word, suffix, "ir")
@@ -3076,9 +3963,9 @@ class PortugueseStemmer(_StandardStemmer):
                             rv = rv[:-4]
 
                     elif suffix in ("idade", "idades"):
-                        word = word[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        r2 = r2[: -len(suffix)]
+                        rv = rv[: -len(suffix)]
 
                         if r2.endswith(("ic", "iv")):
                             word = word[:-2]
@@ -3089,16 +3976,16 @@ class PortugueseStemmer(_StandardStemmer):
                             rv = rv[:-4]
 
                     elif suffix in ("iva", "ivo", "ivas", "ivos"):
-                        word = word[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        r2 = r2[: -len(suffix)]
+                        rv = rv[: -len(suffix)]
 
                         if r2.endswith("at"):
                             word = word[:-2]
                             rv = rv[:-2]
                     else:
-                        word = word[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        rv = rv[: -len(suffix)]
                 break
 
         # STEP 2: Verb suffixes
@@ -3107,8 +3994,8 @@ class PortugueseStemmer(_StandardStemmer):
                 if rv.endswith(suffix):
                     step2_success = True
 
-                    word = word[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
                     break
 
         # STEP 3
@@ -3121,8 +4008,8 @@ class PortugueseStemmer(_StandardStemmer):
         if not step1_success and not step2_success:
             for suffix in self.__step4_suffixes:
                 if rv.endswith(suffix):
-                    word = word[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
                     break
 
         # STEP 5
@@ -3130,8 +4017,9 @@ class PortugueseStemmer(_StandardStemmer):
             word = word[:-1]
             rv = rv[:-1]
 
-            if ((word.endswith("gu") and rv.endswith("u")) or
-                (word.endswith("ci") and rv.endswith("i"))):
+            if (word.endswith("gu") and rv.endswith("u")) or (
+                word.endswith("ci") and rv.endswith("i")
+            ):
                 word = word[:-1]
 
         elif word.endswith("\xE7"):
@@ -3139,11 +4027,9 @@ class PortugueseStemmer(_StandardStemmer):
 
         word = word.replace("a~", "\xE3").replace("o~", "\xF5")
 
-
         return word
 
 
-
 class RomanianStemmer(_StandardStemmer):
 
     """
@@ -3166,64 +4052,232 @@ class RomanianStemmer(_StandardStemmer):
     """
 
     __vowels = "aeiou\u0103\xE2\xEE"
-    __step0_suffixes = ('iilor', 'ului', 'elor', 'iile', 'ilor',
-                        'atei', 'a\u0163ie', 'a\u0163ia', 'aua',
-                        'ele', 'iua', 'iei', 'ile', 'ul', 'ea',
-                        'ii')
-    __step1_suffixes = ('abilitate', 'abilitati', 'abilit\u0103\u0163i',
-                        'ibilitate', 'abilit\u0103i', 'ivitate',
-                        'ivitati', 'ivit\u0103\u0163i', 'icitate',
-                        'icitati', 'icit\u0103\u0163i', 'icatori',
-                        'ivit\u0103i', 'icit\u0103i', 'icator',
-                        'a\u0163iune', 'atoare', '\u0103toare',
-                        'i\u0163iune', 'itoare', 'iciva', 'icive',
-                        'icivi', 'iciv\u0103', 'icala', 'icale',
-                        'icali', 'ical\u0103', 'ativa', 'ative',
-                        'ativi', 'ativ\u0103', 'atori', '\u0103tori',
-                        'itiva', 'itive', 'itivi', 'itiv\u0103',
-                        'itori', 'iciv', 'ical', 'ativ', 'ator',
-                        '\u0103tor', 'itiv', 'itor')
-    __step2_suffixes = ('abila', 'abile', 'abili', 'abil\u0103',
-                        'ibila', 'ibile', 'ibili', 'ibil\u0103',
-                        'atori', 'itate', 'itati', 'it\u0103\u0163i',
-                        'abil', 'ibil', 'oasa', 'oas\u0103', 'oase',
-                        'anta', 'ante', 'anti', 'ant\u0103', 'ator',
-                        'it\u0103i', 'iune', 'iuni', 'isme', 'ista',
-                        'iste', 'isti', 'ist\u0103', 'i\u015Fti',
-                        'ata', 'at\u0103', 'ati', 'ate', 'uta',
-                        'ut\u0103', 'uti', 'ute', 'ita', 'it\u0103',
-                        'iti', 'ite', 'ica', 'ice', 'ici', 'ic\u0103',
-                        'osi', 'o\u015Fi', 'ant', 'iva', 'ive', 'ivi',
-                        'iv\u0103', 'ism', 'ist', 'at', 'ut', 'it',
-                        'ic', 'os', 'iv')
-    __step3_suffixes = ('seser\u0103\u0163i', 'aser\u0103\u0163i',
-                        'iser\u0103\u0163i', '\xE2ser\u0103\u0163i',
-                        'user\u0103\u0163i', 'seser\u0103m',
-                        'aser\u0103m', 'iser\u0103m', '\xE2ser\u0103m',
-                        'user\u0103m', 'ser\u0103\u0163i', 'sese\u015Fi',
-                        'seser\u0103', 'easc\u0103', 'ar\u0103\u0163i',
-                        'ur\u0103\u0163i', 'ir\u0103\u0163i',
-                        '\xE2r\u0103\u0163i', 'ase\u015Fi',
-                        'aser\u0103', 'ise\u015Fi', 'iser\u0103',
-                        '\xe2se\u015Fi', '\xE2ser\u0103',
-                        'use\u015Fi', 'user\u0103', 'ser\u0103m',
-                        'sesem', 'indu', '\xE2ndu', 'eaz\u0103',
-                        'e\u015Fti', 'e\u015Fte', '\u0103\u015Fti',
-                        '\u0103\u015Fte', 'ea\u0163i', 'ia\u0163i',
-                        'ar\u0103m', 'ur\u0103m', 'ir\u0103m',
-                        '\xE2r\u0103m', 'asem', 'isem',
-                        '\xE2sem', 'usem', 'se\u015Fi', 'ser\u0103',
-                        'sese', 'are', 'ere', 'ire', '\xE2re',
-                        'ind', '\xE2nd', 'eze', 'ezi', 'esc',
-                        '\u0103sc', 'eam', 'eai', 'eau', 'iam',
-                        'iai', 'iau', 'a\u015Fi', 'ar\u0103',
-                        'u\u015Fi', 'ur\u0103', 'i\u015Fi', 'ir\u0103',
-                        '\xE2\u015Fi', '\xe2r\u0103', 'ase',
-                        'ise', '\xE2se', 'use', 'a\u0163i',
-                        'e\u0163i', 'i\u0163i', '\xe2\u0163i', 'sei',
-                        'ez', 'am', 'ai', 'au', 'ea', 'ia', 'ui',
-                        '\xE2i', '\u0103m', 'em', 'im', '\xE2m',
-                        'se')
+    __step0_suffixes = (
+        "iilor",
+        "ului",
+        "elor",
+        "iile",
+        "ilor",
+        "atei",
+        "a\u0163ie",
+        "a\u0163ia",
+        "aua",
+        "ele",
+        "iua",
+        "iei",
+        "ile",
+        "ul",
+        "ea",
+        "ii",
+    )
+    __step1_suffixes = (
+        "abilitate",
+        "abilitati",
+        "abilit\u0103\u0163i",
+        "ibilitate",
+        "abilit\u0103i",
+        "ivitate",
+        "ivitati",
+        "ivit\u0103\u0163i",
+        "icitate",
+        "icitati",
+        "icit\u0103\u0163i",
+        "icatori",
+        "ivit\u0103i",
+        "icit\u0103i",
+        "icator",
+        "a\u0163iune",
+        "atoare",
+        "\u0103toare",
+        "i\u0163iune",
+        "itoare",
+        "iciva",
+        "icive",
+        "icivi",
+        "iciv\u0103",
+        "icala",
+        "icale",
+        "icali",
+        "ical\u0103",
+        "ativa",
+        "ative",
+        "ativi",
+        "ativ\u0103",
+        "atori",
+        "\u0103tori",
+        "itiva",
+        "itive",
+        "itivi",
+        "itiv\u0103",
+        "itori",
+        "iciv",
+        "ical",
+        "ativ",
+        "ator",
+        "\u0103tor",
+        "itiv",
+        "itor",
+    )
+    __step2_suffixes = (
+        "abila",
+        "abile",
+        "abili",
+        "abil\u0103",
+        "ibila",
+        "ibile",
+        "ibili",
+        "ibil\u0103",
+        "atori",
+        "itate",
+        "itati",
+        "it\u0103\u0163i",
+        "abil",
+        "ibil",
+        "oasa",
+        "oas\u0103",
+        "oase",
+        "anta",
+        "ante",
+        "anti",
+        "ant\u0103",
+        "ator",
+        "it\u0103i",
+        "iune",
+        "iuni",
+        "isme",
+        "ista",
+        "iste",
+        "isti",
+        "ist\u0103",
+        "i\u015Fti",
+        "ata",
+        "at\u0103",
+        "ati",
+        "ate",
+        "uta",
+        "ut\u0103",
+        "uti",
+        "ute",
+        "ita",
+        "it\u0103",
+        "iti",
+        "ite",
+        "ica",
+        "ice",
+        "ici",
+        "ic\u0103",
+        "osi",
+        "o\u015Fi",
+        "ant",
+        "iva",
+        "ive",
+        "ivi",
+        "iv\u0103",
+        "ism",
+        "ist",
+        "at",
+        "ut",
+        "it",
+        "ic",
+        "os",
+        "iv",
+    )
+    __step3_suffixes = (
+        "seser\u0103\u0163i",
+        "aser\u0103\u0163i",
+        "iser\u0103\u0163i",
+        "\xE2ser\u0103\u0163i",
+        "user\u0103\u0163i",
+        "seser\u0103m",
+        "aser\u0103m",
+        "iser\u0103m",
+        "\xE2ser\u0103m",
+        "user\u0103m",
+        "ser\u0103\u0163i",
+        "sese\u015Fi",
+        "seser\u0103",
+        "easc\u0103",
+        "ar\u0103\u0163i",
+        "ur\u0103\u0163i",
+        "ir\u0103\u0163i",
+        "\xE2r\u0103\u0163i",
+        "ase\u015Fi",
+        "aser\u0103",
+        "ise\u015Fi",
+        "iser\u0103",
+        "\xe2se\u015Fi",
+        "\xE2ser\u0103",
+        "use\u015Fi",
+        "user\u0103",
+        "ser\u0103m",
+        "sesem",
+        "indu",
+        "\xE2ndu",
+        "eaz\u0103",
+        "e\u015Fti",
+        "e\u015Fte",
+        "\u0103\u015Fti",
+        "\u0103\u015Fte",
+        "ea\u0163i",
+        "ia\u0163i",
+        "ar\u0103m",
+        "ur\u0103m",
+        "ir\u0103m",
+        "\xE2r\u0103m",
+        "asem",
+        "isem",
+        "\xE2sem",
+        "usem",
+        "se\u015Fi",
+        "ser\u0103",
+        "sese",
+        "are",
+        "ere",
+        "ire",
+        "\xE2re",
+        "ind",
+        "\xE2nd",
+        "eze",
+        "ezi",
+        "esc",
+        "\u0103sc",
+        "eam",
+        "eai",
+        "eau",
+        "iam",
+        "iai",
+        "iau",
+        "a\u015Fi",
+        "ar\u0103",
+        "u\u015Fi",
+        "ur\u0103",
+        "i\u015Fi",
+        "ir\u0103",
+        "\xE2\u015Fi",
+        "\xe2r\u0103",
+        "ase",
+        "ise",
+        "\xE2se",
+        "use",
+        "a\u0163i",
+        "e\u0163i",
+        "i\u0163i",
+        "\xe2\u0163i",
+        "sei",
+        "ez",
+        "am",
+        "ai",
+        "au",
+        "ea",
+        "ia",
+        "ui",
+        "\xE2i",
+        "\u0103m",
+        "em",
+        "im",
+        "\xE2m",
+        "se",
+    )
 
     def stem(self, word):
         """
@@ -3243,13 +4297,13 @@ class RomanianStemmer(_StandardStemmer):
         step1_success = False
         step2_success = False
 
-        for i in range(1, len(word)-1):
-            if word[i-1] in self.__vowels and word[i+1] in self.__vowels:
+        for i in range(1, len(word) - 1):
+            if word[i - 1] in self.__vowels and word[i + 1] in self.__vowels:
                 if word[i] == "u":
-                    word = "".join((word[:i], "U", word[i+1:]))
+                    word = "".join((word[:i], "U", word[i + 1 :]))
 
                 elif word[i] == "i":
-                    word = "".join((word[:i], "I", word[i+1:]))
+                    word = "".join((word[:i], "I", word[i + 1 :]))
 
         r1, r2 = self._r1r2_standard(word, self.__vowels)
         rv = self._rv_standard(word, self.__vowels)
@@ -3259,15 +4313,18 @@ class RomanianStemmer(_StandardStemmer):
             if word.endswith(suffix):
                 if suffix in r1:
                     if suffix in ("ul", "ului"):
-                        word = word[:-len(suffix)]
+                        word = word[: -len(suffix)]
 
                         if suffix in rv:
-                            rv = rv[:-len(suffix)]
+                            rv = rv[: -len(suffix)]
                         else:
                             rv = ""
 
-                    elif (suffix == "aua" or suffix == "atei" or
-                          (suffix == "ile" and word[-5:-3] != "ab")):
+                    elif (
+                        suffix == "aua"
+                        or suffix == "atei"
+                        or (suffix == "ile" and word[-5:-3] != "ab")
+                    ):
                         word = word[:-2]
 
                     elif suffix in ("ea", "ele", "elor"):
@@ -3278,8 +4335,7 @@ class RomanianStemmer(_StandardStemmer):
                         else:
                             rv = ""
 
-                    elif suffix in ("ii", "iua", "iei",
-                                    "iile", "iilor", "ilor"):
+                    elif suffix in ("ii", "iua", "iei", "iile", "iilor", "ilor"):
                         word = suffix_replace(word, suffix, "i")
 
                         if suffix in rv:
@@ -3302,40 +4358,75 @@ class RomanianStemmer(_StandardStemmer):
                         step1_success = True
                         replacement_done = True
 
-                        if suffix in ("abilitate", "abilitati",
-                                      "abilit\u0103i",
-                                      "abilit\u0103\u0163i"):
+                        if suffix in (
+                            "abilitate",
+                            "abilitati",
+                            "abilit\u0103i",
+                            "abilit\u0103\u0163i",
+                        ):
                             word = suffix_replace(word, suffix, "abil")
 
                         elif suffix == "ibilitate":
                             word = word[:-5]
 
-                        elif suffix in ("ivitate", "ivitati",
-                                        "ivit\u0103i",
-                                        "ivit\u0103\u0163i"):
+                        elif suffix in (
+                            "ivitate",
+                            "ivitati",
+                            "ivit\u0103i",
+                            "ivit\u0103\u0163i",
+                        ):
                             word = suffix_replace(word, suffix, "iv")
 
-                        elif suffix in ("icitate", "icitati", "icit\u0103i",
-                                        "icit\u0103\u0163i", "icator",
-                                        "icatori", "iciv", "iciva",
-                                        "icive", "icivi", "iciv\u0103",
-                                        "ical", "icala", "icale", "icali",
-                                        "ical\u0103"):
+                        elif suffix in (
+                            "icitate",
+                            "icitati",
+                            "icit\u0103i",
+                            "icit\u0103\u0163i",
+                            "icator",
+                            "icatori",
+                            "iciv",
+                            "iciva",
+                            "icive",
+                            "icivi",
+                            "iciv\u0103",
+                            "ical",
+                            "icala",
+                            "icale",
+                            "icali",
+                            "ical\u0103",
+                        ):
                             word = suffix_replace(word, suffix, "ic")
 
-                        elif suffix in ("ativ", "ativa", "ative", "ativi",
-                                        "ativ\u0103", "a\u0163iune",
-                                        "atoare", "ator", "atori",
-                                        "\u0103toare",
-                                        "\u0103tor", "\u0103tori"):
+                        elif suffix in (
+                            "ativ",
+                            "ativa",
+                            "ative",
+                            "ativi",
+                            "ativ\u0103",
+                            "a\u0163iune",
+                            "atoare",
+                            "ator",
+                            "atori",
+                            "\u0103toare",
+                            "\u0103tor",
+                            "\u0103tori",
+                        ):
                             word = suffix_replace(word, suffix, "at")
 
                             if suffix in r2:
                                 r2 = suffix_replace(r2, suffix, "at")
 
-                        elif suffix in ("itiv", "itiva", "itive", "itivi",
-                                        "itiv\u0103", "i\u0163iune",
-                                        "itoare", "itor", "itori"):
+                        elif suffix in (
+                            "itiv",
+                            "itiva",
+                            "itive",
+                            "itivi",
+                            "itiv\u0103",
+                            "i\u0163iune",
+                            "itoare",
+                            "itor",
+                            "itori",
+                        ):
                             word = suffix_replace(word, suffix, "it")
 
                             if suffix in r2:
@@ -3357,12 +4448,20 @@ class RomanianStemmer(_StandardStemmer):
                         if word[-5] == "\u0163":
                             word = "".join((word[:-5], "t"))
 
-                    elif suffix in ("ism", "isme", "ist", "ista", "iste",
-                                    "isti", "ist\u0103", "i\u015Fti"):
+                    elif suffix in (
+                        "ism",
+                        "isme",
+                        "ist",
+                        "ista",
+                        "iste",
+                        "isti",
+                        "ist\u0103",
+                        "i\u015Fti",
+                    ):
                         word = suffix_replace(word, suffix, "ist")
 
                     else:
-                        word = word[:-len(suffix)]
+                        word = word[: -len(suffix)]
                 break
 
         # STEP 3: Removal of verb suffixes
@@ -3370,36 +4469,50 @@ class RomanianStemmer(_StandardStemmer):
             for suffix in self.__step3_suffixes:
                 if word.endswith(suffix):
                     if suffix in rv:
-                        if suffix in ('seser\u0103\u0163i', 'seser\u0103m',
-                                      'ser\u0103\u0163i', 'sese\u015Fi',
-                                      'seser\u0103', 'ser\u0103m', 'sesem',
-                                      'se\u015Fi', 'ser\u0103', 'sese',
-                                      'a\u0163i', 'e\u0163i', 'i\u0163i',
-                                      '\xE2\u0163i', 'sei', '\u0103m',
-                                      'em', 'im', '\xE2m', 'se'):
-                            word = word[:-len(suffix)]
-                            rv = rv[:-len(suffix)]
+                        if suffix in (
+                            "seser\u0103\u0163i",
+                            "seser\u0103m",
+                            "ser\u0103\u0163i",
+                            "sese\u015Fi",
+                            "seser\u0103",
+                            "ser\u0103m",
+                            "sesem",
+                            "se\u015Fi",
+                            "ser\u0103",
+                            "sese",
+                            "a\u0163i",
+                            "e\u0163i",
+                            "i\u0163i",
+                            "\xE2\u0163i",
+                            "sei",
+                            "\u0103m",
+                            "em",
+                            "im",
+                            "\xE2m",
+                            "se",
+                        ):
+                            word = word[: -len(suffix)]
+                            rv = rv[: -len(suffix)]
                         else:
-                            if (not rv.startswith(suffix) and
-                                rv[rv.index(suffix)-1] not in
-                                "aeio\u0103\xE2\xEE"):
-                                word = word[:-len(suffix)]
+                            if (
+                                not rv.startswith(suffix)
+                                and rv[rv.index(suffix) - 1] not in "aeio\u0103\xE2\xEE"
+                            ):
+                                word = word[: -len(suffix)]
                         break
 
         # STEP 4: Removal of final vowel
         for suffix in ("ie", "a", "e", "i", "\u0103"):
             if word.endswith(suffix):
                 if suffix in rv:
-                    word = word[:-len(suffix)]
+                    word = word[: -len(suffix)]
                 break
 
         word = word.replace("I", "i").replace("U", "u")
 
-
         return word
 
 
-
 class RussianStemmer(_LanguageSpecificStemmer):
 
     """
@@ -3425,95 +4538,340 @@ class RussianStemmer(_LanguageSpecificStemmer):
 
     """
 
-    __perfective_gerund_suffixes = ("ivshis'", "yvshis'", "vshis'",
-                                      "ivshi", "yvshi", "vshi", "iv",
-                                      "yv", "v")
-    __adjectival_suffixes = ('ui^ushchi^ui^u', 'ui^ushchi^ai^a',
-                               'ui^ushchimi', 'ui^ushchymi', 'ui^ushchego',
-                               'ui^ushchogo', 'ui^ushchemu', 'ui^ushchomu',
-                               'ui^ushchikh', 'ui^ushchykh',
-                               'ui^ushchui^u', 'ui^ushchaia',
-                               'ui^ushchoi^u', 'ui^ushchei^u',
-                               'i^ushchi^ui^u', 'i^ushchi^ai^a',
-                               'ui^ushchee', 'ui^ushchie',
-                               'ui^ushchye', 'ui^ushchoe', 'ui^ushchei`',
-                               'ui^ushchii`', 'ui^ushchyi`',
-                               'ui^ushchoi`', 'ui^ushchem', 'ui^ushchim',
-                               'ui^ushchym', 'ui^ushchom', 'i^ushchimi',
-                               'i^ushchymi', 'i^ushchego', 'i^ushchogo',
-                               'i^ushchemu', 'i^ushchomu', 'i^ushchikh',
-                               'i^ushchykh', 'i^ushchui^u', 'i^ushchai^a',
-                               'i^ushchoi^u', 'i^ushchei^u', 'i^ushchee',
-                               'i^ushchie', 'i^ushchye', 'i^ushchoe',
-                               'i^ushchei`', 'i^ushchii`',
-                               'i^ushchyi`', 'i^ushchoi`', 'i^ushchem',
-                               'i^ushchim', 'i^ushchym', 'i^ushchom',
-                               'shchi^ui^u', 'shchi^ai^a', 'ivshi^ui^u',
-                               'ivshi^ai^a', 'yvshi^ui^u', 'yvshi^ai^a',
-                               'shchimi', 'shchymi', 'shchego', 'shchogo',
-                               'shchemu', 'shchomu', 'shchikh', 'shchykh',
-                               'shchui^u', 'shchai^a', 'shchoi^u',
-                               'shchei^u', 'ivshimi', 'ivshymi',
-                               'ivshego', 'ivshogo', 'ivshemu', 'ivshomu',
-                               'ivshikh', 'ivshykh', 'ivshui^u',
-                               'ivshai^a', 'ivshoi^u', 'ivshei^u',
-                               'yvshimi', 'yvshymi', 'yvshego', 'yvshogo',
-                               'yvshemu', 'yvshomu', 'yvshikh', 'yvshykh',
-                               'yvshui^u', 'yvshai^a', 'yvshoi^u',
-                               'yvshei^u', 'vshi^ui^u', 'vshi^ai^a',
-                               'shchee', 'shchie', 'shchye', 'shchoe',
-                               'shchei`', 'shchii`', 'shchyi`', 'shchoi`',
-                               'shchem', 'shchim', 'shchym', 'shchom',
-                               'ivshee', 'ivshie', 'ivshye', 'ivshoe',
-                               'ivshei`', 'ivshii`', 'ivshyi`',
-                               'ivshoi`', 'ivshem', 'ivshim', 'ivshym',
-                               'ivshom', 'yvshee', 'yvshie', 'yvshye',
-                               'yvshoe', 'yvshei`', 'yvshii`',
-                               'yvshyi`', 'yvshoi`', 'yvshem',
-                               'yvshim', 'yvshym', 'yvshom', 'vshimi',
-                               'vshymi', 'vshego', 'vshogo', 'vshemu',
-                               'vshomu', 'vshikh', 'vshykh', 'vshui^u',
-                               'vshai^a', 'vshoi^u', 'vshei^u',
-                               'emi^ui^u', 'emi^ai^a', 'nni^ui^u',
-                               'nni^ai^a', 'vshee',
-                               'vshie', 'vshye', 'vshoe', 'vshei`',
-                               'vshii`', 'vshyi`', 'vshoi`',
-                               'vshem', 'vshim', 'vshym', 'vshom',
-                               'emimi', 'emymi', 'emego', 'emogo',
-                               'ememu', 'emomu', 'emikh', 'emykh',
-                               'emui^u', 'emai^a', 'emoi^u', 'emei^u',
-                               'nnimi', 'nnymi', 'nnego', 'nnogo',
-                               'nnemu', 'nnomu', 'nnikh', 'nnykh',
-                               'nnui^u', 'nnai^a', 'nnoi^u', 'nnei^u',
-                               'emee', 'emie', 'emye', 'emoe',
-                               'emei`', 'emii`', 'emyi`',
-                               'emoi`', 'emem', 'emim', 'emym',
-                               'emom', 'nnee', 'nnie', 'nnye', 'nnoe',
-                               'nnei`', 'nnii`', 'nnyi`',
-                               'nnoi`', 'nnem', 'nnim', 'nnym',
-                               'nnom', 'i^ui^u', 'i^ai^a', 'imi', 'ymi',
-                               'ego', 'ogo', 'emu', 'omu', 'ikh',
-                               'ykh', 'ui^u', 'ai^a', 'oi^u', 'ei^u',
-                               'ee', 'ie', 'ye', 'oe', 'ei`',
-                               'ii`', 'yi`', 'oi`', 'em',
-                               'im', 'ym', 'om')
+    __perfective_gerund_suffixes = (
+        "ivshis'",
+        "yvshis'",
+        "vshis'",
+        "ivshi",
+        "yvshi",
+        "vshi",
+        "iv",
+        "yv",
+        "v",
+    )
+    __adjectival_suffixes = (
+        "ui^ushchi^ui^u",
+        "ui^ushchi^ai^a",
+        "ui^ushchimi",
+        "ui^ushchymi",
+        "ui^ushchego",
+        "ui^ushchogo",
+        "ui^ushchemu",
+        "ui^ushchomu",
+        "ui^ushchikh",
+        "ui^ushchykh",
+        "ui^ushchui^u",
+        "ui^ushchaia",
+        "ui^ushchoi^u",
+        "ui^ushchei^u",
+        "i^ushchi^ui^u",
+        "i^ushchi^ai^a",
+        "ui^ushchee",
+        "ui^ushchie",
+        "ui^ushchye",
+        "ui^ushchoe",
+        "ui^ushchei`",
+        "ui^ushchii`",
+        "ui^ushchyi`",
+        "ui^ushchoi`",
+        "ui^ushchem",
+        "ui^ushchim",
+        "ui^ushchym",
+        "ui^ushchom",
+        "i^ushchimi",
+        "i^ushchymi",
+        "i^ushchego",
+        "i^ushchogo",
+        "i^ushchemu",
+        "i^ushchomu",
+        "i^ushchikh",
+        "i^ushchykh",
+        "i^ushchui^u",
+        "i^ushchai^a",
+        "i^ushchoi^u",
+        "i^ushchei^u",
+        "i^ushchee",
+        "i^ushchie",
+        "i^ushchye",
+        "i^ushchoe",
+        "i^ushchei`",
+        "i^ushchii`",
+        "i^ushchyi`",
+        "i^ushchoi`",
+        "i^ushchem",
+        "i^ushchim",
+        "i^ushchym",
+        "i^ushchom",
+        "shchi^ui^u",
+        "shchi^ai^a",
+        "ivshi^ui^u",
+        "ivshi^ai^a",
+        "yvshi^ui^u",
+        "yvshi^ai^a",
+        "shchimi",
+        "shchymi",
+        "shchego",
+        "shchogo",
+        "shchemu",
+        "shchomu",
+        "shchikh",
+        "shchykh",
+        "shchui^u",
+        "shchai^a",
+        "shchoi^u",
+        "shchei^u",
+        "ivshimi",
+        "ivshymi",
+        "ivshego",
+        "ivshogo",
+        "ivshemu",
+        "ivshomu",
+        "ivshikh",
+        "ivshykh",
+        "ivshui^u",
+        "ivshai^a",
+        "ivshoi^u",
+        "ivshei^u",
+        "yvshimi",
+        "yvshymi",
+        "yvshego",
+        "yvshogo",
+        "yvshemu",
+        "yvshomu",
+        "yvshikh",
+        "yvshykh",
+        "yvshui^u",
+        "yvshai^a",
+        "yvshoi^u",
+        "yvshei^u",
+        "vshi^ui^u",
+        "vshi^ai^a",
+        "shchee",
+        "shchie",
+        "shchye",
+        "shchoe",
+        "shchei`",
+        "shchii`",
+        "shchyi`",
+        "shchoi`",
+        "shchem",
+        "shchim",
+        "shchym",
+        "shchom",
+        "ivshee",
+        "ivshie",
+        "ivshye",
+        "ivshoe",
+        "ivshei`",
+        "ivshii`",
+        "ivshyi`",
+        "ivshoi`",
+        "ivshem",
+        "ivshim",
+        "ivshym",
+        "ivshom",
+        "yvshee",
+        "yvshie",
+        "yvshye",
+        "yvshoe",
+        "yvshei`",
+        "yvshii`",
+        "yvshyi`",
+        "yvshoi`",
+        "yvshem",
+        "yvshim",
+        "yvshym",
+        "yvshom",
+        "vshimi",
+        "vshymi",
+        "vshego",
+        "vshogo",
+        "vshemu",
+        "vshomu",
+        "vshikh",
+        "vshykh",
+        "vshui^u",
+        "vshai^a",
+        "vshoi^u",
+        "vshei^u",
+        "emi^ui^u",
+        "emi^ai^a",
+        "nni^ui^u",
+        "nni^ai^a",
+        "vshee",
+        "vshie",
+        "vshye",
+        "vshoe",
+        "vshei`",
+        "vshii`",
+        "vshyi`",
+        "vshoi`",
+        "vshem",
+        "vshim",
+        "vshym",
+        "vshom",
+        "emimi",
+        "emymi",
+        "emego",
+        "emogo",
+        "ememu",
+        "emomu",
+        "emikh",
+        "emykh",
+        "emui^u",
+        "emai^a",
+        "emoi^u",
+        "emei^u",
+        "nnimi",
+        "nnymi",
+        "nnego",
+        "nnogo",
+        "nnemu",
+        "nnomu",
+        "nnikh",
+        "nnykh",
+        "nnui^u",
+        "nnai^a",
+        "nnoi^u",
+        "nnei^u",
+        "emee",
+        "emie",
+        "emye",
+        "emoe",
+        "emei`",
+        "emii`",
+        "emyi`",
+        "emoi`",
+        "emem",
+        "emim",
+        "emym",
+        "emom",
+        "nnee",
+        "nnie",
+        "nnye",
+        "nnoe",
+        "nnei`",
+        "nnii`",
+        "nnyi`",
+        "nnoi`",
+        "nnem",
+        "nnim",
+        "nnym",
+        "nnom",
+        "i^ui^u",
+        "i^ai^a",
+        "imi",
+        "ymi",
+        "ego",
+        "ogo",
+        "emu",
+        "omu",
+        "ikh",
+        "ykh",
+        "ui^u",
+        "ai^a",
+        "oi^u",
+        "ei^u",
+        "ee",
+        "ie",
+        "ye",
+        "oe",
+        "ei`",
+        "ii`",
+        "yi`",
+        "oi`",
+        "em",
+        "im",
+        "ym",
+        "om",
+    )
     __reflexive_suffixes = ("si^a", "s'")
-    __verb_suffixes = ("esh'", 'ei`te', 'ui`te', 'ui^ut',
-                         "ish'", 'ete', 'i`te', 'i^ut', 'nno',
-                         'ila', 'yla', 'ena', 'ite', 'ili', 'yli',
-                         'ilo', 'ylo', 'eno', 'i^at', 'uet', 'eny',
-                         "it'", "yt'", 'ui^u', 'la', 'na', 'li',
-                         'em', 'lo', 'no', 'et', 'ny', "t'",
-                         'ei`', 'ui`', 'il', 'yl', 'im',
-                         'ym', 'en', 'it', 'yt', 'i^u', 'i`',
-                         'l', 'n')
-    __noun_suffixes = ('ii^ami', 'ii^akh', 'i^ami', 'ii^am', 'i^akh',
-                         'ami', 'iei`', 'i^am', 'iem', 'akh',
-                         'ii^u', "'i^u", 'ii^a', "'i^a", 'ev', 'ov',
-                         'ie', "'e", 'ei', 'ii', 'ei`',
-                         'oi`', 'ii`', 'em', 'am', 'om',
-                         'i^u', 'i^a', 'a', 'e', 'i', 'i`',
-                         'o', 'u', 'y', "'")
+    __verb_suffixes = (
+        "esh'",
+        "ei`te",
+        "ui`te",
+        "ui^ut",
+        "ish'",
+        "ete",
+        "i`te",
+        "i^ut",
+        "nno",
+        "ila",
+        "yla",
+        "ena",
+        "ite",
+        "ili",
+        "yli",
+        "ilo",
+        "ylo",
+        "eno",
+        "i^at",
+        "uet",
+        "eny",
+        "it'",
+        "yt'",
+        "ui^u",
+        "la",
+        "na",
+        "li",
+        "em",
+        "lo",
+        "no",
+        "et",
+        "ny",
+        "t'",
+        "ei`",
+        "ui`",
+        "il",
+        "yl",
+        "im",
+        "ym",
+        "en",
+        "it",
+        "yt",
+        "i^u",
+        "i`",
+        "l",
+        "n",
+    )
+    __noun_suffixes = (
+        "ii^ami",
+        "ii^akh",
+        "i^ami",
+        "ii^am",
+        "i^akh",
+        "ami",
+        "iei`",
+        "i^am",
+        "iem",
+        "akh",
+        "ii^u",
+        "'i^u",
+        "ii^a",
+        "'i^a",
+        "ev",
+        "ov",
+        "ie",
+        "'e",
+        "ei",
+        "ii",
+        "ei`",
+        "oi`",
+        "ii`",
+        "em",
+        "am",
+        "om",
+        "i^u",
+        "i^a",
+        "a",
+        "e",
+        "i",
+        "i`",
+        "o",
+        "u",
+        "y",
+        "'",
+    )
     __superlative_suffixes = ("ei`she", "ei`sh")
     __derivational_suffixes = ("ost'", "ost")
 
@@ -3536,8 +4894,10 @@ class RussianStemmer(_LanguageSpecificStemmer):
                 chr_exceeded = True
                 break
 
-        if chr_exceeded:
-            word = self.__cyrillic_to_roman(word)
+        if not chr_exceeded:
+            return word
+
+        word = self.__cyrillic_to_roman(word)
 
         step1_success = False
         adjectival_removed = False
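Unlike the surrounding reformatting, the chr_exceeded hunk above changes behaviour: previously a word with no character above U+00FF skipped transliteration but still ran through the Latin-based suffix stripping; now such input is returned unchanged, so Latin-script words are no longer mangled. A minimal check (assuming an installed nltk carrying this change; the comments state what the code above implies, not a captured run):

    from nltk.stem.snowball import RussianStemmer

    stemmer = RussianStemmer()
    print(stemmer.stem("авиация"))   # Cyrillic: transliterated, stemmed, converted back
    print(stemmer.stem("aviation"))  # no Cyrillic: returned as-is after this change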
@@ -3551,106 +4911,224 @@ class RussianStemmer(_LanguageSpecificStemmer):
         for suffix in self.__perfective_gerund_suffixes:
             if rv.endswith(suffix):
                 if suffix in ("v", "vshi", "vshis'"):
-                    if (rv[-len(suffix)-3:-len(suffix)] == "i^a" or
-                        rv[-len(suffix)-1:-len(suffix)] == "a"):
-                        word = word[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
+                    if (
+                        rv[-len(suffix) - 3 : -len(suffix)] == "i^a"
+                        or rv[-len(suffix) - 1 : -len(suffix)] == "a"
+                    ):
+                        word = word[: -len(suffix)]
+                        r2 = r2[: -len(suffix)]
+                        rv = rv[: -len(suffix)]
                         step1_success = True
                         break
                 else:
-                    word = word[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
                     step1_success = True
                     break
 
         if not step1_success:
             for suffix in self.__reflexive_suffixes:
                 if rv.endswith(suffix):
-                    word = word[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
                     break
 
             for suffix in self.__adjectival_suffixes:
                 if rv.endswith(suffix):
-                    if suffix in ('i^ushchi^ui^u', 'i^ushchi^ai^a',
-                              'i^ushchui^u', 'i^ushchai^a', 'i^ushchoi^u',
-                              'i^ushchei^u', 'i^ushchimi', 'i^ushchymi',
-                              'i^ushchego', 'i^ushchogo', 'i^ushchemu',
-                              'i^ushchomu', 'i^ushchikh', 'i^ushchykh',
-                              'shchi^ui^u', 'shchi^ai^a', 'i^ushchee',
-                              'i^ushchie', 'i^ushchye', 'i^ushchoe',
-                              'i^ushchei`', 'i^ushchii`', 'i^ushchyi`',
-                              'i^ushchoi`', 'i^ushchem', 'i^ushchim',
-                              'i^ushchym', 'i^ushchom', 'vshi^ui^u',
-                              'vshi^ai^a', 'shchui^u', 'shchai^a',
-                              'shchoi^u', 'shchei^u', 'emi^ui^u',
-                              'emi^ai^a', 'nni^ui^u', 'nni^ai^a',
-                              'shchimi', 'shchymi', 'shchego', 'shchogo',
-                              'shchemu', 'shchomu', 'shchikh', 'shchykh',
-                              'vshui^u', 'vshai^a', 'vshoi^u', 'vshei^u',
-                              'shchee', 'shchie', 'shchye', 'shchoe',
-                              'shchei`', 'shchii`', 'shchyi`', 'shchoi`',
-                              'shchem', 'shchim', 'shchym', 'shchom',
-                              'vshimi', 'vshymi', 'vshego', 'vshogo',
-                              'vshemu', 'vshomu', 'vshikh', 'vshykh',
-                              'emui^u', 'emai^a', 'emoi^u', 'emei^u',
-                              'nnui^u', 'nnai^a', 'nnoi^u', 'nnei^u',
-                              'vshee', 'vshie', 'vshye', 'vshoe',
-                              'vshei`', 'vshii`', 'vshyi`', 'vshoi`',
-                              'vshem', 'vshim', 'vshym', 'vshom',
-                              'emimi', 'emymi', 'emego', 'emogo',
-                              'ememu', 'emomu', 'emikh', 'emykh',
-                              'nnimi', 'nnymi', 'nnego', 'nnogo',
-                              'nnemu', 'nnomu', 'nnikh', 'nnykh',
-                              'emee', 'emie', 'emye', 'emoe', 'emei`',
-                              'emii`', 'emyi`', 'emoi`', 'emem', 'emim',
-                              'emym', 'emom', 'nnee', 'nnie', 'nnye',
-                              'nnoe', 'nnei`', 'nnii`', 'nnyi`', 'nnoi`',
-                              'nnem', 'nnim', 'nnym', 'nnom'):
-                        if (rv[-len(suffix)-3:-len(suffix)] == "i^a" or
-                            rv[-len(suffix)-1:-len(suffix)] == "a"):
-                            word = word[:-len(suffix)]
-                            r2 = r2[:-len(suffix)]
-                            rv = rv[:-len(suffix)]
+                    if suffix in (
+                        "i^ushchi^ui^u",
+                        "i^ushchi^ai^a",
+                        "i^ushchui^u",
+                        "i^ushchai^a",
+                        "i^ushchoi^u",
+                        "i^ushchei^u",
+                        "i^ushchimi",
+                        "i^ushchymi",
+                        "i^ushchego",
+                        "i^ushchogo",
+                        "i^ushchemu",
+                        "i^ushchomu",
+                        "i^ushchikh",
+                        "i^ushchykh",
+                        "shchi^ui^u",
+                        "shchi^ai^a",
+                        "i^ushchee",
+                        "i^ushchie",
+                        "i^ushchye",
+                        "i^ushchoe",
+                        "i^ushchei`",
+                        "i^ushchii`",
+                        "i^ushchyi`",
+                        "i^ushchoi`",
+                        "i^ushchem",
+                        "i^ushchim",
+                        "i^ushchym",
+                        "i^ushchom",
+                        "vshi^ui^u",
+                        "vshi^ai^a",
+                        "shchui^u",
+                        "shchai^a",
+                        "shchoi^u",
+                        "shchei^u",
+                        "emi^ui^u",
+                        "emi^ai^a",
+                        "nni^ui^u",
+                        "nni^ai^a",
+                        "shchimi",
+                        "shchymi",
+                        "shchego",
+                        "shchogo",
+                        "shchemu",
+                        "shchomu",
+                        "shchikh",
+                        "shchykh",
+                        "vshui^u",
+                        "vshai^a",
+                        "vshoi^u",
+                        "vshei^u",
+                        "shchee",
+                        "shchie",
+                        "shchye",
+                        "shchoe",
+                        "shchei`",
+                        "shchii`",
+                        "shchyi`",
+                        "shchoi`",
+                        "shchem",
+                        "shchim",
+                        "shchym",
+                        "shchom",
+                        "vshimi",
+                        "vshymi",
+                        "vshego",
+                        "vshogo",
+                        "vshemu",
+                        "vshomu",
+                        "vshikh",
+                        "vshykh",
+                        "emui^u",
+                        "emai^a",
+                        "emoi^u",
+                        "emei^u",
+                        "nnui^u",
+                        "nnai^a",
+                        "nnoi^u",
+                        "nnei^u",
+                        "vshee",
+                        "vshie",
+                        "vshye",
+                        "vshoe",
+                        "vshei`",
+                        "vshii`",
+                        "vshyi`",
+                        "vshoi`",
+                        "vshem",
+                        "vshim",
+                        "vshym",
+                        "vshom",
+                        "emimi",
+                        "emymi",
+                        "emego",
+                        "emogo",
+                        "ememu",
+                        "emomu",
+                        "emikh",
+                        "emykh",
+                        "nnimi",
+                        "nnymi",
+                        "nnego",
+                        "nnogo",
+                        "nnemu",
+                        "nnomu",
+                        "nnikh",
+                        "nnykh",
+                        "emee",
+                        "emie",
+                        "emye",
+                        "emoe",
+                        "emei`",
+                        "emii`",
+                        "emyi`",
+                        "emoi`",
+                        "emem",
+                        "emim",
+                        "emym",
+                        "emom",
+                        "nnee",
+                        "nnie",
+                        "nnye",
+                        "nnoe",
+                        "nnei`",
+                        "nnii`",
+                        "nnyi`",
+                        "nnoi`",
+                        "nnem",
+                        "nnim",
+                        "nnym",
+                        "nnom",
+                    ):
+                        if (
+                            rv[-len(suffix) - 3 : -len(suffix)] == "i^a"
+                            or rv[-len(suffix) - 1 : -len(suffix)] == "a"
+                        ):
+                            word = word[: -len(suffix)]
+                            r2 = r2[: -len(suffix)]
+                            rv = rv[: -len(suffix)]
                             adjectival_removed = True
                             break
                     else:
-                        word = word[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        r2 = r2[: -len(suffix)]
+                        rv = rv[: -len(suffix)]
                         adjectival_removed = True
                         break
 
             if not adjectival_removed:
                 for suffix in self.__verb_suffixes:
                     if rv.endswith(suffix):
-                        if suffix in ("la", "na", "ete", "i`te", "li",
-                                      "i`", "l", "em", "n", "lo", "no",
-                                      "et", "i^ut", "ny", "t'", "esh'",
-                                      "nno"):
-                            if (rv[-len(suffix)-3:-len(suffix)] == "i^a" or
-                                rv[-len(suffix)-1:-len(suffix)] == "a"):
-                                word = word[:-len(suffix)]
-                                r2 = r2[:-len(suffix)]
-                                rv = rv[:-len(suffix)]
+                        if suffix in (
+                            "la",
+                            "na",
+                            "ete",
+                            "i`te",
+                            "li",
+                            "i`",
+                            "l",
+                            "em",
+                            "n",
+                            "lo",
+                            "no",
+                            "et",
+                            "i^ut",
+                            "ny",
+                            "t'",
+                            "esh'",
+                            "nno",
+                        ):
+                            if (
+                                rv[-len(suffix) - 3 : -len(suffix)] == "i^a"
+                                or rv[-len(suffix) - 1 : -len(suffix)] == "a"
+                            ):
+                                word = word[: -len(suffix)]
+                                r2 = r2[: -len(suffix)]
+                                rv = rv[: -len(suffix)]
                                 verb_removed = True
                                 break
                         else:
-                            word = word[:-len(suffix)]
-                            r2 = r2[:-len(suffix)]
-                            rv = rv[:-len(suffix)]
+                            word = word[: -len(suffix)]
+                            r2 = r2[: -len(suffix)]
+                            rv = rv[: -len(suffix)]
                             verb_removed = True
                             break
 
             if not adjectival_removed and not verb_removed:
                 for suffix in self.__noun_suffixes:
                     if rv.endswith(suffix):
-                        word = word[:-len(suffix)]
-                        r2 = r2[:-len(suffix)]
-                        rv = rv[:-len(suffix)]
+                        word = word[: -len(suffix)]
+                        r2 = r2[: -len(suffix)]
+                        rv = rv[: -len(suffix)]
                         break
 
         # Step 2
@@ -3661,7 +5139,7 @@ class RussianStemmer(_LanguageSpecificStemmer):
         # Step 3
         for suffix in self.__derivational_suffixes:
             if r2.endswith(suffix):
-                word = word[:-len(suffix)]
+                word = word[: -len(suffix)]
                 break
 
         # Step 4
@@ -3672,7 +5150,7 @@ class RussianStemmer(_LanguageSpecificStemmer):
         if not undouble_success:
             for suffix in self.__superlative_suffixes:
                 if word.endswith(suffix):
-                    word = word[:-len(suffix)]
+                    word = word[: -len(suffix)]
                     superlative_removed = True
                     break
             if word.endswith("nn"):
@@ -3682,14 +5160,10 @@ class RussianStemmer(_LanguageSpecificStemmer):
             if word.endswith("'"):
                 word = word[:-1]
 
-        if chr_exceeded:
-            word = self.__roman_to_cyrillic(word)
-
+        word = self.__roman_to_cyrillic(word)
 
         return word
 
-
-
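With the `chr_exceeded` guard removed above, `stem()` now unconditionally transliterates the result back to Cyrillic instead of doing so only when non-Latin input was detected. A minimal sketch of driving the stemmer end to end, assuming this nltk tree is importable (the word and expected stem are ours, for illustration):

    from nltk.stem.snowball import RussianStemmer

    stemmer = RussianStemmer()
    # Input is Cyrillic; stem() romanizes, strips suffixes, and now
    # always converts back, so the output is Cyrillic as well.
    print(stemmer.stem("авиация"))  # expected: "авиац"
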
     def __regions_russian(self, word):
         """
         Return the regions RV and R2 which are used by the Russian stemmer.
@@ -3716,37 +5190,28 @@ class RussianStemmer(_LanguageSpecificStemmer):
         rv = ""
 
         vowels = ("A", "U", "E", "a", "e", "i", "o", "u", "y")
-        word = (word.replace("i^a", "A")
-                    .replace("i^u", "U")
-                    .replace("e`", "E"))
+        word = word.replace("i^a", "A").replace("i^u", "U").replace("e`", "E")
 
         for i in range(1, len(word)):
-            if word[i] not in vowels and word[i-1] in vowels:
-                r1 = word[i+1:]
+            if word[i] not in vowels and word[i - 1] in vowels:
+                r1 = word[i + 1 :]
                 break
 
         for i in range(1, len(r1)):
-            if r1[i] not in vowels and r1[i-1] in vowels:
-                r2 = r1[i+1:]
+            if r1[i] not in vowels and r1[i - 1] in vowels:
+                r2 = r1[i + 1 :]
                 break
 
         for i in range(len(word)):
             if word[i] in vowels:
-                rv = word[i+1:]
+                rv = word[i + 1 :]
                 break
 
-        r2 = (r2.replace("A", "i^a")
-                .replace("U", "i^u")
-                .replace("E", "e`"))
-        rv = (rv.replace("A", "i^a")
-              .replace("U", "i^u")
-              .replace("E", "e`"))
-
+        r2 = r2.replace("A", "i^a").replace("U", "i^u").replace("E", "e`")
+        rv = rv.replace("A", "i^a").replace("U", "i^u").replace("E", "e`")
 
         return (rv, r2)
 
-
-
     def __cyrillic_to_roman(self, word):
         """
         Transliterate a Russian word into the Roman alphabet.
@@ -3763,45 +5228,77 @@ class RussianStemmer(_LanguageSpecificStemmer):
                RussianStemmer. It is not to be invoked directly!
 
         """
-        word = (word.replace("\u0410", "a").replace("\u0430", "a")
-                    .replace("\u0411", "b").replace("\u0431", "b")
-                    .replace("\u0412", "v").replace("\u0432", "v")
-                    .replace("\u0413", "g").replace("\u0433", "g")
-                    .replace("\u0414", "d").replace("\u0434", "d")
-                    .replace("\u0415", "e").replace("\u0435", "e")
-                    .replace("\u0401", "e").replace("\u0451", "e")
-                    .replace("\u0416", "zh").replace("\u0436", "zh")
-                    .replace("\u0417", "z").replace("\u0437", "z")
-                    .replace("\u0418", "i").replace("\u0438", "i")
-                    .replace("\u0419", "i`").replace("\u0439", "i`")
-                    .replace("\u041A", "k").replace("\u043A", "k")
-                    .replace("\u041B", "l").replace("\u043B", "l")
-                    .replace("\u041C", "m").replace("\u043C", "m")
-                    .replace("\u041D", "n").replace("\u043D", "n")
-                    .replace("\u041E", "o").replace("\u043E", "o")
-                    .replace("\u041F", "p").replace("\u043F", "p")
-                    .replace("\u0420", "r").replace("\u0440", "r")
-                    .replace("\u0421", "s").replace("\u0441", "s")
-                    .replace("\u0422", "t").replace("\u0442", "t")
-                    .replace("\u0423", "u").replace("\u0443", "u")
-                    .replace("\u0424", "f").replace("\u0444", "f")
-                    .replace("\u0425", "kh").replace("\u0445", "kh")
-                    .replace("\u0426", "t^s").replace("\u0446", "t^s")
-                    .replace("\u0427", "ch").replace("\u0447", "ch")
-                    .replace("\u0428", "sh").replace("\u0448", "sh")
-                    .replace("\u0429", "shch").replace("\u0449", "shch")
-                    .replace("\u042A", "''").replace("\u044A", "''")
-                    .replace("\u042B", "y").replace("\u044B", "y")
-                    .replace("\u042C", "'").replace("\u044C", "'")
-                    .replace("\u042D", "e`").replace("\u044D", "e`")
-                    .replace("\u042E", "i^u").replace("\u044E", "i^u")
-                    .replace("\u042F", "i^a").replace("\u044F", "i^a"))
-
+        word = (
+            word.replace("\u0410", "a")
+            .replace("\u0430", "a")
+            .replace("\u0411", "b")
+            .replace("\u0431", "b")
+            .replace("\u0412", "v")
+            .replace("\u0432", "v")
+            .replace("\u0413", "g")
+            .replace("\u0433", "g")
+            .replace("\u0414", "d")
+            .replace("\u0434", "d")
+            .replace("\u0415", "e")
+            .replace("\u0435", "e")
+            .replace("\u0401", "e")
+            .replace("\u0451", "e")
+            .replace("\u0416", "zh")
+            .replace("\u0436", "zh")
+            .replace("\u0417", "z")
+            .replace("\u0437", "z")
+            .replace("\u0418", "i")
+            .replace("\u0438", "i")
+            .replace("\u0419", "i`")
+            .replace("\u0439", "i`")
+            .replace("\u041A", "k")
+            .replace("\u043A", "k")
+            .replace("\u041B", "l")
+            .replace("\u043B", "l")
+            .replace("\u041C", "m")
+            .replace("\u043C", "m")
+            .replace("\u041D", "n")
+            .replace("\u043D", "n")
+            .replace("\u041E", "o")
+            .replace("\u043E", "o")
+            .replace("\u041F", "p")
+            .replace("\u043F", "p")
+            .replace("\u0420", "r")
+            .replace("\u0440", "r")
+            .replace("\u0421", "s")
+            .replace("\u0441", "s")
+            .replace("\u0422", "t")
+            .replace("\u0442", "t")
+            .replace("\u0423", "u")
+            .replace("\u0443", "u")
+            .replace("\u0424", "f")
+            .replace("\u0444", "f")
+            .replace("\u0425", "kh")
+            .replace("\u0445", "kh")
+            .replace("\u0426", "t^s")
+            .replace("\u0446", "t^s")
+            .replace("\u0427", "ch")
+            .replace("\u0447", "ch")
+            .replace("\u0428", "sh")
+            .replace("\u0448", "sh")
+            .replace("\u0429", "shch")
+            .replace("\u0449", "shch")
+            .replace("\u042A", "''")
+            .replace("\u044A", "''")
+            .replace("\u042B", "y")
+            .replace("\u044B", "y")
+            .replace("\u042C", "'")
+            .replace("\u044C", "'")
+            .replace("\u042D", "e`")
+            .replace("\u044D", "e`")
+            .replace("\u042E", "i^u")
+            .replace("\u044E", "i^u")
+            .replace("\u042F", "i^a")
+            .replace("\u044F", "i^a")
+        )
 
         return word
 
-
-
     def __roman_to_cyrillic(self, word):
         """
         Transliterate a Russian word back into the Cyrillic alphabet.
@@ -3818,24 +5315,41 @@ class RussianStemmer(_LanguageSpecificStemmer):
                RussianStemmer. It is not to be invoked directly!
 
         """
-        word = (word.replace("i^u", "\u044E").replace("i^a", "\u044F")
-                    .replace("shch", "\u0449").replace("kh", "\u0445")
-                    .replace("t^s", "\u0446").replace("ch", "\u0447")
-                    .replace("e`", "\u044D").replace("i`", "\u0439")
-                    .replace("sh", "\u0448").replace("k", "\u043A")
-                    .replace("e", "\u0435").replace("zh", "\u0436")
-                    .replace("a", "\u0430").replace("b", "\u0431")
-                    .replace("v", "\u0432").replace("g", "\u0433")
-                    .replace("d", "\u0434").replace("e", "\u0435")
-                    .replace("z", "\u0437").replace("i", "\u0438")
-                    .replace("l", "\u043B").replace("m", "\u043C")
-                    .replace("n", "\u043D").replace("o", "\u043E")
-                    .replace("p", "\u043F").replace("r", "\u0440")
-                    .replace("s", "\u0441").replace("t", "\u0442")
-                    .replace("u", "\u0443").replace("f", "\u0444")
-                    .replace("''", "\u044A").replace("y", "\u044B")
-                    .replace("'", "\u044C"))
-
+        word = (
+            word.replace("i^u", "\u044E")
+            .replace("i^a", "\u044F")
+            .replace("shch", "\u0449")
+            .replace("kh", "\u0445")
+            .replace("t^s", "\u0446")
+            .replace("ch", "\u0447")
+            .replace("e`", "\u044D")
+            .replace("i`", "\u0439")
+            .replace("sh", "\u0448")
+            .replace("k", "\u043A")
+            .replace("e", "\u0435")
+            .replace("zh", "\u0436")
+            .replace("a", "\u0430")
+            .replace("b", "\u0431")
+            .replace("v", "\u0432")
+            .replace("g", "\u0433")
+            .replace("d", "\u0434")
+            .replace("e", "\u0435")
+            .replace("z", "\u0437")
+            .replace("i", "\u0438")
+            .replace("l", "\u043B")
+            .replace("m", "\u043C")
+            .replace("n", "\u043D")
+            .replace("o", "\u043E")
+            .replace("p", "\u043F")
+            .replace("r", "\u0440")
+            .replace("s", "\u0441")
+            .replace("t", "\u0442")
+            .replace("u", "\u0443")
+            .replace("f", "\u0444")
+            .replace("''", "\u044A")
+            .replace("y", "\u044B")
+            .replace("'", "\u044C")
+        )
 
         return word
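The reverse transliteration above is order-sensitive: multi-letter sequences such as "shch", "i^u", "i^a", "kh" and "zh" must be mapped before the single letters they contain, or the round trip would split them. (The chain's second `.replace("e", "\u0435")` is redundant but harmless, preserved verbatim from upstream.) A toy sketch of the same ordering rule, using a hypothetical three-entry table:

    # Longest-first ordering: "shch" must win before "sh" or "ch" fire.
    pairs = [("shch", "\u0449"), ("sh", "\u0448"), ("ch", "\u0447")]

    def to_cyrillic(s):
        for roman, cyrillic in pairs:
            s = s.replace(roman, cyrillic)
        return s

    print(to_cyrillic("shch"))  # -> "щ", a single letter
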
 
@@ -3864,50 +5378,182 @@ class SpanishStemmer(_StandardStemmer):
     """
 
     __vowels = "aeiou\xE1\xE9\xED\xF3\xFA\xFC"
-    __step0_suffixes = ("selas", "selos", "sela", "selo", "las",
-                        "les", "los", "nos", "me", "se", "la", "le",
-                        "lo")
-    __step1_suffixes = ('amientos', 'imientos', 'amiento', 'imiento',
-                        'aciones', 'uciones', 'adoras', 'adores',
-                        'ancias', 'log\xEDas', 'encias', 'amente',
-                        'idades', 'anzas', 'ismos', 'ables', 'ibles',
-                        'istas', 'adora', 'aci\xF3n', 'antes',
-                        'ancia', 'log\xEDa', 'uci\xf3n', 'encia',
-                        'mente', 'anza', 'icos', 'icas', 'ismo',
-                        'able', 'ible', 'ista', 'osos', 'osas',
-                        'ador', 'ante', 'idad', 'ivas', 'ivos',
-                        'ico',
-                        'ica', 'oso', 'osa', 'iva', 'ivo')
-    __step2a_suffixes = ('yeron', 'yendo', 'yamos', 'yais', 'yan',
-                         'yen', 'yas', 'yes', 'ya', 'ye', 'yo',
-                         'y\xF3')
-    __step2b_suffixes = ('ar\xEDamos', 'er\xEDamos', 'ir\xEDamos',
-                         'i\xE9ramos', 'i\xE9semos', 'ar\xEDais',
-                         'aremos', 'er\xEDais', 'eremos',
-                         'ir\xEDais', 'iremos', 'ierais', 'ieseis',
-                         'asteis', 'isteis', '\xE1bamos',
-                         '\xE1ramos', '\xE1semos', 'ar\xEDan',
-                         'ar\xEDas', 'ar\xE9is', 'er\xEDan',
-                         'er\xEDas', 'er\xE9is', 'ir\xEDan',
-                         'ir\xEDas', 'ir\xE9is',
-                         'ieran', 'iesen', 'ieron', 'iendo', 'ieras',
-                         'ieses', 'abais', 'arais', 'aseis',
-                         '\xE9amos', 'ar\xE1n', 'ar\xE1s',
-                         'ar\xEDa', 'er\xE1n', 'er\xE1s',
-                         'er\xEDa', 'ir\xE1n', 'ir\xE1s',
-                         'ir\xEDa', 'iera', 'iese', 'aste', 'iste',
-                         'aban', 'aran', 'asen', 'aron', 'ando',
-                         'abas', 'adas', 'idas', 'aras', 'ases',
-                         '\xEDais', 'ados', 'idos', 'amos', 'imos',
-                         'emos', 'ar\xE1', 'ar\xE9', 'er\xE1',
-                         'er\xE9', 'ir\xE1', 'ir\xE9', 'aba',
-                         'ada', 'ida', 'ara', 'ase', '\xEDan',
-                         'ado', 'ido', '\xEDas', '\xE1is',
-                         '\xE9is', '\xEDa', 'ad', 'ed', 'id',
-                         'an', 'i\xF3', 'ar', 'er', 'ir', 'as',
-                         '\xEDs', 'en', 'es')
-    __step3_suffixes = ("os", "a", "e", "o", "\xE1",
-                        "\xE9", "\xED", "\xF3")
+    __step0_suffixes = (
+        "selas",
+        "selos",
+        "sela",
+        "selo",
+        "las",
+        "les",
+        "los",
+        "nos",
+        "me",
+        "se",
+        "la",
+        "le",
+        "lo",
+    )
+    __step1_suffixes = (
+        "amientos",
+        "imientos",
+        "amiento",
+        "imiento",
+        "aciones",
+        "uciones",
+        "adoras",
+        "adores",
+        "ancias",
+        "log\xEDas",
+        "encias",
+        "amente",
+        "idades",
+        "anzas",
+        "ismos",
+        "ables",
+        "ibles",
+        "istas",
+        "adora",
+        "aci\xF3n",
+        "antes",
+        "ancia",
+        "log\xEDa",
+        "uci\xf3n",
+        "encia",
+        "mente",
+        "anza",
+        "icos",
+        "icas",
+        "ismo",
+        "able",
+        "ible",
+        "ista",
+        "osos",
+        "osas",
+        "ador",
+        "ante",
+        "idad",
+        "ivas",
+        "ivos",
+        "ico",
+        "ica",
+        "oso",
+        "osa",
+        "iva",
+        "ivo",
+    )
+    __step2a_suffixes = (
+        "yeron",
+        "yendo",
+        "yamos",
+        "yais",
+        "yan",
+        "yen",
+        "yas",
+        "yes",
+        "ya",
+        "ye",
+        "yo",
+        "y\xF3",
+    )
+    __step2b_suffixes = (
+        "ar\xEDamos",
+        "er\xEDamos",
+        "ir\xEDamos",
+        "i\xE9ramos",
+        "i\xE9semos",
+        "ar\xEDais",
+        "aremos",
+        "er\xEDais",
+        "eremos",
+        "ir\xEDais",
+        "iremos",
+        "ierais",
+        "ieseis",
+        "asteis",
+        "isteis",
+        "\xE1bamos",
+        "\xE1ramos",
+        "\xE1semos",
+        "ar\xEDan",
+        "ar\xEDas",
+        "ar\xE9is",
+        "er\xEDan",
+        "er\xEDas",
+        "er\xE9is",
+        "ir\xEDan",
+        "ir\xEDas",
+        "ir\xE9is",
+        "ieran",
+        "iesen",
+        "ieron",
+        "iendo",
+        "ieras",
+        "ieses",
+        "abais",
+        "arais",
+        "aseis",
+        "\xE9amos",
+        "ar\xE1n",
+        "ar\xE1s",
+        "ar\xEDa",
+        "er\xE1n",
+        "er\xE1s",
+        "er\xEDa",
+        "ir\xE1n",
+        "ir\xE1s",
+        "ir\xEDa",
+        "iera",
+        "iese",
+        "aste",
+        "iste",
+        "aban",
+        "aran",
+        "asen",
+        "aron",
+        "ando",
+        "abas",
+        "adas",
+        "idas",
+        "aras",
+        "ases",
+        "\xEDais",
+        "ados",
+        "idos",
+        "amos",
+        "imos",
+        "emos",
+        "ar\xE1",
+        "ar\xE9",
+        "er\xE1",
+        "er\xE9",
+        "ir\xE1",
+        "ir\xE9",
+        "aba",
+        "ada",
+        "ida",
+        "ara",
+        "ase",
+        "\xEDan",
+        "ado",
+        "ido",
+        "\xEDas",
+        "\xE1is",
+        "\xE9is",
+        "\xEDa",
+        "ad",
+        "ed",
+        "id",
+        "an",
+        "i\xF3",
+        "ar",
+        "er",
+        "ir",
+        "as",
+        "\xEDs",
+        "en",
+        "es",
+    )
+    __step3_suffixes = ("os", "a", "e", "o", "\xE1", "\xE9", "\xED", "\xF3")
 
     def stem(self, word):
         """
@@ -3934,18 +5580,30 @@ class SpanishStemmer(_StandardStemmer):
             if not (word.endswith(suffix) and rv.endswith(suffix)):
                 continue
 
-            if ((rv[:-len(suffix)].endswith(("ando", "\xE1ndo",
-                                             "ar", "\xE1r",
-                                             "er", "\xE9r",
-                                             "iendo", "i\xE9ndo",
-                                             "ir", "\xEDr"))) or
-                (rv[:-len(suffix)].endswith("yendo") and
-                    word[:-len(suffix)].endswith("uyendo"))):
-
-                word = self.__replace_accented(word[:-len(suffix)])
-                r1 = self.__replace_accented(r1[:-len(suffix)])
-                r2 = self.__replace_accented(r2[:-len(suffix)])
-                rv = self.__replace_accented(rv[:-len(suffix)])
+            if (
+                rv[: -len(suffix)].endswith(
+                    (
+                        "ando",
+                        "\xE1ndo",
+                        "ar",
+                        "\xE1r",
+                        "er",
+                        "\xE9r",
+                        "iendo",
+                        "i\xE9ndo",
+                        "ir",
+                        "\xEDr",
+                    )
+                )
+            ) or (
+                rv[: -len(suffix)].endswith("yendo")
+                and word[: -len(suffix)].endswith("uyendo")
+            ):
+
+                word = self.__replace_accented(word[: -len(suffix)])
+                r1 = self.__replace_accented(r1[: -len(suffix)])
+                r2 = self.__replace_accented(r2[: -len(suffix)])
+                rv = self.__replace_accented(rv[: -len(suffix)])
             break
 
         # STEP 1: Standard suffix removal
@@ -3974,12 +5632,21 @@ class SpanishStemmer(_StandardStemmer):
 
             elif r2.endswith(suffix):
                 step1_success = True
-                if suffix in ("adora", "ador", "aci\xF3n", "adoras",
-                              "adores", "aciones", "ante", "antes",
-                              "ancia", "ancias"):
-                    word = word[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                if suffix in (
+                    "adora",
+                    "ador",
+                    "aci\xF3n",
+                    "adoras",
+                    "adores",
+                    "aciones",
+                    "ante",
+                    "antes",
+                    "ancia",
+                    "ancias",
+                ):
+                    word = word[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
 
                     if r2.endswith("ic"):
                         word = word[:-2]
@@ -3998,50 +5665,49 @@ class SpanishStemmer(_StandardStemmer):
                     rv = suffix_replace(rv, suffix, "ente")
 
                 elif suffix == "mente":
-                    word = word[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
 
                     if r2.endswith(("ante", "able", "ible")):
                         word = word[:-4]
                         rv = rv[:-4]
 
                 elif suffix in ("idad", "idades"):
-                    word = word[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
 
                     for pre_suff in ("abil", "ic", "iv"):
                         if r2.endswith(pre_suff):
-                            word = word[:-len(pre_suff)]
-                            rv = rv[:-len(pre_suff)]
+                            word = word[: -len(pre_suff)]
+                            rv = rv[: -len(pre_suff)]
 
                 elif suffix in ("ivo", "iva", "ivos", "ivas"):
-                    word = word[:-len(suffix)]
-                    r2 = r2[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r2 = r2[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
                     if r2.endswith("at"):
                         word = word[:-2]
                         rv = rv[:-2]
                 else:
-                    word = word[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
             break
 
         # STEP 2a: Verb suffixes beginning 'y'
         if not step1_success:
             for suffix in self.__step2a_suffixes:
-                if (rv.endswith(suffix) and
-                        word[-len(suffix)-1:-len(suffix)] == "u"):
-                    word = word[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                if rv.endswith(suffix) and word[-len(suffix) - 1 : -len(suffix)] == "u":
+                    word = word[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
                     break
 
-        # STEP 2b: Other verb suffixes
+            # STEP 2b: Other verb suffixes
             for suffix in self.__step2b_suffixes:
                 if rv.endswith(suffix):
-                    word = word[:-len(suffix)]
-                    rv = rv[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    rv = rv[: -len(suffix)]
                     if suffix in ("en", "es", "\xE9is", "emos"):
                         if word.endswith("gu"):
                             word = word[:-1]
@@ -4053,9 +5719,9 @@ class SpanishStemmer(_StandardStemmer):
         # STEP 3: Residual suffix
         for suffix in self.__step3_suffixes:
             if rv.endswith(suffix):
-                word = word[:-len(suffix)]
+                word = word[: -len(suffix)]
                 if suffix in ("e", "\xE9"):
-                    rv = rv[:-len(suffix)]
+                    rv = rv[: -len(suffix)]
 
                     if word[-2:] == "gu" and rv.endswith("u"):
                         word = word[:-1]
@@ -4076,11 +5742,13 @@ class SpanishStemmer(_StandardStemmer):
                  their non-accented counterparts (a, e, i, o, u)
         :rtype: str or unicode
         """
-        return (word.replace("\xE1", "a")
-                .replace("\xE9", "e")
-                .replace("\xED", "i")
-                .replace("\xF3", "o")
-                .replace("\xFA", "u"))
+        return (
+            word.replace("\xE1", "a")
+            .replace("\xE9", "e")
+            .replace("\xED", "i")
+            .replace("\xF3", "o")
+            .replace("\xFA", "u")
+        )
 
 
 class SwedishStemmer(_ScandinavianStemmer):
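Since every change to SpanishStemmer above is formatting-only, stems should be byte-for-byte identical before and after the rewrite. A quick smoke test (the example words are ours, not from the source):

    from nltk.stem.snowball import SpanishStemmer

    stemmer = SpanishStemmer()
    print(stemmer.stem("canciones"))   # expected: "cancion"  (step 2b "es")
    print(stemmer.stem("lentamente"))  # expected: "lent"     (step 1 "amente")
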
@@ -4106,13 +5774,45 @@ class SwedishStemmer(_ScandinavianStemmer):
 
     __vowels = "aeiouy\xE4\xE5\xF6"
     __s_ending = "bcdfghjklmnoprtvy"
-    __step1_suffixes = ("heterna", "hetens", "heter", "heten",
-                        "anden", "arnas", "ernas", "ornas", "andes",
-                        "andet", "arens", "arna", "erna", "orna",
-                        "ande", "arne", "aste", "aren", "ades",
-                        "erns", "ade", "are", "ern", "ens", "het",
-                        "ast", "ad", "en", "ar", "er", "or", "as",
-                        "es", "at", "a", "e", "s")
+    __step1_suffixes = (
+        "heterna",
+        "hetens",
+        "heter",
+        "heten",
+        "anden",
+        "arnas",
+        "ernas",
+        "ornas",
+        "andes",
+        "andet",
+        "arens",
+        "arna",
+        "erna",
+        "orna",
+        "ande",
+        "arne",
+        "aste",
+        "aren",
+        "ades",
+        "erns",
+        "ade",
+        "are",
+        "ern",
+        "ens",
+        "het",
+        "ast",
+        "ad",
+        "en",
+        "ar",
+        "er",
+        "or",
+        "as",
+        "es",
+        "at",
+        "a",
+        "e",
+        "s",
+    )
     __step2_suffixes = ("dd", "gd", "nn", "dt", "gt", "kt", "tt")
     __step3_suffixes = ("fullt", "l\xF6st", "els", "lig", "ig")
 
@@ -4141,8 +5841,8 @@ class SwedishStemmer(_ScandinavianStemmer):
                         word = word[:-1]
                         r1 = r1[:-1]
                 else:
-                    word = word[:-len(suffix)]
-                    r1 = r1[:-len(suffix)]
+                    word = word[: -len(suffix)]
+                    r1 = r1[: -len(suffix)]
                 break
 
         # STEP 2
@@ -4156,7 +5856,7 @@ class SwedishStemmer(_ScandinavianStemmer):
         for suffix in self.__step3_suffixes:
             if r1.endswith(suffix):
                 if suffix in ("els", "lig", "ig"):
-                    word = word[:-len(suffix)]
+                    word = word[: -len(suffix)]
                 elif suffix in ("fullt", "l\xF6st"):
                     word = word[:-1]
                 break
@@ -4175,26 +5875,26 @@ def demo():
 
     """
 
-    import re
     from nltk.corpus import udhr
 
-    udhr_corpus = {"arabic":     "Arabic_Alarabia-Arabic",
-                   "danish":     "Danish_Dansk-Latin1",
-                   "dutch":      "Dutch_Nederlands-Latin1",
-                   "english":    "English-Latin1",
-                   "finnish":    "Finnish_Suomi-Latin1",
-                   "french":     "French_Francais-Latin1",
-                   "german":     "German_Deutsch-Latin1",
-                   "hungarian":  "Hungarian_Magyar-UTF8",
-                   "italian":    "Italian_Italiano-Latin1",
-                   "norwegian":  "Norwegian-Latin1",
-                   "porter":     "English-Latin1",
-                   "portuguese": "Portuguese_Portugues-Latin1",
-                   "romanian":   "Romanian_Romana-Latin2",
-                   "russian":    "Russian-UTF8",
-                   "spanish":    "Spanish-Latin1",
-                   "swedish":    "Swedish_Svenska-Latin1",
-                   }
+    udhr_corpus = {
+        "arabic": "Arabic_Alarabia-Arabic",
+        "danish": "Danish_Dansk-Latin1",
+        "dutch": "Dutch_Nederlands-Latin1",
+        "english": "English-Latin1",
+        "finnish": "Finnish_Suomi-Latin1",
+        "french": "French_Francais-Latin1",
+        "german": "German_Deutsch-Latin1",
+        "hungarian": "Hungarian_Magyar-UTF8",
+        "italian": "Italian_Italiano-Latin1",
+        "norwegian": "Norwegian-Latin1",
+        "porter": "English-Latin1",
+        "portuguese": "Portuguese_Portugues-Latin1",
+        "romanian": "Romanian_Romana-Latin2",
+        "russian": "Russian-UTF8",
+        "spanish": "Spanish-Latin1",
+        "swedish": "Swedish_Svenska-Latin1",
+    }
 
     print("\n")
     print("******************************")
@@ -4203,34 +5903,40 @@ def demo():
 
     while True:
 
-        language = input("Please enter the name of the language " +
-                             "to be demonstrated\n" +
-                             "/".join(SnowballStemmer.languages) +
-                             "\n" +
-                             "(enter 'exit' in order to leave): ")
+        language = input(
+            "Please enter the name of the language "
+            + "to be demonstrated\n"
+            + "/".join(SnowballStemmer.languages)
+            + "\n"
+            + "(enter 'exit' in order to leave): "
+        )
 
         if language == "exit":
             break
 
         if language not in SnowballStemmer.languages:
-            print(("\nOops, there is no stemmer for this language. " +
-                   "Please try again.\n"))
+            print(
+                (
+                    "\nOops, there is no stemmer for this language. "
+                    + "Please try again.\n"
+                )
+            )
             continue
 
         stemmer = SnowballStemmer(language)
-        excerpt = udhr.words(udhr_corpus[language]) [:300]
+        excerpt = udhr.words(udhr_corpus[language])[:300]
 
         stemmed = " ".join(stemmer.stem(word) for word in excerpt)
-        stemmed = re.sub(r"(.{,70})\s", r'\1\n', stemmed+' ').rstrip()
+        stemmed = re.sub(r"(.{,70})\s", r"\1\n", stemmed + " ").rstrip()
         excerpt = " ".join(excerpt)
-        excerpt = re.sub(r"(.{,70})\s", r'\1\n', excerpt+' ').rstrip()
+        excerpt = re.sub(r"(.{,70})\s", r"\1\n", excerpt + " ").rstrip()
 
         print("\n")
-        print('-' * 70)
-        print('ORIGINAL'.center(70))
+        print("-" * 70)
+        print("ORIGINAL".center(70))
         print(excerpt)
         print("\n\n")
-        print('STEMMED RESULTS'.center(70))
+        print("STEMMED RESULTS".center(70))
         print(stemmed)
-        print('-' * 70)
+        print("-" * 70)
         print("\n")
diff --git a/nlp_resource_data/nltk/stem/snowball.pyc b/nlp_resource_data/nltk/stem/snowball.pyc
deleted file mode 100755 (executable)
index 50882bd..0000000
Binary files a/nlp_resource_data/nltk/stem/snowball.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 2ba8547..eec97bd
@@ -1,15 +1,17 @@
 # Natural Language Toolkit: Stemmer Utilities
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Helder <he7d3r@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
+
 def suffix_replace(original, old, new):
     """
     Replaces the old suffix of the original string by a new suffix
     """
-    return original[:-len(old)] + new
+    return original[: -len(old)] + new
+
 
 def prefix_replace(original, old, new):
     """
@@ -19,4 +21,4 @@ def prefix_replace(original, old, new):
     :param new: string
     :return: string
     """
-    return new + original[len(old):]
\ No newline at end of file
+    return new + original[len(old) :]
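Only the slice spacing changed above; both helpers still assume the caller has already verified that `old` is present at the relevant end (note that an empty `old` would make `original[: -len(old)]` evaluate to `original[:0]`, i.e. the empty string). Usage sketch:

    from nltk.stem.util import suffix_replace, prefix_replace

    print(suffix_replace("happiness", "ness", "ly"))  # -> "happily"
    print(prefix_replace("unhappy", "un", ""))        # -> "happy"
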
diff --git a/nlp_resource_data/nltk/stem/util.pyc b/nlp_resource_data/nltk/stem/util.pyc
deleted file mode 100755 (executable)
index 62ec717..0000000
Binary files a/nlp_resource_data/nltk/stem/util.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 3a217ff..33fe049
@@ -1,17 +1,15 @@
 # Natural Language Toolkit: WordNet stemmer interface
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import unicode_literals
 
 from nltk.corpus.reader.wordnet import NOUN
 from nltk.corpus import wordnet
-from nltk.compat import python_2_unicode_compatible
 
-@python_2_unicode_compatible
+
 class WordNetLemmatizer(object):
     """
     WordNet Lemmatizer
@@ -41,11 +39,11 @@ class WordNetLemmatizer(object):
         return min(lemmas, key=len) if lemmas else word
 
     def __repr__(self):
-        return '<WordNetLemmatizer>'
+        return "<WordNetLemmatizer>"
 
 
 # unload wordnet
 def teardown_module(module=None):
     from nltk.corpus import wordnet
-    wordnet._unload()
 
+    wordnet._unload()
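On Python 3 every str is unicode, so the `python_2_unicode_compatible` decorator around `__repr__` is no longer needed. Typical use, assuming the WordNet corpus data is installed (a sketch, not from the source):

    from nltk.stem import WordNetLemmatizer

    wnl = WordNetLemmatizer()
    print(wnl.lemmatize("dogs"))          # -> "dog"  (pos defaults to noun)
    print(wnl.lemmatize("running", "v"))  # -> "run"
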
diff --git a/nlp_resource_data/nltk/stem/wordnet.pyc b/nlp_resource_data/nltk/stem/wordnet.pyc
deleted file mode 100755 (executable)
index e7bc2b8..0000000
Binary files a/nlp_resource_data/nltk/stem/wordnet.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 34c8798..7e6d272
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Taggers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com> (minor additions)
 # URL: <http://nltk.org/>
@@ -26,7 +26,7 @@ An off-the-shelf tagger is available for English. It uses the Penn Treebank tags
     [('John', 'NNP'), ("'s", 'POS'), ('big', 'JJ'), ('idea', 'NN'), ('is', 'VBZ'),
     ("n't", 'RB'), ('all', 'PDT'), ('that', 'DT'), ('bad', 'JJ'), ('.', '.')]
 
-A Russian tagger is also available if you specify lang="rus". It uses 
+A Russian tagger is also available if you specify lang="rus". It uses
 the Russian National Corpus tagset:
 
     >>> pos_tag(word_tokenize("Илья оторопел и дважды перечитал бумажку."), lang='rus')    # doctest: +SKIP
@@ -63,49 +63,76 @@ We evaluate a tagger on data that was not seen during training:
 
 For more information, please consult chapter 5 of the NLTK Book.
 """
-from __future__ import print_function
-
-from nltk.tag.api           import TaggerI
-from nltk.tag.util          import str2tuple, tuple2str, untag
-from nltk.tag.sequential    import (SequentialBackoffTagger, ContextTagger,
-                                    DefaultTagger, NgramTagger, UnigramTagger,
-                                    BigramTagger, TrigramTagger, AffixTagger,
-                                    RegexpTagger, ClassifierBasedTagger,
-                                    ClassifierBasedPOSTagger)
-from nltk.tag.brill         import BrillTagger
+
+from nltk.tag.api import TaggerI
+from nltk.tag.util import str2tuple, tuple2str, untag
+from nltk.tag.sequential import (
+    SequentialBackoffTagger,
+    ContextTagger,
+    DefaultTagger,
+    NgramTagger,
+    UnigramTagger,
+    BigramTagger,
+    TrigramTagger,
+    AffixTagger,
+    RegexpTagger,
+    ClassifierBasedTagger,
+    ClassifierBasedPOSTagger,
+)
+from nltk.tag.brill import BrillTagger
 from nltk.tag.brill_trainer import BrillTaggerTrainer
-from nltk.tag.tnt           import TnT
-from nltk.tag.hunpos        import HunposTagger
-from nltk.tag.stanford      import StanfordTagger, StanfordPOSTagger, StanfordNERTagger
-from nltk.tag.hmm           import HiddenMarkovModelTagger, HiddenMarkovModelTrainer
-from nltk.tag.senna         import SennaTagger, SennaChunkTagger, SennaNERTagger
-from nltk.tag.mapping       import tagset_mapping, map_tag
-from nltk.tag.crf           import CRFTagger
-from nltk.tag.perceptron    import PerceptronTagger
+from nltk.tag.tnt import TnT
+from nltk.tag.hunpos import HunposTagger
+from nltk.tag.stanford import StanfordTagger, StanfordPOSTagger, StanfordNERTagger
+from nltk.tag.hmm import HiddenMarkovModelTagger, HiddenMarkovModelTrainer
+from nltk.tag.senna import SennaTagger, SennaChunkTagger, SennaNERTagger
+from nltk.tag.mapping import tagset_mapping, map_tag
+from nltk.tag.crf import CRFTagger
+from nltk.tag.perceptron import PerceptronTagger
 
 from nltk.data import load, find
 
-RUS_PICKLE = 'taggers/averaged_perceptron_tagger_ru/averaged_perceptron_tagger_ru.pickle'
+RUS_PICKLE = (
+    "taggers/averaged_perceptron_tagger_ru/averaged_perceptron_tagger_ru.pickle"
+)
 
 
 def _get_tagger(lang=None):
-    if lang == 'rus':
+    if lang == "rus":
         tagger = PerceptronTagger(False)
-        ap_russian_model_loc = 'file:' + str(find(RUS_PICKLE))
+        ap_russian_model_loc = "file:" + str(find(RUS_PICKLE))
         tagger.load(ap_russian_model_loc)
     else:
         tagger = PerceptronTagger()
     return tagger
 
 
-def _pos_tag(tokens, tagset, tagger):
-    tagged_tokens = tagger.tag(tokens)
-    if tagset:
-        tagged_tokens = [(token, map_tag('en-ptb', tagset, tag)) for (token, tag) in tagged_tokens]
-    return tagged_tokens
-
-
-def pos_tag(tokens, tagset=None, lang='eng'):
+def _pos_tag(tokens, tagset=None, tagger=None, lang=None):
+    # Currently only supports English and Russian.
+    if lang not in ["eng", "rus"]:
+        raise NotImplementedError(
+            "Currently, NLTK pos_tag only supports English and Russian "
+            "(i.e. lang='eng' or lang='rus')"
+        )
+    else:
+        tagged_tokens = tagger.tag(tokens)
+        if tagset:  # Maps to the specified tagset.
+            if lang == "eng":
+                tagged_tokens = [
+                    (token, map_tag("en-ptb", tagset, tag))
+                    for (token, tag) in tagged_tokens
+                ]
+            elif lang == "rus":
+                # Note that the new Russian pos tags from the model contain suffixes,
+                # see https://github.com/nltk/nltk/issues/2151#issuecomment-430709018
+                tagged_tokens = [
+                    (token, map_tag("ru-rnc-new", tagset, tag.partition("=")[0]))
+                    for (token, tag) in tagged_tokens
+                ]
+        return tagged_tokens
+
+
+def pos_tag(tokens, tagset=None, lang="eng"):
     """
     Use NLTK's currently recommended part of speech tagger to
     tag the given list of tokens.
@@ -131,16 +158,16 @@ def pos_tag(tokens, tagset=None, lang='eng'):
     :rtype: list(tuple(str, str))
     """
     tagger = _get_tagger(lang)
-    return _pos_tag(tokens, tagset, tagger)    
+    return _pos_tag(tokens, tagset, tagger, lang)
 
 
-def pos_tag_sents(sentences, tagset=None, lang='eng'):
+def pos_tag_sents(sentences, tagset=None, lang="eng"):
     """
     Use NLTK's currently recommended part of speech tagger to tag the
     given list of sentences, each consisting of a list of tokens.
 
-    :param tokens: List of sentences to be tagged
-    :type tokens: list(list(str))
+    :param sentences: List of sentences to be tagged
+    :type sentences: list(list(str))
     :param tagset: the tagset to be used, e.g. universal, wsj, brown
     :type tagset: str
     :param lang: the ISO 639 code of the language, e.g. 'eng' for English, 'rus' for Russian
@@ -149,4 +176,4 @@ def pos_tag_sents(sentences, tagset=None, lang='eng'):
     :rtype: list(list(tuple(str, str)))
     """
     tagger = _get_tagger(lang)
-    return [_pos_tag(sent, tagset, tagger) for sent in sentences]
+    return [_pos_tag(sent, tagset, tagger, lang) for sent in sentences]
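`_pos_tag` now rejects unsupported languages up front and, for Russian, strips the model's `=`-suffixed tag annotations before tagset mapping. The calling convention is unchanged; a sketch assuming the averaged-perceptron tagger data is installed:

    from nltk import pos_tag, word_tokenize

    tokens = word_tokenize("John's big idea isn't that bad.")
    print(pos_tag(tokens))  # lang defaults to "eng"
    # pos_tag(tokens, lang="fra") would now raise NotImplementedError.
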
old mode 100755 (executable)
new mode 100644 (file)
similarity index 54%
rename from nlp_resource_data/nltk/tag/__init__.pyc
rename to nlp_resource_data/nltk/tag/__pycache__/__init__.cpython-37.pyc
index 4fada9a..45da18c
Binary files a/nlp_resource_data/nltk/tag/__init__.pyc and b/nlp_resource_data/nltk/tag/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..c683081
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/brill.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/brill.cpython-37.pyc
new file mode 100644 (file)
index 0000000..1ccd1f8
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/brill.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/brill_trainer.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/brill_trainer.cpython-37.pyc
new file mode 100644 (file)
index 0000000..cb58482
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/brill_trainer.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/crf.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/crf.cpython-37.pyc
new file mode 100644 (file)
index 0000000..9b51dc0
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/crf.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/hmm.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/hmm.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2123978
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/hmm.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/hunpos.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/hunpos.cpython-37.pyc
new file mode 100644 (file)
index 0000000..af03b86
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/hunpos.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/mapping.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/mapping.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a2feb16
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/mapping.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/perceptron.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/perceptron.cpython-37.pyc
new file mode 100644 (file)
index 0000000..962a5df
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/perceptron.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/senna.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/senna.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2c9238e
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/senna.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/sequential.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/sequential.cpython-37.pyc
new file mode 100644 (file)
index 0000000..d808ebf
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/sequential.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/stanford.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/stanford.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f85ec79
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/stanford.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tag/__pycache__/tnt.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/tnt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e66f53c
Binary files /dev/null and b/nlp_resource_data/nltk/tag/__pycache__/tnt.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
similarity index 58%
rename from nlp_resource_data/nltk/tag/util.pyc
rename to nlp_resource_data/nltk/tag/__pycache__/util.cpython-37.pyc
index c72489a..48708c0
Binary files a/nlp_resource_data/nltk/tag/util.pyc and b/nlp_resource_data/nltk/tag/__pycache__/util.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index 804c769..c72fb03
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tagger Interface
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com> (minor additions)
 # URL: <http://nltk.org/>
@@ -11,17 +11,15 @@ Interface for tagging each token in a sentence with supplementary
 information, such as its part of speech.
 """
 from abc import ABCMeta, abstractmethod
-from six import add_metaclass
 from itertools import chain
 
+
 from nltk.internals import overridden
 from nltk.metrics import accuracy
-
 from nltk.tag.util import untag
 
 
-@add_metaclass(ABCMeta)
-class TaggerI(object):
+class TaggerI(metaclass=ABCMeta):
     """
     A processing interface for assigning a tag to each token in a list.
     Tags are case sensitive strings that identify some property of each
@@ -35,6 +33,7 @@ class TaggerI(object):
     Subclasses must define:
       - either ``tag()`` or ``tag_sents()`` (or both)
     """
+
     @abstractmethod
     def tag(self, tokens):
         """
@@ -73,8 +72,7 @@ class TaggerI(object):
 
     def _check_params(self, train, model):
         if (train and model) or (not train and not model):
-            raise ValueError(
-                    'Must specify either training data or trained model.')
+            raise ValueError("Must specify either training data or trained model.")
 
 
 class FeaturesetTaggerI(TaggerI):
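`six.add_metaclass(ABCMeta)` and the native Python 3 `metaclass=` keyword produce the same class; dropping `six` simply removes the compatibility shim. An equivalence sketch with an illustrative class name:

    from abc import ABCMeta, abstractmethod

    class TaggerLike(metaclass=ABCMeta):  # the Python 3 spelling used above
        @abstractmethod
        def tag(self, tokens):
            ...

    # Instantiating TaggerLike() raises TypeError until tag() is overridden.
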
diff --git a/nlp_resource_data/nltk/tag/api.pyc b/nlp_resource_data/nltk/tag/api.pyc
deleted file mode 100755 (executable)
index 4aab404..0000000
Binary files a/nlp_resource_data/nltk/tag/api.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 24e4df4..fe280a3
@@ -1,15 +1,13 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Marcus Uneson <marcus.uneson@gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
 # URL: <http://nltk.org/>
 # For license information, see  LICENSE.TXT
 
-from __future__ import print_function, division
-
 from collections import defaultdict, Counter
 
 from nltk.tag import TaggerI
@@ -21,13 +19,14 @@ from nltk import jsontags
 # Brill Templates
 ######################################################################
 
+
 @jsontags.register_tag
 class Word(Feature):
     """
     Feature which examines the text (word) of nearby tokens.
     """
 
-    json_tag = 'nltk.tag.brill.Word'
+    json_tag = "nltk.tag.brill.Word"
 
     @staticmethod
     def extract_property(tokens, index):
@@ -41,7 +40,7 @@ class Pos(Feature):
     Feature which examines the tags of nearby tokens.
     """
 
-    json_tag = 'nltk.tag.brill.Pos'
+    json_tag = "nltk.tag.brill.Pos"
 
     @staticmethod
     def extract_property(tokens, index):
@@ -133,7 +132,7 @@ def fntbl37():
         Template(Pos([-1]), Word([0]), Word([1])),
         Template(Pos([-2]), Pos([-1])),
         Template(Pos([1]), Pos([2])),
-        Template(Pos([1]), Pos([2]), Word([1]))
+        Template(Pos([1]), Pos([2]), Word([1])),
     ]
 
 
@@ -188,6 +187,7 @@ def describe_template_sets():
 # The Brill Tagger
 ######################################################################
 
+
 @jsontags.register_tag
 class BrillTagger(TaggerI):
     """
@@ -204,7 +204,7 @@ class BrillTagger(TaggerI):
     of the TaggerTrainers available.
     """
 
-    json_tag = 'nltk.tag.BrillTagger'
+    json_tag = "nltk.tag.BrillTagger"
 
     def __init__(self, initial_tagger, rules, training_stats=None):
         """
@@ -304,9 +304,11 @@ class BrillTagger(TaggerI):
         tids = [r.templateid for r in self._rules]
         train_stats = self.train_stats()
 
-        trainscores = train_stats['rulescores']
-        assert len(trainscores) == len(tids), "corrupt statistics: " \
+        trainscores = train_stats["rulescores"]
+        assert len(trainscores) == len(tids), (
+            "corrupt statistics: "
             "{0} train scores for {1} rules".format(trainscores, tids)
+        )
         template_counts = Counter(tids)
         weighted_traincounts = Counter()
         for (tid, score) in zip(tids, trainscores):
@@ -321,59 +323,75 @@ class BrillTagger(TaggerI):
             return (tpl_value[1], repr(tpl_value[0]))
 
         def print_train_stats():
-            print("TEMPLATE STATISTICS (TRAIN)  {0} templates, {1} rules)".format(
-                len(template_counts),
-                len(tids))
+            print(
+                "TEMPLATE STATISTICS (TRAIN)  {0} templates, {1} rules)".format(
+                    len(template_counts), len(tids)
+                )
+            )
+            print(
+                "TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
+                "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats)
             )
-            print("TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
-                  "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats))
             head = "#ID | Score (train) |  #Rules     | Template"
             print(head, "\n", "-" * len(head), sep="")
-            train_tplscores = sorted(weighted_traincounts.items(), key=det_tplsort, reverse=True)
+            train_tplscores = sorted(
+                weighted_traincounts.items(), key=det_tplsort, reverse=True
+            )
             for (tid, trainscore) in train_tplscores:
                 s = "{0} | {1:5d}   {2:5.3f} |{3:4d}   {4:.3f} | {5}".format(
                     tid,
                     trainscore,
-                    trainscore/tottrainscores,
+                    trainscore / tottrainscores,
                     template_counts[tid],
-                    template_counts[tid]/len(tids),
+                    template_counts[tid] / len(tids),
                     Template.ALLTEMPLATES[int(tid)],
                 )
                 print(s)
 
         def print_testtrain_stats():
-            testscores = test_stats['rulescores']
-            print("TEMPLATE STATISTICS (TEST AND TRAIN) ({0} templates, {1} rules)".format(
-                len(template_counts),
-                len(tids)),
+            testscores = test_stats["rulescores"]
+            print(
+                "TEMPLATE STATISTICS (TEST AND TRAIN) ({0} templates, {1} rules)".format(
+                    len(template_counts), len(tids)
+                )
+            )
+            print(
+                "TEST  ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
+                "final: {finalerrors:5d} {finalacc:.4f} ".format(**test_stats)
+            )
+            print(
+                "TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
+                "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats)
             )
-            print("TEST  ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
-                  "final: {finalerrors:5d} {finalacc:.4f} ".format(**test_stats))
-            print("TRAIN ({tokencount:7d} tokens) initial {initialerrors:5d} {initialacc:.4f} "
-                  "final: {finalerrors:5d} {finalacc:.4f} ".format(**train_stats))
             weighted_testcounts = Counter()
             for (tid, score) in zip(tids, testscores):
                 weighted_testcounts[tid] += score
             tottestscores = sum(testscores)
             head = "#ID | Score (test) | Score (train) |  #Rules     | Template"
             print(head, "\n", "-" * len(head), sep="")
-            test_tplscores = sorted(weighted_testcounts.items(), key=det_tplsort, reverse=True)
+            test_tplscores = sorted(
+                weighted_testcounts.items(), key=det_tplsort, reverse=True
+            )
             for (tid, testscore) in test_tplscores:
                 s = "{0:s} |{1:5d}  {2:6.3f} |  {3:4d}   {4:.3f} |{5:4d}   {6:.3f} | {7:s}".format(
                     tid,
                     testscore,
-                    testscore/tottestscores,
+                    testscore / tottestscores,
                     weighted_traincounts[tid],
-                    weighted_traincounts[tid]/tottrainscores,
+                    weighted_traincounts[tid] / tottrainscores,
                     template_counts[tid],
-                    template_counts[tid]/len(tids),
+                    template_counts[tid] / len(tids),
                     Template.ALLTEMPLATES[int(tid)],
                 )
                 print(s)
 
         def print_unused_templates():
-            usedtpls = set([int(tid) for tid in tids])
-            unused = [(tid, tpl) for (tid, tpl) in enumerate(Template.ALLTEMPLATES) if tid not in usedtpls]
+            usedtpls = set(int(tid) for tid in tids)
+            unused = [
+                (tid, tpl)
+                for (tid, tpl) in enumerate(Template.ALLTEMPLATES)
+                if tid not in usedtpls
+            ]
             print("UNUSED TEMPLATES ({0})".format(len(unused)))
 
             for (tid, tpl) in unused:
@@ -404,21 +422,29 @@ class BrillTagger(TaggerI):
         :type gold: list of list of strings
         :returns: tuple of (tagged_sequences, ordered list of rule scores (one for each rule))
         """
+
         def counterrors(xs):
             return sum(t[1] != g[1] for pair in zip(xs, gold) for (t, g) in zip(*pair))
+
         testing_stats = {}
-        testing_stats['tokencount'] = sum(len(t) for t in sequences)
-        testing_stats['sequencecount'] = len(sequences)
+        testing_stats["tokencount"] = sum(len(t) for t in sequences)
+        testing_stats["sequencecount"] = len(sequences)
         tagged_tokenses = [self._initial_tagger.tag(tokens) for tokens in sequences]
-        testing_stats['initialerrors'] = counterrors(tagged_tokenses)
-        testing_stats['initialacc'] = 1 - testing_stats['initialerrors']/testing_stats['tokencount']
+        testing_stats["initialerrors"] = counterrors(tagged_tokenses)
+        testing_stats["initialacc"] = (
+            1 - testing_stats["initialerrors"] / testing_stats["tokencount"]
+        )
         # Apply each rule to the entire corpus, in order
-        errors = [testing_stats['initialerrors']]
+        errors = [testing_stats["initialerrors"]]
         for rule in self._rules:
             for tagged_tokens in tagged_tokenses:
                 rule.apply(tagged_tokens)
             errors.append(counterrors(tagged_tokenses))
-        testing_stats['rulescores'] = [err0 - err1 for (err0, err1) in zip(errors, errors[1:])]
-        testing_stats['finalerrors'] = errors[-1]
-        testing_stats['finalacc'] = 1 - testing_stats['finalerrors']/testing_stats['tokencount']
+        testing_stats["rulescores"] = [
+            err0 - err1 for (err0, err1) in zip(errors, errors[1:])
+        ]
+        testing_stats["finalerrors"] = errors[-1]
+        testing_stats["finalacc"] = (
+            1 - testing_stats["finalerrors"] / testing_stats["tokencount"]
+        )
         return (tagged_tokenses, testing_stats)
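
The rulescores computation above is just the successive drop in the error count as each rule is applied in order; a tiny sketch with made-up numbers:

    errors = [90, 75, 70, 70]       # hypothetical: initial errors, then after each rule
    rulescores = [err0 - err1 for (err0, err1) in zip(errors, errors[1:])]
    total_repaired = errors[0] - errors[-1]
    print(rulescores, total_repaired)           # [15, 5, 0] 20
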
diff --git a/nlp_resource_data/nltk/tag/brill.pyc b/nlp_resource_data/nltk/tag/brill.pyc
deleted file mode 100755 (executable)
index 8cd4eea..0000000
Binary files a/nlp_resource_data/nltk/tag/brill.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tag/brill_trainer.py b/nlp_resource_data/nltk/tag/brill_trainer.py
old mode 100755 (executable)
new mode 100644 (file)
index fde697e..b284a03
@@ -8,8 +8,6 @@
 # URL: <http://nltk.org/>
 # For license information, see  LICENSE.TXT
 
-from __future__ import print_function, division
-
 import bisect
 import textwrap
 from collections import defaultdict
@@ -25,8 +23,10 @@ class BrillTaggerTrainer(object):
     """
     A trainer for tbl taggers.
     """
-    def __init__(self, initial_tagger, templates, trace=0,
-                 deterministic=None, ruleformat="str"):
+
+    def __init__(
+        self, initial_tagger, templates, trace=0, deterministic=None, ruleformat="str"
+    ):
         """
         Construct a Brill tagger from a baseline tagger and a
         set of templates
@@ -46,7 +46,7 @@ class BrillTaggerTrainer(object):
         """
 
         if deterministic is None:
-            deterministic = (trace > 0)
+            deterministic = trace > 0
         self._initial_tagger = initial_tagger
         self._templates = templates
         self._trace = trace
@@ -245,26 +245,33 @@ class BrillTaggerTrainer(object):
         # Create a new copy of the training corpus, and run the
         # initial tagger on it.  We will progressively update this
         # test corpus to look more like the training corpus.
-        test_sents = [list(self._initial_tagger.tag(untag(sent)))
-                      for sent in train_sents]
+        test_sents = [
+            list(self._initial_tagger.tag(untag(sent))) for sent in train_sents
+        ]
 
         # Collect some statistics on the training process
         trainstats = {}
-        trainstats['min_acc'] = min_acc
-        trainstats['min_score'] = min_score
-        trainstats['tokencount'] = sum(len(t) for t in test_sents)
-        trainstats['sequencecount'] = len(test_sents)
-        trainstats['templatecount'] = len(self._templates)
-        trainstats['rulescores'] = []
-        trainstats['initialerrors'] = sum(
+        trainstats["min_acc"] = min_acc
+        trainstats["min_score"] = min_score
+        trainstats["tokencount"] = sum(len(t) for t in test_sents)
+        trainstats["sequencecount"] = len(test_sents)
+        trainstats["templatecount"] = len(self._templates)
+        trainstats["rulescores"] = []
+        trainstats["initialerrors"] = sum(
             tag[1] != truth[1]
             for paired in zip(test_sents, train_sents)
             for (tag, truth) in zip(*paired)
         )
-        trainstats['initialacc'] = 1 - trainstats['initialerrors']/trainstats['tokencount']
+        trainstats["initialacc"] = (
+            1 - trainstats["initialerrors"] / trainstats["tokencount"]
+        )
         if self._trace > 0:
-            print("TBL train (fast) (seqs: {sequencecount}; tokens: {tokencount}; "
-                  "tpls: {templatecount}; min score: {min_score}; min acc: {min_acc})".format(**trainstats))
+            print(
+                "TBL train (fast) (seqs: {sequencecount}; tokens: {tokencount}; "
+                "tpls: {templatecount}; min score: {min_score}; min acc: {min_acc})".format(
+                    **trainstats
+                )
+            )
 
         # Initialize our mappings.  This will find any errors made
         # by the initial tagger, and use those to generate repair
@@ -273,7 +280,7 @@ class BrillTaggerTrainer(object):
             print("Finding initial useful rules...")
         self._init_mappings(test_sents, train_sents)
         if self._trace:
-            print(("    Found %d useful rules." % len(self._rule_scores)))
+            print(("    Found {} useful rules.".format(len(self._rule_scores))))
 
         # Let the user know what we're up to.
         if self._trace > 2:
@@ -284,13 +291,13 @@ class BrillTaggerTrainer(object):
         # Repeatedly select the best rule, and add it to `rules`.
         rules = []
         try:
-            while (len(rules) < max_rules):
+            while len(rules) < max_rules:
                 # Find the best rule, and add it to our rule list.
                 rule = self._best_rule(train_sents, test_sents, min_score, min_acc)
                 if rule:
                     rules.append(rule)
                     score = self._rule_scores[rule]
-                    trainstats['rulescores'].append(score)
+                    trainstats["rulescores"].append(score)
                 else:
                     break  # No more good rules left!
 
@@ -311,12 +318,16 @@ class BrillTaggerTrainer(object):
 
         # The user can cancel training manually:
         except KeyboardInterrupt:
-            print("Training stopped manually -- %d rules found" % len(rules))
+            print("Training stopped manually -- {} rules found".format(len(rules)))
 
         # Discard our tag position mapping & rule mappings.
         self._clean()
-        trainstats['finalerrors'] = trainstats['initialerrors'] - sum(trainstats['rulescores'])
-        trainstats['finalacc'] = 1 - trainstats['finalerrors']/trainstats['tokencount']
+        trainstats["finalerrors"] = trainstats["initialerrors"] - sum(
+            trainstats["rulescores"]
+        )
+        trainstats["finalacc"] = (
+            1 - trainstats["finalerrors"] / trainstats["tokencount"]
+        )
         # Create and return a tagger from the rules we found.
         return BrillTagger(self._initial_tagger, rules, trainstats)
 
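
A hedged usage sketch of the trainer as converted above; it assumes NLTK is installed with the Brown corpus data downloaded, and is illustrative rather than part of this change:

    from nltk.corpus import brown
    from nltk.tag import UnigramTagger, BrillTaggerTrainer
    from nltk.tag.brill import fntbl37

    train_sents = brown.tagged_sents(categories="news")[:500]
    baseline = UnigramTagger(train_sents)               # simple initial tagger
    trainer = BrillTaggerTrainer(baseline, fntbl37(), trace=1)
    tagger = trainer.train(train_sents, max_rules=10)   # learns up to 10 repair rules
    print(tagger.tag("The quick brown fox".split()))
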
@@ -344,8 +355,7 @@ class BrillTaggerTrainer(object):
                 correct_tag = train_sents[sentnum][wordnum][1]
                 if tag != correct_tag:
                     for rule in self._find_rules(sent, wordnum, correct_tag):
-                        self._update_rule_applies(rule, sentnum, wordnum,
-                                                  train_sents)
+                        self._update_rule_applies(rule, sentnum, wordnum, train_sents)
 
     def _clean(self):
         self._tag_positions = None
@@ -445,11 +455,9 @@ class BrillTaggerTrainer(object):
                 for i in range(start, len(positions)):
                     sentnum, wordnum = positions[i]
                     if rule.applies(test_sents[sentnum], wordnum):
-                        self._update_rule_applies(rule, sentnum, wordnum,
-                                                  train_sents)
+                        self._update_rule_applies(rule, sentnum, wordnum, train_sents)
                         if self._rule_scores[rule] < max_score:
-                            self._first_unknown_position[rule] = (sentnum,
-                                                                  wordnum+1)
+                            self._first_unknown_position[rule] = (sentnum, wordnum + 1)
                             break  # The update demoted the rule.
 
                 if self._rule_scores[rule] == max_score:
@@ -463,7 +471,7 @@ class BrillTaggerTrainer(object):
                         num_broken = len([c for c in changes if c == -1])
                         # acc here is fixed/(fixed+broken); could also be
                         # fixed/(fixed+broken+other) == num_fixed/len(changes)
-                        acc = num_fixed/(num_fixed+num_broken)
+                        acc = num_fixed / (num_fixed + num_broken)
                         if acc >= min_acc:
                             return rule
                         # else: rule too inaccurate, discard and try next
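
The accuracy gate above as a worked one-liner, with hypothetical counts: a rule is only accepted when fixed/(fixed+broken) reaches min_acc.

    num_fixed, num_broken, min_acc = 12, 3, 0.8
    acc = num_fixed / (num_fixed + num_broken)  # 12/15 = 0.8
    print(acc >= min_acc)                       # True: the rule is kept
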
@@ -536,15 +544,17 @@ class BrillTaggerTrainer(object):
             # Check if the change causes our templates to propose any
             # new rules for this position.
             for template in self._templates:
-                for new_rule in template.applicable_rules(test_sent, wordnum,
-                                                          correct_tag):
+                for new_rule in template.applicable_rules(
+                    test_sent, wordnum, correct_tag
+                ):
                     if new_rule not in old_rules:
                         num_new += 1
                         if new_rule not in self._rule_scores:
                             num_unseen += 1
                         old_rules.add(new_rule)
-                        self._update_rule_applies(new_rule, sentnum,
-                                                  wordnum, train_sents)
+                        self._update_rule_applies(
+                            new_rule, sentnum, wordnum, train_sents
+                        )
 
             # We may have caused other rules to match here, that are
             # not proposed by our templates -- in particular, rules
@@ -556,8 +566,9 @@ class BrillTaggerTrainer(object):
                     if new_rule not in old_rules:
                         num_new += 1
                         if new_rule.applies(test_sent, wordnum):
-                            self._update_rule_applies(new_rule, sentnum,
-                                                      wordnum, train_sents)
+                            self._update_rule_applies(
+                                new_rule, sentnum, wordnum, train_sents
+                            )
 
         if self._trace > 3:
             self._trace_update_rules(num_obsolete, num_new, num_unseen)
@@ -565,7 +576,8 @@ class BrillTaggerTrainer(object):
     # Tracing
 
     def _trace_header(self):
-        print("""
+        print(
+            """
            B      |
    S   F   r   O  |        Score = Fixed - Broken
    c   i   o   t  |  R     Fixed = num tags changed incorrect -> correct
@@ -573,7 +585,8 @@ class BrillTaggerTrainer(object):
    r   e   e   e  |  l     Other = num tags changed incorrect -> incorrect
    e   d   n   r  |  e
 ------------------+-------------------------------------------------------
-        """.rstrip())
+        """.rstrip()
+        )
 
     def _trace_rule(self, rule):
         assert self._rule_scores[rule] == sum(self._positions_by_rule[rule].values())
@@ -586,23 +599,31 @@ class BrillTaggerTrainer(object):
 
         rulestr = rule.format(self._ruleformat)
         if self._trace > 2:
-            print('%4d%4d%4d%4d  |' % (score, num_fixed, num_broken, num_other), end=' ')
-            print(textwrap.fill(rulestr, initial_indent=' '*20, width=79,
-                                subsequent_indent=' '*18+'|   ').strip())
+            print(
+                "{:4d}{:4d}{:4d}{:4d}  |".format(score, num_fixed, num_broken, num_other), end=" "
+            )
+            print(
+                textwrap.fill(
+                    rulestr,
+                    initial_indent=" " * 20,
+                    width=79,
+                    subsequent_indent=" " * 18 + "|   ",
+                ).strip()
+            )
         else:
             print(rulestr)
 
     def _trace_apply(self, num_updates):
-        prefix = ' '*18+'|'
+        prefix = " " * 18 + "|"
         print(prefix)
-        print(prefix, 'Applying rule to %d positions.' % num_updates)
+        print(prefix, "Applying rule to {} positions.".format(num_updates))
 
     def _trace_update_rules(self, num_obsolete, num_new, num_unseen):
-        prefix = ' '*18+'|'
-        print(prefix, 'Updated rule tables:')
-        print(prefix, ('  - %d rule applications removed' % num_obsolete))
-        print(prefix, ('  - %d rule applications added (%d novel)' %
-                       (num_new, num_unseen)))
+        prefix = " " * 18 + "|"
+        print(prefix, "Updated rule tables:")
+        print(prefix, ("  - {} rule applications removed".format(num_obsolete)))
+        print(
+            prefix,
+            ("  - {} rule applications added ({} novel)".format(num_new, num_unseen)),
+        )
         print(prefix)
-
-
diff --git a/nlp_resource_data/nltk/tag/brill_trainer.pyc b/nlp_resource_data/nltk/tag/brill_trainer.pyc
deleted file mode 100755 (executable)
index 910c147..0000000
Binary files a/nlp_resource_data/nltk/tag/brill_trainer.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tag/crf.py b/nlp_resource_data/nltk/tag/crf.py
old mode 100755 (executable)
new mode 100644 (file)
index 6a33aca..48f9de1
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the CRFSuite Tagger
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Long Duong <longdt219@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -9,10 +9,9 @@
 """
 A module for POS tagging using CRFSuite
 """
-from __future__ import absolute_import
-from __future__ import unicode_literals
+
 import unicodedata
-import re 
+import re
 from nltk.tag.api import TaggerI
 
 try:
@@ -20,45 +19,45 @@ try:
 except ImportError:
     pass
 
+
 class CRFTagger(TaggerI):
     """
     A module for POS tagging using CRFSuite https://pypi.python.org/pypi/python-crfsuite
-    
+
     >>> from nltk.tag import CRFTagger
     >>> ct = CRFTagger()
+
     >>> train_data = [[('University','Noun'), ('is','Verb'), ('a','Det'), ('good','Adj'), ('place','Noun')],
     ... [('dog','Noun'),('eat','Verb'),('meat','Noun')]]
-    
+
     >>> ct.train(train_data,'model.crf.tagger')
     >>> ct.tag_sents([['dog','is','good'], ['Cat','eat','meat']])
     [[('dog', 'Noun'), ('is', 'Verb'), ('good', 'Adj')], [('Cat', 'Noun'), ('eat', 'Verb'), ('meat', 'Noun')]]
-    
-    >>> gold_sentences = [[('dog','Noun'),('is','Verb'),('good','Adj')] , [('Cat','Noun'),('eat','Verb'), ('meat','Noun')]] 
-    >>> ct.evaluate(gold_sentences) 
+
+    >>> gold_sentences = [[('dog','Noun'),('is','Verb'),('good','Adj')] , [('Cat','Noun'),('eat','Verb'), ('meat','Noun')]]
+    >>> ct.evaluate(gold_sentences)
     1.0
-    
-    Setting learned model file  
-    >>> ct = CRFTagger() 
+
+    Setting learned model file
+    >>> ct = CRFTagger()
     >>> ct.set_model_file('model.crf.tagger')
     >>> ct.evaluate(gold_sentences)
     1.0
-    
+
     """
-    
-    
-    def __init__(self,  feature_func = None, verbose = False, training_opt = {}):
+
+    def __init__(self, feature_func=None, verbose=False, training_opt={}):
         """
-        Initialize the CRFSuite tagger 
-        :param feature_func: The function that extracts features for each token of a sentence. This function should take 
-        2 parameters: tokens and index which extract features at index position from tokens list. See the build in 
-        _get_features function for more detail.   
+        Initialize the CRFSuite tagger
+        :param feature_func: The function that extracts features for each token of a sentence. This function should take
+        two parameters, tokens and index, and extract features for the token at position index in the tokens list. See the
+        built-in _get_features function for more detail.
         :param verbose: output the debugging messages during training.
-        :type verbose: boolean  
+        :type verbose: boolean
         :param training_opt: python-crfsuite training options
-        :type training_opt : dictionary 
-        
-        Set of possible training options (using LBFGS training algorithm).  
+        :type training_opt : dictionary
+
+        Set of possible training options (using LBFGS training algorithm).
          'feature.minfreq' : The minimum frequency of features.
          'feature.possible_states' : Force to generate possible state features.
          'feature.possible_transitions' : Force to generate possible transition features.
@@ -74,131 +73,132 @@ class CRFTagger(TaggerI):
                            { 'MoreThuente': More and Thuente's method,
                               'Backtracking': Backtracking method with regular Wolfe condition,
                               'StrongBacktracking': Backtracking method with strong Wolfe condition
-                           } 
+                           }
          'max_linesearch' :  The maximum number of trials for the line search algorithm.
-         
+
         """
-                   
-        self._model_file = ''
+
+        self._model_file = ""
         self._tagger = pycrfsuite.Tagger()
-        
+
         if feature_func is None:
-            self._feature_func =  self._get_features
+            self._feature_func = self._get_features
         else:
-            self._feature_func =  feature_func
-        
-        self._verbose = verbose 
+            self._feature_func = feature_func
+
+        self._verbose = verbose
         self._training_options = training_opt
-        self._pattern = re.compile(r'\d')
-        
+        self._pattern = re.compile(r"\d")
+
     def set_model_file(self, model_file):
         self._model_file = model_file
         self._tagger.open(self._model_file)
-            
+
     def _get_features(self, tokens, idx):
         """
-        Extract basic features about this word including 
-             - Current Word 
+        Extract basic features about this word including
+             - Current Word
              - Is Capitalized ?
              - Has Punctuation ?
              - Has Number ?
              - Suffixes up to length 3
-        Note that : we might include feature over previous word, next word ect. 
-        
+        Note that we might also include features over the previous word, the next word, etc.
+
         :return : a list which contains the features
-        :rtype : list(str)    
-        
-        """ 
+        :rtype : list(str)
+
+        """
         token = tokens[idx]
-        
+
         feature_list = []
-        
+
         if not token:
             return feature_list
-            
-        # Capitalization 
+
+        # Capitalization
         if token[0].isupper():
-            feature_list.append('CAPITALIZATION')
-        
-        # Number 
+            feature_list.append("CAPITALIZATION")
+
+        # Number
         if re.search(self._pattern, token) is not None:
-            feature_list.append('HAS_NUM') 
-        
+            feature_list.append("HAS_NUM")
+
         # Punctuation
         punc_cat = set(["Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po"])
-        if all (unicodedata.category(x) in punc_cat for x in token):
-            feature_list.append('PUNCTUATION')
-        
+        if all(unicodedata.category(x) in punc_cat for x in token):
+            feature_list.append("PUNCTUATION")
+
         # Suffix up to length 3
         if len(token) > 1:
-            feature_list.append('SUF_' + token[-1:]) 
-        if len(token) > 2: 
-            feature_list.append('SUF_' + token[-2:])    
-        if len(token) > 3: 
-            feature_list.append('SUF_' + token[-3:])
-            
-        feature_list.append('WORD_' + token )
-        
+            feature_list.append("SUF_" + token[-1:])
+        if len(token) > 2:
+            feature_list.append("SUF_" + token[-2:])
+        if len(token) > 3:
+            feature_list.append("SUF_" + token[-3:])
+
+        feature_list.append("WORD_" + token)
+
         return feature_list
-        
+
     def tag_sents(self, sents):
-        '''
-        Tag a list of sentences. NB before using this function, user should specify the mode_file either by 
-                       - Train a new model using ``train'' function 
-                       - Use the pre-trained model which is set via ``set_model_file'' function  
-        :params sentences : list of sentences needed to tag. 
+        """
+        Tag a list of sentences. NB: before using this function, the user should specify the model_file either by
+                       - Train a new model using ``train'' function
+                       - Use the pre-trained model which is set via ``set_model_file'' function
+        :params sentences : list of sentences needed to tag.
         :type sentences : list(list(str))
-        :return : list of tagged sentences. 
-        :rtype : list (list (tuple(str,str))) 
-        '''
-        if self._model_file == '':
-            raise Exception(' No model file is found !! Please use train or set_model_file function')
-        
+        :return : list of tagged sentences.
+        :rtype : list (list (tuple(str,str)))
+        """
+        if self._model_file == "":
+            raise Exception(
+                " No model file is found !! Please use train or set_model_file function"
+            )
+
         # We need the list of sentences instead of the list generator for matching the input and output
-        result = []  
+        result = []
         for tokens in sents:
-            features = [self._feature_func(tokens,i) for i in range(len(tokens))]
+            features = [self._feature_func(tokens, i) for i in range(len(tokens))]
             labels = self._tagger.tag(features)
-                
+
             if len(labels) != len(tokens):
-                raise Exception(' Predicted Length Not Matched, Expect Errors !')
-            
-            tagged_sent = list(zip(tokens,labels))
+                raise Exception(" Predicted Length Not Matched, Expect Errors !")
+
+            tagged_sent = list(zip(tokens, labels))
             result.append(tagged_sent)
-            
-        return result 
-    
+
+        return result
+
     def train(self, train_data, model_file):
-        '''
-        Train the CRF tagger using CRFSuite  
-        :params train_data : is the list of annotated sentences.        
+        """
+        Train the CRF tagger using CRFSuite
+        :params train_data : the list of annotated sentences.
         :type train_data : list (list(tuple(str,str)))
-        :params model_file : the model will be saved to this file.     
-         
-        '''
+        :params model_file : the model will be saved to this file.
+
+        """
         trainer = pycrfsuite.Trainer(verbose=self._verbose)
         trainer.set_params(self._training_options)
-        
+
         for sent in train_data:
-            tokens,labels = zip(*sent)
-            features = [self._feature_func(tokens,i) for i in range(len(tokens))]
-            trainer.append(features,labels)
-                        
+            tokens, labels = zip(*sent)
+            features = [self._feature_func(tokens, i) for i in range(len(tokens))]
+            trainer.append(features, labels)
+
         # Now train the model, the output should be model_file
         trainer.train(model_file)
         # Save the model file
-        self.set_model_file(model_file) 
+        self.set_model_file(model_file)
 
     def tag(self, tokens):
-        '''
-        Tag a sentence using Python CRFSuite Tagger. NB before using this function, user should specify the mode_file either by 
-                       - Train a new model using ``train'' function 
-                       - Use the pre-trained model which is set via ``set_model_file'' function  
-        :params tokens : list of tokens needed to tag. 
+        """
+        Tag a sentence using the Python CRFSuite Tagger. NB: before using this function, the user should specify the model_file either by
+                       - Train a new model using ``train'' function
+                       - Use the pre-trained model which is set via ``set_model_file'' function
+        :params tokens : list of tokens needed to tag.
         :type tokens : list(str)
-        :return : list of tagged tokens. 
-        :rtype : list (tuple(str,str)) 
-        '''
-        
-        return self.tag_sents([tokens])[0]
+        :return : list of tagged tokens.
+        :rtype : list (tuple(str,str))
+        """
 
+        return self.tag_sents([tokens])[0]
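
As a standalone illustration of the feature scheme implemented in _get_features earlier in this file (pure standard library, no pycrfsuite required), applied to a hypothetical token:

    import re
    import unicodedata

    token = "Dogs3"
    features = []
    if token[0].isupper():                      # capitalization
        features.append("CAPITALIZATION")
    if re.search(r"\d", token):                 # contains a digit
        features.append("HAS_NUM")
    punc_cat = {"Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po"}
    if all(unicodedata.category(ch) in punc_cat for ch in token):
        features.append("PUNCTUATION")
    for n in (1, 2, 3):                         # suffixes up to length 3
        if len(token) > n:
            features.append("SUF_" + token[-n:])
    features.append("WORD_" + token)
    print(features)
    # ['CAPITALIZATION', 'HAS_NUM', 'SUF_3', 'SUF_s3', 'SUF_gs3', 'WORD_Dogs3']
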
diff --git a/nlp_resource_data/nltk/tag/crf.pyc b/nlp_resource_data/nltk/tag/crf.pyc
deleted file mode 100755 (executable)
index ccc71e7..0000000
Binary files a/nlp_resource_data/nltk/tag/crf.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tag/hmm.py b/nlp_resource_data/nltk/tag/hmm.py
old mode 100755 (executable)
new mode 100644 (file)
index 309f6fe..6e543d9
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Hidden Markov Model
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Trevor Cohn <tacohn@csse.unimelb.edu.au>
 #         Philip Blunsom <pcbl@csse.unimelb.edu.au>
 #         Tiago Tresoldi <tiago@tresoldi.pro.br> (fixes)
@@ -68,36 +68,39 @@ of EM.
 For more information, please consult the source code for this module,
 which includes extensive demonstration code.
 """
-from __future__ import print_function, unicode_literals, division
 
 import re
 import itertools
 
-from six.moves import map, zip
-
 try:
     import numpy as np
 except ImportError:
     pass
 
-from nltk.probability import (FreqDist, ConditionalFreqDist,
-                              ConditionalProbDist, DictionaryProbDist,
-                              DictionaryConditionalProbDist,
-                              LidstoneProbDist, MutableProbDist,
-                              MLEProbDist, RandomProbDist)
+from nltk.probability import (
+    FreqDist,
+    ConditionalFreqDist,
+    ConditionalProbDist,
+    DictionaryProbDist,
+    DictionaryConditionalProbDist,
+    LidstoneProbDist,
+    MutableProbDist,
+    MLEProbDist,
+    RandomProbDist,
+)
 from nltk.metrics import accuracy
 from nltk.util import LazyMap, unique_list
-from nltk.compat import python_2_unicode_compatible
 from nltk.tag.api import TaggerI
 
 
 _TEXT = 0  # index of text in a tuple
-_TAG = 1   # index of tag in a tuple
+_TAG = 1  # index of tag in a tuple
+
 
 def _identity(labeled_symbols):
     return labeled_symbols
 
-@python_2_unicode_compatible
+
 class HiddenMarkovModelTagger(TaggerI):
     """
     Hidden Markov model class, a generative model for labelling sequence data.
@@ -133,8 +136,10 @@ class HiddenMarkovModelTagger(TaggerI):
         instances, defaults to the identity function.
     :type transform: callable
     """
-    def __init__(self, symbols, states, transitions, outputs, priors,
-                 transform=_identity):
+
+    def __init__(
+        self, symbols, states, transitions, outputs, priors, transform=_identity
+    ):
         self._symbols = unique_list(symbols)
         self._states = unique_list(states)
         self._transitions = transitions
@@ -144,40 +149,53 @@ class HiddenMarkovModelTagger(TaggerI):
         self._transform = transform
 
     @classmethod
-    def _train(cls, labeled_sequence, test_sequence=None,
-                    unlabeled_sequence=None, transform=_identity,
-                    estimator=None, **kwargs):
+    def _train(
+        cls,
+        labeled_sequence,
+        test_sequence=None,
+        unlabeled_sequence=None,
+        transform=_identity,
+        estimator=None,
+        **kwargs
+    ):
 
         if estimator is None:
+
             def estimator(fd, bins):
                 return LidstoneProbDist(fd, 0.1, bins)
 
         labeled_sequence = LazyMap(transform, labeled_sequence)
-        symbols = unique_list(word for sent in labeled_sequence
-            for word, tag in sent)
-        tag_set = unique_list(tag for sent in labeled_sequence
-            for word, tag in sent)
+        symbols = unique_list(word for sent in labeled_sequence for word, tag in sent)
+        tag_set = unique_list(tag for sent in labeled_sequence for word, tag in sent)
 
         trainer = HiddenMarkovModelTrainer(tag_set, symbols)
         hmm = trainer.train_supervised(labeled_sequence, estimator=estimator)
-        hmm = cls(hmm._symbols, hmm._states, hmm._transitions, hmm._outputs,
-                  hmm._priors, transform=transform)
+        hmm = cls(
+            hmm._symbols,
+            hmm._states,
+            hmm._transitions,
+            hmm._outputs,
+            hmm._priors,
+            transform=transform,
+        )
 
         if test_sequence:
-            hmm.test(test_sequence, verbose=kwargs.get('verbose', False))
+            hmm.test(test_sequence, verbose=kwargs.get("verbose", False))
 
         if unlabeled_sequence:
-            max_iterations = kwargs.get('max_iterations', 5)
-            hmm = trainer.train_unsupervised(unlabeled_sequence, model=hmm,
-                max_iterations=max_iterations)
+            max_iterations = kwargs.get("max_iterations", 5)
+            hmm = trainer.train_unsupervised(
+                unlabeled_sequence, model=hmm, max_iterations=max_iterations
+            )
             if test_sequence:
-                hmm.test(test_sequence, verbose=kwargs.get('verbose', False))
+                hmm.test(test_sequence, verbose=kwargs.get("verbose", False))
 
         return hmm
 
     @classmethod
-    def train(cls, labeled_sequence, test_sequence=None,
-                   unlabeled_sequence=None, **kwargs):
+    def train(
+        cls, labeled_sequence, test_sequence=None, unlabeled_sequence=None, **kwargs
+    ):
         """
         Train a new HiddenMarkovModelTagger using the given labeled and
         unlabeled training instances. Testing will be performed if test
@@ -206,8 +224,7 @@ class HiddenMarkovModelTagger(TaggerI):
         :param max_iterations: number of Baum-Welch iterations to perform
         :type max_iterations: int
         """
-        return cls._train(labeled_sequence, test_sequence,
-                          unlabeled_sequence, **kwargs)
+        return cls._train(labeled_sequence, test_sequence, unlabeled_sequence, **kwargs)
 
     def probability(self, sequence):
         """
@@ -222,7 +239,7 @@ class HiddenMarkovModelTagger(TaggerI):
             property, and optionally the TAG property
         :type sequence:  Token
         """
-        return 2**(self.log_probability(self._transform(sequence)))
+        return 2 ** (self.log_probability(self._transform(sequence)))
 
     def log_probability(self, sequence):
         """
@@ -243,17 +260,19 @@ class HiddenMarkovModelTagger(TaggerI):
 
         if T > 0 and sequence[0][_TAG]:
             last_state = sequence[0][_TAG]
-            p = self._priors.logprob(last_state) + \
-                self._output_logprob(last_state, sequence[0][_TEXT])
+            p = self._priors.logprob(last_state) + self._output_logprob(
+                last_state, sequence[0][_TEXT]
+            )
             for t in range(1, T):
                 state = sequence[t][_TAG]
-                p += self._transitions[last_state].logprob(state) + \
-                     self._output_logprob(state, sequence[t][_TEXT])
+                p += self._transitions[last_state].logprob(
+                    state
+                ) + self._output_logprob(state, sequence[t][_TEXT])
                 last_state = state
             return p
         else:
             alpha = self._forward_probability(sequence)
-            p = logsumexp2(alpha[T-1])
+            p = logsumexp2(alpha[T - 1])
             return p
 
     def tag(self, unlabeled_sequence):
@@ -379,14 +398,14 @@ class HiddenMarkovModelTagger(TaggerI):
         V[0] = P + O[:, S[unlabeled_sequence[0]]]
         for t in range(1, T):
             for j in range(N):
-                vs = V[t-1, :] + X[:, j]
+                vs = V[t - 1, :] + X[:, j]
                 best = np.argmax(vs)
                 V[t, j] = vs[best] + O[j, S[unlabeled_sequence[t]]]
                 B[t, j] = best
 
-        current = np.argmax(V[T-1,:])
+        current = np.argmax(V[T - 1, :])
         sequence = [current]
-        for t in range(T-1, 0, -1):
+        for t in range(T - 1, 0, -1):
             last = B[t, current]
             sequence.append(last)
             current = last
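
A minimal numeric sketch of the backpointer walk above, with hypothetical V and B tables for a 3-token, 2-state problem (the real tables are filled by the Viterbi induction):

    import numpy as np

    B = np.array([[0, 0], [1, 0], [0, 1]])      # hypothetical backpointers B[t, state]
    V_last = np.array([0.2, 0.7])               # hypothetical scores at the final step
    current = int(np.argmax(V_last))            # best final state: 1
    sequence = [current]
    for t in range(B.shape[0] - 1, 0, -1):
        current = int(B[t, current])            # follow the backpointer
        sequence.append(current)
    sequence.reverse()
    print(sequence)                             # [0, 1, 1]
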
@@ -418,8 +437,7 @@ class HiddenMarkovModelTagger(TaggerI):
         # find the starting log probabilities for each state
         symbol = unlabeled_sequence[0]
         for i, state in enumerate(self._states):
-            V[0, i] = self._priors.logprob(state) + \
-                      self._output_logprob(state, symbol)
+            V[0, i] = self._priors.logprob(state) + self._output_logprob(state, symbol)
             B[0, state] = None
 
         # find the maximum log probabilities for reaching each state at time t
@@ -430,7 +448,7 @@ class HiddenMarkovModelTagger(TaggerI):
                 best = None
                 for i in range(N):
                     si = self._states[i]
-                    va = V[t-1, i] + self._transitions[si].logprob(sj)
+                    va = V[t - 1, i] + self._transitions[si].logprob(sj)
                     if not best or va > best[0]:
                         best = (va, si)
                 V[t, j] = best[0] + self._output_logprob(sj, symbol)
@@ -439,14 +457,14 @@ class HiddenMarkovModelTagger(TaggerI):
         # find the highest probability final state
         best = None
         for i in range(N):
-            val = V[T-1, i]
+            val = V[T - 1, i]
             if not best or val > best[0]:
                 best = (val, self._states[i])
 
         # traverse the back-pointers B to find the state sequence
         current = best[1]
         sequence = [current]
-        for t in range(T-1, 0, -1):
+        for t in range(T - 1, 0, -1):
             last = B[t, current]
             sequence.append(last)
             current = last
@@ -477,16 +495,19 @@ class HiddenMarkovModelTagger(TaggerI):
         # sample the starting state and symbol prob dists
         tokens = []
         state = self._sample_probdist(self._priors, rng.random(), self._states)
-        symbol = self._sample_probdist(self._outputs[state],
-                                  rng.random(), self._symbols)
+        symbol = self._sample_probdist(
+            self._outputs[state], rng.random(), self._symbols
+        )
         tokens.append((symbol, state))
 
         for i in range(1, length):
             # sample the state transition and symbol prob dists
-            state = self._sample_probdist(self._transitions[state],
-                                     rng.random(), self._states)
-            symbol = self._sample_probdist(self._outputs[state],
-                                      rng.random(), self._symbols)
+            state = self._sample_probdist(
+                self._transitions[state], rng.random(), self._states
+            )
+            symbol = self._sample_probdist(
+                self._outputs[state], rng.random(), self._symbols
+            )
             tokens.append((symbol, state))
 
         return tokens
@@ -498,8 +519,7 @@ class HiddenMarkovModelTagger(TaggerI):
             if cum_p <= p <= cum_p + add_p:
                 return sample
             cum_p += add_p
-        raise Exception('Invalid probability distribution - '
-                        'does not sum to one')
+        raise Exception("Invalid probability distribution - " "does not sum to one")
 
     def entropy(self, unlabeled_sequence):
         """
@@ -536,35 +556,39 @@ class HiddenMarkovModelTagger(TaggerI):
 
         alpha = self._forward_probability(unlabeled_sequence)
         beta = self._backward_probability(unlabeled_sequence)
-        normalisation = logsumexp2(alpha[T-1])
+        normalisation = logsumexp2(alpha[T - 1])
 
         entropy = normalisation
 
         # starting state, t = 0
         for i, state in enumerate(self._states):
-            p = 2**(alpha[0, i] + beta[0, i] - normalisation)
+            p = 2 ** (alpha[0, i] + beta[0, i] - normalisation)
             entropy -= p * self._priors.logprob(state)
-            #print 'p(s_0 = %s) =' % state, p
+            # print('p(s_0 = %s) =' % state, p)
 
         # state transitions
         for t0 in range(T - 1):
             t1 = t0 + 1
             for i0, s0 in enumerate(self._states):
                 for i1, s1 in enumerate(self._states):
-                    p = 2**(alpha[t0, i0] + self._transitions[s0].logprob(s1) +
-                            self._outputs[s1].logprob(
-                                unlabeled_sequence[t1][_TEXT]) +
-                            beta[t1, i1] - normalisation)
+                    p = 2 ** (
+                        alpha[t0, i0]
+                        + self._transitions[s0].logprob(s1)
+                        + self._outputs[s1].logprob(unlabeled_sequence[t1][_TEXT])
+                        + beta[t1, i1]
+                        - normalisation
+                    )
                     entropy -= p * self._transitions[s0].logprob(s1)
-                    #print 'p(s_%d = %s, s_%d = %s) =' % (t0, s0, t1, s1), p
+                    # print('p(s_%d = %s, s_%d = %s) =' % (t0, s0, t1, s1), p)
 
         # symbol emissions
         for t in range(T):
             for i, state in enumerate(self._states):
-                p = 2**(alpha[t, i] + beta[t, i] - normalisation)
+                p = 2 ** (alpha[t, i] + beta[t, i] - normalisation)
                 entropy -= p * self._outputs[state].logprob(
-                    unlabeled_sequence[t][_TEXT])
-                #print 'p(s_%d = %s) =' % (t, state), p
+                    unlabeled_sequence[t][_TEXT]
+                )
+                # print('p(s_%d = %s) =' % (t, state), p)
 
         return entropy
 
@@ -580,7 +604,7 @@ class HiddenMarkovModelTagger(TaggerI):
 
         alpha = self._forward_probability(unlabeled_sequence)
         beta = self._backward_probability(unlabeled_sequence)
-        normalisation = logsumexp2(alpha[T-1])
+        normalisation = logsumexp2(alpha[T - 1])
 
         entropies = np.zeros(T, np.float64)
         probs = np.zeros(N, np.float64)
@@ -589,7 +613,7 @@ class HiddenMarkovModelTagger(TaggerI):
                 probs[s] = alpha[t, s] + beta[t, s] - normalisation
 
             for s in range(N):
-                entropies[t] -= 2**(probs[s]) * probs[s]
+                entropies[t] -= 2 ** (probs[s]) * probs[s]
 
         return entropies
 
@@ -616,19 +640,10 @@ class HiddenMarkovModelTagger(TaggerI):
             log_probs.append(lp)
         normalisation = _log_add(*log_probs)
 
-        #ps = zeros((T, N), float64)
-        #for labelling, lp in zip(labellings, log_probs):
-            #for t in range(T):
-                #ps[t, self._states.index(labelling[t])] += \
-                #    2**(lp - normalisation)
-
-        #for t in range(T):
-            #print 'prob[%d] =' % t, ps[t]
-
         entropy = 0
         for lp in log_probs:
             lp -= normalisation
-            entropy -= 2**(lp) * lp
+            entropy -= 2 ** (lp) * lp
 
         return entropy
 
@@ -656,7 +671,7 @@ class HiddenMarkovModelTagger(TaggerI):
 
         normalisation = _log_add(*log_probs)
 
-        probabilities = _ninf_array((T,N))
+        probabilities = _ninf_array((T, N))
 
         for labelling, lp in zip(labellings, log_probs):
             lp -= normalisation
@@ -667,15 +682,17 @@ class HiddenMarkovModelTagger(TaggerI):
         entropies = np.zeros(T, np.float64)
         for t in range(T):
             for s in range(N):
-                entropies[t] -= 2**(probabilities[t, s]) * probabilities[t, s]
+                entropies[t] -= 2 ** (probabilities[t, s]) * probabilities[t, s]
 
         return entropies
 
     def _transitions_matrix(self):
         """ Return a matrix of transition log probabilities. """
-        trans_iter = (self._transitions[sj].logprob(si)
-                      for sj in self._states
-                      for si in self._states)
+        trans_iter = (
+            self._transitions[sj].logprob(si)
+            for sj in self._states
+            for si in self._states
+        )
 
         transitions_logprob = np.fromiter(trans_iter, dtype=np.float64)
         N = len(self._states)
@@ -711,8 +728,9 @@ class HiddenMarkovModelTagger(TaggerI):
         # Initialization
         symbol = unlabeled_sequence[0][_TEXT]
         for i, state in enumerate(self._states):
-            alpha[0, i] = self._priors.logprob(state) + \
-                          self._output_logprob(state, symbol)
+            alpha[0, i] = self._priors.logprob(state) + self._output_logprob(
+                state, symbol
+            )
 
         # Induction
         for t in range(1, T):
@@ -720,7 +738,7 @@ class HiddenMarkovModelTagger(TaggerI):
             output_logprob = self._outputs_vector(symbol)
 
             for i in range(N):
-                summand = alpha[t-1] + transitions_logprob[i]
+                summand = alpha[t - 1] + transitions_logprob[i]
                 alpha[t, i] = logsumexp2(summand) + output_logprob[i]
 
         return alpha
@@ -746,15 +764,15 @@ class HiddenMarkovModelTagger(TaggerI):
 
         # initialise the backward values;
         # "1" is an arbitrarily chosen value from Rabiner tutorial
-        beta[T-1, :] = np.log2(1)
+        beta[T - 1, :] = np.log2(1)
 
         # inductively calculate remaining backward values
-        for t in range(T-2, -1, -1):
-            symbol = unlabeled_sequence[t+1][_TEXT]
+        for t in range(T - 2, -1, -1):
+            symbol = unlabeled_sequence[t + 1][_TEXT]
             outputs = self._outputs_vector(symbol)
 
             for i in range(N):
-                summand = transitions_logprob[i] + beta[t+1] + outputs
+                summand = transitions_logprob[i] + beta[t + 1] + outputs
                 beta[t, i] = logsumexp2(summand)
 
         return beta
@@ -784,33 +802,37 @@ class HiddenMarkovModelTagger(TaggerI):
 
         if verbose:
             for test_sent, predicted_sent in zip(test_sequence, predicted_sequence):
-                print('Test:',
-                    ' '.join('%s/%s' % (token, tag)
-                             for (token, tag) in test_sent))
+                print(
+                    "Test:",
+                    " ".join("%s/%s" % (token, tag) for (token, tag) in test_sent),
+                )
                 print()
-                print('Untagged:',
-                    ' '.join("%s" % token for (token, tag) in test_sent))
+                print("Untagged:", " ".join("%s" % token for (token, tag) in test_sent))
                 print()
-                print('HMM-tagged:',
-                    ' '.join('%s/%s' % (token, tag)
-                              for (token, tag) in predicted_sent))
+                print(
+                    "HMM-tagged:",
+                    " ".join("%s/%s" % (token, tag) for (token, tag) in predicted_sent),
+                )
                 print()
-                print('Entropy:',
-                    self.entropy([(token, None) for
-                                  (token, tag) in predicted_sent]))
+                print(
+                    "Entropy:",
+                    self.entropy([(token, None) for (token, tag) in predicted_sent]),
+                )
                 print()
-                print('-' * 60)
+                print("-" * 60)
 
         test_tags = flatten(map(tags, test_sequence))
         predicted_tags = flatten(map(tags, predicted_sequence))
 
         acc = accuracy(test_tags, predicted_tags)
         count = sum(len(sent) for sent in test_sequence)
-        print('accuracy over %d tokens: %.2f' % (count, acc * 100))
+        print("accuracy over %d tokens: %.2f" % (count, acc * 100))
 
     def __repr__(self):
-        return ('<HiddenMarkovModelTagger %d states and %d output symbols>'
-                % (len(self._states), len(self._symbols)))
+        return "<HiddenMarkovModelTagger %d states and %d output symbols>" % (
+            len(self._states),
+            len(self._symbols),
+        )
 
 
 class HiddenMarkovModelTrainer(object):
@@ -828,12 +850,12 @@ class HiddenMarkovModelTrainer(object):
     :param symbols: the set of observation symbols
     :type symbols:  sequence of any
     """
+
     def __init__(self, states=None, symbols=None):
-        self._states = (states if states else [])
-        self._symbols = (symbols if symbols else [])
+        self._states = states if states else []
+        self._symbols = symbols if symbols else []
 
-    def train(self, labeled_sequences=None, unlabeled_sequences=None,
-              **kwargs):
+    def train(self, labeled_sequences=None, unlabeled_sequences=None, **kwargs):
         """
         Trains the HMM using both (or either of) supervised and unsupervised
         techniques.
@@ -842,9 +864,11 @@ class HiddenMarkovModelTrainer(object):
         :rtype: HiddenMarkovModelTagger
         :param labeled_sequences: the supervised training data, a set of
             labelled sequences of observations
+            ex: [ [(word_1, tag_1), ..., (word_n, tag_n)], ... ]
         :type labeled_sequences: list
         :param unlabeled_sequences: the unsupervised training data, a set of
             sequences of observations
+            ex: [ [word_1, ..., word_n], ... ]
         :type unlabeled_sequences: list
         :param kwargs: additional arguments to pass to the training methods
         """
@@ -853,11 +877,11 @@ class HiddenMarkovModelTrainer(object):
         if labeled_sequences:
             model = self.train_supervised(labeled_sequences, **kwargs)
         if unlabeled_sequences:
-            if model: kwargs['model'] = model
+            if model:
+                kwargs["model"] = model
             model = self.train_unsupervised(unlabeled_sequences, **kwargs)
         return model
 
-
     def _baum_welch_step(self, sequence, model, symbol_to_number):
 
         N = len(model._states)
@@ -869,7 +893,7 @@ class HiddenMarkovModelTrainer(object):
         beta = model._backward_probability(sequence)
 
         # find the log probability of the sequence
-        lpk = logsumexp2(alpha[T-1])
+        lpk = logsumexp2(alpha[T - 1])
 
         A_numer = _ninf_array((N, N))
         B_numer = _ninf_array((N, M))
@@ -882,26 +906,29 @@ class HiddenMarkovModelTrainer(object):
             symbol = sequence[t][_TEXT]  # not found? FIXME
             next_symbol = None
             if t < T - 1:
-                next_symbol = sequence[t+1][_TEXT]  # not found? FIXME
+                next_symbol = sequence[t + 1][_TEXT]  # not found? FIXME
             xi = symbol_to_number[symbol]
 
             next_outputs_logprob = model._outputs_vector(next_symbol)
             alpha_plus_beta = alpha[t] + beta[t]
 
             if t < T - 1:
-                numer_add = transitions_logprob + next_outputs_logprob + \
-                            beta[t+1] + alpha[t].reshape(N, 1)
+                numer_add = (
+                    transitions_logprob
+                    + next_outputs_logprob
+                    + beta[t + 1]
+                    + alpha[t].reshape(N, 1)
+                )
                 A_numer = np.logaddexp2(A_numer, numer_add)
                 A_denom = np.logaddexp2(A_denom, alpha_plus_beta)
             else:
                 B_denom = np.logaddexp2(A_denom, alpha_plus_beta)
 
-            B_numer[:,xi] = np.logaddexp2(B_numer[:,xi], alpha_plus_beta)
+            B_numer[:, xi] = np.logaddexp2(B_numer[:, xi], alpha_plus_beta)
 
         return lpk, A_numer, A_denom, B_numer, B_denom
 
-    def train_unsupervised(self, unlabeled_sequences, update_outputs=True,
-                           **kwargs):
+    def train_unsupervised(self, unlabeled_sequences, update_outputs=True, **kwargs):
         """
         Trains the HMM using the Baum-Welch algorithm to maximise the
         probability of the data sequence. This is a variant of the EM
@@ -927,17 +954,18 @@ class HiddenMarkovModelTrainer(object):
 
         # create a uniform HMM, which will be iteratively refined, unless
         # given an existing model
-        model = kwargs.get('model')
+        model = kwargs.get("model")
         if not model:
             priors = RandomProbDist(self._states)
             transitions = DictionaryConditionalProbDist(
-                            dict((state, RandomProbDist(self._states))
-                                  for state in self._states))
+                dict((state, RandomProbDist(self._states)) for state in self._states)
+            )
             outputs = DictionaryConditionalProbDist(
-                            dict((state, RandomProbDist(self._symbols))
-                                  for state in self._states))
-            model = HiddenMarkovModelTagger(self._symbols, self._states,
-                            transitions, outputs, priors)
+                dict((state, RandomProbDist(self._symbols)) for state in self._states)
+            )
+            model = HiddenMarkovModelTagger(
+                self._symbols, self._states, transitions, outputs, priors
+            )
 
         self._states = model._states
         self._symbols = model._symbols
@@ -950,13 +978,19 @@ class HiddenMarkovModelTrainer(object):
         # model._priors = MutableProbDist(model._priors, self._states)
 
         model._transitions = DictionaryConditionalProbDist(
-            dict((s, MutableProbDist(model._transitions[s], self._states))
-                 for s in self._states))
+            dict(
+                (s, MutableProbDist(model._transitions[s], self._states))
+                for s in self._states
+            )
+        )
 
         if update_outputs:
             model._outputs = DictionaryConditionalProbDist(
-                dict((s, MutableProbDist(model._outputs[s], self._symbols))
-                     for s in self._states))
+                dict(
+                    (s, MutableProbDist(model._outputs[s], self._symbols))
+                    for s in self._states
+                )
+            )
 
         model.reset_cache()
 
@@ -964,8 +998,8 @@ class HiddenMarkovModelTrainer(object):
         converged = False
         last_logprob = None
         iteration = 0
-        max_iterations = kwargs.get('max_iterations', 1000)
-        epsilon = kwargs.get('convergence_logprob', 1e-6)
+        max_iterations = kwargs.get("max_iterations", 1000)
+        epsilon = kwargs.get("convergence_logprob", 1e-6)
 
         while not converged and iteration < max_iterations:
             A_numer = _ninf_array((N, N))
@@ -979,16 +1013,21 @@ class HiddenMarkovModelTrainer(object):
                 if not sequence:
                     continue
 
-                (lpk, seq_A_numer, seq_A_denom,
-                seq_B_numer, seq_B_denom) = self._baum_welch_step(sequence, model, symbol_numbers)
+                (
+                    lpk,
+                    seq_A_numer,
+                    seq_A_denom,
+                    seq_B_numer,
+                    seq_B_denom,
+                ) = self._baum_welch_step(sequence, model, symbol_numbers)
 
                 # add these sums to the global A and B values
                 for i in range(N):
-                    A_numer[i] = np.logaddexp2(A_numer[i], seq_A_numer[i]-lpk)
-                    B_numer[i] = np.logaddexp2(B_numer[i], seq_B_numer[i]-lpk)
+                    A_numer[i] = np.logaddexp2(A_numer[i], seq_A_numer[i] - lpk)
+                    B_numer[i] = np.logaddexp2(B_numer[i], seq_B_numer[i] - lpk)
 
-                A_denom = np.logaddexp2(A_denom, seq_A_denom-lpk)
-                B_denom = np.logaddexp2(B_denom, seq_B_denom-lpk)
+                A_denom = np.logaddexp2(A_denom, seq_A_denom - lpk)
+                B_denom = np.logaddexp2(B_denom, seq_B_denom - lpk)
 
                 logprob += lpk
 
@@ -1025,7 +1064,7 @@ class HiddenMarkovModelTrainer(object):
             if iteration > 0 and abs(logprob - last_logprob) < epsilon:
                 converged = True
 
-            print('iteration', iteration, 'logprob', logprob)
+            print("iteration", iteration, "logprob", logprob)
             iteration += 1
             last_logprob = logprob
 
@@ -1100,7 +1139,7 @@ def _ninf_array(shape):
 
 def logsumexp2(arr):
     max_ = arr.max()
-    return np.log2(np.sum(2**(arr - max_))) + max_
+    return np.log2(np.sum(2 ** (arr - max_))) + max_
 
 
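
A quick numeric check of the base-2 log-sum-exp trick in logsumexp2 above: factoring out the max exponent avoids the underflow a naive sum would hit.

    import numpy as np

    arr = np.array([-1000.0, -1000.0])          # 2 ** -1000 underflows to 0.0 directly
    max_ = arr.max()
    stable = np.log2(np.sum(2 ** (arr - max_))) + max_
    print(stable)                               # -999.0, i.e. log2(2 * 2**-1000)
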
 def _log_add(*values):
@@ -1111,7 +1150,7 @@ def _log_add(*values):
     if x > -np.inf:
         sum_diffs = 0
         for value in values:
-            sum_diffs += 2**(value - x)
+            sum_diffs += 2 ** (value - x)
         return x + np.log2(sum_diffs)
     else:
         return x
@@ -1131,16 +1170,17 @@ def _create_hmm_tagger(states, symbols, A, B, pi):
     A = cpd(A, states, states)
     B = cpd(B, states, symbols)
     pi = pd(pi, states)
-    return HiddenMarkovModelTagger(symbols=symbols, states=states,
-                                   transitions=A, outputs=B, priors=pi)
+    return HiddenMarkovModelTagger(
+        symbols=symbols, states=states, transitions=A, outputs=B, priors=pi
+    )
 
 
 def _market_hmm_example():
     """
     Return an example HMM (described on page 381, Huang et al)
     """
-    states = ['bull', 'bear', 'static']
-    symbols = ['up', 'down', 'unchanged']
+    states = ["bull", "bear", "static"]
+    symbols = ["up", "down", "unchanged"]
     A = np.array([[0.6, 0.2, 0.2], [0.5, 0.3, 0.2], [0.4, 0.1, 0.5]], np.float64)
     B = np.array([[0.7, 0.1, 0.2], [0.1, 0.6, 0.3], [0.3, 0.3, 0.4]], np.float64)
     pi = np.array([0.5, 0.2, 0.3], np.float64)
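
The bull/bear/static matrices make a handy cross-check: the probability the model assigns to an observation sequence can be recomputed with a direct forward pass over A, B and pi. A standalone sketch (plain NumPy, not the nltk API), which should agree with the 'probability =' line that demo() prints for ['up', 'up']:

    import numpy as np

    A = np.array([[0.6, 0.2, 0.2], [0.5, 0.3, 0.2], [0.4, 0.1, 0.5]])
    B = np.array([[0.7, 0.1, 0.2], [0.1, 0.6, 0.3], [0.3, 0.3, 0.4]])
    pi = np.array([0.5, 0.2, 0.3])
    symbol_index = {"up": 0, "down": 1, "unchanged": 2}

    def forward_prob(observations):
        # alpha[i] = P(o_1 .. o_t, state_t = i), updated one observation at a time
        alpha = pi * B[:, symbol_index[observations[0]]]
        for o in observations[1:]:
            alpha = (alpha @ A) * B[:, symbol_index[o]]
        return alpha.sum()

    print(forward_prob(["up", "up"]))  # 0.2234
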
@@ -1158,29 +1198,34 @@ def demo():
 
     model, states, symbols = _market_hmm_example()
 
-    print('Testing', model)
+    print("Testing", model)
 
-    for test in [['up', 'up'], ['up', 'down', 'up'],
-                 ['down'] * 5, ['unchanged'] * 5 + ['up']]:
+    for test in [
+        ["up", "up"],
+        ["up", "down", "up"],
+        ["down"] * 5,
+        ["unchanged"] * 5 + ["up"],
+    ]:
 
         sequence = [(t, None) for t in test]
 
-        print('Testing with state sequence', test)
-        print('probability =', model.probability(sequence))
-        print('tagging =    ', model.tag([word for (word,tag) in sequence]))
-        print('p(tagged) =  ', model.probability(sequence))
-        print('H =          ', model.entropy(sequence))
-        print('H_exh =      ', model._exhaustive_entropy(sequence))
-        print('H(point) =   ', model.point_entropy(sequence))
-        print('H_exh(point)=', model._exhaustive_point_entropy(sequence))
+        print("Testing with state sequence", test)
+        print("probability =", model.probability(sequence))
+        print("tagging =    ", model.tag([word for (word, tag) in sequence]))
+        print("p(tagged) =  ", model.probability(sequence))
+        print("H =          ", model.entropy(sequence))
+        print("H_exh =      ", model._exhaustive_entropy(sequence))
+        print("H(point) =   ", model.point_entropy(sequence))
+        print("H_exh(point)=", model._exhaustive_point_entropy(sequence))
         print()
 
+
 def load_pos(num_sents):
     from nltk.corpus import brown
 
-    sentences = brown.tagged_sents(categories='news')[:num_sents]
+    sentences = brown.tagged_sents(categories="news")[:num_sents]
 
-    tag_re = re.compile(r'[*]|--|[^+*-]+')
+    tag_re = re.compile(r"[*]|--|[^+*-]+")
     tag_set = set()
     symbols = set()
 
@@ -1189,7 +1234,7 @@ def load_pos(num_sents):
         for i in range(len(sentence)):
             word, tag = sentence[i]
             word = word.lower()  # normalize
-            symbols.add(word)    # log this word
+            symbols.add(word)  # log this word
             # Clean up the tag.
             tag = tag_re.match(tag).group()
             tag_set.add(tag)
@@ -1198,6 +1243,7 @@ def load_pos(num_sents):
 
     return cleaned_sentences, list(tag_set), list(symbols)
 
+
 def demo_pos():
     # demonstrates POS tagging using supervised training
 
@@ -1205,30 +1251,35 @@ def demo_pos():
     print("HMM POS tagging demo")
     print()
 
-    print('Training HMM...')
+    print("Training HMM...")
     labelled_sequences, tag_set, symbols = load_pos(20000)
     trainer = HiddenMarkovModelTrainer(tag_set, symbols)
-    hmm = trainer.train_supervised(labelled_sequences[10:],
-                    estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins))
+    hmm = trainer.train_supervised(
+        labelled_sequences[10:],
+        estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins),
+    )
 
-    print('Testing...')
+    print("Testing...")
     hmm.test(labelled_sequences[:10], verbose=True)
 
+
 def _untag(sentences):
     unlabeled = []
     for sentence in sentences:
         unlabeled.append([(token[_TEXT], None) for token in sentence])
     return unlabeled
 
-def demo_pos_bw(test=10, supervised=20, unsupervised=10, verbose=True,
-                max_iterations=5):
+
+def demo_pos_bw(
+    test=10, supervised=20, unsupervised=10, verbose=True, max_iterations=5
+):
     # demonstrates the Baum-Welch algorithm in POS tagging
 
     print()
     print("Baum-Welch demo for POS tagging")
     print()
 
-    print('Training HMM (supervised, %d sentences)...' % supervised)
+    print("Training HMM (supervised, %d sentences)..." % supervised)
 
     sentences, tag_set, symbols = load_pos(test + supervised + unsupervised)
 
@@ -1238,18 +1289,22 @@ def demo_pos_bw(test=10, supervised=20, unsupervised=10, verbose=True,
             symbols.add(token[_TEXT])
 
     trainer = HiddenMarkovModelTrainer(tag_set, list(symbols))
-    hmm = trainer.train_supervised(sentences[test:test+supervised],
-                    estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins))
+    hmm = trainer.train_supervised(
+        sentences[test : test + supervised],
+        estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins),
+    )
 
     hmm.test(sentences[:test], verbose=verbose)
 
-    print('Training (unsupervised, %d sentences)...' % unsupervised)
+    print("Training (unsupervised, %d sentences)..." % unsupervised)
     # it's rather slow - so only use 10 samples by default
-    unlabeled = _untag(sentences[test+supervised:])
-    hmm = trainer.train_unsupervised(unlabeled, model=hmm,
-                                     max_iterations=max_iterations)
+    unlabeled = _untag(sentences[test + supervised :])
+    hmm = trainer.train_unsupervised(
+        unlabeled, model=hmm, max_iterations=max_iterations
+    )
     hmm.test(sentences[:test], verbose=verbose)
 
+
 def demo_bw():
     # demo Baum Welch by generating some sequences and then performing
     # unsupervised training on them
@@ -1263,6 +1318,7 @@ def demo_bw():
     # generate some random sequences
     training = []
     import random
+
     rng = random.Random()
     rng.seed(0)
     for i in range(10):
@@ -1271,5 +1327,4 @@ def demo_bw():
 
     # train on those examples, starting with the model that generated them
     trainer = HiddenMarkovModelTrainer(states, symbols)
-    hmm = trainer.train_unsupervised(training, model=model,
-                                     max_iterations=1000)
+    hmm = trainer.train_unsupervised(training, model=model, max_iterations=1000)
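
With the formatting settled, the supervised path that demo_pos() exercises reduces to a few lines. A hedged sketch against the Brown corpus (the slice sizes and test sentence are illustrative, and the corpus must be downloaded):

    from nltk.corpus import brown
    from nltk.probability import LidstoneProbDist
    from nltk.tag.hmm import HiddenMarkovModelTrainer

    train = brown.tagged_sents(categories="news")[100:600]
    tags = list({t for sent in train for (_, t) in sent})
    words = list({w for sent in train for (w, _) in sent})

    trainer = HiddenMarkovModelTrainer(tags, words)
    # Lidstone smoothing (gamma=0.1), the same estimator the demo passes.
    tagger = trainer.train_supervised(
        train, estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins)
    )
    print(tagger.tag("the jury said".split()))
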
diff --git a/nlp_resource_data/nltk/tag/hmm.pyc b/nlp_resource_data/nltk/tag/hmm.pyc
deleted file mode 100755 (executable)
index 44d684d..0000000
Binary files a/nlp_resource_data/nltk/tag/hmm.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tag/hunpos.py b/nlp_resource_data/nltk/tag/hunpos.py
old mode 100755 (executable)
new mode 100644 (file)
index e81b180..3053e8c
--- a/nlp_resource_data/nltk/tag/hunpos.py
+++ b/nlp_resource_data/nltk/tag/hunpos.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the HunPos POS-tagger
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
 #         Dávid Márk Nemeskey <nemeskeyd@gmail.com> (modifications)
 #         Attila Zséder <zseder@gmail.com> (modifications)
@@ -15,16 +15,15 @@ A module for interfacing with the HunPos open-source POS-tagger.
 import os
 from subprocess import Popen, PIPE
 
-from six import text_type
-
 from nltk.internals import find_binary, find_file
 from nltk.tag.api import TaggerI
 
-_hunpos_url = 'http://code.google.com/p/hunpos/'
+_hunpos_url = "http://code.google.com/p/hunpos/"
 
-_hunpos_charset = 'ISO-8859-1'
+_hunpos_charset = "ISO-8859-1"
 """The default encoding used by hunpos: ISO-8859-1."""
 
+
 class HunposTagger(TaggerI):
     """
     A class for pos tagging with HunPos. The input is the paths to:
@@ -51,8 +50,9 @@ class HunposTagger(TaggerI):
         [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'NN'), ('swallow', 'VB'), ('?', '.')]
     """
 
-    def __init__(self, path_to_model, path_to_bin=None,
-                 encoding=_hunpos_charset, verbose=False):
+    def __init__(
+        self, path_to_model, path_to_bin=None, encoding=_hunpos_charset, verbose=False
+    ):
         """
         Starts the hunpos-tag executable and establishes a connection with it.
 
@@ -67,23 +67,37 @@ class HunposTagger(TaggerI):
             The caller must ensure that tokens are encoded in the right charset.
         """
         self._closed = True
-        hunpos_paths = ['.', '/usr/bin', '/usr/local/bin', '/opt/local/bin',
-                        '/Applications/bin', '~/bin', '~/Applications/bin']
+        hunpos_paths = [
+            ".",
+            "/usr/bin",
+            "/usr/local/bin",
+            "/opt/local/bin",
+            "/Applications/bin",
+            "~/bin",
+            "~/Applications/bin",
+        ]
         hunpos_paths = list(map(os.path.expanduser, hunpos_paths))
 
         self._hunpos_bin = find_binary(
-            'hunpos-tag', path_to_bin,
-            env_vars=('HUNPOS_TAGGER',),
+            "hunpos-tag",
+            path_to_bin,
+            env_vars=("HUNPOS_TAGGER",),
             searchpath=hunpos_paths,
             url=_hunpos_url,
-            verbose=verbose
+            verbose=verbose,
         )
 
         self._hunpos_model = find_file(
-            path_to_model, env_vars=('HUNPOS_TAGGER',), verbose=verbose)
+            path_to_model, env_vars=("HUNPOS_TAGGER",), verbose=verbose
+        )
         self._encoding = encoding
-        self._hunpos = Popen([self._hunpos_bin, self._hunpos_model],
-                             shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE)
+        self._hunpos = Popen(
+            [self._hunpos_bin, self._hunpos_model],
+            shell=False,
+            stdin=PIPE,
+            stdout=PIPE,
+            stderr=PIPE,
+        )
         self._closed = False
 
     def __del__(self):
@@ -97,6 +111,7 @@ class HunposTagger(TaggerI):
 
     def __enter__(self):
         return self
+
     def __exit__(self, exc_type, exc_value, traceback):
         self.close()
 
@@ -106,7 +121,7 @@ class HunposTagger(TaggerI):
         """
         for token in tokens:
             assert "\n" not in token, "Tokens should not contain newlines"
-            if isinstance(token, text_type):
+            if isinstance(token, str):
                 token = token.encode(self._encoding)
             self._hunpos.stdin.write(token + b"\n")
         # We write a final empty line to tell hunpos that the sentence is finished:
@@ -116,17 +131,19 @@ class HunposTagger(TaggerI):
         tagged_tokens = []
         for token in tokens:
             tagged = self._hunpos.stdout.readline().strip().split(b"\t")
-            tag = (tagged[1] if len(tagged) > 1 else None)
+            tag = tagged[1] if len(tagged) > 1 else None
             tagged_tokens.append((token, tag))
         # We have to read (and dismiss) the final empty line:
         self._hunpos.stdout.readline()
 
         return tagged_tokens
 
+
 # skip doctests if Hunpos tagger is not installed
 def setup_module(module):
     from nose import SkipTest
+
     try:
-        HunposTagger('en_wsj.model')
+        HunposTagger("en_wsj.model")
     except LookupError:
         raise SkipTest("HunposTagger is not available")
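
The __enter__/__exit__ pair above means the subprocess is best managed with a with-statement under py3. A usage sketch; the model name and binary path are assumptions (hunpos must be installed, see _hunpos_url):

    from nltk.tag.hunpos import HunposTagger

    with HunposTagger("en_wsj.model", path_to_bin="/usr/local/bin/hunpos-tag") as ht:
        # Note: tags come back as bytes (e.g. b'WP'), because the reply is
        # split on b"\t" and never decoded.
        print(ht.tag("What is the airspeed of an unladen swallow ?".split()))
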
diff --git a/nlp_resource_data/nltk/tag/hunpos.pyc b/nlp_resource_data/nltk/tag/hunpos.pyc
deleted file mode 100755 (executable)
index 79372b2..0000000
Binary files a/nlp_resource_data/nltk/tag/hunpos.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tag/mapping.py b/nlp_resource_data/nltk/tag/mapping.py
old mode 100755 (executable)
new mode 100644 (file)
index fc37dbc..9dedbeb
--- a/nlp_resource_data/nltk/tag/mapping.py
+++ b/nlp_resource_data/nltk/tag/mapping.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tagset Mapping
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Nathan Schneider <nathan@cmu.edu>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
@@ -29,37 +29,51 @@ X - other: foreign words, typos, abbreviations
 
 """
 
-from __future__ import print_function, unicode_literals, division
 from collections import defaultdict
 from os.path import join
 
 from nltk.data import load
 
 _UNIVERSAL_DATA = "taggers/universal_tagset"
-_UNIVERSAL_TAGS = ('VERB','NOUN','PRON','ADJ','ADV','ADP','CONJ','DET','NUM','PRT','X','.')
+_UNIVERSAL_TAGS = (
+    "VERB",
+    "NOUN",
+    "PRON",
+    "ADJ",
+    "ADV",
+    "ADP",
+    "CONJ",
+    "DET",
+    "NUM",
+    "PRT",
+    "X",
+    ".",
+)
 
 # _MAPPINGS = defaultdict(lambda: defaultdict(dict))
 # the mapping between tagset T1 and T2 returns UNK if applied to an unrecognized tag
-_MAPPINGS = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 'UNK')))
+_MAPPINGS = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: "UNK")))
 
 
 def _load_universal_map(fileid):
-    contents = load(join(_UNIVERSAL_DATA, fileid+'.map'), format="text")
+    contents = load(join(_UNIVERSAL_DATA, fileid + ".map"), format="text")
 
     # When mapping to the Universal Tagset,
     # map unknown inputs to 'X' not 'UNK'
-    _MAPPINGS[fileid]['universal'].default_factory = lambda: 'X'
+    _MAPPINGS[fileid]["universal"].default_factory = lambda: "X"
 
     for line in contents.splitlines():
         line = line.strip()
-        if line == '':
+        if line == "":
             continue
-        fine, coarse = line.split('\t')
+        fine, coarse = line.split("\t")
 
-        assert coarse in _UNIVERSAL_TAGS, 'Unexpected coarse tag: {}'.format(coarse)
-        assert fine not in _MAPPINGS[fileid]['universal'], 'Multiple entries for original tag: {}'.format(fine)
+        assert coarse in _UNIVERSAL_TAGS, "Unexpected coarse tag: {}".format(coarse)
+        assert (
+            fine not in _MAPPINGS[fileid]["universal"]
+        ), "Multiple entries for original tag: {}".format(fine)
 
-        _MAPPINGS[fileid]['universal'][fine] = coarse
+        _MAPPINGS[fileid]["universal"][fine] = coarse
 
 
 def tagset_mapping(source, target):
@@ -73,10 +87,33 @@ def tagset_mapping(source, target):
     """
 
     if source not in _MAPPINGS or target not in _MAPPINGS[source]:
-        if target == 'universal':
+        if target == "universal":
             _load_universal_map(source)
+            # Added the new Russian National Corpus mappings because the
+            # Russian model for nltk.pos_tag() uses it.
+            _MAPPINGS["ru-rnc-new"]["universal"] = {
+                "A": "ADJ",
+                "A-PRO": "PRON",
+                "ADV": "ADV",
+                "ADV-PRO": "PRON",
+                "ANUM": "ADJ",
+                "CONJ": "CONJ",
+                "INTJ": "X",
+                "NONLEX": ".",
+                "NUM": "NUM",
+                "PARENTH": "PRT",
+                "PART": "PRT",
+                "PR": "ADP",
+                "PRAEDIC": "PRT",
+                "PRAEDIC-PRO": "PRON",
+                "S": "NOUN",
+                "S-PRO": "PRON",
+                "V": "VERB",
+            }
+
     return _MAPPINGS[source][target]
 
+
 def map_tag(source, target, source_tag):
     """
     Maps the tag from the source tagset to the target tagset.
@@ -90,12 +127,10 @@ def map_tag(source, target, source_tag):
     """
 
     # we need a systematic approach to naming
-    if target == 'universal':
-        if source == 'wsj':
-            source = 'en-ptb'
-        if source == 'brown':
-            source = 'en-brown'
+    if target == "universal":
+        if source == "wsj":
+            source = "en-ptb"
+        if source == "brown":
+            source = "en-brown"
 
     return tagset_mapping(source, target)[source_tag]
-
-
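
From the call side: 'wsj' and 'brown' are aliased to 'en-ptb' and 'en-brown' before lookup, unknown fine tags fall back to 'X' when the target is 'universal', and the new ru-rnc-new table is registered as a side effect of the first universal lookup. A sketch (requires the universal_tagset data; the order of calls matters for the last line):

    from nltk.tag.mapping import map_tag, tagset_mapping

    print(map_tag("en-ptb", "universal", "NNS"))  # NOUN
    print(map_tag("wsj", "universal", "VBZ"))     # VERB ('wsj' -> 'en-ptb')
    print(map_tag("en-ptb", "universal", "XYZ"))  # X (default_factory fallback)
    print(tagset_mapping("ru-rnc-new", "universal")["S"])  # NOUN, from the table above
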
diff --git a/nlp_resource_data/nltk/tag/mapping.pyc b/nlp_resource_data/nltk/tag/mapping.pyc
deleted file mode 100755 (executable)
index f582a55..0000000
Binary files a/nlp_resource_data/nltk/tag/mapping.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tag/perceptron.py b/nlp_resource_data/nltk/tag/perceptron.py
old mode 100755 (executable)
new mode 100644 (file)
index 4cedd8d..1742a59
--- a/nlp_resource_data/nltk/tag/perceptron.py
+++ b/nlp_resource_data/nltk/tag/perceptron.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # This module is a port of the Textblob Averaged Perceptron Tagger
-# Author: Matthew Honnibal <honnibal+gh@gmail.com>, 
+# Author: Matthew Honnibal <honnibal+gh@gmail.com>,
 #         Long Duong <longdt219@gmail.com> (NLTK port)
 # URL: <https://github.com/sloria/textblob-aptagger>
 #      <http://nltk.org/>
@@ -9,9 +9,6 @@
 #
 # This module is provided under the terms of the MIT License.
 
-from __future__ import absolute_import
-from __future__ import print_function, division
-
 import random
 from collections import defaultdict
 import pickle
@@ -19,21 +16,30 @@ import logging
 
 from nltk.tag.api import TaggerI
 from nltk.data import find, load
-from nltk.compat import python_2_unicode_compatible
+
+from nltk import jsontags
+
+try:
+    import numpy as np
+except ImportError:
+    # numpy is optional; it is only needed when predict() is asked for a
+    # confidence score via return_conf=True (see _softmax below).
+    pass
 
 PICKLE = "averaged_perceptron_tagger.pickle"
 
-class AveragedPerceptron(object):
+@jsontags.register_tag
+class AveragedPerceptron:
 
-    '''An averaged perceptron, as implemented by Matthew Honnibal.
+    """An averaged perceptron, as implemented by Matthew Honnibal.
 
     See more implementation details here:
         https://explosion.ai/blog/part-of-speech-pos-tagger-in-python
-    '''
+    """
 
-    def __init__(self):
+    json_tag = "nltk.tag.perceptron.AveragedPerceptron"
+
+    def __init__(self, weights=None):
         # Each feature gets its own weight vector, so weights is a dict-of-dicts
-        self.weights = {}
+        self.weights = weights if weights else {}
         self.classes = set()
         # The accumulated values, for the averaging. These will be keyed by
         # feature/class tuples
@@ -45,8 +51,13 @@ class AveragedPerceptron(object):
         # Number of instances seen
         self.i = 0
 
-    def predict(self, features):
-        '''Dot-product the features and current weights and return the best label.'''
+    def _softmax(self, scores):
+        s = np.fromiter(scores.values(), dtype=float)
+        exps = np.exp(s)
+        return exps / np.sum(exps)
+
+    def predict(self, features, return_conf=False):
+        """Dot-product the features and current weights and return the best label."""
         scores = defaultdict(float)
         for feat, value in features.items():
             if feat not in self.weights or value == 0:
@@ -54,11 +65,17 @@ class AveragedPerceptron(object):
             weights = self.weights[feat]
             for label, weight in weights.items():
                 scores[label] += value * weight
+
         # Do a secondary alphabetic sort, for stability
-        return max(self.classes, key=lambda label: (scores[label], label))
+        best_label = max(self.classes, key=lambda label: (scores[label], label))
+        # compute the confidence
+        conf = max(self._softmax(scores)) if return_conf else None
+
+        return best_label, conf
 
     def update(self, truth, guess, features):
-        '''Update the feature weights.'''
+        """Update the feature weights."""
+
         def upd_feat(c, f, w, v):
             param = (f, c)
             self._totals[param] += (self.i - self._tstamps[param]) * w
@@ -74,7 +91,7 @@ class AveragedPerceptron(object):
             upd_feat(guess, f, weights.get(guess, 0.0), -1.0)
 
     def average_weights(self):
-        '''Average weights from all iterations.'''
+        """Average weights from all iterations."""
         for feat, weights in self.weights.items():
             new_feat_weights = {}
             for clas, weight in weights.items():
@@ -87,89 +104,104 @@ class AveragedPerceptron(object):
             self.weights[feat] = new_feat_weights
 
     def save(self, path):
-        '''Save the pickled model weights.'''
-        with open(path, 'wb') as fout:
+        """Save the pickled model weights."""
+        with open(path, "wb") as fout:
             return pickle.dump(dict(self.weights), fout)
 
     def load(self, path):
-        '''Load the pickled model weights.'''
+        """Load the pickled model weights."""
         self.weights = load(path)
 
-@python_2_unicode_compatible
+    def encode_json_obj(self):
+        return self.weights
+
+    @classmethod
+    def decode_json_obj(cls, obj):
+        return cls(obj)
+
+
+@jsontags.register_tag
 class PerceptronTagger(TaggerI):
 
-    '''
+    """
     Greedy Averaged Perceptron tagger, as implemented by Matthew Honnibal.
     See more implementation details here:
         https://explosion.ai/blog/part-of-speech-pos-tagger-in-python
-    
+
     >>> from nltk.tag.perceptron import PerceptronTagger
 
-    Train the model 
-    
+    Train the model
+
     >>> tagger = PerceptronTagger(load=False)
-    
+
     >>> tagger.train([[('today','NN'),('is','VBZ'),('good','JJ'),('day','NN')],
     ... [('yes','NNS'),('it','PRP'),('beautiful','JJ')]])
-    
+
     >>> tagger.tag(['today','is','a','beautiful','day'])
     [('today', 'NN'), ('is', 'PRP'), ('a', 'PRP'), ('beautiful', 'JJ'), ('day', 'NN')]
-    
-    Use the pretrain model (the default constructor) 
-    
+
+    Use the pretrained model (the default constructor)
+
     >>> pretrain = PerceptronTagger()
-    
+
     >>> pretrain.tag('The quick brown fox jumps over the lazy dog'.split())
     [('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN')]
-    
+
     >>> pretrain.tag("The red cat".split())
     [('The', 'DT'), ('red', 'JJ'), ('cat', 'NN')]
-    '''
+    """
+
+    json_tag = "nltk.tag.sequential.PerceptronTagger"
+
+    START = ["-START-", "-START2-"]
+    END = ["-END-", "-END2-"]
 
-    START = ['-START-', '-START2-']
-    END = ['-END-', '-END2-']
-    
     def __init__(self, load=True):
-        '''
+        """
         :param load: Load the pickled model upon instantiation.
-        '''
+        """
         self.model = AveragedPerceptron()
         self.tagdict = {}
         self.classes = set()
         if load:
-            AP_MODEL_LOC = 'file:'+str(find('taggers/averaged_perceptron_tagger/'+PICKLE))
+            AP_MODEL_LOC = "file:" + str(
+                find("taggers/averaged_perceptron_tagger/" + PICKLE)
+            )
             self.load(AP_MODEL_LOC)
 
-    def tag(self, tokens):
-        '''
+    def tag(self, tokens, return_conf=False, use_tagdict=True):
+        """
         Tag tokenized sentences.
         :param tokens: list of words
         :type tokens: list(str)
-        '''
+        """
         prev, prev2 = self.START
         output = []
-        
+
         context = self.START + [self.normalize(w) for w in tokens] + self.END
         for i, word in enumerate(tokens):
-            tag = self.tagdict.get(word)
+            tag, conf = (
+                (self.tagdict.get(word), 1.0) if use_tagdict else (None, None)
+            )
             if not tag:
                 features = self._get_features(i, word, context, prev, prev2)
-                tag = self.model.predict(features)
-            output.append((word, tag))
+                tag, conf = self.model.predict(features, return_conf)
+            output.append((word, tag, conf) if return_conf else (word, tag))
+
             prev2 = prev
             prev = tag
 
         return output
 
     def train(self, sentences, save_loc=None, nr_iter=5):
-        '''Train a model from sentences, and save it at ``save_loc``. ``nr_iter``
+        """Train a model from sentences, and save it at ``save_loc``. ``nr_iter``
         controls the number of Perceptron training iterations.
 
         :param sentences: A list or iterator of sentences, where each sentence
             is a list of (words, tags) tuples.
         :param save_loc: If not ``None``, saves a pickled model in this location.
         :param nr_iter: Number of training iterations.
-        '''
+        """
         # We'd like to allow ``sentences`` to be either a list or an iterator,
         # the latter being especially important for a large training dataset.
         # Because ``self._make_tagdict(sentences)`` runs regardless, we make
@@ -185,15 +217,14 @@ class PerceptronTagger(TaggerI):
             n = 0
             for sentence in self._sentences:
                 words, tags = zip(*sentence)
-                
+
                 prev, prev2 = self.START
-                context = self.START + [self.normalize(w) for w in words] \
-                                                                    + self.END
+                context = self.START + [self.normalize(w) for w in words] + self.END
                 for i, word in enumerate(words):
                     guess = self.tagdict.get(word)
                     if not guess:
                         feats = self._get_features(i, word, context, prev, prev2)
-                        guess = self.model.predict(feats)
+                        guess, _ = self.model.predict(feats)
                         self.model.update(tags[i], guess, feats)
                     prev2 = prev
                     prev = guess
@@ -209,71 +240,81 @@ class PerceptronTagger(TaggerI):
         self.model.average_weights()
         # Pickle as a binary file
         if save_loc is not None:
-            with open(save_loc, 'wb') as fout:
+            with open(save_loc, "wb") as fout:
                 # changed protocol from -1 to 2 to make pickling Python 2 compatible
                 pickle.dump((self.model.weights, self.tagdict, self.classes), fout, 2)
-        
 
     def load(self, loc):
-        '''
+        """
         :param loc: Load a pickled model at location.
-        :type loc: str 
-        '''
+        :type loc: str
+        """
 
         self.model.weights, self.tagdict, self.classes = load(loc)
         self.model.classes = self.classes
-        
+
+    def encode_json_obj(self):
+        return self.model.weights, self.tagdict, list(self.classes)
+
+    @classmethod
+    def decode_json_obj(cls, obj):
+        tagger = cls(load=False)
+        tagger.model.weights, tagger.tagdict, tagger.classes = obj
+        tagger.classes = set(tagger.classes)
+        tagger.model.classes = tagger.classes
+        return tagger
 
     def normalize(self, word):
-        '''
+        """
         Normalization used in pre-processing.
         - All words are lower cased
         - Groups of digits of length 4 are represented as !YEAR;
         - Other digits are represented as !DIGITS
 
         :rtype: str
-        '''
-        if '-' in word and word[0] != '-':
-            return '!HYPHEN'
+        """
+        if "-" in word and word[0] != "-":
+            return "!HYPHEN"
         elif word.isdigit() and len(word) == 4:
-            return '!YEAR'
+            return "!YEAR"
         elif word[0].isdigit():
-            return '!DIGITS'
+            return "!DIGITS"
         else:
             return word.lower()
 
     def _get_features(self, i, word, context, prev, prev2):
-        '''Map tokens into a feature representation, implemented as a
+        """Map tokens into a feature representation, implemented as a
         {hashable: int} dict. If the features change, a new model must be
         trained.
-        '''
+        """
+
         def add(name, *args):
-            features[' '.join((name,) + tuple(args))] += 1
+            features[" ".join((name,) + tuple(args))] += 1
 
         i += len(self.START)
         features = defaultdict(int)
         # It's useful to have a constant feature, which acts sort of like a prior
-        add('bias')
-        add('i suffix', word[-3:])
-        add('i pref1', word[0])
-        add('i-1 tag', prev)
-        add('i-2 tag', prev2)
-        add('i tag+i-2 tag', prev, prev2)
-        add('i word', context[i])
-        add('i-1 tag+i word', prev, context[i])
-        add('i-1 word', context[i-1])
-        add('i-1 suffix', context[i-1][-3:])
-        add('i-2 word', context[i-2])
-        add('i+1 word', context[i+1])
-        add('i+1 suffix', context[i+1][-3:])
-        add('i+2 word', context[i+2])
+        add("bias")
+        add("i suffix", word[-3:])
+        add("i pref1", word[0])
+        add("i-1 tag", prev)
+        add("i-2 tag", prev2)
+        add("i tag+i-2 tag", prev, prev2)
+        add("i word", context[i])
+        add("i-1 tag+i word", prev, context[i])
+        add("i-1 word", context[i - 1])
+        add("i-1 suffix", context[i - 1][-3:])
+        add("i-2 word", context[i - 2])
+        add("i+1 word", context[i + 1])
+        add("i+1 suffix", context[i + 1][-3:])
+        add("i+2 word", context[i + 2])
         return features
 
     def _make_tagdict(self, sentences):
-        '''
+        """
         Make a tag dictionary for single-tag words.
         :param sentences: A list of list of (word, tag) tuples.
-        '''
+        """
         counts = defaultdict(lambda: defaultdict(int))
         for sentence in sentences:
             self._sentences.append(sentence)
@@ -294,36 +335,39 @@ class PerceptronTagger(TaggerI):
 def _pc(n, d):
     return (n / d) * 100
 
+
 def _load_data_conll_format(filename):
-    print ('Read from file: ', filename)
-    with open(filename,'rb') as fin:
+    print("Read from file: ", filename)
+    with open(filename, "rb") as fin:
         sentences = []
         sentence = []
         for line in fin.readlines():
             line = line.strip()
-            #print line
-            if len(line) ==0:
+            # print line
+            if len(line) == 0:
                 sentences.append(sentence)
                 sentence = []
                 continue
-            tokens = line.split('\t')
+            tokens = line.split("\t")
             word = tokens[1]
             tag = tokens[4]
-            sentence.append((word,tag)) 
+            sentence.append((word, tag))
         return sentences
 
+
 def _get_pretrain_model():
     # Train and test on the English part of CoNLL data (WSJ part of Penn Treebank)
-    # Train: section 2-11 
+    # Train: section 2-11
     # Test : section 23
     tagger = PerceptronTagger()
-    training = _load_data_conll_format('english_ptb_train.conll')
-    testing = _load_data_conll_format('english_ptb_test.conll')
-    print ('Size of training and testing (sentence)', len(training), len(testing))
-    # Train and save the model 
-    tagger.train(training, PICKLE) 
-    print ('Accuracy : ',tagger.evaluate(testing))
-    
-if __name__ == '__main__':
-    #_get_pretrain_model()
+    training = _load_data_conll_format("english_ptb_train.conll")
+    testing = _load_data_conll_format("english_ptb_test.conll")
+    print("Size of training and testing (sentence)", len(training), len(testing))
+    # Train and save the model
+    tagger.train(training, PICKLE)
+    print("Accuracy : ", tagger.evaluate(testing))
+
+
+if __name__ == "__main__":
+    # _get_pretrain_model()
     pass
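
The return_conf plumbing added above lets tag() report a per-token confidence: 1.0 for tagdict hits, otherwise the maximum of the softmaxed perceptron scores. A sketch against the pretrained model (requires the averaged_perceptron_tagger data and numpy):

    from nltk.tag.perceptron import PerceptronTagger

    tagger = PerceptronTagger()  # loads the pickled pretrained model
    for word, tag, conf in tagger.tag("The red cat".split(), return_conf=True):
        print(word, tag, conf)
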
diff --git a/nlp_resource_data/nltk/tag/perceptron.pyc b/nlp_resource_data/nltk/tag/perceptron.pyc
deleted file mode 100755 (executable)
index 764210e..0000000
Binary files a/nlp_resource_data/nltk/tag/perceptron.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tag/senna.py b/nlp_resource_data/nltk/tag/senna.py
old mode 100755 (executable)
new mode 100644 (file)
index c74ec94..5231d25
--- a/nlp_resource_data/nltk/tag/senna.py
+++ b/nlp_resource_data/nltk/tag/senna.py
@@ -1,7 +1,7 @@
 # encoding: utf-8
 # Natural Language Toolkit: Senna POS Tagger
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Rami Al-Rfou' <ralrfou@cs.stonybrook.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -39,13 +39,13 @@ Note: Unit tests for this module can be found in test/unit/test_senna.py
     ('NY', 'B-LOC'), (',', 'O'), ('USA', 'B-LOC'), ('.', 'O')]
 """
 
-from nltk.compat import python_2_unicode_compatible
 from nltk.classify import Senna
 
-@python_2_unicode_compatible
+
 class SennaTagger(Senna):
-    def __init__(self, path, encoding='utf-8'):
-        super(SennaTagger, self).__init__(path, ['pos'], encoding)
+    def __init__(self, path, encoding="utf-8"):
+        super(SennaTagger, self).__init__(path, ["pos"], encoding)
 
     def tag_sents(self, sentences):
         """
@@ -56,13 +56,14 @@ class SennaTagger(Senna):
         for i in range(len(tagged_sents)):
             for j in range(len(tagged_sents[i])):
                 annotations = tagged_sents[i][j]
-                tagged_sents[i][j] = (annotations['word'], annotations['pos'])
+                tagged_sents[i][j] = (annotations["word"], annotations["pos"])
         return tagged_sents
 
-@python_2_unicode_compatible
+
 class SennaChunkTagger(Senna):
-    def __init__(self, path, encoding='utf-8'):
-        super(SennaChunkTagger, self).__init__(path, ['chk'], encoding)
+    def __init__(self, path, encoding="utf-8"):
+        super(SennaChunkTagger, self).__init__(path, ["chk"], encoding)
 
     def tag_sents(self, sentences):
         """
@@ -73,7 +74,7 @@ class SennaChunkTagger(Senna):
         for i in range(len(tagged_sents)):
             for j in range(len(tagged_sents[i])):
                 annotations = tagged_sents[i][j]
-                tagged_sents[i][j] = (annotations['word'], annotations['chk'])
+                tagged_sents[i][j] = (annotations["word"], annotations["chk"])
         return tagged_sents
 
     def bio_to_chunks(self, tagged_sent, chunk_type):
@@ -104,24 +105,24 @@ class SennaChunkTagger(Senna):
         current_chunk_position = []
         for idx, word_pos in enumerate(tagged_sent):
             word, pos = word_pos
-            if '-'+chunk_type in pos: # Append the word to the current_chunk.
+            if "-" + chunk_type in pos:  # Append the word to the current_chunk.
                 current_chunk.append((word))
                 current_chunk_position.append((idx))
             else:
-                if current_chunk: # Flush the full chunk when out of an NP.
-                    _chunk_str = ' '.join(current_chunk)
-                    _chunk_pos_str = '-'.join(map(str, current_chunk_position))
+                if current_chunk:  # Flush the full chunk when out of an NP.
+                    _chunk_str = " ".join(current_chunk)
+                    _chunk_pos_str = "-".join(map(str, current_chunk_position))
                     yield _chunk_str, _chunk_pos_str
                     current_chunk = []
                     current_chunk_position = []
-        if current_chunk: # Flush the last chunk.
-            yield ' '.join(current_chunk), '-'.join(map(str, current_chunk_position))
+        if current_chunk:  # Flush the last chunk.
+            yield " ".join(current_chunk), "-".join(map(str, current_chunk_position))
 
 
-@python_2_unicode_compatible
 class SennaNERTagger(Senna):
-    def __init__(self, path, encoding='utf-8'):
-        super(SennaNERTagger, self).__init__(path, ['ner'], encoding)
+    def __init__(self, path, encoding="utf-8"):
+        super(SennaNERTagger, self).__init__(path, ["ner"], encoding)
 
     def tag_sents(self, sentences):
         """
@@ -132,16 +133,15 @@ class SennaNERTagger(Senna):
         for i in range(len(tagged_sents)):
             for j in range(len(tagged_sents[i])):
                 annotations = tagged_sents[i][j]
-                tagged_sents[i][j] = (annotations['word'], annotations['ner'])
+                tagged_sents[i][j] = (annotations["word"], annotations["ner"])
         return tagged_sents
 
 
-
 # skip doctests if Senna is not installed
 def setup_module(module):
     from nose import SkipTest
+
     try:
-        tagger = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner'])
+        tagger = Senna("/usr/share/senna-v3.0", ["pos", "chk", "ner"])
     except OSError:
         raise SkipTest("Senna executable not found")
-
diff --git a/nlp_resource_data/nltk/tag/senna.pyc b/nlp_resource_data/nltk/tag/senna.pyc
deleted file mode 100755 (executable)
index 57ac951..0000000
Binary files a/nlp_resource_data/nltk/tag/senna.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tag/sequential.py b/nlp_resource_data/nltk/tag/sequential.py
old mode 100755 (executable)
new mode 100644 (file)
index 3cdcd05..e49d3ad
--- a/nlp_resource_data/nltk/tag/sequential.py
+++ b/nlp_resource_data/nltk/tag/sequential.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Sequential Backoff Taggers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com> (minor additions)
 #         Tiago Tresoldi <tresoldi@users.sf.net> (original affix tagger)
@@ -17,14 +17,13 @@ determine a tag for the specified token, then its backoff tagger is
 consulted instead.  Any SequentialBackoffTagger may serve as a
 backoff tagger for any other SequentialBackoffTagger.
 """
-from __future__ import print_function, unicode_literals
+import ast
 from abc import abstractmethod
 
 import re
 
 from nltk.probability import ConditionalFreqDist
 from nltk.classify import NaiveBayesClassifier
-from nltk.compat import python_2_unicode_compatible
 
 from nltk.tag.api import TaggerI, FeaturesetTaggerI
 
@@ -45,6 +44,7 @@ class SequentialBackoffTagger(TaggerI):
     :ivar _taggers: A list of all the taggers that should be tried to
         tag a token (i.e., self and its backoff taggers).
     """
+
     def __init__(self, backoff=None):
         if backoff is None:
             self._taggers = [self]
@@ -105,7 +105,6 @@ class SequentialBackoffTagger(TaggerI):
         """
 
 
-@python_2_unicode_compatible
 class ContextTagger(SequentialBackoffTagger):
     """
     An abstract base class for sequential backoff taggers that choose
@@ -120,13 +119,14 @@ class ContextTagger(SequentialBackoffTagger):
 
     :ivar _context_to_tag: Dictionary mapping contexts to tags.
     """
+
     def __init__(self, context_to_tag, backoff=None):
         """
         :param context_to_tag: A dictionary mapping contexts to tags.
         :param backoff: The backoff tagger that should be used for this tagger.
         """
-        SequentialBackoffTagger.__init__(self, backoff)
-        self._context_to_tag = (context_to_tag if context_to_tag else {})
+        super().__init__(backoff)
+        self._context_to_tag = context_to_tag if context_to_tag else {}
 
     @abstractmethod
     def context(self, tokens, index, history):
@@ -149,7 +149,7 @@ class ContextTagger(SequentialBackoffTagger):
         return len(self._context_to_tag)
 
     def __repr__(self):
-        return '<%s: size=%d>' % (self.__class__.__name__, self.size())
+        return "<{}: size={}>".format(self.__class__.__name__, self.size())
 
     def _train(self, tagged_corpus, cutoff=0, verbose=False):
         """
@@ -187,9 +187,9 @@ class ContextTagger(SequentialBackoffTagger):
                     continue
                 fd[context][tag] += 1
                 # If the backoff got it wrong, this context is useful:
-                if (self.backoff is None or
-                        tag != self.backoff.tag_one(
-                        tokens, index, tags[:index])):
+                if self.backoff is None or tag != self.backoff.tag_one(
+                    tokens, index, tags[:index]
+                ):
                     useful_contexts.add(context)
 
         # Build the context_to_tag table -- for each context, figure
@@ -207,15 +207,15 @@ class ContextTagger(SequentialBackoffTagger):
             size = len(self._context_to_tag)
             backoff = 100 - (hit_count * 100.0) / token_count
             pruning = 100 - (size * 100.0) / len(fd.conditions())
-            print("[Trained Unigram tagger:", end=' ')
-            print("size=%d, backoff=%.2f%%, pruning=%.2f%%]" % (
-                size, backoff, pruning))
+            print("[Trained Unigram tagger:", end=" ")
+            print("size={}, backoff={:.2f}%, pruning={:.2f}%]".format(size, backoff, pruning))
 
 
 ######################################################################
 # Tagger Classes
 ######################################################################
-@python_2_unicode_compatible
+
+
 @jsontags.register_tag
 class DefaultTagger(SequentialBackoffTagger):
     """
@@ -234,11 +234,11 @@ class DefaultTagger(SequentialBackoffTagger):
     :type tag: str
     """
 
-    json_tag = 'nltk.tag.sequential.DefaultTagger'
+    json_tag = "nltk.tag.sequential.DefaultTagger"
 
     def __init__(self, tag):
         self._tag = tag
-        SequentialBackoffTagger.__init__(self, None)
+        super().__init__(None)
 
     def encode_json_obj(self):
         return self._tag
@@ -252,7 +252,7 @@ class DefaultTagger(SequentialBackoffTagger):
         return self._tag  # ignore token and history
 
     def __repr__(self):
-        return '<DefaultTagger: tag=%s>' % self._tag
+        return "<DefaultTagger: tag={}>".format(self._tag)
 
 
 @jsontags.register_tag
@@ -279,28 +279,46 @@ class NgramTagger(ContextTagger):
         fewer than *cutoff* times, then exclude it from the
         context-to-tag table for the new tagger.
     """
-    json_tag = 'nltk.tag.sequential.NgramTagger'
 
-    def __init__(self, n, train=None, model=None,
-                 backoff=None, cutoff=0, verbose=False):
+    json_tag = "nltk.tag.sequential.NgramTagger"
+
+    def __init__(
+        self, n, train=None, model=None, backoff=None, cutoff=0, verbose=False
+    ):
         self._n = n
         self._check_params(train, model)
 
-        ContextTagger.__init__(self, model, backoff)
+        super().__init__(model, backoff)
 
         if train:
             self._train(train, cutoff, verbose)
 
     def encode_json_obj(self):
-        return self._n, self._context_to_tag, self.backoff
+        _context_to_tag = {repr(k): v for k, v in self._context_to_tag.items()}
+        if "NgramTagger" in self.__class__.__name__:
+            return self._n, _context_to_tag, self.backoff
+        else:
+            return _context_to_tag, self.backoff
 
     @classmethod
     def decode_json_obj(cls, obj):
-        _n, _context_to_tag, backoff = obj
-        return cls(_n, model=_context_to_tag, backoff=backoff)
+        try:
+            _n, _context_to_tag, backoff = obj
+        except ValueError:
+            _context_to_tag, backoff = obj
+
+        if not _context_to_tag:
+            return backoff
+
+        _context_to_tag = {ast.literal_eval(k): v for k, v in _context_to_tag.items()}
+
+        if "NgramTagger" in cls.__name__:
+            return cls(_n, model=_context_to_tag, backoff=backoff)
+        else:
+            return cls(model=_context_to_tag, backoff=backoff)
 
     def context(self, tokens, index, history):
-        tag_context = tuple(history[max(0, index-self._n+1):index])
+        tag_context = tuple(history[max(0, index - self._n + 1) : index])
         return tag_context, tokens[index]
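
The repr()/ast.literal_eval() pair above exists because JSON object keys must be strings, while an n-gram context key is the tuple (previous_tags, token) returned by context(); repr round-trips it exactly:

    import ast

    context = (("DT", "JJ"), "dog")  # (tuple of preceding tags, current token)
    key = repr(context)              # "(('DT', 'JJ'), 'dog')" -- a JSON-safe string
    assert ast.literal_eval(key) == context
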
 
 
@@ -317,7 +335,7 @@ class UnigramTagger(NgramTagger):
         >>> test_sent = brown.sents(categories='news')[0]
         >>> unigram_tagger = UnigramTagger(brown.tagged_sents(categories='news')[:500])
         >>> for tok, tag in unigram_tagger.tag(test_sent):
-        ...     print("(%s, %s), " % (tok, tag))
+        ...     print("({}, {}), ".format(tok, tag))
         (The, AT), (Fulton, NP-TL), (County, NN-TL), (Grand, JJ-TL),
         (Jury, NN-TL), (said, VBD), (Friday, NR), (an, AT),
         (investigation, NN), (of, IN), (Atlanta's, NP$), (recent, JJ),
@@ -337,20 +355,10 @@ class UnigramTagger(NgramTagger):
     :type cutoff: int
     """
 
-    json_tag = 'nltk.tag.sequential.UnigramTagger'
-
-    def __init__(self, train=None, model=None,
-                 backoff=None, cutoff=0, verbose=False):
-        NgramTagger.__init__(self, 1, train, model,
-                             backoff, cutoff, verbose)
-
-    def encode_json_obj(self):
-        return self._context_to_tag, self.backoff
+    json_tag = "nltk.tag.sequential.UnigramTagger"
 
-    @classmethod
-    def decode_json_obj(cls, obj):
-        _context_to_tag, backoff = obj
-        return cls(model=_context_to_tag, backoff=backoff)
+    def __init__(self, train=None, model=None, backoff=None, cutoff=0, verbose=False):
+        super().__init__(1, train, model, backoff, cutoff, verbose)
 
     def context(self, tokens, index, history):
         return tokens[index]
@@ -375,20 +383,11 @@ class BigramTagger(NgramTagger):
         in order not to use the backoff tagger
     :type cutoff: int
     """
-    json_tag = 'nltk.tag.sequential.BigramTagger'
 
-    def __init__(self, train=None, model=None,
-                 backoff=None, cutoff=0, verbose=False):
-        NgramTagger.__init__(self, 2, train, model,
-                             backoff, cutoff, verbose)
+    json_tag = "nltk.tag.sequential.BigramTagger"
 
-    def encode_json_obj(self):
-        return self._context_to_tag, self.backoff
-
-    @classmethod
-    def decode_json_obj(cls, obj):
-        _context_to_tag, backoff = obj
-        return cls(model=_context_to_tag, backoff=backoff)
+    def __init__(self, train=None, model=None, backoff=None, cutoff=0, verbose=False):
+        super().__init__(2, train, model, backoff, cutoff, verbose)
 
 
 @jsontags.register_tag
@@ -410,20 +409,11 @@ class TrigramTagger(NgramTagger):
         in order not to use the backoff tagger
     :type cutoff: int
     """
-    json_tag = 'nltk.tag.sequential.TrigramTagger'
 
-    def __init__(self, train=None, model=None,
-                 backoff=None, cutoff=0, verbose=False):
-        NgramTagger.__init__(self, 3, train, model,
-                             backoff, cutoff, verbose)
+    json_tag = "nltk.tag.sequential.TrigramTagger"
 
-    def encode_json_obj(self):
-        return self._context_to_tag, self.backoff
-
-    @classmethod
-    def decode_json_obj(cls, obj):
-        _context_to_tag, backoff = obj
-        return cls(model=_context_to_tag, backoff=backoff)
+    def __init__(self, train=None, model=None, backoff=None, cutoff=0, verbose=False):
+        super().__init__(3, train, model, backoff, cutoff, verbose)
 
 
 @jsontags.register_tag
@@ -446,14 +436,22 @@ class AffixTagger(ContextTagger):
         tag of None by this tagger.
     """
 
-    json_tag = 'nltk.tag.sequential.AffixTagger'
+    json_tag = "nltk.tag.sequential.AffixTagger"
 
-    def __init__(self, train=None, model=None, affix_length=-3,
-                 min_stem_length=2, backoff=None, cutoff=0, verbose=False):
+    def __init__(
+        self,
+        train=None,
+        model=None,
+        affix_length=-3,
+        min_stem_length=2,
+        backoff=None,
+        cutoff=0,
+        verbose=False,
+    ):
 
         self._check_params(train, model)
 
-        ContextTagger.__init__(self, model, backoff)
+        super().__init__(model, backoff)
 
         self._affix_length = affix_length
         self._min_word_length = min_stem_length + abs(affix_length)
@@ -462,7 +460,12 @@ class AffixTagger(ContextTagger):
             self._train(train, cutoff, verbose)
 
     def encode_json_obj(self):
-        return self._affix_length, self._min_word_length, self._context_to_tag, self.backoff
+        return (
+            self._affix_length,
+            self._min_word_length,
+            self._context_to_tag,
+            self.backoff,
+        )
 
     @classmethod
     def decode_json_obj(cls, obj):
@@ -471,7 +474,7 @@ class AffixTagger(ContextTagger):
             affix_length=_affix_length,
             min_stem_length=_min_word_length - abs(_affix_length),
             model=_context_to_tag,
-            backoff=backoff
+            backoff=backoff,
         )
 
     def context(self, tokens, index, history):
@@ -479,12 +482,11 @@ class AffixTagger(ContextTagger):
         if len(token) < self._min_word_length:
             return None
         elif self._affix_length > 0:
-            return token[:self._affix_length]
+            return token[: self._affix_length]
         else:
-            return token[self._affix_length:]
+            return token[self._affix_length :]
 
 
-@python_2_unicode_compatible
 @jsontags.register_tag
 class RegexpTagger(SequentialBackoffTagger):
     """
@@ -528,36 +530,36 @@ class RegexpTagger(SequentialBackoffTagger):
         assigned the tag None.
     """
 
-    json_tag = 'nltk.tag.sequential.RegexpTagger'
+    json_tag = "nltk.tag.sequential.RegexpTagger"
 
     def __init__(self, regexps, backoff=None):
         """
         """
-        SequentialBackoffTagger.__init__(self, backoff)
-        self._regexs = [(re.compile(regexp), tag,) for regexp, tag in regexps]
+        super().__init__(backoff)
+        try:
+            # Compile in a for-loop (not a comprehension) so that regexp and
+            # tag stay in scope for the error message below: comprehension
+            # variables are local in Python 3.
+            self._regexps = []
+            for regexp, tag in regexps:
+                self._regexps.append((re.compile(regexp), tag))
+        except Exception as e:
+            raise Exception(
+                "Invalid RegexpTagger regexp:", str(e), "regexp:", regexp, "tag:", tag
+            )
 
     def encode_json_obj(self):
-        return [(regexp.patten, tag,) for regexp, tag in self._regexs], self.backoff
+        return [(regexp.pattern, tag) for regexp, tag in self._regexps], self.backoff
 
     @classmethod
     def decode_json_obj(cls, obj):
         regexps, backoff = obj
-        self = cls(())
-        self._regexs = [(re.compile(regexp), tag,) for regexp, tag in regexps]
-        SequentialBackoffTagger.__init__(self, backoff)
-        return self
+        return cls(regexps, backoff)
 
     def choose_tag(self, tokens, index, history):
-        for regexp, tag in self._regexs:
+        for regexp, tag in self._regexps:
             if re.match(regexp, tokens[index]):
                 return tag
         return None
 
     def __repr__(self):
-        return '<Regexp Tagger: size=%d>' % len(self._regexs)
+        return "<Regexp Tagger: size={}>".format(len(self._regexps))
 
 
-@python_2_unicode_compatible
 class ClassifierBasedTagger(SequentialBackoffTagger, FeaturesetTaggerI):
     """
     A sequential tagger that uses a classifier to choose the tag for
@@ -600,17 +602,25 @@ class ClassifierBasedTagger(SequentialBackoffTagger, FeaturesetTaggerI):
         back on its backoff tagger if the probability of the most
         likely tag is less than *cutoff_prob*.
     """
-    def __init__(self, feature_detector=None, train=None,
-                 classifier_builder=NaiveBayesClassifier.train,
-                 classifier=None, backoff=None,
-                 cutoff_prob=None, verbose=False):
+
+    def __init__(
+        self,
+        feature_detector=None,
+        train=None,
+        classifier_builder=NaiveBayesClassifier.train,
+        classifier=None,
+        backoff=None,
+        cutoff_prob=None,
+        verbose=False,
+    ):
         self._check_params(train, classifier)
 
-        SequentialBackoffTagger.__init__(self, backoff)
+        super().__init__(backoff)
 
         if (train and classifier) or (not train and not classifier):
-            raise ValueError('Must specify either training data or '
-                             'trained classifier.')
+            raise ValueError(
+                "Must specify either training data or " "trained classifier."
+            )
 
         if feature_detector is not None:
             self._feature_detector = feature_detector
@@ -649,23 +659,22 @@ class ClassifierBasedTagger(SequentialBackoffTagger, FeaturesetTaggerI):
 
         classifier_corpus = []
         if verbose:
-            print('Constructing training corpus for classifier.')
+            print("Constructing training corpus for classifier.")
 
         for sentence in tagged_corpus:
             history = []
             untagged_sentence, tags = zip(*sentence)
             for index in range(len(sentence)):
-                featureset = self.feature_detector(untagged_sentence,
-                                                   index, history)
+                featureset = self.feature_detector(untagged_sentence, index, history)
                 classifier_corpus.append((featureset, tags[index]))
                 history.append(tags[index])
 
         if verbose:
-            print('Training classifier (%d instances)' % len(classifier_corpus))
+            print("Training classifier ({} instances)".format(len(classifier_corpus)))
         self._classifier = classifier_builder(classifier_corpus)
 
     def __repr__(self):
-        return '<ClassifierBasedTagger: %r>' % self._classifier
+        return "<ClassifierBasedTagger: {}>".format(self._classifier)
 
     def feature_detector(self, tokens, index, history):
         """
@@ -693,48 +702,49 @@ class ClassifierBasedPOSTagger(ClassifierBasedTagger):
     """
     A classifier based part of speech tagger.
     """
+
     def feature_detector(self, tokens, index, history):
         word = tokens[index]
         if index == 0:
             prevword = prevprevword = None
             prevtag = prevprevtag = None
         elif index == 1:
-            prevword = tokens[index-1].lower()
+            prevword = tokens[index - 1].lower()
             prevprevword = None
-            prevtag = history[index-1]
+            prevtag = history[index - 1]
             prevprevtag = None
         else:
-            prevword = tokens[index-1].lower()
-            prevprevword = tokens[index-2].lower()
-            prevtag = history[index-1]
-            prevprevtag = history[index-2]
-
-        if re.match('[0-9]+(\.[0-9]*)?|[0-9]*\.[0-9]+$', word):
-            shape = 'number'
-        elif re.match('\W+$', word):
-            shape = 'punct'
-        elif re.match('[A-Z][a-z]+$', word):
-            shape = 'upcase'
-        elif re.match('[a-z]+$', word):
-            shape = 'downcase'
-        elif re.match('\w+$', word):
-            shape = 'mixedcase'
+            prevword = tokens[index - 1].lower()
+            prevprevword = tokens[index - 2].lower()
+            prevtag = history[index - 1]
+            prevprevtag = history[index - 2]
+
+        if re.match(r"[0-9]+(\.[0-9]*)?|[0-9]*\.[0-9]+$", word):
+            shape = "number"
+        elif re.match(r"\W+$", word):
+            shape = "punct"
+        elif re.match("[A-Z][a-z]+$", word):
+            shape = "upcase"
+        elif re.match("[a-z]+$", word):
+            shape = "downcase"
+        elif re.match(r"\w+$", word):
+            shape = "mixedcase"
+            shape = "mixedcase"
         else:
-            shape = 'other'
+            shape = "other"
 
         features = {
-            'prevtag': prevtag,
-            'prevprevtag': prevprevtag,
-            'word': word,
-            'word.lower': word.lower(),
-            'suffix3': word.lower()[-3:],
-            'suffix2': word.lower()[-2:],
-            'suffix1': word.lower()[-1:],
-            'prevprevword': prevprevword,
-            'prevword': prevword,
-            'prevtag+word': '%s+%s' % (prevtag, word.lower()),
-            'prevprevtag+word': '%s+%s' % (prevprevtag, word.lower()),
-            'prevword+word': '%s+%s' % (prevword, word.lower()),
-            'shape': shape,
-            }
+            "prevtag": prevtag,
+            "prevprevtag": prevprevtag,
+            "word": word,
+            "word.lower": word.lower(),
+            "suffix3": word.lower()[-3:],
+            "suffix2": word.lower()[-2:],
+            "suffix1": word.lower()[-1:],
+            "prevprevword": prevprevword,
+            "prevword": prevword,
+            "prevtag+word": "{}+{}".format(prevtag, word.lower()),
+            "prevprevtag+word": "{}+{}".format(prevprevtag, word.lower()),
+            "prevword+word": "{}+{}".format(prevword, word.lower()),
+            "shape": shape,
+        }
         return features
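
The sequential taggers are meant to be chained; each one consults its backoff when its own context table has no answer. The canonical stack, as a sketch (Brown corpus required; the regexps are illustrative):

    from nltk.corpus import brown
    from nltk.tag import BigramTagger, DefaultTagger, RegexpTagger, UnigramTagger

    train = brown.tagged_sents(categories="news")[:500]

    t0 = DefaultTagger("NN")
    t1 = RegexpTagger([(r".*ing$", "VBG"), (r".*s$", "NNS")], backoff=t0)
    t2 = UnigramTagger(train, backoff=t1)
    t3 = BigramTagger(train, backoff=t2, cutoff=1)

    print(t3.tag("The quick brown fox is jumping".split()))
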
diff --git a/nlp_resource_data/nltk/tag/sequential.pyc b/nlp_resource_data/nltk/tag/sequential.pyc
deleted file mode 100755 (executable)
index d9a4799..0000000
Binary files a/nlp_resource_data/nltk/tag/sequential.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tag/stanford.py b/nlp_resource_data/nltk/tag/stanford.py
old mode 100755 (executable)
new mode 100644 (file)
index 26f36db..cd7250c
--- a/nlp_resource_data/nltk/tag/stanford.py
+++ b/nlp_resource_data/nltk/tag/stanford.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the Stanford Part-of-speech and Named-Entity Taggers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Nitin Madnani <nmadnani@ets.org>
 #         Rami Al-Rfou' <ralrfou@cs.stonybrook.edu>
 # URL: <http://nltk.org/>
@@ -23,13 +23,10 @@ import tempfile
 from subprocess import PIPE
 import warnings
 
-from six import text_type
-
 from nltk.internals import find_file, find_jar, config_java, java, _java_options
 from nltk.tag.api import TaggerI
-from nltk.parse.corenlp import CoreNLPParser
 
-_stanford_url = 'https://nlp.stanford.edu/software'
+_stanford_url = "https://nlp.stanford.edu/software"
 
 
 class StanfordTagger(TaggerI):
@@ -43,31 +40,41 @@ class StanfordTagger(TaggerI):
     - ``_JAR`` file: Class constant that represents the jar file name.
     """
 
-    _SEPARATOR = ''
-    _JAR = ''
-
-    def __init__(self, model_filename, path_to_jar=None, encoding='utf8',
-                 verbose=False, java_options='-mx1000m'):
+    _SEPARATOR = ""
+    _JAR = ""
+
+    def __init__(
+        self,
+        model_filename,
+        path_to_jar=None,
+        encoding="utf8",
+        verbose=False,
+        java_options="-mx1000m",
+    ):
         # Raise deprecation warning.
-        warnings.simplefilter('always', DeprecationWarning)
-        warnings.warn(str("\nThe StanfordTokenizer will "
-                          "be deprecated in version 3.2.5.\n"
-                          "Please use \033[91mnltk.tag.stanford.CoreNLPPOSTagger\033[0m "
-                          "or \033[91mnltk.tag.stanford.CoreNLPNERTagger\033[0m instead."),
-                      DeprecationWarning, stacklevel=2)
-        warnings.simplefilter('ignore', DeprecationWarning)
+        warnings.warn(
+            str(
+                "\nThe StanfordTokenizer will "
+                "be deprecated in version 3.2.6.\n"
+                "Please use \033[91mnltk.parse.corenlp.CoreNLPParser\033[0m instead."
+            ),
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
         if not self._JAR:
-            warnings.warn('The StanfordTagger class is not meant to be '
-                          'instantiated directly. Did you mean '
-                          'StanfordPOSTagger or StanfordNERTagger?')
+            warnings.warn(
+                "The StanfordTagger class is not meant to be "
+                "instantiated directly. Did you mean "
+                "StanfordPOSTagger or StanfordNERTagger?"
+            )
         self._stanford_jar = find_jar(
-                self._JAR, path_to_jar,
-                searchpath=(), url=_stanford_url,
-                verbose=verbose)
+            self._JAR, path_to_jar, searchpath=(), url=_stanford_url, verbose=verbose
+        )
 
-        self._stanford_model = find_file(model_filename,
-                                         env_vars=('STANFORD_MODELS',),
-                                         verbose=verbose)
+        self._stanford_model = find_file(
+            model_filename, env_vars=("STANFORD_MODELS",), verbose=verbose
+        )
 
         self._encoding = encoding
         self.java_options = java_options
@@ -85,26 +92,27 @@ class StanfordTagger(TaggerI):
 
     def tag_sents(self, sentences):
         encoding = self._encoding
-        default_options = ' '.join(_java_options)
+        default_options = " ".join(_java_options)
         config_java(options=self.java_options, verbose=False)
 
         # Create a temporary input file
         _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
 
         cmd = list(self._cmd)
-        cmd.extend(['-encoding', encoding])
+        cmd.extend(["-encoding", encoding])
 
         # Write the actual sentences to the temporary input file
-        _input_fh = os.fdopen(_input_fh, 'wb')
-        _input = '\n'.join((' '.join(x) for x in sentences))
-        if isinstance(_input, text_type) and encoding:
+        _input_fh = os.fdopen(_input_fh, "wb")
+        _input = "\n".join((" ".join(x) for x in sentences))
+        if isinstance(_input, str) and encoding:
             _input = _input.encode(encoding)
         _input_fh.write(_input)
         _input_fh.close()
 
         # Run the tagger and get the output
-        stanpos_output, _stderr = java(cmd, classpath=self._stanford_jar,
-                                       stdout=PIPE, stderr=PIPE)
+        stanpos_output, _stderr = java(
+            cmd, classpath=self._stanford_jar, stdout=PIPE, stderr=PIPE
+        )
         stanpos_output = stanpos_output.decode(encoding)
 
         # Delete the temporary file
@@ -122,7 +130,7 @@ class StanfordTagger(TaggerI):
             sentence = []
             for tagged_word in tagged_sentence.strip().split():
                 word_tags = tagged_word.strip().split(self._SEPARATOR)
-                sentence.append((''.join(word_tags[:-1]), word_tags[-1]))
+                sentence.append(("".join(word_tags[:-1]), word_tags[-1]))
             tagged_sentences.append(sentence)
         return tagged_sentences
 
@@ -142,18 +150,26 @@ class StanfordPOSTagger(StanfordTagger):
         >>> st.tag('What is the airspeed of an unladen swallow ?'.split())
         [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'JJ'), ('swallow', 'VB'), ('?', '.')]
     """
-    _SEPARATOR = '_'
-    _JAR = 'stanford-postagger.jar'
+
+    _SEPARATOR = "_"
+    _JAR = "stanford-postagger.jar"
 
     def __init__(self, *args, **kwargs):
         super(StanfordPOSTagger, self).__init__(*args, **kwargs)
 
     @property
     def _cmd(self):
-        return ['edu.stanford.nlp.tagger.maxent.MaxentTagger',
-                '-model', self._stanford_model, '-textFile',
-                self._input_file_path, '-tokenize', 'false',
-                '-outputFormatOptions', 'keepEmptySentences']
+        return [
+            "edu.stanford.nlp.tagger.maxent.MaxentTagger",
+            "-model",
+            self._stanford_model,
+            "-textFile",
+            self._input_file_path,
+            "-tokenize",
+            "false",
+            "-outputFormatOptions",
+            "keepEmptySentences",
+        ]
 
 
 class StanfordNERTagger(StanfordTagger):
@@ -175,9 +191,9 @@ class StanfordNERTagger(StanfordTagger):
          ('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'LOCATION')]
     """
 
-    _SEPARATOR = '/'
-    _JAR = 'stanford-ner.jar'
-    _FORMAT = 'slashTags'
+    _SEPARATOR = "/"
+    _JAR = "stanford-ner.jar"
+    _FORMAT = "slashTags"
 
     def __init__(self, *args, **kwargs):
         super(StanfordNERTagger, self).__init__(*args, **kwargs)
@@ -185,106 +201,47 @@ class StanfordNERTagger(StanfordTagger):
     @property
     def _cmd(self):
         # Adding -tokenizerFactory edu.stanford.nlp.process.WhitespaceTokenizer -tokenizerOptions tokenizeNLs=false so that the Stanford tokenizer is not used
-        return ['edu.stanford.nlp.ie.crf.CRFClassifier',
-                '-loadClassifier', self._stanford_model, '-textFile',
-                self._input_file_path, '-outputFormat', self._FORMAT,
-                '-tokenizerFactory',
-                'edu.stanford.nlp.process.WhitespaceTokenizer',
-                '-tokenizerOptions', '\"tokenizeNLs=false\"']
+        return [
+            "edu.stanford.nlp.ie.crf.CRFClassifier",
+            "-loadClassifier",
+            self._stanford_model,
+            "-textFile",
+            self._input_file_path,
+            "-outputFormat",
+            self._FORMAT,
+            "-tokenizerFactory",
+            "edu.stanford.nlp.process.WhitespaceTokenizer",
+            "-tokenizerOptions",
+            '"tokenizeNLs=false"',
+        ]
 
     def parse_output(self, text, sentences):
-        if self._FORMAT == 'slashTags':
+        if self._FORMAT == "slashTags":
             # Join the output together into one big list
             tagged_sentences = []
             for tagged_sentence in text.strip().split("\n"):
                 for tagged_word in tagged_sentence.strip().split():
                     word_tags = tagged_word.strip().split(self._SEPARATOR)
-                    tagged_sentences.append((''.join(word_tags[:-1]),
-                                             word_tags[-1]))
+                    tagged_sentences.append(("".join(word_tags[:-1]), word_tags[-1]))
 
             # Separate it according to the input
             result = []
             start = 0
             for sent in sentences:
-                result.append(tagged_sentences[start:start + len(sent)])
+                result.append(tagged_sentences[start : start + len(sent)])
                 start += len(sent)
             return result
 
         raise NotImplementedError
 
-class CoreNLPTagger(CoreNLPParser, TaggerI):
-    def __init__(self, tagtype, url='http://localhost:9000', encoding='utf8'):
-        """
-        An abstract interface to POS/NER taggers of CoreNLP that returns the
-        POS/NER tags from the Stanford CoreNLP API at nltk.parse.corenlp.
-        """
-        self.tagtype = tagtype
-        super(CoreNLPTagger, self).__init__(url, encoding)
-
-    def tag_sents(self, sentences):
-        # Converting list(list(str)) -> list(str)
-        sentences = (' '.join(words) for words in sentences)
-        return list(self.raw_tag_sents(sentences))
-
-
-    def tag(self, sentence):
-        return self.tag_sents([sentence])[0]
-
-    def raw_tag_sents(self, sentences):
-        """
-        This function will interface the `GenericCoreNLPParser.api_call` to
-        retreive the JSON output and return the annotations required.
-        """
-        default_properties = {'ssplit.isOneSentence': 'true',
-                              'annotators': 'tokenize,ssplit,' }
-        # Supports only 'pos' or 'ner' tags.
-        assert self.tagtype in ['pos', 'ner']
-        default_properties['annotators'] += self.tagtype
-        for sentence in sentences:
-            tagged_data = self.api_call(sentence, properties=default_properties)
-            assert len(tagged_data['sentences']) == 1
-            # Taggers only need to return 1-best sentence.
-            yield [(token['word'], token[self.tagtype]) for token in tagged_data['sentences'][0]['tokens']]
-
-
-class CoreNLPPOSTagger(CoreNLPTagger):
-    """
-    This is a subclass of the CoreNLPTagger that wraps around the
-    nltk.parse.CoreNLPParser for Part-of-Sppech tagging.
-
-        >>> from nltk.tag.stanford import CoreNLPPOSTagger
-        >>> CoreNLPPOSTagger(url='http://localhost:9000').tag('What is the airspeed of an unladen swallow ?'.split()) # doctest: +SKIP
-        [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'JJ'), ('swallow', 'VB'), ('?', '.')]
-    """
-    def __init__(self, url='http://localhost:9000', encoding='utf8'):
-        super(CoreNLPPOSTagger, self).__init__('pos', url, encoding)
-
-
-class CoreNLPNERTagger(CoreNLPTagger):
-    """
-    This is a subclass of the CoreNLPTagger that wraps around the
-    nltk.parse.CoreNLPParser for Named-Entity tagging.
-
-        >>> from nltk.tag.stanford import CoreNLPNERTagger
-        >>> CoreNLPNERTagger(url='http://localhost:9000').tag('Rami Eid is studying at Stony Brook University in NY'.split()) # doctest: +SKIP
-        [('Rami', 'PERSON'), ('Eid', 'PERSON'), ('is', 'O'), ('studying', 'O'), ('at', 'O'), ('Stony', 'ORGANIZATION'), ('Brook', 'ORGANIZATION'), ('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'O')]
-    """
-    def __init__(self, url='http://localhost:9000', encoding='utf8'):
-        super(CoreNLPNERTagger, self).__init__('ner', url, encoding)
-
 
 def setup_module(module):
     from nose import SkipTest
 
     try:
-        StanfordPOSTagger('english-bidirectional-distsim.tagger')
-    except LookupError:
-        raise SkipTest('Doctests from nltk.tag.stanford are skipped because one \
-                       of the stanford jars cannot be found.')
-
-    try:
-        CoreNLPPOSTagger()
-        CoreNLPNERTagger()
+        StanfordPOSTagger("english-bidirectional-distsim.tagger")
     except LookupError:
-        raise SkipTest('Doctests from nltk.tag.stanford.CoreNLPTokenizer'
-                       'are skipped because the stanford corenlp server not started')
+        raise SkipTest(
+            "Doctests from nltk.tag.stanford are skipped because one "
+            "of the stanford jars cannot be found."
+        )
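
The removed CoreNLPTagger/CoreNLPPOSTagger/CoreNLPNERTagger classes are superseded by the server-backed parser that the updated deprecation message points to. A minimal usage sketch (assumes nltk >= 3.3 and a CoreNLP server already listening on localhost:9000):

    from nltk.parse.corenlp import CoreNLPParser

    # POS tagging through the CoreNLP HTTP API; tagtype="ner" works the same way.
    pos_tagger = CoreNLPParser(url="http://localhost:9000", tagtype="pos")
    print(pos_tagger.tag("What is the airspeed of an unladen swallow ?".split()))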
diff --git a/nlp_resource_data/nltk/tag/stanford.pyc b/nlp_resource_data/nltk/tag/stanford.pyc
deleted file mode 100755 (executable)
index 734982c..0000000
Binary files a/nlp_resource_data/nltk/tag/stanford.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tag/tnt.py b/nlp_resource_data/nltk/tag/tnt.py
old mode 100755 (executable)
new mode 100644 (file)
index 63db23a..eb2ce12
@@ -1,18 +1,18 @@
 # Natural Language Toolkit: TnT Tagger
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Sam Huston <sjh900@gmail.com>
 #
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-'''
+"""
 Implementation of 'TnT - A Statistical Part of Speech Tagger'
 by Thorsten Brants
 
 http://acl.ldc.upenn.edu/A/A00/A00-1031.pdf
-'''
-from __future__ import print_function, division
+"""
+
 from math import log
 
 from operator import itemgetter
@@ -20,8 +20,9 @@ from operator import itemgetter
 from nltk.probability import FreqDist, ConditionalFreqDist
 from nltk.tag.api import TaggerI
 
+
 class TnT(TaggerI):
-    '''
+    """
     TnT - Statistical POS tagger
 
     IMPORTANT NOTES:
@@ -80,10 +81,10 @@ class TnT(TaggerI):
     It is possible to differentiate the tags which are assigned to
     capitalized words. However this does not result in a significant
     gain in the accuracy of the results.
-    '''
+    """
 
     def __init__(self, unk=None, Trained=False, N=1000, C=False):
-        '''
+        """
         Construct a TnT statistical tagger. Tagger must be trained
         before being used to tag input.
 
@@ -110,19 +111,19 @@ class TnT(TaggerI):
         information for tagging.
         NOTE: using capitalization may not increase the accuracy
         of the tagger
-        '''
-
-        self._uni  = FreqDist()
-        self._bi   = ConditionalFreqDist()
-        self._tri  = ConditionalFreqDist()
-        self._wd   = ConditionalFreqDist()
-        self._eos  = ConditionalFreqDist()
-        self._l1   = 0.0
-        self._l2   = 0.0
-        self._l3   = 0.0
-        self._N    = N
-        self._C    = C
-        self._T    = Trained
+        """
+
+        self._uni = FreqDist()
+        self._bi = ConditionalFreqDist()
+        self._tri = ConditionalFreqDist()
+        self._wd = ConditionalFreqDist()
+        self._eos = ConditionalFreqDist()
+        self._l1 = 0.0
+        self._l2 = 0.0
+        self._l3 = 0.0
+        self._N = N
+        self._C = C
+        self._T = Trained
 
         self._unk = unk
 
@@ -131,14 +132,14 @@ class TnT(TaggerI):
         self.known = 0
 
     def train(self, data):
-        '''
+        """
         Uses a set of tagged data to train the tagger.
         If an unknown word tagger is specified,
         it is trained on the same data.
 
         :param data: List of lists of (word, tag) tuples
         :type data: tuple(str)
-        '''
+        """
 
         # Ensure that local C flag is initialized before use
         C = False
@@ -147,38 +148,33 @@ class TnT(TaggerI):
             self._unk.train(data)
 
         for sent in data:
-            history = [('BOS',False), ('BOS',False)]
+            history = [("BOS", False), ("BOS", False)]
             for w, t in sent:
 
                 # if capitalization is requested,
                 # and the word begins with a capital
                 # set local flag C to True
-                if self._C and w[0].isupper(): C=True
+                if self._C and w[0].isupper():
+                    C = True
 
                 self._wd[w][t] += 1
-                self._uni[(t,C)] += 1
-                self._bi[history[1]][(t,C)] += 1
-                self._tri[tuple(history)][(t,C)] += 1
+                self._uni[(t, C)] += 1
+                self._bi[history[1]][(t, C)] += 1
+                self._tri[tuple(history)][(t, C)] += 1
 
-                history.append((t,C))
+                history.append((t, C))
                 history.pop(0)
 
                 # set local flag C to false for the next word
                 C = False
 
-            self._eos[t]['EOS'] += 1
-
+            self._eos[t]["EOS"] += 1
 
         # compute lambda values from the trained frequency distributions
         self._compute_lambda()
 
-        #(debugging -- ignore or delete me)
-        #print "lambdas"
-        #print i, self._l1, i, self._l2, i, self._l3
-
-
     def _compute_lambda(self):
-        '''
+        """
         creates lambda values based upon training data
 
         NOTE: no need to explicitly reference C,
@@ -195,7 +191,7 @@ class TnT(TaggerI):
         ISSUES -- Resolutions:
         if 2 values are equal, increment both lambda values
         by (f(t1,t2,t3) / 2)
-        '''
+        """
 
         # temporary lambda variables
         tl1 = 0.0
@@ -218,10 +214,11 @@ class TnT(TaggerI):
 
                 # safe_div provides a safe floating point division
                 # it returns -1 if the denominator is 0
-                c3 = self._safe_div((self._tri[history][tag]-1), (self._tri[history].N()-1))
-                c2 = self._safe_div((self._bi[h2][tag]-1), (self._bi[h2].N()-1))
-                c1 = self._safe_div((self._uni[tag]-1), (self._uni.N()-1))
-
+                c3 = self._safe_div(
+                    (self._tri[history][tag] - 1), (self._tri[history].N() - 1)
+                )
+                c2 = self._safe_div((self._bi[h2][tag] - 1), (self._bi[h2].N() - 1))
+                c1 = self._safe_div((self._uni[tag] - 1), (self._uni.N() - 1))
 
                 # if c1 is the maximum value:
                 if (c1 > c3) and (c1 > c2):
@@ -249,29 +246,26 @@ class TnT(TaggerI):
                 # otherwise there might be a problem
                 # eg: all values = 0
                 else:
-                    #print "Problem", c1, c2 ,c3
                     pass
 
         # Lambda normalisation:
         # ensures that l1+l2+l3 = 1
-        self._l1 = tl1 / (tl1+tl2+tl3)
-        self._l2 = tl2 / (tl1+tl2+tl3)
-        self._l3 = tl3 / (tl1+tl2+tl3)
-
-
+        self._l1 = tl1 / (tl1 + tl2 + tl3)
+        self._l2 = tl2 / (tl1 + tl2 + tl3)
+        self._l3 = tl3 / (tl1 + tl2 + tl3)
 
     def _safe_div(self, v1, v2):
-        '''
+        """
         Safe floating point division function, does not allow division by 0
         returns -1 if the denominator is 0
-        '''
+        """
         if v2 == 0:
             return -1
         else:
             return v1 / v2
 
     def tagdata(self, data):
-        '''
+        """
         Tags each sentence in a list of sentences
 
         :param data: list of lists of words
@@ -281,16 +275,15 @@ class TnT(TaggerI):
         Invokes tag(sent) function for each sentence
         compiles the results into a list of tagged sentences
         each tagged sentence is a list of (word, tag) tuples
-        '''
+        """
         res = []
         for sent in data:
             res1 = self.tag(sent)
             res.append(res1)
         return res
 
-
     def tag(self, data):
-        '''
+        """
         Tags a single sentence
 
         :param data: list of words
@@ -305,9 +298,9 @@ class TnT(TaggerI):
         with the correct words in the input sequence
 
         returns a list of (word, tag) tuples
-        '''
+        """
 
-        current_state = [(['BOS', 'BOS'], 0.0)]
+        current_state = [(["BOS", "BOS"], 0.0)]
 
         sent = list(data)
 
@@ -316,14 +309,13 @@ class TnT(TaggerI):
         res = []
         for i in range(len(sent)):
             # unpack and discard the C flags
-            (t,C) = tags[i+2]
+            (t, C) = tags[i + 2]
             res.append((sent[i], t))
 
         return res
 
-
     def _tagword(self, sent, current_states):
-        '''
+        """
         :param sent : List of words remaining in the sentence
         :type sent  : [word,]
         :param current_states : List of possible tag combinations for
@@ -336,7 +328,7 @@ class TnT(TaggerI):
 
         Uses formula specified above to calculate the probability
         of a particular tag
-        '''
+        """
 
         # if this word marks the end of the sentence,
         # return the most probable tag
@@ -352,7 +344,8 @@ class TnT(TaggerI):
         # if the Capitalisation is requested,
         # initialise the flag for this word
         C = False
-        if self._C and word[0].isupper(): C=True
+        if self._C and word[0].isupper():
+            C = True
 
         # if word is known
         # compute the set of possible tags
@@ -364,17 +357,16 @@ class TnT(TaggerI):
                 logprobs = []
 
                 for t in self._wd[word].keys():
-                    tC = (t,C)
+                    tC = (t, C)
                     p_uni = self._uni.freq(tC)
                     p_bi = self._bi[history[-1]].freq(tC)
                     p_tri = self._tri[tuple(history[-2:])].freq(tC)
                     p_wd = self._wd[word][t] / self._uni[tC]
-                    p = self._l1 *p_uni + self._l2 *p_bi + self._l3 *p_tri
+                    p = self._l1 * p_uni + self._l2 * p_bi + self._l3 * p_tri
                     p2 = log(p, 2) + log(p_wd, 2)
 
                     # compute the result of appending each tag to this history
-                    new_states.append((history + [tC],
-                                       curr_sent_logprob + p2))
+                    new_states.append((history + [tC], curr_sent_logprob + p2))
 
         # otherwise a new word, set of possible tags is unknown
         else:
@@ -389,12 +381,12 @@ class TnT(TaggerI):
             # if no unknown word tagger has been specified
             # then use the tag 'Unk'
             if self._unk is None:
-                tag = ('Unk',C)
+                tag = ("Unk", C)
 
             # otherwise apply the unknown word tagger
             else:
                 [(_w, t)] = list(self._unk.tag([word]))
-                tag = (t,C)
+                tag = (t, C)
 
             for (history, logprob) in current_states:
                 history.append(tag)
@@ -410,7 +402,7 @@ class TnT(TaggerI):
         # del everything after N (threshold)
         # this is the beam search cut
         if len(new_states) > self._N:
-            new_states = new_states[:self._N]
+            new_states = new_states[: self._N]
 
         # compute the tags for the rest of the sentence
         # return the best list of tags for the sentence
@@ -421,8 +413,9 @@ class TnT(TaggerI):
 # helper function -- basic sentence tokenizer
 ########################################
 
+
 def basic_sent_chop(data, raw=True):
-    '''
+    """
     Basic method for tokenizing input into sentences
     for this tagger:
 
@@ -444,12 +437,11 @@ def basic_sent_chop(data, raw=True):
 
     This is a simple method which enhances the performance of the TnT
     tagger. Better sentence tokenization will further enhance the results.
-    '''
+    """
 
     new_data = []
     curr_sent = []
-    sent_mark = [',','.','?','!']
-
+    sent_mark = [",", ".", "?", "!"]
 
     if raw:
         for word in data:
@@ -461,35 +453,32 @@ def basic_sent_chop(data, raw=True):
                 curr_sent.append(word)
 
     else:
-        for (word,tag) in data:
+        for (word, tag) in data:
             if word in sent_mark:
-                curr_sent.append((word,tag))
+                curr_sent.append((word, tag))
                 new_data.append(curr_sent)
                 curr_sent = []
             else:
-                curr_sent.append((word,tag))
+                curr_sent.append((word, tag))
     return new_data
 
 
-
 def demo():
     from nltk.corpus import brown
+
     sents = list(brown.tagged_sents())
     test = list(brown.sents())
 
-    # create and train the tagger
     tagger = TnT()
     tagger.train(sents[200:1000])
 
-    # tag some data
     tagged_data = tagger.tagdata(test[100:120])
 
-    # print results
     for j in range(len(tagged_data)):
         s = tagged_data[j]
-        t = sents[j+100]
+        t = sents[j + 100]
         for i in range(len(s)):
-            print(s[i],'--', t[i])
+            print(s[i], "--", t[i])
         print()
 
 
@@ -500,33 +489,34 @@ def demo2():
 
     t = TnT(N=1000, C=False)
     s = TnT(N=1000, C=True)
-    t.train(d[(11)*100:])
-    s.train(d[(11)*100:])
+    t.train(d[(11) * 100 :])
+    s.train(d[(11) * 100 :])
 
     for i in range(10):
-        tacc = t.evaluate(d[i*100:((i+1)*100)])
+        tacc = t.evaluate(d[i * 100 : ((i + 1) * 100)])
         tp_un = t.unknown / (t.known + t.unknown)
         tp_kn = t.known / (t.known + t.unknown)
         t.unknown = 0
         t.known = 0
 
-        print('Capitalization off:')
-        print('Accuracy:', tacc)
-        print('Percentage known:', tp_kn)
-        print('Percentage unknown:', tp_un)
-        print('Accuracy over known words:', (tacc / tp_kn))
+        print("Capitalization off:")
+        print("Accuracy:", tacc)
+        print("Percentage known:", tp_kn)
+        print("Percentage unknown:", tp_un)
+        print("Accuracy over known words:", (tacc / tp_kn))
 
-        sacc = s.evaluate(d[i*100:((i+1)*100)])
+        sacc = s.evaluate(d[i * 100 : ((i + 1) * 100)])
         sp_un = s.unknown / (s.known + s.unknown)
         sp_kn = s.known / (s.known + s.unknown)
         s.unknown = 0
         s.known = 0
 
-        print('Capitalization on:')
-        print('Accuracy:', sacc)
-        print('Percentage known:', sp_kn)
-        print('Percentage unknown:', sp_un)
-        print('Accuracy over known words:', (sacc / sp_kn))
+        print("Capitalization on:")
+        print("Accuracy:", sacc)
+        print("Percentage known:", sp_kn)
+        print("Percentage unknown:", sp_un)
+        print("Accuracy over known words:", (sacc / sp_kn))
+
 
 def demo3():
     from nltk.corpus import treebank, brown
@@ -537,8 +527,8 @@ def demo3():
     d = d[:1000]
     e = e[:1000]
 
-    d10 = int(len(d)*0.1)
-    e10 = int(len(e)*0.1)
+    d10 = int(len(d) * 0.1)
+    e10 = int(len(e) * 0.1)
 
     tknacc = 0
     sknacc = 0
@@ -552,11 +542,11 @@ def demo3():
         t = TnT(N=1000, C=False)
         s = TnT(N=1000, C=False)
 
-        dtest = d[(i*d10):((i+1)*d10)]
-        etest = e[(i*e10):((i+1)*e10)]
+        dtest = d[(i * d10) : ((i + 1) * d10)]
+        etest = e[(i * e10) : ((i + 1) * e10)]
 
-        dtrain = d[:(i*d10)] + d[((i+1)*d10):]
-        etrain = e[:(i*e10)] + e[((i+1)*e10):]
+        dtrain = d[: (i * d10)] + d[((i + 1) * d10) :]
+        etrain = e[: (i * e10)] + e[((i + 1) * e10) :]
 
         t.train(dtrain)
         s.train(etrain)
@@ -575,13 +565,12 @@ def demo3():
         s.unknown = 0
         s.known = 0
 
-        tknacc += (tacc / tp_kn)
-        sknacc += (sacc / tp_kn)
+        tknacc += tacc / tp_kn
+        sknacc += sacc / tp_kn
         tallacc += tacc
         sallacc += sacc
 
-        #print i+1, (tacc / tp_kn), i+1, (sacc / tp_kn), i+1, tacc, i+1, sacc
-
+        # print(i+1, (tacc / tp_kn), i+1, (sacc / tp_kn), i+1, tacc, i+1, sacc)
 
     print("brown: acc over words known:", 10 * tknacc)
     print("     : overall accuracy:", 10 * tallacc)
@@ -589,7 +578,3 @@ def demo3():
     print("treebank: acc over words known:", 10 * sknacc)
     print("        : overall accuracy:", 10 * sallacc)
     print("        : words known:", 10 * sknown)
-
-
-
-
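
The score computed in _tagword is a standard linear interpolation of the unigram, bigram, and trigram tag distributions, weighted by the lambdas from _compute_lambda and combined with the lexical probability in log2 space. A sketch of the arithmetic with toy values (numbers are illustrative only):

    from math import log

    l1, l2, l3 = 0.2, 0.3, 0.5             # lambdas; normalised so l1+l2+l3 == 1
    p_uni, p_bi, p_tri = 0.10, 0.25, 0.40  # P(tag), P(tag|t-1), P(tag|t-2,t-1)
    p_wd = 0.05                            # lexical term, P(word|tag)

    p = l1 * p_uni + l2 * p_bi + l3 * p_tri
    logprob = log(p, 2) + log(p_wd, 2)     # accumulated per word along the beam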
diff --git a/nlp_resource_data/nltk/tag/tnt.pyc b/nlp_resource_data/nltk/tag/tnt.pyc
deleted file mode 100755 (executable)
index da8d8f7..0000000
Binary files a/nlp_resource_data/nltk/tag/tnt.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tag/util.py b/nlp_resource_data/nltk/tag/util.py
old mode 100755 (executable)
new mode 100644 (file)
index 5d72f01..9d2172e
@@ -1,12 +1,13 @@
 # Natural Language Toolkit: Tagger Utilities
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-def str2tuple(s, sep='/'):
+
+def str2tuple(s, sep="/"):
     """
     Given the string representation of a tagged token, return the
     corresponding tuple representation.  The rightmost occurrence of
@@ -25,11 +26,12 @@ def str2tuple(s, sep='/'):
     """
     loc = s.rfind(sep)
     if loc >= 0:
-        return (s[:loc], s[loc+len(sep):].upper())
+        return (s[:loc], s[loc + len(sep) :].upper())
     else:
         return (s, None)
 
-def tuple2str(tagged_token, sep='/'):
+
+def tuple2str(tagged_token, sep="/"):
     """
     Given the tuple representation of a tagged token, return the
     corresponding string representation.  This representation is
@@ -52,8 +54,9 @@ def tuple2str(tagged_token, sep='/'):
     if tag is None:
         return word
     else:
-        assert sep not in tag, 'tag may not contain sep!'
-        return '%s%s%s' % (word, sep, tag)
+        assert sep not in tag, "tag may not contain sep!"
+        return "%s%s%s" % (word, sep, tag)
+
 
 def untag(tagged_sentence):
     """
@@ -67,6 +70,3 @@ def untag(tagged_sentence):
 
     """
     return [w for (w, t) in tagged_sentence]
-
-
-
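
For reference, the three helpers in this module round-trip as follows (doctest-style sketch; note that str2tuple uppercases the tag):

    from nltk.tag.util import str2tuple, tuple2str, untag

    assert str2tuple("fly/NN") == ("fly", "NN")
    assert str2tuple("fly") == ("fly", None)   # no separator present
    assert tuple2str(("fly", "NN")) == "fly/NN"
    assert untag([("the", "DT"), ("cat", "NN")]) == ["the", "cat"]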
diff --git a/nlp_resource_data/nltk/tbl/__init__.py b/nlp_resource_data/nltk/tbl/__init__.py
old mode 100755 (executable)
new mode 100644 (file)
index a71ca8c..dca2b46
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Marcus Uneson <marcus.uneson@gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
@@ -16,13 +16,15 @@ currently used by nltk.tag.BrillTagger.
 """
 
 from nltk.tbl.template import Template
-#API: Template(...), Template.expand(...)
+
+# API: Template(...), Template.expand(...)
 
 from nltk.tbl.feature import Feature
-#API: Feature(...), Feature.expand(...)
+
+# API: Feature(...), Feature.expand(...)
 
 from nltk.tbl.rule import Rule
-#API: Rule.format(...), Rule.templatetid
 
-from nltk.tbl.erroranalysis import error_list
+# API: Rule.format(...), Rule.templatetid
 
+from nltk.tbl.erroranalysis import error_list
diff --git a/nlp_resource_data/nltk/tbl/__init__.pyc b/nlp_resource_data/nltk/tbl/__init__.pyc
deleted file mode 100755 (executable)
index c1f1ea2..0000000
Binary files a/nlp_resource_data/nltk/tbl/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tbl/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/tbl/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f6656e4
Binary files /dev/null and b/nlp_resource_data/nltk/tbl/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tbl/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/tbl/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..06de1f0
Binary files /dev/null and b/nlp_resource_data/nltk/tbl/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tbl/__pycache__/demo.cpython-37.pyc b/nlp_resource_data/nltk/tbl/__pycache__/demo.cpython-37.pyc
new file mode 100644 (file)
index 0000000..fe124f3
Binary files /dev/null and b/nlp_resource_data/nltk/tbl/__pycache__/demo.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tbl/__pycache__/erroranalysis.cpython-37.pyc b/nlp_resource_data/nltk/tbl/__pycache__/erroranalysis.cpython-37.pyc
new file mode 100644 (file)
index 0000000..5db4cae
Binary files /dev/null and b/nlp_resource_data/nltk/tbl/__pycache__/erroranalysis.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tbl/feature.pyc b/nlp_resource_data/nltk/tbl/__pycache__/feature.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 55%
rename from nlp_resource_data/nltk/tbl/feature.pyc
rename to nlp_resource_data/nltk/tbl/__pycache__/feature.cpython-37.pyc
index 4d1c4d1..7b886bc
Binary files a/nlp_resource_data/nltk/tbl/feature.pyc and b/nlp_resource_data/nltk/tbl/__pycache__/feature.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tbl/__pycache__/rule.cpython-37.pyc b/nlp_resource_data/nltk/tbl/__pycache__/rule.cpython-37.pyc
new file mode 100644 (file)
index 0000000..de847c9
Binary files /dev/null and b/nlp_resource_data/nltk/tbl/__pycache__/rule.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tbl/template.pyc b/nlp_resource_data/nltk/tbl/__pycache__/template.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 51%
rename from nlp_resource_data/nltk/tbl/template.pyc
rename to nlp_resource_data/nltk/tbl/__pycache__/template.cpython-37.pyc
index 2dff36c..6393690
Binary files a/nlp_resource_data/nltk/tbl/template.pyc and b/nlp_resource_data/nltk/tbl/__pycache__/template.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tbl/api.pyc b/nlp_resource_data/nltk/tbl/api.pyc
deleted file mode 100755 (executable)
index 7d5ba00..0000000
Binary files a/nlp_resource_data/nltk/tbl/api.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tbl/demo.py b/nlp_resource_data/nltk/tbl/demo.py
old mode 100755 (executable)
new mode 100644 (file)
index fa70972..da30446
@@ -1,14 +1,13 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Marcus Uneson <marcus.uneson@gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
 # URL: <http://nltk.org/>
 # For license information, see  LICENSE.TXT
 
-from __future__ import print_function, absolute_import, division
 import os
 import pickle
 
@@ -21,6 +20,7 @@ from nltk.tbl import error_list, Template
 from nltk.tag.brill import Word, Pos
 from nltk.tag import BrillTaggerTrainer, RegexpTagger, UnigramTagger
 
+
 def demo():
     """
     Run a demo with defaults. See source comments for details,
@@ -28,24 +28,28 @@ def demo():
     """
     postag()
 
+
 def demo_repr_rule_format():
     """
     Exemplify repr(Rule) (see also str(Rule) and Rule.format("verbose"))
     """
     postag(ruleformat="repr")
 
+
 def demo_str_rule_format():
     """
     Exemplify str(Rule) (see also repr(Rule) and Rule.format("verbose"))
     """
     postag(ruleformat="str")
 
+
 def demo_verbose_rule_format():
     """
     Exemplify Rule.format("verbose")
     """
     postag(ruleformat="verbose")
 
+
 def demo_multiposition_feature():
     """
     The feature/s of a template takes a list of positions
@@ -58,13 +62,15 @@ def demo_multiposition_feature():
     points can also be used: Pos(-3, -1) is the same as the arg
     below.
     """
-    postag(templates=[Template(Pos([-3,-2,-1]))])
+    postag(templates=[Template(Pos([-3, -2, -1]))])
+
 
 def demo_multifeature_template():
     """
     Templates can have more than a single feature.
     """
-    postag(templates=[Template(Word([0]), Pos([-2,-1]))])
+    postag(templates=[Template(Word([0]), Pos([-2, -1]))])
+
 
 def demo_template_statistics():
     """
@@ -77,6 +83,7 @@ def demo_template_statistics():
     """
     postag(incremental_stats=True, template_stats=True)
 
+
 def demo_generated_templates():
     """
     Template.expand and Feature.expand are class methods facilitating
@@ -86,19 +93,29 @@ def demo_generated_templates():
     Note: training with 500 templates can easily fill all available
     memory, even on relatively small corpora
     """
-    wordtpls = Word.expand([-1,0,1], [1,2], excludezero=False)
-    tagtpls = Pos.expand([-2,-1,0,1], [1,2], excludezero=True)
-    templates = list(Template.expand([wordtpls, tagtpls], combinations=(1,3)))
-    print("Generated {0} templates for transformation-based learning".format(len(templates)))
+    wordtpls = Word.expand([-1, 0, 1], [1, 2], excludezero=False)
+    tagtpls = Pos.expand([-2, -1, 0, 1], [1, 2], excludezero=True)
+    templates = list(Template.expand([wordtpls, tagtpls], combinations=(1, 3)))
+    print(
+        "Generated {0} templates for transformation-based learning".format(
+            len(templates)
+        )
+    )
     postag(templates=templates, incremental_stats=True, template_stats=True)
 
+
 def demo_learning_curve():
     """
     Plot a learning curve -- the contribution on tagging accuracy of
     the individual rules.
     Note: requires matplotlib
     """
-    postag(incremental_stats=True, separate_baseline_data=True, learning_curve_output="learningcurve.png")
+    postag(
+        incremental_stats=True,
+        separate_baseline_data=True,
+        learning_curve_output="learningcurve.png",
+    )
+
 
 def demo_error_analysis():
     """
@@ -106,6 +123,7 @@ def demo_error_analysis():
     """
     postag(error_output="errors.txt")
 
+
 def demo_serialize_tagger():
     """
     Serializes the learned tagger to a file in pickle format; reloads it
@@ -113,6 +131,7 @@ def demo_serialize_tagger():
     """
     postag(serialize_output="tagger.pcl")
 
+
 def demo_high_accuracy_rules():
     """
     Discard rules with low accuracy. This may hurt performance a bit,
@@ -120,6 +139,7 @@ def demo_high_accuracy_rules():
     """
     postag(num_sents=3000, min_acc=0.96, min_score=10)
 
+
 def postag(
     templates=None,
     tagged_data=None,
@@ -139,7 +159,8 @@ def postag(
     learning_curve_take=300,
     baseline_backoff_tagger=None,
     separate_baseline_data=False,
-    cache_baseline_tagger=None):
+    cache_baseline_tagger=None,
+):
     """
     Brill Tagger Demonstration
     :param templates: the rule templates to learn (defaults to brill24())
@@ -210,34 +231,48 @@ def postag(
     baseline_backoff_tagger = baseline_backoff_tagger or REGEXP_TAGGER
     if templates is None:
         from nltk.tag.brill import describe_template_sets, brill24
+
         # some pre-built template sets taken from typical systems or publications are
         # available. Print a list with describe_template_sets()
         # for instance:
         templates = brill24()
-    (training_data, baseline_data, gold_data, testing_data) = \
-       _demo_prepare_data(tagged_data, train, num_sents, randomize, separate_baseline_data)
+    (training_data, baseline_data, gold_data, testing_data) = _demo_prepare_data(
+        tagged_data, train, num_sents, randomize, separate_baseline_data
+    )
 
     # creating (or reloading from cache) a baseline tagger (unigram tagger)
     # this is just a mechanism for getting deterministic output from the baseline between
     # python versions
     if cache_baseline_tagger:
         if not os.path.exists(cache_baseline_tagger):
-            baseline_tagger = UnigramTagger(baseline_data, backoff=baseline_backoff_tagger)
-            with open(cache_baseline_tagger, 'w') as print_rules:
+            baseline_tagger = UnigramTagger(
+                baseline_data, backoff=baseline_backoff_tagger
+            )
+            with open(cache_baseline_tagger, "wb") as print_rules:
                 pickle.dump(baseline_tagger, print_rules)
-            print("Trained baseline tagger, pickled it to {0}".format(cache_baseline_tagger))
+            print(
+                "Trained baseline tagger, pickled it to {0}".format(
+                    cache_baseline_tagger
+                )
+            )
         with open(cache_baseline_tagger, "r") as print_rules:
-            baseline_tagger= pickle.load(print_rules)
+            baseline_tagger = pickle.load(print_rules)
             print("Reloaded pickled tagger from {0}".format(cache_baseline_tagger))
     else:
         baseline_tagger = UnigramTagger(baseline_data, backoff=baseline_backoff_tagger)
         print("Trained baseline tagger")
     if gold_data:
-        print("    Accuracy on test set: {0:0.4f}".format(baseline_tagger.evaluate(gold_data)))
+        print(
+            "    Accuracy on test set: {0:0.4f}".format(
+                baseline_tagger.evaluate(gold_data)
+            )
+        )
 
     # creating a Brill tagger
     tbrill = time.time()
-    trainer = BrillTaggerTrainer(baseline_tagger, templates, trace, ruleformat=ruleformat)
+    trainer = BrillTaggerTrainer(
+        baseline_tagger, templates, trace, ruleformat=ruleformat
+    )
     print("Training tbl tagger...")
     brill_tagger = trainer.train(training_data, max_rules, min_score, min_acc)
     print("Trained tbl tagger in {0:0.2f} seconds".format(time.time() - tbrill))
@@ -247,24 +282,31 @@ def postag(
     # printing the learned rules, if learned silently
     if trace == 1:
         print("\nLearned rules: ")
-        for (ruleno, rule) in enumerate(brill_tagger.rules(),1):
+        for (ruleno, rule) in enumerate(brill_tagger.rules(), 1):
             print("{0:4d} {1:s}".format(ruleno, rule.format(ruleformat)))
 
-
     # printing template statistics (optionally including comparison with the training data)
     # note: if not separate_baseline_data, then baseline accuracy will be artificially high
-    if  incremental_stats:
-        print("Incrementally tagging the test data, collecting individual rule statistics")
-        (taggedtest, teststats) = brill_tagger.batch_tag_incremental(testing_data, gold_data)
+    if incremental_stats:
+        print(
+            "Incrementally tagging the test data, collecting individual rule statistics"
+        )
+        (taggedtest, teststats) = brill_tagger.batch_tag_incremental(
+            testing_data, gold_data
+        )
         print("    Rule statistics collected")
         if not separate_baseline_data:
-            print("WARNING: train_stats asked for separate_baseline_data=True; the baseline "
-                  "will be artificially high")
+            print(
+                "WARNING: train_stats asked for separate_baseline_data=True; the baseline "
+                "will be artificially high"
+            )
         trainstats = brill_tagger.train_stats()
         if template_stats:
             brill_tagger.print_template_statistics(teststats)
         if learning_curve_output:
-            _demo_plot(learning_curve_output, teststats, trainstats, take=learning_curve_take)
+            _demo_plot(
+                learning_curve_output, teststats, trainstats, take=learning_curve_take
+            )
             print("Wrote plot of learning curve to {0}".format(learning_curve_output))
     else:
         print("Tagging the test data")
@@ -274,15 +316,17 @@ def postag(
 
     # writing error analysis to file
     if error_output is not None:
-        with open(error_output, 'w') as f:
-            f.write('Errors for Brill Tagger %r\n\n' % serialize_output)
-            f.write(u'\n'.join(error_list(gold_data, taggedtest)).encode('utf-8') + '\n')
+        with open(error_output, "w", encoding="utf-8") as f:
+            f.write("Errors for Brill Tagger %r\n\n" % serialize_output)
+            f.write("\n".join(error_list(gold_data, taggedtest)) + "\n")
         print("Wrote tagger errors including context to {0}".format(error_output))
 
     # serializing the tagger to a pickle file and reloading (just to see it works)
     if serialize_output is not None:
         taggedtest = brill_tagger.tag_sents(testing_data)
-        with open(serialize_output, 'w') as print_rules:
+        with open(serialize_output, "w") as print_rules:
             pickle.dump(brill_tagger, print_rules)
         print("Wrote pickled tagger to {0}".format(serialize_output))
         with open(serialize_output, "r") as print_rules:
@@ -294,7 +338,10 @@ def postag(
         else:
             print("PROBLEM: Reloaded tagger gave different results on test set")
 
-def _demo_prepare_data(tagged_data, train, num_sents, randomize, separate_baseline_data):
+
+def _demo_prepare_data(
+    tagged_data, train, num_sents, randomize, separate_baseline_data
+):
     # train is the proportion of data used in training; the rest is reserved
     # for testing.
     if tagged_data is None:
@@ -313,54 +360,64 @@ def _demo_prepare_data(tagged_data, train, num_sents, randomize, separate_baseli
         baseline_data = training_data
     else:
         bl_cutoff = len(training_data) // 3
-        (baseline_data, training_data) = (training_data[:bl_cutoff], training_data[bl_cutoff:])
+        (baseline_data, training_data) = (
+            training_data[:bl_cutoff],
+            training_data[bl_cutoff:],
+        )
     (trainseqs, traintokens) = corpus_size(training_data)
     (testseqs, testtokens) = corpus_size(testing_data)
     (bltrainseqs, bltraintokens) = corpus_size(baseline_data)
     print("Read testing data ({0:d} sents/{1:d} wds)".format(testseqs, testtokens))
     print("Read training data ({0:d} sents/{1:d} wds)".format(trainseqs, traintokens))
-    print("Read baseline data ({0:d} sents/{1:d} wds) {2:s}".format(
-        bltrainseqs, bltraintokens, "" if separate_baseline_data else "[reused the training set]"))
+    print(
+        "Read baseline data ({0:d} sents/{1:d} wds) {2:s}".format(
+            bltrainseqs,
+            bltraintokens,
+            "" if separate_baseline_data else "[reused the training set]",
+        )
+    )
     return (training_data, baseline_data, gold_data, testing_data)
 
 
 def _demo_plot(learning_curve_output, teststats, trainstats=None, take=None):
-   testcurve = [teststats['initialerrors']]
-   for rulescore in teststats['rulescores']:
-       testcurve.append(testcurve[-1] - rulescore)
-   testcurve = [1 - x/teststats['tokencount'] for x in testcurve[:take]]
+    testcurve = [teststats["initialerrors"]]
+    for rulescore in teststats["rulescores"]:
+        testcurve.append(testcurve[-1] - rulescore)
+    testcurve = [1 - x / teststats["tokencount"] for x in testcurve[:take]]
 
-   traincurve = [trainstats['initialerrors']]
-   for rulescore in trainstats['rulescores']:
-       traincurve.append(traincurve[-1] - rulescore)
-   traincurve = [1 - x/trainstats['tokencount'] for x in traincurve[:take]]
+    traincurve = [trainstats["initialerrors"]]
+    for rulescore in trainstats["rulescores"]:
+        traincurve.append(traincurve[-1] - rulescore)
+    traincurve = [1 - x / trainstats["tokencount"] for x in traincurve[:take]]
 
-   import matplotlib.pyplot as plt
-   r = list(range(len(testcurve)))
-   plt.plot(r, testcurve, r, traincurve)
-   plt.axis([None, None, None, 1.0])
-   plt.savefig(learning_curve_output)
+    import matplotlib.pyplot as plt
 
+    r = list(range(len(testcurve)))
+    plt.plot(r, testcurve, r, traincurve)
+    plt.axis([None, None, None, 1.0])
+    plt.savefig(learning_curve_output)
 
-NN_CD_TAGGER = RegexpTagger(
-    [(r'^-?[0-9]+(.[0-9]+)?$', 'CD'),
-     (r'.*', 'NN')])
+
+NN_CD_TAGGER = RegexpTagger([(r"^-?[0-9]+(.[0-9]+)?$", "CD"), (r".*", "NN")])
 
 REGEXP_TAGGER = RegexpTagger(
-    [(r'^-?[0-9]+(.[0-9]+)?$', 'CD'),   # cardinal numbers
-     (r'(The|the|A|a|An|an)$', 'AT'),   # articles
-     (r'.*able$', 'JJ'),                # adjectives
-     (r'.*ness$', 'NN'),                # nouns formed from adjectives
-     (r'.*ly$', 'RB'),                  # adverbs
-     (r'.*s$', 'NNS'),                  # plural nouns
-     (r'.*ing$', 'VBG'),                # gerunds
-     (r'.*ed$', 'VBD'),                 # past tense verbs
-     (r'.*', 'NN')                      # nouns (default)
-])
+    [
+        (r"^-?[0-9]+(.[0-9]+)?$", "CD"),  # cardinal numbers
+        (r"(The|the|A|a|An|an)$", "AT"),  # articles
+        (r".*able$", "JJ"),  # adjectives
+        (r".*ness$", "NN"),  # nouns formed from adjectives
+        (r".*ly$", "RB"),  # adverbs
+        (r".*s$", "NNS"),  # plural nouns
+        (r".*ing$", "VBG"),  # gerunds
+        (r".*ed$", "VBD"),  # past tense verbs
+        (r".*", "NN"),  # nouns (default)
+    ]
+)
 
 
 def corpus_size(seqs):
     return (len(seqs), sum(len(x) for x in seqs))
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo_learning_curve()
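
One py2 idiom in this demo does not survive the conversion untouched: pickle streams are bytes under Python 3, so the cache and serialize paths above must open their files in binary mode. A minimal sketch:

    import pickle

    obj = {"tagger": "unigram-baseline"}   # stand-in for the trained tagger
    with open("tagger.pcl", "wb") as f:    # text mode "w" raises TypeError in py3
        pickle.dump(obj, f)
    with open("tagger.pcl", "rb") as f:
        assert pickle.load(f) == obj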
diff --git a/nlp_resource_data/nltk/tbl/demo.pyc b/nlp_resource_data/nltk/tbl/demo.pyc
deleted file mode 100755 (executable)
index d941c53..0000000
Binary files a/nlp_resource_data/nltk/tbl/demo.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tbl/erroranalysis.py b/nlp_resource_data/nltk/tbl/erroranalysis.py
old mode 100755 (executable)
new mode 100644 (file)
index aaceb01..9c0881a
@@ -1,18 +1,16 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Marcus Uneson <marcus.uneson@gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
 # URL: <http://nltk.org/>
 # For license information, see  LICENSE.TXT
 
-from __future__ import print_function
-
-
 # returns a list of errors in string format
 
+
 def error_list(train_sents, test_sents):
     """
     Returns a list of human-readable strings indicating the errors in the
@@ -23,17 +21,21 @@ def error_list(train_sents, test_sents):
     :param test_sents: The tagged corpus
     :type test_sents: list(tuple)
     """
-    hdr = (('%25s | %s | %s\n' + '-'*26+'+'+'-'*24+'+'+'-'*26) %
-           ('left context', 'word/test->gold'.center(22), 'right context'))
+    hdr = ("%25s | %s | %s\n" + "-" * 26 + "+" + "-" * 24 + "+" + "-" * 26) % (
+        "left context",
+        "word/test->gold".center(22),
+        "right context",
+    )
     errors = [hdr]
     for (train_sent, test_sent) in zip(train_sents, test_sents):
         for wordnum, (word, train_pos) in enumerate(train_sent):
             test_pos = test_sent[wordnum][1]
             if train_pos != test_pos:
-                left = ' '.join('%s/%s' % w for w in train_sent[:wordnum])
-                right = ' '.join('%s/%s' % w for w in train_sent[wordnum+1:])
-                mid = '%s/%s->%s' % (word, test_pos, train_pos)
-                errors.append('%25s | %s | %s' %
-                              (left[-25:], mid.center(22), right[:25]))
+                left = " ".join("%s/%s" % w for w in train_sent[:wordnum])
+                right = " ".join("%s/%s" % w for w in train_sent[wordnum + 1 :])
+                mid = "%s/%s->%s" % (word, test_pos, train_pos)
+                errors.append(
+                    "%25s | %s | %s" % (left[-25:], mid.center(22), right[:25])
+                )
 
     return errors
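
The header and rows produced by error_list line up as a fixed-width, three-column table. A toy-value sketch of one row (hypothetical strings, same format codes as above):

    left, mid, right = "the old", "man/NN->VB", "the boat ."
    print("%25s | %s | %s" % (left[-25:], mid.center(22), right[:25]))
    # right-aligned 25-char left context | centered word/test->gold | 25-char right context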
diff --git a/nlp_resource_data/nltk/tbl/erroranalysis.pyc b/nlp_resource_data/nltk/tbl/erroranalysis.pyc
deleted file mode 100755 (executable)
index 78d4509..0000000
Binary files a/nlp_resource_data/nltk/tbl/erroranalysis.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tbl/feature.py b/nlp_resource_data/nltk/tbl/feature.py
old mode 100755 (executable)
new mode 100644 (file)
index eb3539b..9a5bb00
@@ -1,20 +1,17 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Marcus Uneson <marcus.uneson@gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
 # URL: <http://nltk.org/>
 # For license information, see  LICENSE.TXT
 
-from __future__ import division, print_function, unicode_literals
 from abc import ABCMeta, abstractmethod
-from six import add_metaclass
 
 
-@add_metaclass(ABCMeta)
-class Feature(object):
+class Feature(metaclass=ABCMeta):
     """
     An abstract base class for Features. A Feature is a combination of
     a specific property-computing method and a list of relative positions
@@ -33,7 +30,7 @@ class Feature(object):
 
     """
 
-    json_tag = 'nltk.tbl.Feature'
+    json_tag = "nltk.tbl.Feature"
     PROPERTY_NAME = None
 
     def __init__(self, positions, end=None):
@@ -79,15 +76,19 @@ class Feature(object):
         """
         self.positions = None  # to avoid warnings
         if end is None:
-            self.positions = tuple(sorted(set([int(i) for i in positions])))
-        else:                # positions was actually not a list, but only the start index
+            self.positions = tuple(sorted(set(int(i) for i in positions)))
+        else:  # positions was actually not a list, but only the start index
             try:
                 if positions > end:
                     raise TypeError
-                self.positions = tuple(range(positions, end+1))
+                self.positions = tuple(range(positions, end + 1))
             except TypeError:
                 # let any kind of erroneous spec raise ValueError
-                raise ValueError("illegal interval specification: (start={0}, end={1})".format(positions, end))
+                raise ValueError(
+                    "illegal interval specification: (start={0}, end={1})".format(
+                        positions, end
+                    )
+                )
 
         # set property name given in subclass, or otherwise name of subclass
         self.PROPERTY_NAME = self.__class__.PROPERTY_NAME or self.__class__.__name__
@@ -101,8 +102,7 @@ class Feature(object):
         return cls(positions)
 
     def __repr__(self):
-        return "%s(%r)" % (
-            self.__class__.__name__, list(self.positions))
+        return "%s(%r)" % (self.__class__.__name__, list(self.positions))
 
     @classmethod
     def expand(cls, starts, winlens, excludezero=False):
@@ -156,7 +156,7 @@ class Feature(object):
         """
         if not all(x > 0 for x in winlens):
             raise ValueError("non-positive window length in {0}".format(winlens))
-        xs = (starts[i:i+w] for w in winlens for i in range(len(starts)-w+1))
+        xs = (starts[i : i + w] for w in winlens for i in range(len(starts) - w + 1))
         return [cls(x) for x in xs if not (excludezero and 0 in x)]
 
     def issuperset(self, other):
@@ -187,7 +187,9 @@ class Feature(object):
 
 
         """
-        return self.__class__ is other.__class__ and set(self.positions) >= set(other.positions)
+        return self.__class__ is other.__class__ and set(self.positions) >= set(
+            other.positions
+        )
 
     def intersects(self, other):
         """
@@ -218,16 +220,22 @@ class Feature(object):
         :rtype: bool
         """
 
-        return bool((self.__class__ is other.__class__ and set(self.positions) & set(other.positions)))
+        return bool(
+            (
+                self.__class__ is other.__class__
+                and set(self.positions) & set(other.positions)
+            )
+        )
 
     # Rich comparisons for Features. With @functools.total_ordering (Python 2.7+),
     # it will be enough to define __lt__ and __eq__
     def __eq__(self, other):
-        return (self.__class__ is other.__class__ and self.positions == other.positions)
+        return self.__class__ is other.__class__ and self.positions == other.positions
 
     def __lt__(self, other):
         return (
-            self.__class__.__name__ < other.__class__.__name__ or
+            self.__class__.__name__ < other.__class__.__name__
+            or
             #    self.positions is a sorted tuple of ints
             self.positions < other.positions
         )
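
The six.add_metaclass decorator is replaced throughout by the Python-3-only metaclass keyword; the two spellings declare the same abstract base class. A minimal sketch (FeatureLike is a hypothetical stand-in):

    from abc import ABCMeta, abstractmethod

    class FeatureLike(metaclass=ABCMeta):   # py3 spelling of @add_metaclass(ABCMeta)
        @abstractmethod
        def extract_property(self, tokens, index):
            ...

    # Instantiating FeatureLike raises TypeError until extract_property is overridden.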
diff --git a/nlp_resource_data/nltk/tbl/rule.py b/nlp_resource_data/nltk/tbl/rule.py
old mode 100755 (executable)
new mode 100644 (file)
index 7c5c3f2..3c872f8
@@ -1,26 +1,22 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Marcus Uneson <marcus.uneson@gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
 # URL: <http://nltk.org/>
 # For license information, see  LICENSE.TXT
 
-from __future__ import print_function
 from abc import ABCMeta, abstractmethod
-from six import add_metaclass
 
-from nltk.compat import python_2_unicode_compatible, unicode_repr
 from nltk import jsontags
 
 
 ######################################################################
 # Tag Rules
 ######################################################################
-@add_metaclass(ABCMeta)
-class TagRule(object):
+class TagRule(metaclass=ABCMeta):
     """
     An interface for tag transformations on a tagged corpus, as
     performed by tbl taggers.  Each transformation finds all tokens
@@ -96,7 +92,6 @@ class TagRule(object):
         raise TypeError("Rules must implement __hash__()")
 
 
-@python_2_unicode_compatible
 @jsontags.register_tag
 class Rule(TagRule):
     """
@@ -117,7 +112,7 @@ class Rule(TagRule):
 
     """
 
-    json_tag = 'nltk.tbl.Rule'
+    json_tag = "nltk.tbl.Rule"
 
     def __init__(self, templateid, original_tag, replacement_tag, conditions):
         """
@@ -142,15 +137,20 @@ class Rule(TagRule):
 
     def encode_json_obj(self):
         return {
-            'templateid':   self.templateid,
-            'original':     self.original_tag,
-            'replacement':  self.replacement_tag,
-            'conditions':   self._conditions,
+            "templateid": self.templateid,
+            "original": self.original_tag,
+            "replacement": self.replacement_tag,
+            "conditions": self._conditions,
         }
 
     @classmethod
     def decode_json_obj(cls, obj):
-        return cls(obj['templateid'], obj['original'], obj['replacement'], obj['conditions'])
+        return cls(
+            obj["templateid"],
+            obj["original"],
+            obj["replacement"],
+            tuple(tuple(feat) for feat in obj["conditions"])
+        )
 
     def applies(self, tokens, index):
         # Inherit docs from TagRule
@@ -166,7 +166,7 @@ class Rule(TagRule):
             for pos in feature.positions:
                 if not (0 <= index + pos < len(tokens)):
                     continue
-                if feature.extract_property(tokens, index+pos) == val:
+                if feature.extract_property(tokens, index + pos) == val:
                     break
             else:
                 # No token satisfied the condition; return false.
@@ -176,12 +176,13 @@ class Rule(TagRule):
         return True
 
     def __eq__(self, other):
-        return (self is other or
-                (other is not None and
-                 other.__class__ == self.__class__ and
-                 self.original_tag == other.original_tag and
-                 self.replacement_tag == other.replacement_tag and
-                 self._conditions == other._conditions))
+        return self is other or (
+            other is not None
+            and other.__class__ == self.__class__
+            and self.original_tag == other.original_tag
+            and self.replacement_tag == other.replacement_tag
+            and self._conditions == other._conditions
+        )
 
     def __ne__(self, other):
         return not (self == other)
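
On Python 3 this explicit `__ne__` is technically redundant, since `object.__ne__` already inverts `__eq__` unless it returns NotImplemented; keeping it preserves the nltk2-era behaviour for subclasses. A quick check with a hypothetical class:

    class Point:
        def __init__(self, x):
            self.x = x

        def __eq__(self, other):
            return isinstance(other, Point) and self.x == other.x

    assert Point(1) != Point(2)  # holds without an explicit __ne__
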
@@ -201,17 +202,17 @@ class Rule(TagRule):
         try:
             return self.__repr
         except AttributeError:
-            self.__repr = (
-                "{0}('{1}', {2}, {3}, [{4}])".format(
-                    self.__class__.__name__,
-                    self.templateid,
-                    unicode_repr(self.original_tag),
-                    unicode_repr(self.replacement_tag),
-
-                    # list(self._conditions) would be simpler but will not generate
-                    # the same Rule.__repr__ in python 2 and 3 and thus break some tests
-                    ', '.join("({0},{1})".format(f, unicode_repr(v)) for (f, v) in self._conditions)
-                )
+            self.__repr = "{0}('{1}', {2}, {3}, [{4}])".format(
+                self.__class__.__name__,
+                self.templateid,
+                repr(self.original_tag),
+                repr(self.replacement_tag),
+                # list(self._conditions) would be simpler but will not generate
+                # the same Rule.__repr__ in python 2 and 3 and thus break some tests
+                ", ".join(
+                    "({0},{1})".format(f, repr(v))
+                    for (f, v) in self._conditions
+                ),
             )
 
             return self.__repr
@@ -222,17 +223,17 @@ class Rule(TagRule):
             Return a compact, predicate-logic styled string representation
             of the given condition.
             """
-            return '{0}:{1}@[{2}]'.format(
+            return "{0}:{1}@[{2}]".format(
                 feature.PROPERTY_NAME,
                 value,
-                ",".join(str(w) for w in feature.positions)
+                ",".join(str(w) for w in feature.positions),
             )
 
-        conditions = ' & '.join([_condition_to_logic(f, v) for (f, v) in self._conditions])
-        s = '{0}->{1} if {2}'.format(
-            self.original_tag,
-            self.replacement_tag,
-            conditions
+        conditions = " & ".join(
+            [_condition_to_logic(f, v) for (f, v) in self._conditions]
+        )
+        s = "{0}->{1} if {2}".format(
+            self.original_tag, self.replacement_tag, conditions
         )
 
         return s
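
The compact format assembled here reads `original->replacement if PROPERTY:value@[positions]`, with multiple conditions joined by " & ". A sketch of the expected output, assuming the `Pos` feature from nltk.tag.brill:

    from nltk.tag.brill import Pos
    from nltk.tbl.rule import Rule

    r = Rule("001", "NN", "VB", [(Pos([-1]), "TO")])
    print(r)  # expected: NN->VB if Pos:TO@[-1]
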
@@ -288,34 +289,38 @@ class Rule(TagRule):
 
         Not sure how useful this is.
         """
+
         def condition_to_str(feature, value):
-            return ('the %s of %s is "%s"' %
-                    (feature.PROPERTY_NAME, range_to_str(feature.positions), value))
+            return 'the %s of %s is "%s"' % (
+                feature.PROPERTY_NAME,
+                range_to_str(feature.positions),
+                value,
+            )
 
         def range_to_str(positions):
             if len(positions) == 1:
                 p = positions[0]
                 if p == 0:
-                    return 'this word'
+                    return "this word"
                 if p == -1:
-                    return 'the preceding word'
+                    return "the preceding word"
                 elif p == 1:
-                    return 'the following word'
+                    return "the following word"
                 elif p < 0:
-                    return 'word i-%d' % -p
+                    return "word i-%d" % -p
                 elif p > 0:
-                    return 'word i+%d' % p
+                    return "word i+%d" % p
             else:
                 # for complete compatibility with the wordy format of nltk2
                 mx = max(positions)
                 mn = min(positions)
                 if mx - mn == len(positions) - 1:
-                    return 'words i%+d...i%+d' % (mn, mx)
+                    return "words i%+d...i%+d" % (mn, mx)
                 else:
-                    return 'words {%s}' % (",".join("i%+d" % d for d in positions),)
+                    return "words {%s}" % (",".join("i%+d" % d for d in positions),)
 
-        replacement = '%s -> %s' % (self.original_tag, self.replacement_tag)
-        conditions = (' if ' if self._conditions else "") + ', and '.join(
+        replacement = "%s -> %s" % (self.original_tag, self.replacement_tag)
+        conditions = (" if " if self._conditions else "") + ", and ".join(
             condition_to_str(f, v) for (f, v) in self._conditions
         )
         return replacement + conditions
diff --git a/nlp_resource_data/nltk/tbl/rule.pyc b/nlp_resource_data/nltk/tbl/rule.pyc
deleted file mode 100755 (executable)
index 86d294f..0000000
Binary files a/nlp_resource_data/nltk/tbl/rule.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tbl/template.py b/nlp_resource_data/nltk/tbl/template.py
old mode 100755 (executable)
new mode 100644 (file)
index 38db64b..06ddff0
@@ -1,28 +1,26 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Transformation-based learning
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Marcus Uneson <marcus.uneson@gmail.com>
 #   based on previous (nltk2) version by
 #   Christopher Maloof, Edward Loper, Steven Bird
 # URL: <http://nltk.org/>
 # For license information, see  LICENSE.TXT
 
-from __future__ import print_function
 from abc import ABCMeta, abstractmethod
-from six import add_metaclass
 import itertools as it
 from nltk.tbl.feature import Feature
 from nltk.tbl.rule import Rule
 
 
-@add_metaclass(ABCMeta)
-class BrillTemplateI(object):
+class BrillTemplateI(metaclass=ABCMeta):
     """
     An interface for generating lists of transformational rules that
     apply at given sentence positions.  ``BrillTemplateI`` is used by
     ``Brill`` training algorithms to generate candidate rules.
     """
+
     @abstractmethod
     def applicable_rules(self, tokens, i, correctTag):
         """
@@ -72,6 +70,7 @@ class Template(BrillTemplateI):
       - use the given features, each at its own independent position; and
       - are applicable to the given token.
     """
+
     ALLTEMPLATES = []
     # record a unique id of form "001", for each template created
     # _ids = it.count(0)
@@ -132,16 +131,22 @@ class Template(BrillTemplateI):
         # Template(Feature1(args),  Feature2(args), ...)
         if all(isinstance(f, Feature) for f in features):
             self._features = features
-        elif issubclass(features[0], Feature) and all(isinstance(a, tuple) for a in features[1:]):
+        elif issubclass(features[0], Feature) and all(
+            isinstance(a, tuple) for a in features[1:]
+        ):
             self._features = [features[0](*tp) for tp in features[1:]]
         else:
             raise TypeError(
-                "expected either Feature1(args), Feature2(args), ... or Feature, (start1, end1), (start2, end2), ...")
+                "expected either Feature1(args), Feature2(args), ... or Feature, (start1, end1), (start2, end2), ..."
+            )
         self.id = "{0:03d}".format(len(self.ALLTEMPLATES))
         self.ALLTEMPLATES.append(self)
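
The isinstance/issubclass dispatch above accepts two calling conventions; a usage sketch, assuming the standard Word and Pos features from nltk.tag.brill:

    from nltk.tag.brill import Pos, Word
    from nltk.tbl.template import Template

    # Form 1: ready-made Feature instances, one per position set.
    t1 = Template(Pos([-1]), Word([0]))
    # Form 2: a single Feature subclass plus (start, end) windows.
    t2 = Template(Pos, (-1, -1), (0, 0))
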
 
     def __repr__(self):
-        return "%s(%s)" % (self.__class__.__name__, ",".join([str(f) for f in self._features]))
+        return "%s(%s)" % (
+            self.__class__.__name__,
+            ",".join([str(f) for f in self._features]),
+        )
 
     def applicable_rules(self, tokens, index, correct_tag):
         if tokens[index][1] == correct_tag:
@@ -166,25 +171,25 @@ class Template(BrillTemplateI):
         for feature in self._features:
             conditions.append([])
             for pos in feature.positions:
-                if not (0 <= index+pos < len(tokens)):
+                if not (0 <= index + pos < len(tokens)):
                     continue
-                value = feature.extract_property(tokens, index+pos)
-                conditions[-1].append( (feature, value) )
+                value = feature.extract_property(tokens, index + pos)
+                conditions[-1].append((feature, value))
         return conditions
 
     def get_neighborhood(self, tokens, index):
         # inherit docs from BrillTemplateI
 
         # applicable_rules(tokens, index, ...) depends on index.
-        neighborhood = set([index])  #set literal for python 2.7+
+        neighborhood = set([index])  # set literal for python 2.7+
 
         # applicable_rules(tokens, i, ...) depends on index if
         # i+start < index <= i+end.
 
         allpositions = [0] + [p for feat in self._features for p in feat.positions]
         start, end = min(allpositions), max(allpositions)
-        s = max(0, index+(-end))
-        e = min(index+(-start)+1, len(tokens))
+        s = max(0, index + (-end))
+        e = min(index + (-start) + 1, len(tokens))
         for i in range(s, e):
             neighborhood.add(i)
         return neighborhood
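
Concretely, the arithmetic above collects every index whose candidate rules can see the changed token. A worked example with hypothetical numbers:

    allpositions = [0, -1, 0, 1]              # template inspecting -1, 0, +1
    start, end = min(allpositions), max(allpositions)  # -1 and 1
    index, n_tokens = 5, 10
    s = max(0, index + (-end))                # 4
    e = min(index + (-start) + 1, n_tokens)   # 7
    print(set(range(s, e)) | {index})         # {4, 5, 6}
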
@@ -264,31 +269,42 @@ class Template(BrillTemplateI):
         :returns: generator of Templates
 
         """
-        def nonempty_powerset(xs): #xs is a list
+
+        def nonempty_powerset(xs):  # xs is a list
             # itertools doc: nonempty_powerset([1,2,3]) --> (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
 
             # find the correct tuple given combinations, one of {None, k, (k1,k2)}
-            k = combinations #for brevity
-            combrange = ((1, len(xs)+1) if k is None else     # n over 1 .. n over n (all non-empty combinations)
-                         (k, k+1) if isinstance(k, int) else  # n over k (only
-                         (k[0], k[1]+1))                      # n over k1, n over k1+1... n over k2
-            return it.chain.from_iterable(it.combinations(xs, r)
-                                          for r in range(*combrange))
+            k = combinations  # for brevity
+            combrange = (
+                (1, len(xs) + 1)  # n over 1 .. n over n (all non-empty combinations)
+                if k is None
+                else (k, k + 1)  # n over k (only)
+                if isinstance(k, int)
+                else (k[0], k[1] + 1)  # n over k1, n over k1+1 ... n over k2
+            )
+            return it.chain.from_iterable(
+                it.combinations(xs, r) for r in range(*combrange)
+            )
+
         seentemplates = set()
         for picks in nonempty_powerset(featurelists):
             for pick in it.product(*picks):
-                if any(i != j and x.issuperset(y)
-                       for (i, x) in enumerate(pick)
-                       for (j, y) in enumerate(pick)):
+                if any(
+                    i != j and x.issuperset(y)
+                    for (i, x) in enumerate(pick)
+                    for (j, y) in enumerate(pick)
+                ):
                     continue
-                if skipintersecting and any(i != j and x.intersects(y)
-                                            for (i, x) in enumerate(pick)
-                                            for (j, y) in enumerate(pick)):
+                if skipintersecting and any(
+                    i != j and x.intersects(y)
+                    for (i, x) in enumerate(pick)
+                    for (j, y) in enumerate(pick)
+                ):
                     continue
                 thistemplate = cls(*sorted(pick))
                 strpick = str(thistemplate)
                 #!!FIXME --this is hackish
-                if strpick in seentemplates: #already added
+                if strpick in seentemplates:  # already added
                     cls._poptemplate()
                     continue
                 seentemplates.add(strpick)
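
A usage sketch for the reformatted generator, following the pattern its docstring suggests (the exact Feature.expand arguments are assumptions):

    from nltk.tag.brill import Pos, Word
    from nltk.tbl.template import Template

    wordtpls = Word.expand([-1, 0, 1], [1, 2], excludezero=False)
    tagtpls = Pos.expand([-2, -1, 0, 1], [1, 2], excludezero=True)
    templates = list(Template.expand([wordtpls, tagtpls], combinations=(1, 3)))
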
diff --git a/nlp_resource_data/nltk/test/__init__.py b/nlp_resource_data/nltk/test/__init__.py
new file mode 100644 (file)
index 0000000..639b0b1
--- /dev/null
@@ -0,0 +1,18 @@
+# Natural Language Toolkit: Unit Tests
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Unit tests for the NLTK modules.  These tests are intended to ensure
+that source code changes don't accidentally introduce bugs.
+For instructions, please see:
+
+../../web/dev/local_testing.rst
+
+https://github.com/nltk/nltk/blob/develop/web/dev/local_testing.rst
+
+
+"""
diff --git a/nlp_resource_data/nltk/test/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..aa37dc5
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/all.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/all.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f4967ba
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/all.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/childes_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/childes_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..129e949
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/childes_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/classify_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/classify_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..04e8a4e
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/classify_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7bed297
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/discourse_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/discourse_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ee3454e
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/discourse_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/gensim_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/gensim_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..1955f4d
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/gensim_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/gluesemantics_malt_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/gluesemantics_malt_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..97e9ae8
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/gluesemantics_malt_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/inference_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/inference_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..55fe85f
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/inference_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/nonmonotonic_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/nonmonotonic_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..cbbc4e0
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/nonmonotonic_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f063665
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/probability_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/probability_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e7dacd8
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/probability_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/runtests.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/runtests.cpython-37.pyc
new file mode 100644 (file)
index 0000000..9cc299b
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/runtests.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..41dbea1
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/semantics_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/semantics_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..98f8e27
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/semantics_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/translate_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/translate_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..b870953
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/translate_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/wordnet_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/wordnet_fixt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..b4224b9
Binary files /dev/null and b/nlp_resource_data/nltk/test/__pycache__/wordnet_fixt.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/all.py b/nlp_resource_data/nltk/test/all.py
new file mode 100644 (file)
index 0000000..5844a39
--- /dev/null
@@ -0,0 +1,22 @@
+"""Test suite that runs all NLTK tests.
+
+This module, `nltk.test.all`, is named as the NLTK ``test_suite`` in the
+project's ``setup-eggs.py`` file.  Here, we create a test suite that
+runs all of our doctests, and return it for processing by the setuptools
+test harness.
+
+"""
+import doctest, unittest
+from glob import glob
+import os.path
+
+
+def additional_tests():
+    # print("here-000000000000000")
+    # print("-----", glob(os.path.join(os.path.dirname(__file__), '*.doctest')))
+    dir = os.path.dirname(__file__)
+    paths = glob(os.path.join(dir, "*.doctest"))
+    files = [os.path.basename(path) for path in paths]
+    return unittest.TestSuite([doctest.DocFileSuite(file) for file in files])
+
+
+# if os.path.split(path)[-1] != 'index.rst'
+# skips time-dependent doctest in index.rst
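
For a standalone run outside the setuptools harness, something like the following sketch should work (an assumption, not part of the file):

    import unittest
    from nltk.test.all import additional_tests

    if __name__ == "__main__":
        unittest.TextTestRunner(verbosity=2).run(additional_tests())
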
diff --git a/nlp_resource_data/nltk/test/bleu.doctest b/nlp_resource_data/nltk/test/bleu.doctest
new file mode 100644 (file)
index 0000000..e5ed074
--- /dev/null
@@ -0,0 +1,14 @@
+==========
+BLEU tests
+==========
+
+>>> from nltk.translate import bleu
+
+If the candidate has no alignment to any of the references, the BLEU score is 0.
+
+>>> bleu(
+...     ['The candidate has no alignment to any of the references'.split()],
+...     'John loves Mary'.split(),
+...     [1],
+... )
+0
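
The third argument is the n-gram weight vector of `nltk.translate.bleu` (sentence-level BLEU), so `[1]` scores unigram precision only; with no overlapping n-grams at all, any weighting yields 0. The same call with the keyword spelled out:

    from nltk.translate import bleu

    references = ['The candidate has no alignment to any of the references'.split()]
    hypothesis = 'John loves Mary'.split()
    print(bleu(references, hypothesis, weights=[1]))  # 0
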
diff --git a/nlp_resource_data/nltk/test/bnc.doctest b/nlp_resource_data/nltk/test/bnc.doctest
new file mode 100644 (file)
index 0000000..4b27cde
--- /dev/null
@@ -0,0 +1,60 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+    >>> import os.path
+
+    >>> from nltk.corpus.reader import BNCCorpusReader
+    >>> import nltk.test
+
+    >>> root = os.path.dirname(nltk.test.__file__)
+    >>> bnc = BNCCorpusReader(root=root, fileids='FX8.xml')
+
+Checking word access.
+---------------------
+
+    >>> len(bnc.words())
+    151
+
+    >>> bnc.words()[:6]
+    ['Ah', 'there', 'we', 'are', ',', '.']
+    >>> bnc.words(stem=True)[:6]
+    ['ah', 'there', 'we', 'be', ',', '.']
+
+    >>> bnc.tagged_words()[:6]
+    [('Ah', 'INTERJ'), ('there', 'ADV'), ('we', 'PRON'), ('are', 'VERB'), (',', 'PUN'), ('.', 'PUN')]
+
+    >>> bnc.tagged_words(c5=True)[:6]
+    [('Ah', 'ITJ'), ('there', 'AV0'), ('we', 'PNP'), ('are', 'VBB'), (',', 'PUN'), ('.', 'PUN')]
+
+Testing access to the sentences.
+--------------------------------
+
+    >>> len(bnc.sents())
+    15
+
+    >>> bnc.sents()[0]
+    ['Ah', 'there', 'we', 'are', ',', '.']
+    >>> bnc.sents(stem=True)[0]
+    ['ah', 'there', 'we', 'be', ',', '.']
+
+    >>> bnc.tagged_sents()[0]
+    [('Ah', 'INTERJ'), ('there', 'ADV'), ('we', 'PRON'), ('are', 'VERB'), (',', 'PUN'), ('.', 'PUN')]
+    >>> bnc.tagged_sents(c5=True)[0]
+    [('Ah', 'ITJ'), ('there', 'AV0'), ('we', 'PNP'), ('are', 'VBB'), (',', 'PUN'), ('.', 'PUN')]
+
+A non-lazy loader.
+------------------
+
+    >>> eager = BNCCorpusReader(root=root, fileids=r'FX8.xml', lazy=False)
+
+    >>> len(eager.words())
+    151
+    >>> eager.words(stem=True)[6:17]
+    ['right', 'abdominal', 'wound', ',', 'she', 'be', 'a', 'wee', 'bit', 'confuse', '.']
+
+    >>> eager.tagged_words()[6:11]
+    [('Right', 'ADV'), ('abdominal', 'ADJ'), ('wound', 'SUBST'), (',', 'PUN'), ('she', 'PRON')]
+    >>> eager.tagged_words(c5=True)[6:17]
+    [('Right', 'AV0'), ('abdominal', 'AJ0'), ('wound', 'NN1'), (',', 'PUN'), ('she', 'PNP'), ("'s", 'VBZ'), ('a', 'AT0'), ('wee', 'AJ0-NN1'), ('bit', 'NN1'), ('confused', 'VVN-AJ0'), ('.', 'PUN')]
+    >>> len(eager.sents())
+    15
diff --git a/nlp_resource_data/nltk/test/ccg.doctest b/nlp_resource_data/nltk/test/ccg.doctest
new file mode 100644 (file)
index 0000000..acc29d5
--- /dev/null
@@ -0,0 +1,376 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==============================
+Combinatory Categorial Grammar
+==============================
+
+Relative Clauses
+----------------
+
+    >>> from nltk.ccg import chart, lexicon
+
+Construct a lexicon:
+
+    >>> lex = lexicon.parseLexicon('''
+    ...     :- S, NP, N, VP
+    ...
+    ...     Det :: NP/N
+    ...     Pro :: NP
+    ...     Modal :: S\\NP/VP
+    ...
+    ...     TV :: VP/NP
+    ...     DTV :: TV/NP
+    ...
+    ...     the => Det
+    ...
+    ...     that => Det
+    ...     that => NP
+    ...
+    ...     I => Pro
+    ...     you => Pro
+    ...     we => Pro
+    ...
+    ...     chef => N
+    ...     cake => N
+    ...     children => N
+    ...     dough => N
+    ...
+    ...     will => Modal
+    ...     should => Modal
+    ...     might => Modal
+    ...     must => Modal
+    ...
+    ...     and => var\\.,var/.,var
+    ...
+    ...     to => VP[to]/VP
+    ...
+    ...     without => (VP\\VP)/VP[ing]
+    ...
+    ...     be => TV
+    ...     cook => TV
+    ...     eat => TV
+    ...
+    ...     cooking => VP[ing]/NP
+    ...
+    ...     give => DTV
+    ...
+    ...     is => (S\\NP)/NP
+    ...     prefer => (S\\NP)/NP
+    ...
+    ...     which => (N\\N)/(S/NP)
+    ...
+    ...     persuade => (VP/VP[to])/NP
+    ...     ''')
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> for parse in parser.parse("you prefer that cake".split()):
+    ...     chart.printCCGDerivation(parse)
+    ...     break
+    ...
+     you    prefer      that   cake
+     NP   ((S\NP)/NP)  (NP/N)   N
+                      -------------->
+                            NP
+         --------------------------->
+                   (S\NP)
+    --------------------------------<
+                   S
+
+    >>> for parse in parser.parse("that is the cake which you prefer".split()):
+    ...     chart.printCCGDerivation(parse)
+    ...     break
+    ...
+     that      is        the    cake      which       you    prefer
+      NP   ((S\NP)/NP)  (NP/N)   N    ((N\N)/(S/NP))  NP   ((S\NP)/NP)
+                                                     ----->T
+                                                  (S/(S\NP))
+                                                     ------------------>B
+                                                           (S/NP)
+                                     ---------------------------------->
+                                                   (N\N)
+                               ----------------------------------------<
+                                                  N
+                       ------------------------------------------------>
+                                              NP
+          ------------------------------------------------------------->
+                                     (S\NP)
+    -------------------------------------------------------------------<
+                                     S
+
+
+Some other sentences to try:
+"that is the cake which we will persuade the chef to cook"
+"that is the cake which we will persuade the chef to give the children"
+
+    >>> sent = "that is the dough which you will eat without cooking".split()
+    >>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet +
+    ...                       chart.CompositionRuleSet + chart.TypeRaiseRuleSet)
+
+Without Substitution (no output)
+
+    >>> for parse in nosub_parser.parse(sent):
+    ...     chart.printCCGDerivation(parse)
+
+With Substitution:
+
+    >>> for parse in parser.parse(sent):
+    ...     chart.printCCGDerivation(parse)
+    ...     break
+    ...
+     that      is        the    dough      which       you     will        eat          without           cooking
+      NP   ((S\NP)/NP)  (NP/N)    N    ((N\N)/(S/NP))  NP   ((S\NP)/VP)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
+                                                      ----->T
+                                                   (S/(S\NP))
+                                                                                 ------------------------------------->B
+                                                                                             ((VP\VP)/NP)
+                                                                        ----------------------------------------------<Sx
+                                                                                           (VP/NP)
+                                                           ----------------------------------------------------------->B
+                                                                                   ((S\NP)/NP)
+                                                      ---------------------------------------------------------------->B
+                                                                                   (S/NP)
+                                      -------------------------------------------------------------------------------->
+                                                                           (N\N)
+                               ---------------------------------------------------------------------------------------<
+                                                                          N
+                       ----------------------------------------------------------------------------------------------->
+                                                                     NP
+          ------------------------------------------------------------------------------------------------------------>
+                                                             (S\NP)
+    ------------------------------------------------------------------------------------------------------------------<
+                                                            S
+
+
+Conjunction
+-----------
+
+    >>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet
+    >>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation
+    >>> from nltk.ccg import lexicon
+
+Lexicons for the tests:
+
+    >>> test1_lex = '''
+    ...        :- S,N,NP,VP
+    ...        I => NP
+    ...        you => NP
+    ...        will => S\\NP/VP
+    ...        cook => VP/NP
+    ...        which => (N\\N)/(S/NP)
+    ...        and => var\\.,var/.,var
+    ...        might => S\\NP/VP
+    ...        eat => VP/NP
+    ...        the => NP/N
+    ...        mushrooms => N
+    ...        parsnips => N'''
+    >>> test2_lex = '''
+    ...         :- N, S, NP, VP
+    ...         articles => N
+    ...         the => NP/N
+    ...         and => var\\.,var/.,var
+    ...         which => (N\\N)/(S/NP)
+    ...         I => NP
+    ...         anyone => NP
+    ...         will => (S/VP)\\NP
+    ...         file => VP/NP
+    ...         without => (VP\\VP)/VP[ing]
+    ...         forget => VP/NP
+    ...         reading => VP[ing]/NP
+    ...         '''
+
+Tests handling of conjunctions.
+Note that while the two derivations are different, they are semantically equivalent.
+
+    >>> lex = lexicon.parseLexicon(test1_lex)
+    >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
+    >>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()):
+    ...     printCCGDerivation(parse)
+     I      will       cook               and                might       eat     the    mushrooms             and             parsnips
+     NP  ((S\NP)/VP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var0\.,_var0)/.,_var0)     N
+        ---------------------->B
+             ((S\NP)/NP)
+                                                         ---------------------->B
+                                                              ((S\NP)/NP)
+                              ------------------------------------------------->
+                                         (((S\NP)/NP)\.,((S\NP)/NP))
+        -----------------------------------------------------------------------<
+                                      ((S\NP)/NP)
+                                                                                                  ------------------------------------->
+                                                                                                                 (N\.,N)
+                                                                                       ------------------------------------------------<
+                                                                                                              N
+                                                                               -------------------------------------------------------->
+                                                                                                          NP
+        ------------------------------------------------------------------------------------------------------------------------------->
+                                                                    (S\NP)
+    -----------------------------------------------------------------------------------------------------------------------------------<
+                                                                     S
+     I      will       cook               and                might       eat     the    mushrooms             and             parsnips
+     NP  ((S\NP)/VP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var0\.,_var0)/.,_var0)     N
+        ---------------------->B
+             ((S\NP)/NP)
+                                                         ---------------------->B
+                                                              ((S\NP)/NP)
+                              ------------------------------------------------->
+                                         (((S\NP)/NP)\.,((S\NP)/NP))
+        -----------------------------------------------------------------------<
+                                      ((S\NP)/NP)
+        ------------------------------------------------------------------------------->B
+                                          ((S\NP)/N)
+                                                                                                  ------------------------------------->
+                                                                                                                 (N\.,N)
+                                                                                       ------------------------------------------------<
+                                                                                                              N
+        ------------------------------------------------------------------------------------------------------------------------------->
+                                                                    (S\NP)
+    -----------------------------------------------------------------------------------------------------------------------------------<
+                                                                     S
+
+
+Tests handling of subject extraction.
+It is interesting to note that the two parses are clearly semantically different.
+
+    >>> lex = lexicon.parseLexicon(test2_lex)
+    >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
+    >>> for parse in parser.parse("articles which I will file and forget without reading".split()):
+    ...     printCCGDerivation(parse)
+     articles      which       I      will       file               and             forget         without           reading
+        N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
+                              -----------------<
+                                   (S/VP)
+                                                                                            ------------------------------------->B
+                                                                                                        ((VP\VP)/NP)
+                                                                                   ----------------------------------------------<Sx
+                                                                                                      (VP/NP)
+                                                        ------------------------------------------------------------------------->
+                                                                                   ((VP/NP)\.,(VP/NP))
+                                               ----------------------------------------------------------------------------------<
+                                                                                    (VP/NP)
+                              --------------------------------------------------------------------------------------------------->B
+                                                                            (S/NP)
+              ------------------------------------------------------------------------------------------------------------------->
+                                                                     (N\N)
+    -----------------------------------------------------------------------------------------------------------------------------<
+                                                                  N
+     articles      which       I      will       file               and             forget         without           reading
+        N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
+                              -----------------<
+                                   (S/VP)
+                                                        ------------------------------------>
+                                                                ((VP/NP)\.,(VP/NP))
+                                               ---------------------------------------------<
+                                                                  (VP/NP)
+                                                                                            ------------------------------------->B
+                                                                                                        ((VP\VP)/NP)
+                                               ----------------------------------------------------------------------------------<Sx
+                                                                                    (VP/NP)
+                              --------------------------------------------------------------------------------------------------->B
+                                                                            (S/NP)
+              ------------------------------------------------------------------------------------------------------------------->
+                                                                     (N\N)
+    -----------------------------------------------------------------------------------------------------------------------------<
+                                                                  N
+
+
+Unicode support
+---------------
+
+Unicode words are supported.
+
+    >>> from nltk.ccg import chart, lexicon
+
+Lexicons for the tests:
+
+    >>> lex = lexicon.parseLexicon('''
+    ...        :- S, N, NP, PP
+    ...
+    ...        AdjI :: N\\N
+    ...        AdjD :: N/N
+    ...        AdvD :: S/S
+    ...        AdvI :: S\\S
+    ...        Det :: NP/N
+    ...        PrepNPCompl :: PP/NP
+    ...        PrepNAdjN :: S\\S/N
+    ...        PrepNAdjNP :: S\\S/NP
+    ...        VPNP :: S\\NP/NP
+    ...        VPPP :: S\\NP/PP
+    ...        VPser :: S\\NP/AdjI
+    ...
+    ...        auto => N
+    ...        bebidas => N
+    ...        cine => N
+    ...        ley => N
+    ...        libro => N
+    ...        ministro => N
+    ...        panadería => N
+    ...        presidente => N
+    ...        super => N
+    ...
+    ...        el => Det
+    ...        la => Det
+    ...        las => Det
+    ...        un => Det
+    ...
+    ...        Ana => NP
+    ...        Pablo => NP
+    ...
+    ...        y => var\\.,var/.,var
+    ...
+    ...        pero => (S/NP)\\(S/NP)/(S/NP)
+    ...
+    ...        anunció => VPNP
+    ...        compró => VPNP
+    ...        cree => S\\NP/S[dep]
+    ...        desmintió => VPNP
+    ...        lee => VPNP
+    ...        fueron => VPPP
+    ...
+    ...        es => VPser
+    ...
+    ...        interesante => AdjD
+    ...        interesante => AdjI
+    ...        nueva => AdjD
+    ...        nueva => AdjI
+    ...
+    ...        a => PrepNPCompl
+    ...        en => PrepNAdjN
+    ...        en => PrepNAdjNP
+    ...
+    ...        ayer => AdvI
+    ...
+    ...        que => (NP\\NP)/(S/NP)
+    ...        que => S[dep]/S
+    ...     ''')
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()):
+    ...     printCCGDerivation(parse) # doctest: +SKIP 
+    ...     # it fails on python2.7 because of the unicode problem explained in https://github.com/nltk/nltk/pull/1354
+    ...     break
+       el    ministro    anunció              pero              el    presidente   desmintió     la    nueva  ley
+     (NP/N)     N      ((S\NP)/NP)  (((S/NP)\(S/NP))/(S/NP))  (NP/N)      N       ((S\NP)/NP)  (NP/N)  (N/N)   N
+    ------------------>
+            NP
+    ------------------>T
+        (S/(S\NP))
+                                                             -------------------->
+                                                                      NP
+                                                             -------------------->T
+                                                                  (S/(S\NP))
+                                                             --------------------------------->B
+                                                                          (S/NP)
+                                   ----------------------------------------------------------->
+                                                         ((S/NP)\(S/NP))
+                                                                                                      ------------>
+                                                                                                           N
+                                                                                              -------------------->
+                                                                                                       NP
+                                                                                              --------------------<T
+                                                                                                   (S\(S/NP))
+                                   -------------------------------------------------------------------------------<B
+                                                                     (S\(S/NP))
+                      --------------------------------------------------------------------------------------------<B
+                                                                 (S/NP)
+    -------------------------------------------------------------------------------------------------------------->
+                                                          S
diff --git a/nlp_resource_data/nltk/test/ccg_semantics.doctest b/nlp_resource_data/nltk/test/ccg_semantics.doctest
new file mode 100644 (file)
index 0000000..450e78e
--- /dev/null
@@ -0,0 +1,553 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==============================================
+Combinatory Categorial Grammar with semantics
+==============================================
+
+-----
+Chart
+-----
+
+
+    >>> from nltk.ccg import chart, lexicon
+    >>> from nltk.ccg.chart import printCCGDerivation
+
+No semantics
+-------------------
+
+    >>> lex = lexicon.fromstring('''
+    ...     :- S, NP, N
+    ...     She => NP
+    ...     has => (S\\NP)/NP
+    ...     books => NP
+    ...     ''',
+    ...     False)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("She has books".split()))
+    >>> print(str(len(parses)) + " parses")
+    3 parses
+
+    >>> printCCGDerivation(parses[0])
+     She      has      books
+     NP   ((S\NP)/NP)   NP
+         -------------------->
+                (S\NP)
+    -------------------------<
+                S
+
+    >>> printCCGDerivation(parses[1])
+     She      has      books
+     NP   ((S\NP)/NP)   NP
+    ----->T
+    (S/(S\NP))
+         -------------------->
+                (S\NP)
+    ------------------------->
+                S
+
+
+    >>> printCCGDerivation(parses[2])
+     She      has      books
+     NP   ((S\NP)/NP)   NP
+    ----->T
+    (S/(S\NP))
+    ------------------>B
+          (S/NP)
+    ------------------------->
+                S
+
+Simple semantics
+-------------------
+
+    >>> lex = lexicon.fromstring('''
+    ...     :- S, NP, N
+    ...     She => NP {she}
+    ...     has => (S\\NP)/NP {\\x y.have(y, x)}
+    ...     a => NP/N {\\P.exists z.P(z)}
+    ...     book => N {book}
+    ...     ''',
+    ...     True)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("She has a book".split()))
+    >>> print(str(len(parses)) + " parses")
+    7 parses
+
+    >>> printCCGDerivation(parses[0])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+                                            ------------------------------------->
+                                                    NP {exists z.book(z)}
+              ------------------------------------------------------------------->
+                             (S\NP) {\y.have(y,exists z.book(z))}
+    -----------------------------------------------------------------------------<
+                           S {have(she,exists z.book(z))}
+
+    >>> printCCGDerivation(parses[1])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+              --------------------------------------------------------->B
+                       ((S\NP)/N) {\P y.have(y,exists z.P(z))}
+              ------------------------------------------------------------------->
+                             (S\NP) {\y.have(y,exists z.book(z))}
+    -----------------------------------------------------------------------------<
+                           S {have(she,exists z.book(z))}
+
+    >>> printCCGDerivation(parses[2])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+                                            ------------------------------------->
+                                                    NP {exists z.book(z)}
+              ------------------------------------------------------------------->
+                             (S\NP) {\y.have(y,exists z.book(z))}
+    ----------------------------------------------------------------------------->
+                           S {have(she,exists z.book(z))}
+
+    >>> printCCGDerivation(parses[3])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+              --------------------------------------------------------->B
+                       ((S\NP)/N) {\P y.have(y,exists z.P(z))}
+              ------------------------------------------------------------------->
+                             (S\NP) {\y.have(y,exists z.book(z))}
+    ----------------------------------------------------------------------------->
+                           S {have(she,exists z.book(z))}
+
+    >>> printCCGDerivation(parses[4])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+    ---------------------------------------->B
+            (S/NP) {\x.have(she,x)}
+                                            ------------------------------------->
+                                                    NP {exists z.book(z)}
+    ----------------------------------------------------------------------------->
+                           S {have(she,exists z.book(z))}
+
+    >>> printCCGDerivation(parses[5])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+              --------------------------------------------------------->B
+                       ((S\NP)/N) {\P y.have(y,exists z.P(z))}
+    ------------------------------------------------------------------->B
+                    (S/N) {\P.have(she,exists z.P(z))}
+    ----------------------------------------------------------------------------->
+                           S {have(she,exists z.book(z))}
+
+    >>> printCCGDerivation(parses[6])
+       She                 has                           a                book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+    ---------------------------------------->B
+            (S/NP) {\x.have(she,x)}
+    ------------------------------------------------------------------->B
+                    (S/N) {\P.have(she,exists z.P(z))}
+    ----------------------------------------------------------------------------->
+                           S {have(she,exists z.book(z))}
+
+Complex semantics
+-------------------
+
+    >>> lex = lexicon.fromstring('''
+    ...     :- S, NP, N
+    ...     She => NP {she}
+    ...     has => (S\\NP)/NP {\\x y.have(y, x)}
+    ...     a => ((S\\NP)\\((S\\NP)/NP))/N {\\P R x.(exists z.P(z) & R(z,x))}
+    ...     book => N {book}
+    ...     ''',
+    ...     True)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("She has a book".split()))
+    >>> print(str(len(parses)) + " parses")
+    2 parses
+
+    >>> printCCGDerivation(parses[0])
+       She                 has                                           a                                 book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (((S\NP)\((S\NP)/NP))/N) {\P R x.(exists z.P(z) & R(z,x))}  N {book}
+                                            ---------------------------------------------------------------------->
+                                                   ((S\NP)\((S\NP)/NP)) {\R x.(exists z.book(z) & R(z,x))}
+              ----------------------------------------------------------------------------------------------------<
+                                           (S\NP) {\x.(exists z.book(z) & have(x,z))}
+    --------------------------------------------------------------------------------------------------------------<
+                                         S {(exists z.book(z) & have(she,z))}
+
+    >>> printCCGDerivation(parses[1])
+       She                 has                                           a                                 book
+     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (((S\NP)\((S\NP)/NP))/N) {\P R x.(exists z.P(z) & R(z,x))}  N {book}
+    ---------->T
+    (S/(S\NP)) {\F.F(she)}
+                                            ---------------------------------------------------------------------->
+                                                   ((S\NP)\((S\NP)/NP)) {\R x.(exists z.book(z) & R(z,x))}
+              ----------------------------------------------------------------------------------------------------<
+                                           (S\NP) {\x.(exists z.book(z) & have(x,z))}
+    -------------------------------------------------------------------------------------------------------------->
+                                         S {(exists z.book(z) & have(she,z))}
+
+Using conjunctions
+---------------------
+
+    # TODO: the semantics of "and" should be more flexible
+    >>> lex = lexicon.fromstring('''
+    ...     :- S, NP, N
+    ...     I => NP {I}
+    ...     cook => (S\\NP)/NP {\\x y.cook(x,y)}
+    ...     and => var\\.,var/.,var {\\P Q x y.(P(x,y) & Q(x,y))}
+    ...     eat => (S\\NP)/NP {\\x y.eat(x,y)}
+    ...     the => NP/N {\\x.the(x)}
+    ...     bacon => N {bacon}
+    ...     ''',
+    ...     True)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("I cook and eat the bacon".split()))
+    >>> print(str(len(parses)) + " parses")
+    7 parses
+
+    >>> printCCGDerivation(parses[0])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+                                                                                                                               ------------------------------->
+                                                                                                                                       NP {the(bacon)}
+            -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+    ----------------------------------------------------------------------------------------------------------------------------------------------------------<
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[1])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+            --------------------------------------------------------------------------------------------------------------------------------------->B
+                                                      ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
+            -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+    ----------------------------------------------------------------------------------------------------------------------------------------------------------<
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[2])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+                                                                                                                               ------------------------------->
+                                                                                                                                       NP {the(bacon)}
+            -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[3])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+            --------------------------------------------------------------------------------------------------------------------------------------->B
+                                                      ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
+            -------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
+    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[4])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+    --------------------------------------------------------------------------------------------------------------------------->B
+                                                (S/NP) {\x.(eat(x,I) & cook(x,I))}
+                                                                                                                               ------------------------------->
+                                                                                                                                       NP {the(bacon)}
+    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[5])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+            --------------------------------------------------------------------------------------------------------------------------------------->B
+                                                      ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
+    ----------------------------------------------------------------------------------------------------------------------------------------------->B
+                                                      (S/N) {\x.(eat(the(x),I) & cook(the(x),I))}
+    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+    >>> printCCGDerivation(parses[6])
+       I                 cook                                       and                                        eat                     the            bacon
+     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+                                          ------------------------------------------------------------------------------------->
+                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
+            -------------------------------------------------------------------------------------------------------------------<
+                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
+    --------------------------------------------------------------------------------------------------------------------------->B
+                                                (S/NP) {\x.(eat(x,I) & cook(x,I))}
+    ----------------------------------------------------------------------------------------------------------------------------------------------->B
+                                                      (S/N) {\x.(eat(the(x),I) & cook(the(x),I))}
+    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
+                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
+
+Tests from published papers
+------------------------------
+
+An example from "CCGbank: A Corpus of CCG Derivations and Dependency Structures Extracted from the Penn Treebank", Hockenmaier and Steedman, 2007, Page 359, https://www.aclweb.org/anthology/J/J07/J07-3004.pdf
+
+    >>> lex = lexicon.fromstring('''
+    ...     :- S, NP
+    ...     I => NP {I}
+    ...     give => ((S\\NP)/NP)/NP {\\x y z.give(y,x,z)}
+    ...     them => NP {them}
+    ...     money => NP {money}
+    ...     ''',
+    ...     True)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("I give them money".split()))
+    >>> print(str(len(parses)) + " parses")
+    3 parses
+
+    >>> printCCGDerivation(parses[0])
+       I                     give                     them       money
+     NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
+            -------------------------------------------------->
+                    ((S\NP)/NP) {\y z.give(y,them,z)}
+            -------------------------------------------------------------->
+                            (S\NP) {\z.give(money,them,z)}
+    ----------------------------------------------------------------------<
+                            S {give(money,them,I)}
+
+    >>> printCCGDerivation(parses[1])
+       I                     give                     them       money
+     NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+            -------------------------------------------------->
+                    ((S\NP)/NP) {\y z.give(y,them,z)}
+            -------------------------------------------------------------->
+                            (S\NP) {\z.give(money,them,z)}
+    ---------------------------------------------------------------------->
+                            S {give(money,them,I)}
+
+    
+    >>> printCCGDerivation(parses[2])
+       I                     give                     them       money
+     NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
+    -------->T
+    (S/(S\NP)) {\F.F(I)}
+            -------------------------------------------------->
+                    ((S\NP)/NP) {\y z.give(y,them,z)}
+    ---------------------------------------------------------->B
+                    (S/NP) {\y.give(y,them,I)}
+    ---------------------------------------------------------------------->
+                            S {give(money,them,I)}
+
+
+An example from "CCGbank: A Corpus of CCG Derivations and Dependency Structures Extracted from the Penn Treebank", Hockenmaier and Steedman, 2007, Page 359, https://www.aclweb.org/anthology/J/J07/J07-3004.pdf
+
+    >>> lex = lexicon.fromstring('''
+    ...     :- N, NP, S
+    ...     money => N {money}
+    ...     that => (N\\N)/(S/NP) {\\P Q x.(P(x) & Q(x))}
+    ...     I => NP {I}
+    ...     give => ((S\\NP)/NP)/NP {\\x y z.give(y,x,z)}
+    ...     them => NP {them}
+    ...     ''',
+    ...     True)
+
+    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
+    >>> parses = list(parser.parse("money that I give them".split()))
+    >>> print(str(len(parses)) + " parses")
+    3 parses
+
+    >>> printCCGDerivation(parses[0])
+       money                    that                     I                     give                     them
+     N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
+                                                      -------->T
+                                                (S/(S\NP)) {\F.F(I)}
+                                                              -------------------------------------------------->
+                                                                      ((S\NP)/NP) {\y z.give(y,them,z)}
+                                                      ---------------------------------------------------------->B
+                                                                      (S/NP) {\y.give(y,them,I)}
+               ------------------------------------------------------------------------------------------------->
+                                             (N\N) {\Q x.(give(x,them,I) & Q(x))}
+    ------------------------------------------------------------------------------------------------------------<
+                                         N {\x.(give(x,them,I) & money(x))}
+
+    >>> printCCGDerivation(parses[1])
+       money                    that                     I                     give                     them
+     N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
+    ----------->T
+    (N/(N\N)) {\F.F(money)}
+                                                      -------->T
+                                                (S/(S\NP)) {\F.F(I)}
+                                                              -------------------------------------------------->
+                                                                      ((S\NP)/NP) {\y z.give(y,them,z)}
+                                                      ---------------------------------------------------------->B
+                                                                      (S/NP) {\y.give(y,them,I)}
+               ------------------------------------------------------------------------------------------------->
+                                             (N\N) {\Q x.(give(x,them,I) & Q(x))}
+    ------------------------------------------------------------------------------------------------------------>
+                                         N {\x.(give(x,them,I) & money(x))}
+
+    >>> printCCGDerivation(parses[2])
+       money                    that                     I                     give                     them
+     N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
+    ----------->T
+    (N/(N\N)) {\F.F(money)}
+    -------------------------------------------------->B
+           (N/(S/NP)) {\P x.(P(x) & money(x))}
+                                                      -------->T
+                                                (S/(S\NP)) {\F.F(I)}
+                                                              -------------------------------------------------->
+                                                                      ((S\NP)/NP) {\y z.give(y,them,z)}
+                                                      ---------------------------------------------------------->B
+                                                                      (S/NP) {\y.give(y,them,I)}
+    ------------------------------------------------------------------------------------------------------------>
+                                         N {\x.(give(x,them,I) & money(x))}
+
+
+-------
+Lexicon
+-------
+
+    >>> from nltk.ccg import lexicon
+
+Parse lexicon with semantics
+
+    >>> print(str(lexicon.fromstring(
+    ...     '''
+    ...     :- S,NP
+    ...
+    ...     IntransVsg :: S\\NP[sg]
+    ...     
+    ...     sleeps => IntransVsg {\\x.sleep(x)}
+    ...     eats => S\\NP[sg]/NP {\\x y.eat(x,y)}
+    ...        
+    ...     and => var\\var/var {\\x y.x & y}
+    ...     ''',
+    ...     True
+    ... )))
+    and => ((_var0\_var0)/_var0) {(\x y.x & y)}
+    eats => ((S\NP['sg'])/NP) {\x y.eat(x,y)}
+    sleeps => (S\NP['sg']) {\x.sleep(x)}
+
+Parse lexicon without semantics
+
+    >>> print(str(lexicon.fromstring(
+    ...     '''
+    ...     :- S,NP
+    ...
+    ...     IntransVsg :: S\\NP[sg]
+    ...     
+    ...     sleeps => IntransVsg
+    ...     eats => S\\NP[sg]/NP {sem=\\x y.eat(x,y)}
+    ...        
+    ...     and => var\\var/var
+    ...     ''',
+    ...     False
+    ... )))
+    and => ((_var0\_var0)/_var0)
+    eats => ((S\NP['sg'])/NP)
+    sleeps => (S\NP['sg'])
+
+Semantics are missing
+
+    >>> print(str(lexicon.fromstring(
+    ...     '''
+    ...     :- S,NP
+    ...     
+    ...     eats => S\\NP[sg]/NP
+    ...     ''',
+    ...     True
+    ... )))
+    Traceback (most recent call last):
+      ...
+    AssertionError: eats => S\NP[sg]/NP must contain semantics because include_semantics is set to True
+
+
+------------------------------------
+CCG combinator semantics computation
+------------------------------------
+
+    >>> from nltk.sem.logic import *
+    >>> from nltk.ccg.logic import *
+
+    >>> read_expr = Expression.fromstring
+
+Compute semantics from function application
+
+    >>> print(str(compute_function_semantics(read_expr(r'\x.P(x)'), read_expr(r'book'))))
+    P(book)
+
+    >>> print(str(compute_function_semantics(read_expr(r'\P.P(book)'), read_expr(r'read'))))
+    read(book)
+
+    >>> print(str(compute_function_semantics(read_expr(r'\P.P(book)'), read_expr(r'\x.read(x)'))))
+    read(book)
+
+Compute semantics from composition
+
+    >>> print(str(compute_composition_semantics(read_expr(r'\x.P(x)'), read_expr(r'\x.Q(x)'))))
+    \x.P(Q(x))
+
+    >>> print(str(compute_composition_semantics(read_expr(r'\x.P(x)'), read_expr(r'read'))))
+    Traceback (most recent call last):
+      ...
+    AssertionError: `read` must be a lambda expression
+
+Compute semantics from substitution
+
+    >>> print(str(compute_substitution_semantics(read_expr(r'\x y.P(x,y)'), read_expr(r'\x.Q(x)'))))
+    \x.P(x,Q(x))
+    
+    >>> print(str(compute_substitution_semantics(read_expr(r'\x.P(x)'), read_expr(r'read'))))
+    Traceback (most recent call last):
+      ...
+    AssertionError: `\x.P(x)` must be a lambda expression with 2 arguments
+
+Compute type-raise semantics
+
+    >>> print(str(compute_type_raised_semantics(read_expr(r'\x.P(x)'))))
+    \F x.F(P(x))
+
+    >>> print(str(compute_type_raised_semantics(read_expr(r'\x.F(x)'))))
+    \F1 x.F1(F(x))
+
+    >>> print(str(compute_type_raised_semantics(read_expr(r'\x y z.P(x,y,z)'))))
+    \F x y z.F(P(x,y,z))
+
diff --git a/nlp_resource_data/nltk/test/chat80.doctest b/nlp_resource_data/nltk/test/chat80.doctest
new file mode 100644 (file)
index 0000000..50d0c42
--- /dev/null
@@ -0,0 +1,234 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=======
+Chat-80
+=======
+
+Chat-80 was a natural language system which allowed the user to
+interrogate a Prolog knowledge base in the domain of world
+geography. It was developed in the early '80s by Warren and Pereira; see
+`<http://acl.ldc.upenn.edu/J/J82/J82-3002.pdf>`_ for a description and
+`<http://www.cis.upenn.edu/~pereira/oldies.html>`_ for the source
+files.
+
+The ``chat80`` module contains functions to extract data from the Chat-80
+relation files ('the world database'), and convert them into a format
+that can be incorporated in the FOL models of
+``nltk.sem.evaluate``. The code assumes that the Prolog
+input files are available in the NLTK corpora directory.
+
+The Chat-80 World Database consists of the following files::
+
+    world0.pl
+    rivers.pl
+    cities.pl
+    countries.pl
+    contain.pl
+    borders.pl
+
+This module uses a slightly modified version of ``world0.pl``, in which
+a set of Prolog rules has been omitted. The modified file is named
+``world1.pl``. Currently, the file ``rivers.pl`` is not read in, since
+it uses a list rather than a string in the second field.
+
+Reading Chat-80 Files
+=====================
+
+Chat-80 relations are like tables in a relational database. The
+relation acts as the name of the table; the first argument acts as the
+'primary key'; and subsequent arguments are further fields in the
+table. In general, the name of the table provides a label for a unary
+predicate whose extension is all the primary keys. For example,
+relations in ``cities.pl`` are of the following form::
+
+   'city(athens,greece,1368).'
+
+Here, ``'athens'`` is the key, and will be mapped to a member of the
+unary predicate *city*.
+
+By analogy with NLTK corpora, ``chat80`` defines a number of 'items'
+which correspond to the relations.
+
+    >>> from nltk.sem import chat80
+    >>> print(chat80.items) # doctest: +ELLIPSIS
+    ('borders', 'circle_of_lat', 'circle_of_long', 'city', ...)
+
+The fields in the table are mapped to binary predicates. The first
+argument of the predicate is the primary key, while the second
+argument is the data in the relevant field. Thus, in the above
+example, the third field is mapped to the binary predicate
+*population_of*, whose extension is a set of pairs such as
+``'(athens, 1368)'``.
+
+An exception to this general framework is required by the relations in
+the files ``borders.pl`` and ``contain.pl``. These contain facts of the
+following form::
+
+    'borders(albania,greece).'
+
+    'contains0(africa,central_africa).'
+
+We do not want to form a unary concept out of the element in
+the first field of these records, and we want the label of the binary
+relation just to be ``'border'``/``'contain'`` respectively.
+
+In order to drive the extraction process, we use 'relation metadata bundles'
+which are Python dictionaries such as the following::
+
+  city = {'label': 'city',
+          'closures': [],
+          'schema': ['city', 'country', 'population'],
+          'filename': 'cities.pl'}
+
+According to this, the file ``city['filename']`` contains a list of
+relational tuples (or more accurately, the corresponding strings in
+Prolog form) whose predicate symbol is ``city['label']`` and whose
+relational schema is ``city['schema']``. The notion of a ``closure`` is
+discussed in the next section.
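+
+Given such a bundle, the extraction can be driven generically. A sketch
+(assuming the ``city`` bundle above, and that ``clause2concepts`` accepts
+the closure list as a keyword argument)::
+
+    from nltk.sem import chat80
+
+    concepts = chat80.clause2concepts(city['filename'], city['label'],
+                                      city['schema'],
+                                      closures=city['closures'])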
+
+Concepts
+========
+In order to encapsulate the results of the extraction, a class of
+``Concept``\ s is introduced.  A ``Concept`` object has a number of
+attributes, in particular a ``prefLabel``, an ``arity`` and an ``extension``.
+
+    >>> c1 = chat80.Concept('dog', arity=1, extension=set(['d1', 'd2']))
+    >>> print(c1)
+    Label = 'dog'
+    Arity = 1
+    Extension = ['d1', 'd2']
+
+
+
+The ``extension`` attribute makes it easier to inspect the output of
+the extraction.
+
+    >>> schema = ['city', 'country', 'population']
+    >>> concepts = chat80.clause2concepts('cities.pl', 'city', schema)
+    >>> concepts
+    [Concept('city'), Concept('country_of'), Concept('population_of')]
+    >>> for c in concepts: # doctest: +NORMALIZE_WHITESPACE
+    ...     print("%s:\n\t%s" % (c.prefLabel, c.extension[:4]))
+    city:
+        ['athens', 'bangkok', 'barcelona', 'berlin']
+    country_of:
+        [('athens', 'greece'), ('bangkok', 'thailand'), ('barcelona', 'spain'), ('berlin', 'east_germany')]
+    population_of:
+        [('athens', '1368'), ('bangkok', '1178'), ('barcelona', '1280'), ('berlin', '3481')]
+
+In addition, the ``extension`` can be further
+processed: in the case of the ``'border'`` relation, we check that the
+relation is **symmetric**, and in the case of the ``'contain'``
+relation, we carry out the **transitive closure**. The closure
+properties associated with a concept are indicated in the relation
+metadata, as noted earlier.
+
+    >>> borders = set([('a1', 'a2'), ('a2', 'a3')])
+    >>> c2 = chat80.Concept('borders', arity=2, extension=borders)
+    >>> print(c2)
+    Label = 'borders'
+    Arity = 2
+    Extension = [('a1', 'a2'), ('a2', 'a3')]
+    >>> c3 = chat80.Concept('borders', arity=2, closures=['symmetric'], extension=borders)
+    >>> c3.close()
+    >>> print(c3)
+    Label = 'borders'
+    Arity = 2
+    Extension = [('a1', 'a2'), ('a2', 'a1'), ('a2', 'a3'), ('a3', 'a2')]
+
+The ``extension`` of a ``Concept`` object is then incorporated into a
+``Valuation`` object.
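+
+A rough sketch of that step (mirroring what ``make_valuation()`` does
+internally; we assume here that each extension can be passed to
+``Valuation`` as a set)::
+
+    from nltk.sem import Valuation
+
+    # pair each concept's label with its extension
+    val = Valuation([(c.prefLabel, set(c.extension)) for c in concepts])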
+
+Persistence
+===========
+The functions ``val_dump`` and ``val_load`` are provided to allow a
+valuation to be stored in a persistent database and re-loaded, rather
+than having to be re-computed each time.
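+
+A minimal sketch of the round trip (``chat80_val`` is a hypothetical
+database name, and we assume ``rels`` is the module's list of relation
+metadata bundles)::
+
+    from nltk.sem import chat80
+
+    chat80.val_dump(chat80.rels, 'chat80_val')   # compute once, store to disk
+    val = chat80.val_load('chat80_val')          # re-load without re-computing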
+
+Individuals and Lexical Items
+=============================
+As well as deriving relations from the Chat-80 data, we also create a
+set of individual constants, one for each entity in the domain. The
+individual constants are string-identical to the entities. For
+example, given a data item such as ``'zloty'``, we add to the valuation
+a pair ``('zloty', 'zloty')``. In order to parse English sentences that
+refer to these entities, we also create a lexical item such as the
+following for each individual constant::
+
+   PropN[num=sg, sem=<\P.(P zloty)>] -> 'Zloty'
+
+The set of rules is written to the file ``chat_pnames.fcfg`` in the
+current directory.
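+
+A sketch of this step, assuming a ``label_indivs()`` helper with this
+behaviour (it adds the ``('zloty', 'zloty')``-style pairs to the
+valuation and, with ``lexicon=True``, writes the rules out)::
+
+    val = chat80.label_indivs(val, lexicon=True)   # also writes chat_pnames.fcfg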
+
+SQL Query
+=========
+
+The ``city`` relation is also available in RDB form and can be queried
+using SQL statements.
+
+    >>> import nltk
+    >>> q = "SELECT City, Population FROM city_table WHERE Country = 'china' and Population > 1000"
+    >>> for answer in chat80.sql_query('corpora/city_database/city.db', q):
+    ...     print("%-10s %4s" % answer)
+    canton     1496
+    chungking  1100
+    mukden     1551
+    peking     2031
+    shanghai   5407
+    tientsin   1795
+
+The (deliberately naive) grammar ``sql0.fcfg`` translates from English
+to SQL:
+
+    >>> nltk.data.show_cfg('grammars/book_grammars/sql0.fcfg')
+    % start S
+    S[SEM=(?np + WHERE + ?vp)] -> NP[SEM=?np] VP[SEM=?vp]
+    VP[SEM=(?v + ?pp)] -> IV[SEM=?v] PP[SEM=?pp]
+    VP[SEM=(?v + ?ap)] -> IV[SEM=?v] AP[SEM=?ap]
+    NP[SEM=(?det + ?n)] -> Det[SEM=?det] N[SEM=?n]
+    PP[SEM=(?p + ?np)] -> P[SEM=?p] NP[SEM=?np]
+    AP[SEM=?pp] -> A[SEM=?a] PP[SEM=?pp]
+    NP[SEM='Country="greece"'] -> 'Greece'
+    NP[SEM='Country="china"'] -> 'China'
+    Det[SEM='SELECT'] -> 'Which' | 'What'
+    N[SEM='City FROM city_table'] -> 'cities'
+    IV[SEM=''] -> 'are'
+    A[SEM=''] -> 'located'
+    P[SEM=''] -> 'in'
+
+Given this grammar, we can express, and then execute, queries in English.
+
+    >>> cp = nltk.parse.load_parser('grammars/book_grammars/sql0.fcfg')
+    >>> query = 'What cities are in China'
+    >>> for tree in cp.parse(query.split()):
+    ...     answer = tree.label()['SEM']
+    ...     q = " ".join(answer)
+    ...     print(q)
+    ...
+    SELECT City FROM city_table WHERE   Country="china"
+
+    >>> rows = chat80.sql_query('corpora/city_database/city.db', q)
+    >>> for r in rows: print("%s" % r, end=' ')
+    canton chungking dairen harbin kowloon mukden peking shanghai sian tientsin
+
+
+Using Valuations
+-----------------
+
+In order to convert such an extension into a valuation, we use the
+``make_valuation()`` function; setting ``read=True`` creates and returns
+a new ``Valuation`` object which contains the results.
+
+   >>> val = chat80.make_valuation(concepts, read=True)
+   >>> 'calcutta' in val['city']
+   True
+   >>> [town for (town, country) in val['country_of'] if country == 'india']
+   ['bombay', 'calcutta', 'delhi', 'hyderabad', 'madras']
+   >>> dom = val.domain
+   >>> g = nltk.sem.Assignment(dom)
+   >>> m = nltk.sem.Model(dom, val)
+   >>> m.evaluate(r'population_of(jakarta, 533)', g)
+   True
+
+
diff --git a/nlp_resource_data/nltk/test/childes.doctest b/nlp_resource_data/nltk/test/childes.doctest
new file mode 100644 (file)
index 0000000..7900c54
--- /dev/null
@@ -0,0 +1,184 @@
+=======================
+ CHILDES Corpus Readers
+=======================
+
+Read the XML version of the CHILDES corpus.
+
+How to use CHILDESCorpusReader
+==============================
+
+Use the CHILDESCorpusReader class to read the CHILDES corpus saved in
+the nltk_data directory.
+
+    >>> import nltk
+    >>> from nltk.corpus.reader import CHILDESCorpusReader
+    >>> corpus_root = nltk.data.find('corpora/childes/data-xml/Eng-USA-MOR/')
+
+Reading files in the Valian corpus (Valian, 1991).
+
+    >>> valian = CHILDESCorpusReader(corpus_root, 'Valian/.*.xml')
+    >>> valian.fileids()
+    ['Valian/01a.xml', 'Valian/01b.xml', 'Valian/02a.xml', 'Valian/02b.xml',...
+
+Count the number of files
+
+    >>> len(valian.fileids())
+    43
+
+Printing properties of the corpus files.
+
+    >>> corpus_data = valian.corpus(valian.fileids())
+    >>> print(corpus_data[0]['Lang'])
+    eng
+    >>> for key in sorted(corpus_data[0].keys()):
+    ...    print(key, ": ", corpus_data[0][key])
+    Corpus :  valian
+    Date :  1986-03-04
+    Id :  01a
+    Lang :  eng
+    Version :  2.0.1
+    {http://www.w3.org/2001/XMLSchema-instance}schemaLocation :  http://www.talkbank.org/ns/talkbank http://talkbank.org/software/talkbank.xsd
+
+Printing information about the participants in the corpus. The most common codes for
+the participants are 'CHI' (target child), 'MOT' (mother), and 'INV' (investigator).
+
+    >>> corpus_participants = valian.participants(valian.fileids())
+    >>> for this_corpus_participants in corpus_participants[:2]:
+    ...     for key in sorted(this_corpus_participants.keys()):
+    ...         dct = this_corpus_participants[key]
+    ...         print(key, ": ", [(k, dct[k]) for k in sorted(dct.keys())])
+    CHI :  [('age', 'P2Y1M3D'), ('group', 'normal'), ('id', 'CHI'), ('language', 'eng'), ('role', 'Target_Child'), ('sex', 'female')]
+    INV :  [('id', 'INV'), ('language', 'eng'), ('role', 'Investigator')]
+    MOT :  [('id', 'MOT'), ('language', 'eng'), ('role', 'Mother')]
+    CHI :  [('age', 'P2Y1M12D'), ('group', 'normal'), ('id', 'CHI'), ('language', 'eng'), ('role', 'Target_Child'), ('sex', 'female')]
+    INV :  [('id', 'INV'), ('language', 'eng'), ('role', 'Investigator')]
+    MOT :  [('id', 'MOT'), ('language', 'eng'), ('role', 'Mother')]
+
+Printing words.
+
+    >>> valian.words('Valian/01a.xml')
+    ['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', ...
+
+Printing sentences.
+
+    >>> valian.sents('Valian/01a.xml')
+    [['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname',
+      'and', 'it', 'is', 'March', 'fourth', 'I', 'believe', 'and', 'when',
+      'was', "Parent's", 'birthday'], ["Child's"], ['oh', "I'm", 'sorry'],
+      ["that's", 'okay'], ...
+
+You can specify the participants with the argument *speaker*.
+
+    >>> valian.words('Valian/01a.xml',speaker=['INV'])
+    ['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', ...
+    >>> valian.words('Valian/01a.xml',speaker=['MOT'])
+    ["Child's", "that's", 'okay', 'February', 'first', 'nineteen', ...
+    >>> valian.words('Valian/01a.xml',speaker=['CHI'])
+    ['tape', 'it', 'up', 'and', 'two', 'tape', 'players', 'have',...
+
+
+tagged_words() and tagged_sents() return the usual (word,pos) tuple lists.
+POS tags in the CHILDES are automatically assigned by MOR and POST programs
+(MacWhinney, 2000).
+
+    >>> valian.tagged_words('Valian/01a.xml')[:30]
+    [('at', 'prep'), ('Parent', 'n:prop'), ("Lastname's", 'n:prop'), ('house', 'n'),
+    ('with', 'prep'), ('Child', 'n:prop'), ('Lastname', 'n:prop'), ('and', 'coord'),
+    ('it', 'pro'), ('is', 'v:cop'), ('March', 'n:prop'), ('fourth', 'adj'),
+    ('I', 'pro:sub'), ('believe', 'v'), ('and', 'coord'), ('when', 'adv:wh'),
+    ('was', 'v:cop'), ("Parent's", 'n:prop'), ('birthday', 'n'), ("Child's", 'n:prop'),
+    ('oh', 'co'), ("I'm", 'pro:sub'), ('sorry', 'adj'), ("that's", 'pro:dem'),
+    ('okay', 'adj'), ('February', 'n:prop'), ('first', 'adj'),
+    ('nineteen', 'det:num'), ('eighty', 'det:num'), ('four', 'det:num')]
+
+    >>> valian.tagged_sents('Valian/01a.xml')[:10]
+    [[('at', 'prep'), ('Parent', 'n:prop'), ("Lastname's", 'n:prop'), ('house', 'n'),
+    ('with', 'prep'), ('Child', 'n:prop'), ('Lastname', 'n:prop'), ('and', 'coord'),
+    ('it', 'pro'), ('is', 'v:cop'), ('March', 'n:prop'), ('fourth', 'adj'),
+    ('I', 'pro:sub'), ('believe', 'v'), ('and', 'coord'), ('when', 'adv:wh'),
+    ('was', 'v:cop'), ("Parent's", 'n:prop'), ('birthday', 'n')],
+    [("Child's", 'n:prop')], [('oh', 'co'), ("I'm", 'pro:sub'), ('sorry', 'adj')],
+    [("that's", 'pro:dem'), ('okay', 'adj')],
+    [('February', 'n:prop'), ('first', 'adj'), ('nineteen', 'det:num'),
+    ('eighty', 'det:num'), ('four', 'det:num')],
+    [('great', 'adj')],
+    [('and', 'coord'), ("she's", 'pro:sub'), ('two', 'det:num'), ('years', 'n'), ('old', 'adj')],
+    [('correct', 'adj')],
+    [('okay', 'co')], [('she', 'pro:sub'), ('just', 'adv:int'), ('turned', 'part'), ('two', 'det:num'),
+    ('a', 'det'), ('month', 'n'), ('ago', 'adv')]]
+
+When the argument *stem* is true, the word stems (e.g., 'is' -> 'be-3S') are
+used instead of the original words.
+
+    >>> valian.words('Valian/01a.xml')[:30]
+    ['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', 'and', 'it', 'is', ...
+    >>> valian.words('Valian/01a.xml',stem=True)[:30]
+    ['at', 'Parent', 'Lastname', 's', 'house', 'with', 'Child', 'Lastname', 'and', 'it', 'be-3S', ...
+
+When the argument *replace* is true, the replaced words are used instead of
+the original words.
+
+    >>> valian.words('Valian/01a.xml',speaker='CHI')[247]
+    'tikteat'
+    >>> valian.words('Valian/01a.xml',speaker='CHI',replace=True)[247]
+    'trick'
+
+When the argument *relation* is true, the grammatical relations in the
+sentence are returned. See Sagae et al. (2010) for details of the relational
+structure adopted in the CHILDES.
+
+    >>> valian.words('Valian/01a.xml',relation=True)[:10]
+    [[('at', 'prep', '1|0|ROOT'), ('Parent', 'n', '2|5|VOC'), ('Lastname', 'n', '3|5|MOD'), ('s', 'poss', '4|5|MOD'), ('house', 'n', '5|1|POBJ'), ('with', 'prep', '6|1|JCT'), ('Child', 'n', '7|8|NAME'), ('Lastname', 'n', '8|6|POBJ'), ('and', 'coord', '9|8|COORD'), ('it', 'pro', '10|11|SUBJ'), ('be-3S', 'v', '11|9|COMP'), ('March', 'n', '12|11|PRED'), ('fourth', 'adj', '13|12|MOD'), ('I', 'pro', '15|16|SUBJ'), ('believe', 'v', '16|14|ROOT'), ('and', 'coord', '18|17|ROOT'), ('when', 'adv', '19|20|PRED'), ('be-PAST', 'v', '20|18|COMP'), ('Parent', 'n', '21|23|MOD'), ('s', 'poss', '22|23|MOD'), ('birth', 'n', '23|20|SUBJ')], [('Child', 'n', '1|2|MOD'), ('s', 'poss', '2|0|ROOT')], [('oh', 'co', '1|4|COM'), ('I', 'pro', '3|4|SUBJ'), ('be', 'v', '4|0|ROOT'), ('sorry', 'adj', '5|4|PRED')], [('that', 'pro', '1|2|SUBJ'), ('be', 'v', '2|0|ROOT'), ('okay', 'adj', '3|2|PRED')], [('February', 'n', '1|6|VOC'), ('first', 'adj', '2|6|ENUM'), ('nineteen', 'det', '4|6|ENUM'), ('eighty', 'det', '5|6|ENUM'), ('four', 'det', '6|0|ROOT')], [('great', 'adj', '1|0|ROOT')], [('and', 'coord', '1|0|ROOT'), ('she', 'pro', '2|1|ROOT'), ('be', 'aux', '3|5|AUX'), ('two', 'det', '4|5|QUANT'), ('year-PL', 'n', '5|2|ROOT'), ('old', 'adj', '6|5|MOD')], [('correct', 'adj', '1|0|ROOT')], [('okay', 'co', '1|0|ROOT')], [('she', 'pro', '1|0|ROOT'), ('just', 'adv', '2|3|JCT'), ('turn-PERF', 'part', '3|1|XCOMP'), ('two', 'det', '4|6|QUANT'), ('a', 'det', '5|6|DET'), ('month', 'n', '6|3|OBJ'), ('ago', 'adv', '7|3|JCT')]]
+
+Printing age. When the argument *month* is true, the age information in
+the CHILDES format is converted into the number of months.
+
+    >>> valian.age()
+    ['P2Y1M3D', 'P2Y1M12D', 'P1Y9M21D', 'P1Y9M28D', 'P2Y1M23D', ...
+    >>> valian.age('Valian/01a.xml')
+    ['P2Y1M3D']
+    >>> valian.age('Valian/01a.xml',month=True)
+    [25]
+
+Printing MLU. The criteria for the MLU computation are broadly based on
+Brown (1973).
+
+    >>> valian.MLU()
+    [2.3574660633484..., 2.292682926829..., 3.492857142857..., 2.961783439490...,
+     2.0842696629213..., 3.169811320754..., 3.137404580152..., 3.0578034682080...,
+     4.090163934426..., 3.488372093023..., 2.8773584905660..., 3.4792899408284...,
+     4.0111940298507..., 3.456790123456..., 4.487603305785..., 4.007936507936...,
+     5.25, 5.154696132596..., ...]
+
+    >>> valian.MLU('Valian/01a.xml')
+    [2.35746606334...]
+
+
+Basic stuff
+==============================
+
+Count the number of words and sentences of each file.
+
+    >>> valian = CHILDESCorpusReader(corpus_root, 'Valian/.*.xml')
+    >>> for this_file in valian.fileids()[:6]:
+    ...     print(valian.corpus(this_file)[0]['Corpus'], valian.corpus(this_file)[0]['Id'])
+    ...     print("num of words: %i" % len(valian.words(this_file)))
+    ...     print("num of sents: %i" % len(valian.sents(this_file)))
+    valian 01a
+    num of words: 3606
+    num of sents: 1027
+    valian 01b
+    num of words: 4376
+    num of sents: 1274
+    valian 02a
+    num of words: 2673
+    num of sents: 801
+    valian 02b
+    num of words: 5020
+    num of sents: 1583
+    valian 03a
+    num of words: 2743
+    num of sents: 988
+    valian 03b
+    num of words: 4409
+    num of sents: 1397
diff --git a/nlp_resource_data/nltk/test/childes_fixt.py b/nlp_resource_data/nltk/test/childes_fixt.py
new file mode 100644 (file)
index 0000000..312449b
--- /dev/null
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+
+
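+# Skip childes.doctest unless the CHILDES corpus has been installed
+# under the local NLTK data directory.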
+def setup_module(module):
+    from nose import SkipTest
+    import nltk.data
+
+    try:
+        nltk.data.find("corpora/childes/data-xml/Eng-USA-MOR/")
+    except LookupError as e:
+        print(e)
+        raise SkipTest(
+            "The CHILDES corpus is not found. "
+            "It should be manually downloaded and saved/unpacked "
+            "to [NLTK_Data_Dir]/corpora/childes/"
+        )
diff --git a/nlp_resource_data/nltk/test/chunk.doctest b/nlp_resource_data/nltk/test/chunk.doctest
new file mode 100644 (file)
index 0000000..ff4f157
--- /dev/null
@@ -0,0 +1,373 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==========
+ Chunking
+==========
+
+    >>> from nltk.chunk import *
+    >>> from nltk.chunk.util import *
+    >>> from nltk.chunk.regexp import *
+    >>> from nltk import Tree
+
+    >>> tagged_text = "[ The/DT cat/NN ] sat/VBD on/IN [ the/DT mat/NN ] [ the/DT dog/NN ] chewed/VBD ./."
+    >>> gold_chunked_text = tagstr2tree(tagged_text)
+    >>> unchunked_text = gold_chunked_text.flatten()
+
+Chunking uses a special regexp syntax for rules that delimit the chunks. These
+rules must be converted to 'regular' regular expressions before a sentence can
+be chunked.
+
+    >>> tag_pattern = "<DT>?<JJ>*<NN.*>"
+    >>> regexp_pattern = tag_pattern2re_pattern(tag_pattern)
+    >>> regexp_pattern
+    '(<(DT)>)?(<(JJ)>)*(<(NN[^\\{\\}<>]*)>)'
+
+Construct some new chunking rules.
+
+    >>> chunk_rule = ChunkRule("<.*>+", "Chunk everything")
+    >>> chink_rule = ChinkRule("<VBD|IN|\.>", "Chink on verbs/prepositions")
+    >>> split_rule = SplitRule("<DT><NN>", "<DT><NN>",
+    ...                        "Split successive determiner/noun pairs")
+
+
+Create and score a series of chunk parsers, successively more complex.
+
+    >>> chunk_parser = RegexpChunkParser([chunk_rule], chunk_label='NP')
+    >>> chunked_text = chunk_parser.parse(unchunked_text)
+    >>> print(chunked_text)
+    (S
+      (NP
+        The/DT
+        cat/NN
+        sat/VBD
+        on/IN
+        the/DT
+        mat/NN
+        the/DT
+        dog/NN
+        chewed/VBD
+        ./.))
+
+    >>> chunkscore = ChunkScore()
+    >>> chunkscore.score(gold_chunked_text, chunked_text)
+    >>> print(chunkscore.precision())
+    0.0
+
+    >>> print(chunkscore.recall())
+    0.0
+
+    >>> print(chunkscore.f_measure())
+    0
+
+    >>> for chunk in sorted(chunkscore.missed()): print(chunk)
+    (NP The/DT cat/NN)
+    (NP the/DT dog/NN)
+    (NP the/DT mat/NN)
+
+    >>> for chunk in chunkscore.incorrect(): print(chunk)
+    (NP
+      The/DT
+      cat/NN
+      sat/VBD
+      on/IN
+      the/DT
+      mat/NN
+      the/DT
+      dog/NN
+      chewed/VBD
+      ./.)
+
+    >>> chunk_parser = RegexpChunkParser([chunk_rule, chink_rule],
+    ...                                  chunk_label='NP')
+    >>> chunked_text = chunk_parser.parse(unchunked_text)
+    >>> print(chunked_text)
+    (S
+      (NP The/DT cat/NN)
+      sat/VBD
+      on/IN
+      (NP the/DT mat/NN the/DT dog/NN)
+      chewed/VBD
+      ./.)
+    >>> assert chunked_text == chunk_parser.parse(list(unchunked_text))
+
+    >>> chunkscore = ChunkScore()
+    >>> chunkscore.score(gold_chunked_text, chunked_text)
+    >>> chunkscore.precision()
+    0.5
+
+    >>> print(chunkscore.recall())
+    0.33333333...
+
+    >>> print(chunkscore.f_measure())
+    0.4
+
+    >>> for chunk in sorted(chunkscore.missed()): print(chunk)
+    (NP the/DT dog/NN)
+    (NP the/DT mat/NN)
+
+    >>> for chunk in chunkscore.incorrect(): print(chunk)
+    (NP the/DT mat/NN the/DT dog/NN)
+
+    >>> chunk_parser = RegexpChunkParser([chunk_rule, chink_rule, split_rule],
+    ...                                  chunk_label='NP')
+    >>> chunked_text = chunk_parser.parse(unchunked_text, trace=True)
+    # Input:
+     <DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>
+    # Chunk everything:
+    {<DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>}
+    # Chink on verbs/prepositions:
+    {<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>  <DT>  <NN>} <VBD>  <.>
+    # Split successive determiner/noun pairs:
+    {<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>}{<DT>  <NN>} <VBD>  <.>
+    >>> print(chunked_text)
+    (S
+      (NP The/DT cat/NN)
+      sat/VBD
+      on/IN
+      (NP the/DT mat/NN)
+      (NP the/DT dog/NN)
+      chewed/VBD
+      ./.)
+
+    >>> chunkscore = ChunkScore()
+    >>> chunkscore.score(gold_chunked_text, chunked_text)
+    >>> chunkscore.precision()
+    1.0
+
+    >>> chunkscore.recall()
+    1.0
+
+    >>> chunkscore.f_measure()
+    1.0
+
+    >>> chunkscore.missed()
+    []
+
+    >>> chunkscore.incorrect()
+    []
+
+    >>> chunk_parser.rules() # doctest: +NORMALIZE_WHITESPACE
+    [<ChunkRule: '<.*>+'>, <ChinkRule: '<VBD|IN|\\.>'>,
+     <SplitRule: '<DT><NN>', '<DT><NN>'>]
+
+Printing parsers:
+
+    >>> print(repr(chunk_parser))
+    <RegexpChunkParser with 3 rules>
+    >>> print(chunk_parser)
+    RegexpChunkParser with 3 rules:
+        Chunk everything
+          <ChunkRule: '<.*>+'>
+        Chink on verbs/prepositions
+          <ChinkRule: '<VBD|IN|\\.>'>
+        Split successive determiner/noun pairs
+          <SplitRule: '<DT><NN>', '<DT><NN>'>
+
+Regression Tests
+~~~~~~~~~~~~~~~~
+ChunkParserI
+------------
+`ChunkParserI` is an abstract interface -- it is not meant to be
+instantiated directly.
+
+    >>> ChunkParserI().parse([])
+    Traceback (most recent call last):
+      . . .
+    NotImplementedError
+
+
+ChunkString
+-----------
+ChunkString can be built from a tree of tagged tuples, a tree of
+trees, or a mixed list of both:
+
+    >>> t1 = Tree('S', [('w%d' % i, 't%d' % i) for i in range(10)])
+    >>> t2 = Tree('S', [Tree('t0', []), Tree('t1', ['c1'])])
+    >>> t3 = Tree('S', [('w0', 't0'), Tree('t1', ['c1'])])
+    >>> ChunkString(t1)
+    <ChunkString: '<t0><t1><t2><t3><t4><t5><t6><t7><t8><t9>'>
+    >>> ChunkString(t2)
+    <ChunkString: '<t0><t1>'>
+    >>> ChunkString(t3)
+    <ChunkString: '<t0><t1>'>
+
+Other values generate an error:
+
+    >>> ChunkString(Tree('S', ['x']))
+    Traceback (most recent call last):
+      . . .
+    ValueError: chunk structures must contain tagged tokens or trees
+
+The `str()` for a chunk string adds spaces to it, which makes it line
+up with `str()` output for other chunk strings over the same
+underlying input.
+
+    >>> cs = ChunkString(t1)
+    >>> print(cs)
+     <t0>  <t1>  <t2>  <t3>  <t4>  <t5>  <t6>  <t7>  <t8>  <t9>
+    >>> cs.xform('<t3>', '{<t3>}')
+    >>> print(cs)
+     <t0>  <t1>  <t2> {<t3>} <t4>  <t5>  <t6>  <t7>  <t8>  <t9>
+
+The `_verify()` method makes sure that our transforms don't corrupt
+the chunk string.  By setting a debug_level of 2 or higher (3 in the
+example below), `_verify()` will be called at the end of every call
+to `xform`.
+
+    >>> cs = ChunkString(t1, debug_level=3)
+
+    >>> # tag not marked with <...>:
+    >>> cs.xform('<t3>', 't3')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Transformation generated invalid chunkstring:
+      <t0><t1><t2>t3<t4><t5><t6><t7><t8><t9>
+
+    >>> # brackets not balanced:
+    >>> cs.xform('<t3>', '{<t3>')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Transformation generated invalid chunkstring:
+      <t0><t1><t2>{<t3><t4><t5><t6><t7><t8><t9>
+
+    >>> # nested brackets:
+    >>> cs.xform('<t3><t4><t5>', '{<t3>{<t4>}<t5>}')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Transformation generated invalid chunkstring:
+      <t0><t1><t2>{<t3>{<t4>}<t5>}<t6><t7><t8><t9>
+
+    >>> # modified tags:
+    >>> cs.xform('<t3>', '<t9>')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Transformation generated invalid chunkstring: tag changed
+
+    >>> # added tags:
+    >>> cs.xform('<t9>', '<t9><t10>')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Transformation generated invalid chunkstring: tag changed
+
+Chunking Rules
+--------------
+
+Test the different rule constructors & __repr__ methods:
+
+    >>> r1 = RegexpChunkRule('<a|b>'+ChunkString.IN_CHINK_PATTERN,
+    ...                      '{<a|b>}', 'chunk <a> and <b>')
+    >>> r2 = RegexpChunkRule(re.compile('<a|b>'+ChunkString.IN_CHINK_PATTERN),
+    ...                      '{<a|b>}', 'chunk <a> and <b>')
+    >>> r3 = ChunkRule('<a|b>', 'chunk <a> and <b>')
+    >>> r4 = ChinkRule('<a|b>', 'chink <a> and <b>')
+    >>> r5 = UnChunkRule('<a|b>', 'unchunk <a> and <b>')
+    >>> r6 = MergeRule('<a>', '<b>', 'merge <a> w/ <b>')
+    >>> r7 = SplitRule('<a>', '<b>', 'split <a> from <b>')
+    >>> r8 = ExpandLeftRule('<a>', '<b>', 'expand left <a> <b>')
+    >>> r9 = ExpandRightRule('<a>', '<b>', 'expand right <a> <b>')
+    >>> for rule in r1, r2, r3, r4, r5, r6, r7, r8, r9:
+    ...     print(rule)
+    <RegexpChunkRule: '<a|b>(?=[^\\}]*(\\{|$))'->'{<a|b>}'>
+    <RegexpChunkRule: '<a|b>(?=[^\\}]*(\\{|$))'->'{<a|b>}'>
+    <ChunkRule: '<a|b>'>
+    <ChinkRule: '<a|b>'>
+    <UnChunkRule: '<a|b>'>
+    <MergeRule: '<a>', '<b>'>
+    <SplitRule: '<a>', '<b>'>
+    <ExpandLeftRule: '<a>', '<b>'>
+    <ExpandRightRule: '<a>', '<b>'>
+
+`tag_pattern2re_pattern()` complains if the tag pattern looks problematic:
+
+    >>> tag_pattern2re_pattern('{}')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Bad tag pattern: '{}'
+
+RegexpChunkParser
+-----------------
+
+A warning is printed when parsing an empty sentence:
+
+    >>> parser = RegexpChunkParser([ChunkRule('<a>', '')])
+    >>> parser.parse(Tree('S', []))
+    Warning: parsing empty text
+    Tree('S', [])
+
+RegexpParser
+------------
+
+    >>> parser = RegexpParser('''
+    ... NP: {<DT>? <JJ>* <NN>*} # NP
+    ... P: {<IN>}           # Preposition
+    ... V: {<V.*>}          # Verb
+    ... PP: {<P> <NP>}      # PP -> P NP
+    ... VP: {<V> <NP|PP>*}  # VP -> V (NP|PP)*
+    ... ''')
+    >>> print(repr(parser))
+    <chunk.RegexpParser with 5 stages>
+    >>> print(parser)
+    chunk.RegexpParser with 5 stages:
+    RegexpChunkParser with 1 rules:
+        NP   <ChunkRule: '<DT>? <JJ>* <NN>*'>
+    RegexpChunkParser with 1 rules:
+        Preposition   <ChunkRule: '<IN>'>
+    RegexpChunkParser with 1 rules:
+        Verb   <ChunkRule: '<V.*>'>
+    RegexpChunkParser with 1 rules:
+        PP -> P NP   <ChunkRule: '<P> <NP>'>
+    RegexpChunkParser with 1 rules:
+        VP -> V (NP|PP)*   <ChunkRule: '<V> <NP|PP>*'>
+    >>> print(parser.parse(unchunked_text, trace=True))
+    # Input:
+     <DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>
+    # NP:
+    {<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>}{<DT>  <NN>} <VBD>  <.>
+    # Input:
+     <NP>  <VBD>  <IN>  <NP>  <NP>  <VBD>  <.>
+    # Preposition:
+     <NP>  <VBD> {<IN>} <NP>  <NP>  <VBD>  <.>
+    # Input:
+     <NP>  <VBD>  <P>  <NP>  <NP>  <VBD>  <.>
+    # Verb:
+     <NP> {<VBD>} <P>  <NP>  <NP> {<VBD>} <.>
+    # Input:
+     <NP>  <V>  <P>  <NP>  <NP>  <V>  <.>
+    # PP -> P NP:
+     <NP>  <V> {<P>  <NP>} <NP>  <V>  <.>
+    # Input:
+     <NP>  <V>  <PP>  <NP>  <V>  <.>
+    # VP -> V (NP|PP)*:
+     <NP> {<V>  <PP>  <NP>}{<V>} <.>
+    (S
+      (NP The/DT cat/NN)
+      (VP
+        (V sat/VBD)
+        (PP (P on/IN) (NP the/DT mat/NN))
+        (NP the/DT dog/NN))
+      (VP (V chewed/VBD))
+      ./.)
+
+Test parsing of other rule types:
+
+    >>> print(RegexpParser('''
+    ... X:
+    ...   }<a><b>{     # chink rule
+    ...   <a>}{<b>     # split rule
+    ...   <a>{}<b>     # merge rule
+    ...   <a>{<b>}<c>  # chunk rule w/ context
+    ... '''))
+    chunk.RegexpParser with 1 stages:
+    RegexpChunkParser with 4 rules:
+        chink rule              <ChinkRule: '<a><b>'>
+        split rule              <SplitRule: '<a>', '<b>'>
+        merge rule              <MergeRule: '<a>', '<b>'>
+        chunk rule w/ context   <ChunkRuleWithContext: '<a>', '<b>', '<c>'>
+
+Illegal patterns give an error message:
+
+    >>> print(RegexpParser('X: {<foo>} {<bar>}'))
+    Traceback (most recent call last):
+      . . .
+    ValueError: Illegal chunk pattern: {<foo>} {<bar>}
+
diff --git a/nlp_resource_data/nltk/test/classify.doctest b/nlp_resource_data/nltk/test/classify.doctest
new file mode 100644 (file)
index 0000000..26d14e6
--- /dev/null
@@ -0,0 +1,201 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=============
+ Classifiers
+=============
+
+Classifiers label tokens with category labels (or *class labels*).
+Typically, labels are represented with strings (such as ``"health"``
+or ``"sports"``.  In NLTK, classifiers are defined using classes that
+implement the `ClassifyI` interface:
+
+    >>> import nltk
+    >>> nltk.usage(nltk.classify.ClassifierI)
+    ClassifierI supports the following operations:
+      - self.classify(featureset)
+      - self.classify_many(featuresets)
+      - self.labels()
+      - self.prob_classify(featureset)
+      - self.prob_classify_many(featuresets)
+
+NLTK defines several classifier classes:
+
+- `ConditionalExponentialClassifier`
+- `DecisionTreeClassifier`
+- `MaxentClassifier`
+- `NaiveBayesClassifier`
+- `WekaClassifier`
+
+Classifiers are typically created by training them on a training
+corpus.
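+
+For example, a minimal sketch of that pattern (the feature names and
+data here are invented for illustration)::
+
+    import nltk
+
+    train = [({'contains(money)': True},  'spam'),
+             ({'contains(money)': False}, 'ham')]
+    classifier = nltk.NaiveBayesClassifier.train(train)
+    classifier.classify({'contains(money)': True})   # most likely 'spam'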
+
+
+Regression Tests
+~~~~~~~~~~~~~~~~
+
+We define a very simple training corpus with 3 binary features: ['a',
+'b', 'c'], and two labels: ['x', 'y'].  We use a simple feature set so
+that the correct answers can be calculated analytically (although we
+haven't done this yet for all tests).
+
+    >>> train = [
+    ...     (dict(a=1,b=1,c=1), 'y'),
+    ...     (dict(a=1,b=1,c=1), 'x'),
+    ...     (dict(a=1,b=1,c=0), 'y'),
+    ...     (dict(a=0,b=1,c=1), 'x'),
+    ...     (dict(a=0,b=1,c=1), 'y'),
+    ...     (dict(a=0,b=0,c=1), 'y'),
+    ...     (dict(a=0,b=1,c=0), 'x'),
+    ...     (dict(a=0,b=0,c=0), 'x'),
+    ...     (dict(a=0,b=1,c=1), 'y'),
+    ...     (dict(a=None,b=1,c=0), 'x'),
+    ...     ]
+    >>> test = [
+    ...     (dict(a=1,b=0,c=1)), # unseen
+    ...     (dict(a=1,b=0,c=0)), # unseen
+    ...     (dict(a=0,b=1,c=1)), # seen 3 times, labels=y,y,x
+    ...     (dict(a=0,b=1,c=0)), # seen 1 time, label=x
+    ...     ]
+
+Test the Naive Bayes classifier:
+
+    >>> classifier = nltk.classify.NaiveBayesClassifier.train(train)
+    >>> sorted(classifier.labels())
+    ['x', 'y']
+    >>> classifier.classify_many(test)
+    ['y', 'x', 'y', 'x']
+    >>> for pdist in classifier.prob_classify_many(test):
+    ...     print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y')))
+    0.2500 0.7500
+    0.5833 0.4167
+    0.3571 0.6429
+    0.7000 0.3000
+    >>> classifier.show_most_informative_features()
+    Most Informative Features
+                           c = 0                   x : y      =      2.3 : 1.0
+                           c = 1                   y : x      =      1.8 : 1.0
+                           a = 1                   y : x      =      1.7 : 1.0
+                           a = 0                   x : y      =      1.0 : 1.0
+                           b = 0                   x : y      =      1.0 : 1.0
+                           b = 1                   x : y      =      1.0 : 1.0
+
+Test the Decision Tree classifier (without None):
+
+    >>> classifier = nltk.classify.DecisionTreeClassifier.train(
+    ...     train[:-1], entropy_cutoff=0,
+    ...     support_cutoff=0)
+    >>> sorted(classifier.labels())
+    ['x', 'y']
+    >>> print(classifier)
+    c=0? .................................................. x
+      a=0? ................................................ x
+      a=1? ................................................ y
+    c=1? .................................................. y
+    <BLANKLINE>
+    >>> classifier.classify_many(test)
+    ['y', 'y', 'y', 'x']
+    >>> for pdist in classifier.prob_classify_many(test):
+    ...     print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y')))
+    Traceback (most recent call last):
+      . . .
+    NotImplementedError
+    
+    
+Test the Decision Tree classifier (with None):
+
+    >>> classifier = nltk.classify.DecisionTreeClassifier.train(
+    ...     train, entropy_cutoff=0,
+    ...     support_cutoff=0)
+    >>> sorted(classifier.labels())
+    ['x', 'y']
+    >>> print(classifier)
+    c=0? .................................................. x
+      a=0? ................................................ x
+      a=1? ................................................ y
+      a=None? ............................................. x
+    c=1? .................................................. y
+    <BLANKLINE>
+
+
+Test SklearnClassifier, which requires the scikit-learn package.
+
+    >>> from nltk.classify import SklearnClassifier
+    >>> from sklearn.naive_bayes import BernoulliNB
+    >>> from sklearn.svm import SVC
+    >>> train_data = [({"a": 4, "b": 1, "c": 0}, "ham"),
+    ...               ({"a": 5, "b": 2, "c": 1}, "ham"),
+    ...               ({"a": 0, "b": 3, "c": 4}, "spam"),
+    ...               ({"a": 5, "b": 1, "c": 1}, "ham"),
+    ...               ({"a": 1, "b": 4, "c": 3}, "spam")]
+    >>> classif = SklearnClassifier(BernoulliNB()).train(train_data)
+    >>> test_data = [{"a": 3, "b": 2, "c": 1},
+    ...              {"a": 0, "b": 3, "c": 7}]
+    >>> classif.classify_many(test_data)
+    ['ham', 'spam']
+    >>> classif = SklearnClassifier(SVC(), sparse=False).train(train_data)
+    >>> classif.classify_many(test_data)
+    ['ham', 'spam']
+
+Test the Maximum Entropy classifier training algorithms; they should all
+generate the same results.
+
+    >>> def print_maxent_test_header():
+    ...     print(' '*11+''.join(['      test[%s]  ' % i
+    ...                           for i in range(len(test))]))
+    ...     print(' '*11+'     p(x)  p(y)'*len(test))
+    ...     print('-'*(11+15*len(test)))
+
+    >>> def test_maxent(algorithm):
+    ...     print('%11s' % algorithm, end=' ')
+    ...     try:
+    ...         classifier = nltk.classify.MaxentClassifier.train(
+    ...                         train, algorithm, trace=0, max_iter=1000)
+    ...     except Exception as e:
+    ...         print('Error: %r' % e)
+    ...         return
+    ...
+    ...     for featureset in test:
+    ...         pdist = classifier.prob_classify(featureset)
+    ...         print('%8.2f%6.2f' % (pdist.prob('x'), pdist.prob('y')), end=' ')
+    ...     print()
+
+    >>> print_maxent_test_header(); test_maxent('GIS'); test_maxent('IIS')
+                     test[0]        test[1]        test[2]        test[3]
+                    p(x)  p(y)     p(x)  p(y)     p(x)  p(y)     p(x)  p(y)
+    -----------------------------------------------------------------------
+            GIS     0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
+            IIS     0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
+
+    >>> test_maxent('MEGAM'); test_maxent('TADM') # doctest: +SKIP
+            MEGAM   0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
+            TADM    0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
+
+
+
+Regression tests for TypedMaxentFeatureEncoding
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    >>> from nltk.classify import maxent
+    >>> train = [
+    ...     ({'a': 1, 'b': 1, 'c': 1}, 'y'),
+    ...     ({'a': 5, 'b': 5, 'c': 5}, 'x'),
+    ...     ({'a': 0.9, 'b': 0.9, 'c': 0.9}, 'y'),
+    ...     ({'a': 5.5, 'b': 5.4, 'c': 5.3}, 'x'),
+    ...     ({'a': 0.8, 'b': 1.2, 'c': 1}, 'y'),
+    ...     ({'a': 5.1, 'b': 4.9, 'c': 5.2}, 'x')
+    ... ]
+
+    >>> test = [
+    ...     {'a': 1, 'b': 0.8, 'c': 1.2},
+    ...     {'a': 5.2, 'b': 5.1, 'c': 5}
+    ... ]
+
+    >>> encoding = maxent.TypedMaxentFeatureEncoding.train(
+    ...     train, count_cutoff=3, alwayson_features=True)
+
+    >>> classifier = maxent.MaxentClassifier.train(
+    ...     train, bernoulli=False, encoding=encoding, trace=0)
+
+    >>> classifier.classify_many(test)
+    ['y', 'x']
diff --git a/nlp_resource_data/nltk/test/classify_fixt.py b/nlp_resource_data/nltk/test/classify_fixt.py
new file mode 100644 (file)
index 0000000..b9d1496
--- /dev/null
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+
+
+# most of classify.doctest requires numpy
+def setup_module(module):
+    from nose import SkipTest
+
+    try:
+        import numpy
+    except ImportError:
+        raise SkipTest("classify.doctest requires numpy")
diff --git a/nlp_resource_data/nltk/test/collections.doctest b/nlp_resource_data/nltk/test/collections.doctest
new file mode 100644 (file)
index 0000000..241913c
--- /dev/null
@@ -0,0 +1,20 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+===========
+Collections
+===========
+
+    >>> import nltk
+    >>> from nltk.collections import *
+
+Trie
+----
+
+Trie can be pickled:
+
+    >>> import pickle
+    >>> trie = nltk.collections.Trie(['a'])
+    >>> s = pickle.dumps(trie)
+    >>> pickle.loads(s)
+    {'a': {True: None}}
\ No newline at end of file
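+
+In the pickled form above, each level of nesting corresponds to one
+character, and the ``True`` key marks the end of an inserted string.
+A lookup therefore just walks the nested dictionaries; a sketch
+(``contains`` is a hypothetical helper, not part of the API)::
+
+    def contains(trie, word):
+        node = trie
+        for ch in word:
+            if ch not in node:
+                return False
+            node = node[ch]
+        return True in node   # did we land on a leaf marker?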
diff --git a/nlp_resource_data/nltk/test/collocations.doctest b/nlp_resource_data/nltk/test/collocations.doctest
new file mode 100644 (file)
index 0000000..b1bb33a
--- /dev/null
@@ -0,0 +1,283 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==============
+ Collocations
+==============
+
+Overview
+~~~~~~~~
+
+Collocations are expressions of multiple words which commonly co-occur. For
+example, the top ten bigram collocations in Genesis are listed below, as
+measured using Pointwise Mutual Information.
+
+    >>> import nltk
+    >>> from nltk.collocations import *
+    >>> bigram_measures = nltk.collocations.BigramAssocMeasures()
+    >>> trigram_measures = nltk.collocations.TrigramAssocMeasures()
+    >>> fourgram_measures = nltk.collocations.QuadgramAssocMeasures()
+    >>> finder = BigramCollocationFinder.from_words(
+    ...     nltk.corpus.genesis.words('english-web.txt'))
+    >>> finder.nbest(bigram_measures.pmi, 10)  # doctest: +NORMALIZE_WHITESPACE
+    [('Allon', 'Bacuth'), ('Ashteroth', 'Karnaim'), ('Ben', 'Ammi'),
+     ('En', 'Mishpat'), ('Jegar', 'Sahadutha'), ('Salt', 'Sea'),
+     ('Whoever', 'sheds'), ('appoint', 'overseers'), ('aromatic', 'resin'),
+     ('cutting', 'instrument')]
+
+While these words are highly collocated, the expressions are also very
+infrequent.  Therefore it is useful to apply filters, such as ignoring all
+bigrams which occur fewer than three times in the corpus:
+
+    >>> finder.apply_freq_filter(3)
+    >>> finder.nbest(bigram_measures.pmi, 10)  # doctest: +NORMALIZE_WHITESPACE
+    [('Beer', 'Lahai'), ('Lahai', 'Roi'), ('gray', 'hairs'),
+     ('Most', 'High'), ('ewe', 'lambs'), ('many', 'colors'),
+     ('burnt', 'offering'), ('Paddan', 'Aram'), ('east', 'wind'),
+     ('living', 'creature')]
+
+We may similarly find collocations among tagged words:
+
+    >>> finder = BigramCollocationFinder.from_words(
+    ...     nltk.corpus.brown.tagged_words('ca01', tagset='universal'))
+    >>> finder.nbest(bigram_measures.pmi, 5)  # doctest: +NORMALIZE_WHITESPACE
+    [(('1,119', 'NUM'), ('votes', 'NOUN')),
+     (('1962', 'NUM'), ("governor's", 'NOUN')),
+     (('637', 'NUM'), ('E.', 'NOUN')),
+     (('Alpharetta', 'NOUN'), ('prison', 'NOUN')),
+     (('Bar', 'NOUN'), ('Association', 'NOUN'))]
+
+Or tags alone:
+
+    >>> finder = BigramCollocationFinder.from_words(t for w, t in
+    ...     nltk.corpus.brown.tagged_words('ca01', tagset='universal'))
+    >>> finder.nbest(bigram_measures.pmi, 10)  # doctest: +NORMALIZE_WHITESPACE
+    [('PRT', 'VERB'), ('PRON', 'VERB'), ('ADP', 'DET'), ('.', 'PRON'), ('DET', 'ADJ'),
+     ('CONJ', 'PRON'), ('ADP', 'NUM'), ('NUM', '.'), ('ADV', 'ADV'), ('VERB', 'ADV')]
+
+Or spanning intervening words:
+
+    >>> finder = BigramCollocationFinder.from_words(
+    ...     nltk.corpus.genesis.words('english-web.txt'),
+    ...     window_size = 20)
+    >>> finder.apply_freq_filter(2)
+    >>> ignored_words = nltk.corpus.stopwords.words('english')
+    >>> finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words)
+    >>> finder.nbest(bigram_measures.likelihood_ratio, 10) # doctest: +NORMALIZE_WHITESPACE
+    [('chief', 'chief'), ('became', 'father'), ('years', 'became'),
+     ('hundred', 'years'), ('lived', 'became'), ('king', 'king'),
+     ('lived', 'years'), ('became', 'became'), ('chief', 'chiefs'),
+     ('hundred', 'became')]
+
+Finders
+~~~~~~~
+
+The collocations package provides collocation finders which by default
+consider all ngrams in a text as candidate collocations:
+
+    >>> text = "I do not like green eggs and ham, I do not like them Sam I am!"
+    >>> tokens = nltk.wordpunct_tokenize(text)
+    >>> finder = BigramCollocationFinder.from_words(tokens)
+    >>> scored = finder.score_ngrams(bigram_measures.raw_freq)
+    >>> sorted(bigram for bigram, score in scored)  # doctest: +NORMALIZE_WHITESPACE
+    [(',', 'I'), ('I', 'am'), ('I', 'do'), ('Sam', 'I'), ('am', '!'),
+     ('and', 'ham'), ('do', 'not'), ('eggs', 'and'), ('green', 'eggs'),
+     ('ham', ','), ('like', 'green'), ('like', 'them'), ('not', 'like'),
+     ('them', 'Sam')]
+
+Alternatively, we can construct the collocation finder from manually-derived
+FreqDists:
+
+    >>> word_fd = nltk.FreqDist(tokens)
+    >>> bigram_fd = nltk.FreqDist(nltk.bigrams(tokens))
+    >>> finder = BigramCollocationFinder(word_fd, bigram_fd)
+    >>> scored == finder.score_ngrams(bigram_measures.raw_freq)
+    True
+
+A similar interface is provided for trigrams:
+
+    >>> finder = TrigramCollocationFinder.from_words(tokens)
+    >>> scored = finder.score_ngrams(trigram_measures.raw_freq)
+    >>> set(trigram for trigram, score in scored) == set(nltk.trigrams(tokens))
+    True
+
+We may want to select only the top n results:
+
+    >>> sorted(finder.nbest(trigram_measures.raw_freq, 2))
+    [('I', 'do', 'not'), ('do', 'not', 'like')]
+
+Alternatively, we can select those above a minimum score value:
+
+    >>> sorted(finder.above_score(trigram_measures.raw_freq,
+    ...                           1.0 / len(tuple(nltk.trigrams(tokens)))))
+    [('I', 'do', 'not'), ('do', 'not', 'like')]
+
+Now spanning intervening words:
+
+    >>> finder = TrigramCollocationFinder.from_words(tokens, window_size=4)
+    >>> sorted(finder.nbest(trigram_measures.raw_freq, 4))
+    [('I', 'do', 'like'), ('I', 'do', 'not'), ('I', 'not', 'like'), ('do', 'not', 'like')]
+
+A closer look at the finder's ngram frequencies:
+
+    >>> sorted(finder.ngram_fd.items(), key=lambda t: (-t[1], t[0]))[:10]  # doctest: +NORMALIZE_WHITESPACE
+    [(('I', 'do', 'like'), 2), (('I', 'do', 'not'), 2), (('I', 'not', 'like'), 2),
+     (('do', 'not', 'like'), 2), ((',', 'I', 'do'), 1), ((',', 'I', 'not'), 1),
+     ((',', 'do', 'not'), 1), (('I', 'am', '!'), 1), (('Sam', 'I', '!'), 1),
+     (('Sam', 'I', 'am'), 1)]
+
+A similar interface is provided for fourgrams:
+
+    >>> finder_4grams = QuadgramCollocationFinder.from_words(tokens)
+    >>> scored_4grams = finder_4grams.score_ngrams(fourgram_measures.raw_freq)
+    >>> set(fourgram for fourgram, score in scored_4grams) == set(nltk.ngrams(tokens, n=4))
+    True
+
+Filtering candidates
+~~~~~~~~~~~~~~~~~~~~
+
+A text usually contains far too many ngrams for all of them to be useful
+when finding collocations.  It is generally helpful to remove some words
+or punctuation, and to require a minimum frequency for candidate
+collocations.
+
+Given our sample text above, if we remove all trigrams containing personal
+pronouns from candidature, score_ngrams should return 6 fewer results, and
+'do not like' will be the only candidate that occurs more than once:
+
+    >>> finder = TrigramCollocationFinder.from_words(tokens)
+    >>> len(finder.score_ngrams(trigram_measures.raw_freq))
+    14
+    >>> finder.apply_word_filter(lambda w: w in ('I', 'me'))
+    >>> len(finder.score_ngrams(trigram_measures.raw_freq))
+    8
+    >>> sorted(finder.above_score(trigram_measures.raw_freq,
+    ...                           1.0 / len(tuple(nltk.trigrams(tokens)))))
+    [('do', 'not', 'like')]
+
+Sometimes a filter is a function on the whole ngram rather than on each
+word; for example, we may permit 'and' to appear in the middle of a
+trigram, but not on either edge:
+
+    >>> finder.apply_ngram_filter(lambda w1, w2, w3: 'and' in (w1, w3))
+    >>> len(finder.score_ngrams(trigram_measures.raw_freq))
+    6
+
+Finally, it is often important to remove low frequency candidates, as we
+lack sufficient evidence about their significance as collocations:
+
+    >>> finder.apply_freq_filter(2)
+    >>> len(finder.score_ngrams(trigram_measures.raw_freq))
+    1
+
+Association measures
+~~~~~~~~~~~~~~~~~~~~
+
+A number of measures are available to score collocations or other associations.
+The arguments to measure functions are marginals of a contingency table, in the
+bigram case (n_ii, (n_ix, n_xi), n_xx)::
+
+            w1    ~w1
+         ------ ------
+     w2 | n_ii | n_oi | = n_xi
+         ------ ------
+    ~w2 | n_io | n_oo |
+         ------ ------
+         = n_ix        TOTAL = n_xx
+
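+As a quick check of this layout, each marginal is simply a sum of
+contingency cells; using the contingency values (8, 5, 24, 31740) that
+reappear in the Dunning example below:
+
+    >>> n_ii, n_io, n_oi, n_oo = 8, 5, 24, 31740
+    >>> (n_ii + n_io, n_ii + n_oi, n_ii + n_io + n_oi + n_oo)
+    (13, 32, 31777)
+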
+We test their calculation using some known values presented in Manning and
+Schutze's text and other papers.
+
+Student's t: examples from Manning and Schutze 5.3.2
+
+   >>> print('%0.4f' % bigram_measures.student_t(8, (15828, 4675), 14307668))
+   0.9999
+   >>> print('%0.4f' % bigram_measures.student_t(20, (42, 20), 14307668))
+   4.4721
+
+Chi-square: examples from Manning and Schutze 5.3.3
+
+   >>> print('%0.2f' % bigram_measures.chi_sq(8, (15828, 4675), 14307668))
+   1.55
+   >>> print('%0.0f' % bigram_measures.chi_sq(59, (67, 65), 571007))
+   456400
+
+Likelihood ratios: examples from Dunning, CL, 1993
+
+   >>> print('%0.2f' % bigram_measures.likelihood_ratio(110, (2552, 221), 31777))
+   270.72
+   >>> print('%0.2f' % bigram_measures.likelihood_ratio(8, (13, 32), 31777))
+   95.29
+
+Pointwise Mutual Information: examples from Manning and Schutze 5.4
+
+   >>> print('%0.2f' % bigram_measures.pmi(20, (42, 20), 14307668))
+   18.38
+   >>> print('%0.2f' % bigram_measures.pmi(20, (15019, 15629), 14307668))
+   0.29
+
+TODO: Find authoritative results for trigrams.
+
+Using contingency table values
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+While frequency counts make marginals readily available for collocation
+finding, it is common to find published contingency table values. The
+collocations package therefore provides a wrapper, ContingencyMeasures, which
+wraps an association measures class, providing association measures which
+take contingency values as arguments, (n_ii, n_io, n_oi, n_oo) in the
+bigram case.
+
+   >>> from nltk.metrics import ContingencyMeasures
+   >>> cont_bigram_measures = ContingencyMeasures(bigram_measures)
+   >>> print('%0.2f' % cont_bigram_measures.likelihood_ratio(8, 5, 24, 31740))
+   95.29
+   >>> print('%0.2f' % cont_bigram_measures.chi_sq(8, 15820, 4667, 14287173))
+   1.55
+
+Ranking and correlation
+~~~~~~~~~~~~~~~~~~~~~~~
+
+It is useful to consider the results of finding collocations as a ranking, and
+the rankings output using different association measures can be compared using
+the Spearman correlation coefficient.
+
+Ranks can be assigned to a sorted list of results trivially by assigning
+strictly increasing ranks to each result:
+
+    >>> from nltk.metrics.spearman import *
+    >>> results_list = ['item1', 'item2', 'item3', 'item4', 'item5']
+    >>> print(list(ranks_from_sequence(results_list)))
+    [('item1', 0), ('item2', 1), ('item3', 2), ('item4', 3), ('item5', 4)]
+
+If scores are available for each result, we may allow sufficiently similar
+results (differing by no more than rank_gap) to be assigned the same rank:
+
+    >>> results_scored = [('item1', 50.0), ('item2', 40.0), ('item3', 38.0),
+    ...                   ('item4', 35.0), ('item5', 14.0)]
+    >>> print(list(ranks_from_scores(results_scored, rank_gap=5)))
+    [('item1', 0), ('item2', 1), ('item3', 1), ('item4', 1), ('item5', 4)]
+
+The Spearman correlation coefficient gives a number from -1.0 to 1.0 comparing
+two rankings.  A coefficient of 1.0 indicates identical rankings; -1.0 indicates
+exact opposite rankings.
+
+    >>> print('%0.1f' % spearman_correlation(
+    ...         ranks_from_sequence(results_list),
+    ...         ranks_from_sequence(results_list)))
+    1.0
+    >>> print('%0.1f' % spearman_correlation(
+    ...         ranks_from_sequence(reversed(results_list)),
+    ...         ranks_from_sequence(results_list)))
+    -1.0
+    >>> results_list2 = ['item2', 'item3', 'item1', 'item5', 'item4']
+    >>> print('%0.1f' % spearman_correlation(
+    ...        ranks_from_sequence(results_list),
+    ...        ranks_from_sequence(results_list2)))
+    0.6
+    >>> print('%0.1f' % spearman_correlation(
+    ...        ranks_from_sequence(reversed(results_list)),
+    ...        ranks_from_sequence(results_list2)))
+    -0.6
+
+
diff --git a/nlp_resource_data/nltk/test/concordance.doctest b/nlp_resource_data/nltk/test/concordance.doctest
new file mode 100644 (file)
index 0000000..8f11fc8
--- /dev/null
@@ -0,0 +1,68 @@
+.. Copyright (C) 2001-2016 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==================================
+Concordance Example
+==================================
+
+A concordance view shows us every occurrence of a given
+word, together with some context. Here we look up the word monstrous
+in Moby Dick by constructing a ``Text`` object and calling its
+``concordance()`` method with "monstrous" as its argument:
+
+>>> from nltk.corpus import gutenberg
+>>> from nltk.text import Text
+>>> corpus = gutenberg.words('melville-moby_dick.txt')
+>>> text = Text(corpus)
+
+>>> text.concordance("monstrous") # doctest:+NORMALIZE_WHITESPACE
+Displaying 11 of 11 matches:
+ong the former , one was of a most monstrous size . ... This came towards us ,
+ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
+ll over with a heathenish array of monstrous clubs and spears . Some were thick
+d as you gazed , and wondered what monstrous cannibal and savage could ever hav
+that has survived the flood ; most monstrous and most mountainous ! That Himmal
+they might scout at Moby Dick as a monstrous fable , or still worse and more de
+th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l
+ing Scenes . In connexion with the monstrous pictures of whales , I am strongly
+ere to enter upon those still more monstrous stories of them which are to be fo
+ght have been rummaged out of this monstrous cabinet there is no telling . But
+of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u
+
+>>> text.concordance("monstrous") # doctest:+ELLIPSIS, +NORMALIZE_WHITESPACE
+Displaying 11 of 11 matches:
+ong the former , one was of a most monstrous size . ... This came towards us ,
+ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
+ll over with a heathenish array of monstrous clubs and spears . Some were thick
+...
+
+=================================
+Concordance List
+=================================
+
+Often we need to store the results of a concordance for further use.
+To do so, call ``concordance_list()``, which returns the matches as a
+list instead of printing them:
+
+>>> from nltk.corpus import gutenberg
+>>> from nltk.text import Text
+>>> corpus = gutenberg.words('melville-moby_dick.txt')
+>>> text = Text(corpus)
+>>> con_list = text.concordance_list("monstrous")
+>>> con_list[2].line
+'ll over with a heathenish array of monstrous clubs and spears . Some were thick'
+>>> len(con_list)
+11
+
+=================================
+Patching Issue #2088
+=================================
+
+Patching https://github.com/nltk/nltk/issues/2088
+The left slice of the left context should be clipped to 0 if `i-context` < 0.
+
+>>> from nltk import Text, word_tokenize
+>>> jane_eyre = 'Chapter 1\nTHERE was no possibility of taking a walk that day. We had been wandering, indeed, in the leafless shrubbery an hour in the morning; but since dinner (Mrs. Reed, when there was no company, dined early) the cold winter wind had brought with it clouds so sombre, and a rain so penetrating, that further outdoor exercise was now out of the question.'
+>>> text = Text(word_tokenize(jane_eyre))
+>>> text.concordance_list('taking')[0].left
+['Chapter', '1', 'THERE', 'was', 'no', 'possibility', 'of']
diff --git a/nlp_resource_data/nltk/test/corpus.doctest b/nlp_resource_data/nltk/test/corpus.doctest
new file mode 100644 (file)
index 0000000..73b8fd7
--- /dev/null
@@ -0,0 +1,2199 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+================
+ Corpus Readers
+================
+
+The `nltk.corpus` package defines a collection of *corpus reader*
+classes, which can be used to access the contents of a diverse set of
+corpora.  The list of available corpora is given at:
+
+http://www.nltk.org/nltk_data/
+
+Each corpus reader class is specialized to handle a specific
+corpus format.  In addition, the `nltk.corpus` package automatically
+creates a set of corpus reader instances that can be used to access
+the corpora in the NLTK data package.
+Section `Corpus Reader Objects`_ ("Corpus Reader Objects") describes
+the corpus reader instances that can be used to read the corpora in
+the NLTK data package.  Section `Corpus Reader Classes`_ ("Corpus
+Reader Classes") describes the corpus reader classes themselves, and
+discusses the issues involved in creating new corpus reader objects
+and new corpus reader classes.  Section `Regression Tests`_
+("Regression Tests") contains regression tests for the corpus readers
+and associated functions and classes.
+
+.. contents:: **Table of Contents**
+  :depth: 2
+  :backlinks: none
+
+---------------------
+Corpus Reader Objects
+---------------------
+
+Overview
+========
+
+NLTK includes a diverse set of corpora which can be
+read using the ``nltk.corpus`` package.  Each corpus is accessed by
+means of a "corpus reader" object from ``nltk.corpus``:
+
+    >>> import nltk.corpus
+    >>> # The Brown corpus:
+    >>> print(str(nltk.corpus.brown).replace('\\\\','/'))
+    <CategorizedTaggedCorpusReader in '.../corpora/brown'...>
+    >>> # The Penn Treebank Corpus:
+    >>> print(str(nltk.corpus.treebank).replace('\\\\','/'))
+    <BracketParseCorpusReader in '.../corpora/treebank/combined'...>
+    >>> # The Name Genders Corpus:
+    >>> print(str(nltk.corpus.names).replace('\\\\','/'))
+    <WordListCorpusReader in '.../corpora/names'...>
+    >>> # The Inaugural Address Corpus:
+    >>> print(str(nltk.corpus.inaugural).replace('\\\\','/'))
+    <PlaintextCorpusReader in '.../corpora/inaugural'...>
+
+Most corpora consist of a set of files, each containing a document (or
+other pieces of text).  A list of identifiers for these files is
+accessed via the ``fileids()`` method of the corpus reader:
+
+    >>> nltk.corpus.treebank.fileids() # doctest: +ELLIPSIS
+    ['wsj_0001.mrg', 'wsj_0002.mrg', 'wsj_0003.mrg', 'wsj_0004.mrg', ...]
+    >>> nltk.corpus.inaugural.fileids() # doctest: +ELLIPSIS
+    ['1789-Washington.txt', '1793-Washington.txt', '1797-Adams.txt', ...]
+
+Each corpus reader provides a variety of methods to read data from the
+corpus, depending on the format of the corpus.  For example, plaintext
+corpora support methods to read the corpus as raw text, a list of
+words, a list of sentences, or a list of paragraphs.
+
+    >>> from nltk.corpus import inaugural
+    >>> inaugural.raw('1789-Washington.txt') # doctest: +ELLIPSIS
+    'Fellow-Citizens of the Senate ...'
+    >>> inaugural.words('1789-Washington.txt')
+    ['Fellow', '-', 'Citizens', 'of', 'the', ...]
+    >>> inaugural.sents('1789-Washington.txt') # doctest: +ELLIPSIS
+    [['Fellow', '-', 'Citizens'...], ['Among', 'the', 'vicissitudes'...]...]
+    >>> inaugural.paras('1789-Washington.txt') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [[['Fellow', '-', 'Citizens'...]],
+     [['Among', 'the', 'vicissitudes'...],
+      ['On', 'the', 'one', 'hand', ',', 'I'...]...]...]
+
+Each of these reader methods may be given a single document's item
+name or a list of document item names.  When given a list of document
+item names, the reader methods will concatenate together the contents
+of the individual documents.
+
+    >>> l1 = len(inaugural.words('1789-Washington.txt'))
+    >>> l2 = len(inaugural.words('1793-Washington.txt'))
+    >>> l3 = len(inaugural.words(['1789-Washington.txt', '1793-Washington.txt']))
+    >>> print('%s+%s == %s' % (l1, l2, l3))
+    1538+147 == 1685
+
+If the reader methods are called without any arguments, they will
+typically load all documents in the corpus.
+
+    >>> len(inaugural.words())
+    149797
+
+If a corpus contains a README file, it can be accessed with a ``readme()`` method:
+
+    >>> inaugural.readme()[:32]
+    'C-Span Inaugural Address Corpus\n'
+
+Plaintext Corpora
+=================
+
+Here are the first few words from each of NLTK's plaintext corpora:
+
+    >>> nltk.corpus.abc.words()
+    ['PM', 'denies', 'knowledge', 'of', 'AWB', ...]
+    >>> nltk.corpus.genesis.words()
+    ['In', 'the', 'beginning', 'God', 'created', ...]
+    >>> nltk.corpus.gutenberg.words(fileids='austen-emma.txt')
+    ['[', 'Emma', 'by', 'Jane', 'Austen', '1816', ...]
+    >>> nltk.corpus.inaugural.words()
+    ['Fellow', '-', 'Citizens', 'of', 'the', ...]
+    >>> nltk.corpus.state_union.words()
+    ['PRESIDENT', 'HARRY', 'S', '.', 'TRUMAN', "'", ...]
+    >>> nltk.corpus.webtext.words()
+    ['Cookie', 'Manager', ':', '"', 'Don', "'", 't', ...]
+
+Tagged Corpora
+==============
+
+In addition to the plaintext corpora, NLTK's data package also
+contains a wide variety of annotated corpora.  For example, the Brown
+Corpus is annotated with part-of-speech tags, and defines additional
+methods ``tagged_*()`` which return words as `(word,tag)` tuples, rather
+than just bare word strings.
+
+    >>> from nltk.corpus import brown
+    >>> print(brown.words())
+    ['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
+    >>> print(brown.tagged_words())
+    [('The', 'AT'), ('Fulton', 'NP-TL'), ...]
+    >>> print(brown.sents()) # doctest: +ELLIPSIS
+    [['The', 'Fulton', 'County'...], ['The', 'jury', 'further'...], ...]
+    >>> print(brown.tagged_sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [[('The', 'AT'), ('Fulton', 'NP-TL')...],
+     [('The', 'AT'), ('jury', 'NN'), ('further', 'RBR')...]...]
+    >>> print(brown.paras(categories='reviews')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [[['It', 'is', 'not', 'news', 'that', 'Nathan', 'Milstein'...],
+      ['Certainly', 'not', 'in', 'Orchestra', 'Hall', 'where'...]],
+     [['There', 'was', 'about', 'that', 'song', 'something', ...],
+      ['Not', 'the', 'noblest', 'performance', 'we', 'have', ...], ...], ...]
+    >>> print(brown.tagged_paras(categories='reviews')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [[[('It', 'PPS'), ('is', 'BEZ'), ('not', '*'), ...],
+      [('Certainly', 'RB'), ('not', '*'), ('in', 'IN'), ...]],
+     [[('There', 'EX'), ('was', 'BEDZ'), ('about', 'IN'), ...],
+      [('Not', '*'), ('the', 'AT'), ('noblest', 'JJT'), ...], ...], ...]
+
+Similarly, the Indian Language POS-Tagged Corpus includes samples of
+Indian text annotated with part-of-speech tags:
+
+    >>> from nltk.corpus import indian
+    >>> print(indian.words()) # doctest: +SKIP
+    ['\xe0\xa6\xae\xe0\xa6\xb9\xe0\xa6\xbf\...',
+     '\xe0\xa6\xb8\xe0\xa6\xa8\xe0\xa7\x8d\xe0...', ...]
+    >>> print(indian.tagged_words()) # doctest: +SKIP
+    [('\xe0\xa6\xae\xe0\xa6\xb9\xe0\xa6\xbf...', 'NN'),
+     ('\xe0\xa6\xb8\xe0\xa6\xa8\xe0\xa7\x8d\xe0...', 'NN'), ...]
+
+Several tagged corpora support access to a simplified, universal tagset, e.g. where all noun
+tags are collapsed to a single category ``NOUN``:
+
+    >>> print(brown.tagged_sents(tagset='universal')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [[('The', 'DET'), ('Fulton', 'NOUN'), ('County', 'NOUN'), ('Grand', 'ADJ'), ('Jury', 'NOUN'), ...],
+     [('The', 'DET'), ('jury', 'NOUN'), ('further', 'ADV'), ('said', 'VERB'), ('in', 'ADP'), ...]...]
+    >>> from nltk.corpus import conll2000, switchboard
+    >>> print(conll2000.tagged_words(tagset='universal')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [('Confidence', 'NOUN'), ('in', 'ADP'), ...]
+
+Use ``nltk.app.pos_concordance()`` to access a GUI for searching tagged corpora.
+
+Chunked Corpora
+===============
+
+The CoNLL corpora also provide chunk structures, which are encoded as
+flat trees.  The CoNLL 2000 Corpus includes phrasal chunks; and the
+CoNLL 2002 Corpus includes named entity chunks.
+
+    >>> from nltk.corpus import conll2000, conll2002
+    >>> print(conll2000.sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [['Confidence', 'in', 'the', 'pound', 'is', 'widely', ...],
+     ['Chancellor', 'of', 'the', 'Exchequer', ...], ...]
+    >>> for tree in conll2000.chunked_sents()[:2]:
+    ...     print(tree) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    (S
+      (NP Confidence/NN)
+      (PP in/IN)
+      (NP the/DT pound/NN)
+      (VP is/VBZ widely/RB expected/VBN to/TO take/VB)
+      (NP another/DT sharp/JJ dive/NN)
+      if/IN
+      ...)
+    (S
+      Chancellor/NNP
+      (PP of/IN)
+      (NP the/DT Exchequer/NNP)
+      ...)
+    >>> print(conll2002.sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [['Sao', 'Paulo', '(', 'Brasil', ')', ',', ...], ['-'], ...]
+    >>> for tree in conll2002.chunked_sents()[:2]:
+    ...     print(tree) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    (S
+      (LOC Sao/NC Paulo/VMI)
+      (/Fpa
+      (LOC Brasil/NC)
+      )/Fpt
+      ...)
+    (S -/Fg)
+
+.. note:: Since the CONLL corpora do not contain paragraph break
+   information, these readers do not support the ``paras()`` method.
+
+.. warning:: If you call the conll corpora reader methods without any
+   arguments, they will return the contents of the entire corpus,
+   *including* the 'test' portions of the corpus.
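+
+   To read just one portion, pass an explicit fileid; a minimal sketch,
+   assuming the fileids shipped in the NLTK data package:
+
+       >>> sorted(conll2000.fileids())
+       ['test.txt', 'train.txt']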
+
+SemCor is a subset of the Brown corpus tagged with WordNet senses and
+named entities. Both kinds of lexical items include multiword units,
+which are encoded as chunks (senses and part-of-speech tags pertain
+to the entire chunk).
+
+    >>> from nltk.corpus import semcor
+    >>> semcor.words()
+    ['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
+    >>> semcor.chunks()
+    [['The'], ['Fulton', 'County', 'Grand', 'Jury'], ...]
+    >>> semcor.sents() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [['The', 'Fulton', 'County', 'Grand', 'Jury', 'said', ...],
+    ['The', 'jury', 'further', 'said', ...], ...]
+    >>> semcor.chunk_sents() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [[['The'], ['Fulton', 'County', 'Grand', 'Jury'], ['said'], ...
+    ['.']], [['The'], ['jury'], ['further'], ['said'], ... ['.']], ...]
+    >>> list(map(str, semcor.tagged_chunks(tag='both')[:3]))
+    ['(DT The)', "(Lemma('group.n.01.group') (NE (NNP Fulton County Grand Jury)))", "(Lemma('state.v.01.say') (VB said))"]
+    >>> [[str(c) for c in s] for s in semcor.tagged_sents(tag='both')[:2]]
+    [['(DT The)', "(Lemma('group.n.01.group') (NE (NNP Fulton County Grand Jury)))", ...
+     '(None .)'], ['(DT The)', ... '(None .)']]
+
+
+The IEER corpus is another chunked corpus.  This corpus is unusual in
+that each corpus item contains multiple documents, since each corpus
+file holds several of them.  The IEER
+corpus defines the `parsed_docs` method, which returns the documents
+in a given item as `IEERDocument` objects:
+
+    >>> from nltk.corpus import ieer
+    >>> ieer.fileids() # doctest: +NORMALIZE_WHITESPACE
+    ['APW_19980314', 'APW_19980424', 'APW_19980429',
+     'NYT_19980315', 'NYT_19980403', 'NYT_19980407']
+    >>> docs = ieer.parsed_docs('APW_19980314')
+    >>> print(docs[0])
+    <IEERDocument APW19980314.0391: 'Kenyans protest tax hikes'>
+    >>> print(docs[0].docno)
+    APW19980314.0391
+    >>> print(docs[0].doctype)
+    NEWS STORY
+    >>> print(docs[0].date_time)
+    03/14/1998 10:36:00
+    >>> print(docs[0].headline)
+    (DOCUMENT Kenyans protest tax hikes)
+    >>> print(docs[0].text) # doctest: +ELLIPSIS
+    (DOCUMENT
+      (LOCATION NAIROBI)
+      ,
+      (LOCATION Kenya)
+      (
+      (ORGANIZATION AP)
+      )
+      _
+      (CARDINAL Thousands)
+      of
+      laborers,
+      ...
+      on
+      (DATE Saturday)
+      ...)
+
+Parsed Corpora
+==============
+
+The Treebank corpora provide a syntactic parse for each sentence.  The
+NLTK data package includes a 10% sample of the Penn Treebank (in
+``treebank``), as well as the Sinica Treebank (in ``sinica_treebank``).
+
+Reading the Penn Treebank (Wall Street Journal sample):
+
+    >>> from nltk.corpus import treebank
+    >>> print(treebank.fileids()) # doctest: +ELLIPSIS
+    ['wsj_0001.mrg', 'wsj_0002.mrg', 'wsj_0003.mrg', 'wsj_0004.mrg', ...]
+    >>> print(treebank.words('wsj_0003.mrg'))
+    ['A', 'form', 'of', 'asbestos', 'once', 'used', ...]
+    >>> print(treebank.tagged_words('wsj_0003.mrg'))
+    [('A', 'DT'), ('form', 'NN'), ('of', 'IN'), ...]
+    >>> print(treebank.parsed_sents('wsj_0003.mrg')[0]) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    (S
+      (S-TPC-1
+        (NP-SBJ
+          (NP (NP (DT A) (NN form)) (PP (IN of) (NP (NN asbestos))))
+          (RRC ...)...)...)
+      ...
+      (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1))))
+      (. .))
+
+If you have access to a full installation of the Penn Treebank, NLTK
+can be configured to load it as well. Download the ``ptb`` package,
+and in the directory ``nltk_data/corpora/ptb`` place the ``BROWN``
+and ``WSJ`` directories of the Treebank installation (symlinks work
+as well). Then use the ``ptb`` module instead of ``treebank``:
+
+   >>> from nltk.corpus import ptb
+   >>> print(ptb.fileids()) # doctest: +SKIP
+   ['BROWN/CF/CF01.MRG', 'BROWN/CF/CF02.MRG', 'BROWN/CF/CF03.MRG', 'BROWN/CF/CF04.MRG', ...]
+   >>> print(ptb.words('WSJ/00/WSJ_0003.MRG')) # doctest: +SKIP
+   ['A', 'form', 'of', 'asbestos', 'once', 'used', '*', ...]
+   >>> print(ptb.tagged_words('WSJ/00/WSJ_0003.MRG')) # doctest: +SKIP
+   [('A', 'DT'), ('form', 'NN'), ('of', 'IN'), ...]
+
+...and so forth, like ``treebank`` but with extended fileids. Categories
+specified in ``allcats.txt`` can be used to filter by genre; they consist
+of ``news`` (for WSJ articles) and names of the Brown subcategories
+(``fiction``, ``humor``, ``romance``, etc.):
+
+   >>> ptb.categories() # doctest: +SKIP
+   ['adventure', 'belles_lettres', 'fiction', 'humor', 'lore', 'mystery', 'news', 'romance', 'science_fiction']
+   >>> print(ptb.fileids('news')) # doctest: +SKIP
+   ['WSJ/00/WSJ_0001.MRG', 'WSJ/00/WSJ_0002.MRG', 'WSJ/00/WSJ_0003.MRG', ...]
+   >>> print(ptb.words(categories=['humor','fiction'])) # doctest: +SKIP
+   ['Thirty-three', 'Scotty', 'did', 'not', 'go', 'back', ...]
+
+As PropBank and NomBank depend on the (WSJ portion of the) Penn Treebank,
+the modules ``propbank_ptb`` and ``nombank_ptb`` are provided for access
+to a full PTB installation.
+
+Reading the Sinica Treebank:
+
+    >>> from nltk.corpus import sinica_treebank
+    >>> print(sinica_treebank.sents()) # doctest: +SKIP
+    [['\xe4\xb8\x80'], ['\xe5\x8f\x8b\xe6\x83\x85'], ...]
+    >>> sinica_treebank.parsed_sents()[25] # doctest: +SKIP
+    Tree('S',
+        [Tree('NP',
+            [Tree('Nba', ['\xe5\x98\x89\xe7\x8f\x8d'])]),
+         Tree('V\xe2\x80\xa7\xe5\x9c\xb0',
+            [Tree('VA11', ['\xe4\xb8\x8d\xe5\x81\x9c']),
+             Tree('DE', ['\xe7\x9a\x84'])]),
+         Tree('VA4', ['\xe5\x93\xad\xe6\xb3\xa3'])])
+
+Reading the CoNLL 2007 Dependency Treebanks:
+
+    >>> from nltk.corpus import conll2007
+    >>> conll2007.sents('esp.train')[0] # doctest: +SKIP
+    ['El', 'aumento', 'del', 'índice', 'de', 'desempleo', ...]
+    >>> conll2007.parsed_sents('esp.train')[0] # doctest: +SKIP
+    <DependencyGraph with 38 nodes>
+    >>> print(conll2007.parsed_sents('esp.train')[0].tree()) # doctest: +SKIP
+    (fortaleció
+      (aumento El (del (índice (de (desempleo estadounidense)))))
+      hoy
+      considerablemente
+      (al
+        (euro
+          (cotizaba
+            ,
+            que
+            (a (15.35 las GMT))
+            se
+            (en (mercado el (de divisas) (de Fráncfort)))
+            (a 0,9452_dólares)
+            (frente_a , (0,9349_dólares los (de (mañana esta)))))))
+      .)
+
+Word Lists and Lexicons
+=======================
+
+The NLTK data package also includes a number of lexicons and word
+lists.  These are accessed just like text corpora.  The following
+examples illustrate the use of the wordlist corpora:
+
+    >>> from nltk.corpus import names, stopwords, words
+    >>> words.fileids()
+    ['en', 'en-basic']
+    >>> words.words('en') # doctest: +ELLIPSIS
+    ['A', 'a', 'aa', 'aal', 'aalii', 'aam', 'Aani', 'aardvark', 'aardwolf', ...]
+
+    >>> stopwords.fileids() # doctest: +ELLIPSIS
+    ['arabic', 'azerbaijani', 'danish', 'dutch', 'english', 'finnish', 'french', ...]
+    >>> sorted(stopwords.words('portuguese')) # doctest: +ELLIPSIS
+    ['a', 'ao', 'aos', 'aquela', 'aquelas', 'aquele', 'aqueles', ...]
+    >>> names.fileids()
+    ['female.txt', 'male.txt']
+    >>> names.words('male.txt') # doctest: +ELLIPSIS
+    ['Aamir', 'Aaron', 'Abbey', 'Abbie', 'Abbot', 'Abbott', ...]
+    >>> names.words('female.txt') # doctest: +ELLIPSIS
+    ['Abagael', 'Abagail', 'Abbe', 'Abbey', 'Abbi', 'Abbie', ...]
+
+The CMU Pronunciation Dictionary corpus contains pronunciation
+transcriptions for over 100,000 words.  It can be accessed as a list
+of entries (where each entry consists of a word, an identifier, and a
+transcription) or as a dictionary from words to lists of
+transcriptions.  Transcriptions are encoded as tuples of phoneme
+strings.
+
+    >>> from nltk.corpus import cmudict
+    >>> print(cmudict.entries()[653:659]) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [('acetate', ['AE1', 'S', 'AH0', 'T', 'EY2', 'T']),
+    ('acetic', ['AH0', 'S', 'EH1', 'T', 'IH0', 'K']),
+    ('acetic', ['AH0', 'S', 'IY1', 'T', 'IH0', 'K']),
+    ('aceto', ['AA0', 'S', 'EH1', 'T', 'OW0']),
+    ('acetochlor', ['AA0', 'S', 'EH1', 'T', 'OW0', 'K', 'L', 'AO2', 'R']),
+    ('acetone', ['AE1', 'S', 'AH0', 'T', 'OW2', 'N'])]
+    >>> # Load the entire cmudict corpus into a Python dictionary:
+    >>> transcr = cmudict.dict()
+    >>> print([transcr[w][0] for w in 'Natural Language Tool Kit'.lower().split()]) # doctest: +NORMALIZE_WHITESPACE
+    [['N', 'AE1', 'CH', 'ER0', 'AH0', 'L'],
+     ['L', 'AE1', 'NG', 'G', 'W', 'AH0', 'JH'],
+     ['T', 'UW1', 'L'],
+     ['K', 'IH1', 'T']]
+
+
+WordNet
+=======
+
+Please see the separate WordNet howto.
+
+FrameNet
+========
+
+Please see the separate FrameNet howto.
+
+PropBank
+========
+
+Please see the separate PropBank howto.
+
+SentiWordNet
+============
+
+Please see the separate SentiWordNet howto.
+
+Categorized Corpora
+===================
+
+Several corpora included with NLTK contain documents that have been categorized for
+topic, genre, polarity, etc.  In addition to the standard corpus interface, these
+corpora provide access to the list of categories and the mapping between the documents
+and their categories (in both directions).  Access the categories using the ``categories()``
+method, e.g.:
+
+    >>> from nltk.corpus import brown, movie_reviews, reuters
+    >>> brown.categories() # doctest: +NORMALIZE_WHITESPACE
+    ['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor',
+    'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
+    >>> movie_reviews.categories()
+    ['neg', 'pos']
+    >>> reuters.categories() # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    ['acq', 'alum', 'barley', 'bop', 'carcass', 'castor-oil', 'cocoa',
+    'coconut', 'coconut-oil', 'coffee', 'copper', 'copra-cake', 'corn',
+    'cotton', 'cotton-oil', 'cpi', 'cpu', 'crude', 'dfl', 'dlr', ...]
+
+This method has an optional argument that specifies a document or a list
+of documents, allowing us to map from (one or more) documents to (one or more) categories:
+
+    >>> brown.categories('ca01')
+    ['news']
+    >>> brown.categories(['ca01','cb01'])
+    ['editorial', 'news']
+    >>> reuters.categories('training/9865')
+    ['barley', 'corn', 'grain', 'wheat']
+    >>> reuters.categories(['training/9865', 'training/9880'])
+    ['barley', 'corn', 'grain', 'money-fx', 'wheat']
+
+We can go back the other way using the optional argument of the ``fileids()`` method:
+
+    >>> reuters.fileids('barley') # doctest: +ELLIPSIS
+    ['test/15618', 'test/15649', 'test/15676', 'test/15728', 'test/15871', ...]
+
+Both the ``categories()`` and ``fileids()`` methods return a sorted list containing
+no duplicates.
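+
+This invariant is easy to check directly, reusing the Reuters fileids from
+the example above:
+
+    >>> cats = reuters.categories(['training/9865', 'training/9880'])
+    >>> cats == sorted(set(cats))
+    True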
+
+In addition to mapping between categories and documents, these corpora permit
+direct access to their contents via the categories.  Instead of accessing a subset
+of a corpus by specifying one or more fileids, we can identify one or more categories, e.g.:
+
+    >>> brown.tagged_words(categories='news')
+    [('The', 'AT'), ('Fulton', 'NP-TL'), ...]
+    >>> brown.sents(categories=['editorial','reviews']) # doctest: +NORMALIZE_WHITESPACE
+    [['Assembly', 'session', 'brought', 'much', 'good'], ['The', 'General',
+    'Assembly', ',', 'which', 'adjourns', 'today', ',', 'has', 'performed',
+    'in', 'an', 'atmosphere', 'of', 'crisis', 'and', 'struggle', 'from',
+    'the', 'day', 'it', 'convened', '.'], ...]
+
+Note that it is an error to specify both documents and categories.
+
+In the context of a text categorization system, we can easily test if the
+category assigned to a document is correct as follows:
+
+    >>> def classify(doc): return 'news'   # Trivial classifier
+    >>> doc = 'ca01'
+    >>> classify(doc) in brown.categories(doc)
+    True
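+
+The same check extends to a whole category; a minimal sketch (by
+construction, the trivial classifier above is correct on every document
+filed under 'news'):
+
+    >>> docs = brown.fileids(categories='news')
+    >>> all(classify(d) in brown.categories(d) for d in docs)
+    True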
+
+
+Other Corpora
+=============
+
+comparative_sentences
+---------------------
+A list of sentences from various sources, especially reviews and articles. Each
+line contains one sentence; sentences were split using a sentence tokenizer.
+Comparative sentences have been annotated with their type, entities, features and
+keywords.
+
+    >>> from nltk.corpus import comparative_sentences
+    >>> comparison = comparative_sentences.comparisons()[0]
+    >>> comparison.text
+    ['its', 'fast-forward', 'and', 'rewind', 'work', 'much', 'more', 'smoothly',
+    'and', 'consistently', 'than', 'those', 'of', 'other', 'models', 'i', "'ve",
+    'had', '.']
+    >>> comparison.entity_2
+    'models'
+    >>> (comparison.feature, comparison.keyword)
+    ('rewind', 'more')
+    >>> len(comparative_sentences.comparisons())
+    853
+
+opinion_lexicon
+---------------
+A list of positive and negative opinion words or sentiment words for English.
+
+    >>> from nltk.corpus import opinion_lexicon
+    >>> opinion_lexicon.words()[:4]
+    ['2-faced', '2-faces', 'abnormal', 'abolish']
+
+The OpinionLexiconCorpusReader also provides shortcuts to retrieve positive/negative
+words:
+
+    >>> opinion_lexicon.negative()[:4]
+    ['2-faced', '2-faces', 'abnormal', 'abolish']
+
+Note that words from `words()` method in opinion_lexicon are sorted by file id,
+not alphabetically:
+
+    >>> opinion_lexicon.words()[0:10]
+    ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably',
+    'abominate', 'abomination', 'abort', 'aborted']
+    >>> sorted(opinion_lexicon.words())[0:10]
+    ['2-faced', '2-faces', 'a+', 'abnormal', 'abolish', 'abominable', 'abominably',
+    'abominate', 'abomination', 'abort']
+
+ppattach
+--------
+The Prepositional Phrase Attachment corpus is a corpus of
+prepositional phrase attachment decisions.  Each instance in the
+corpus is encoded as a ``PPAttachment`` object:
+
+    >>> from nltk.corpus import ppattach
+    >>> ppattach.attachments('training') # doctest: +NORMALIZE_WHITESPACE
+    [PPAttachment(sent='0', verb='join', noun1='board',
+                  prep='as', noun2='director', attachment='V'),
+     PPAttachment(sent='1', verb='is', noun1='chairman',
+                  prep='of', noun2='N.V.', attachment='N'),
+     ...]
+    >>> inst = ppattach.attachments('training')[0]
+    >>> (inst.sent, inst.verb, inst.noun1, inst.prep, inst.noun2)
+    ('0', 'join', 'board', 'as', 'director')
+    >>> inst.attachment
+    'V'
+
+product_reviews_1 and product_reviews_2
+---------------------------------------
+These two datasets respectively contain annotated customer reviews of 5 and 9
+products from amazon.com.
+
+    >>> from nltk.corpus import product_reviews_1
+    >>> camera_reviews = product_reviews_1.reviews('Canon_G3.txt')
+    >>> review = camera_reviews[0]
+    >>> review.sents()[0]
+    ['i', 'recently', 'purchased', 'the', 'canon', 'powershot', 'g3', 'and', 'am',
+    'extremely', 'satisfied', 'with', 'the', 'purchase', '.']
+    >>> review.features()
+    [('canon powershot g3', '+3'), ('use', '+2'), ('picture', '+2'),
+    ('picture quality', '+1'), ('picture quality', '+1'), ('camera', '+2'),
+    ('use', '+2'), ('feature', '+1'), ('picture quality', '+3'), ('use', '+1'),
+    ('option', '+1')]
+
+It is also possible to reach the same information directly from the stream:
+
+    >>> product_reviews_1.features('Canon_G3.txt')
+    [('canon powershot g3', '+3'), ('use', '+2'), ...]
+
+We can compute stats for specific product features:
+
+    >>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
+    >>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
+    >>> mean = tot / n_reviews
+    >>> print(n_reviews, tot, mean)
+    15 24 1.6
+
+pros_cons
+---------
+A list of pros/cons sentences for determining context (aspect) dependent
+sentiment words, which are then applied to sentiment analysis of comparative
+sentences.
+
+    >>> from nltk.corpus import pros_cons
+    >>> pros_cons.sents(categories='Cons')
+    [['East', 'batteries', '!', 'On', '-', 'off', 'switch', 'too', 'easy',
+    'to', 'maneuver', '.'], ['Eats', '...', 'no', ',', 'GULPS', 'batteries'],
+    ...]
+    >>> pros_cons.words('IntegratedPros.txt')
+    ['Easy', 'to', 'use', ',', 'economical', '!', ...]
+
+semcor
+------
+The Brown Corpus, annotated with WordNet senses.
+
+    >>> from nltk.corpus import semcor
+    >>> semcor.words('brown2/tagfiles/br-n12.xml')  # doctest: +ELLIPSIS
+    ['When', 'several', 'minutes', 'had', 'passed', ...]
+    >>> sent = semcor.xml('brown2/tagfiles/br-n12.xml').findall('context/p/s')[0]
+    >>> for wordform in sent.getchildren():
+    ...     print(wordform.text, end=' ')
+    ...     for key in sorted(wordform.keys()):
+    ...         print(key + '=' + wordform.get(key), end=' ')
+    ...     print()
+    ...
+    When cmd=ignore pos=WRB
+    several cmd=done lemma=several lexsn=5:00:00:some(a):00 pos=JJ wnsn=1
+    minutes cmd=done lemma=minute lexsn=1:28:00:: pos=NN wnsn=1
+    had cmd=done ot=notag pos=VBD
+    passed cmd=done lemma=pass lexsn=2:38:03:: pos=VB wnsn=4
+    and cmd=ignore pos=CC
+    Curt cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1
+    had cmd=done ot=notag pos=VBD
+    n't cmd=done lemma=n't lexsn=4:02:00:: pos=RB wnsn=0
+    emerged cmd=done lemma=emerge lexsn=2:30:00:: pos=VB wnsn=1
+    from cmd=ignore pos=IN
+    the cmd=ignore pos=DT
+    livery_stable cmd=done lemma=livery_stable lexsn=1:06:00:: pos=NN wnsn=1
+    ,
+    Brenner cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1
+    re-entered cmd=done lemma=re-enter lexsn=2:38:00:: pos=VB wnsn=1
+    the cmd=ignore pos=DT
+    hotel cmd=done lemma=hotel lexsn=1:06:00:: pos=NN wnsn=1
+    and cmd=ignore pos=CC
+    faced cmd=done lemma=face lexsn=2:42:02:: pos=VB wnsn=4
+    Summers cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1
+    across cmd=ignore pos=IN
+    the cmd=ignore pos=DT
+    counter cmd=done lemma=counter lexsn=1:06:00:: pos=NN wnsn=1
+    .
+
+senseval
+--------
+The Senseval 2 corpus is a word sense disambiguation corpus.  Each
+item in the corpus corresponds to a single ambiguous word.  For each
+of these words, the corpus contains a list of instances, corresponding
+to occurrences of that word.  Each instance provides the word; a list
+of word senses that apply to the word occurrence; and the word's
+context.
+
+    >>> from nltk.corpus import senseval
+    >>> senseval.fileids()
+    ['hard.pos', 'interest.pos', 'line.pos', 'serve.pos']
+    >>> senseval.instances('hard.pos')
+    ... # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [SensevalInstance(word='hard-a',
+        position=20,
+        context=[('``', '``'), ('he', 'PRP'), ...('hard', 'JJ'), ...],
+        senses=('HARD1',)),
+     SensevalInstance(word='hard-a',
+        position=10,
+        context=[('clever', 'NNP'), ...('hard', 'JJ'), ('time', 'NN'), ...],
+        senses=('HARD1',)), ...]
+
+The following code looks at instances of the word 'interest', and
+displays their local context (2 words on each side) and word sense(s):
+
+    >>> for inst in senseval.instances('interest.pos')[:10]:
+    ...     p = inst.position
+    ...     left = ' '.join(w for (w,t) in inst.context[p-2:p])
+    ...     word = ' '.join(w for (w,t) in inst.context[p:p+1])
+    ...     right = ' '.join(w for (w,t) in inst.context[p+1:p+3])
+    ...     senses = ' '.join(inst.senses)
+    ...     print('%20s |%10s | %-15s -> %s' % (left, word, right, senses))
+             declines in |  interest | rates .         -> interest_6
+      indicate declining |  interest | rates because   -> interest_6
+           in short-term |  interest | rates .         -> interest_6
+                     4 % |  interest | in this         -> interest_5
+            company with | interests | in the          -> interest_5
+                  , plus |  interest | .               -> interest_6
+                 set the |  interest | rate on         -> interest_6
+                  's own |  interest | , prompted      -> interest_4
+           principal and |  interest | is the          -> interest_6
+            increase its |  interest | to 70           -> interest_5
+
+sentence_polarity
+-----------------
+The Sentence Polarity dataset contains 5331 positive and 5331 negative processed
+sentences.
+
+    >>> from nltk.corpus import sentence_polarity
+    >>> sentence_polarity.sents()
+    [['simplistic', ',', 'silly', 'and', 'tedious', '.'], ["it's", 'so', 'laddish',
+    'and', 'juvenile', ',', 'only', 'teenage', 'boys', 'could', 'possibly', 'find',
+    'it', 'funny', '.'], ...]
+    >>> sentence_polarity.categories()
+    ['neg', 'pos']
+    >>> sentence_polarity.sents()[1]
+    ["it's", 'so', 'laddish', 'and', 'juvenile', ',', 'only', 'teenage', 'boys',
+    'could', 'possibly', 'find', 'it', 'funny', '.']
+
+shakespeare
+-----------
+The Shakespeare corpus contains a set of Shakespeare plays, formatted
+as XML files.  These corpora are returned as ElementTree objects:
+
+    >>> from nltk.corpus import shakespeare
+    >>> from xml.etree import ElementTree
+    >>> shakespeare.fileids() # doctest: +ELLIPSIS
+    ['a_and_c.xml', 'dream.xml', 'hamlet.xml', 'j_caesar.xml', ...]
+    >>> play = shakespeare.xml('dream.xml')
+    >>> print(play) # doctest: +ELLIPSIS
+    <Element 'PLAY' at ...>
+    >>> print('%s: %s' % (play[0].tag, play[0].text))
+    TITLE: A Midsummer Night's Dream
+    >>> personae = [persona.text for persona in
+    ...             play.findall('PERSONAE/PERSONA')]
+    >>> print(personae) # doctest: +ELLIPSIS
+    ['THESEUS, Duke of Athens.', 'EGEUS, father to Hermia.', ...]
+    >>> # Find and print speakers not listed as personae
+    >>> names = [persona.split(',')[0] for persona in personae]
+    >>> speakers = set(speaker.text for speaker in
+    ...                play.findall('*/*/*/SPEAKER'))
+    >>> print(sorted(speakers.difference(names))) # doctest: +NORMALIZE_WHITESPACE
+    ['ALL', 'COBWEB', 'DEMETRIUS', 'Fairy', 'HERNIA', 'LYSANDER',
+     'Lion', 'MOTH', 'MUSTARDSEED', 'Moonshine', 'PEASEBLOSSOM',
+     'Prologue', 'Pyramus', 'Thisbe', 'Wall']
+
+subjectivity
+------------
+The Subjectivity Dataset contains 5000 subjective and 5000 objective processed
+sentences.
+
+    >>> from nltk.corpus import subjectivity
+    >>> subjectivity.categories()
+    ['obj', 'subj']
+    >>> subjectivity.sents()[23]
+    ['television', 'made', 'him', 'famous', ',', 'but', 'his', 'biggest', 'hits',
+    'happened', 'off', 'screen', '.']
+    >>> subjectivity.words(categories='subj')
+    ['smart', 'and', 'alert', ',', 'thirteen', ...]
+
+toolbox
+-------
+The Toolbox corpus distributed with NLTK contains a sample lexicon and
+several sample texts from the Rotokas language.  The Toolbox corpus
+reader returns Toolbox files as XML ElementTree objects.  The
+following example loads the Rotokas dictionary, and figures out the
+distribution of part-of-speech tags for reduplicated words.
+
+.. doctest: +SKIP
+
+    >>> from nltk.corpus import toolbox
+    >>> from nltk.probability import FreqDist
+    >>> from xml.etree import ElementTree
+    >>> import re
+    >>> rotokas = toolbox.xml('rotokas.dic')
+    >>> redup_pos_freqdist = FreqDist()
+    >>> # Note: we skip over the first record, which is actually
+    >>> # the header.
+    >>> for record in rotokas[1:]:
+    ...     lexeme = record.find('lx').text
+    ...     if re.match(r'(.*)\1$', lexeme):
+    ...         redup_pos_freqdist[record.find('ps').text] += 1
+    >>> for item, count in redup_pos_freqdist.most_common():
+    ...     print(item, count)
+    V 41
+    N 14
+    ??? 4
+
+This example displays some records from a Rotokas text:
+
+.. doctest: +SKIP
+
+    >>> river = toolbox.xml('rotokas/river.txt', key='ref')
+    >>> for record in river.findall('record')[:3]:
+    ...     for piece in record:
+    ...         if len(piece.text) > 60:
+    ...             print('%-6s %s...' % (piece.tag, piece.text[:57]))
+    ...         else:
+    ...             print('%-6s %s' % (piece.tag, piece.text))
+    ref    Paragraph 1
+    t      ``Viapau oisio              ra   ovaupasi                ...
+    m      viapau   oisio              ra   ovau   -pa       -si    ...
+    g      NEG      this way/like this and  forget -PROG     -2/3.DL...
+    p      NEG      ???                CONJ V.I    -SUFF.V.3 -SUFF.V...
+    f      ``No ken lus tingting wanema samting papa i bin tok,'' Na...
+    fe     ``Don't forget what Dad said,'' yelled Naomi.
+    ref    2
+    t      Osa     Ira  ora  Reviti viapau uvupasiva.
+    m      osa     Ira  ora  Reviti viapau uvu        -pa       -si ...
+    g      as/like name and  name   NEG    hear/smell -PROG     -2/3...
+    p      CONJ    N.PN CONJ N.PN   NEG    V.T        -SUFF.V.3 -SUF...
+    f      Tasol Ila na David no bin harim toktok.
+    fe     But Ila and David took no notice.
+    ref    3
+    t      Ikaupaoro                     rokosiva                   ...
+    m      ikau      -pa       -oro      roko    -si       -va      ...
+    g      run/hurry -PROG     -SIM      go down -2/3.DL.M -RP      ...
+    p      V.T       -SUFF.V.3 -SUFF.V.4 ADV     -SUFF.V.4 -SUFF.VT....
+    f      Tupela i bin hariap i go long wara .
+    fe     They raced to the river.
+
+timit
+-----
+The NLTK data package includes a fragment of the TIMIT
+Acoustic-Phonetic Continuous Speech Corpus.  This corpus is broken
+down into small speech samples, each of which is available as a wave
+file, a phonetic transcription, and a tokenized word list.
+
+    >>> from nltk.corpus import timit
+    >>> print(timit.utteranceids()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    ['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466',
+    'dr1-fvmh0/si2096', 'dr1-fvmh0/si836', 'dr1-fvmh0/sx116',
+    'dr1-fvmh0/sx206', 'dr1-fvmh0/sx26', 'dr1-fvmh0/sx296', ...]
+
+    >>> item = timit.utteranceids()[5]
+    >>> print(timit.phones(item)) # doctest: +NORMALIZE_WHITESPACE
+    ['h#', 'k', 'l', 'ae', 's', 'pcl', 'p', 'dh', 'ax',
+     's', 'kcl', 'k', 'r', 'ux', 'ix', 'nx', 'y', 'ax',
+     'l', 'eh', 'f', 'tcl', 't', 'hh', 'ae', 'n', 'dcl',
+     'd', 'h#']
+    >>> print(timit.words(item))
+    ['clasp', 'the', 'screw', 'in', 'your', 'left', 'hand']
+    >>> timit.play(item) # doctest: +SKIP
+
+The corpus reader can combine the word segmentation information with
+the phonemes to produce a single tree structure:
+
+    >>> for tree in timit.phone_trees(item):
+    ...     print(tree)
+    (S
+      h#
+      (clasp k l ae s pcl p)
+      (the dh ax)
+      (screw s kcl k r ux)
+      (in ix nx)
+      (your y ax)
+      (left l eh f tcl t)
+      (hand hh ae n dcl d)
+      h#)
+
+The start time and stop time of each phoneme, word, and sentence are
+also available:
+
+    >>> print(timit.phone_times(item)) # doctest: +ELLIPSIS
+    [('h#', 0, 2190), ('k', 2190, 3430), ('l', 3430, 4326), ...]
+    >>> print(timit.word_times(item)) # doctest: +ELLIPSIS
+    [('clasp', 2190, 8804), ('the', 8804, 9734), ...]
+    >>> print(timit.sent_times(item))
+    [('Clasp the screw in your left hand.', 0, 32154)]
+
+We can use these times to play selected pieces of a speech sample:
+
+    >>> timit.play(item, 2190, 8804) # 'clasp'  # doctest: +SKIP
+
+The corpus reader can also be queried for information about the
+speaker and sentence identifier for a given speech sample:
+
+    >>> print(timit.spkrid(item))
+    dr1-fvmh0
+    >>> print(timit.sentid(item))
+    sx116
+    >>> print(timit.spkrinfo(timit.spkrid(item))) # doctest: +NORMALIZE_WHITESPACE
+    SpeakerInfo(id='VMH0',
+                sex='F',
+                dr='1',
+                use='TRN',
+                recdate='03/11/86',
+                birthdate='01/08/60',
+                ht='5\'05"',
+                race='WHT',
+                edu='BS',
+                comments='BEST NEW ENGLAND ACCENT SO FAR')
+
+    >>> # List the speech samples from the same speaker:
+    >>> timit.utteranceids(spkrid=timit.spkrid(item)) # doctest: +ELLIPSIS
+    ['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466', ...]
+
+twitter_samples
+---------------
+
+Twitter is a well-known microblogging service that allows public data to be
+collected via APIs. NLTK's twitter corpus currently contains a sample of 20k Tweets
+retrieved from the Twitter Streaming API.
+
+    >>> from nltk.corpus import twitter_samples
+    >>> twitter_samples.fileids()
+    ['negative_tweets.json', 'positive_tweets.json', 'tweets.20150430-223406.json']
+
+We follow standard practice in storing full Tweets as line-separated
+JSON. These data structures can be accessed via the reader's `docs()`
+method. However, in general it is more practical to focus just on the text
+field of the Tweets, which is accessed via the `strings()` method.
+
+    >>> twitter_samples.strings('tweets.20150430-223406.json')
+    ['RT @KirkKus: Indirect cost of the UK being in the EU is estimated to be costing Britain \xa3170 billion per year! #BetterOffOut #UKIP', ...]
+
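+The full JSON record for each Tweet can be retrieved with `docs()`; a
+minimal sketch, showing only the 'text' field of the first Tweet (the
+remaining fields are standard Twitter API attributes):
+
+    >>> tweet = twitter_samples.docs('tweets.20150430-223406.json')[0]
+    >>> tweet['text'] # doctest: +ELLIPSIS
+    'RT @KirkKus: Indirect cost of the UK being in the EU is estimated ...'
+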
+The default tokenizer for Tweets is specialised for 'casual' text, and
+the `tokenized()` method returns a list of lists of tokens.
+
+    >>> twitter_samples.tokenized('tweets.20150430-223406.json')
+    [['RT', '@KirkKus', ':', 'Indirect', 'cost', 'of', 'the', 'UK', 'being', 'in', ...],
+     ['VIDEO', ':', 'Sturgeon', 'on', 'post-election', 'deals', 'http://t.co/BTJwrpbmOY'], ...]
+
+rte
+---
+The RTE (Recognizing Textual Entailment) corpus was derived from the
+RTE1, RTE2 and RTE3 datasets (dev and test data), and consists of a
+list of XML-formatted 'text'/'hypothesis' pairs.
+
+    >>> from nltk.corpus import rte
+    >>> print(rte.fileids()) # doctest: +ELLIPSIS
+    ['rte1_dev.xml', 'rte1_test.xml', 'rte2_dev.xml', ..., 'rte3_test.xml']
+    >>> rtepairs = rte.pairs(['rte2_test.xml', 'rte3_test.xml'])
+    >>> print(rtepairs)  # doctest: +ELLIPSIS
+    [<RTEPair: gid=2-8>, <RTEPair: gid=2-9>, <RTEPair: gid=2-15>, ...]
+
+In the gold standard test sets, each pair is labeled according to
+whether or not the text 'entails' the hypothesis; the
+entailment value is mapped to an integer 1 (True) or 0 (False).
+
+    >>> rtepairs[5]
+    <RTEPair: gid=2-23>
+    >>> rtepairs[5].text # doctest: +NORMALIZE_WHITESPACE
+    'His wife Strida won a seat in parliament after forging an alliance
+    with the main anti-Syrian coalition in the recent election.'
+    >>> rtepairs[5].hyp
+    'Strida elected to parliament.'
+    >>> rtepairs[5].value
+    1
+
+The RTE corpus also supports an ``xml()`` method which produces ElementTrees.
+
+    >>> xmltree = rte.xml('rte3_dev.xml')
+    >>> xmltree # doctest: +SKIP
+    <Element entailment-corpus at ...>
+    >>> xmltree[7].findtext('t') # doctest: +NORMALIZE_WHITESPACE
+    "Mrs. Bush's approval ratings have remained very high, above 80%,
+    even as her husband's have recently dropped below 50%."
+
+verbnet
+-------
+The VerbNet corpus is a lexicon that divides verbs into classes, based
+on their syntax-semantics linking behavior.  The basic elements in the
+lexicon are verb lemmas, such as 'abandon' and 'accept', and verb
+classes, which have identifiers such as 'remove-10.1' and
+'admire-31.2-1'.  These class identifiers consist of a representative
+verb selected from the class, followed by a numerical identifier.  The
+list of verb lemmas, and the list of class identifiers, can be
+retrieved with the following methods:
+
+    >>> from nltk.corpus import verbnet
+    >>> verbnet.lemmas()[20:25]
+    ['accelerate', 'accept', 'acclaim', 'accompany', 'accrue']
+    >>> verbnet.classids()[:5]
+    ['accompany-51.7', 'admire-31.2', 'admire-31.2-1', 'admit-65', 'adopt-93']
+
+The `classids()` method may also be used to retrieve the classes that
+a given lemma belongs to:
+
+    >>> verbnet.classids('accept')
+    ['approve-77', 'characterize-29.2-1-1', 'obtain-13.5.2']
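+
+The XML description of a class can be fetched with `vnclass()`; a minimal
+sketch (the return value is an ElementTree element, shown here with its
+default repr):
+
+    >>> verbnet.vnclass('remove-10.1') # doctest: +ELLIPSIS
+    <Element 'VNCLASS' at 0x...>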
+
+The `classids()` method may additionally be used to retrieve all classes
+within verbnet if nothing is passed:
+
+    >>> verbnet.classids()
+    ['accompany-51.7', 'admire-31.2', 'admire-31.2-1', 'admit-65', 'adopt-93', 'advise-37.9', 'advise-37.9-1', 'allow-64', 'amalgamate-22.2', 'amalgamate-22.2-1', 'amalgamate-22.2-1-1', 'amalgamate-22.2-2', 'amalgamate-22.2-2-1', 'amalgamate-22.2-3', 'amalgamate-22.2-3-1', 'amalgamate-22.2-3-1-1', 'amalgamate-22.2-3-2', 'amuse-31.1', 'animal_sounds-38', 'appeal-31.4', 'appeal-31.4-1', 'appeal-31.4-2', 'appeal-31.4-3', 'appear-48.1.1', 'appoint-29.1', 'approve-77', 'assessment-34', 'assuming_position-50', 'avoid-52', 'banish-10.2', 'battle-36.4', 'battle-36.4-1', 'begin-55.1', 'begin-55.1-1', 'being_dressed-41.3.3', 'bend-45.2', 'berry-13.7', 'bill-54.5', 'body_internal_motion-49', 'body_internal_states-40.6', 'braid-41.2.2', 'break-45.1', 'breathe-40.1.2', 'breathe-40.1.2-1', 'bring-11.3', 'bring-11.3-1', 'build-26.1', 'build-26.1-1', 'bulge-47.5.3', 'bump-18.4', 'bump-18.4-1', 'butter-9.9', 'calibratable_cos-45.6', 'calibratable_cos-45.6-1', 'calve-28', 'captain-29.8', 'captain-29.8-1', 'captain-29.8-1-1', 'care-88', 'care-88-1', 'carry-11.4', 'carry-11.4-1', 'carry-11.4-1-1', 'carve-21.2', 'carve-21.2-1', 'carve-21.2-2', 'change_bodily_state-40.8.4', 'characterize-29.2', 'characterize-29.2-1', 'characterize-29.2-1-1', 'characterize-29.2-1-2', 'chase-51.6', 'cheat-10.6', 'cheat-10.6-1', 'cheat-10.6-1-1', 'chew-39.2', 'chew-39.2-1', 'chew-39.2-2', 'chit_chat-37.6', 'clear-10.3', 'clear-10.3-1', 'cling-22.5', 'coil-9.6', 'coil-9.6-1', 'coloring-24', 'complain-37.8', 'complete-55.2', 'concealment-16', 'concealment-16-1', 'confess-37.10', 'confine-92', 'confine-92-1', 'conjecture-29.5', 'conjecture-29.5-1', 'conjecture-29.5-2', 'consider-29.9', 'consider-29.9-1', 'consider-29.9-1-1', 'consider-29.9-1-1-1', 'consider-29.9-2', 'conspire-71', 'consume-66', 'consume-66-1', 'contiguous_location-47.8', 'contiguous_location-47.8-1', 'contiguous_location-47.8-2', 'continue-55.3', 'contribute-13.2', 'contribute-13.2-1', 'contribute-13.2-1-1', 'contribute-13.2-1-1-1', 'contribute-13.2-2', 'contribute-13.2-2-1', 'convert-26.6.2', 'convert-26.6.2-1', 'cooking-45.3', 'cooperate-73', 'cooperate-73-1', 'cooperate-73-2', 'cooperate-73-3', 'cope-83', 'cope-83-1', 'cope-83-1-1', 'correlate-86', 'correspond-36.1', 'correspond-36.1-1', 'correspond-36.1-1-1', 'cost-54.2', 'crane-40.3.2', 'create-26.4', 'create-26.4-1', 'curtsey-40.3.3', 'cut-21.1', 'cut-21.1-1', 'debone-10.8', 'declare-29.4', 'declare-29.4-1', 'declare-29.4-1-1', 'declare-29.4-1-1-1', 'declare-29.4-1-1-2', 'declare-29.4-1-1-3', 'declare-29.4-2', 'dedicate-79', 'defend-85', 'destroy-44', 'devour-39.4', 'devour-39.4-1', 'devour-39.4-2', 'differ-23.4', 'dine-39.5', 'disappearance-48.2', 'disassemble-23.3', 'discover-84', 'discover-84-1', 'discover-84-1-1', 'dress-41.1.1', 'dressing_well-41.3.2', 'drive-11.5', 'drive-11.5-1', 'dub-29.3', 'dub-29.3-1', 'eat-39.1', 'eat-39.1-1', 'eat-39.1-2', 'enforce-63', 'engender-27', 'entity_specific_cos-45.5', 'entity_specific_modes_being-47.2', 'equip-13.4.2', 'equip-13.4.2-1', 'equip-13.4.2-1-1', 'escape-51.1', 'escape-51.1-1', 'escape-51.1-2', 'escape-51.1-2-1', 'exceed-90', 'exchange-13.6', 'exchange-13.6-1', 'exchange-13.6-1-1', 'exhale-40.1.3', 'exhale-40.1.3-1', 'exhale-40.1.3-2', 'exist-47.1', 'exist-47.1-1', 'exist-47.1-1-1', 'feeding-39.7', 'ferret-35.6', 'fill-9.8', 'fill-9.8-1', 'fit-54.3', 'flinch-40.5', 'floss-41.2.1', 'focus-87', 'forbid-67', 'force-59', 'force-59-1', 'free-80', 'free-80-1', 'fulfilling-13.4.1', 'fulfilling-13.4.1-1', 'fulfilling-13.4.1-2', 'funnel-9.3', 'funnel-9.3-1', 'funnel-9.3-2', 'funnel-9.3-2-1', 'future_having-13.3', 'get-13.5.1', 'get-13.5.1-1', 'give-13.1', 'give-13.1-1', 'gobble-39.3', 'gobble-39.3-1', 'gobble-39.3-2', 'gorge-39.6', 'groom-41.1.2', 'grow-26.2', 'help-72', 'help-72-1', 'herd-47.5.2', 'hiccup-40.1.1', 'hit-18.1', 'hit-18.1-1', 'hold-15.1', 'hold-15.1-1', 'hunt-35.1', 'hurt-40.8.3', 'hurt-40.8.3-1', 'hurt-40.8.3-1-1', 'hurt-40.8.3-2', 'illustrate-25.3', 'image_impression-25.1', 'indicate-78', 'indicate-78-1', 'indicate-78-1-1', 'inquire-37.1.2', 'instr_communication-37.4', 'investigate-35.4', 'judgement-33', 'keep-15.2', 'knead-26.5', 'learn-14', 'learn-14-1', 'learn-14-2', 'learn-14-2-1', 'leave-51.2', 'leave-51.2-1', 'lecture-37.11', 'lecture-37.11-1', 'lecture-37.11-1-1', 'lecture-37.11-2', 'light_emission-43.1', 'limit-76', 'linger-53.1', 'linger-53.1-1', 'lodge-46', 'long-32.2', 'long-32.2-1', 'long-32.2-2', 'manner_speaking-37.3', 'marry-36.2', 'marvel-31.3', 'marvel-31.3-1', 'marvel-31.3-2', 'marvel-31.3-3', 'marvel-31.3-4', 'marvel-31.3-5', 'marvel-31.3-6', 'marvel-31.3-7', 'marvel-31.3-8', 'marvel-31.3-9', 'masquerade-29.6', 'masquerade-29.6-1', 'masquerade-29.6-2', 'matter-91', 'meander-47.7', 'meet-36.3', 'meet-36.3-1', 'meet-36.3-2', 'mine-10.9', 'mix-22.1', 'mix-22.1-1', 'mix-22.1-1-1', 'mix-22.1-2', 'mix-22.1-2-1', 'modes_of_being_with_motion-47.3', 'murder-42.1', 'murder-42.1-1', 'neglect-75', 'neglect-75-1', 'neglect-75-1-1', 'neglect-75-2', 'nonvehicle-51.4.2', 'nonverbal_expression-40.2', 'obtain-13.5.2', 'obtain-13.5.2-1', 'occurrence-48.3', 'order-60', 'order-60-1', 'orphan-29.7', 'other_cos-45.4', 'pain-40.8.1', 'pay-68', 'peer-30.3', 'pelt-17.2', 'performance-26.7', 'performance-26.7-1', 'performance-26.7-1-1', 'performance-26.7-2', 'performance-26.7-2-1', 'pit-10.7', 'pocket-9.10', 'pocket-9.10-1', 'poison-42.2', 'poke-19', 'pour-9.5', 'preparing-26.3', 'preparing-26.3-1', 'preparing-26.3-2', 'price-54.4', 'push-12', 'push-12-1', 'push-12-1-1', 'put-9.1', 'put-9.1-1', 'put-9.1-2', 'put_direction-9.4', 'put_spatial-9.2', 'put_spatial-9.2-1', 'reach-51.8', 'reflexive_appearance-48.1.2', 'refrain-69', 'register-54.1', 'rely-70', 'remove-10.1', 'risk-94', 'risk-94-1', 'roll-51.3.1', 'rummage-35.5', 'run-51.3.2', 'rush-53.2', 'say-37.7', 'say-37.7-1', 'say-37.7-1-1', 'say-37.7-2', 'scribble-25.2', 'search-35.2', 'see-30.1', 'see-30.1-1', 'see-30.1-1-1', 'send-11.1', 'send-11.1-1', 'separate-23.1', 'separate-23.1-1', 'separate-23.1-2', 'settle-89', 'shake-22.3', 'shake-22.3-1', 'shake-22.3-1-1', 'shake-22.3-2', 'shake-22.3-2-1', 'sight-30.2', 'simple_dressing-41.3.1', 'slide-11.2', 'slide-11.2-1-1', 'smell_emission-43.3', 'snooze-40.4', 'sound_emission-43.2', 'sound_existence-47.4', 'spank-18.3', 'spatial_configuration-47.6', 'split-23.2', 'spray-9.7', 'spray-9.7-1', 'spray-9.7-1-1', 'spray-9.7-2', 'stalk-35.3', 'steal-10.5', 'stimulus_subject-30.4', 'stop-55.4', 'stop-55.4-1', 'substance_emission-43.4', 'succeed-74', 'succeed-74-1', 'succeed-74-1-1', 'succeed-74-2', 'suffocate-40.7', 'suspect-81', 'swarm-47.5.1', 'swarm-47.5.1-1', 'swarm-47.5.1-2', 'swarm-47.5.1-2-1', 'swat-18.2', 'talk-37.5', 'tape-22.4', 'tape-22.4-1', 'tell-37.2', 'throw-17.1', 'throw-17.1-1', 'throw-17.1-1-1', 'tingle-40.8.2', 'touch-20', 'touch-20-1', 'transcribe-25.4', 'transfer_mesg-37.1.1', 'transfer_mesg-37.1.1-1', 'transfer_mesg-37.1.1-1-1', 'try-61', 'turn-26.6.1', 'turn-26.6.1-1', 'urge-58', 'vehicle-51.4.1', 'vehicle-51.4.1-1', 'waltz-51.5', 'want-32.1', 'want-32.1-1', 'want-32.1-1-1', 'weather-57', 'weekend-56', 'wink-40.3.1', 'wink-40.3.1-1', 'wipe_instr-10.4.2', 'wipe_instr-10.4.2-1', 'wipe_manner-10.4.1', 'wipe_manner-10.4.1-1', 'wish-62', 'withdraw-82', 'withdraw-82-1', 'withdraw-82-2', 'withdraw-82-3']
+
+The primary object in the lexicon is a class record, which is stored
+as an ElementTree xml object.  The class record for a given class
+identifier is returned by the `vnclass()` method:
+
+    >>> verbnet.vnclass('remove-10.1') # doctest: +ELLIPSIS
+    <Element 'VNCLASS' at ...>
+
+The `vnclass()` method also accepts "short" identifiers, such as '10.1':
+
+    >>> verbnet.vnclass('10.1') # doctest: +ELLIPSIS
+    <Element 'VNCLASS' at ...>
+
+See the Verbnet documentation, or the Verbnet files, for information
+about the structure of this xml.  As an example, we can retrieve a
+list of thematic roles for a given Verbnet class:
+
+    >>> vn_31_2 = verbnet.vnclass('admire-31.2')
+    >>> for themrole in vn_31_2.findall('THEMROLES/THEMROLE'):
+    ...     print(themrole.attrib['type'], end=' ')
+    ...     for selrestr in themrole.findall('SELRESTRS/SELRESTR'):
+    ...         print('[%(Value)s%(type)s]' % selrestr.attrib, end=' ')
+    ...     print()
+    Theme
+    Experiencer [+animate]
+    Predicate
+
+The Verbnet corpus also provides a variety of pretty-printing
+functions that can be used to display the xml contents in a more
+concise form.  The simplest such function is `pprint()`:
+
+    >>> print(verbnet.pprint('57'))
+    weather-57
+      Subclasses: (none)
+      Members: blow clear drizzle fog freeze gust hail howl lightning mist
+        mizzle pelt pour precipitate rain roar shower sleet snow spit spot
+        sprinkle storm swelter teem thaw thunder
+      Thematic roles:
+        * Theme[+concrete +force]
+      Frames:
+        Intransitive (Expletive Subject)
+          Example: It's raining.
+          Syntax: LEX[it] LEX[[+be]] VERB
+          Semantics:
+            * weather(during(E), Weather_type, ?Theme)
+        NP (Expletive Subject, Theme Object)
+          Example: It's raining cats and dogs.
+          Syntax: LEX[it] LEX[[+be]] VERB NP[Theme]
+          Semantics:
+            * weather(during(E), Weather_type, Theme)
+        PP (Expletive Subject, Theme-PP)
+          Example: It was pelting with rain.
+          Syntax: LEX[it[+be]] VERB PREP[with] NP[Theme]
+          Semantics:
+            * weather(during(E), Weather_type, Theme)
+
+Verbnet provides frames that link syntax and semantics, each illustrated
+with an example sentence.  These frames are part of the corpus, and we can
+use `frames()` to retrieve the frames for a given Verbnet class.
+
+    >>> frame = verbnet.frames('57')
+    >>> frame == [{'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': '?Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': "It's raining.", 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'LEX', 'modifiers': {'value': '[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'Intransitive', 'secondary': 'Expletive Subject'}}, {'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': 'Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': "It's raining cats and dogs.", 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'LEX', 'modifiers': {'value': '[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'NP', 'modifiers': {'value': 'Theme', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'NP', 'secondary': 'Expletive Subject, Theme Object'}}, {'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': 'Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': 'It was pelting with rain.', 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'PREP', 'modifiers': {'value': 'with', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'NP', 'modifiers': {'value': 'Theme', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'PP', 'secondary': 'Expletive Subject, Theme-PP'}}]
+    True
+
+The Verbnet corpus lets us access thematic roles individually using `themroles()`.
+
+    >>> themroles = verbnet.themroles('57')
+    >>> themroles == [{'modifiers': [{'type': 'concrete', 'value': '+'}, {'type': 'force', 'value': '+'}], 'type': 'Theme'}]
+    True
+
+Verbnet classes may also have subclasses that share the superclass's syntactic
+and semantic properties while differing from it in some respects. The Verbnet
+corpus allows us to access these subclasses using `subclasses()`.
+
+    >>> print(verbnet.subclasses('9.1'))  # Testing with '9.1', since '57' has no subclasses
+    ['put-9.1-1', 'put-9.1-2']
+
+
+nps_chat
+--------
+
+The NPS Chat Corpus, Release 1.0, consists of over 10,000 posts in age-specific
+chat rooms, which have been anonymized, POS-tagged, and dialogue-act tagged.
+
+    >>> print(nltk.corpus.nps_chat.words())
+    ['now', 'im', 'left', 'with', 'this', 'gay', ...]
+    >>> print(nltk.corpus.nps_chat.tagged_words())
+    [('now', 'RB'), ('im', 'PRP'), ('left', 'VBD'), ...]
+    >>> print(nltk.corpus.nps_chat.tagged_posts()) # doctest: +NORMALIZE_WHITESPACE
+    [[('now', 'RB'), ('im', 'PRP'), ('left', 'VBD'), ('with', 'IN'),
+    ('this', 'DT'), ('gay', 'JJ'), ('name', 'NN')], [(':P', 'UH')], ...]
+
+We can access the XML elements corresponding to individual posts.  These elements
+have ``class`` and ``user`` attributes that we can access using ``p.attrib['class']``
+and ``p.attrib['user']``.  They also have text content, accessed using ``p.text``.
+
+    >>> print(nltk.corpus.nps_chat.xml_posts()) # doctest: +ELLIPSIS
+    [<Element 'Post' at 0...>, <Element 'Post' at 0...>, ...]
+    >>> posts = nltk.corpus.nps_chat.xml_posts()
+    >>> sorted(nltk.FreqDist(p.attrib['class'] for p in posts).keys())
+    ['Accept', 'Bye', 'Clarify', 'Continuer', 'Emotion', 'Emphasis',
+    'Greet', 'Other', 'Reject', 'Statement', 'System', 'nAnswer',
+    'whQuestion', 'yAnswer', 'ynQuestion']
+    >>> posts[0].text
+    'now im left with this gay name'
+
+In addition to the above methods for accessing tagged text, we can navigate
+the XML structure directly, as follows:
+
+    >>> tokens = posts[0].findall('terminals/t')
+    >>> [t.attrib['pos'] + "/" + t.attrib['word'] for t in tokens]
+    ['RB/now', 'PRP/im', 'VBD/left', 'IN/with', 'DT/this', 'JJ/gay', 'NN/name']
+
+multext_east
+------------
+
+The Multext-East Corpus consists of POS-tagged versions of George Orwell's book
+1984 in 12 languages: English, Czech, Hungarian, Macedonian, Slovenian, Serbian,
+Slovak, Romanian, Estonian, Farsi, Bulgarian and Polish.
+The corpus can be accessed using the usual methods for tagged corpora. The tagset
+can be transformed from the Multext-East specific MSD tags to the Universal tagset
+using the "tagset" parameter of all functions returning tagged parts of the corpus.
+
+    >>> print(nltk.corpus.multext_east.words("oana-en.xml"))
+    ['It', 'was', 'a', 'bright', ...]
+    >>> print(nltk.corpus.multext_east.tagged_words("oana-en.xml"))
+    [('It', '#Pp3ns'), ('was', '#Vmis3s'), ('a', '#Di'), ...]
+    >>> print(nltk.corpus.multext_east.tagged_sents("oana-en.xml", "universal"))
+    [[('It', 'PRON'), ('was', 'VERB'), ('a', 'DET'), ...]
+
+
+
+---------------------
+Corpus Reader Classes
+---------------------
+
+NLTK's *corpus reader* classes are used to access the contents of a
+diverse set of corpora.  Each corpus reader class is specialized to
+handle a specific corpus format.  Examples include the
+`PlaintextCorpusReader`, which handles corpora that consist of a set
+of unannotated text files, and the `BracketParseCorpusReader`, which
+handles corpora that consist of files containing
+parenthesis-delineated parse trees.
+
+Automatically Created Corpus Reader Instances
+=============================================
+
+When the `nltk.corpus` module is imported, it automatically creates a
+set of corpus reader instances that can be used to access the corpora
+in the NLTK data distribution.  Here is a small sample of those
+corpus reader instances:
+
+    >>> import nltk
+    >>> nltk.corpus.brown # doctest: +ELLIPSIS
+    <CategorizedTaggedCorpusReader ...>
+    >>> nltk.corpus.treebank # doctest: +ELLIPSIS
+    <BracketParseCorpusReader ...>
+    >>> nltk.corpus.names # doctest: +ELLIPSIS
+    <WordListCorpusReader ...>
+    >>> nltk.corpus.genesis # doctest: +ELLIPSIS
+    <PlaintextCorpusReader ...>
+    >>> nltk.corpus.inaugural # doctest: +ELLIPSIS
+    <PlaintextCorpusReader ...>
+
+This sample illustrates that different corpus reader classes are used
+to read different corpora, but also that the same corpus reader class may
+be used for more than one corpus (e.g., ``genesis`` and ``inaugural``).
+
+Creating New Corpus Reader Instances
+====================================
+
+Although the `nltk.corpus` module automatically creates corpus reader
+instances for the corpora in the NLTK data distribution, you may
+sometimes need to create your own corpus reader.  In particular, you
+would need to create your own corpus reader if you want...
+
+- To access a corpus that is not included in the NLTK data
+  distribution.
+
+- To access a full copy of a corpus for which the NLTK data
+  distribution only provides a sample.
+
+- To access a corpus using a customized corpus reader (e.g., with
+  a customized tokenizer).
+
+To create a new corpus reader, you will first need to look up the
+signature for that corpus reader's constructor.  Different corpus
+readers have different constructor signatures, but most of the
+constructor signatures have the basic form::
+
+    SomeCorpusReader(root, files, ...options...)
+
+Where ``root`` is an absolute path to the directory containing the
+corpus data files; ``files`` is either a list of file names (relative
+to ``root``) or a regexp specifying which files should be included;
+and ``options`` are additional reader-specific options.  For example,
+we can create a customized corpus reader for the genesis corpus that
+uses a different sentence tokenizer as follows:
+
+    >>> # Find the directory where the corpus lives.
+    >>> genesis_dir = nltk.data.find('corpora/genesis')
+    >>> # Create our custom sentence tokenizer.
+    >>> my_sent_tokenizer = nltk.RegexpTokenizer('[^.!?]+')
+    >>> # Create the new corpus reader object.
+    >>> my_genesis = nltk.corpus.PlaintextCorpusReader(
+    ...     genesis_dir, r'.*\.txt', sent_tokenizer=my_sent_tokenizer)
+    >>> # Use the new corpus reader object.
+    >>> print(my_genesis.sents('english-kjv.txt')[0]) # doctest: +NORMALIZE_WHITESPACE
+    ['In', 'the', 'beginning', 'God', 'created', 'the', 'heaven',
+     'and', 'the', 'earth']
+
+If you wish to read your own plaintext corpus, which is stored in the
+directory '/usr/share/some-corpus', then you can create a corpus
+reader for it with::
+
+    >>> my_corpus = nltk.corpus.PlaintextCorpusReader(
+    ...     '/usr/share/some-corpus', r'.*\.txt') # doctest: +SKIP
+
+For a complete list of corpus reader subclasses, see the API
+documentation for `nltk.corpus.reader`.
+
+Corpus Types
+============
+
+Corpora vary widely in the types of content they include.  This is
+reflected in the fact that the base class `CorpusReader` only defines
+a few general-purpose methods for listing and accessing the files that
+make up a corpus.  It is up to the subclasses to define *data access
+methods* that provide access to the information in the corpus.
+However, corpus reader subclasses should be consistent in their
+definitions of these data access methods wherever possible.
+
+At a high level, corpora can be divided into three basic types:
+
+- A *token corpus* contains information about specific occurrences of
+  language use (or linguistic tokens), such as dialogues or written
+  texts.  Examples of token corpora are collections of written text
+  and collections of speech.
+
+- A *type corpus*, or *lexicon*, contains information about a coherent
+  set of lexical items (or linguistic types).  Examples of lexicons
+  are dictionaries and word lists.
+
+- A *language description corpus* contains information about a set of
+  non-lexical linguistic constructs, such as grammar rules.
+
+However, many individual corpora blur the distinctions between these
+types.  For example, corpora that are primarily lexicons may include
+token data in the form of example sentences; and corpora that are
+primarily token corpora may be accompanied by one or more word lists
+or other lexical data sets.
+
+Because corpora vary so widely in their information content, we have
+decided that it would not be wise to use separate corpus reader base
+classes for different corpus types.  Instead, we simply try to make
+the corpus readers consistent wherever possible, but let them differ
+where the underlying data itself differs.
+
+Common Corpus Reader Methods
+============================
+
+As mentioned above, there are only a handful of methods that all
+corpus readers are guaranteed to implement.  These methods provide
+access to the files that contain the corpus data.  Every corpus is
+assumed to consist of one or more files, all located in a common root
+directory (or in subdirectories of that root directory).  The absolute
+path to the root directory is stored in the ``root`` property:
+
+    >>> import os
+    >>> str(nltk.corpus.genesis.root).replace(os.path.sep,'/') # doctest: +ELLIPSIS
+    '.../nltk_data/corpora/genesis'
+
+Each file within the corpus is identified by a platform-independent
+identifier, which is basically a path string that uses ``/`` as the
+path separator.  For example, this identifier can be converted to a relative
+path as follows:
+
+    >>> some_corpus_file_id = nltk.corpus.reuters.fileids()[0]
+    >>> import os.path
+    >>> os.path.normpath(some_corpus_file_id).replace(os.path.sep,'/')
+    'test/14826'
+
+To get a list of all data files that make up a corpus, use the
+``fileids()`` method.  In some corpora, these files will not all contain
+the same type of data; for example, for the ``nltk.corpus.timit``
+corpus, ``fileids()`` will return a list including text files, word
+segmentation files, phonetic transcription files, sound files, and
+metadata files.  For corpora with diverse file types, the ``fileids()``
+method will often take one or more optional arguments, which can be
+used to get a list of the files with a specific file type:
+
+    >>> nltk.corpus.timit.fileids() # doctest: +ELLIPSIS
+    ['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa1.txt', 'dr1-fvmh0/sa1.wav', ...]
+    >>> nltk.corpus.timit.fileids('phn') # doctest: +ELLIPSIS
+    ['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa2.phn', 'dr1-fvmh0/si1466.phn', ...]
+
+In some corpora, the files are divided into distinct categories.  For
+these corpora, the ``fileids()`` method takes an optional argument,
+which can be used to get a list of the files within a specific category:
+
+    >>> nltk.corpus.brown.fileids('hobbies') # doctest: +ELLIPSIS
+    ['ce01', 'ce02', 'ce03', 'ce04', 'ce05', 'ce06', 'ce07', ...]
+
+The ``abspath()`` method can be used to find the absolute path to a
+corpus file, given its file identifier:
+
+    >>> str(nltk.corpus.brown.abspath('ce06')).replace(os.path.sep,'/') # doctest: +ELLIPSIS
+    '.../corpora/brown/ce06'
+
+The ``abspaths()`` method can be used to find the absolute paths for
+one corpus file, a list of corpus files, or (if no fileids are
+specified) all corpus files.
+
+This method is mainly useful as a helper method when defining corpus
+data access methods, since data access methods can usually be called
+with a string argument (to get a view for a specific file), with a
+list argument (to get a view for a specific list of files), or with no
+argument (to get a view for the whole corpus).
+
+Data Access Methods
+===================
+
+Individual corpus reader subclasses typically extend this basic set of
+file-access methods with one or more *data access methods*, which provide
+easy access to the data contained in the corpus.  The signatures for
+data access methods often have the basic form::
+
+    corpus_reader.some_data_access(fileids=None, ...options...)
+
+Where ``fileids`` can be a single file identifier string (to get a view
+for a specific file); a list of file identifier strings (to get a view
+for a specific list of files); or None (to get a view for the entire
+corpus).  Some of the common data access methods, and their return
+types, are:
+
+  - ``corpus.words()``: list of str
+  - ``corpus.sents()``: list of (list of str)
+  - ``corpus.paras()``: list of (list of (list of str))
+  - ``corpus.tagged_words()``: list of (str,str) tuples
+  - ``corpus.tagged_sents()``: list of (list of (str,str))
+  - ``corpus.tagged_paras()``: list of (list of (list of (str,str)))
+  - ``corpus.chunked_sents()``: list of (Tree with (str,str) leaves)
+  - ``corpus.parsed_sents()``: list of (Tree with str leaves)
+  - ``corpus.parsed_paras()``: list of (list of (Tree with str leaves))
+  - ``corpus.xml()``: a single xml ElementTree
+  - ``corpus.raw()``: str (unprocessed corpus contents)
+
+For example, the `words()` method is supported by many different
+corpora, and returns a flat list of word strings:
+
+    >>> nltk.corpus.brown.words()
+    ['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
+    >>> nltk.corpus.treebank.words()
+    ['Pierre', 'Vinken', ',', '61', 'years', 'old', ...]
+    >>> nltk.corpus.conll2002.words()
+    ['Sao', 'Paulo', '(', 'Brasil', ')', ',', '23', ...]
+    >>> nltk.corpus.genesis.words()
+    ['In', 'the', 'beginning', 'God', 'created', ...]
+
+On the other hand, the `tagged_words()` method is only supported by
+corpora that include part-of-speech annotations:
+
+    >>> nltk.corpus.brown.tagged_words()
+    [('The', 'AT'), ('Fulton', 'NP-TL'), ...]
+    >>> nltk.corpus.treebank.tagged_words()
+    [('Pierre', 'NNP'), ('Vinken', 'NNP'), ...]
+    >>> nltk.corpus.conll2002.tagged_words()
+    [('Sao', 'NC'), ('Paulo', 'VMI'), ('(', 'Fpa'), ...]
+    >>> nltk.corpus.genesis.tagged_words()
+    Traceback (most recent call last):
+      ...
+    AttributeError: 'PlaintextCorpusReader' object has no attribute 'tagged_words'
+
+Although most corpus readers use file identifiers to index their
+content, some corpora use different identifiers instead.  For example,
+the data access methods for the ``timit`` corpus use *utterance
+identifiers* to select which corpus items should be returned:
+
+    >>> nltk.corpus.timit.utteranceids() # doctest: +ELLIPSIS
+    ['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466', ...]
+    >>> nltk.corpus.timit.words('dr1-fvmh0/sa2')
+    ["don't", 'ask', 'me', 'to', 'carry', 'an', 'oily', 'rag', 'like', 'that']
+
+Attempting to call ``timit``\ 's data access methods with a file
+identifier will result in an exception:
+
+    >>> nltk.corpus.timit.fileids() # doctest: +ELLIPSIS
+    ['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa1.txt', 'dr1-fvmh0/sa1.wav', ...]
+    >>> nltk.corpus.timit.words('dr1-fvmh0/sa1.txt') # doctest: +SKIP
+    Traceback (most recent call last):
+      ...
+    IOError: No such file or directory: '.../dr1-fvmh0/sa1.txt.wrd'
+
+As another example, the ``propbank`` corpus defines the ``roleset()``
+method, which expects a roleset identifier, not a file identifier:
+
+    >>> roleset = nltk.corpus.propbank.roleset('eat.01')
+    >>> from xml.etree import ElementTree as ET
+    >>> print(ET.tostring(roleset).decode('utf8')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    <roleset id="eat.01" name="consume" vncls="39.1">
+      <roles>
+        <role descr="consumer, eater" n="0">...</role>...
+      </roles>...
+    </roleset>...
+
+Stream Backed Corpus Views
+==========================
+An important feature of NLTK's corpus readers is that many of them
+access the underlying data files using "corpus views."  A *corpus
+view* is an object that acts like a simple data structure (such as a
+list), but does not store the data elements in memory; instead, data
+elements are read from the underlying data files on an as-needed
+basis.
+
+By only loading items from the file on an as-needed basis, corpus
+views maintain both memory efficiency and responsiveness.  The memory
+efficiency of corpus readers is important because some corpora contain
+very large amounts of data, and storing the entire data set in memory
+could overwhelm many machines.  The responsiveness is important when
+experimenting with corpora in interactive sessions and in in-class
+demonstrations.
+
+The most common corpus view is the `StreamBackedCorpusView`, which
+acts as a read-only list of tokens.  Two additional corpus view
+classes, `ConcatenatedCorpusView` and `LazySubsequence`, make it
+possible to create concatenations and take slices of
+`StreamBackedCorpusView` objects without actually storing the
+resulting list-like object's elements in memory.
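+
+For example, concatenating two corpus views and then slicing the result
+reads no data up front.  A small illustration (the file paths here are
+placeholders, not files in the NLTK data distribution):
+
+    >>> from nltk.corpus.reader.util import StreamBackedCorpusView, read_whitespace_block
+    >>> v1 = StreamBackedCorpusView('file1.txt', read_whitespace_block) # doctest: +SKIP
+    >>> v2 = StreamBackedCorpusView('file2.txt', read_whitespace_block) # doctest: +SKIP
+    >>> both = v1 + v2      # a ConcatenatedCorpusView # doctest: +SKIP
+    >>> tail = both[100:]   # a lazy slice; elements are not read into memory # doctest: +SKIP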
+
+In the future, we may add additional corpus views that act like other
+basic data structures, such as dictionaries.
+
+Writing New Corpus Readers
+==========================
+
+In order to add support for new corpus formats, it is necessary to
+define new corpus reader classes.  For many corpus formats, writing
+new corpus readers is relatively straightforward.  In this section,
+we'll describe what's involved in creating a new corpus reader.  If
+you do create a new corpus reader, we encourage you to contribute it
+back to the NLTK project.
+
+Don't Reinvent the Wheel
+------------------------
+Before you start writing a new corpus reader, you should check to be
+sure that the desired format can't be read using an existing corpus
+reader with appropriate constructor arguments.  For example, although
+the `TaggedCorpusReader` assumes that words and tags are separated by
+``/`` characters by default, an alternative tag-separation character
+can be specified via the ``sep`` constructor argument.  You should
+also check whether the new corpus format can be handled by subclassing
+an existing corpus reader, and tweaking a few methods or variables.
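+
+For example, a hypothetical corpus whose tags are separated by ``|``
+could be read without writing any new code (the path and file pattern
+below are placeholders):
+
+    >>> from nltk.corpus.reader.tagged import TaggedCorpusReader
+    >>> reader = TaggedCorpusReader('/path/to/corpus', r'.*\.pos',
+    ...                             sep='|') # doctest: +SKIP
+    >>> reader.tagged_words() # doctest: +SKIP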
+
+Design
+------
+If you decide to write a new corpus reader from scratch, then you
+should first decide which data access methods you want the reader to
+provide, and what their signatures should be.  You should look at
+existing corpus readers that process corpora with similar data
+contents, and try to be consistent with those corpus readers whenever
+possible.
+
+You should also consider what sets of identifiers are appropriate for
+the corpus format.  Where it's practical, file identifiers should be
+used.  However, for some corpora, it may make sense to use additional
+sets of identifiers.  Each set of identifiers should have a distinct
+name (e.g., fileids, utteranceids, rolesets); and you should be consistent
+in using that name to refer to that identifier.  Do not use parameter
+names like ``id``, which leave it unclear what type of identifier is
+required.
+
+Once you've decided what data access methods and identifiers are
+appropriate for your corpus, you should decide if there are any
+customizable parameters that you'd like the corpus reader to handle.
+These parameters make it possible to use a single corpus reader to
+handle a wider variety of corpora.  The ``sep`` argument for
+`TaggedCorpusReader`, mentioned above, is an example of a customizable
+corpus reader parameter.
+
+Implementation
+--------------
+
+Constructor
+~~~~~~~~~~~
+If your corpus reader implements any customizable parameters, then
+you'll need to override the constructor.  Typically, the new
+constructor will first call its base class's constructor, and then
+store the customizable parameters.  For example, the
+`ConllChunkCorpusReader`\ 's constructor is defined as follows::
+
+    def __init__(self, root, fileids, chunk_types, encoding='utf8',
+                 tagset=None, separator=None):
+        ConllCorpusReader.__init__(
+                self, root, fileids, ('words', 'pos', 'chunk'),
+                chunk_types=chunk_types, encoding=encoding,
+                tagset=tagset, separator=separator)
+
+If your corpus reader does not implement any customizable parameters,
+then you can often just inherit the base class's constructor.
+
+Data Access Methods
+~~~~~~~~~~~~~~~~~~~
+
+The most common type of data access method takes an argument
+identifying which files to access, and returns a view covering those
+files.  This argument may be a single file identifier string (to get a
+view for a specific file); a list of file identifier strings (to get a
+view for a specific list of files); or None (to get a view for the
+entire corpus).  The method's implementation converts this argument to
+a list of path names using the `abspaths()` method, which handles all
+three value types (string, list, and None):
+
+    >>> print(str(nltk.corpus.brown.abspaths()).replace('\\\\','/')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [FileSystemPathPointer('.../corpora/brown/ca01'),
+     FileSystemPathPointer('.../corpora/brown/ca02'), ...]
+    >>> print(str(nltk.corpus.brown.abspaths('ce06')).replace('\\\\','/')) # doctest: +ELLIPSIS
+    [FileSystemPathPointer('.../corpora/brown/ce06')]
+    >>> print(str(nltk.corpus.brown.abspaths(['ce06', 'ce07'])).replace('\\\\','/')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [FileSystemPathPointer('.../corpora/brown/ce06'),
+     FileSystemPathPointer('.../corpora/brown/ce07')]
+
+An example of this type of method is the `words()` method, defined by
+the `PlaintextCorpusReader` as follows:
+
+    >>> def words(self, fileids=None):
+    ...     return concat([self.CorpusView(fileid, self._read_word_block)
+    ...                    for fileid in self.abspaths(fileids)])
+
+This method first uses `abspaths()` to convert ``fileids`` to a list of
+absolute paths.  It then creates a corpus view for each file, using
+the `PlaintextCorpusReader._read_word_block()` method to read elements
+from the data file (see the discussion of corpus views below).
+Finally, it combines these corpus views using the
+`nltk.corpus.reader.util.concat()` function.
+
+When writing a corpus reader for a corpus that is never expected to be
+very large, it can sometimes be appropriate to read the files
+directly, rather than using a corpus view.  For example, the
+`WordListCorpusReader` class defines its `words()` method as follows:
+
+    >>> def words(self, fileids=None):
+    ...     return concat([[w for w in open(fileid).read().split('\n') if w]
+    ...                    for fileid in self.abspaths(fileids)])
+
+(This is usually more appropriate for lexicons than for token corpora.)
+
+If the type of data returned by a data access method is one for which
+NLTK has a conventional representation (e.g., words, tagged words, and
+parse trees), then you should use that representation.  Otherwise, you
+may find it necessary to define your own representation.  For data
+structures that are relatively corpus-specific, it's usually best to
+define new classes for these elements.  For example, the ``propbank``
+corpus defines the `PropbankInstance` class to store the semantic role
+labeling instances described by the corpus; and the ``ppattach``
+corpus defines the `PPAttachment` class to store the prepositional
+attachment instances described by the corpus.
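+
+For instance, the ``propbank`` reader's ``instances()`` method returns
+these `PropbankInstance` objects (shown as a sketch rather than a
+regression test):
+
+    >>> instances = nltk.corpus.propbank.instances() # doctest: +SKIP
+    >>> instances[0].roleset   # roleset id of the first instance # doctest: +SKIP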
+
+Corpus Views
+~~~~~~~~~~~~
+.. (Much of the content for this section is taken from the
+   StreamBackedCorpusView docstring.)
+
+The heart of a `StreamBackedCorpusView` is its *block reader*
+function, which reads zero or more tokens from a stream, and returns
+them as a list.  A very simple example of a block reader is:
+
+    >>> def simple_block_reader(stream):
+    ...     return stream.readline().split()
+
+This simple block reader reads a single line at a time, and returns one
+token (a string) for each whitespace-separated substring on the line.
+A `StreamBackedCorpusView` built from this block reader will act like a
+read-only list of all the whitespace-separated tokens in an underlying
+file.
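+
+A minimal sketch of how such a view could be constructed (the file path
+is a placeholder):
+
+    >>> from nltk.corpus.reader.util import StreamBackedCorpusView
+    >>> view = StreamBackedCorpusView('corpus.txt',
+    ...                               block_reader=simple_block_reader) # doctest: +SKIP
+    >>> view[0]   # reads only as far as the end of the first block # doctest: +SKIP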
+
+When deciding how to define the block reader for a given corpus,
+careful consideration should be given to the size of blocks handled by
+the block reader.  Smaller block sizes will increase the memory
+requirements of the corpus view's internal data structures (by 2
+integers per block).  On the other hand, larger block sizes may
+decrease performance for random access to the corpus.  (But note that
+larger block sizes will *not* decrease performance for iteration.)
+
+Internally, the `StreamBackedCorpusView` class maintains a partial
+mapping from token index to file position, with one entry per block.
+When a token with a given index *i* is requested, the corpus view
+constructs it as follows:
+
+1. First, it searches the toknum/filepos mapping for the token index
+   closest to (but less than or equal to) *i*.
+
+2. Then, starting at the file position corresponding to that index, it
+   reads one block at a time using the block reader until it reaches
+   the requested token.
+
+The toknum/filepos mapping is created lazily: it is initially empty,
+but every time a new block is read, the block's initial token is added
+to the mapping.  (Thus, the toknum/filepos map has one entry per
+block.)
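+
+Conceptually, the search in step 1 is a binary search over the saved
+(toknum, filepos) pairs.  A rough sketch of the idea (illustrative
+only, not NLTK's actual implementation):
+
+    >>> import bisect
+    >>> def find_block(toknums, fileposs, i):
+    ...     '''Return (toknum, filepos) for the block containing token i.'''
+    ...     j = bisect.bisect_right(toknums, i) - 1
+    ...     return toknums[j], fileposs[j]
+    >>> find_block([0, 10, 20], [0, 57, 118], 15)
+    (10, 57)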
+
+You can create your own corpus view in one of two ways:
+
+1. Call the `StreamBackedCorpusView` constructor, and provide your
+   block reader function via the ``block_reader`` argument.
+
+2. Subclass `StreamBackedCorpusView`, and override the
+   `read_block()` method.
+
+The first option is usually easier, but the second option can allow
+you to write a single `read_block` method whose behavior can be
+customized by different parameters to the subclass's constructor.  For
+an example of this design pattern, see the `TaggedCorpusView` class,
+which is used by `TaggedCorpusReader`.
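+
+A minimal sketch of the second option (the class and file names here
+are illustrative, not part of NLTK's API):
+
+    >>> from nltk.corpus.reader.util import StreamBackedCorpusView
+    >>> class LineView(StreamBackedCorpusView):
+    ...     '''A corpus view with one token per line of the file.'''
+    ...     def read_block(self, stream):
+    ...         line = stream.readline()
+    ...         return [line.strip()] if line else []
+    >>> view = LineView('lines.txt') # doctest: +SKIP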
+
+----------------
+Regression Tests
+----------------
+
+The following helper functions are used to create and then delete
+testing corpora that are stored in temporary directories.  These
+testing corpora are used to make sure the readers work correctly.
+
+    >>> import tempfile, os.path, textwrap
+    >>> def make_testcorpus(ext='', **fileids):
+    ...     root = tempfile.mkdtemp()
+    ...     for fileid, contents in fileids.items():
+    ...         fileid += ext
+    ...         f = open(os.path.join(root, fileid), 'w')
+    ...         f.write(textwrap.dedent(contents))
+    ...         f.close()
+    ...     return root
+    >>> def del_testcorpus(root):
+    ...     for fileid in os.listdir(root):
+    ...         os.remove(os.path.join(root, fileid))
+    ...     os.rmdir(root)
+
+Plaintext Corpus Reader
+=======================
+The plaintext corpus reader is used to access corpora that consist of
+unprocessed plaintext data.  It assumes that paragraph breaks are
+indicated by blank lines.  Sentences and words can be tokenized using
+the default tokenizers, or by custom tokenizers specified as
+parameters to the constructor.
+
+    >>> root = make_testcorpus(ext='.txt',
+    ...     a="""\
+    ...     This is the first sentence.  Here is another
+    ...     sentence!  And here's a third sentence.
+    ...
+    ...     This is the second paragraph.  Tokenization is currently
+    ...     fairly simple, so the period in Mr. gets tokenized.
+    ...     """,
+    ...     b="""This is the second file.""")
+
+    >>> from nltk.corpus.reader.plaintext import PlaintextCorpusReader
+
+The list of documents can be specified explicitly, or implicitly (using a
+regexp).  The ``ext`` argument specifies a file extension.
+
+    >>> corpus = PlaintextCorpusReader(root, ['a.txt', 'b.txt'])
+    >>> corpus.fileids()
+    ['a.txt', 'b.txt']
+    >>> corpus = PlaintextCorpusReader(root, r'.*\.txt')
+    >>> corpus.fileids()
+    ['a.txt', 'b.txt']
+
+The directory containing the corpus is corpus.root:
+
+    >>> str(corpus.root) == str(root)
+    True
+
+We can get a list of words, or the raw string:
+
+    >>> corpus.words()
+    ['This', 'is', 'the', 'first', 'sentence', '.', ...]
+    >>> corpus.raw()[:40]
+    'This is the first sentence.  Here is ano'
+
+Check that reading individual documents works, and reading all documents at
+once works:
+
+    >>> len(corpus.words()), [len(corpus.words(d)) for d in corpus.fileids()]
+    (46, [40, 6])
+    >>> corpus.words('a.txt')
+    ['This', 'is', 'the', 'first', 'sentence', '.', ...]
+    >>> corpus.words('b.txt')
+    ['This', 'is', 'the', 'second', 'file', '.']
+    >>> corpus.words()[:4], corpus.words()[-4:]
+    (['This', 'is', 'the', 'first'], ['the', 'second', 'file', '.'])
+
+We're done with the test corpus:
+
+    >>> del_testcorpus(root)
+
+Test the plaintext corpora that come with nltk:
+
+    >>> from nltk.corpus import abc, genesis, inaugural
+    >>> from nltk.corpus import state_union, webtext
+    >>> for corpus in (abc, genesis, inaugural, state_union,
+    ...                webtext):
+    ...     print(str(corpus).replace('\\\\','/'))
+    ...     print('  ', repr(corpus.fileids())[:60])
+    ...     print('  ', repr(corpus.words()[:10])[:60])
+    <PlaintextCorpusReader in '.../nltk_data/corpora/ab...'>
+       ['rural.txt', 'science.txt']
+       ['PM', 'denies', 'knowledge', 'of', 'AWB', ...
+    <PlaintextCorpusReader in '.../nltk_data/corpora/genesi...'>
+       ['english-kjv.txt', 'english-web.txt', 'finnish.txt', ...
+       ['In', 'the', 'beginning', 'God', 'created', 'the', ...
+    <PlaintextCorpusReader in '.../nltk_data/corpora/inaugura...'>
+       ['1789-Washington.txt', '1793-Washington.txt', ...
+       ['Fellow', '-', 'Citizens', 'of', 'the', 'Senate', ...
+    <PlaintextCorpusReader in '.../nltk_data/corpora/state_unio...'>
+       ['1945-Truman.txt', '1946-Truman.txt', ...
+       ['PRESIDENT', 'HARRY', 'S', '.', 'TRUMAN', "'", ...
+    <PlaintextCorpusReader in '.../nltk_data/corpora/webtex...'>
+       ['firefox.txt', 'grail.txt', 'overheard.txt', ...
+       ['Cookie', 'Manager', ':', '"', 'Don', "'", 't', ...
+
+
+Tagged Corpus Reader
+====================
+The tagged corpus reader can give us words, sentences, and paragraphs,
+each tagged or untagged.  All of the read methods can take one item
+(in which case they return the contents of that file) or a list of
+documents (in which case they concatenate the contents of those files).
+By default, they apply to all documents in the corpus.
+
+    >>> root = make_testcorpus(
+    ...     a="""\
+    ...     This/det is/verb the/det first/adj sentence/noun ./punc
+    ...     Here/det  is/verb  another/adj    sentence/noun ./punc
+    ...     Note/verb that/comp you/pron can/verb use/verb \
+    ...           any/noun tag/noun set/noun
+    ...
+    ...     This/det is/verb the/det second/adj paragraph/noun ./punc
+    ...     word/n without/adj a/det tag/noun :/: hello ./punc
+    ...     """,
+    ...     b="""\
+    ...     This/det is/verb the/det second/adj file/noun ./punc
+    ...     """)
+
+    >>> from nltk.corpus.reader.tagged import TaggedCorpusReader
+    >>> corpus = TaggedCorpusReader(root, list('ab'))
+    >>> corpus.fileids()
+    ['a', 'b']
+    >>> str(corpus.root) == str(root)
+    True
+    >>> corpus.words()
+    ['This', 'is', 'the', 'first', 'sentence', '.', ...]
+    >>> corpus.sents() # doctest: +ELLIPSIS
+    [['This', 'is', 'the', 'first', ...], ['Here', 'is', 'another'...], ...]
+    >>> corpus.paras() # doctest: +ELLIPSIS
+    [[['This', ...], ['Here', ...], ...], [['This', ...], ...], ...]
+    >>> corpus.tagged_words() # doctest: +ELLIPSIS
+    [('This', 'DET'), ('is', 'VERB'), ('the', 'DET'), ...]
+    >>> corpus.tagged_sents() # doctest: +ELLIPSIS
+    [[('This', 'DET'), ('is', 'VERB'), ...], [('Here', 'DET'), ...], ...]
+    >>> corpus.tagged_paras() # doctest: +ELLIPSIS
+    [[[('This', 'DET'), ...], ...], [[('This', 'DET'), ...], ...], ...]
+    >>> corpus.raw()[:40]
+    'This/det is/verb the/det first/adj sente'
+    >>> len(corpus.words()), [len(corpus.words(d)) for d in corpus.fileids()]
+    (38, [32, 6])
+    >>> len(corpus.sents()), [len(corpus.sents(d)) for d in corpus.fileids()]
+    (6, [5, 1])
+    >>> len(corpus.paras()), [len(corpus.paras(d)) for d in corpus.fileids()]
+    (3, [2, 1])
+    >>> print(corpus.words('a'))
+    ['This', 'is', 'the', 'first', 'sentence', '.', ...]
+    >>> print(corpus.words('b'))
+    ['This', 'is', 'the', 'second', 'file', '.']
+    >>> del_testcorpus(root)
+
+The Brown Corpus uses the tagged corpus reader:
+
+    >>> from nltk.corpus import brown
+    >>> brown.fileids() # doctest: +ELLIPSIS
+    ['ca01', 'ca02', 'ca03', 'ca04', 'ca05', 'ca06', 'ca07', ...]
+    >>> brown.categories() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    ['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor',
+    'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
+    >>> print(repr(brown.root).replace('\\\\','/')) # doctest: +ELLIPSIS
+    FileSystemPathPointer('.../corpora/brown')
+    >>> brown.words()
+    ['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
+    >>> brown.sents() # doctest: +ELLIPSIS
+    [['The', 'Fulton', 'County', 'Grand', ...], ...]
+    >>> brown.paras() # doctest: +ELLIPSIS
+    [[['The', 'Fulton', 'County', ...]], [['The', 'jury', ...]], ...]
+    >>> brown.tagged_words() # doctest: +ELLIPSIS
+    [('The', 'AT'), ('Fulton', 'NP-TL'), ...]
+    >>> brown.tagged_sents() # doctest: +ELLIPSIS
+    [[('The', 'AT'), ('Fulton', 'NP-TL'), ('County', 'NN-TL'), ...], ...]
+    >>> brown.tagged_paras() # doctest: +ELLIPSIS
+    [[[('The', 'AT'), ...]], [[('The', 'AT'), ...]], ...]
+
+Verbnet Corpus Reader
+=====================
+
+Make sure we're picking up the right number of elements:
+
+    >>> from nltk.corpus import verbnet
+    >>> len(verbnet.lemmas())
+    3621
+    >>> len(verbnet.wordnetids())
+    4953
+    >>> len(verbnet.classids())
+    429
+
+Selecting classids based on various selectors:
+
+    >>> verbnet.classids(lemma='take') # doctest: +NORMALIZE_WHITESPACE
+    ['bring-11.3', 'characterize-29.2', 'convert-26.6.2', 'cost-54.2',
+    'fit-54.3', 'performance-26.7-2', 'steal-10.5']
+    >>> verbnet.classids(wordnetid='lead%2:38:01')
+    ['accompany-51.7']
+    >>> verbnet.classids(fileid='approve-77.xml')
+    ['approve-77']
+    >>> verbnet.classids(classid='admire-31.2') # subclasses
+    ['admire-31.2-1']
+
+vnclass() accepts filenames, long ids, and short ids:
+
+    >>> a = ElementTree.tostring(verbnet.vnclass('admire-31.2.xml'))
+    >>> b = ElementTree.tostring(verbnet.vnclass('admire-31.2'))
+    >>> c = ElementTree.tostring(verbnet.vnclass('31.2'))
+    >>> a == b == c
+    True
+
+fileids() can be used to get files based on verbnet class ids:
+
+    >>> verbnet.fileids('admire-31.2')
+    ['admire-31.2.xml']
+    >>> verbnet.fileids(['admire-31.2', 'obtain-13.5.2'])
+    ['admire-31.2.xml', 'obtain-13.5.2.xml']
+    >>> verbnet.fileids('badidentifier')
+    Traceback (most recent call last):
+      ...
+    ValueError: vnclass identifier 'badidentifier' not found
+
+longid() and shortid() can be used to convert identifiers:
+
+    >>> verbnet.longid('31.2')
+    'admire-31.2'
+    >>> verbnet.longid('admire-31.2')
+    'admire-31.2'
+    >>> verbnet.shortid('31.2')
+    '31.2'
+    >>> verbnet.shortid('admire-31.2')
+    '31.2'
+    >>> verbnet.longid('badidentifier')
+    Traceback (most recent call last):
+      ...
+    ValueError: vnclass identifier 'badidentifier' not found
+    >>> verbnet.shortid('badidentifier')
+    Traceback (most recent call last):
+      ...
+    ValueError: vnclass identifier 'badidentifier' not found
+
+Corpus View Regression Tests
+============================
+
+Select some corpus files to play with:
+
+    >>> import nltk.data
+    >>> # A very short file (160 chars):
+    >>> f1 = nltk.data.find('corpora/inaugural/README')
+    >>> # A relatively short file (791 chars):
+    >>> f2 = nltk.data.find('corpora/inaugural/1793-Washington.txt')
+    >>> # A longer file (32k chars):
+    >>> f3 = nltk.data.find('corpora/inaugural/1909-Taft.txt')
+    >>> fileids = [f1, f2, f3]
+
+
+Concatenation
+-------------
+Check that concatenation works as intended.
+
+    >>> from nltk.corpus.reader.util import *
+
+    >>> c1 = StreamBackedCorpusView(f1, read_whitespace_block, encoding='utf-8')
+    >>> c2 = StreamBackedCorpusView(f2, read_whitespace_block, encoding='utf-8')
+    >>> c3 = StreamBackedCorpusView(f3, read_whitespace_block, encoding='utf-8')
+    >>> c123 = c1+c2+c3
+    >>> print(c123)
+    ['C-Span', 'Inaugural', 'Address', 'Corpus', 'US', ...]
+
+    >>> l1 = f1.open(encoding='utf-8').read().split()
+    >>> l2 = f2.open(encoding='utf-8').read().split()
+    >>> l3 = f3.open(encoding='utf-8').read().split()
+    >>> l123 = l1+l2+l3
+
+    >>> list(c123) == l123
+    True
+
+    >>> (c1+c2+c3)[100] == l123[100]
+    True
+
+Slicing
+-------
+First, do some tests with fairly small slices.  These will all
+generate plain list values, since they are shorter than
+``LazySubsequence.MIN_SIZE``.
+
+    >>> from nltk.util import LazySubsequence
+    >>> c1 = StreamBackedCorpusView(f1, read_whitespace_block, encoding='utf-8')
+    >>> l1 = f1.open(encoding='utf-8').read().split()
+    >>> print(len(c1))
+    21
+    >>> len(c1) < LazySubsequence.MIN_SIZE
+    True
+
+Choose a list of indices, based on the length, that covers the
+important corner cases:
+
+    >>> indices = [-60, -30, -22, -21, -20, -1,
+    ...            0, 1, 10, 20, 21, 22, 30, 60]
+
+Test slicing with explicit start & stop value:
+
+    >>> for s in indices:
+    ...     for e in indices:
+    ...         assert list(c1[s:e]) == l1[s:e]
+
+Test slicing with stop=None:
+
+    >>> for s in indices:
+    ...     assert list(c1[s:]) == l1[s:]
+
+Test slicing with start=None:
+
+    >>> for e in indices:
+    ...     assert list(c1[:e]) == l1[:e]
+
+Test slicing with start=stop=None:
+
+    >>> list(c1[:]) == list(l1[:])
+    True
+
+Next, we'll do some tests with much longer slices.  These will
+generate LazySubsequence objects.
+
+    >>> c3 = StreamBackedCorpusView(f3, read_whitespace_block, encoding='utf-8')
+    >>> l3 = f3.open(encoding='utf-8').read().split()
+    >>> print(len(c3))
+    5430
+    >>> len(c3) > LazySubsequence.MIN_SIZE*2
+    True
+
+Choose a list of indices, based on the length, that covers the
+important corner cases:
+
+    >>> indices = [-12000, -6000, -5431, -5430, -5429, -3000, -200, -1,
+    ...            0, 1, 200, 3000, 5000, 5429, 5430, 5431, 6000, 12000]
+
+Test slicing with explicit start & stop value:
+
+    >>> for s in indices:
+    ...     for e in indices:
+    ...         assert list(c3[s:e]) == l3[s:e]
+
+Test slicing with stop=None:
+
+    >>> for s in indices:
+    ...     assert list(c3[s:]) == l3[s:]
+
+Test slicing with start=None:
+
+    >>> for e in indices:
+    ...     assert list(c3[:e]) == l3[:e]
+
+Test slicing with start=stop=None:
+
+    >>> list(c3[:]) == list(l3[:])
+    True
+
+Multiple Iterators
+------------------
+If multiple iterators are created for the same corpus view, their
+iteration can be interleaved:
+
+    >>> c3 = StreamBackedCorpusView(f3, read_whitespace_block)
+    >>> iterators = [c3.iterate_from(n) for n in [0,15,30,45]]
+    >>> for i in range(15):
+    ...     for iterator in iterators:
+    ...         print('%-15s' % next(iterator), end=' ')
+    ...     print()
+    My              a               duties          in
+    fellow          heavy           of              a
+    citizens:       weight          the             proper
+    Anyone          of              office          sense
+    who             responsibility. upon            of
+    has             If              which           the
+    taken           not,            he              obligation
+    the             he              is              which
+    oath            has             about           the
+    I               no              to              oath
+    have            conception      enter,          imposes.
+    just            of              or              The
+    taken           the             he              office
+    must            powers          is              of
+    feel            and             lacking         an
+
+SeekableUnicodeStreamReader
+===========================
+
+The file-like objects provided by the ``codecs`` module unfortunately
+suffer from a bug that prevents them from working correctly with
+corpus view objects.  In particular, although they expose ``seek()``
+and ``tell()`` methods, those methods do not exhibit the expected
+behavior, because they are not synchronized with the internal buffers
+that are kept by the file-like objects.  For example, the ``tell()``
+method will return the file position at the end of the buffers (whose
+contents have not yet been returned by the stream); and therefore this
+file position cannot be used to return to the 'current' location in
+the stream (since ``seek()`` has no way to reconstruct the buffers).
+
+To get around these problems, we define a new class,
+`SeekableUnicodeStreamReader`, to act as a file-like interface to
+files containing encoded unicode data.  This class is loosely based on
+the ``codecs.StreamReader`` class.  To construct a new reader, we call
+the constructor with an underlying stream and an encoding name:
+
+    >>> from io import StringIO, BytesIO
+    >>> from nltk.data import SeekableUnicodeStreamReader
+    >>> stream = BytesIO(b"""\
+    ... This is a test file.
+    ... It is encoded in ascii.
+    ... """.decode('ascii').encode('ascii'))
+    >>> reader = SeekableUnicodeStreamReader(stream, 'ascii')
+
+`SeekableUnicodeStreamReader`\ s support all of the normal operations
+supplied by a read-only stream.  Note that all of the read operations
+return text (``str``) objects, not ``bytes`` objects.
+
+    >>> reader.read()         # read the entire file.
+    'This is a test file.\nIt is encoded in ascii.\n'
+    >>> reader.seek(0)        # rewind to the start.
+    >>> reader.read(5)        # read at most 5 bytes.
+    'This '
+    >>> reader.readline()     # read to the end of the line.
+    'is a test file.\n'
+    >>> reader.seek(0)        # rewind to the start.
+    >>> for line in reader:
+    ...     print(repr(line))      # iterate over lines
+    'This is a test file.\n'
+    'It is encoded in ascii.\n'
+    >>> reader.seek(0)        # rewind to the start.
+    >>> reader.readlines()    # read a list of line strings
+    ['This is a test file.\n', 'It is encoded in ascii.\n']
+    >>> reader.close()
+
+Size argument to ``read()``
+---------------------------
+The ``size`` argument to ``read()`` specifies the maximum number of
+*bytes* to read, not the maximum number of *characters*.  Thus, for
+encodings that use multiple bytes per character, it may return fewer
+characters than the ``size`` argument:
+
+    >>> stream = BytesIO(b"""\
+    ... This is a test file.
+    ... It is encoded in utf-16.
+    ... """.decode('ascii').encode('utf-16'))
+    >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
+    >>> reader.read(10)
+    'This '
+
+If a read block ends in the middle of the byte string encoding a
+single character, then that byte string is stored in an internal
+buffer, and re-used on the next call to ``read()``.  However, if the
+size argument is too small to read even a single character, even
+though at least one character is available, then the ``read()`` method
+will read additional bytes until it can return a single character.
+This ensures that the ``read()`` method does not return an empty
+string, which could be mistaken for indicating the end of the file.
+
+    >>> reader.seek(0)            # rewind to the start.
+    >>> reader.read(1)            # we actually need to read 4 bytes
+    'T'
+    >>> int(reader.tell())
+    4
+
+The ``readline()`` method may read more than a single line of text, in
+which case it stores the text that it does not return in a buffer.  If
+this buffer is not empty, then its contents will be included in the
+value returned by the next call to ``read()``, regardless of the
+``size`` argument, since they are available without reading any new
+bytes from the stream:
+
+    >>> reader.seek(0)            # rewind to the start.
+    >>> reader.readline()         # stores extra text in a buffer
+    'This is a test file.\n'
+    >>> print(reader.linebuffer)   # examine the buffer contents
+    ['It is encoded i']
+    >>> reader.read(0)            # returns the contents of the buffer
+    'It is encoded i'
+    >>> print(reader.linebuffer)   # examine the buffer contents
+    None
+
+Seek and Tell
+-------------
+In addition to these basic read operations,
+`SeekableUnicodeStreamReader` also supports the ``seek()`` and
+``tell()`` operations.  However, some care must still be taken when
+using these operations.  In particular, the only file offsets that
+should be passed to ``seek()`` are ``0`` and any offset that has been
+returned by ``tell``.
+
+    >>> stream = BytesIO(b"""\
+    ... This is a test file.
+    ... It is encoded in utf-16.
+    ... """.decode('ascii').encode('utf-16'))
+    >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
+    >>> reader.read(20)
+    'This is a '
+    >>> pos = reader.tell(); print(pos)
+    22
+    >>> reader.read(20)
+    'test file.'
+    >>> reader.seek(pos)     # rewind to the position from tell.
+    >>> reader.read(20)
+    'test file.'
+
+The ``seek()`` and ``tell()`` methods work properly even when
+``readline()`` is used.
+
+    >>> stream = BytesIO(b"""\
+    ... This is a test file.
+    ... It is encoded in utf-16.
+    ... """.decode('ascii').encode('utf-16'))
+    >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
+    >>> reader.readline()
+    'This is a test file.\n'
+    >>> pos = reader.tell(); print(pos)
+    44
+    >>> reader.readline()
+    'It is encoded in utf-16.\n'
+    >>> reader.seek(pos)     # rewind to the position from tell.
+    >>> reader.readline()
+    'It is encoded in utf-16.\n'
+
+
+Squashed Bugs
+=============
+
+svn 5276 fixed a bug in the comment-stripping behavior of
+parse_sexpr_block.
+
+    >>> from io import StringIO
+    >>> from nltk.corpus.reader.util import read_sexpr_block
+    >>> f = StringIO(b"""
+    ... (a b c)
+    ... # This line is a comment.
+    ... (d e f\ng h)""".decode('ascii'))
+    >>> print(read_sexpr_block(f, block_size=38, comment_char='#'))
+    ['(a b c)']
+    >>> print(read_sexpr_block(f, block_size=38, comment_char='#'))
+    ['(d e f\ng h)']
+
+svn 5277 fixed a bug in parse_sexpr_block, which would cause it to
+enter an infinite loop if a file ended mid-sexpr, or ended with a
+token that was not followed by whitespace.  A related bug caused
+an infinite loop if the corpus ended in an unmatched close paren --
+this was fixed in svn 5279.
+
+    >>> f = StringIO(b"""
+    ... This file ends mid-sexpr
+    ... (hello (world""".decode('ascii'))
+    >>> for i in range(3): print(read_sexpr_block(f))
+    ['This', 'file', 'ends', 'mid-sexpr']
+    ['(hello (world']
+    []
+
+    >>> f = StringIO(b"This file has no trailing whitespace.".decode('ascii'))
+    >>> for i in range(3): print(read_sexpr_block(f))
+    ['This', 'file', 'has', 'no', 'trailing']
+    ['whitespace.']
+    []
+
+    >>> # Bug fixed in 5279:
+    >>> f = StringIO(b"a b c)".decode('ascii'))
+    >>> for i in range(3): print(read_sexpr_block(f))
+    ['a', 'b']
+    ['c)']
+    []
+
+
+svn 5624 & 5265 fixed a bug in ConcatenatedCorpusView, which caused it
+to return the wrong items when indexed starting at any index beyond
+the first file.
+
+    >>> import nltk
+    >>> sents = nltk.corpus.brown.sents()
+    >>> print(sents[6000])
+    ['Cholesterol', 'and', 'thyroid']
+    >>> print(sents[6000])
+    ['Cholesterol', 'and', 'thyroid']
+
+svn 5728 fixed a bug in Categorized*CorpusReader, which caused them
+to return words from *all* files when just one file was specified.
+
+    >>> from nltk.corpus import reuters
+    >>> reuters.words('training/13085')
+    ['SNYDER', '&', 'lt', ';', 'SOI', '>', 'MAKES', ...]
+    >>> reuters.words('training/5082')
+    ['SHEPPARD', 'RESOURCES', 'TO', 'MERGE', 'WITH', ...]
+
+svn 7227 fixed a bug in the qc corpus reader, which prevented
+access to its tuples() method.
+
+    >>> from nltk.corpus import qc
+    >>> qc.tuples('test.txt')
+    [('NUM:dist', 'How far is it from Denver to Aspen ?'), ('LOC:city', 'What county is Modesto , California in ?'), ...]
+
+
+
diff --git a/nlp_resource_data/nltk/test/corpus_fixt.py b/nlp_resource_data/nltk/test/corpus_fixt.py
new file mode 100644 (file)
index 0000000..17b011b
--- /dev/null
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+
+from nltk.corpus import teardown_module
diff --git a/nlp_resource_data/nltk/test/crubadan.doctest b/nlp_resource_data/nltk/test/crubadan.doctest
new file mode 100644 (file)
index 0000000..2894a41
--- /dev/null
@@ -0,0 +1,65 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+Crubadan Corpus Reader
+======================
+
+Crubadan is an NLTK corpus reader for ngram files provided
+by the Crubadan project. It supports several languages.
+
+    >>> from nltk.corpus import crubadan
+    >>> crubadan.langs() # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+    ['abk', 'abn',..., 'zpa', 'zul']
+
+----------------------------------------
+Language code mapping and helper methods
+----------------------------------------
+
+The web crawler that generates the 3-gram frequencies works at the
+level of "writing systems" rather than languages. Writing systems
+are assigned internal 2-3 letter codes that require mapping to the
+standard ISO 639-3 codes. For more information, please refer to
+the README in the nltk_data/crubadan folder after installing it.
+
+To translate ISO 639-3 codes to "Crubadan Code" (unknown codes return
+``None``, hence the empty output for 'aaa' below):
+
+    >>> crubadan.iso_to_crubadan('eng')
+    'en'
+    >>> crubadan.iso_to_crubadan('fra')
+    'fr'
+    >>> crubadan.iso_to_crubadan('aaa')
+
+Conversely, to translate a Crubadan code to its ISO 639-3 code (again
+``None`` for unknown codes such as 'aa'):
+
+    >>> crubadan.crubadan_to_iso('en')
+    'eng'
+    >>> crubadan.crubadan_to_iso('fr')
+    'fra'
+    >>> crubadan.crubadan_to_iso('aa')
+
+---------------------------
+Accessing ngram frequencies
+---------------------------
+
+On initialization the reader will create a dictionary of every
+language supported by the Crubadan project, mapping each ISO 639-3
+language code to its corresponding ngram frequency distribution.
+
+You can access an individual language's FreqDist and the ngrams within it as follows:
+
+    >>> english_fd = crubadan.lang_freq('eng')
+    >>> english_fd['the']
+    728135
+
+The above accesses the FreqDist of English and returns the frequency of the ngram 'the'.
+An ngram that isn't found within the language will return 0:
+
+    >>> english_fd['sometest']
+    0
+
+A language that isn't supported will raise an exception:
+
+    >>> crubadan.lang_freq('elvish')
+    Traceback (most recent call last):
+    ...
+    RuntimeError: Unsupported language.
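+
+Rather than catching the exception, you can check support up front by
+testing membership in ``langs()`` (a minimal sketch, assuming the
+English profile is installed):
+
+    >>> 'eng' in crubadan.langs()
+    True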
diff --git a/nlp_resource_data/nltk/test/data.doctest b/nlp_resource_data/nltk/test/data.doctest
new file mode 100644 (file)
index 0000000..1fcfb29
--- /dev/null
@@ -0,0 +1,378 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=========================================
+ Loading Resources From the Data Package
+=========================================
+
+    >>> import nltk.data
+
+Overview
+~~~~~~~~
+The `nltk.data` module contains functions that can be used to load
+NLTK resource files, such as corpora, grammars, and saved processing
+objects.
+
+Loading Data Files
+~~~~~~~~~~~~~~~~~~
+Resources are loaded using the function `nltk.data.load()`, which
+takes as its first argument a URL specifying what file should be
+loaded.  The ``nltk:`` protocol loads files from the NLTK data
+distribution:
+
+    >>> tokenizer = nltk.data.load('nltk:tokenizers/punkt/english.pickle')
+    >>> tokenizer.tokenize('Hello.  This is a test.  It works!')
+    ['Hello.', 'This is a test.', 'It works!']
+
+It is important to note that there should be no space following the
+colon (':') in the URL; 'nltk: tokenizers/punkt/english.pickle' will
+not work!
+
+The ``nltk:`` protocol is used by default if no protocol is specified:
+
+    >>> nltk.data.load('tokenizers/punkt/english.pickle') # doctest: +ELLIPSIS
+    <nltk.tokenize.punkt.PunktSentenceTokenizer object at ...>
+
+But it is also possible to load resources from ``http:``, ``ftp:``,
+and ``file:`` URLs, e.g. ``cfg = nltk.data.load('http://example.com/path/to/toy.cfg')``
+
+    >>> # Load a grammar using an absolute path.
+    >>> url = 'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg')
+    >>> url.replace('\\', '/') # doctest: +ELLIPSIS
+    'file:...toy.cfg'
+    >>> print(nltk.data.load(url)) # doctest: +ELLIPSIS
+    Grammar with 14 productions (start state = S)
+        S -> NP VP
+        PP -> P NP
+        ...
+        P -> 'on'
+        P -> 'in'
+
+The second argument to the `nltk.data.load()` function specifies the
+file format, which determines how the file's contents are processed
+before they are returned by ``load()``.  The formats that are
+currently supported by the data module are described by the dictionary
+`nltk.data.FORMATS`:
+
+    >>> for format, descr in sorted(nltk.data.FORMATS.items()):
+    ...     print('{0:<7} {1:}'.format(format, descr)) # doctest: +NORMALIZE_WHITESPACE
+    cfg     A context free grammar.
+    fcfg    A feature CFG.
+    fol     A list of first order logic expressions, parsed with
+    nltk.sem.logic.Expression.fromstring.
+    json    A serialized python object, stored using the json module.
+    logic   A list of first order logic expressions, parsed with
+    nltk.sem.logic.LogicParser.  Requires an additional logic_parser
+    parameter
+    pcfg    A probabilistic CFG.
+    pickle  A serialized python object, stored using the pickle
+    module.
+    raw     The raw (byte string) contents of a file.
+    text    The raw (unicode string) contents of a file. 
+    val     A semantic valuation, parsed by
+    nltk.sem.Valuation.fromstring.
+    yaml    A serialized python object, stored using the yaml module.
+
+`nltk.data.load()` will raise a ValueError if a bad format name is
+specified:
+
+    >>> nltk.data.load('grammars/sample_grammars/toy.cfg', 'bar')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Unknown format type!
+
+By default, the ``"auto"`` format is used, which chooses a format
+based on the filename's extension.  The mapping from file extensions
+to format names is specified by `nltk.data.AUTO_FORMATS`:
+
+    >>> for ext, format in sorted(nltk.data.AUTO_FORMATS.items()):
+    ...     print('.%-7s -> %s' % (ext, format))
+    .cfg     -> cfg
+    .fcfg    -> fcfg
+    .fol     -> fol
+    .json    -> json
+    .logic   -> logic
+    .pcfg    -> pcfg
+    .pickle  -> pickle
+    .text    -> text
+    .txt     -> text
+    .val     -> val
+    .yaml    -> yaml
+
+If `nltk.data.load()` is unable to determine the format based on the
+filename's extension, it will raise a ValueError:
+
+    >>> nltk.data.load('foo.bar')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Could not determine format for foo.bar based on its file
+    extension; use the "format" argument to specify the format explicitly.
+
+Note that by explicitly specifying the ``format`` argument, you can
+override the load method's default processing behavior.  For example,
+to get the unprocessed string contents of any file, simply use
+``format="text"`` (or ``format="raw"`` for the bytes):
+
+    >>> s = nltk.data.load('grammars/sample_grammars/toy.cfg', 'text') 
+    >>> print(s) # doctest: +ELLIPSIS
+    S -> NP VP
+    PP -> P NP
+    NP -> Det N | NP PP
+    VP -> V NP | VP PP
+    ...
+
+Making Local Copies
+~~~~~~~~~~~~~~~~~~~
+..  This will not be visible in the html output: create a tempdir to
+    play in.
+    >>> import tempfile, os
+    >>> tempdir = tempfile.mkdtemp()
+    >>> old_dir = os.path.abspath('.')
+    >>> os.chdir(tempdir)
+
+The function `nltk.data.retrieve()` copies a given resource to a local
+file.  This can be useful, for example, if you want to edit one of the
+sample grammars.
+
+    >>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg')
+    Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy.cfg'
+
+    >>> # Simulate editing the grammar.
+    >>> with open('toy.cfg') as inp:
+    ...     s = inp.read().replace('NP', 'DP')
+    >>> with open('toy.cfg', 'w') as out:
+    ...     _bytes_written = out.write(s)
+
+    >>> # Load the edited grammar, & display it.
+    >>> cfg = nltk.data.load('file:///' + os.path.abspath('toy.cfg'))
+    >>> print(cfg) # doctest: +ELLIPSIS
+    Grammar with 14 productions (start state = S)
+        S -> DP VP
+        PP -> P DP
+        ...
+        P -> 'on'
+        P -> 'in'
+
+The second argument to `nltk.data.retrieve()` specifies the filename
+for the new copy of the file.  By default, the source file's filename
+is used.
+
+    >>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg', 'mytoy.cfg')
+    Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'mytoy.cfg'
+    >>> os.path.isfile('./mytoy.cfg')
+    True
+    >>> nltk.data.retrieve('grammars/sample_grammars/np.fcfg')
+    Retrieving 'nltk:grammars/sample_grammars/np.fcfg', saving to 'np.fcfg'
+    >>> os.path.isfile('./np.fcfg')
+    True
+
+If a file with the specified (or default) filename already exists in
+the current directory, then `nltk.data.retrieve()` will raise a
+ValueError exception.  It will *not* overwrite the file:
+
+    >>> os.path.isfile('./toy.cfg')
+    True
+    >>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg') # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+      . . .
+    ValueError: File '...toy.cfg' already exists!
+
+..  This will not be visible in the html output: clean up the tempdir.
+    >>> os.chdir(old_dir)
+    >>> for f in os.listdir(tempdir):
+    ...     os.remove(os.path.join(tempdir, f))
+    >>> os.rmdir(tempdir)
+
+Finding Files in the NLTK Data Package
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The `nltk.data.find()` function searches the NLTK data package for a
+given file, and returns a pointer to that file.  This pointer can
+either be a `FileSystemPathPointer` (whose `path` attribute gives the
+absolute path of the file); or a `ZipFilePathPointer`, specifying a
+zipfile and the name of an entry within that zipfile.  Both pointer
+types define the `open()` method, which can be used to read the string
+contents of the file.
+
+    >>> path = nltk.data.find('corpora/abc/rural.txt')
+    >>> str(path) # doctest: +ELLIPSIS
+    '...rural.txt'
+    >>> print(path.open().read(60).decode())
+    PM denies knowledge of AWB kickbacks
+    The Prime Minister has 
+
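+The ``path`` returned above may be either pointer type, depending on
+whether the resource is installed as a plain file or inside a zipfile.
+A quick sanity check (a sketch; both classes live in ``nltk.data``):
+
+    >>> from nltk.data import FileSystemPathPointer, ZipFilePathPointer
+    >>> isinstance(path, (FileSystemPathPointer, ZipFilePathPointer))
+    True
+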
+Alternatively, the `nltk.data.load()` function can be used with the
+keyword argument ``format="raw"``:
+
+    >>> s = nltk.data.load('corpora/abc/rural.txt', format='raw')[:60]
+    >>> print(s.decode())
+    PM denies knowledge of AWB kickbacks
+    The Prime Minister has 
+
+Alternatively, you can use the keyword argument ``format="text"``:
+
+    >>> s = nltk.data.load('corpora/abc/rural.txt', format='text')[:60]
+    >>> print(s)
+    PM denies knowledge of AWB kickbacks
+    The Prime Minister has 
+
+Resource Caching
+~~~~~~~~~~~~~~~~
+
+NLTK uses a weakref dictionary to maintain a cache of resources that
+have been loaded.  If you load a resource that is already stored in
+the cache, then the cached copy will be returned.  This behavior can
+be seen by the trace output generated when verbose=True:
+
+    >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', verbose=True)
+    <<Loading nltk:grammars/book_grammars/feat0.fcfg>>
+    >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', verbose=True)
+    <<Using cached copy of nltk:grammars/book_grammars/feat0.fcfg>>
+
+If you wish to load a resource from its source, bypassing the cache,
+use the ``cache=False`` argument to `nltk.data.load()`.  This can be
+useful, for example, if the resource is loaded from a local file, and
+you are actively editing that file:
+
+    >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg',cache=False,verbose=True)
+    <<Loading nltk:grammars/book_grammars/feat0.fcfg>>
+
+The cache *no longer* uses weak references.  A resource will not be
+automatically expunged from the cache when no more objects are using
+it.  In the following example, when we clear the variable ``feat0``,
+the reference count for the feature grammar object drops to zero.
+However, the object remains cached:
+
+    >>> del feat0
+    >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg',
+    ...                        verbose=True)
+    <<Using cached copy of nltk:grammars/book_grammars/feat0.fcfg>>
+
+You can clear the entire contents of the cache, using
+`nltk.data.clear_cache()`:
+
+    >>> nltk.data.clear_cache()
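+
+After clearing, a subsequent load goes back to the source (a quick
+sketch, re-using the grammar from above; the trace line matches the
+first load shown earlier):
+
+    >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', verbose=True)
+    <<Loading nltk:grammars/book_grammars/feat0.fcfg>>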
+
+Retrieving other Data Sources
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    >>> formulas = nltk.data.load('grammars/book_grammars/background.fol')
+    >>> for f in formulas: print(str(f))
+    all x.(boxerdog(x) -> dog(x))
+    all x.(boxer(x) -> person(x))
+    all x.-(dog(x) & person(x))
+    all x.(married(x) <-> exists y.marry(x,y))
+    all x.(bark(x) -> dog(x))
+    all x y.(marry(x,y) -> (person(x) & person(y)))
+    -(Vincent = Mia)
+    -(Vincent = Fido)
+    -(Mia = Fido)
+
+Regression Tests
+~~~~~~~~~~~~~~~~
+Create a temp dir for tests that write files:
+
+    >>> import tempfile, os
+    >>> tempdir = tempfile.mkdtemp()
+    >>> old_dir = os.path.abspath('.')
+    >>> os.chdir(tempdir)
+
+The `retrieve()` function accepts all url types:
+
+    >>> urls = ['https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg',
+    ...         'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg'),
+    ...         'nltk:grammars/sample_grammars/toy.cfg',
+    ...         'grammars/sample_grammars/toy.cfg']
+    >>> for i, url in enumerate(urls):
+    ...     nltk.data.retrieve(url, 'toy-%d.cfg' % i) # doctest: +ELLIPSIS
+    Retrieving 'https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg', saving to 'toy-0.cfg'
+    Retrieving 'file:...toy.cfg', saving to 'toy-1.cfg'
+    Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-2.cfg'
+    Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-3.cfg'
+
+Clean up the temp dir:
+
+    >>> os.chdir(old_dir)
+    >>> for f in os.listdir(tempdir):
+    ...     os.remove(os.path.join(tempdir, f))
+    >>> os.rmdir(tempdir)
+
+Lazy Loader
+-----------
+A lazy loader is a wrapper object that defers loading a resource until
+it is accessed or used in any way.  This is mainly intended for
+internal use by NLTK's corpus readers.
+
+    >>> # Create a lazy loader for toy.cfg.
+    >>> ll = nltk.data.LazyLoader('grammars/sample_grammars/toy.cfg')
+
+    >>> # Show that it's not loaded yet:
+    >>> object.__repr__(ll) # doctest: +ELLIPSIS
+    '<nltk.data.LazyLoader object at ...>'
+
+    >>> # printing it is enough to cause it to be loaded:
+    >>> print(ll)
+    <Grammar with 14 productions>
+
+    >>> # Show that it's now been loaded:
+    >>> object.__repr__(ll) # doctest: +ELLIPSIS
+    '<nltk.grammar.CFG object at ...>'
+
+
+    >>> # Test that accessing an attribute also loads it:
+    >>> ll = nltk.data.LazyLoader('grammars/sample_grammars/toy.cfg')
+    >>> ll.start()
+    S
+    >>> object.__repr__(ll) # doctest: +ELLIPSIS
+    '<nltk.grammar.CFG object at ...>'
+
+Buffered Gzip Reading and Writing
+---------------------------------
+Write performance to gzip-compressed files is extremely poor when the
+files become large.  File creation can become a bottleneck in those cases.
+
+Read performance from large gzipped pickle files was improved in data.py by
+buffering the reads. A similar fix can be applied to writes by buffering
+them in an in-memory buffer first.
+
+This is mainly intended for internal use. The test below simply checks that
+reading and writing work as intended; it does not measure how much
+improvement buffering provides.
+
+    >>> from io import StringIO
+    >>> test = nltk.data.BufferedGzipFile('testbuf.gz', 'wb', size=2**10)
+    >>> ans = []
+    >>> for i in range(10000):
+    ...     ans.append(str(i).encode('ascii'))
+    ...     test.write(str(i).encode('ascii'))
+    >>> test.close()
+    >>> test = nltk.data.BufferedGzipFile('testbuf.gz', 'rb')
+    >>> test.read() == b''.join(ans)
+    True
+    >>> test.close()
+    >>> import os
+    >>> os.unlink('testbuf.gz')
+
+JSON Encoding and Decoding
+--------------------------
+JSON serialization is used instead of pickle for some classes.
+
+    >>> from nltk import jsontags
+    >>> from nltk.jsontags import JSONTaggedEncoder, JSONTaggedDecoder, register_tag
+    >>> @jsontags.register_tag
+    ... class JSONSerializable:
+    ...     json_tag = 'JSONSerializable'
+    ...
+    ...     def __init__(self, n):
+    ...         self.n = n
+    ...
+    ...     def encode_json_obj(self):
+    ...         return self.n
+    ...
+    ...     @classmethod
+    ...     def decode_json_obj(cls, obj):
+    ...         n = obj
+    ...         return cls(n)
+    ...
+    >>> JSONTaggedEncoder().encode(JSONSerializable(1))
+    '{"!JSONSerializable": 1}'
+    >>> JSONTaggedDecoder().decode('{"!JSONSerializable": 1}').n
+    1
+
diff --git a/nlp_resource_data/nltk/test/dependency.doctest b/nlp_resource_data/nltk/test/dependency.doctest
new file mode 100755 (executable)
index 0000000..854e11a
--- /dev/null
@@ -0,0 +1,241 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+===================
+Dependency Grammars
+===================
+
+    >>> from nltk.grammar import DependencyGrammar
+    >>> from nltk.parse import (
+    ...     DependencyGraph,
+    ...     ProjectiveDependencyParser,
+    ...     NonprojectiveDependencyParser,
+    ... )
+
+CoNLL Data
+----------
+
+    >>> treebank_data = """Pierre  NNP     2       NMOD
+    ... Vinken  NNP     8       SUB
+    ... ,       ,       2       P
+    ... 61      CD      5       NMOD
+    ... years   NNS     6       AMOD
+    ... old     JJ      2       NMOD
+    ... ,       ,       2       P
+    ... will    MD      0       ROOT
+    ... join    VB      8       VC
+    ... the     DT      11      NMOD
+    ... board   NN      9       OBJ
+    ... as      IN      9       VMOD
+    ... a       DT      15      NMOD
+    ... nonexecutive    JJ      15      NMOD
+    ... director        NN      12      PMOD
+    ... Nov.    NNP     9       VMOD
+    ... 29      CD      16      NMOD
+    ... .       .       9       VMOD
+    ... """
+
+    >>> dg = DependencyGraph(treebank_data)
+    >>> dg.tree().pprint()
+    (will
+      (Vinken Pierre , (old (years 61)) ,)
+      (join (board the) (as (director a nonexecutive)) (Nov. 29) .))
+    >>> for head, rel, dep in dg.triples():
+    ...     print(
+    ...         '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
+    ...         .format(h=head, r=rel, d=dep)
+    ...     )
+    (will, MD), SUB, (Vinken, NNP)
+    (Vinken, NNP), NMOD, (Pierre, NNP)
+    (Vinken, NNP), P, (,, ,)
+    (Vinken, NNP), NMOD, (old, JJ)
+    (old, JJ), AMOD, (years, NNS)
+    (years, NNS), NMOD, (61, CD)
+    (Vinken, NNP), P, (,, ,)
+    (will, MD), VC, (join, VB)
+    (join, VB), OBJ, (board, NN)
+    (board, NN), NMOD, (the, DT)
+    (join, VB), VMOD, (as, IN)
+    (as, IN), PMOD, (director, NN)
+    (director, NN), NMOD, (a, DT)
+    (director, NN), NMOD, (nonexecutive, JJ)
+    (join, VB), VMOD, (Nov., NNP)
+    (Nov., NNP), NMOD, (29, CD)
+    (join, VB), VMOD, (., .)
+
+Using a custom cell extractor.
+
+    >>> def custom_extractor(cells):
+    ...     _, tag, head, rel = cells
+    ...     return 'spam', 'spam', tag, tag, '', head, rel
+    >>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
+    >>> dg.tree().pprint()
+    (spam
+      (spam spam spam (spam (spam spam)) spam)
+      (spam (spam spam) (spam (spam spam spam)) (spam spam) spam))
+
+Custom cell extractors can take in and return an index.
+
+    >>> def custom_extractor(cells, index):
+    ...     word, tag, head, rel = cells
+    ...     return (index, '{}-{}'.format(word, index), word,
+    ...             tag, tag, '', head, rel)
+    >>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
+    >>> dg.tree().pprint()
+    (will-8
+      (Vinken-2 Pierre-1 ,-3 (old-6 (years-5 61-4)) ,-7)
+      (join-9
+        (board-11 the-10)
+        (as-12 (director-15 a-13 nonexecutive-14))
+        (Nov.-16 29-17)
+        .-18))
+
+Using the dependency-parsed version of the Penn Treebank corpus sample.
+
+    >>> from nltk.corpus import dependency_treebank
+    >>> t = dependency_treebank.parsed_sents()[0]
+    >>> print(t.to_conll(3))  # doctest: +NORMALIZE_WHITESPACE
+    Pierre      NNP     2
+    Vinken      NNP     8
+    ,   ,       2
+    61  CD      5
+    years       NNS     6
+    old JJ      2
+    ,   ,       2
+    will        MD      0
+    join        VB      8
+    the DT      11
+    board       NN      9
+    as  IN      9
+    a   DT      15
+    nonexecutive        JJ      15
+    director    NN      12
+    Nov.        NNP     9
+    29  CD      16
+    .   .       8
+
+Using the output of zpar (like Malt-TAB but with zero-based indexing).
+
+    >>> zpar_data = """
+    ... Pierre NNP     1       NMOD
+    ... Vinken NNP     7       SUB
+    ... ,      ,       1       P
+    ... 61     CD      4       NMOD
+    ... years  NNS     5       AMOD
+    ... old    JJ      1       NMOD
+    ... ,      ,       1       P
+    ... will   MD      -1      ROOT
+    ... join   VB      7       VC
+    ... the    DT      10      NMOD
+    ... board  NN      8       OBJ
+    ... as     IN      8       VMOD
+    ... a      DT      14      NMOD
+    ... nonexecutive   JJ      14      NMOD
+    ... director       NN      11      PMOD
+    ... Nov.   NNP     8       VMOD
+    ... 29     CD      15      NMOD
+    ... .      .       7       P
+    ... """
+
+    >>> zdg = DependencyGraph(zpar_data, zero_based=True)
+    >>> print(zdg.tree())
+    (will
+      (Vinken Pierre , (old (years 61)) ,)
+      (join (board the) (as (director a nonexecutive)) (Nov. 29))
+      .)
+
+
+Projective Dependency Parsing
+-----------------------------
+
+    >>> grammar = DependencyGrammar.fromstring("""
+    ... 'fell' -> 'price' | 'stock'
+    ... 'price' -> 'of' 'the'
+    ... 'of' -> 'stock'
+    ... 'stock' -> 'the'
+    ... """)
+    >>> print(grammar)
+    Dependency grammar with 5 productions
+      'fell' -> 'price'
+      'fell' -> 'stock'
+      'price' -> 'of' 'the'
+      'of' -> 'stock'
+      'stock' -> 'the'
+
+    >>> dp = ProjectiveDependencyParser(grammar)
+    >>> for t in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
+    ...     print(t)
+    (fell (price the (of (stock the))))
+    (fell (price the of) (stock the))
+    (fell (price the of the) stock)
+
+Non-Projective Dependency Parsing
+---------------------------------
+
+    >>> grammar = DependencyGrammar.fromstring("""
+    ... 'taught' -> 'play' | 'man'
+    ... 'man' -> 'the'
+    ... 'play' -> 'golf' | 'dog' | 'to'
+    ... 'dog' -> 'his'
+    ... """)
+    >>> print(grammar)
+    Dependency grammar with 7 productions
+      'taught' -> 'play'
+      'taught' -> 'man'
+      'man' -> 'the'
+      'play' -> 'golf'
+      'play' -> 'dog'
+      'play' -> 'to'
+      'dog' -> 'his'
+
+    >>> dp = NonprojectiveDependencyParser(grammar)
+    >>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])
+
+    >>> print(g.root['word'])
+    taught
+
+    >>> for _, node in sorted(g.nodes.items()):
+    ...     if node['word'] is not None:
+    ...         print('{address} {word}: {d}'.format(d=node['deps'][''], **node))
+    1 the: []
+    2 man: [1]
+    3 taught: [2, 7]
+    4 his: []
+    5 dog: [4]
+    6 to: []
+    7 play: [5, 6, 8]
+    8 golf: []
+
+    >>> print(g.tree())
+    (taught (man the) (play (dog his) to golf))
+
+Integration with MALT parser
+============================
+
+If the top relation differs from the default, we can set it explicitly. For
+the MALT parser, it is `'null'`.
+
+>>> dg_str = """1       I       _       NN      NN      _       2       nn      _       _
+... 2   shot    _       NN      NN      _       0       null    _       _
+... 3   an      _       AT      AT      _       2       dep     _       _
+... 4   elephant        _       NN      NN      _       7       nn      _       _
+... 5   in      _       NN      NN      _       7       nn      _       _
+... 6   my      _       NN      NN      _       7       nn      _       _
+... 7   pajamas _       NNS     NNS     _       3       dobj    _       _
+... """
+>>> dg = DependencyGraph(dg_str, top_relation_label='null')
+
+>>> len(dg.nodes)
+8
+
+>>> dg.root['word'], dg.root['address']
+('shot', 2)
+
+>>> print(dg.to_conll(10))  # doctest: +NORMALIZE_WHITESPACE
+1   I       _       NN      NN      _       2       nn      _       _
+2   shot    _       NN      NN      _       0       null    _       _
+3   an      _       AT      AT      _       2       dep     _       _
+4   elephant        _       NN      NN      _       7       nn      _       _
+5   in      _       NN      NN      _       7       nn      _       _
+6   my      _       NN      NN      _       7       nn      _       _
+7   pajamas _       NNS     NNS     _       3       dobj    _       _
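+
+The attachment structure is easier to read as a tree (a small sketch;
+the expected rendering below is derived from the head/dependent links
+shown above):
+
+>>> print(dg.tree())
+(shot I (an (pajamas elephant in my)))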
diff --git a/nlp_resource_data/nltk/test/discourse.doctest b/nlp_resource_data/nltk/test/discourse.doctest
new file mode 100644 (file)
index 0000000..a5dabe8
--- /dev/null
@@ -0,0 +1,546 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==================
+Discourse Checking
+==================
+
+    >>> from nltk import *
+    >>> from nltk.sem import logic
+    >>> logic._counter._value = 0
+
+Introduction
+============
+
+The NLTK discourse module makes it possible to test consistency and
+redundancy of simple discourses, using theorem-proving and
+model-building from `nltk.inference`.
+
+The ``DiscourseTester`` constructor takes a list of sentences as a
+parameter.
+
+    >>> dt = DiscourseTester(['a boxer walks', 'every boxer chases a girl'])
+
+The ``DiscourseTester`` parses each sentence into a list of logical
+forms.  Once we have created a ``DiscourseTester`` object, we can
+inspect various properties of the discourse. First off, we might want
+to double-check what sentences are currently stored as the discourse.
+
+    >>> dt.sentences()
+    s0: a boxer walks
+    s1: every boxer chases a girl
+
+As you will see, each sentence receives an identifier `s`\ :subscript:`i`.
+We might also want to check what grammar the ``DiscourseTester`` is
+using (by default, ``book_grammars/discourse.fcfg``):
+
+    >>> dt.grammar() # doctest: +ELLIPSIS
+    % start S
+    # Grammar Rules
+    S[SEM = <app(?subj,?vp)>] -> NP[NUM=?n,SEM=?subj] VP[NUM=?n,SEM=?vp]
+    NP[NUM=?n,SEM=<app(?det,?nom)> ] -> Det[NUM=?n,SEM=?det]  Nom[NUM=?n,SEM=?nom]
+    NP[LOC=?l,NUM=?n,SEM=?np] -> PropN[LOC=?l,NUM=?n,SEM=?np]
+    ...
+
+A different grammar can be invoked by using the optional ``gramfile``
+parameter when a ``DiscourseTester`` object is created.
+
+Readings and Threads
+====================
+
+Depending on the grammar used, we may find some sentences have more than one
+logical form. To check this, use the ``readings()`` method. Given a
+sentence identifier of the form `s`\ :subscript:`i`, each reading of
+that sentence is given an identifier `s`\ :sub:`i`-`r`\ :sub:`j`.
+
+
+    >>> dt.readings()
+    <BLANKLINE>
+    s0 readings:
+    <BLANKLINE>
+    s0-r0: exists z1.(boxer(z1) & walk(z1))
+    s0-r1: exists z1.(boxerdog(z1) & walk(z1))
+    <BLANKLINE>
+    s1 readings:
+    <BLANKLINE>
+    s1-r0: all z2.(boxer(z2) -> exists z3.(girl(z3) & chase(z2,z3)))
+    s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
+
+
+In this case, the only source of ambiguity lies in the word *boxer*,
+which receives two translations: ``boxer`` and ``boxerdog``. The
+intention is that one of these corresponds to the ``person`` sense and
+one to the ``dog`` sense. In principle, we would also expect to see a
+quantifier scope ambiguity in ``s1``. However, the simple grammar we
+are using, namely `sem4.fcfg <sem4.fcfg>`_, doesn't support quantifier
+scope ambiguity.
+
+We can also investigate the readings of a specific sentence:
+
+    >>> dt.readings('a boxer walks')
+    The sentence 'a boxer walks' has these readings:
+        exists x.(boxer(x) & walk(x))
+        exists x.(boxerdog(x) & walk(x))
+
+Given that each sentence is two-ways ambiguous, we potentially have
+four different discourse 'threads', taking all combinations of
+readings. To see these, specify the ``threaded=True`` parameter on
+the ``readings()`` method. Again, each thread is assigned an
+identifier of the form `d`\ :sub:`i`. Following the identifier is a
+list of the readings that constitute that thread.
+
+    >>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE
+    d0: ['s0-r0', 's1-r0']
+    d1: ['s0-r0', 's1-r1']
+    d2: ['s0-r1', 's1-r0']
+    d3: ['s0-r1', 's1-r1']
+
+Of course, this simple-minded approach doesn't scale: a discourse with, say, three
+sentences, each of which has 3 readings, will generate 27 different
+threads. It is an interesting exercise to consider how to manage
+discourse ambiguity more efficiently.
+
+Checking Consistency
+====================
+
+Now, we can check whether some or all of the discourse threads are
+consistent, using the ``models()`` method. With no parameter, this
+method will try to find a model for every discourse thread in the
+current discourse. However, we can also specify just one thread, say ``d1``.
+
+    >>> dt.models('d1')
+    --------------------------------------------------------------------------------
+    Model for Discourse Thread d1
+    --------------------------------------------------------------------------------
+    % number = 1
+    % seconds = 0
+    <BLANKLINE>
+    % Interpretation of size 2
+    <BLANKLINE>
+    c1 = 0.
+    <BLANKLINE>
+    f1(0) = 0.
+    f1(1) = 0.
+    <BLANKLINE>
+      boxer(0).
+    - boxer(1).
+    <BLANKLINE>
+    - boxerdog(0).
+    - boxerdog(1).
+    <BLANKLINE>
+    - girl(0).
+    - girl(1).
+    <BLANKLINE>
+      walk(0).
+    - walk(1).
+    <BLANKLINE>
+    - chase(0,0).
+    - chase(0,1).
+    - chase(1,0).
+    - chase(1,1).
+    <BLANKLINE>
+    Consistent discourse: d1 ['s0-r0', 's1-r1']:
+        s0-r0: exists z1.(boxer(z1) & walk(z1))
+        s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
+    <BLANKLINE>
+
+There are various formats for rendering **Mace4** models --- here,
+we have used the 'cooked' format (which is intended to be
+human-readable). There are a number of points to note.
+
+#. The entities in the domain are all treated as non-negative
+   integers. In this case, there are only two entities, ``0`` and
+   ``1``.
+
+#. The ``-`` symbol indicates negation. So ``0`` is the only
+   ``boxerdog`` and the only thing that ``walk``\ s. Nothing is a
+   ``boxer``, or a ``girl`` or in the ``chase`` relation. Thus the
+   universal sentence is vacuously true.
+
+#. ``c1`` is an introduced constant that denotes ``0``.
+
+#. ``f1`` is a Skolem function, but it plays no significant role in
+   this model.
+
+
+We might now want to add another sentence to the discourse, and there
+is a method ``add_sentence()`` for doing just this.
+
+    >>> dt.add_sentence('John is a boxer')
+    >>> dt.sentences()
+    s0: a boxer walks
+    s1: every boxer chases a girl
+    s2: John is a boxer
+
+We can now test all the properties as before; here, we just show a
+couple of them.
+
+    >>> dt.readings()
+    <BLANKLINE>
+    s0 readings:
+    <BLANKLINE>
+    s0-r0: exists z1.(boxer(z1) & walk(z1))
+    s0-r1: exists z1.(boxerdog(z1) & walk(z1))
+    <BLANKLINE>
+    s1 readings:
+    <BLANKLINE>
+    s1-r0: all z1.(boxer(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
+    s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
+    <BLANKLINE>
+    s2 readings:
+    <BLANKLINE>
+    s2-r0: boxer(John)
+    s2-r1: boxerdog(John)
+    >>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE
+    d0: ['s0-r0', 's1-r0', 's2-r0']
+    d1: ['s0-r0', 's1-r0', 's2-r1']
+    d2: ['s0-r0', 's1-r1', 's2-r0']
+    d3: ['s0-r0', 's1-r1', 's2-r1']
+    d4: ['s0-r1', 's1-r0', 's2-r0']
+    d5: ['s0-r1', 's1-r0', 's2-r1']
+    d6: ['s0-r1', 's1-r1', 's2-r0']
+    d7: ['s0-r1', 's1-r1', 's2-r1']
+
+If you are interested in a particular thread, the ``expand_threads()``
+method will remind you of what readings it consists of:
+
+    >>> thread = dt.expand_threads('d1')
+    >>> for rid, reading in thread:
+    ...     print(rid, str(reading.normalize()))
+    s0-r0 exists z1.(boxer(z1) & walk(z1))
+    s1-r0 all z1.(boxer(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
+    s2-r1 boxerdog(John)
+
+Suppose we have already defined a discourse, as follows:
+
+    >>> dt = DiscourseTester(['A student dances', 'Every student is a person'])
+
+Now, when we add a new sentence, is it consistent with what we already
+have? The ``consistchk=True`` parameter of ``add_sentence()`` allows
+us to check:
+
+    >>> dt.add_sentence('No person dances', consistchk=True)
+    Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']:
+        s0-r0: exists z1.(student(z1) & dance(z1))
+        s1-r0: all z1.(student(z1) -> person(z1))
+        s2-r0: -exists z1.(person(z1) & dance(z1))
+    <BLANKLINE>
+    >>> dt.readings()
+    <BLANKLINE>
+    s0 readings:
+    <BLANKLINE>
+    s0-r0: exists z1.(student(z1) & dance(z1))
+    <BLANKLINE>
+    s1 readings:
+    <BLANKLINE>
+    s1-r0: all z1.(student(z1) -> person(z1))
+    <BLANKLINE>
+    s2 readings:
+    <BLANKLINE>
+    s2-r0: -exists z1.(person(z1) & dance(z1))
+
+So let's retract the inconsistent sentence:
+
+    >>> dt.retract_sentence('No person dances', verbose=True) # doctest: +NORMALIZE_WHITESPACE
+    Current sentences are
+    s0: A student dances
+    s1: Every student is a person
+
+We can now verify that the result is consistent.
+
+    >>> dt.models()
+    --------------------------------------------------------------------------------
+    Model for Discourse Thread d0
+    --------------------------------------------------------------------------------
+    % number = 1
+    % seconds = 0
+    <BLANKLINE>
+    % Interpretation of size 2
+    <BLANKLINE>
+    c1 = 0.
+    <BLANKLINE>
+      dance(0).
+    - dance(1).
+    <BLANKLINE>
+      person(0).
+    - person(1).
+    <BLANKLINE>
+      student(0).
+    - student(1).
+    <BLANKLINE>
+    Consistent discourse: d0 ['s0-r0', 's1-r0']:
+        s0-r0: exists z1.(student(z1) & dance(z1))
+        s1-r0: all z1.(student(z1) -> person(z1))
+    <BLANKLINE>
+
+Checking Informativity
+======================
+
+Let's assume that we are still trying to extend the discourse *A
+student dances.* *Every student is a person.* We add a new sentence,
+but this time, we check whether it is informative with respect to what
+has gone before.
+
+    >>> dt.add_sentence('A person dances', informchk=True)
+    Sentence 'A person dances' under reading 'exists x.(person(x) & dance(x))':
+    Not informative relative to thread 'd0'
+
+In fact, we are just checking whether the new sentence is entailed by
+the preceding discourse.
+
+    >>> dt.models()
+    --------------------------------------------------------------------------------
+    Model for Discourse Thread d0
+    --------------------------------------------------------------------------------
+    % number = 1
+    % seconds = 0
+    <BLANKLINE>
+    % Interpretation of size 2
+    <BLANKLINE>
+    c1 = 0.
+    <BLANKLINE>
+    c2 = 0.
+    <BLANKLINE>
+      dance(0).
+    - dance(1).
+    <BLANKLINE>
+      person(0).
+    - person(1).
+    <BLANKLINE>
+      student(0).
+    - student(1).
+    <BLANKLINE>
+    Consistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']:
+        s0-r0: exists z1.(student(z1) & dance(z1))
+        s1-r0: all z1.(student(z1) -> person(z1))
+        s2-r0: exists z1.(person(z1) & dance(z1))
+    <BLANKLINE>
+
+
+
+Adding Background Knowledge
+===========================
+
+Let's build a new discourse, and look at the readings of the component sentences:
+
+    >>> dt = DiscourseTester(['Vincent is a boxer', 'Fido is a boxer', 'Vincent is married', 'Fido barks'])
+    >>> dt.readings()
+    <BLANKLINE>
+    s0 readings:
+    <BLANKLINE>
+    s0-r0: boxer(Vincent)
+    s0-r1: boxerdog(Vincent)
+    <BLANKLINE>
+    s1 readings:
+    <BLANKLINE>
+    s1-r0: boxer(Fido)
+    s1-r1: boxerdog(Fido)
+    <BLANKLINE>
+    s2 readings:
+    <BLANKLINE>
+    s2-r0: married(Vincent)
+    <BLANKLINE>
+    s3 readings:
+    <BLANKLINE>
+    s3-r0: bark(Fido)
+
+This gives us a lot of threads:
+
+    >>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE
+    d0: ['s0-r0', 's1-r0', 's2-r0', 's3-r0']
+    d1: ['s0-r0', 's1-r1', 's2-r0', 's3-r0']
+    d2: ['s0-r1', 's1-r0', 's2-r0', 's3-r0']
+    d3: ['s0-r1', 's1-r1', 's2-r0', 's3-r0']
+
+
+We can eliminate some of the readings, and hence some of the threads,
+by adding background information.
+
+    >>> import nltk.data
+    >>> bg = nltk.data.load('grammars/book_grammars/background.fol')
+    >>> dt.add_background(bg)
+    >>> dt.background()
+    all x.(boxerdog(x) -> dog(x))
+    all x.(boxer(x) -> person(x))
+    all x.-(dog(x) & person(x))
+    all x.(married(x) <-> exists y.marry(x,y))
+    all x.(bark(x) -> dog(x))
+    all x y.(marry(x,y) -> (person(x) & person(y)))
+    -(Vincent = Mia)
+    -(Vincent = Fido)
+    -(Mia = Fido)
+
+The background information allows us to reject three of the threads as
+inconsistent. To see what remains, use the ``filter=True`` parameter
+on ``readings()``.
+
+    >>> dt.readings(filter=True) # doctest: +NORMALIZE_WHITESPACE
+    d1: ['s0-r0', 's1-r1', 's2-r0', 's3-r0']
+
+The ``models()`` method gives us more information about the surviving thread.
+
+    >>> dt.models()
+    --------------------------------------------------------------------------------
+    Model for Discourse Thread d0
+    --------------------------------------------------------------------------------
+    No model found!
+    <BLANKLINE>
+    --------------------------------------------------------------------------------
+    Model for Discourse Thread d1
+    --------------------------------------------------------------------------------
+    % number = 1
+    % seconds = 0
+    <BLANKLINE>
+    % Interpretation of size 3
+    <BLANKLINE>
+    Fido = 0.
+    <BLANKLINE>
+    Mia = 1.
+    <BLANKLINE>
+    Vincent = 2.
+    <BLANKLINE>
+    f1(0) = 0.
+    f1(1) = 0.
+    f1(2) = 2.
+    <BLANKLINE>
+      bark(0).
+    - bark(1).
+    - bark(2).
+    <BLANKLINE>
+    - boxer(0).
+    - boxer(1).
+      boxer(2).
+    <BLANKLINE>
+      boxerdog(0).
+    - boxerdog(1).
+    - boxerdog(2).
+    <BLANKLINE>
+      dog(0).
+    - dog(1).
+    - dog(2).
+    <BLANKLINE>
+    - married(0).
+    - married(1).
+      married(2).
+    <BLANKLINE>
+    - person(0).
+    - person(1).
+      person(2).
+    <BLANKLINE>
+    - marry(0,0).
+    - marry(0,1).
+    - marry(0,2).
+    - marry(1,0).
+    - marry(1,1).
+    - marry(1,2).
+    - marry(2,0).
+    - marry(2,1).
+      marry(2,2).
+    <BLANKLINE>
+    --------------------------------------------------------------------------------
+    Model for Discourse Thread d2
+    --------------------------------------------------------------------------------
+    No model found!
+    <BLANKLINE>
+    --------------------------------------------------------------------------------
+    Model for Discourse Thread d3
+    --------------------------------------------------------------------------------
+    No model found!
+    <BLANKLINE>
+    Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0', 's3-r0']:
+        s0-r0: boxer(Vincent)
+        s1-r0: boxer(Fido)
+        s2-r0: married(Vincent)
+        s3-r0: bark(Fido)
+    <BLANKLINE>
+    Consistent discourse: d1 ['s0-r0', 's1-r1', 's2-r0', 's3-r0']:
+        s0-r0: boxer(Vincent)
+        s1-r1: boxerdog(Fido)
+        s2-r0: married(Vincent)
+        s3-r0: bark(Fido)
+    <BLANKLINE>
+    Inconsistent discourse: d2 ['s0-r1', 's1-r0', 's2-r0', 's3-r0']:
+        s0-r1: boxerdog(Vincent)
+        s1-r0: boxer(Fido)
+        s2-r0: married(Vincent)
+        s3-r0: bark(Fido)
+    <BLANKLINE>
+    Inconsistent discourse: d3 ['s0-r1', 's1-r1', 's2-r0', 's3-r0']:
+        s0-r1: boxerdog(Vincent)
+        s1-r1: boxerdog(Fido)
+        s2-r0: married(Vincent)
+        s3-r0: bark(Fido)
+    <BLANKLINE>
+
+
+..  This will not be visible in the html output: create a tempdir to
+    play in.
+    >>> import tempfile, os
+    >>> tempdir = tempfile.mkdtemp()
+    >>> old_dir = os.path.abspath('.')
+    >>> os.chdir(tempdir)
+
+In order to play around with your own version of background knowledge,
+you might want to start off with a local copy of ``background.fol``:
+
+    >>> nltk.data.retrieve('grammars/book_grammars/background.fol')
+    Retrieving 'nltk:grammars/book_grammars/background.fol', saving to 'background.fol'
+
+After you have modified the file, the ``load_fol()`` function will parse
+the strings in the file into expressions of ``nltk.sem.logic``.
+
+    >>> from nltk.inference.discourse import load_fol
+    >>> mybg = load_fol(open('background.fol').read())
+
+The result can be loaded as an argument of ``add_background()`` in the
+manner shown earlier.
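+
+For example (a sketch, skipped here since ``dt`` above already carries
+this background knowledge):
+
+    >>> dt.add_background(mybg)  # doctest: +SKIP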
+
+..  This will not be visible in the html output: clean up the tempdir.
+    >>> os.chdir(old_dir)
+    >>> for f in os.listdir(tempdir):
+    ...     os.remove(os.path.join(tempdir, f))
+    >>> os.rmdir(tempdir)
+    >>> nltk.data.clear_cache()
+
+
+Regression Testing from book
+============================
+
+    >>> logic._counter._value = 0
+
+    >>> from nltk.tag import RegexpTagger
+    >>> tagger = RegexpTagger(
+    ...     [('^(chases|runs)$', 'VB'),
+    ...      ('^(a)$', 'ex_quant'),
+    ...      ('^(every)$', 'univ_quant'),
+    ...      ('^(dog|boy)$', 'NN'),
+    ...      ('^(He)$', 'PRP')
+    ... ])
+    >>> rc = DrtGlueReadingCommand(depparser=MaltParser(tagger=tagger))
+    >>> dt = DiscourseTester(map(str.split, ['Every dog chases a boy', 'He runs']), rc)
+    >>> dt.readings()
+    <BLANKLINE>
+    s0 readings:
+    <BLANKLINE>
+    s0-r0: ([z2],[boy(z2), (([z5],[dog(z5)]) -> ([],[chases(z5,z2)]))])
+    s0-r1: ([],[(([z1],[dog(z1)]) -> ([z2],[boy(z2), chases(z1,z2)]))])
+    <BLANKLINE>
+    s1 readings:
+    <BLANKLINE>
+    s1-r0: ([z1],[PRO(z1), runs(z1)])
+    >>> dt.readings(show_thread_readings=True)
+    d0: ['s0-r0', 's1-r0'] : ([z1,z2],[boy(z1), (([z3],[dog(z3)]) -> ([],[chases(z3,z1)])), (z2 = z1), runs(z2)])
+    d1: ['s0-r1', 's1-r0'] : INVALID: AnaphoraResolutionException
+    >>> dt.readings(filter=True, show_thread_readings=True)
+    d0: ['s0-r0', 's1-r0'] : ([z1,z3],[boy(z1), (([z2],[dog(z2)]) -> ([],[chases(z2,z1)])), (z3 = z1), runs(z3)])
+
+    >>> logic._counter._value = 0
+
+    >>> from nltk.parse import FeatureEarleyChartParser
+    >>> from nltk.sem.drt import DrtParser
+    >>> grammar = nltk.data.load('grammars/book_grammars/drt.fcfg', logic_parser=DrtParser())
+    >>> parser = FeatureEarleyChartParser(grammar, trace=0)
+    >>> trees = parser.parse('Angus owns a dog'.split())
+    >>> print(list(trees)[0].label()['SEM'].simplify().normalize())
+    ([z1,z2],[Angus(z1), dog(z2), own(z1,z2)])
diff --git a/nlp_resource_data/nltk/test/discourse_fixt.py b/nlp_resource_data/nltk/test/discourse_fixt.py
new file mode 100644 (file)
index 0000000..9a10215
--- /dev/null
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+
+
+# FIXME: the entire discourse.doctest is skipped if Prover9/Mace4 is
+# not installed, but there are pure-python parts that don't need Prover9.
+def setup_module(module):
+    from nose import SkipTest
+    from nltk.inference.mace import Mace
+
+    try:
+        m = Mace()
+        m._find_binary("mace4")
+    except LookupError:
+        raise SkipTest("Mace4/Prover9 is not available so discourse.doctest is skipped")
diff --git a/nlp_resource_data/nltk/test/drt.doctest b/nlp_resource_data/nltk/test/drt.doctest
new file mode 100644 (file)
index 0000000..a0cd1f3
--- /dev/null
@@ -0,0 +1,517 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+================================
+ Discourse Representation Theory
+================================
+
+    >>> from nltk.sem import logic
+    >>> from nltk.inference import TableauProver
+
+Overview
+========
+
+A DRS can be created with the ``DRS()`` constructor. This takes two arguments: a list of
+discourse referents and a list of conditions.
+
+    >>> from nltk.sem.drt import *
+    >>> dexpr = DrtExpression.fromstring
+    >>> man_x = dexpr('man(x)')
+    >>> walk_x = dexpr('walk(x)')
+    >>> x = dexpr('x')
+    >>> print(DRS([x], [man_x, walk_x]))
+    ([x],[man(x), walk(x)])
+
+The ``fromstring()`` method can also be applied directly to DRS
+expressions, which allows them to be specified more easily.
+
+    >>> drs1 = dexpr('([x],[man(x),walk(x)])')
+    >>> print(drs1)
+    ([x],[man(x), walk(x)])
+
+DRSs can be *merged* using the ``+`` operator.
+
+    >>> drs2 = dexpr('([y],[woman(y),stop(y)])')
+    >>> drs3 = drs1 + drs2
+    >>> print(drs3)
+    (([x],[man(x), walk(x)]) + ([y],[woman(y), stop(y)]))
+    >>> print(drs3.simplify())
+    ([x,y],[man(x), walk(x), woman(y), stop(y)])
+
+We can embed DRSs as components of an ``implies`` condition.
+
+    >>> s = '([], [(%s -> %s)])' % (drs1, drs2)
+    >>> print(dexpr(s))
+    ([],[(([x],[man(x), walk(x)]) -> ([y],[woman(y), stop(y)]))])
+
+The ``fol()`` method converts DRSs into FOL formulae.
+
+    >>> print(dexpr(r'([x],[man(x), walks(x)])').fol())
+    exists x.(man(x) & walks(x))
+    >>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol())
+    all x.(man(x) -> walks(x))
+
+In order to visualize a DRS, the ``pretty_format()`` method can be used.
+
+    >>> print(drs3.pretty_format())
+      _________     __________
+     | x       |   | y        |
+    (|---------| + |----------|)
+     | man(x)  |   | woman(y) |
+     | walk(x) |   | stop(y)  |
+     |_________|   |__________|
+
+
+Parse to semantics
+------------------
+
+..
+    >>> logic._counter._value = 0
+
+DRSs can be used for building compositional semantics in a feature-based
+grammar. To specify that we want to use DRSs, the appropriate
+logic parser needs to be passed as a parameter to ``load_parser()``
+
+    >>> from nltk.parse import load_parser
+    >>> from nltk.sem.drt import DrtParser
+    >>> parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0, logic_parser=DrtParser())
+    >>> for tree in parser.parse('a dog barks'.split()):
+    ...     print(tree.label()['SEM'].simplify())
+    ...
+    ([x],[dog(x), bark(x)])
+
+Alternatively, a ``FeatStructReader`` can be passed with the ``logic_parser`` set on it
+
+    >>> from nltk.featstruct import FeatStructReader
+    >>> from nltk.grammar import FeatStructNonterminal
+    >>> parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0, fstruct_reader=FeatStructReader(fdict_class=FeatStructNonterminal, logic_parser=DrtParser()))
+    >>> for tree in parser.parse('every girl chases a dog'.split()):
+    ...     print(tree.label()['SEM'].simplify().normalize())
+    ...
+    ([],[(([z1],[girl(z1)]) -> ([z2],[dog(z2), chase(z1,z2)]))])
+
+
+
+Unit Tests
+==========
+
+Parser
+------
+
+    >>> print(dexpr(r'([x,y],[sees(x,y)])'))
+    ([x,y],[sees(x,y)])
+    >>> print(dexpr(r'([x],[man(x), walks(x)])'))
+    ([x],[man(x), walks(x)])
+    >>> print(dexpr(r'\x.([],[man(x), walks(x)])'))
+    \x.([],[man(x), walks(x)])
+    >>> print(dexpr(r'\x.\y.([],[sees(x,y)])'))
+    \x y.([],[sees(x,y)])
+
+    >>> print(dexpr(r'([x,y],[(x = y)])'))
+    ([x,y],[(x = y)])
+    >>> print(dexpr(r'([x,y],[(x != y)])'))
+    ([x,y],[-(x = y)])
+
+    >>> print(dexpr(r'\x.([],[walks(x)])(john)'))
+    (\x.([],[walks(x)]))(john)
+    >>> print(dexpr(r'\R.\x.([],[big(x,R)])(\y.([],[mouse(y)]))'))
+    (\R x.([],[big(x,R)]))(\y.([],[mouse(y)]))
+
+    >>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))'))
+    (([x],[walks(x)]) + ([y],[runs(y)]))
+    >>> print(dexpr(r'(([x,y],[walks(x), jumps(y)]) + (([z],[twos(z)]) + ([w],[runs(w)])))'))
+    (([x,y],[walks(x), jumps(y)]) + ([z],[twos(z)]) + ([w],[runs(w)]))
+    >>> print(dexpr(r'((([],[walks(x)]) + ([],[twos(x)])) + ([],[runs(x)]))'))
+    (([],[walks(x)]) + ([],[twos(x)]) + ([],[runs(x)]))
+    >>> print(dexpr(r'((([],[walks(x)]) + ([],[runs(x)])) + (([],[threes(x)]) + ([],[fours(x)])))'))
+    (([],[walks(x)]) + ([],[runs(x)]) + ([],[threes(x)]) + ([],[fours(x)]))
+
+    >>> print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))'))
+    (([],[walks(x)]) -> ([],[runs(x)]))
+
+    >>> print(dexpr(r'([x],[PRO(x), sees(John,x)])'))
+    ([x],[PRO(x), sees(John,x)])
+    >>> print(dexpr(r'([x],[man(x), -([],[walks(x)])])'))
+    ([x],[man(x), -([],[walks(x)])])
+    >>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])'))
+    ([],[(([x],[man(x)]) -> ([],[walks(x)]))])
+
+    >>> print(dexpr(r'DRS([x],[walk(x)])'))
+    ([x],[walk(x)])
+    >>> print(dexpr(r'DRS([x][walk(x)])'))
+    ([x],[walk(x)])
+    >>> print(dexpr(r'([x][walk(x)])'))
+    ([x],[walk(x)])
+
+``simplify()``
+--------------
+
+    >>> print(dexpr(r'\x.([],[man(x), walks(x)])(john)').simplify())
+    ([],[man(john), walks(john)])
+    >>> print(dexpr(r'\x.\y.([z],[dog(z),sees(x,y)])(john)(mary)').simplify())
+    ([z],[dog(z), sees(john,mary)])
+    >>> print(dexpr(r'\R x.([],[big(x,R)])(\y.([],[mouse(y)]))').simplify())
+    \x.([],[big(x,\y.([],[mouse(y)]))])
+
+    >>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))').simplify())
+    ([x,y],[walks(x), runs(y)])
+    >>> print(dexpr(r'(([x,y],[walks(x), jumps(y)]) + (([z],[twos(z)]) + ([w],[runs(w)])))').simplify())
+    ([w,x,y,z],[walks(x), jumps(y), twos(z), runs(w)])
+    >>> print(dexpr(r'((([],[walks(x)]) + ([],[runs(x)]) + ([],[threes(x)]) + ([],[fours(x)])))').simplify())
+    ([],[walks(x), runs(x), threes(x), fours(x)])
+    >>> dexpr(r'([x],[man(x)])+([x],[walks(x)])').simplify() == \
+    ... dexpr(r'([x,z1],[man(x), walks(z1)])')
+    True
+    >>> dexpr(r'([y],[boy(y), (([x],[dog(x)]) -> ([],[chase(x,y)]))])+([x],[run(x)])').simplify() == \
+    ... dexpr(r'([y,z1],[boy(y), (([x],[dog(x)]) -> ([],[chase(x,y)])), run(z1)])')
+    True
+
+    >>> dexpr(r'\Q.(([x],[john(x),walks(x)]) + Q)(([x],[PRO(x),leaves(x)]))').simplify() == \
+    ... dexpr(r'([x,z1],[john(x), walks(x), PRO(z1), leaves(z1)])')
+    True
+
+    >>> logic._counter._value = 0
+    >>> print(dexpr('([],[(([x],[dog(x)]) -> ([e,y],[boy(y), chase(e), subj(e,x), obj(e,y)]))])+([e,x],[PRO(x), run(e), subj(e,x)])').simplify().normalize().normalize())
+    ([e02,z5],[(([z3],[dog(z3)]) -> ([e01,z4],[boy(z4), chase(e01), subj(e01,z3), obj(e01,z4)])), PRO(z5), run(e02), subj(e02,z5)])
+
+``fol()``
+-----------
+
+    >>> print(dexpr(r'([x,y],[sees(x,y)])').fol())
+    exists x y.sees(x,y)
+    >>> print(dexpr(r'([x],[man(x), walks(x)])').fol())
+    exists x.(man(x) & walks(x))
+    >>> print(dexpr(r'\x.([],[man(x), walks(x)])').fol())
+    \x.(man(x) & walks(x))
+    >>> print(dexpr(r'\x y.([],[sees(x,y)])').fol())
+    \x y.sees(x,y)
+
+    >>> print(dexpr(r'\x.([],[walks(x)])(john)').fol())
+    \x.walks(x)(john)
+    >>> print(dexpr(r'\R x.([],[big(x,R)])(\y.([],[mouse(y)]))').fol())
+    (\R x.big(x,R))(\y.mouse(y))
+
+    >>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))').fol())
+    (exists x.walks(x) & exists y.runs(y))
+
+    >>> print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))').fol())
+    (walks(x) -> runs(x))
+
+    >>> print(dexpr(r'([x],[PRO(x), sees(John,x)])').fol())
+    exists x.(PRO(x) & sees(John,x))
+    >>> print(dexpr(r'([x],[man(x), -([],[walks(x)])])').fol())
+    exists x.(man(x) & -walks(x))
+    >>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol())
+    all x.(man(x) -> walks(x))
+
+    >>> print(dexpr(r'([x],[man(x) | walks(x)])').fol())
+    exists x.(man(x) | walks(x))
+    >>> print(dexpr(r'P(x) + ([x],[walks(x)])').fol())
+    (P(x) & exists x.walks(x))
+
+``resolve_anaphora()``
+----------------------
+
+    >>> from nltk.sem.drt import AnaphoraResolutionException
+
+    >>> print(resolve_anaphora(dexpr(r'([x,y,z],[dog(x), cat(y), walks(z), PRO(z)])')))
+    ([x,y,z],[dog(x), cat(y), walks(z), (z = [x,y])])
+    >>> print(resolve_anaphora(dexpr(r'([],[(([x],[dog(x)]) -> ([y],[walks(y), PRO(y)]))])')))
+    ([],[(([x],[dog(x)]) -> ([y],[walks(y), (y = x)]))])
+    >>> print(resolve_anaphora(dexpr(r'(([x,y],[]) + ([],[PRO(x)]))')).simplify())
+    ([x,y],[(x = y)])
+    >>> try: print(resolve_anaphora(dexpr(r'([x],[walks(x), PRO(x)])')))
+    ... except AnaphoraResolutionException as e: print(e)
+    Variable 'x' does not resolve to anything.
+    >>> print(resolve_anaphora(dexpr('([e01,z6,z7],[boy(z6), PRO(z7), run(e01), subj(e01,z7)])')))
+    ([e01,z6,z7],[boy(z6), (z7 = z6), run(e01), subj(e01,z7)])
+
+``equiv()``:
+----------------
+
+    >>> a = dexpr(r'([x],[man(x), walks(x)])')
+    >>> b = dexpr(r'([x],[walks(x), man(x)])')
+    >>> print(a.equiv(b, TableauProver()))
+    True
+
+
+``replace()``:
+--------------
+
+    >>> a = dexpr(r'a')
+    >>> w = dexpr(r'w')
+    >>> x = dexpr(r'x')
+    >>> y = dexpr(r'y')
+    >>> z = dexpr(r'z')
+
+
+replace bound
+-------------
+
+    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(x.variable, a, False))
+    ([x],[give(x,y,z)])
+    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(x.variable, a, True))
+    ([a],[give(a,y,z)])
+
+replace unbound
+---------------
+
+    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, a, False))
+    ([x],[give(x,a,z)])
+    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, a, True))
+    ([x],[give(x,a,z)])
+
+replace unbound with bound
+--------------------------
+
+    >>> dexpr(r'([x],[give(x,y,z)])').replace(y.variable, x, False) == \
+    ... dexpr('([z1],[give(z1,x,z)])')
+    True
+    >>> dexpr(r'([x],[give(x,y,z)])').replace(y.variable, x, True) == \
+    ... dexpr('([z1],[give(z1,x,z)])')
+    True
+
+replace unbound with unbound
+----------------------------
+
+    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, z, False))
+    ([x],[give(x,z,z)])
+    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, z, True))
+    ([x],[give(x,z,z)])
+
+
+replace unbound
+---------------
+
+    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, False))
+    (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
+    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, True))
+    (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
+
+replace bound
+-------------
+
+    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(x.variable, a, False))
+    (([x],[P(x,y,z)]) + ([y],[Q(x,y,z)]))
+    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(x.variable, a, True))
+    (([a],[P(a,y,z)]) + ([y],[Q(a,y,z)]))
+
+replace unbound with unbound
+----------------------------
+
+    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, False))
+    (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
+    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, True))
+    (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
+
+replace unbound with bound on same side
+---------------------------------------
+
+    >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(z.variable, x, False) == \
+    ... dexpr(r'(([z1],[P(z1,y,x)]) + ([y],[Q(z1,y,w)]))')
+    True
+    >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(z.variable, x, True) == \
+    ... dexpr(r'(([z1],[P(z1,y,x)]) + ([y],[Q(z1,y,w)]))')
+    True
+
+replace unbound with bound on other side
+----------------------------------------
+
+    >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(w.variable, x, False) == \
+    ... dexpr(r'(([z1],[P(z1,y,z)]) + ([y],[Q(z1,y,x)]))')
+    True
+    >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(w.variable, x, True) == \
+    ... dexpr(r'(([z1],[P(z1,y,z)]) + ([y],[Q(z1,y,x)]))')
+    True
+
+replace unbound with double bound
+---------------------------------
+
+    >>> dexpr(r'([x],[P(x,y,z)])+([x],[Q(x,y,w)])').replace(z.variable, x, False) == \
+    ... dexpr(r'(([z1],[P(z1,y,x)]) + ([z1],[Q(z1,y,w)]))')
+    True
+    >>> dexpr(r'([x],[P(x,y,z)])+([x],[Q(x,y,w)])').replace(z.variable, x, True) == \
+    ... dexpr(r'(([z1],[P(z1,y,x)]) + ([z1],[Q(z1,y,w)]))')
+    True
+
+
+regression tests
+----------------
+
+    >>> d = dexpr('([x],[A(c), ([y],[B(x,y,z,a)])->([z],[C(x,y,z,a)])])')
+    >>> print(d)
+    ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
+    >>> print(d.pretty_format())
+     ____________________________________
+    | x                                  |
+    |------------------------------------|
+    | A(c)                               |
+    |   ____________      ____________   |
+    |  | y          |    | z          |  |
+    | (|------------| -> |------------|) |
+    |  | B(x,y,z,a) |    | C(x,y,z,a) |  |
+    |  |____________|    |____________|  |
+    |____________________________________|
+    >>> print(str(d))
+    ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
+    >>> print(d.fol())
+    exists x.(A(c) & all y.(B(x,y,z,a) -> exists z.C(x,y,z,a)))
+    >>> print(d.replace(Variable('a'), DrtVariableExpression(Variable('r'))))
+    ([x],[A(c), (([y],[B(x,y,z,r)]) -> ([z],[C(x,y,z,r)]))])
+    >>> print(d.replace(Variable('x'), DrtVariableExpression(Variable('r'))))
+    ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
+    >>> print(d.replace(Variable('y'), DrtVariableExpression(Variable('r'))))
+    ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
+    >>> print(d.replace(Variable('z'), DrtVariableExpression(Variable('r'))))
+    ([x],[A(c), (([y],[B(x,y,r,a)]) -> ([z],[C(x,y,z,a)]))])
+    >>> print(d.replace(Variable('x'), DrtVariableExpression(Variable('r')), True))
+    ([r],[A(c), (([y],[B(r,y,z,a)]) -> ([z],[C(r,y,z,a)]))])
+    >>> print(d.replace(Variable('y'), DrtVariableExpression(Variable('r')), True))
+    ([x],[A(c), (([r],[B(x,r,z,a)]) -> ([z],[C(x,r,z,a)]))])
+    >>> print(d.replace(Variable('z'), DrtVariableExpression(Variable('r')), True))
+    ([x],[A(c), (([y],[B(x,y,r,a)]) -> ([r],[C(x,y,r,a)]))])
+    >>> print(d == dexpr('([l],[A(c), ([m],[B(l,m,z,a)])->([n],[C(l,m,n,a)])])'))
+    True
+    >>> d = dexpr('([],[([x,y],[B(x,y,h), ([a,b],[dee(x,a,g)])])->([z,w],[cee(x,y,f), ([c,d],[E(x,c,d,e)])])])')
+    >>> sorted(d.free())
+    [Variable('B'), Variable('E'), Variable('e'), Variable('f'), Variable('g'), Variable('h')]
+    >>> sorted(d.variables())
+    [Variable('B'), Variable('E'), Variable('e'), Variable('f'), Variable('g'), Variable('h')]
+    >>> sorted(d.get_refs(True))
+    [Variable('a'), Variable('b'), Variable('c'), Variable('d'), Variable('w'), Variable('x'), Variable('y'), Variable('z')]
+    >>> sorted(d.conds[0].get_refs(False))
+    [Variable('x'), Variable('y')]
+    >>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)])->([],[C(x,y)]), ([x,y],[D(x,y)])->([],[E(x,y)]), ([],[F(x,y)])->([x,y],[G(x,y)])])').eliminate_equality())
+    ([x],[A(x,x), (([],[B(x,x)]) -> ([],[C(x,x)])), (([x,y],[D(x,y)]) -> ([],[E(x,y)])), (([],[F(x,x)]) -> ([x,y],[G(x,y)]))])
+    >>> print(dexpr('([x,y],[A(x,y), (x=y)]) -> ([],[B(x,y)])').eliminate_equality())
+    (([x],[A(x,x)]) -> ([],[B(x,x)]))
+    >>> print(dexpr('([x,y],[A(x,y)]) -> ([],[B(x,y), (x=y)])').eliminate_equality())
+    (([x,y],[A(x,y)]) -> ([],[B(x,x)]))
+    >>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)])])').eliminate_equality())
+    ([x],[A(x,x), ([],[B(x,x)])])
+    >>> print(dexpr('([x,y],[A(x,y), ([],[B(x,y), (x=y)])])').eliminate_equality())
+    ([x,y],[A(x,y), ([],[B(x,x)])])
+    >>> print(dexpr('([z8 z9 z10],[A(z8), z8=z10, z9=z10, B(z9), C(z10), D(z10)])').eliminate_equality())
+    ([z9],[A(z9), B(z9), C(z9), D(z9)])
+
+    >>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)]), ([x,y],[C(x,y)])])').eliminate_equality())
+    ([x],[A(x,x), ([],[B(x,x)]), ([x,y],[C(x,y)])])
+    >>> print(dexpr('([x,y],[A(x,y)]) + ([],[B(x,y), (x=y)]) + ([],[C(x,y)])').eliminate_equality())
+    ([x],[A(x,x), B(x,x), C(x,x)])
+    >>> print(dexpr('([x,y],[B(x,y)])+([x,y],[C(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
+    (([x,y],[B(x,y)]) + ([x,y],[C(x,y)]))
+    >>> print(dexpr('(([x,y],[B(x,y)])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
+    (([x,y],[B(x,y)]) + ([],[C(x,y)]) + ([],[D(x,y)]))
+    >>> print(dexpr('(([],[B(x,y)])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
+    (([],[B(x,x)]) + ([],[C(x,x)]) + ([],[D(x,x)]))
+    >>> print(dexpr('(([],[B(x,y), ([x,y],[A(x,y)])])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))).normalize())
+    (([],[B(z3,z1), ([z2,z3],[A(z3,z2)])]) + ([],[C(z3,z1)]) + ([],[D(z3,z1)]))
+
+
+Parse errors
+============
+
+    >>> def parse_error(drtstring):
+    ...     try: dexpr(drtstring)
+    ...     except logic.LogicalExpressionException as e: print(e)
+
+    >>> parse_error(r'')
+    End of input found.  Expression expected.
+    <BLANKLINE>
+    ^
+    >>> parse_error(r'(')
+    End of input found.  Expression expected.
+    (
+     ^
+    >>> parse_error(r'()')
+    Unexpected token: ')'.  Expression expected.
+    ()
+     ^
+    >>> parse_error(r'([')
+    End of input found.  Expected token ']'.
+    ([
+      ^
+    >>> parse_error(r'([,')
+    ',' is an illegal variable name.  Constants may not be quantified.
+    ([,
+      ^
+    >>> parse_error(r'([x,')
+    End of input found.  Variable expected.
+    ([x,
+        ^
+    >>> parse_error(r'([]')
+    End of input found.  Expected token '['.
+    ([]
+       ^
+    >>> parse_error(r'([][')
+    End of input found.  Expected token ']'.
+    ([][
+        ^
+    >>> parse_error(r'([][,')
+    Unexpected token: ','.  Expression expected.
+    ([][,
+        ^
+    >>> parse_error(r'([][]')
+    End of input found.  Expected token ')'.
+    ([][]
+         ^
+    >>> parse_error(r'([x][man(x)]) |')
+    End of input found.  Expression expected.
+    ([x][man(x)]) |
+                   ^
+
+Pretty Printing
+===============
+
+    >>> dexpr(r"([],[])").pretty_print()
+     __
+    |  |
+    |--|
+    |__|
+
+    >>> dexpr(r"([],[([x],[big(x), dog(x)]) -> ([],[bark(x)]) -([x],[walk(x)])])").pretty_print()
+     _____________________________
+    |                             |
+    |-----------------------------|
+    |   ________      _________   |
+    |  | x      |    |         |  |
+    | (|--------| -> |---------|) |
+    |  | big(x) |    | bark(x) |  |
+    |  | dog(x) |    |_________|  |
+    |  |________|                 |
+    |      _________              |
+    |     | x       |             |
+    | __  |---------|             |
+    |   | | walk(x) |             |
+    |     |_________|             |
+    |_____________________________|
+
+    >>> dexpr(r"([x,y],[x=y]) + ([z],[dog(z), walk(z)])").pretty_print()
+      _________     _________
+     | x y     |   | z       |
+    (|---------| + |---------|)
+     | (x = y) |   | dog(z)  |
+     |_________|   | walk(z) |
+                   |_________|
+
+    >>> dexpr(r"([],[([x],[]) | ([y],[]) | ([z],[dog(z), walk(z)])])").pretty_print()
+     _______________________________
+    |                               |
+    |-------------------------------|
+    |   ___     ___     _________   |
+    |  | x |   | y |   | z       |  |
+    | (|---| | |---| | |---------|) |
+    |  |___|   |___|   | dog(z)  |  |
+    |                  | walk(z) |  |
+    |                  |_________|  |
+    |_______________________________|
+
+    >>> dexpr(r"\P.\Q.(([x],[]) + P(x) + Q(x))(\x.([],[dog(x)]))").pretty_print()
+              ___                        ________
+     \       | x |                 \    |        |
+     /\ P Q.(|---| + P(x) + Q(x))( /\ x.|--------|)
+             |___|                      | dog(x) |
+                                        |________|
+
+
diff --git a/nlp_resource_data/nltk/test/featgram.doctest b/nlp_resource_data/nltk/test/featgram.doctest
new file mode 100644 (file)
index 0000000..b866978
--- /dev/null
@@ -0,0 +1,606 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=========================
+ Feature Grammar Parsing
+=========================
+
+.. include:: ../../../nltk_book/definitions.rst
+
+Grammars can be parsed from strings.
+
+    >>> import nltk
+    >>> from nltk import grammar, parse
+    >>> g = """
+    ... % start DP
+    ... DP[AGR=?a] -> D[AGR=?a] N[AGR=?a]
+    ... D[AGR=[NUM='sg', PERS=3]] -> 'this' | 'that'
+    ... D[AGR=[NUM='pl', PERS=3]] -> 'these' | 'those'
+    ... D[AGR=[NUM='pl', PERS=1]] -> 'we'
+    ... D[AGR=[PERS=2]] -> 'you'
+    ... N[AGR=[NUM='sg', GND='m']] -> 'boy'
+    ... N[AGR=[NUM='pl', GND='m']] -> 'boys'
+    ... N[AGR=[NUM='sg', GND='f']] -> 'girl'
+    ... N[AGR=[NUM='pl', GND='f']] -> 'girls'
+    ... N[AGR=[NUM='sg']] -> 'student'
+    ... N[AGR=[NUM='pl']] -> 'students'
+    ... """
+    >>> grammar = grammar.FeatureGrammar.fromstring(g)
+    >>> tokens = 'these girls'.split()
+    >>> parser = parse.FeatureEarleyChartParser(grammar)
+    >>> trees = parser.parse(tokens)
+    >>> for tree in trees: print(tree)
+    (DP[AGR=[GND='f', NUM='pl', PERS=3]]
+      (D[AGR=[NUM='pl', PERS=3]] these)
+      (N[AGR=[GND='f', NUM='pl']] girls))
+
+In general, when we are trying to develop even a very small grammar,
+it is convenient to put the rules in a file where they can be edited,
+tested and revised. Let's assume that we have saved feat0cfg_ as a file named
+``'feat0.fcfg'`` and placed it in the NLTK ``data`` directory. We can
+inspect it as follows:
+
+.. _feat0cfg: http://nltk.svn.sourceforge.net/svnroot/nltk/trunk/nltk/data/grammars/feat0.fcfg
+
+    >>> nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')
+    % start S
+    # ###################
+    # Grammar Productions
+    # ###################
+    # S expansion productions
+    S -> NP[NUM=?n] VP[NUM=?n]
+    # NP expansion productions
+    NP[NUM=?n] -> N[NUM=?n]
+    NP[NUM=?n] -> PropN[NUM=?n]
+    NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
+    NP[NUM=pl] -> N[NUM=pl]
+    # VP expansion productions
+    VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
+    VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
+    # ###################
+    # Lexical Productions
+    # ###################
+    Det[NUM=sg] -> 'this' | 'every'
+    Det[NUM=pl] -> 'these' | 'all'
+    Det -> 'the' | 'some' | 'several'
+    PropN[NUM=sg]-> 'Kim' | 'Jody'
+    N[NUM=sg] -> 'dog' | 'girl' | 'car' | 'child'
+    N[NUM=pl] -> 'dogs' | 'girls' | 'cars' | 'children'
+    IV[TENSE=pres,  NUM=sg] -> 'disappears' | 'walks'
+    TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes'
+    IV[TENSE=pres,  NUM=pl] -> 'disappear' | 'walk'
+    TV[TENSE=pres, NUM=pl] -> 'see' | 'like'
+    IV[TENSE=past] -> 'disappeared' | 'walked'
+    TV[TENSE=past] -> 'saw' | 'liked'
+
+Assuming we have saved feat0cfg_ as a file named
+``'feat0.fcfg'``, the function ``parse.load_parser`` allows us to
+read the grammar into NLTK, ready for use in parsing.
+
+
+    >>> cp = parse.load_parser('grammars/book_grammars/feat0.fcfg', trace=1)
+    >>> sent = 'Kim likes children'
+    >>> tokens = sent.split()
+    >>> tokens
+    ['Kim', 'likes', 'children']
+    >>> trees = cp.parse(tokens)
+    |.Kim .like.chil.|
+    |[----]    .    .| [0:1] 'Kim'
+    |.    [----]    .| [1:2] 'likes'
+    |.    .    [----]| [2:3] 'children'
+    |[----]    .    .| [0:1] PropN[NUM='sg'] -> 'Kim' *
+    |[----]    .    .| [0:1] NP[NUM='sg'] -> PropN[NUM='sg'] *
+    |[---->    .    .| [0:1] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'sg'}
+    |.    [----]    .| [1:2] TV[NUM='sg', TENSE='pres'] -> 'likes' *
+    |.    [---->    .| [1:2] VP[NUM=?n, TENSE=?t] -> TV[NUM=?n, TENSE=?t] * NP[] {?n: 'sg', ?t: 'pres'}
+    |.    .    [----]| [2:3] N[NUM='pl'] -> 'children' *
+    |.    .    [----]| [2:3] NP[NUM='pl'] -> N[NUM='pl'] *
+    |.    .    [---->| [2:3] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'pl'}
+    |.    [---------]| [1:3] VP[NUM='sg', TENSE='pres'] -> TV[NUM='sg', TENSE='pres'] NP[] *
+    |[==============]| [0:3] S[] -> NP[NUM='sg'] VP[NUM='sg'] *
+    >>> for tree in trees: print(tree)
+    (S[]
+      (NP[NUM='sg'] (PropN[NUM='sg'] Kim))
+      (VP[NUM='sg', TENSE='pres']
+        (TV[NUM='sg', TENSE='pres'] likes)
+        (NP[NUM='pl'] (N[NUM='pl'] children))))
+
+The parser works directly with
+the underspecified productions given by the grammar. That is, the
+Predictor rule does not attempt to compile out all admissible feature
+combinations before trying to expand the non-terminals on the left hand
+side of a production. However, when the Scanner matches an input word
+against a lexical production that has been predicted, the new edge will
+typically contain fully specified features; e.g., the edge
+[PropN[`num`:feat: = `sg`:fval:] |rarr| 'Kim', (0, 1)]. Recall from
+Chapter 8 that the Fundamental (or Completer) Rule in
+standard CFGs is used to combine an incomplete edge that's expecting a
+nonterminal *B* with a following, complete edge whose left hand side
+matches *B*. In our current setting, rather than checking for a
+complete match, we test whether the expected category *B* will
+`unify`:dt: with the left hand side *B'* of a following complete
+edge. We will explain in more detail in Section 9.2 how
+unification works; for the moment, it is enough to know that as a
+result of unification, any variable values of features in *B* will be
+instantiated by constant values in the corresponding feature structure
+in *B'*, and these instantiated values will be used in the new edge
+added by the Completer. This instantiation can be seen, for example,
+in the edge
+[NP [`num`:feat:\ =\ `sg`:fval:] |rarr| PropN[`num`:feat:\ =\ `sg`:fval:] |dot|, (0, 1)]
+in Example 9.2, where the feature `num`:feat: has been assigned the value `sg`:fval:.
+
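+As a minimal sketch of this kind of instantiation (using ``nltk.unify``
+directly, outside the chart machinery), unifying an underspecified
+structure containing a variable with a fully specified one binds the
+variable to the constant value:
+
+    >>> fs_b = nltk.FeatStruct("[NUM=?n]")       # plays the role of B
+    >>> fs_bprime = nltk.FeatStruct("[NUM=sg]")  # plays the role of B'
+    >>> print(nltk.unify(fs_b, fs_bprime))
+    [ NUM = 'sg' ]
+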
+Feature structures in NLTK are ... Atomic feature values can be strings or
+integers.
+
+    >>> fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
+    >>> print(fs1)
+    [ NUM   = 'sg'   ]
+    [ TENSE = 'past' ]
+
+We can think of a feature structure as being like a Python dictionary,
+and access its values by indexing in the usual way.
+
+    >>> fs1 = nltk.FeatStruct(PER=3, NUM='pl', GND='fem')
+    >>> print(fs1['GND'])
+    fem
+
+We can also define feature structures which have complex values, as
+discussed earlier.
+
+    >>> fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
+    >>> print(fs2)
+    [       [ GND = 'fem' ] ]
+    [ AGR = [ NUM = 'pl'  ] ]
+    [       [ PER = 3     ] ]
+    [                       ]
+    [ POS = 'N'             ]
+    >>> print(fs2['AGR'])
+    [ GND = 'fem' ]
+    [ NUM = 'pl'  ]
+    [ PER = 3     ]
+    >>> print(fs2['AGR']['PER'])
+    3
+
+Feature structures can also be constructed by passing a string directly
+to the ``nltk.FeatStruct`` constructor. Note that in this case, atomic
+feature values do not need to be enclosed in quotes.
+
+    >>> f1 = nltk.FeatStruct("[NUMBER = sg]")
+    >>> f2 = nltk.FeatStruct("[PERSON = 3]")
+    >>> print(nltk.unify(f1, f2))
+    [ NUMBER = 'sg' ]
+    [ PERSON = 3    ]
+
+    >>> f1 = nltk.FeatStruct("[A = [B = b, D = d]]")
+    >>> f2 = nltk.FeatStruct("[A = [C = c, D = d]]")
+    >>> print(nltk.unify(f1, f2))
+    [     [ B = 'b' ] ]
+    [ A = [ C = 'c' ] ]
+    [     [ D = 'd' ] ]
+
+
+Feature Structures as Graphs
+----------------------------
+
+Feature structures are not inherently tied to linguistic objects; they are
+general-purpose structures for representing knowledge. For example, we
+could encode information about a person in a feature structure:
+
+    >>> person01 = nltk.FeatStruct("[NAME=Lee, TELNO='01 27 86 42 96',AGE=33]")
+    >>> print(person01)
+    [ AGE   = 33               ]
+    [ NAME  = 'Lee'            ]
+    [ TELNO = '01 27 86 42 96' ]
+
+There are a number of notations for representing reentrancy in
+matrix-style representations of feature structures. In NLTK, we adopt
+the following convention: the first occurrence of a shared feature structure
+is prefixed with an integer in parentheses, such as ``(1)``, and any
+subsequent reference to that structure uses the notation
+``->(1)``, as shown below.
+
+
+    >>> fs = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
+    ...                               SPOUSE=[NAME=Kim, ADDRESS->(1)]]""")
+    >>> print(fs)
+    [ ADDRESS = (1) [ NUMBER = 74           ] ]
+    [               [ STREET = 'rue Pascal' ] ]
+    [                                         ]
+    [ NAME    = 'Lee'                         ]
+    [                                         ]
+    [ SPOUSE  = [ ADDRESS -> (1)  ]           ]
+    [           [ NAME    = 'Kim' ]           ]
+
+There can be any number of tags within a single feature structure.
+
+    >>> fs3 = nltk.FeatStruct("[A=(1)[B=b], C=(2)[], D->(1), E->(2)]")
+    >>> print(fs3)
+    [ A = (1) [ B = 'b' ] ]
+    [                     ]
+    [ C = (2) []          ]
+    [                     ]
+    [ D -> (1)            ]
+    [ E -> (2)            ]
+    >>> fs1 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal')
+    >>> fs2 = nltk.FeatStruct(CITY='Paris')
+    >>> print(nltk.unify(fs1, fs2))
+    [ CITY   = 'Paris'      ]
+    [ NUMBER = 74           ]
+    [ STREET = 'rue Pascal' ]
+
+Unification is symmetric:
+
+    >>> nltk.unify(fs1, fs2) == nltk.unify(fs2, fs1)
+    True
+
+Unification is associative:
+
+    >>> fs3 = nltk.FeatStruct(TELNO='01 27 86 42 96')
+    >>> nltk.unify(nltk.unify(fs1, fs2), fs3) == nltk.unify(fs1, nltk.unify(fs2, fs3))
+    True
+
+Unification between `FS`:math:\ :subscript:`0` and `FS`:math:\
+:subscript:`1` will fail if the two feature structures share a path |pi|,
+but the value of |pi| in `FS`:math:\ :subscript:`0` is a distinct
+atom from the value of |pi| in `FS`:math:\ :subscript:`1`. In NLTK,
+this is implemented by setting the result of unification to be
+``None``.
+
+    >>> fs0 = nltk.FeatStruct(A='a')
+    >>> fs1 = nltk.FeatStruct(A='b')
+    >>> print(nltk.unify(fs0, fs1))
+    None
+
+Now, if we look at how unification interacts with structure-sharing,
+things become really interesting.
+
+
+
+    >>> fs0 = nltk.FeatStruct("""[NAME=Lee,
+    ...                                ADDRESS=[NUMBER=74,
+    ...                                         STREET='rue Pascal'],
+    ...                                SPOUSE= [NAME=Kim,
+    ...                                         ADDRESS=[NUMBER=74,
+    ...                                                  STREET='rue Pascal']]]""")
+    >>> print(fs0)
+    [ ADDRESS = [ NUMBER = 74           ]               ]
+    [           [ STREET = 'rue Pascal' ]               ]
+    [                                                   ]
+    [ NAME    = 'Lee'                                   ]
+    [                                                   ]
+    [           [ ADDRESS = [ NUMBER = 74           ] ] ]
+    [ SPOUSE  = [           [ STREET = 'rue Pascal' ] ] ]
+    [           [                                     ] ]
+    [           [ NAME    = 'Kim'                     ] ]
+
+
+    >>> fs1 = nltk.FeatStruct("[SPOUSE=[ADDRESS=[CITY=Paris]]]")
+    >>> print(nltk.unify(fs0, fs1))
+    [ ADDRESS = [ NUMBER = 74           ]               ]
+    [           [ STREET = 'rue Pascal' ]               ]
+    [                                                   ]
+    [ NAME    = 'Lee'                                   ]
+    [                                                   ]
+    [           [           [ CITY   = 'Paris'      ] ] ]
+    [           [ ADDRESS = [ NUMBER = 74           ] ] ]
+    [ SPOUSE  = [           [ STREET = 'rue Pascal' ] ] ]
+    [           [                                     ] ]
+    [           [ NAME    = 'Kim'                     ] ]
+
+    >>> fs2 = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
+    ...                                SPOUSE=[NAME=Kim, ADDRESS->(1)]]""")
+
+
+    >>> print(fs2)
+    [ ADDRESS = (1) [ NUMBER = 74           ] ]
+    [               [ STREET = 'rue Pascal' ] ]
+    [                                         ]
+    [ NAME    = 'Lee'                         ]
+    [                                         ]
+    [ SPOUSE  = [ ADDRESS -> (1)  ]           ]
+    [           [ NAME    = 'Kim' ]           ]
+
+
+    >>> print(nltk.unify(fs2, fs1))
+    [               [ CITY   = 'Paris'      ] ]
+    [ ADDRESS = (1) [ NUMBER = 74           ] ]
+    [               [ STREET = 'rue Pascal' ] ]
+    [                                         ]
+    [ NAME    = 'Lee'                         ]
+    [                                         ]
+    [ SPOUSE  = [ ADDRESS -> (1)  ]           ]
+    [           [ NAME    = 'Kim' ]           ]
+
+
+    >>> fs1 = nltk.FeatStruct("[ADDRESS1=[NUMBER=74, STREET='rue Pascal']]")
+    >>> fs2 = nltk.FeatStruct("[ADDRESS1=?x, ADDRESS2=?x]")
+    >>> print(fs2)
+    [ ADDRESS1 = ?x ]
+    [ ADDRESS2 = ?x ]
+    >>> print(nltk.unify(fs1, fs2))
+    [ ADDRESS1 = (1) [ NUMBER = 74           ] ]
+    [                [ STREET = 'rue Pascal' ] ]
+    [                                          ]
+    [ ADDRESS2 -> (1)                          ]
+
+
+
+
+    >>> sent = 'who do you claim that you like'
+    >>> tokens = sent.split()
+    >>> cp = parse.load_parser('grammars/book_grammars/feat1.fcfg', trace=1)
+    >>> trees = cp.parse(tokens)
+    |.w.d.y.c.t.y.l.|
+    |[-] . . . . . .| [0:1] 'who'
+    |. [-] . . . . .| [1:2] 'do'
+    |. . [-] . . . .| [2:3] 'you'
+    |. . . [-] . . .| [3:4] 'claim'
+    |. . . . [-] . .| [4:5] 'that'
+    |. . . . . [-] .| [5:6] 'you'
+    |. . . . . . [-]| [6:7] 'like'
+    |# . . . . . . .| [0:0] NP[]/NP[] -> *
+    |. # . . . . . .| [1:1] NP[]/NP[] -> *
+    |. . # . . . . .| [2:2] NP[]/NP[] -> *
+    |. . . # . . . .| [3:3] NP[]/NP[] -> *
+    |. . . . # . . .| [4:4] NP[]/NP[] -> *
+    |. . . . . # . .| [5:5] NP[]/NP[] -> *
+    |. . . . . . # .| [6:6] NP[]/NP[] -> *
+    |. . . . . . . #| [7:7] NP[]/NP[] -> *
+    |[-] . . . . . .| [0:1] NP[+WH] -> 'who' *
+    |[-> . . . . . .| [0:1] S[-INV] -> NP[] * VP[] {}
+    |[-> . . . . . .| [0:1] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
+    |[-> . . . . . .| [0:1] S[-INV] -> NP[] * S[]/NP[] {}
+    |. [-] . . . . .| [1:2] V[+AUX] -> 'do' *
+    |. [-> . . . . .| [1:2] S[+INV] -> V[+AUX] * NP[] VP[] {}
+    |. [-> . . . . .| [1:2] S[+INV]/?x[] -> V[+AUX] * NP[] VP[]/?x[] {}
+    |. [-> . . . . .| [1:2] VP[] -> V[+AUX] * VP[] {}
+    |. [-> . . . . .| [1:2] VP[]/?x[] -> V[+AUX] * VP[]/?x[] {}
+    |. . [-] . . . .| [2:3] NP[-WH] -> 'you' *
+    |. . [-> . . . .| [2:3] S[-INV] -> NP[] * VP[] {}
+    |. . [-> . . . .| [2:3] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
+    |. . [-> . . . .| [2:3] S[-INV] -> NP[] * S[]/NP[] {}
+    |. [---> . . . .| [1:3] S[+INV] -> V[+AUX] NP[] * VP[] {}
+    |. [---> . . . .| [1:3] S[+INV]/?x[] -> V[+AUX] NP[] * VP[]/?x[] {}
+    |. . . [-] . . .| [3:4] V[-AUX, SUBCAT='clause'] -> 'claim' *
+    |. . . [-> . . .| [3:4] VP[] -> V[-AUX, SUBCAT='clause'] * SBar[] {}
+    |. . . [-> . . .| [3:4] VP[]/?x[] -> V[-AUX, SUBCAT='clause'] * SBar[]/?x[] {}
+    |. . . . [-] . .| [4:5] Comp[] -> 'that' *
+    |. . . . [-> . .| [4:5] SBar[] -> Comp[] * S[-INV] {}
+    |. . . . [-> . .| [4:5] SBar[]/?x[] -> Comp[] * S[-INV]/?x[] {}
+    |. . . . . [-] .| [5:6] NP[-WH] -> 'you' *
+    |. . . . . [-> .| [5:6] S[-INV] -> NP[] * VP[] {}
+    |. . . . . [-> .| [5:6] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
+    |. . . . . [-> .| [5:6] S[-INV] -> NP[] * S[]/NP[] {}
+    |. . . . . . [-]| [6:7] V[-AUX, SUBCAT='trans'] -> 'like' *
+    |. . . . . . [->| [6:7] VP[] -> V[-AUX, SUBCAT='trans'] * NP[] {}
+    |. . . . . . [->| [6:7] VP[]/?x[] -> V[-AUX, SUBCAT='trans'] * NP[]/?x[] {}
+    |. . . . . . [-]| [6:7] VP[]/NP[] -> V[-AUX, SUBCAT='trans'] NP[]/NP[] *
+    |. . . . . [---]| [5:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
+    |. . . . [-----]| [4:7] SBar[]/NP[] -> Comp[] S[-INV]/NP[] *
+    |. . . [-------]| [3:7] VP[]/NP[] -> V[-AUX, SUBCAT='clause'] SBar[]/NP[] *
+    |. . [---------]| [2:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
+    |. [-----------]| [1:7] S[+INV]/NP[] -> V[+AUX] NP[] VP[]/NP[] *
+    |[=============]| [0:7] S[-INV] -> NP[] S[]/NP[] *
+
+    >>> trees = list(trees)
+    >>> for tree in trees: print(tree)
+    (S[-INV]
+      (NP[+WH] who)
+      (S[+INV]/NP[]
+        (V[+AUX] do)
+        (NP[-WH] you)
+        (VP[]/NP[]
+          (V[-AUX, SUBCAT='clause'] claim)
+          (SBar[]/NP[]
+            (Comp[] that)
+            (S[-INV]/NP[]
+              (NP[-WH] you)
+              (VP[]/NP[] (V[-AUX, SUBCAT='trans'] like) (NP[]/NP[] )))))))
+
+A different parser should give the same parse trees, but perhaps in a different order:
+
+    >>> cp2 = parse.load_parser('grammars/book_grammars/feat1.fcfg', trace=1,
+    ...                         parser=parse.FeatureEarleyChartParser)
+    >>> trees2 = cp2.parse(tokens)
+    |.w.d.y.c.t.y.l.|
+    |[-] . . . . . .| [0:1] 'who'
+    |. [-] . . . . .| [1:2] 'do'
+    |. . [-] . . . .| [2:3] 'you'
+    |. . . [-] . . .| [3:4] 'claim'
+    |. . . . [-] . .| [4:5] 'that'
+    |. . . . . [-] .| [5:6] 'you'
+    |. . . . . . [-]| [6:7] 'like'
+    |> . . . . . . .| [0:0] S[-INV] -> * NP[] VP[] {}
+    |> . . . . . . .| [0:0] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
+    |> . . . . . . .| [0:0] S[-INV] -> * NP[] S[]/NP[] {}
+    |> . . . . . . .| [0:0] S[-INV] -> * Adv[+NEG] S[+INV] {}
+    |> . . . . . . .| [0:0] S[+INV] -> * V[+AUX] NP[] VP[] {}
+    |> . . . . . . .| [0:0] S[+INV]/?x[] -> * V[+AUX] NP[] VP[]/?x[] {}
+    |> . . . . . . .| [0:0] NP[+WH] -> * 'who' {}
+    |[-] . . . . . .| [0:1] NP[+WH] -> 'who' *
+    |[-> . . . . . .| [0:1] S[-INV] -> NP[] * VP[] {}
+    |[-> . . . . . .| [0:1] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
+    |[-> . . . . . .| [0:1] S[-INV] -> NP[] * S[]/NP[] {}
+    |. > . . . . . .| [1:1] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
+    |. > . . . . . .| [1:1] S[+INV]/?x[] -> * V[+AUX] NP[] VP[]/?x[] {}
+    |. > . . . . . .| [1:1] V[+AUX] -> * 'do' {}
+    |. > . . . . . .| [1:1] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
+    |. > . . . . . .| [1:1] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
+    |. > . . . . . .| [1:1] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
+    |. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='intrans'] {}
+    |. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='trans'] NP[] {}
+    |. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='clause'] SBar[] {}
+    |. > . . . . . .| [1:1] VP[] -> * V[+AUX] VP[] {}
+    |. [-] . . . . .| [1:2] V[+AUX] -> 'do' *
+    |. [-> . . . . .| [1:2] S[+INV]/?x[] -> V[+AUX] * NP[] VP[]/?x[] {}
+    |. [-> . . . . .| [1:2] VP[]/?x[] -> V[+AUX] * VP[]/?x[] {}
+    |. [-> . . . . .| [1:2] VP[] -> V[+AUX] * VP[] {}
+    |. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='intrans'] {}
+    |. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='trans'] NP[] {}
+    |. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='clause'] SBar[] {}
+    |. . > . . . . .| [2:2] VP[] -> * V[+AUX] VP[] {}
+    |. . > . . . . .| [2:2] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
+    |. . > . . . . .| [2:2] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
+    |. . > . . . . .| [2:2] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
+    |. . > . . . . .| [2:2] NP[-WH] -> * 'you' {}
+    |. . [-] . . . .| [2:3] NP[-WH] -> 'you' *
+    |. [---> . . . .| [1:3] S[+INV]/?x[] -> V[+AUX] NP[] * VP[]/?x[] {}
+    |. . . > . . . .| [3:3] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
+    |. . . > . . . .| [3:3] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
+    |. . . > . . . .| [3:3] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
+    |. . . > . . . .| [3:3] V[-AUX, SUBCAT='clause'] -> * 'claim' {}
+    |. . . [-] . . .| [3:4] V[-AUX, SUBCAT='clause'] -> 'claim' *
+    |. . . [-> . . .| [3:4] VP[]/?x[] -> V[-AUX, SUBCAT='clause'] * SBar[]/?x[] {}
+    |. . . . > . . .| [4:4] SBar[]/?x[] -> * Comp[] S[-INV]/?x[] {}
+    |. . . . > . . .| [4:4] Comp[] -> * 'that' {}
+    |. . . . [-] . .| [4:5] Comp[] -> 'that' *
+    |. . . . [-> . .| [4:5] SBar[]/?x[] -> Comp[] * S[-INV]/?x[] {}
+    |. . . . . > . .| [5:5] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
+    |. . . . . > . .| [5:5] NP[-WH] -> * 'you' {}
+    |. . . . . [-] .| [5:6] NP[-WH] -> 'you' *
+    |. . . . . [-> .| [5:6] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
+    |. . . . . . > .| [6:6] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
+    |. . . . . . > .| [6:6] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
+    |. . . . . . > .| [6:6] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
+    |. . . . . . > .| [6:6] V[-AUX, SUBCAT='trans'] -> * 'like' {}
+    |. . . . . . [-]| [6:7] V[-AUX, SUBCAT='trans'] -> 'like' *
+    |. . . . . . [->| [6:7] VP[]/?x[] -> V[-AUX, SUBCAT='trans'] * NP[]/?x[] {}
+    |. . . . . . . #| [7:7] NP[]/NP[] -> *
+    |. . . . . . [-]| [6:7] VP[]/NP[] -> V[-AUX, SUBCAT='trans'] NP[]/NP[] *
+    |. . . . . [---]| [5:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
+    |. . . . [-----]| [4:7] SBar[]/NP[] -> Comp[] S[-INV]/NP[] *
+    |. . . [-------]| [3:7] VP[]/NP[] -> V[-AUX, SUBCAT='clause'] SBar[]/NP[] *
+    |. [-----------]| [1:7] S[+INV]/NP[] -> V[+AUX] NP[] VP[]/NP[] *
+    |[=============]| [0:7] S[-INV] -> NP[] S[]/NP[] *
+
+    >>> sorted(trees) == sorted(trees2)
+    True
+
+
+Let's load a German grammar:
+
+    >>> cp = parse.load_parser('grammars/book_grammars/german.fcfg', trace=0)
+    >>> sent = 'die Katze sieht den Hund'
+    >>> tokens = sent.split()
+    >>> trees = cp.parse(tokens)
+    >>> for tree in trees: print(tree)
+    (S[]
+      (NP[AGR=[GND='fem', NUM='sg', PER=3], CASE='nom']
+        (Det[AGR=[GND='fem', NUM='sg', PER=3], CASE='nom'] die)
+        (N[AGR=[GND='fem', NUM='sg', PER=3]] Katze))
+      (VP[AGR=[NUM='sg', PER=3]]
+        (TV[AGR=[NUM='sg', PER=3], OBJCASE='acc'] sieht)
+        (NP[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc']
+          (Det[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc'] den)
+          (N[AGR=[GND='masc', NUM='sg', PER=3]] Hund))))
+
+Grammar with Binding Operators
+------------------------------
+The `bindop.fcfg`_ grammar is a semantic grammar that uses lambda
+calculus.  Each element has a core semantics (a single lambda calculus
+expression) and a set of binding operators, which bind variables.
+
+.. _bindop.fcfg: http://nltk.svn.sourceforge.net/svnroot/nltk/trunk/nltk/data/grammars/bindop.fcfg
+
+In order to make the binding operators work right, they need to
+instantiate their bound variable every time they are added to the
+chart.  To do this, we use a special subclass of `Chart`, called
+`InstantiateVarsChart`.
+
+    >>> from nltk.parse.featurechart import InstantiateVarsChart
+    >>> cp = parse.load_parser('grammars/sample_grammars/bindop.fcfg', trace=1,
+    ...                        chart_class=InstantiateVarsChart)
+    >>> print(cp.grammar())
+    Grammar with 15 productions (start state = S[])
+        S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] VP[SEM=[BO=?b2, CORE=?vp]]
+        VP[SEM=[BO={?b1+?b2}, CORE=<?v(?obj)>]] -> TV[SEM=[BO=?b1, CORE=?v]] NP[SEM=[BO=?b2, CORE=?obj]]
+        VP[SEM=?s] -> IV[SEM=?s]
+        NP[SEM=[BO={?b1+?b2+{bo(?det(?n),@x)}}, CORE=<@x>]] -> Det[SEM=[BO=?b1, CORE=?det]] N[SEM=[BO=?b2, CORE=?n]]
+        Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] -> 'a'
+        N[SEM=[BO={/}, CORE=<dog>]] -> 'dog'
+        N[SEM=[BO={/}, CORE=<dog>]] -> 'cat'
+        N[SEM=[BO={/}, CORE=<dog>]] -> 'mouse'
+        IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'barks'
+        IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'eats'
+        IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'walks'
+        TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'feeds'
+        TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'walks'
+        NP[SEM=[BO={bo(\P.P(John),@x)}, CORE=<@x>]] -> 'john'
+        NP[SEM=[BO={bo(\P.P(John),@x)}, CORE=<@x>]] -> 'alex'
+
+A simple intransitive sentence:
+
+    >>> from nltk.sem import logic
+    >>> logic._counter._value = 100
+
+    >>> trees = cp.parse('john barks'.split())
+    |. john.barks.|
+    |[-----]     .| [0:1] 'john'
+    |.     [-----]| [1:2] 'barks'
+    |[-----]     .| [0:1] NP[SEM=[BO={bo(\P.P(John),z101)}, CORE=<z101>]] -> 'john' *
+    |[----->     .| [0:1] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.P(John),z2)}, ?subj: <IndividualVariableExpression z2>}
+    |.     [-----]| [1:2] IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'barks' *
+    |.     [-----]| [1:2] VP[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] *
+    |[===========]| [0:2] S[SEM=[BO={bo(\P.P(John),z2)}, CORE=<bark(z2)>]] -> NP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<z2>]] VP[SEM=[BO={/}, CORE=<\x.bark(x)>]] *
+    >>> for tree in trees: print(tree)
+    (S[SEM=[BO={bo(\P.P(John),z2)}, CORE=<bark(z2)>]]
+      (NP[SEM=[BO={bo(\P.P(John),z101)}, CORE=<z101>]] john)
+      (VP[SEM=[BO={/}, CORE=<\x.bark(x)>]]
+        (IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] barks)))
+
+A transitive sentence:
+
+    >>> trees = cp.parse('john feeds a dog'.split())
+    |.joh.fee. a .dog.|
+    |[---]   .   .   .| [0:1] 'john'
+    |.   [---]   .   .| [1:2] 'feeds'
+    |.   .   [---]   .| [2:3] 'a'
+    |.   .   .   [---]| [3:4] 'dog'
+    |[---]   .   .   .| [0:1] NP[SEM=[BO={bo(\P.P(John),z102)}, CORE=<z102>]] -> 'john' *
+    |[--->   .   .   .| [0:1] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.P(John),z2)}, ?subj: <IndividualVariableExpression z2>}
+    |.   [---]   .   .| [1:2] TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'feeds' *
+    |.   [--->   .   .| [1:2] VP[SEM=[BO={?b1+?b2}, CORE=<?v(?obj)>]] -> TV[SEM=[BO=?b1, CORE=?v]] * NP[SEM=[BO=?b2, CORE=?obj]] {?b1: {/}, ?v: <LambdaExpression \x y.feed(y,x)>}
+    |.   .   [---]   .| [2:3] Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] -> 'a' *
+    |.   .   [--->   .| [2:3] NP[SEM=[BO={?b1+?b2+{bo(?det(?n),@x)}}, CORE=<@x>]] -> Det[SEM=[BO=?b1, CORE=?det]] * N[SEM=[BO=?b2, CORE=?n]] {?b1: {/}, ?det: <LambdaExpression \Q P.exists x.(Q(x) & P(x))>}
+    |.   .   .   [---]| [3:4] N[SEM=[BO={/}, CORE=<dog>]] -> 'dog' *
+    |.   .   [-------]| [2:4] NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z103)}, CORE=<z103>]] -> Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] N[SEM=[BO={/}, CORE=<dog>]] *
+    |.   .   [------->| [2:4] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.exists x.(dog(x) & P(x)),z2)}, ?subj: <IndividualVariableExpression z2>}
+    |.   [-----------]| [1:4] VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]] -> TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<z2>]] *
+    |[===============]| [0:4] S[SEM=[BO={bo(\P.P(John),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]] -> NP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<z2>]] VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<\y.feed(y,z3)>]] *
+
+    >>> for tree in trees: print(tree)
+    (S[SEM=[BO={bo(\P.P(John),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]]
+      (NP[SEM=[BO={bo(\P.P(John),z102)}, CORE=<z102>]] john)
+      (VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]]
+        (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
+        (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z103)}, CORE=<z103>]]
+          (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
+          (N[SEM=[BO={/}, CORE=<dog>]] dog))))
+
+Turn down the verbosity:
+
+    >>> cp = parse.load_parser('grammars/sample_grammars/bindop.fcfg', trace=0,
+    ...                       chart_class=InstantiateVarsChart)
+
+Reuse the same lexical item twice:
+
+    >>> trees = cp.parse('john feeds john'.split())
+    >>> for tree in trees: print(tree)
+    (S[SEM=[BO={bo(\P.P(John),z2), bo(\P.P(John),z3)}, CORE=<feed(z2,z3)>]]
+      (NP[SEM=[BO={bo(\P.P(John),z104)}, CORE=<z104>]] john)
+      (VP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<\y.feed(y,z2)>]]
+        (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
+        (NP[SEM=[BO={bo(\P.P(John),z105)}, CORE=<z105>]] john)))
+
+    >>> trees = cp.parse('a dog feeds a dog'.split())
+    >>> for tree in trees: print(tree)
+    (S[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]]
+      (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z106)}, CORE=<z106>]]
+        (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
+        (N[SEM=[BO={/}, CORE=<dog>]] dog))
+      (VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]]
+        (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
+        (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z107)}, CORE=<z107>]]
+          (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
+          (N[SEM=[BO={/}, CORE=<dog>]] dog))))
diff --git a/nlp_resource_data/nltk/test/featstruct.doctest b/nlp_resource_data/nltk/test/featstruct.doctest
new file mode 100644 (file)
index 0000000..0c14435
--- /dev/null
@@ -0,0 +1,1228 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==================================
+ Feature Structures & Unification
+==================================
+    >>> from nltk.featstruct import FeatStruct
+    >>> from nltk.sem.logic import Variable, VariableExpression, Expression
+
+.. note:: For now, featstruct uses the older lambdalogic semantics
+   module.  Eventually, it should be updated to use the new first
+   order predicate logic module.
+
+Overview
+~~~~~~~~
+A feature structure is a mapping from feature identifiers to feature
+values, where feature values can be simple values (like strings or
+ints), nested feature structures, or variables:
+
+    >>> fs1 = FeatStruct(number='singular', person=3)
+    >>> print(fs1)
+    [ number = 'singular' ]
+    [ person = 3          ]
+
+Feature structures may be nested:
+
+    >>> fs2 = FeatStruct(type='NP', agr=fs1)
+    >>> print(fs2)
+    [ agr  = [ number = 'singular' ] ]
+    [        [ person = 3          ] ]
+    [                                ]
+    [ type = 'NP'                    ]
+
+Variables are used to indicate that two features should be assigned
+the same value.  For example, the following feature structure requires
+that the feature ``fs3['agr']['number']`` be bound to the same value as
+the feature ``fs3['subj']['number']``.
+
+    >>> fs3 = FeatStruct(agr=FeatStruct(number=Variable('?n')),
+    ...                  subj=FeatStruct(number=Variable('?n')))
+    >>> print(fs3)
+    [ agr  = [ number = ?n ] ]
+    [                        ]
+    [ subj = [ number = ?n ] ]
+
+Feature structures are typically used to represent partial information
+about objects.  A feature name that is not mapped to a value stands
+for a feature whose value is unknown (*not* a feature without a
+value).  Two feature structures that represent (potentially
+overlapping) information about the same object can be combined by
+*unification*.
+
+    >>> print(fs2.unify(fs3))
+    [ agr  = [ number = 'singular' ] ]
+    [        [ person = 3          ] ]
+    [                                ]
+    [ subj = [ number = 'singular' ] ]
+    [                                ]
+    [ type = 'NP'                    ]
+
+When two inconsistent feature structures are unified, the unification
+fails and returns ``None``.
+
+    >>> fs4 = FeatStruct(agr=FeatStruct(person=1))
+    >>> print(fs4.unify(fs2))
+    None
+    >>> print(fs2.unify(fs4))
+    None
+
+..
+    >>> del fs1, fs2, fs3, fs4 # clean-up
+
+Feature Structure Types
+-----------------------
+There are actually two types of feature structure:
+
+- *feature dictionaries*, implemented by `FeatDict`, act like
+  Python dictionaries.  Feature identifiers may be strings or
+  instances of the `Feature` class.
+- *feature lists*, implemented by `FeatList`, act like Python
+  lists.  Feature identifiers are integers.
+
+When you construct a feature structure using the `FeatStruct`
+constructor, it will automatically decide which type is appropriate:
+
+    >>> type(FeatStruct(number='singular'))
+    <class 'nltk.featstruct.FeatDict'>
+    >>> type(FeatStruct([1,2,3]))
+    <class 'nltk.featstruct.FeatList'>
+
+Usually, we will just use feature dictionaries; but sometimes feature
+lists can be useful too.  Two feature lists will unify with each other
+only if they have equal lengths, and all of their feature values
+match.  If you wish to write a feature list that contains 'unknown'
+values, you must use variables:
+
+    >>> fs1 = FeatStruct([1,2,Variable('?y')])
+    >>> fs2 = FeatStruct([1,Variable('?x'),3])
+    >>> fs1.unify(fs2)
+    [1, 2, 3]
+
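+As a quick check of the equal-length requirement, feature lists of
+different lengths do not unify, and the failed unification returns
+``None``:
+
+    >>> print(FeatStruct([1, 2]).unify(FeatStruct([1, 2, 3])))
+    None
+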
+..
+    >>> del fs1, fs2 # clean-up
+
+Parsing Feature Structure Strings
+---------------------------------
+Feature structures can be constructed directly from strings.  Often,
+this is more convenient than building them up programmatically.  NLTK can
+parse most feature strings to produce the corresponding feature
+structures.  (But you must restrict your base feature values to
+strings, ints, logic expressions (`nltk.sem.logic.Expression`), and a
+few other types discussed below).
+
+Feature dictionaries are written like Python dictionaries, except that
+keys are not put in quotes; and square brackets (``[]``) are used
+instead of braces (``{}``):
+
+    >>> FeatStruct('[tense="past", agr=[number="sing", person=3]]')
+    [agr=[number='sing', person=3], tense='past']
+
+If a feature value is a single alphanumeric word, then it does not
+need to be quoted -- it will be automatically treated as a string:
+
+    >>> FeatStruct('[tense=past, agr=[number=sing, person=3]]')
+    [agr=[number='sing', person=3], tense='past']
+
+Feature lists are written like Python lists:
+
+    >>> FeatStruct('[1, 2, 3]')
+    [1, 2, 3]
+
+The expression ``[]`` is treated as an empty feature dictionary, not
+an empty feature list:
+
+    >>> type(FeatStruct('[]'))
+    <class 'nltk.featstruct.FeatDict'>
+
+Feature Paths
+-------------
+Features can be specified using *feature paths*, or tuples of feature
+identifiers that specify a path through the nested feature structures to
+a value.
+
+    >>> fs1 = FeatStruct('[x=1, y=[1,2,[z=3]]]')
+    >>> fs1['y']
+    [1, 2, [z=3]]
+    >>> fs1['y', 2]
+    [z=3]
+    >>> fs1['y', 2, 'z']
+    3
+
+..
+    >>> del fs1 # clean-up
+
+Reentrance
+----------
+Feature structures may contain reentrant feature values.  A *reentrant
+feature value* is a single feature structure that can be accessed via
+multiple feature paths.
+
+    >>> fs1 = FeatStruct(x='val')
+    >>> fs2 = FeatStruct(a=fs1, b=fs1)
+    >>> print(fs2)
+    [ a = (1) [ x = 'val' ] ]
+    [                       ]
+    [ b -> (1)              ]
+    >>> fs2
+    [a=(1)[x='val'], b->(1)]
+
+As you can see, reentrance is displayed by marking a feature structure
+with a unique identifier, in this case ``(1)``, the first time it is
+encountered; and then using the special form ``var -> id`` whenever it
+is encountered again.  You can use the same notation to directly
+create reentrant feature structures from strings.
+
+    >>> FeatStruct('[a=(1)[], b->(1), c=[d->(1)]]')
+    [a=(1)[], b->(1), c=[d->(1)]]
+
+Reentrant feature structures may contain cycles:
+
+    >>> fs3 = FeatStruct('(1)[a->(1)]')
+    >>> fs3['a', 'a', 'a', 'a']
+    (1)[a->(1)]
+    >>> fs3['a', 'a', 'a', 'a'] is fs3
+    True
+
+Unification preserves the reentrance relations imposed by both of the
+unified feature structures.  In the feature structure resulting from
+unification, any modifications to a reentrant feature value will be
+visible using any of its feature paths.
+
+    >>> fs3.unify(FeatStruct('[a=[b=12], c=33]'))
+    (1)[a->(1), b=12, c=33]
+
+..
+    >>> del fs1, fs2, fs3 # clean-up
+
+Feature Structure Equality
+--------------------------
+Two feature structures are considered equal if they assign the same
+values to all features, *and* they contain the same reentrances.
+
+    >>> fs1 = FeatStruct('[a=(1)[x=1], b->(1)]')
+    >>> fs2 = FeatStruct('[a=(1)[x=1], b->(1)]')
+    >>> fs3 = FeatStruct('[a=[x=1], b=[x=1]]')
+    >>> fs1 == fs1, fs1 is fs1
+    (True, True)
+    >>> fs1 == fs2, fs1 is fs2
+    (True, False)
+    >>> fs1 == fs3, fs1 is fs3
+    (False, False)
+
+Note that this differs from how Python dictionaries and lists define
+equality -- in particular, Python dictionaries and lists ignore
+reentrance relations.  To test two feature structures for equality
+while ignoring reentrance relations, use the `equal_values()` method:
+
+    >>> fs1.equal_values(fs1)
+    True
+    >>> fs1.equal_values(fs2)
+    True
+    >>> fs1.equal_values(fs3)
+    True
+
+..
+    >>> del fs1, fs2, fs3 # clean-up
+
+Feature Value Sets & Feature Value Tuples
+-----------------------------------------
+`nltk.featstruct` defines two new data types that are intended to be
+used as feature values: `FeatureValueTuple` and `FeatureValueSet`.
+Both of these types are considered base values -- i.e., unification
+does *not* apply to them.  However, variable binding *does* apply to
+any values that they contain.
+
+Feature value tuples are written with parentheses:
+
+    >>> fs1 = FeatStruct('[x=(?x, ?y)]')
+    >>> fs1
+    [x=(?x, ?y)]
+    >>> fs1.substitute_bindings({Variable('?x'): 1, Variable('?y'): 2})
+    [x=(1, 2)]
+
+Feature sets are written with braces:
+
+    >>> fs1 = FeatStruct('[x={?x, ?y}]')
+    >>> fs1
+    [x={?x, ?y}]
+    >>> fs1.substitute_bindings({Variable('?x'): 1, Variable('?y'): 2})
+    [x={1, 2}]
+
+In addition to the basic feature value tuple & set classes, nltk
+defines feature value unions (for sets) and feature value
+concatenations (for tuples).  These are written using '+', and can be
+used to combine sets & tuples:
+
+    >>> fs1 = FeatStruct('[x=((1, 2)+?z), z=?z]')
+    >>> fs1
+    [x=((1, 2)+?z), z=?z]
+    >>> fs1.unify(FeatStruct('[z=(3, 4, 5)]'))
+    [x=(1, 2, 3, 4, 5), z=(3, 4, 5)]
+
+Thus, feature value tuples and sets can be used to build up tuples
+and sets of values over the course of unification.  For example, when
+parsing sentences using a semantic feature grammar, feature sets or
+feature tuples can be used to build a list of semantic predicates as
+the sentence is parsed.
+
+As was mentioned above, unification does not apply to feature value
+tuples and sets.  One reason for this is that it's impossible to define a
+single correct answer for unification when concatenation is used.
+Consider the following example:
+
+    >>> fs1 = FeatStruct('[x=(1, 2, 3, 4)]')
+    >>> fs2 = FeatStruct('[x=(?a+?b), a=?a, b=?b]')
+
+If unification applied to feature tuples, then the unification
+algorithm would have to arbitrarily choose how to divide the tuple
+(1,2,3,4) into two parts.  Instead, the unification algorithm refuses
+to make this decision, and simply unifies based on value.  Because
+(1,2,3,4) is not equal to (?a+?b), fs1 and fs2 will not unify:
+
+    >>> print(fs1.unify(fs2))
+    None
+
+If you need a list-like structure that unification does apply to, use
+`FeatList`.
+
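+For example (mirroring the earlier feature-list test), the element-wise
+unification that fails for tuples succeeds for feature lists:
+
+    >>> FeatStruct([1, 2, Variable('?y')]).unify(FeatStruct([1, Variable('?x'), 3]))
+    [1, 2, 3]
+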
+..
+    >>> del fs1, fs2 # clean-up
+
+Light-weight Feature Structures
+-------------------------------
+Many of the functions defined by `nltk.featstruct` can be applied
+directly to simple Python dictionaries and lists, rather than to
+full-fledged `FeatDict` and `FeatList` objects.  In other words,
+Python ``dicts`` and ``lists`` can be used as "light-weight" feature
+structures.
+
+    >>> # Note: pprint prints dicts sorted
+    >>> from pprint import pprint
+    >>> from nltk.featstruct import unify
+    >>> pprint(unify(dict(x=1, y=dict()), dict(a='a', y=dict(b='b'))))
+    {'a': 'a', 'x': 1, 'y': {'b': 'b'}}
+
+However, you should keep in mind the following caveats:
+
+- Python dictionaries & lists ignore reentrance when checking for
+  equality between values (see the sketch after this list).  But two
+  FeatStructs with different reentrances are considered nonequal,
+  even if all their base values are equal.
+
+- FeatStructs can be easily frozen, allowing them to be used as
+  keys in hash tables.  Python dictionaries and lists cannot.
+
+- FeatStructs display reentrance in their string representations;
+  Python dictionaries and lists do not.
+
+- FeatStructs may *not* be mixed with Python dictionaries and lists
+  (e.g., when performing unification).
+
+- FeatStructs provide a number of useful methods, such as `walk()`
+  and `cyclic()`, which are not available for Python dicts & lists.
+
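+For instance (a small sketch of the first caveat), two dicts that share
+a nested value compare equal to two dicts with independent copies,
+whereas the corresponding FeatStructs do not:
+
+    >>> sub = {'x': 1}
+    >>> dict(a=sub, b=sub) == dict(a={'x': 1}, b={'x': 1})
+    True
+    >>> FeatStruct('[a=(1)[x=1], b->(1)]') == FeatStruct('[a=[x=1], b=[x=1]]')
+    False
+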
+In general, if your feature structures will contain any reentrances,
+or if you plan to use them as dictionary keys, it is strongly
+recommended that you use full-fledged `FeatStruct` objects.
+
+Custom Feature Values
+---------------------
+The abstract base class `CustomFeatureValue` can be used to define new
+base value types that have custom unification methods.  For example,
+the following feature value type encodes a range, and defines
+unification as taking the intersection on the ranges:
+
+    >>> from functools import total_ordering
+    >>> from nltk.featstruct import CustomFeatureValue, UnificationFailure
+    >>> @total_ordering
+    ... class Range(CustomFeatureValue):
+    ...     def __init__(self, low, high):
+    ...         assert low <= high
+    ...         self.low = low
+    ...         self.high = high
+    ...     def unify(self, other):
+    ...         if not isinstance(other, Range):
+    ...             return UnificationFailure
+    ...         low = max(self.low, other.low)
+    ...         high = min(self.high, other.high)
+    ...         if low <= high: return Range(low, high)
+    ...         else: return UnificationFailure
+    ...     def __repr__(self):
+    ...         return '(%s<x<%s)' % (self.low, self.high)
+    ...     def __eq__(self, other):
+    ...         if not isinstance(other, Range):
+    ...             return False
+    ...         return (self.low == other.low) and (self.high == other.high)
+    ...     def __lt__(self, other):
+    ...         if not isinstance(other, Range):
+    ...             return True
+    ...         return (self.low, self.high) < (other.low, other.high)
+
+    >>> fs1 = FeatStruct(x=Range(5,8), y=FeatStruct(z=Range(7,22)))
+    >>> print(fs1.unify(FeatStruct(x=Range(6, 22))))
+    [ x = (6<x<8)          ]
+    [                      ]
+    [ y = [ z = (7<x<22) ] ]
+    >>> print(fs1.unify(FeatStruct(x=Range(9, 12))))
+    None
+    >>> print(fs1.unify(FeatStruct(x=12)))
+    None
+    >>> print(fs1.unify(FeatStruct('[x=?x, y=[z=?x]]')))
+    [ x = (7<x<8)         ]
+    [                     ]
+    [ y = [ z = (7<x<8) ] ]
+
+Regression Tests
+~~~~~~~~~~~~~~~~
+
+Dictionary access methods (non-mutating)
+----------------------------------------
+
+    >>> fs1 = FeatStruct(a=1, b=2, c=3)
+    >>> fs2 = FeatStruct(x=fs1, y='x')
+
+Feature structures support all dictionary methods (excluding the class
+method `dict.fromkeys()`).  Non-mutating methods:
+
+    >>> sorted(fs2.keys())                               # keys()
+    ['x', 'y']
+    >>> sorted(fs2.values())                             # values()
+    [[a=1, b=2, c=3], 'x']
+    >>> sorted(fs2.items())                              # items()
+    [('x', [a=1, b=2, c=3]), ('y', 'x')]
+    >>> sorted(fs2)                                      # __iter__()
+    ['x', 'y']
+    >>> 'a' in fs2, 'x' in fs2                           # __contains__()
+    (False, True)
+    >>> fs2.has_key('a'), fs2.has_key('x')               # has_key()
+    (False, True)
+    >>> fs2['x'], fs2['y']                               # __getitem__()
+    ([a=1, b=2, c=3], 'x')
+    >>> fs2['a']                                         # __getitem__()
+    Traceback (most recent call last):
+      . . .
+    KeyError: 'a'
+    >>> fs2.get('x'), fs2.get('y'), fs2.get('a')         # get()
+    ([a=1, b=2, c=3], 'x', None)
+    >>> fs2.get('x', 'hello'), fs2.get('a', 'hello')     # get()
+    ([a=1, b=2, c=3], 'hello')
+    >>> len(fs1), len(fs2)                               # __len__
+    (3, 2)
+    >>> fs2.copy()                                       # copy()
+    [x=[a=1, b=2, c=3], y='x']
+    >>> fs2.copy() is fs2                                # copy()
+    False
+
+Note: by default, `FeatStruct.copy()` does a deep copy.  Use
+`FeatStruct.copy(deep=False)` for a shallow copy.
+
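+As a quick sketch of the difference (reusing ``fs1`` and ``fs2`` from
+above), a deep copy duplicates nested structures, while a shallow copy
+shares them:
+
+    >>> fs2.copy()['x'] is fs1
+    False
+    >>> fs2.copy(deep=False)['x'] is fs1
+    True
+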
+..
+    >>> del fs1, fs2 # clean-up.
+
+Dictionary access methods (mutating)
+------------------------------------
+    >>> fs1 = FeatStruct(a=1, b=2, c=3)
+    >>> fs2 = FeatStruct(x=fs1, y='x')
+
+Setting features (`__setitem__()`)
+
+    >>> fs1['c'] = 5
+    >>> fs1
+    [a=1, b=2, c=5]
+    >>> fs1['x'] = 12
+    >>> fs1
+    [a=1, b=2, c=5, x=12]
+    >>> fs2['x', 'a'] = 2
+    >>> fs2
+    [x=[a=2, b=2, c=5, x=12], y='x']
+    >>> fs1
+    [a=2, b=2, c=5, x=12]
+
+Deleting features (`__delitem__()`)
+
+    >>> del fs1['x']
+    >>> fs1
+    [a=2, b=2, c=5]
+    >>> del fs2['x', 'a']
+    >>> fs1
+    [b=2, c=5]
+
+`setdefault()`:
+
+    >>> fs1.setdefault('b', 99)
+    2
+    >>> fs1
+    [b=2, c=5]
+    >>> fs1.setdefault('x', 99)
+    99
+    >>> fs1
+    [b=2, c=5, x=99]
+
+`update()`:
+
+    >>> fs2.update({'a':'A', 'b':'B'}, c='C')
+    >>> fs2
+    [a='A', b='B', c='C', x=[b=2, c=5, x=99], y='x']
+
+`pop()`:
+
+    >>> fs2.pop('a')
+    'A'
+    >>> fs2
+    [b='B', c='C', x=[b=2, c=5, x=99], y='x']
+    >>> fs2.pop('a')
+    Traceback (most recent call last):
+      . . .
+    KeyError: 'a'
+    >>> fs2.pop('a', 'foo')
+    'foo'
+    >>> fs2
+    [b='B', c='C', x=[b=2, c=5, x=99], y='x']
+
+`clear()`:
+
+    >>> fs1.clear()
+    >>> fs1
+    []
+    >>> fs2
+    [b='B', c='C', x=[], y='x']
+
+`popitem()`:
+
+    >>> sorted([fs2.popitem() for i in range(len(fs2))])
+    [('b', 'B'), ('c', 'C'), ('x', []), ('y', 'x')]
+    >>> fs2
+    []
+
+Once a feature structure has been frozen, it may not be mutated.
+
+    >>> fs1 = FeatStruct('[x=1, y=2, z=[a=3]]')
+    >>> fs1.freeze()
+    >>> fs1.frozen()
+    True
+    >>> fs1['z'].frozen()
+    True
+
+    >>> fs1['x'] = 5
+    Traceback (most recent call last):
+      . . .
+    ValueError: Frozen FeatStructs may not be modified.
+    >>> del fs1['x']
+    Traceback (most recent call last):
+      . . .
+    ValueError: Frozen FeatStructs may not be modified.
+    >>> fs1.clear()
+    Traceback (most recent call last):
+      . . .
+    ValueError: Frozen FeatStructs may not be modified.
+    >>> fs1.pop('x')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Frozen FeatStructs may not be modified.
+    >>> fs1.popitem()
+    Traceback (most recent call last):
+      . . .
+    ValueError: Frozen FeatStructs may not be modified.
+    >>> fs1.setdefault('x')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Frozen FeatStructs may not be modified.
+    >>> fs1.update(z=22)
+    Traceback (most recent call last):
+      . . .
+    ValueError: Frozen FeatStructs may not be modified.
+
+..
+    >>> del fs1, fs2 # clean-up.
+
+Feature Paths
+-------------
+Make sure that __getitem__ with feature paths works as intended:
+
+    >>> fs1 = FeatStruct(a=1, b=2,
+    ...                 c=FeatStruct(
+    ...                     d=FeatStruct(e=12),
+    ...                     f=FeatStruct(g=55, h='hello')))
+    >>> fs1[()]
+    [a=1, b=2, c=[d=[e=12], f=[g=55, h='hello']]]
+    >>> fs1['a'], fs1[('a',)]
+    (1, 1)
+    >>> fs1['c','d','e']
+    12
+    >>> fs1['c','f','g']
+    55
+
+Feature paths that select unknown features raise KeyError:
+
+    >>> fs1['c', 'f', 'e']
+    Traceback (most recent call last):
+      . . .
+    KeyError: ('c', 'f', 'e')
+    >>> fs1['q', 'p']
+    Traceback (most recent call last):
+      . . .
+    KeyError: ('q', 'p')
+
+Feature paths that try to go 'through' a feature that's not a feature
+structure raise KeyError:
+
+    >>> fs1['a', 'b']
+    Traceback (most recent call last):
+      . . .
+    KeyError: ('a', 'b')
+
+Feature paths can go through reentrant structures:
+
+    >>> fs2 = FeatStruct('(1)[a=[b=[c->(1), d=5], e=11]]')
+    >>> fs2['a', 'b', 'c', 'a', 'e']
+    11
+    >>> fs2['a', 'b', 'c', 'a', 'b', 'd']
+    5
+    >>> fs2[tuple('abcabcabcabcabcabcabcabcabcabca')]
+    (1)[b=[c=[a->(1)], d=5], e=11]
+
+Indexing requires strings, `Feature`\s, or tuples; other types raise a
+TypeError:
+
+    >>> fs2[12]
+    Traceback (most recent call last):
+      . . .
+    TypeError: Expected feature name or path.  Got 12.
+    >>> fs2[list('abc')]
+    Traceback (most recent call last):
+      . . .
+    TypeError: Expected feature name or path.  Got ['a', 'b', 'c'].
+
+Feature paths can also be used with `get()`, `has_key()`, and
+`__contains__()`.
+
+    >>> fpath1 = tuple('abcabc')
+    >>> fpath2 = tuple('abcabz')
+    >>> fs2.get(fpath1), fs2.get(fpath2)
+    ((1)[a=[b=[c->(1), d=5], e=11]], None)
+    >>> fpath1 in fs2, fpath2 in fs2
+    (True, False)
+    >>> fs2.has_key(fpath1), fs2.has_key(fpath2)
+    (True, False)
+
+..
+    >>> del fs1, fs2 # clean-up
+
+Reading Feature Structures
+--------------------------
+
+Empty feature struct:
+
+    >>> FeatStruct('[]')
+    []
+
+Test features with integer values:
+
+    >>> FeatStruct('[a=12, b=-33, c=0]')
+    [a=12, b=-33, c=0]
+
+Test features with string values.  Either single or double quotes may
+be used.  Strings are evaluated just like Python strings -- in
+particular, you can use escape sequences and 'u' and 'r' prefixes, and
+triple-quoted strings.
+
+    >>> FeatStruct('[a="", b="hello", c="\'", d=\'\', e=\'"\']')
+    [a='', b='hello', c="'", d='', e='"']
+    >>> FeatStruct(r'[a="\\", b="\"", c="\x6f\\y", d="12"]')
+    [a='\\', b='"', c='o\\y', d='12']
+    >>> FeatStruct(r'[b=r"a\b\c"]')
+    [b='a\\b\\c']
+    >>> FeatStruct('[x="""a"""]')
+    [x='a']
+
+Test parsing of reentrant feature structures.
+
+    >>> FeatStruct('[a=(1)[], b->(1)]')
+    [a=(1)[], b->(1)]
+    >>> FeatStruct('[a=(1)[x=1, y=2], b->(1)]')
+    [a=(1)[x=1, y=2], b->(1)]
+
+Test parsing of cyclic feature structures.
+
+    >>> FeatStruct('[a=(1)[b->(1)]]')
+    [a=(1)[b->(1)]]
+    >>> FeatStruct('(1)[a=[b=[c->(1)]]]')
+    (1)[a=[b=[c->(1)]]]
+
+Strings of the form "+name" and "-name" may be used to specify boolean
+values.
+
+    >>> FeatStruct('[-bar, +baz, +foo]')
+    [-bar, +baz, +foo]
+
+None, True, and False are recognized as values:
+
+    >>> FeatStruct('[bar=True, baz=False, foo=None]')
+    [+bar, -baz, foo=None]
+
+Special features:
+
+    >>> FeatStruct('NP/VP')
+    NP[]/VP[]
+    >>> FeatStruct('?x/?x')
+    ?x[]/?x[]
+    >>> print(FeatStruct('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]'))
+    [ *type*  = 'VP'              ]
+    [                             ]
+    [           [ *type* = 'NP' ] ]
+    [ *slash* = [ agr    = ?x   ] ]
+    [           [ pl     = True ] ]
+    [                             ]
+    [ agr     = ?x                ]
+    [ fin     = True              ]
+    [ tense   = 'past'            ]
+
+Here the slash feature gets coerced:
+
+    >>> FeatStruct('[*slash*=a, x=b, *type*="NP"]')
+    NP[x='b']/a[]
+
+    >>> FeatStruct('NP[sem=<bob>]/NP')
+    NP[sem=<bob>]/NP[]
+    >>> FeatStruct('S[sem=<walk(bob)>]')
+    S[sem=<walk(bob)>]
+    >>> print(FeatStruct('NP[sem=<bob>]/NP'))
+    [ *type*  = 'NP'              ]
+    [                             ]
+    [ *slash* = [ *type* = 'NP' ] ]
+    [                             ]
+    [ sem     = <bob>             ]
+
+Playing with ranges:
+
+    >>> from nltk.featstruct import RangeFeature, FeatStructReader
+    >>> width = RangeFeature('width')
+    >>> reader = FeatStructReader([width])
+    >>> fs1 = reader.fromstring('[*width*=-5:12]')
+    >>> fs2 = reader.fromstring('[*width*=2:123]')
+    >>> fs3 = reader.fromstring('[*width*=-7:-2]')
+    >>> fs1.unify(fs2)
+    [*width*=(2, 12)]
+    >>> fs1.unify(fs3)
+    [*width*=(-5, -2)]
+    >>> print(fs2.unify(fs3)) # no overlap in width.
+    None
+
+The slash feature has a default value of 'False':
+
+    >>> print(FeatStruct('NP[]/VP').unify(FeatStruct('NP[]'), trace=1))
+    <BLANKLINE>
+    Unification trace:
+       / NP[]/VP[]
+      |\ NP[]
+      |
+      | Unify feature: *type*
+      |    / 'NP'
+      |   |\ 'NP'
+      |   |
+      |   +-->'NP'
+      |
+      | Unify feature: *slash*
+      |    / VP[]
+      |   |\ False
+      |   |
+      X   X <-- FAIL
+    None
+
+The demo structures from category.py.  They all parse, but they don't
+do quite the right thing (?x vs x).
+
+    >>> FeatStruct(pos='n', agr=FeatStruct(number='pl', gender='f'))
+    [agr=[gender='f', number='pl'], pos='n']
+    >>> FeatStruct(r'NP[sem=<bob>]/NP')
+    NP[sem=<bob>]/NP[]
+    >>> FeatStruct(r'S[sem=<app(?x, ?y)>]')
+    S[sem=<?x(?y)>]
+    >>> FeatStruct('?x/?x')
+    ?x[]/?x[]
+    >>> FeatStruct('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]')
+    VP[agr=?x, +fin, tense='past']/NP[agr=?x, +pl]
+    >>> FeatStruct('S[sem = <app(?subj, ?vp)>]')
+    S[sem=<?subj(?vp)>]
+
+    >>> FeatStruct('S')
+    S[]
+
+The parser also includes support for reading sets and tuples.
+
+    >>> FeatStruct('[x={1,2,2,2}, y={/}]')
+    [x={1, 2}, y={/}]
+    >>> FeatStruct('[x=(1,2,2,2), y=()]')
+    [x=(1, 2, 2, 2), y=()]
+    >>> print(FeatStruct('[x=(1,[z=(1,2,?x)],?z,{/})]'))
+    [ x = (1, [ z = (1, 2, ?x) ], ?z, {/}) ]
+
+Note that we can't put a featstruct inside a tuple, because doing so
+would hash it, and it's not frozen yet:
+
+    >>> print(FeatStruct('[x={[]}]'))
+    Traceback (most recent call last):
+      . . .
+    TypeError: FeatStructs must be frozen before they can be hashed.
+
+There's a special syntax for taking the union of sets: "{...+...}".
+The elements should only be variables or sets.
+
+    >>> FeatStruct('[x={?a+?b+{1,2,3}}]')
+    [x={?a+?b+{1, 2, 3}}]
+
+There's a special syntax for taking the concatenation of tuples:
+"(...+...)".  The elements should only be variables or tuples.
+
+    >>> FeatStruct('[x=(?a+?b+(1,2,3))]')
+    [x=(?a+?b+(1, 2, 3))]
+
+Parsing gives helpful messages if your string contains an error.
+
+    >>> FeatStruct('[a=, b=5]]')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Error parsing feature structure
+        [a=, b=5]]
+           ^ Expected value
+    >>> FeatStruct('[a=12 22, b=33]')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Error parsing feature structure
+        [a=12 22, b=33]
+             ^ Expected comma
+    >>> FeatStruct('[a=5] [b=6]')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Error parsing feature structure
+        [a=5] [b=6]
+              ^ Expected end of string
+    >>> FeatStruct(' *++*')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Error parsing feature structure
+        *++*
+        ^ Expected open bracket or identifier
+    >>> FeatStruct('[x->(1)]')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Error parsing feature structure
+        [x->(1)]
+            ^ Expected bound identifier
+    >>> FeatStruct('[x->y]')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Error parsing feature structure
+        [x->y]
+            ^ Expected identifier
+    >>> FeatStruct('')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Error parsing feature structure
+    <BLANKLINE>
+        ^ Expected open bracket or identifier
+
+
+Unification
+-----------
+Very simple unifications give the expected results:
+
+    >>> FeatStruct().unify(FeatStruct())
+    []
+    >>> FeatStruct(number='singular').unify(FeatStruct())
+    [number='singular']
+    >>> FeatStruct().unify(FeatStruct(number='singular'))
+    [number='singular']
+    >>> FeatStruct(number='singular').unify(FeatStruct(person=3))
+    [number='singular', person=3]
+
+Merging nested structures:
+
+    >>> fs1 = FeatStruct('[A=[B=b]]')
+    >>> fs2 = FeatStruct('[A=[C=c]]')
+    >>> fs1.unify(fs2)
+    [A=[B='b', C='c']]
+    >>> fs2.unify(fs1)
+    [A=[B='b', C='c']]
+
+A basic case of reentrant unification
+
+    >>> fs4 = FeatStruct('[A=(1)[B=b], E=[F->(1)]]')
+    >>> fs5 = FeatStruct("[A=[C='c'], E=[F=[D='d']]]")
+    >>> fs4.unify(fs5)
+    [A=(1)[B='b', C='c', D='d'], E=[F->(1)]]
+    >>> fs5.unify(fs4)
+    [A=(1)[B='b', C='c', D='d'], E=[F->(1)]]
+
+More than 2 paths to a value
+
+    >>> fs1 = FeatStruct("[a=[],b=[],c=[],d=[]]")
+    >>> fs2 = FeatStruct('[a=(1)[], b->(1), c->(1), d->(1)]')
+    >>> fs1.unify(fs2)
+    [a=(1)[], b->(1), c->(1), d->(1)]
+
+fs1[a] gets unified with itself
+
+    >>> fs1 = FeatStruct('[x=(1)[], y->(1)]')
+    >>> fs2 = FeatStruct('[x=(1)[], y->(1)]')
+    >>> fs1.unify(fs2)
+    [x=(1)[], y->(1)]
+
+Bound variables should get forwarded appropriately
+
+    >>> fs1 = FeatStruct('[A=(1)[X=x], B->(1), C=?cvar, D=?dvar]')
+    >>> fs2 = FeatStruct('[A=(1)[Y=y], B=(2)[Z=z], C->(1), D->(2)]')
+    >>> fs1.unify(fs2)
+    [A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)]
+    >>> fs2.unify(fs1)
+    [A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)]
+
+Cyclic structure created by unification.
+
+    >>> fs1 = FeatStruct('[F=(1)[], G->(1)]')
+    >>> fs2 = FeatStruct('[F=[H=(2)[]], G->(2)]')
+    >>> fs3 = fs1.unify(fs2)
+    >>> fs3
+    [F=(1)[H->(1)], G->(1)]
+    >>> fs3['F'] is fs3['G']
+    True
+    >>> fs3['F'] is fs3['G']['H']
+    True
+    >>> fs3['F'] is fs3['G']['H']['H']
+    True
+    >>> fs3['F'] is fs3['F']['H']['H']['H']['H']['H']['H']['H']['H']
+    True
+
+Cyclic structure created w/ variables.
+
+    >>> fs1 = FeatStruct('[F=[H=?x]]')
+    >>> fs2 = FeatStruct('[F=?x]')
+    >>> fs3 = fs1.unify(fs2, rename_vars=False)
+    >>> fs3
+    [F=(1)[H->(1)]]
+    >>> fs3['F'] is fs3['F']['H']
+    True
+    >>> fs3['F'] is fs3['F']['H']['H']
+    True
+    >>> fs3['F'] is fs3['F']['H']['H']['H']['H']['H']['H']['H']['H']
+    True
+
+Unifying w/ a cyclic feature structure.
+
+    >>> fs4 = FeatStruct('[F=[H=[H=[H=(1)[]]]], K->(1)]')
+    >>> fs3.unify(fs4)
+    [F=(1)[H->(1)], K->(1)]
+    >>> fs4.unify(fs3)
+    [F=(1)[H->(1)], K->(1)]
+
+Variable bindings should preserve reentrance.
+
+    >>> bindings = {}
+    >>> fs1 = FeatStruct("[a=?x]")
+    >>> fs2 = fs1.unify(FeatStruct("[a=[]]"), bindings)
+    >>> fs2['a'] is bindings[Variable('?x')]
+    True
+    >>> fs2.unify(FeatStruct("[b=?x]"), bindings)
+    [a=(1)[], b->(1)]
+
+Aliased variable tests
+
+    >>> fs1 = FeatStruct("[a=?x, b=?x]")
+    >>> fs2 = FeatStruct("[b=?y, c=?y]")
+    >>> bindings = {}
+    >>> fs3 = fs1.unify(fs2, bindings)
+    >>> fs3
+    [a=?x, b=?x, c=?x]
+    >>> bindings
+    {Variable('?y'): Variable('?x')}
+    >>> fs3.unify(FeatStruct("[a=1]"))
+    [a=1, b=1, c=1]
+
+If we keep track of the bindings, then we can use the same variable
+over multiple calls to unify.
+
+    >>> bindings = {}
+    >>> fs1 = FeatStruct('[a=?x]')
+    >>> fs2 = fs1.unify(FeatStruct('[a=[]]'), bindings)
+    >>> fs2.unify(FeatStruct('[b=?x]'), bindings)
+    [a=(1)[], b->(1)]
+    >>> bindings
+    {Variable('?x'): []}
+
+..
+    >>> del fs1, fs2, fs3, fs4, fs5 # clean-up
+
+Unification Bindings
+--------------------
+
+    >>> bindings = {}
+    >>> fs1 = FeatStruct('[a=?x]')
+    >>> fs2 = FeatStruct('[a=12]')
+    >>> fs3 = FeatStruct('[b=?x]')
+    >>> fs1.unify(fs2, bindings)
+    [a=12]
+    >>> bindings
+    {Variable('?x'): 12}
+    >>> fs3.substitute_bindings(bindings)
+    [b=12]
+    >>> fs3 # substitute_bindings didn't mutate fs3.
+    [b=?x]
+    >>> fs2.unify(fs3, bindings)
+    [a=12, b=12]
+
+    >>> bindings = {}
+    >>> fs1 = FeatStruct('[a=?x, b=1]')
+    >>> fs2 = FeatStruct('[a=5, b=?x]')
+    >>> fs1.unify(fs2, bindings)
+    [a=5, b=1]
+    >>> sorted(bindings.items())
+    [(Variable('?x'), 5), (Variable('?x2'), 1)]
+
+..
+    >>> del fs1, fs2, fs3 # clean-up
+
+Expressions
+-----------
+
+    >>> e = Expression.fromstring('\\P y.P(z,y)')
+    >>> fs1 = FeatStruct(x=e, y=Variable('z'))
+    >>> fs2 = FeatStruct(y=VariableExpression(Variable('John')))
+    >>> fs1.unify(fs2)
+    [x=<\P y.P(John,y)>, y=<John>]
+
+Remove Variables
+----------------
+
+    >>> FeatStruct('[a=?x, b=12, c=[d=?y]]').remove_variables()
+    [b=12, c=[]]
+    >>> FeatStruct('(1)[a=[b=?x,c->(1)]]').remove_variables()
+    (1)[a=[c->(1)]]
+
+Equality & Hashing
+------------------
+The `equal_values` method checks whether two feature structures assign
+the same value to every feature.  If the optional argument
+``check_reentrances`` is supplied, then it also returns false if there
+is any difference in the reentrances.
+
+    >>> a = FeatStruct('(1)[x->(1)]')
+    >>> b = FeatStruct('(1)[x->(1)]')
+    >>> c = FeatStruct('(1)[x=[x->(1)]]')
+    >>> d = FeatStruct('[x=(1)[x->(1)]]')
+    >>> e = FeatStruct('(1)[x=[x->(1), y=1], y=1]')
+    >>> def compare(x,y):
+    ...     assert x.equal_values(y, True) == y.equal_values(x, True)
+    ...     assert x.equal_values(y, False) == y.equal_values(x, False)
+    ...     if x.equal_values(y, True):
+    ...         assert x.equal_values(y, False)
+    ...         print('equal values, same reentrance')
+    ...     elif x.equal_values(y, False):
+    ...         print('equal values, different reentrance')
+    ...     else:
+    ...         print('different values')
+
+    >>> compare(a, a)
+    equal values, same reentrance
+    >>> compare(a, b)
+    equal values, same reentrance
+    >>> compare(a, c)
+    equal values, different reentrance
+    >>> compare(a, d)
+    equal values, different reentrance
+    >>> compare(c, d)
+    equal values, different reentrance
+    >>> compare(a, e)
+    different values
+    >>> compare(c, e)
+    different values
+    >>> compare(d, e)
+    different values
+    >>> compare(e, e)
+    equal values, same reentrance
+
+Feature structures may not be hashed until they are frozen:
+
+    >>> hash(a)
+    Traceback (most recent call last):
+      . . .
+    TypeError: FeatStructs must be frozen before they can be hashed.
+    >>> a.freeze()
+    >>> v = hash(a)
+
+Feature structures define hash consistently.  The following example
+looks at the hash value for each (fs1,fs2) pair; if their hash values
+are not equal, then they must not be equal.  If their hash values are
+equal, then display a message, and indicate whether their values are
+indeed equal.  Note that c and d currently have the same hash value,
+even though they are not equal.  That is not a bug, strictly speaking,
+but it wouldn't be a bad thing if it changed.
+
+    >>> for fstruct in (a, b, c, d, e):
+    ...     fstruct.freeze()
+    >>> for fs1_name in 'abcde':
+    ...     for fs2_name in 'abcde':
+    ...         fs1 = locals()[fs1_name]
+    ...         fs2 = locals()[fs2_name]
+    ...         if hash(fs1) != hash(fs2):
+    ...             assert fs1 != fs2
+    ...         else:
+    ...             print('%s and %s have the same hash value,' %
+    ...                    (fs1_name, fs2_name))
+    ...             if fs1 == fs2: print('and are equal')
+    ...             else: print('and are not equal')
+    a and a have the same hash value, and are equal
+    a and b have the same hash value, and are equal
+    b and a have the same hash value, and are equal
+    b and b have the same hash value, and are equal
+    c and c have the same hash value, and are equal
+    c and d have the same hash value, and are not equal
+    d and c have the same hash value, and are not equal
+    d and d have the same hash value, and are equal
+    e and e have the same hash value, and are equal
+
+..
+    >>> del a, b, c, d, e, v # clean-up
+
+Tracing
+-------
+
+    >>> fs1 = FeatStruct('[a=[b=(1)[], c=?x], d->(1), e=[f=?x]]')
+    >>> fs2 = FeatStruct('[a=(1)[c="C"], e=[g->(1)]]')
+    >>> fs1.unify(fs2, trace=True)
+    <BLANKLINE>
+    Unification trace:
+       / [a=[b=(1)[], c=?x], d->(1), e=[f=?x]]
+      |\ [a=(1)[c='C'], e=[g->(1)]]
+      |
+      | Unify feature: a
+      |    / [b=[], c=?x]
+      |   |\ [c='C']
+      |   |
+      |   | Unify feature: a.c
+      |   |    / ?x
+      |   |   |\ 'C'
+      |   |   |
+      |   |   +-->Variable('?x')
+      |   |
+      |   +-->[b=[], c=?x]
+      |       Bindings: {?x: 'C'}
+      |
+      | Unify feature: e
+      |    / [f=?x]
+      |   |\ [g=[c='C']]
+      |   |
+      |   +-->[f=?x, g=[b=[], c=?x]]
+      |       Bindings: {?x: 'C'}
+      |
+      +-->[a=(1)[b=(2)[], c='C'], d->(2), e=[f='C', g->(1)]]
+          Bindings: {?x: 'C'}
+    [a=(1)[b=(2)[], c='C'], d->(2), e=[f='C', g->(1)]]
+    >>>
+    >>> fs1 = FeatStruct('[a=?x, b=?z, c=?z]')
+    >>> fs2 = FeatStruct('[a=?y, b=?y, c=?q]')
+    >>> #fs1.unify(fs2, trace=True)
+    >>>
+
+..
+    >>> del fs1, fs2 # clean-up
+
+Unification on Dicts & Lists
+----------------------------
+It's possible to do unification on dictionaries:
+
+    >>> from nltk.featstruct import unify
+    >>> pprint(unify(dict(x=1, y=dict(z=2)), dict(x=1, q=5)), width=1)
+    {'q': 5, 'x': 1, 'y': {'z': 2}}
+
+It's possible to do unification on lists as well:
+
+    >>> unify([1, 2, 3], [1, Variable('x'), 3])
+    [1, 2, 3]
+
+Mixing dicts and lists is fine:
+
+    >>> pprint(unify([dict(x=1, y=dict(z=2)),3], [dict(x=1, q=5),3]),
+    ...               width=1)
+    [{'q': 5, 'x': 1, 'y': {'z': 2}}, 3]
+
+Mixing dicts and FeatStructs is discouraged:
+
+    >>> unify(dict(x=1), FeatStruct(x=1))
+    Traceback (most recent call last):
+      . . .
+    ValueError: Mixing FeatStruct objects with Python dicts and lists is not supported.
+
+But you can do it if you really want, by explicitly stating that both
+dictionaries and FeatStructs should be treated as feature structures:
+
+    >>> unify(dict(x=1), FeatStruct(x=1), fs_class=(dict, FeatStruct))
+    {'x': 1}
+
+Finding Conflicts
+-----------------
+
+    >>> from nltk.featstruct import conflicts
+    >>> fs1 = FeatStruct('[a=[b=(1)[c=2], d->(1), e=[f->(1)]]]')
+    >>> fs2 = FeatStruct('[a=[b=[c=[x=5]], d=[c=2], e=[f=[c=3]]]]')
+    >>> for path in conflicts(fs1, fs2):
+    ...     print('%-8s: %r vs %r' % ('.'.join(path), fs1[path], fs2[path]))
+    a.b.c   : 2 vs [x=5]
+    a.e.f.c : 2 vs 3
+
+..
+    >>> del fs1, fs2 # clean-up
+
+Retracting Bindings
+-------------------
+
+    >>> from nltk.featstruct import retract_bindings
+    >>> bindings = {}
+    >>> fs1 = FeatStruct('[a=?x, b=[c=?y]]')
+    >>> fs2 = FeatStruct('[a=(1)[c=[d=1]], b->(1)]')
+    >>> fs3 = fs1.unify(fs2, bindings)
+    >>> print(fs3)
+    [ a = (1) [ c = [ d = 1 ] ] ]
+    [                           ]
+    [ b -> (1)                  ]
+    >>> pprint(bindings)
+    {Variable('?x'): [c=[d=1]], Variable('?y'): [d=1]}
+    >>> retract_bindings(fs3, bindings)
+    [a=?x, b=?x]
+    >>> pprint(bindings)
+    {Variable('?x'): [c=?y], Variable('?y'): [d=1]}
+
+Squashed Bugs
+~~~~~~~~~~~~~
+In svn rev 5167, unifying two feature structures that used the same
+variable would cause those variables to become aliased in the output.
+
+    >>> fs1 = FeatStruct('[a=?x]')
+    >>> fs2 = FeatStruct('[b=?x]')
+    >>> fs1.unify(fs2)
+    [a=?x, b=?x2]
+
+There was a bug in svn revision 5172 that caused `rename_variables` to
+rename variables to names that are already used.
+
+    >>> FeatStruct('[a=?x, b=?x2]').rename_variables(
+    ...     vars=[Variable('?x')])
+    [a=?x3, b=?x2]
+    >>> fs1 = FeatStruct('[a=?x]')
+    >>> fs2 = FeatStruct('[a=?x, b=?x2]')
+    >>> fs1.unify(fs2)
+    [a=?x, b=?x2]
+
+There was a bug in svn rev 5167 that caused us to get the following
+example wrong.  Basically the problem was that we only followed
+'forward' pointers for other, not self, when unifying two feature
+structures.  (nb: this test assumes that features are unified in
+alphabetical order -- if they are not, it might pass even if the bug
+is present.)
+
+    >>> fs1 = FeatStruct('[a=[x=1], b=?x, c=?x]')
+    >>> fs2 = FeatStruct('[a=(1)[], b->(1), c=[x=2]]')
+    >>> print(fs1.unify(fs2))
+    None
+
+..
+    >>> del fs1, fs2 # clean-up
diff --git a/nlp_resource_data/nltk/test/framenet.doctest b/nlp_resource_data/nltk/test/framenet.doctest
new file mode 100644 (file)
index 0000000..d1ecc80
--- /dev/null
@@ -0,0 +1,288 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+========
+FrameNet
+========
+
+The FrameNet corpus is a lexical database of English that is both human-
+and machine-readable, based on annotating examples of how words are used
+in actual texts. FrameNet is based on a theory of meaning called Frame
+Semantics, deriving from the work of Charles J. Fillmore and colleagues.
+The basic idea is straightforward: that the meanings of most words can
+best be understood on the basis of a semantic frame: a description of a
+type of event, relation, or entity and the participants in it. For
+example, the concept of cooking typically involves a person doing the
+cooking (Cook), the food that is to be cooked (Food), something to hold
+the food while cooking (Container) and a source of heat
+(Heating_instrument). In the FrameNet project, this is represented as a
+frame called Apply_heat, and the Cook, Food, Heating_instrument and
+Container are called frame elements (FEs). Words that evoke this frame,
+such as fry, bake, boil, and broil, are called lexical units (LUs) of
+the Apply_heat frame. The job of FrameNet is to define the frames
+and to annotate sentences to show how the FEs fit syntactically around
+the word that evokes the frame.
+
+------
+Frames
+------
+
+A Frame is a script-like conceptual structure that describes a
+particular type of situation, object, or event along with the
+participants and props that are needed for that Frame. For
+example, the "Apply_heat" frame describes a common situation
+involving a Cook, some Food, and a Heating_Instrument, and is
+evoked by words such as bake, blanch, boil, broil, brown,
+simmer, steam, etc.
+
+We call the roles of a Frame "frame elements" (FEs) and the
+frame-evoking words are called "lexical units" (LUs).
+
+FrameNet includes relations between Frames. Several types of
+relations are defined, of which the most important are:
+
+- Inheritance: An IS-A relation. The child frame is a subtype
+  of the parent frame, and each FE in the parent is bound to
+  a corresponding FE in the child. An example is the
+  "Revenge" frame which inherits from the
+  "Rewards_and_punishments" frame.
+
+- Using: The child frame presupposes the parent frame as
+  background, e.g. the "Speed" frame "uses" (or presupposes)
+  the "Motion" frame; however, not all parent FEs need to be
+  bound to child FEs.
+
+- Subframe: The child frame is a subevent of a complex event
+  represented by the parent, e.g. the "Criminal_process" frame
+  has subframes of "Arrest", "Arraignment", "Trial", and
+  "Sentencing".
+
+- Perspective_on: The child frame provides a particular
+  perspective on an un-perspectivized parent frame. A pair of
+  examples consists of the "Hiring" and "Get_a_job" frames,
+  which perspectivize the "Employment_start" frame from the
+  Employer's and the Employee's point of view, respectively.
+
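+These relations can also be queried programmatically. A minimal
+sketch (outputs abbreviated and left unchecked, hence the skip
+directives; per the reader's documented signature, `frame_relations()`
+accepts a frame name or ID):
+
+    >>> from nltk.corpus import framenet as fn
+    >>> fn.frame_relations('Revenge') # doctest: +SKIP
+    [<Parent=Rewards_and_punishments -- Inheritance -> Child=Revenge>, ...]
+    >>> fn.frame_relation_types() # doctest: +SKIP
+    [<framerelationtype ID=1 name=Inheritance>, ...]
+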
+To get a list of all of the Frames in FrameNet, you can use the
+`frames()` function. If you supply a regular expression pattern to the
+`frames()` function, you will get a list of all Frames whose names match
+that pattern:
+
+    >>> from pprint import pprint
+    >>> from operator import itemgetter
+    >>> from nltk.corpus import framenet as fn
+    >>> from nltk.corpus.reader.framenet import PrettyList
+    >>> x = fn.frames(r'(?i)crim')
+    >>> x.sort(key=itemgetter('ID'))
+    >>> x
+    [<frame ID=200 name=Criminal_process>, <frame ID=500 name=Criminal_investigation>, ...]
+    >>> PrettyList(sorted(x, key=itemgetter('ID')))
+    [<frame ID=200 name=Criminal_process>, <frame ID=500 name=Criminal_investigation>, ...]
+
+To get the details of a particular Frame, you can use the `frame()`
+function passing in the frame number:
+
+    >>> from pprint import pprint
+    >>> from nltk.corpus import framenet as fn
+    >>> f = fn.frame(202)
+    >>> f.ID
+    202
+    >>> f.name
+    'Arrest'
+    >>> f.definition # doctest: +ELLIPSIS
+    "Authorities charge a Suspect, who is under suspicion of having committed a crime..."
+    >>> len(f.lexUnit)
+    11
+    >>> pprint(sorted([x for x in f.FE]))
+    ['Authorities',
+     'Charges',
+     'Co-participant',
+     'Manner',
+     'Means',
+     'Offense',
+     'Place',
+     'Purpose',
+     'Source_of_legal_authority',
+     'Suspect',
+     'Time',
+     'Type']
+    >>> pprint(f.frameRelations)
+    [<Parent=Intentionally_affect -- Inheritance -> Child=Arrest>, <Complex=Criminal_process -- Subframe -> Component=Arrest>, ...]
+
+The `frame()` function shown above returns a dict object containing
+detailed information about the Frame. See the documentation on the
+`frame()` function for the specifics.
+
+You can also search for Frames by their Lexical Units (LUs). The
+`frames_by_lemma()` function returns a list of all frames that contain
+LUs in which the 'name' attribute of the LU matches the given regular
+expression. Note that LU names are composed of "lemma.POS", where the
+"lemma" part can be made up of either a single lexeme (e.g. 'run') or
+multiple lexemes (e.g. 'a little') (see below).
+
+    >>> PrettyList(sorted(fn.frames_by_lemma(r'(?i)a little'), key=itemgetter('ID'))) # doctest: +ELLIPSIS
+    [<frame ID=189 name=Quanti...>, <frame ID=2001 name=Degree>]
+
+-------------
+Lexical Units
+-------------
+
+A lexical unit (LU) is a pairing of a word with a meaning. For
+example, the "Apply_heat" Frame describes a common situation
+involving a Cook, some Food, and a Heating Instrument, and is
+_evoked_ by words such as bake, blanch, boil, broil, brown,
+simmer, steam, etc. These frame-evoking words are the LUs in the
+Apply_heat frame. Each sense of a polysemous word is a different
+LU.
+
+We have used the word "word" in talking about LUs. The reality
+is actually rather complex. When we say that the word "bake" is
+polysemous, we mean that the lemma "bake.v" (which has the
+word-forms "bake", "bakes", "baked", and "baking") is linked to
+three different frames:
+
+- Apply_heat: "Michelle baked the potatoes for 45 minutes."
+
+- Cooking_creation: "Michelle baked her mother a cake for her birthday."
+
+- Absorb_heat: "The potatoes have to bake for more than 30 minutes."
+
+These constitute three different LUs, with different
+definitions.
+
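+As a quick sketch of how this looks through the corpus reader (the
+exact IDs and ordering are not checked here, hence the skip):
+
+    >>> from nltk.corpus import framenet as fn
+    >>> [(lu.name, lu.frame.name) for lu in fn.lus(r'^bake\.v$')] # doctest: +SKIP
+    [('bake.v', 'Apply_heat'), ('bake.v', 'Cooking_creation'), ('bake.v', 'Absorb_heat')]
+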
+Multiword expressions such as "given name" and hyphenated words
+like "shut-eye" can also be LUs. Idiomatic phrases such as
+"middle of nowhere" and "give the slip (to)" are also defined as
+LUs in the appropriate frames ("Isolated_places" and "Evading",
+respectively), and their internal structure is not analyzed.
+
+FrameNet provides multiple annotated examples of each sense of a
+word (i.e. each LU).  Moreover, the set of examples
+(approximately 20 per LU) illustrates all of the combinatorial
+possibilities of the lexical unit.
+
+Each LU is linked to a Frame, and hence to the other words which
+evoke that Frame. This makes the FrameNet database similar to a
+thesaurus, grouping together semantically similar words.
+
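+For example, the LUs that evoke a given frame can be listed from the
+frame's `lexUnit` mapping (an unchecked sketch; output abbreviated):
+
+    >>> sorted(fn.frame(202).lexUnit.keys())[:3] # doctest: +SKIP
+    ['apprehend.v', 'arrest.n', 'arrest.v']
+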
+In the simplest case, frame-evoking words are verbs such as
+"fried" in:
+
+   "Matilde fried the catfish in a heavy iron skillet."
+
+Sometimes event nouns may evoke a Frame. For example,
+"reduction" evokes "Cause_change_of_scalar_position" in:
+
+   "...the reduction of debt levels to $665 million from $2.6 billion."
+
+Adjectives may also evoke a Frame. For example, "asleep" may
+evoke the "Sleep" frame as in:
+
+   "They were asleep for hours."
+
+Many common nouns, such as artifacts like "hat" or "tower",
+typically serve as dependents rather than clearly evoking their
+own frames.
+
+Details for a specific lexical unit can be obtained using this class's
+`lus()` function, which takes an optional regular expression
+pattern that will be matched against the name of the lexical unit:
+
+    >>> from pprint import pprint
+    >>> PrettyList(sorted(fn.lus(r'(?i)a little'), key=itemgetter('ID')))
+    [<lu ID=14733 name=a little.n>, <lu ID=14743 name=a little.adv>, ...]
+
+You can obtain detailed information on a particular LU by calling the
+`lu()` function and passing in an LU's 'ID' number:
+
+    >>> from pprint import pprint
+    >>> from nltk.corpus import framenet as fn
+    >>> fn.lu(256).name
+    'foresee.v'
+    >>> fn.lu(256).definition
+    'COD: be aware of beforehand; predict.'
+    >>> fn.lu(256).frame.name
+    'Expectation'
+    >>> fn.lu(256).lexemes[0].name
+    'foresee'
+
+Note that LU names take the form of a dotted string (e.g. "run.v" or "a
+little.adv") in which a lemma preceeds the "." and a part of speech
+(POS) follows the dot. The lemma may be composed of a single lexeme
+(e.g. "run") or of multiple lexemes (e.g. "a little"). The list of
+POSs used in the LUs is:
+
+v    - verb
+n    - noun
+a    - adjective
+adv  - adverb
+prep - preposition
+num  - numbers
+intj - interjection
+art  - article
+c    - conjunction
+scon - subordinating conjunction
+
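+For instance, a name pattern anchored on the POS suffix picks out LUs
+of a single part of speech (a sketch, not checked here):
+
+    >>> fn.lus(r'(?i)^foresee\.v$') # doctest: +SKIP
+    [<lu ID=256 name=foresee.v>]
+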
+For more detailed information about the info that is contained in the
+dict that is returned by the `lu()` function, see the documentation on
+the `lu()` function.
+
+-------------------
+Annotated Documents
+-------------------
+
+The FrameNet corpus contains a small set of annotated documents. A list
+of these documents can be obtained by calling the `docs()` function:
+
+    >>> from pprint import pprint
+    >>> from nltk.corpus import framenet as fn
+    >>> d = fn.docs('BellRinging')[0]
+    >>> d.corpname
+    'PropBank'
+    >>> d.sentence[49] # doctest: +ELLIPSIS
+    full-text sentence (...) in BellRinging:
+    <BLANKLINE>
+    <BLANKLINE>
+    [POS] 17 tags
+    <BLANKLINE>
+    [POS_tagset] PENN
+    <BLANKLINE>
+    [text] + [annotationSet]
+    <BLANKLINE>
+    `` I live in hopes that the ringers themselves will be drawn into
+                 *****          *******                    *****
+                 Desir          Cause_t                    Cause
+                 [1]            [3]                        [2]
+    <BLANKLINE>
+     that fuller life .
+          ******
+          Comple
+          [4]
+     (Desir=Desiring, Cause_t=Cause_to_make_noise, Cause=Cause_motion, Comple=Completeness)
+    <BLANKLINE>
+
+    >>> d.sentence[49].annotationSet[1] # doctest: +ELLIPSIS
+    annotation set (...):
+    <BLANKLINE>
+    [status] MANUAL
+    <BLANKLINE>
+    [LU] (6605) hope.n in Desiring
+    <BLANKLINE>
+    [frame] (366) Desiring
+    <BLANKLINE>
+    [GF] 2 relations
+    <BLANKLINE>
+    [PT] 2 phrases
+    <BLANKLINE>
+    [text] + [Target] + [FE] + [Noun]
+    <BLANKLINE>
+    `` I live in hopes that the ringers themselves will be drawn into
+       - ^^^^ ^^ ***** ----------------------------------------------
+       E supp su       Event
+    <BLANKLINE>
+     that fuller life .
+    -----------------
+    <BLANKLINE>
+     (E=Experiencer, su=supp)
+    <BLANKLINE>
+    <BLANKLINE>
diff --git a/nlp_resource_data/nltk/test/generate.doctest b/nlp_resource_data/nltk/test/generate.doctest
new file mode 100644 (file)
index 0000000..2c7f3d8
--- /dev/null
@@ -0,0 +1,67 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+===============================================
+Generating sentences from context-free grammars
+===============================================
+
+An example grammar:
+
+    >>> from nltk.parse.generate import generate, demo_grammar
+    >>> from nltk import CFG
+    >>> grammar = CFG.fromstring(demo_grammar)
+    >>> print(grammar)
+    Grammar with 13 productions (start state = S)
+        S -> NP VP
+        NP -> Det N
+        PP -> P NP
+        VP -> 'slept'
+        VP -> 'saw' NP
+        VP -> 'walked' PP
+        Det -> 'the'
+        Det -> 'a'
+        N -> 'man'
+        N -> 'park'
+        N -> 'dog'
+        P -> 'in'
+        P -> 'with'
+
+The first 10 generated sentences:
+
+    >>> for sentence in generate(grammar, n=10):
+    ...     print(' '.join(sentence))
+    the man slept
+    the man saw the man
+    the man saw the park
+    the man saw the dog
+    the man saw a man
+    the man saw a park
+    the man saw a dog
+    the man walked in the man
+    the man walked in the park
+    the man walked in the dog
+
+All sentences of max depth 4:
+
+    >>> for sentence in generate(grammar, depth=4):
+    ...     print(' '.join(sentence))
+    the man slept
+    the park slept
+    the dog slept
+    a man slept
+    a park slept
+    a dog slept
+
+The number of sentences of different max depths:
+
+    >>> len(list(generate(grammar, depth=3)))
+    0
+    >>> len(list(generate(grammar, depth=4)))
+    6
+    >>> len(list(generate(grammar, depth=5)))
+    42
+    >>> len(list(generate(grammar, depth=6)))
+    114
+    >>> len(list(generate(grammar)))
+    114
+
diff --git a/nlp_resource_data/nltk/test/gensim.doctest b/nlp_resource_data/nltk/test/gensim.doctest
new file mode 100644 (file)
index 0000000..386e3e0
--- /dev/null
@@ -0,0 +1,140 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=======================================
+Demonstrate word embedding using Gensim
+=======================================
+
+We demonstrate three tasks:
+
+- Training word embeddings on the Brown Corpus;
+- Loading a pre-trained model and performing simple tasks; and
+- Pruning a pre-trained binary model.
+
+    >>> import gensim
+
+---------------
+Train the model
+---------------
+
+Here we train a word embedding using the Brown Corpus:
+
+    >>> from nltk.corpus import brown
+    >>> model = gensim.models.Word2Vec(brown.sents())
+
+Training may take some time, so once the model is trained, it can be saved and reloaded as follows:
+
+    >>> model.save('brown.embedding')
+    >>> new_model = gensim.models.Word2Vec.load('brown.embedding')
+
+The model maps each word in its vocabulary to an embedding vector, and we can easily look up the representation of a word:
+
+    >>> len(new_model['university'])
+    100
+
+There are some supporting functions already implemented in Gensim to manipulate word embeddings.
+For example, to compute the cosine similarity between 2 words:
+
+    >>> new_model.similarity('university','school') > 0.3
+    True
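+
+Under the hood this similarity is just the cosine of the two word
+vectors. A small numpy cross-check (a sketch, assuming the vectors are
+retrievable by indexing as above):
+
+    >>> import numpy as np
+    >>> u, v = new_model['university'], new_model['school']
+    >>> cos = np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
+    >>> bool(abs(cos - new_model.similarity('university', 'school')) < 1e-5)
+    True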
+
+---------------------------
+Using the pre-trained model
+---------------------------
+
+NLTK includes a pruned sample of a pre-trained model that was trained on 100 billion words from the Google News dataset.
+The full model is from https://code.google.com/p/word2vec/ (about 3 GB).
+
+    >>> from nltk.data import find
+    >>> word2vec_sample = str(find('models/word2vec_sample/pruned.word2vec.txt'))
+    >>> model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_sample, binary=False)
+
+We pruned the model to only include the most common words (~44k words).
+
+    >>> len(model.vocab)
+    43981
+
+Each word is represented in the space of 300 dimensions:
+
+    >>> len(model['university'])
+    300
+
+Finding the top n words that are most similar to a target word is simple. The result is a list of n (word, score) pairs.
+
+    >>> model.most_similar(positive=['university'], topn = 3)
+    [('universities', 0.70039...), ('faculty', 0.67809...), ('undergraduate', 0.65870...)]
+
+Finding the word that does not belong in a list is also supported, although implementing this yourself would be simple.
+
+    >>> model.doesnt_match('breakfast cereal dinner lunch'.split())
+    'cereal'
+
+Mikolov et al. (2013) observed that word embeddings capture many syntactic and semantic regularities. For example,
+the vector 'King - Man + Woman' is close to 'Queen' and 'Germany - Berlin + Paris' is close to 'France'.
+
+    >>> model.most_similar(positive=['woman','king'], negative=['man'], topn = 1)
+    [('queen', 0.71181...)]
+
+    >>> model.most_similar(positive=['Paris','Germany'], negative=['Berlin'], topn = 1)
+    [('France', 0.78840...)]
+
+We can visualize the word embeddings using t-SNE (http://lvdmaaten.github.io/tsne/). For this demonstration, we visualize the first 1000 words.
+
+|    import numpy as np
+|    labels = []
+|    count = 0
+|    max_count = 1000
+|    X = np.zeros(shape=(max_count,len(model['university'])))
+|
+|    for term in model.vocab:
+|        X[count] = model[term]
+|        labels.append(term)
+|        count+= 1
+|        if count >= max_count: break
+|
+|    # It is recommended to use PCA first to reduce to ~50 dimensions
+|    from sklearn.decomposition import PCA
+|    pca = PCA(n_components=50)
+|    X_50 = pca.fit_transform(X)
+|
+|    # Using TSNE to further reduce to 2 dimensions
+|    from sklearn.manifold import TSNE
+|    model_tsne = TSNE(n_components=2, random_state=0)
+|    Y = model_tsne.fit_transform(X_50)
+|
+|    # Show the scatter plot
+|    import matplotlib.pyplot as plt
+|    plt.scatter(Y[:,0], Y[:,1], 20)
+|
+|    # Add labels
+|    for label, x, y in zip(labels, Y[:, 0], Y[:, 1]):
+|        plt.annotate(label, xy = (x,y), xytext = (0, 0), textcoords = 'offset points', size = 10)
+|
+|    plt.show()
+
+------------------------------
+Prune the trained binary model
+------------------------------
+
+Here is the supporting code to extract part of the binary model (GoogleNews-vectors-negative300.bin.gz) from https://code.google.com/p/word2vec/.
+We used this code to produce the `word2vec_sample` model shown above.
+
+|    import gensim
+|    from gensim.models.word2vec import Word2Vec
+|    # Load the binary model
+|    model = Word2Vec.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True)
+|
+|    # Only keep words that appear in the Brown corpus
+|    from nltk.corpus import brown
+|    words = set(brown.words())
+|    print(len(words))
+|
+|    # Write the retained words to a temporary file
+|    out_file = 'pruned.word2vec.txt'
+|    f = open(out_file, 'w')  # text mode, since we write strings under Python 3
+|
+|    word_presented = words.intersection(model.vocab.keys())
+|    f.write('{} {}\n'.format(len(word_presented),len(model['word'])))
+|
+|    for word in word_presented:
+|        f.write('{} {}\n'.format(word, ' '.join(str(value) for value in model[word])))
+|
+|    f.close()
diff --git a/nlp_resource_data/nltk/test/gensim_fixt.py b/nlp_resource_data/nltk/test/gensim_fixt.py
new file mode 100644 (file)
index 0000000..2de144c
--- /dev/null
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+
+
+def setup_module(module):
+    from nose import SkipTest
+
+    try:
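+        # The import is only an availability probe; gensim itself is not used here.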
+        import gensim
+    except ImportError:
+        raise SkipTest("Gensim doctest requires gensim")
diff --git a/nlp_resource_data/nltk/test/gluesemantics.doctest b/nlp_resource_data/nltk/test/gluesemantics.doctest
new file mode 100644 (file)
index 0000000..08b96e3
--- /dev/null
@@ -0,0 +1,384 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==============================================================================
+ Glue Semantics
+==============================================================================
+
+.. include:: ../../../nltk_book/definitions.rst
+
+
+======================
+Linear logic
+======================
+
+    >>> from nltk.sem import logic
+    >>> from nltk.sem.glue import *
+    >>> from nltk.sem.linearlogic import *
+
+    >>> from nltk.sem.linearlogic import Expression
+    >>> read_expr = Expression.fromstring
+
+Parser
+
+    >>> print(read_expr(r'f'))
+    f
+    >>> print(read_expr(r'(g -o f)'))
+    (g -o f)
+    >>> print(read_expr(r'(g -o (h -o f))'))
+    (g -o (h -o f))
+    >>> print(read_expr(r'((g -o G) -o G)'))
+    ((g -o G) -o G)
+    >>> print(read_expr(r'(g -o f)(g)'))
+    (g -o f)(g)
+    >>> print(read_expr(r'((g -o G) -o G)((g -o f))'))
+    ((g -o G) -o G)((g -o f))
+
+Simplify
+
+    >>> print(read_expr(r'f').simplify())
+    f
+    >>> print(read_expr(r'(g -o f)').simplify())
+    (g -o f)
+    >>> print(read_expr(r'((g -o G) -o G)').simplify())
+    ((g -o G) -o G)
+    >>> print(read_expr(r'(g -o f)(g)').simplify())
+    f
+    >>> try: read_expr(r'(g -o f)(f)').simplify()
+    ... except LinearLogicApplicationException as e: print(e)
+    ...
+    Cannot apply (g -o f) to f. Cannot unify g with f given {}
+    >>> print(read_expr(r'(G -o f)(g)').simplify())
+    f
+    >>> print(read_expr(r'((g -o G) -o G)((g -o f))').simplify())
+    f
+
+Test BindingDict
+
+    >>> h = ConstantExpression('h')
+    >>> g = ConstantExpression('g')
+    >>> f = ConstantExpression('f')
+
+    >>> H = VariableExpression('H')
+    >>> G = VariableExpression('G')
+    >>> F = VariableExpression('F')
+
+    >>> d1 = BindingDict({H: h})
+    >>> d2 = BindingDict({F: f, G: F})
+    >>> d12 = d1 + d2
+    >>> all12 = ['%s: %s' % (v, d12[v]) for v in d12.d]
+    >>> all12.sort()
+    >>> print(all12)
+    ['F: f', 'G: f', 'H: h']
+
+    >>> BindingDict([(F,f),(G,g),(H,h)]) == BindingDict({F:f, G:g, H:h})
+    True
+
+    >>> d4 = BindingDict({F: f})
+    >>> try: d4[F] = g
+    ... except VariableBindingException as e: print(e)
+    Variable F already bound to another value
+
+Test Unify
+
+    >>> try: f.unify(g, BindingDict())
+    ... except UnificationException as e: print(e)
+    ...
+    Cannot unify f with g given {}
+
+    >>> f.unify(G, BindingDict()) == BindingDict({G: f})
+    True
+    >>> try: f.unify(G, BindingDict({G: h}))
+    ... except UnificationException as e: print(e)
+    ...
+    Cannot unify f with G given {G: h}
+    >>> f.unify(G, BindingDict({G: f})) == BindingDict({G: f})
+    True
+    >>> f.unify(G, BindingDict({H: f})) == BindingDict({G: f, H: f})
+    True
+
+    >>> G.unify(f, BindingDict()) == BindingDict({G: f})
+    True
+    >>> try: G.unify(f, BindingDict({G: h}))
+    ... except UnificationException as e: print(e)
+    ...
+    Cannot unify G with f given {G: h}
+    >>> G.unify(f, BindingDict({G: f})) == BindingDict({G: f})
+    True
+    >>> G.unify(f, BindingDict({H: f})) == BindingDict({G: f, H: f})
+    True
+
+    >>> G.unify(F, BindingDict()) == BindingDict({G: F})
+    True
+    >>> try: G.unify(F, BindingDict({G: H}))
+    ... except UnificationException as e: print(e)
+    ...
+    Cannot unify G with F given {G: H}
+    >>> G.unify(F, BindingDict({G: F})) == BindingDict({G: F})
+    True
+    >>> G.unify(F, BindingDict({H: F})) == BindingDict({G: F, H: F})
+    True
+
+Test Compile
+
+    >>> print(read_expr('g').compile_pos(Counter(), GlueFormula))
+    (<ConstantExpression g>, [])
+    >>> print(read_expr('(g -o f)').compile_pos(Counter(), GlueFormula))
+    (<ImpExpression (g -o f)>, [])
+    >>> print(read_expr('(g -o (h -o f))').compile_pos(Counter(), GlueFormula))
+    (<ImpExpression (g -o (h -o f))>, [])
+
+
+======================
+Glue
+======================
+
+Demo of "John walks"
+--------------------
+
+    >>> john = GlueFormula("John", "g")
+    >>> print(john)
+    John : g
+    >>> walks = GlueFormula(r"\x.walks(x)", "(g -o f)")
+    >>> print(walks)
+    \x.walks(x) : (g -o f)
+    >>> print(walks.applyto(john))
+    \x.walks(x)(John) : (g -o f)(g)
+    >>> print(walks.applyto(john).simplify())
+    walks(John) : f
+
+
+Demo of "A dog walks"
+---------------------
+
+    >>> a = GlueFormula("\P Q.some x.(P(x) and Q(x))", "((gv -o gr) -o ((g -o G) -o G))")
+    >>> print(a)
+    \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G) -o G))
+    >>> man = GlueFormula(r"\x.man(x)", "(gv -o gr)")
+    >>> print(man)
+    \x.man(x) : (gv -o gr)
+    >>> walks = GlueFormula(r"\x.walks(x)", "(g -o f)")
+    >>> print(walks)
+    \x.walks(x) : (g -o f)
+    >>> a_man = a.applyto(man)
+    >>> print(a_man.simplify())
+    \Q.exists x.(man(x) & Q(x)) : ((g -o G) -o G)
+    >>> a_man_walks = a_man.applyto(walks)
+    >>> print(a_man_walks.simplify())
+    exists x.(man(x) & walks(x)) : f
+
+
+Demo of 'every girl chases a dog'
+---------------------------------
+
+Individual words:
+
+    >>> every = GlueFormula("\P Q.all x.(P(x) -> Q(x))", "((gv -o gr) -o ((g -o G) -o G))")
+    >>> print(every)
+    \P Q.all x.(P(x) -> Q(x)) : ((gv -o gr) -o ((g -o G) -o G))
+    >>> girl = GlueFormula(r"\x.girl(x)", "(gv -o gr)")
+    >>> print(girl)
+    \x.girl(x) : (gv -o gr)
+    >>> chases = GlueFormula(r"\x y.chases(x,y)", "(g -o (h -o f))")
+    >>> print(chases)
+    \x y.chases(x,y) : (g -o (h -o f))
+    >>> a = GlueFormula("\P Q.some x.(P(x) and Q(x))", "((hv -o hr) -o ((h -o H) -o H))")
+    >>> print(a)
+    \P Q.exists x.(P(x) & Q(x)) : ((hv -o hr) -o ((h -o H) -o H))
+    >>> dog = GlueFormula(r"\x.dog(x)", "(hv -o hr)")
+    >>> print(dog)
+    \x.dog(x) : (hv -o hr)
+
+Noun Quantification can only be done one way:
+
+    >>> every_girl = every.applyto(girl)
+    >>> print(every_girl.simplify())
+    \Q.all x.(girl(x) -> Q(x)) : ((g -o G) -o G)
+    >>> a_dog = a.applyto(dog)
+    >>> print(a_dog.simplify())
+    \Q.exists x.(dog(x) & Q(x)) : ((h -o H) -o H)
+
+The first reading is achieved by combining 'chases' with 'a dog' first.
+Since 'a dog' requires something of the form '(h -o H)' we must
+get rid of the 'g' in the glue of 'chases'.  We will do this with
+the '-o elimination' rule.  So, x1 will be our subject placeholder.
+
+    >>> xPrime = GlueFormula("x1", "g")
+    >>> print(xPrime)
+    x1 : g
+    >>> xPrime_chases = chases.applyto(xPrime)
+    >>> print(xPrime_chases.simplify())
+    \y.chases(x1,y) : (h -o f)
+    >>> xPrime_chases_a_dog = a_dog.applyto(xPrime_chases)
+    >>> print(xPrime_chases_a_dog.simplify())
+    exists x.(dog(x) & chases(x1,x)) : f
+
+Now we can retract our subject placeholder using lambda-abstraction and
+combine with the true subject.
+
+    >>> chases_a_dog = xPrime_chases_a_dog.lambda_abstract(xPrime)
+    >>> print(chases_a_dog.simplify())
+    \x1.exists x.(dog(x) & chases(x1,x)) : (g -o f)
+    >>> every_girl_chases_a_dog = every_girl.applyto(chases_a_dog)
+    >>> r1 = every_girl_chases_a_dog.simplify()
+    >>> r2 = GlueFormula(r'all x.(girl(x) -> exists z1.(dog(z1) & chases(x,z1)))', 'f')
+    >>> r1 == r2
+    True
+
+The second reading is achieved by combining 'every girl' with 'chases' first.
+
+    >>> xPrime = GlueFormula("x1", "g")
+    >>> print(xPrime)
+    x1 : g
+    >>> xPrime_chases = chases.applyto(xPrime)
+    >>> print(xPrime_chases.simplify())
+    \y.chases(x1,y) : (h -o f)
+    >>> yPrime = GlueFormula("x2", "h")
+    >>> print(yPrime)
+    x2 : h
+    >>> xPrime_chases_yPrime = xPrime_chases.applyto(yPrime)
+    >>> print(xPrime_chases_yPrime.simplify())
+    chases(x1,x2) : f
+    >>> chases_yPrime = xPrime_chases_yPrime.lambda_abstract(xPrime)
+    >>> print(chases_yPrime.simplify())
+    \x1.chases(x1,x2) : (g -o f)
+    >>> every_girl_chases_yPrime = every_girl.applyto(chases_yPrime)
+    >>> print(every_girl_chases_yPrime.simplify())
+    all x.(girl(x) -> chases(x,x2)) : f
+    >>> every_girl_chases = every_girl_chases_yPrime.lambda_abstract(yPrime)
+    >>> print(every_girl_chases.simplify())
+    \x2.all x.(girl(x) -> chases(x,x2)) : (h -o f)
+    >>> every_girl_chases_a_dog = a_dog.applyto(every_girl_chases)
+    >>> r1 = every_girl_chases_a_dog.simplify()
+    >>> r2 = GlueFormula(r'exists x.(dog(x) & all z2.(girl(z2) -> chases(z2,x)))', 'f')
+    >>> r1 == r2
+    True
+
+
+Compilation
+-----------
+
+    >>> for cp in GlueFormula('m', '(b -o a)').compile(Counter()): print(cp)
+    m : (b -o a) : {1}
+    >>> for cp in GlueFormula('m', '((c -o b) -o a)').compile(Counter()): print(cp)
+    v1 : c : {1}
+    m : (b[1] -o a) : {2}
+    >>> for cp in GlueFormula('m', '((d -o (c -o b)) -o a)').compile(Counter()): print(cp)
+    v1 : c : {1}
+    v2 : d : {2}
+    m : (b[1, 2] -o a) : {3}
+    >>> for cp in GlueFormula('m', '((d -o e) -o ((c -o b) -o a))').compile(Counter()): print(cp)
+    v1 : d : {1}
+    v2 : c : {2}
+    m : (e[1] -o (b[2] -o a)) : {3}
+    >>> for cp in GlueFormula('m', '(((d -o c) -o b) -o a)').compile(Counter()): print(cp)
+    v1 : (d -o c) : {1}
+    m : (b[1] -o a) : {2}
+    >>> for cp in GlueFormula('m', '((((e -o d) -o c) -o b) -o a)').compile(Counter()): print(cp)
+    v1 : e : {1}
+    v2 : (d[1] -o c) : {2}
+    m : (b[2] -o a) : {3}
+
+
+Demo of 'a man walks' using Compilation
+---------------------------------------
+
+Premises
+
+    >>> a = GlueFormula('\\P Q.some x.(P(x) and Q(x))', '((gv -o gr) -o ((g -o G) -o G))')
+    >>> print(a)
+    \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G) -o G))
+
+    >>> man = GlueFormula('\\x.man(x)', '(gv -o gr)')
+    >>> print(man)
+    \x.man(x) : (gv -o gr)
+
+    >>> walks = GlueFormula('\\x.walks(x)', '(g -o f)')
+    >>> print(walks)
+    \x.walks(x) : (g -o f)
+
+Compiled Premises:
+
+    >>> counter = Counter()
+    >>> ahc = a.compile(counter)
+    >>> g1 = ahc[0]
+    >>> print(g1)
+    v1 : gv : {1}
+    >>> g2 = ahc[1]
+    >>> print(g2)
+    v2 : g : {2}
+    >>> g3 = ahc[2]
+    >>> print(g3)
+    \P Q.exists x.(P(x) & Q(x)) : (gr[1] -o (G[2] -o G)) : {3}
+    >>> g4 = man.compile(counter)[0]
+    >>> print(g4)
+    \x.man(x) : (gv -o gr) : {4}
+    >>> g5 = walks.compile(counter)[0]
+    >>> print(g5)
+    \x.walks(x) : (g -o f) : {5}
+
+Derivation:
+
+    >>> g14 = g4.applyto(g1)
+    >>> print(g14.simplify())
+    man(v1) : gr : {1, 4}
+    >>> g134 = g3.applyto(g14)
+    >>> print(g134.simplify())
+    \Q.exists x.(man(x) & Q(x)) : (G[2] -o G) : {1, 3, 4}
+    >>> g25 = g5.applyto(g2)
+    >>> print(g25.simplify())
+    walks(v2) : f : {2, 5}
+    >>> g12345 = g134.applyto(g25)
+    >>> print(g12345.simplify())
+    exists x.(man(x) & walks(x)) : f : {1, 2, 3, 4, 5}
+
+---------------------------------
+Dependency Graph to Glue Formulas
+---------------------------------
+    >>> from nltk.corpus.reader.dependency import DependencyGraph
+
+    >>> depgraph = DependencyGraph("""1        John    _       NNP     NNP     _       2       SUBJ    _       _
+    ... 2      sees    _       VB      VB      _       0       ROOT    _       _
+    ... 3      a       _       ex_quant        ex_quant        _       4       SPEC    _       _
+    ... 4      dog     _       NN      NN      _       2       OBJ     _       _
+    ... """)
+    >>> gfl = GlueDict('nltk:grammars/sample_grammars/glue.semtype').to_glueformula_list(depgraph)
+    >>> print(gfl) # doctest: +SKIP
+    [\x y.sees(x,y) : (f -o (i -o g)),
+     \x.dog(x) : (iv -o ir),
+     \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I3) -o I3)),
+     \P Q.exists x.(P(x) & Q(x)) : ((fv -o fr) -o ((f -o F4) -o F4)),
+     \x.John(x) : (fv -o fr)]
+    >>> glue = Glue()
+    >>> for r in sorted([r.simplify().normalize() for r in glue.get_readings(glue.gfl_to_compiled(gfl))], key=str):
+    ...     print(r)
+    exists z1.(John(z1) & exists z2.(dog(z2) & sees(z1,z2)))
+    exists z1.(dog(z1) & exists z2.(John(z2) & sees(z2,z1)))
+
+-----------------------------------
+Dependency Graph to LFG f-structure
+-----------------------------------
+    >>> from nltk.sem.lfg import FStructure
+
+    >>> fstruct = FStructure.read_depgraph(depgraph)
+
+    >>> print(fstruct) # doctest: +SKIP
+    f:[pred 'sees'
+       obj h:[pred 'dog'
+              spec 'a']
+       subj g:[pred 'John']]
+
+    >>> fstruct.to_depgraph().tree().pprint()
+    (sees (dog a) John)
+
+---------------------------------
+LFG f-structure to Glue
+---------------------------------
+    >>> fstruct.to_glueformula_list(GlueDict('nltk:grammars/sample_grammars/glue.semtype')) # doctest: +SKIP
+    [\x y.sees(x,y) : (i -o (g -o f)),
+     \x.dog(x) : (gv -o gr),
+     \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G3) -o G3)),
+     \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I4) -o I4)),
+     \x.John(x) : (iv -o ir)]
+
+.. see gluesemantics_malt.doctest for more
diff --git a/nlp_resource_data/nltk/test/gluesemantics_malt.doctest b/nlp_resource_data/nltk/test/gluesemantics_malt.doctest
new file mode 100644 (file)
index 0000000..a76e96f
--- /dev/null
@@ -0,0 +1,68 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+.. see also: gluesemantics.doctest
+
+==============================================================================
+ Glue Semantics
+==============================================================================
+
+    >>> from nltk.sem.glue import *
+    >>> nltk.sem.logic._counter._value = 0
+
+--------------------------------
+Initialize the Dependency Parser
+--------------------------------
+    >>> from nltk.parse.malt import MaltParser
+
+    >>> tagger = RegexpTagger(
+    ...     [('^(John|Mary)$', 'NNP'),
+    ...      ('^(sees|chases)$', 'VB'),
+    ...      ('^(a)$', 'ex_quant'),
+    ...      ('^(every)$', 'univ_quant'),
+    ...      ('^(girl|dog)$', 'NN')
+    ... ])
+    >>> depparser = MaltParser(tagger=tagger)
+
+--------------------
+Automated Derivation
+--------------------
+    >>> glue = Glue(depparser=depparser)
+    >>> readings = glue.parse_to_meaning('every girl chases a dog'.split())
+    >>> for reading in sorted([r.simplify().normalize() for r in readings], key=str):
+    ...     print(reading.normalize())
+    all z1.(girl(z1) -> exists z2.(dog(z2) & chases(z1,z2)))
+    exists z1.(dog(z1) & all z2.(girl(z2) -> chases(z2,z1)))
+
+    >>> drtglue = DrtGlue(depparser=depparser)
+    >>> readings = drtglue.parse_to_meaning('every girl chases a dog'.split())
+    >>> for reading in sorted([r.simplify().normalize() for r in readings], key=str):
+    ...     print(reading)
+    ([],[(([z1],[girl(z1)]) -> ([z2],[dog(z2), chases(z1,z2)]))])
+    ([z1],[dog(z1), (([z2],[girl(z2)]) -> ([],[chases(z2,z1)]))])
+
+--------------
+With inference
+--------------
+
+Checking for equality of two DRSs is very useful when generating readings of a sentence.
+For example, the ``glue`` module generates two readings for the sentence
+*John sees Mary*:
+
+    >>> from nltk.sem.glue import DrtGlue
+    >>> readings = drtglue.parse_to_meaning('John sees Mary'.split())
+    >>> for drs in sorted([r.simplify().normalize() for r in readings], key=str):
+    ...     print(drs)
+    ([z1,z2],[John(z1), Mary(z2), sees(z1,z2)])
+    ([z1,z2],[Mary(z1), John(z2), sees(z2,z1)])
+
+However, it is easy to tell that these two readings are logically the
+same, and therefore one of them is superfluous.  We can use the theorem prover
+to determine this equivalence, and then delete one of them.  A particular
+theorem prover may be specified, or the argument may be left off to use the
+default.
+
+    >>> readings[0].equiv(readings[1])
+    True
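+
+A specific prover can also be passed in explicitly. The call below
+follows the documented `equiv()` signature with an explicit
+`TableauProver`, but is left as an unchecked sketch:
+
+    >>> from nltk.inference import TableauProver
+    >>> readings[0].equiv(readings[1], TableauProver()) # doctest: +SKIP
+    True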
+
+
diff --git a/nlp_resource_data/nltk/test/gluesemantics_malt_fixt.py b/nlp_resource_data/nltk/test/gluesemantics_malt_fixt.py
new file mode 100644 (file)
index 0000000..1a7fee3
--- /dev/null
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+
+
+def setup_module(module):
+    from nose import SkipTest
+    from nltk.parse.malt import MaltParser
+
+    try:
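+        # Constructing the parser only probes for a local MaltParser install;
+        # a LookupError here means the maltparser-1.7.2 jars were not found.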
+        depparser = MaltParser("maltparser-1.7.2")
+    except LookupError:
+        raise SkipTest("MaltParser is not available")
diff --git a/nlp_resource_data/nltk/test/grammar.doctest b/nlp_resource_data/nltk/test/grammar.doctest
new file mode 100644 (file)
index 0000000..c604069
--- /dev/null
@@ -0,0 +1,48 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+===============
+Grammar Parsing
+===============
+
+Grammars can be parsed from strings:
+
+    >>> from nltk import CFG
+    >>> grammar = CFG.fromstring("""
+    ... S -> NP VP
+    ... PP -> P NP
+    ... NP -> Det N | NP PP
+    ... VP -> V NP | VP PP
+    ... Det -> 'a' | 'the'
+    ... N -> 'dog' | 'cat'
+    ... V -> 'chased' | 'sat'
+    ... P -> 'on' | 'in'
+    ... """)
+    >>> grammar
+    <Grammar with 14 productions>
+    >>> grammar.start()
+    S
+    >>> grammar.productions() # doctest: +NORMALIZE_WHITESPACE
+    [S -> NP VP, PP -> P NP, NP -> Det N, NP -> NP PP, VP -> V NP, VP -> VP PP,
+    Det -> 'a', Det -> 'the', N -> 'dog', N -> 'cat', V -> 'chased', V -> 'sat',
+    P -> 'on', P -> 'in']
+
+Probabilistic CFGs:
+
+    >>> from nltk import PCFG
+    >>> toy_pcfg1 = PCFG.fromstring("""
+    ... S -> NP VP [1.0]
+    ... NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
+    ... Det -> 'the' [0.8] | 'my' [0.2]
+    ... N -> 'man' [0.5] | 'telescope' [0.5]
+    ... VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
+    ... V -> 'ate' [0.35] | 'saw' [0.65]
+    ... PP -> P NP [1.0]
+    ... P -> 'with' [0.61] | 'under' [0.39]
+    ... """)
+
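+A minimal sanity check on the resulting probabilistic grammar (the
+production repr is not verified here, hence the skip directive):
+
+    >>> toy_pcfg1.start()
+    S
+    >>> toy_pcfg1.productions()[0] # doctest: +SKIP
+    S -> NP VP [1.0]
+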
+Chomsky Normal Form grammar (Test for bug 474)
+
+    >>> g = CFG.fromstring("VP^<TOP> -> VBP NP^<VP-TOP>")
+    >>> g.productions()[0].lhs()
+    VP^<TOP>
diff --git a/nlp_resource_data/nltk/test/grammartestsuites.doctest b/nlp_resource_data/nltk/test/grammartestsuites.doctest
new file mode 100644 (file)
index 0000000..2eab462
--- /dev/null
@@ -0,0 +1,109 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==========================
+ Test Suites for Grammars
+==========================
+
+Sentences in the test suite are divided into two classes:
+
+- grammatical (*accept*) and
+- ungrammatical (*reject*).
+
+If a sentence should parse according to the grammar, the value of
+``trees`` will be a non-empty list. If a sentence should be rejected
+according to the grammar, then the value of ``trees`` will be ``None``.
+
+    >>> from nltk.parse import TestGrammar
+    >>> germantest1 = {}
+    >>> germantest1['doc'] = "Tests for person agreement"
+    >>> germantest1['accept'] = [
+    ... 'ich komme',
+    ... 'ich sehe mich',
+    ... 'du kommst',
+    ... 'du siehst mich',
+    ... 'sie kommt',
+    ... 'sie sieht mich',
+    ... 'ihr kommt',
+    ... 'wir kommen',
+    ... 'sie kommen',
+    ... 'du magst mich',
+    ... 'er mag mich',
+    ... 'du folgst mir',
+    ... 'sie hilft mir',
+    ... ]
+    >>> germantest1['reject'] = [
+    ... 'ich kommt',
+    ... 'ich kommst',
+    ... 'ich siehst mich',
+    ... 'du komme',
+    ... 'du sehe mich',
+    ... 'du kommt',
+    ... 'er komme',
+    ... 'er siehst mich',
+    ... 'wir komme',
+    ... 'wir kommst',
+    ... 'die Katzen kommst',
+    ... 'sie komme',
+    ... 'sie kommst',
+    ... 'du mag mich',
+    ... 'er magst mich',
+    ... 'du folgt mir',
+    ... 'sie hilfst mir',
+    ... ]
+    >>> germantest2 = {}
+    >>> germantest2['doc'] = "Tests for number agreement"
+    >>> germantest2['accept'] = [
+    ... 'der Hund kommt',
+    ... 'die Hunde kommen',
+    ... 'ich komme',
+    ... 'wir kommen',
+    ... 'ich sehe die Katzen',
+    ... 'ich folge den Katzen',
+    ... 'ich sehe die Katzen',
+    ... 'ich folge den Katzen',
+    ... 'wir sehen die Katzen',
+    ... 'wir folgen den Katzen'
+    ... ]
+    >>> germantest2['reject'] = [
+    ... 'ich kommen',
+    ... 'wir komme',
+    ... 'der Hunde kommt',
+    ... 'der Hunde kommen',
+    ... 'die Katzen kommt',
+    ... 'ich sehe der Hunde', 
+    ... 'ich folge den Hund',
+    ... 'ich sehen der Hunde', 
+    ... 'ich folgen den Hund',
+    ... 'wir sehe die Katzen',
+    ... 'wir folge den Katzen'
+    ... ]
+    >>> germantest3 = {}
+    >>> germantest3['doc'] = "Tests for case government and subcategorization"
+    >>> germantest3['accept'] = [
+    ... 'der Hund sieht mich', 
+    ... 'der Hund kommt',
+    ... 'ich sehe den Hund',
+    ... 'ich helfe dem Hund',
+    ... ]
+    >>> germantest3['reject'] = [
+    ... 'ich sehe',
+    ... 'ich helfe',
+    ... 'ich komme den Hund',
+    ... 'ich sehe den Hund die Katzen',
+    ... 'du hilfst mich',
+    ... 'du siehst mir',
+    ... 'du siehst ich',
+    ... 'der Hunde kommt mich',
+    ... 'die Hunde sehe die Hunde', 
+    ... 'der Hund sehe die Hunde', 
+    ... 'ich hilft den Hund',
+    ... 'ich hilft der Hund',
+    ... 'ich sehe dem Hund',
+    ... ]
+    >>> germantestsuites = [germantest1, germantest2, germantest3]
+    >>> tester = TestGrammar('grammars/book_grammars/german.fcfg', germantestsuites)
+    >>> tester.run()
+    Tests for person agreement: All tests passed!
+    Tests for number agreement: All tests passed!
+    Tests for case government and subcategorization: All tests passed!
diff --git a/nlp_resource_data/nltk/test/index.doctest b/nlp_resource_data/nltk/test/index.doctest
new file mode 100644 (file)
index 0000000..31b46c7
--- /dev/null
@@ -0,0 +1,100 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+.. _align howto: align.html
+.. _ccg howto: ccg.html
+.. _chat80 howto: chat80.html
+.. _childes howto: childes.html
+.. _chunk howto: chunk.html
+.. _classify howto: classify.html
+.. _collocations howto: collocations.html
+.. _compat howto: compat.html
+.. _corpus howto: corpus.html
+.. _data howto: data.html
+.. _dependency howto: dependency.html
+.. _discourse howto: discourse.html
+.. _drt howto: drt.html
+.. _featgram howto: featgram.html
+.. _featstruct howto: featstruct.html
+.. _framenet howto: framenet.html
+.. _generate howto: generate.html
+.. _gluesemantics howto: gluesemantics.html
+.. _gluesemantics_malt howto: gluesemantics_malt.html
+.. _grammar howto: grammar.html
+.. _grammartestsuites howto: grammartestsuites.html
+.. _index howto: index.html
+.. _inference howto: inference.html
+.. _internals howto: internals.html
+.. _japanese howto: japanese.html
+.. _logic howto: logic.html
+.. _metrics howto: metrics.html
+.. _misc howto: misc.html
+.. _nonmonotonic howto: nonmonotonic.html
+.. _parse howto: parse.html
+.. _portuguese_en howto: portuguese_en.html
+.. _probability howto: probability.html
+.. _propbank howto: propbank.html
+.. _relextract howto: relextract.html
+.. _resolution howto: resolution.html
+.. _semantics howto: semantics.html
+.. _simple howto: simple.html
+.. _stem howto: stem.html
+.. _tag howto: tag.html
+.. _tokenize howto: tokenize.html
+.. _toolbox howto: toolbox.html
+.. _tree howto: tree.html
+.. _treetransforms howto: treetransforms.html
+.. _util howto: util.html
+.. _wordnet howto: wordnet.html
+.. _wordnet_lch howto: wordnet_lch.html
+
+===========
+NLTK HOWTOs
+===========
+
+* `align HOWTO`_
+* `ccg HOWTO`_
+* `chat80 HOWTO`_
+* `childes HOWTO`_
+* `chunk HOWTO`_
+* `classify HOWTO`_
+* `collocations HOWTO`_
+* `compat HOWTO`_
+* `corpus HOWTO`_
+* `data HOWTO`_
+* `dependency HOWTO`_
+* `discourse HOWTO`_
+* `drt HOWTO`_
+* `featgram HOWTO`_
+* `featstruct HOWTO`_
+* `framenet HOWTO`_
+* `generate HOWTO`_
+* `gluesemantics HOWTO`_
+* `gluesemantics_malt HOWTO`_
+* `grammar HOWTO`_
+* `grammartestsuites HOWTO`_
+* `index HOWTO`_
+* `inference HOWTO`_
+* `internals HOWTO`_
+* `japanese HOWTO`_
+* `logic HOWTO`_
+* `metrics HOWTO`_
+* `misc HOWTO`_
+* `nonmonotonic HOWTO`_
+* `parse HOWTO`_
+* `portuguese_en HOWTO`_
+* `probability HOWTO`_
+* `propbank HOWTO`_
+* `relextract HOWTO`_
+* `resolution HOWTO`_
+* `semantics HOWTO`_
+* `simple HOWTO`_
+* `stem HOWTO`_
+* `tag HOWTO`_
+* `tokenize HOWTO`_
+* `toolbox HOWTO`_
+* `tree HOWTO`_
+* `treetransforms HOWTO`_
+* `util HOWTO`_
+* `wordnet HOWTO`_
+* `wordnet_lch HOWTO`_
diff --git a/nlp_resource_data/nltk/test/inference.doctest b/nlp_resource_data/nltk/test/inference.doctest
new file mode 100644 (file)
index 0000000..5bf1501
--- /dev/null
@@ -0,0 +1,534 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+====================================
+Logical Inference and Model Building
+====================================
+
+    >>> from nltk import *
+    >>> from nltk.sem.drt import DrtParser
+    >>> from nltk.sem import logic
+    >>> logic._counter._value = 0
+
+------------
+Introduction
+------------
+
+Within the area of automated reasoning, first order theorem proving
+and model building (or model generation) have both received much
+attention, and have given rise to highly sophisticated techniques. We
+focus therefore on providing an NLTK interface to third party tools
+for these tasks.  In particular, the module ``nltk.inference`` can be
+used to access both theorem provers and model builders.
+
+---------------------------------
+NLTK Interface to Theorem Provers
+---------------------------------
+
+The main class used to interface with a theorem prover is the ``Prover``
+class, found in ``nltk.inference.api``.  The ``prove()`` method takes three optional
+arguments: a goal, a list of assumptions, and a ``verbose`` boolean to
+indicate whether the proof should be printed to the console.  The proof goal
+and any assumptions need to be instances of the ``Expression`` class
+specified by ``nltk.sem.logic``.  There are currently three theorem provers
+included with NLTK: ``Prover9``, ``TableauProver``, and
+``ResolutionProver``.  The first is an off-the-shelf prover, while the other
+two are written in Python and included in the ``nltk.inference`` package.
+
+    >>> from nltk.sem import Expression
+    >>> read_expr = Expression.fromstring
+    >>> p1 = read_expr('man(socrates)')
+    >>> p2 = read_expr('all x.(man(x) -> mortal(x))')
+    >>> c  = read_expr('mortal(socrates)')
+    >>> Prover9().prove(c, [p1,p2])
+    True
+    >>> TableauProver().prove(c, [p1,p2])
+    True
+    >>> ResolutionProver().prove(c, [p1,p2], verbose=True)
+    [1] {-mortal(socrates)}     A
+    [2] {man(socrates)}         A
+    [3] {-man(z2), mortal(z2)}  A
+    [4] {-man(socrates)}        (1, 3)
+    [5] {mortal(socrates)}      (2, 3)
+    [6] {}                      (1, 5)
+    <BLANKLINE>
+    True
+
+---------------------
+The ``ProverCommand``
+---------------------
+
+A ``ProverCommand`` is a stateful holder for a theorem
+prover.  The command stores a theorem prover instance (of type ``Prover``),
+a goal, a list of assumptions, the result of the proof, and a string version
+of the entire proof.  Corresponding to the three included ``Prover``
+implementations, there are three ``ProverCommand`` implementations:
+``Prover9Command``, ``TableauProverCommand``, and
+``ResolutionProverCommand``.
+
+The ``ProverCommand``'s constructor takes its goal and assumptions.  The
+``prove()`` method executes the ``Prover``, and ``proof()`` returns a
+string form of the proof.  If the ``prove()`` method has not been called,
+then the prover command will be unable to display a proof.
+
+    >>> prover = ResolutionProverCommand(c, [p1,p2])
+    >>> print(prover.proof()) # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+      File "...", line 1212, in __run
+        compileflags, 1) in test.globs
+      File "<doctest nltk/test/inference.doctest[10]>", line 1, in <module>
+      File "...", line ..., in proof
+        raise LookupError("You have to call prove() first to get a proof!")
+    LookupError: You have to call prove() first to get a proof!
+    >>> prover.prove()
+    True
+    >>> print(prover.proof())
+    [1] {-mortal(socrates)}     A
+    [2] {man(socrates)}         A
+    [3] {-man(z4), mortal(z4)}  A
+    [4] {-man(socrates)}        (1, 3)
+    [5] {mortal(socrates)}      (2, 3)
+    [6] {}                      (1, 5)
+    <BLANKLINE>
+
+The prover command stores the result of proving so that if ``prove()`` is
+called again, then the command can return the result without executing the
+prover again.  This allows the user to access the result of the proof without
+wasting time re-computing what it already knows.
+
+    >>> prover.prove()
+    True
+    >>> prover.prove()
+    True
+
+The assumptions and goal may be accessed using the ``assumptions()`` and
+``goal()`` methods, respectively.
+
+    >>> prover.assumptions()
+    [<ApplicationExpression man(socrates)>, <AllExpression all x.(man(x) -> mortal(x))>]
+    >>> prover.goal()
+    <ApplicationExpression mortal(socrates)>
+
+The assumptions list may be modified using the ``add_assumptions()`` and
+``retract_assumptions()`` methods.  Both methods take a list of ``Expression``
+objects.  Since adding or removing assumptions may change the result of the
+proof, the stored result is cleared when either of these methods is called.
+That means that ``proof()`` will be unavailable until ``prove()`` is called
+again, and that call to ``prove()`` will re-execute the theorem prover.
+
+    >>> prover.retract_assumptions([read_expr('man(socrates)')])
+    >>> print(prover.proof()) # doctest: +ELLIPSIS
+    Traceback (most recent call last):
+      File "...", line 1212, in __run
+        compileflags, 1) in test.globs
+      File "<doctest nltk/test/inference.doctest[10]>", line 1, in <module>
+      File "...", line ..., in proof
+        raise LookupError("You have to call prove() first to get a proof!")
+    LookupError: You have to call prove() first to get a proof!
+    >>> prover.prove()
+    False
+    >>> print(prover.proof())
+    [1] {-mortal(socrates)}     A
+    [2] {-man(z6), mortal(z6)}  A
+    [3] {-man(socrates)}        (1, 2)
+    <BLANKLINE>
+    >>> prover.add_assumptions([read_expr('man(socrates)')])
+    >>> prover.prove()
+    True
+
+-------
+Prover9
+-------
+
+Prover9 Installation
+~~~~~~~~~~~~~~~~~~~~
+
+You can download Prover9 from http://www.cs.unm.edu/~mccune/prover9/.
+
+Extract the source code into a suitable directory and follow the
+instructions in the Prover9 ``README.make`` file to compile the executables.
+Install these into an appropriate location; the
+``prover9_search`` variable is currently configured to look in the
+following locations:
+
+    >>> p = Prover9()
+    >>> p.binary_locations() # doctest: +NORMALIZE_WHITESPACE
+    ['/usr/local/bin/prover9',
+     '/usr/local/bin/prover9/bin',
+     '/usr/local/bin',
+     '/usr/bin',
+     '/usr/local/prover9',
+     '/usr/local/share/prover9']
+
+Alternatively, the environment variable ``PROVER9HOME`` may be configured with
+the binary's location.
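+
+For example, a minimal sketch (the path below is a hypothetical install
+location, so the example is skipped):
+
+    >>> import os
+    >>> # hypothetical path; adjust to wherever prover9 is installed
+    >>> os.environ['PROVER9HOME'] = '/path/to/prover9/bin' # doctest: +SKIP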
+
+The path to the correct directory can be set manually in the following
+manner:
+
+    >>> config_prover9(path='/usr/local/bin') # doctest: +SKIP
+    [Found prover9: /usr/local/bin/prover9]
+
+If the executables cannot be found, ``Prover9`` will issue a warning message:
+
+    >>> p.prove() # doctest: +SKIP
+    Traceback (most recent call last):
+      ...
+    LookupError:
+    ===========================================================================
+      NLTK was unable to find the prover9 executable!  Use config_prover9() or
+      set the PROVER9HOME environment variable.
+    <BLANKLINE>
+        >> config_prover9('/path/to/prover9')
+    <BLANKLINE>
+      For more information, on prover9, see:
+        <http://www.cs.unm.edu/~mccune/prover9/>
+    ===========================================================================
+
+
+Using Prover9
+~~~~~~~~~~~~~
+
+The general case in theorem proving is to determine whether ``S |- g``
+holds, where ``S`` is a possibly empty set of assumptions, and ``g``
+is a proof goal.
+
+As mentioned earlier, NLTK input to ``Prover9`` must be
+``Expression``\ s of ``nltk.sem.logic``. A ``Prover9`` instance is
+initialized with a proof goal and, possibly, some assumptions. The
+``prove()`` method attempts to find a proof of the goal, given the
+list of assumptions (in this case, none).
+
+    >>> goal = read_expr('(man(x) <-> --man(x))')
+    >>> prover = Prover9Command(goal)
+    >>> prover.prove()
+    True
+
+Given a ``ProverCommand`` instance ``prover``, the method
+``prover.proof()`` will return a string containing the extensive proof
+information provided by Prover9, shown in abbreviated form here::
+
+    ============================== Prover9 ===============================
+    Prover9 (32) version ...
+    Process ... was started by ... on ...
+    ...
+    The command was ".../prover9 -f ...".
+    ============================== end of head ===========================
+
+    ============================== INPUT =================================
+
+    % Reading from file /var/...
+
+
+    formulas(goals).
+    (all x (man(x) -> man(x))).
+    end_of_list.
+
+    ...
+    ============================== end of search =========================
+
+    THEOREM PROVED
+
+    Exiting with 1 proof.
+
+    Process 6317 exit (max_proofs) Mon Jan 21 15:23:28 2008
+
+
+As mentioned earlier, we may want to list some assumptions for
+the proof, as shown here.
+
+    >>> g = read_expr('mortal(socrates)')
+    >>> a1 = read_expr('all x.(man(x) -> mortal(x))')
+    >>> prover = Prover9Command(g, assumptions=[a1])
+    >>> prover.print_assumptions()
+    all x.(man(x) -> mortal(x))
+
+However, the assumptions are not sufficient to derive the goal:
+
+    >>> print(prover.prove())
+    False
+
+So let's add another assumption:
+
+    >>> a2 = read_expr('man(socrates)')
+    >>> prover.add_assumptions([a2])
+    >>> prover.print_assumptions()
+    all x.(man(x) -> mortal(x))
+    man(socrates)
+    >>> print(prover.prove())
+    True
+
+We can also show the assumptions in ``Prover9`` format.
+
+    >>> prover.print_assumptions(output_format='Prover9')
+    all x (man(x) -> mortal(x))
+    man(socrates)
+
+    >>> prover.print_assumptions(output_format='Spass')
+    Traceback (most recent call last):
+      . . .
+    NameError: Unrecognized value for 'output_format': Spass
+
+Assumptions can be retracted from the list of assumptions.
+
+    >>> prover.retract_assumptions([a1])
+    >>> prover.print_assumptions()
+    man(socrates)
+    >>> prover.retract_assumptions([a1])
+
+Statements can be loaded from a file and parsed. We can then add these
+statements as new assumptions.
+
+    >>> g = read_expr('all x.(boxer(x) -> -boxerdog(x))')
+    >>> prover = Prover9Command(g)
+    >>> prover.prove()
+    False
+    >>> import nltk.data
+    >>> new = nltk.data.load('grammars/sample_grammars/background0.fol')
+    >>> for a in new:
+    ...     print(a)
+    all x.(boxerdog(x) -> dog(x))
+    all x.(boxer(x) -> person(x))
+    all x.-(dog(x) & person(x))
+    exists x.boxer(x)
+    exists x.boxerdog(x)
+    >>> prover.add_assumptions(new)
+    >>> print(prover.prove())
+    True
+    >>> print(prover.proof()) # doctest: +ELLIPSIS
+    ============================== prooftrans ============================
+    Prover9 (...) version ...
+    Process ... was started by ... on ...
+    ...
+    The command was ".../prover9".
+    ============================== end of head ===========================
+    <BLANKLINE>
+    ============================== end of input ==========================
+    <BLANKLINE>
+    ============================== PROOF =================================
+    <BLANKLINE>
+    % -------- Comments from original proof --------
+    % Proof 1 at ... seconds.
+    % Length of proof is 13.
+    % Level of proof is 4.
+    % Maximum clause weight is 0.000.
+    % Given clauses 0.
+    <BLANKLINE>
+    <BLANKLINE>
+    1 (all x (boxerdog(x) -> dog(x))).  [assumption].
+    2 (all x (boxer(x) -> person(x))).  [assumption].
+    3 (all x -(dog(x) & person(x))).  [assumption].
+    6 (all x (boxer(x) -> -boxerdog(x))).  [goal].
+    8 -boxerdog(x) | dog(x).  [clausify(1)].
+    9 boxerdog(c3).  [deny(6)].
+    11 -boxer(x) | person(x).  [clausify(2)].
+    12 boxer(c3).  [deny(6)].
+    14 -dog(x) | -person(x).  [clausify(3)].
+    15 dog(c3).  [resolve(9,a,8,a)].
+    18 person(c3).  [resolve(12,a,11,a)].
+    19 -person(c3).  [resolve(15,a,14,a)].
+    20 $F.  [resolve(19,a,18,a)].
+    <BLANKLINE>
+    ============================== end of proof ==========================
+
+----------------------
+The equiv() method
+----------------------
+
+One application of the theorem prover functionality is to check if
+two Expressions have the same meaning.
+The ``equiv()`` method calls a theorem prover to determine whether two
+Expressions are logically equivalent.
+
+    >>> a = read_expr(r'exists x.(man(x) & walks(x))')
+    >>> b = read_expr(r'exists x.(walks(x) & man(x))')
+    >>> print(a.equiv(b))
+    True
+
+The same method can be used on Discourse Representation Structures (DRSs).
+In this case, each DRS is converted to a first order logic form, and then
+passed to the theorem prover.
+
+    >>> dp = DrtParser()
+    >>> a = dp.parse(r'([x],[man(x), walks(x)])')
+    >>> b = dp.parse(r'([x],[walks(x), man(x)])')
+    >>> print(a.equiv(b))
+    True
+
+
+--------------------------------
+NLTK Interface to Model Builders
+--------------------------------
+
+The top-level interface to model builders is parallel to that for
+theorem provers. The ``ModelBuilder`` interface is located
+in ``nltk.inference.api``.  It is currently only implemented by
+``Mace``, which interfaces with the Mace4 model builder.
+
+Typically we use a model builder to show that some set of formulas has
+a model, and is therefore consistent. One way of doing this is by
+treating our candidate set of sentences as assumptions, and leaving
+the goal unspecified.
+Thus, the following interaction shows that both ``{a3, c1}`` and ``{a3, c2}``
+are consistent sets, since Mace succeeds in building a
+model for each of them, while ``{c1, c2}`` is inconsistent.
+
+    >>> a3 = read_expr('exists x.(man(x) and walks(x))')
+    >>> c1 = read_expr('mortal(socrates)')
+    >>> c2 = read_expr('-mortal(socrates)')
+    >>> mace = Mace()
+    >>> print(mace.build_model(None, [a3, c1]))
+    True
+    >>> print(mace.build_model(None, [a3, c2]))
+    True
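+
+By contrast, no model can be built for the inconsistent set ``{c1, c2}``;
+this sketch shows the expected behaviour (for a direct contradiction,
+Mace4 gives up quickly):
+
+    >>> print(mace.build_model(None, [c1, c2]))
+    False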
+
+We can also use the model builder as an adjunct to the theorem prover.
+Let's suppose we are trying to prove ``S |- g``, i.e. that ``g``
+is logically entailed by assumptions ``S = {s1, s2, ..., sn}``.
+We can give this same input to Mace4, and the model builder will try to
+find a counterexample, that is, to show that ``g`` does *not* follow
+from ``S``. So, given this input, Mace4 will try to find a model for
+the set ``S' = {s1, s2, ..., sn, (not g)}``. If ``g`` fails to follow
+from ``S``, then Mace4 may well return with a counterexample faster
+than Prover9 concludes that it cannot find the required proof.
+Conversely, if ``g`` *is* provable from ``S``, Mace4 may take a long
+time unsuccessfully trying to find a countermodel, and will eventually give up.
+
+In the following example, we see that the model builder does succeed
+in building a model of the assumptions together with the negation of
+the goal. That is, it succeeds in finding a model
+where there is a woman that every man loves; Adam is a man; Eve is a
+woman; but Adam does not love Eve.
+
+    >>> a4 = read_expr('exists y. (woman(y) & all x. (man(x) -> love(x,y)))')
+    >>> a5 = read_expr('man(adam)')
+    >>> a6 = read_expr('woman(eve)')
+    >>> g = read_expr('love(adam,eve)')
+    >>> print(mace.build_model(g, [a4, a5, a6]))
+    True
+
+The Model Builder will fail to find a model if the assumptions do entail
+the goal.  Mace will continue to look for models of ever-increasing sizes
+until the ``end_size`` number is reached.  By default, ``end_size`` is 500,
+but it can be set manually for a quicker response time.
+
+    >>> a7 = read_expr('all x.(man(x) -> mortal(x))')
+    >>> a8 = read_expr('man(socrates)')
+    >>> g2 = read_expr('mortal(socrates)')
+    >>> print(Mace(end_size=50).build_model(g2, [a7, a8]))
+    False
+
+There is also a ``ModelBuilderCommand`` class that, like ``ProverCommand``,
+stores a ``ModelBuilder``, a goal, assumptions, a result, and a model.  The
+only implementation in NLTK is ``MaceCommand``.
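+
+As a brief sketch (reusing ``a3`` from above): like a ``ProverCommand``,
+a ``MaceCommand`` stores its result, so a second call to ``build_model()``
+can return the cached answer without re-running Mace4.
+
+    >>> mc = MaceCommand(None, [a3])
+    >>> mc.build_model()
+    True
+    >>> mc.build_model()
+    True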
+
+
+-----
+Mace4
+-----
+
+Mace4 Installation
+~~~~~~~~~~~~~~~~~~
+
+Mace4 is packaged with Prover9, and can be downloaded from the same
+source, namely http://www.cs.unm.edu/~mccune/prover9/. It is installed
+in the same manner as Prover9.
+
+Using Mace4
+~~~~~~~~~~~
+
+Check whether Mace4 can find a model.
+
+    >>> a = read_expr('(see(mary,john) & -(mary = john))')
+    >>> mb = MaceCommand(assumptions=[a])
+    >>> mb.build_model()
+    True
+
+Show the model in 'tabular' format.
+
+    >>> print(mb.model(format='tabular'))
+    % number = 1
+    % seconds = 0
+    <BLANKLINE>
+    % Interpretation of size 2
+    <BLANKLINE>
+     john : 0
+    <BLANKLINE>
+     mary : 1
+    <BLANKLINE>
+     see :
+           | 0 1
+        ---+----
+         0 | 0 0
+         1 | 1 0
+    <BLANKLINE>
+
+Show the model in 'cooked' format.
+
+    >>> print(mb.model(format='cooked'))
+    % number = 1
+    % seconds = 0
+    <BLANKLINE>
+    % Interpretation of size 2
+    <BLANKLINE>
+    john = 0.
+    <BLANKLINE>
+    mary = 1.
+    <BLANKLINE>
+    - see(0,0).
+    - see(0,1).
+      see(1,0).
+    - see(1,1).
+    <BLANKLINE>
+
+The property ``valuation`` accesses the stored ``Valuation``.
+
+    >>> print(mb.valuation)
+    {'john': 'a', 'mary': 'b', 'see': {('b', 'a')}}
+
+We can return to our earlier example and inspect the model:
+
+    >>> mb = MaceCommand(g, assumptions=[a4, a5, a6])
+    >>> m = mb.build_model()
+    >>> print(mb.model(format='cooked'))
+    % number = 1
+    % seconds = 0
+    <BLANKLINE>
+    % Interpretation of size 2
+    <BLANKLINE>
+    adam = 0.
+    <BLANKLINE>
+    eve = 0.
+    <BLANKLINE>
+    c1 = 1.
+    <BLANKLINE>
+      man(0).
+    - man(1).
+    <BLANKLINE>
+      woman(0).
+      woman(1).
+    <BLANKLINE>
+    - love(0,0).
+      love(0,1).
+    - love(1,0).
+    - love(1,1).
+    <BLANKLINE>
+
+Here, we can see that ``adam`` and ``eve`` have been assigned the same
+individual, namely ``0``, as their value; ``0`` is both a man and a woman; a
+second individual, ``1``, is also a woman; and ``0`` loves ``1``. Thus, this
+is an interpretation in which there is a woman that every man loves but
+Adam doesn't love Eve.
+
+Mace can also be used with propositional logic.
+
+    >>> p = read_expr('P')
+    >>> q = read_expr('Q')
+    >>> mb = MaceCommand(q, [p, p>-q])
+    >>> mb.build_model()
+    True
+    >>> mb.valuation['P']
+    True
+    >>> mb.valuation['Q']
+    False
diff --git a/nlp_resource_data/nltk/test/inference_fixt.py b/nlp_resource_data/nltk/test/inference_fixt.py
new file mode 100644 (file)
index 0000000..5103cd9
--- /dev/null
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+
+
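+# Nose fixture: raising SkipTest here makes nose skip inference.doctest
+# entirely when the external Mace4/Prover9 binaries are not installed.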
+def setup_module(module):
+    from nose import SkipTest
+    from nltk.inference.mace import Mace
+
+    try:
+        m = Mace()
+        m._find_binary("mace4")
+    except LookupError:
+        raise SkipTest(
+            "Mace4/Prover9 is not available so inference.doctest was skipped"
+        )
diff --git a/nlp_resource_data/nltk/test/internals.doctest b/nlp_resource_data/nltk/test/internals.doctest
new file mode 100644 (file)
index 0000000..f906203
--- /dev/null
@@ -0,0 +1,140 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==========================================
+ Unit tests for the nltk.internals module
+==========================================
+
+overridden()
+~~~~~~~~~~~~
+    >>> from nltk.internals import overridden
+
+The typical use case is in defining methods for an interface or
+abstract base class, in such a way that subclasses don't have to
+implement all of the methods:
+
+    >>> class EaterI(object):
+    ...     '''Subclass must define eat() or batch_eat().'''
+    ...     def eat(self, food):
+    ...         if overridden(self.batch_eat):
+    ...             return self.batch_eat([food])[0]
+    ...         else:
+    ...             raise NotImplementedError()
+    ...     def batch_eat(self, foods):
+    ...         return [self.eat(food) for food in foods]
+
+As long as a subclass implements one method, it will be used to
+perform the other method:
+
+    >>> class GoodEater1(EaterI):
+    ...     def eat(self, food):
+    ...         return 'yum'
+    >>> GoodEater1().eat('steak')
+    'yum'
+    >>> GoodEater1().batch_eat(['steak', 'peas'])
+    ['yum', 'yum']
+
+    >>> class GoodEater2(EaterI):
+    ...     def batch_eat(self, foods):
+    ...         return ['yum' for food in foods]
+    >>> GoodEater2().eat('steak')
+    'yum'
+    >>> GoodEater2().batch_eat(['steak', 'peas'])
+    ['yum', 'yum']
+
+But if a subclass doesn't implement either one, then calling either
+method raises an error (n.b., this is better than infinite recursion):
+
+    >>> class BadEater1(EaterI):
+    ...     pass
+    >>> BadEater1().eat('steak')
+    Traceback (most recent call last):
+      . . .
+    NotImplementedError
+    >>> BadEater1().batch_eat(['steak', 'peas'])
+    Traceback (most recent call last):
+      . . .
+    NotImplementedError
+
+Trying to use the abstract base class itself will also result in an
+error:
+
+    >>> class EaterI(EaterI):
+    ...     pass
+    >>> EaterI().eat('steak')
+    Traceback (most recent call last):
+      . . .
+    NotImplementedError
+    >>> EaterI().batch_eat(['steak', 'peas'])
+    Traceback (most recent call last):
+      . . .
+    NotImplementedError
+
+It's ok to use intermediate abstract classes:
+
+    >>> class AbstractEater(EaterI):
+    ...     pass
+
+    >>> class GoodEater3(AbstractEater):
+    ...     def eat(self, food):
+    ...         return 'yum'
+    ...
+    >>> GoodEater3().eat('steak')
+    'yum'
+    >>> GoodEater3().batch_eat(['steak', 'peas'])
+    ['yum', 'yum']
+
+    >>> class GoodEater4(AbstractEater):
+    ...     def batch_eat(self, foods):
+    ...         return ['yum' for food in foods]
+    >>> GoodEater4().eat('steak')
+    'yum'
+    >>> GoodEater4().batch_eat(['steak', 'peas'])
+    ['yum', 'yum']
+
+    >>> class BadEater2(AbstractEater):
+    ...     pass
+    >>> BadEater2().eat('steak')
+    Traceback (most recent call last):
+      . . .
+    NotImplementedError
+    >>> BadEater2().batch_eat(['steak', 'peas'])
+    Traceback (most recent call last):
+      . . .
+    NotImplementedError
+
+Here are some extra tests:
+
+    >>> class A(object):
+    ...     def f(x): pass
+    >>> class B(A):
+    ...     def f(x): pass
+    >>> class C(A): pass
+    >>> class D(B): pass
+
+    >>> overridden(A().f)
+    False
+    >>> overridden(B().f)
+    True
+    >>> overridden(C().f)
+    False
+    >>> overridden(D().f)
+    True
+
+It also works for classes that don't explicitly subclass ``object``:
+
+    >>> class A:
+    ...     def f(x): pass
+    >>> class B(A):
+    ...     def f(x): pass
+    >>> class C(A): pass
+    >>> class D(B): pass
+    >>> overridden(A().f)
+    False
+    >>> overridden(B().f)
+    True
+    >>> overridden(C().f)
+    False
+    >>> overridden(D().f)
+    True
diff --git a/nlp_resource_data/nltk/test/japanese.doctest b/nlp_resource_data/nltk/test/japanese.doctest
new file mode 100644 (file)
index 0000000..f82af81
--- /dev/null
@@ -0,0 +1,48 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+============================
+Japanese Language Processing
+============================
+
+    >>> from nltk import *
+
+-------------
+Corpus Access
+-------------
+
+KNB Corpus
+----------
+
+    >>> from nltk.corpus import knbc
+
+Access the words: this should produce a list of strings:
+
+    >>> type(knbc.words()[0]) is not bytes
+    True
+
+Access the sentences: this should produce a list of lists of strings:
+
+    >>> type(knbc.sents()[0][0]) is not bytes
+    True
+
+Access the tagged words: this should produce a list of (word, tag) pairs:
+
+    >>> type(knbc.tagged_words()[0])
+    <... 'tuple'>
+
+Access the tagged sentences: this should produce a list of lists of (word, tag) pairs:
+
+    >>> type(knbc.tagged_sents()[0][0])
+    <... 'tuple'>
+
+
+JEITA Corpus
+------------
+
+    >>> from nltk.corpus import jeita
+
+Access the tagged words: this should produce a list of (word, tag) pairs, where a tag is a string:
+
+    >>> type(jeita.tagged_words()[0][1]) is not bytes
+    True
diff --git a/nlp_resource_data/nltk/test/lm.doctest b/nlp_resource_data/nltk/test/lm.doctest
new file mode 100644 (file)
index 0000000..c2a97c8
--- /dev/null
@@ -0,0 +1,131 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+.. -*- coding: utf-8 -*-
+
+
+Regression Tests
+================
+
+
+Issue 167
+---------
+https://github.com/nltk/nltk/issues/167
+
+    >>> from nltk.corpus import brown
+    >>> from nltk.lm.preprocessing import padded_everygram_pipeline
+    >>> ngram_order = 3
+    >>> train_data, vocab_data = padded_everygram_pipeline(
+    ...     ngram_order,
+    ...     brown.sents(categories="news")
+    ... )
+
+    >>> from nltk.lm import WittenBellInterpolated
+    >>> lm = WittenBellInterpolated(ngram_order)
+    >>> lm.fit(train_data, vocab_data)
+
+A sentence containing an unseen word should result in infinite entropy,
+because Witten-Bell is ultimately based on MLE, which cannot handle unseen
+ngrams.  Crucially, it shouldn't raise any exceptions for unseen words.
+
+    >>> from nltk.util import ngrams
+    >>> sent = ngrams("This is a sentence with the word aaddvark".split(), 3)
+    >>> lm.entropy(sent)
+    inf
+
+If we remove all unseen ngrams from the sentence, we'll get a non-infinite value
+for the entropy.
+
+    >>> sent = ngrams("This is a sentence".split(), 3)
+    >>> lm.entropy(sent)
+    17.41365588455936
+
+
+Issue 367
+---------
+https://github.com/nltk/nltk/issues/367
+
+Reproducing Dan Blanchard's example:
+https://github.com/nltk/nltk/issues/367#issuecomment-14646110
+
+    >>> from nltk.lm import Lidstone, Vocabulary
+    >>> word_seq = list('aaaababaaccbacb')
+    >>> ngram_order = 2
+    >>> from nltk.util import everygrams
+    >>> train_data = [everygrams(word_seq, max_len=ngram_order)]
+    >>> V = Vocabulary(['a', 'b', 'c', ''])
+    >>> lm = Lidstone(0.2, ngram_order, vocabulary=V)
+    >>> lm.fit(train_data)
+
+For doctest to work we have to sort the vocabulary keys.
+
+    >>> V_keys = sorted(V)
+    >>> round(sum(lm.score(w, ("b",)) for w in V_keys), 6)
+    1.0
+    >>> round(sum(lm.score(w, ("a",)) for w in V_keys), 6)
+    1.0
+
+    >>> [lm.score(w, ("b",)) for w in V_keys]
+    [0.05, 0.05, 0.8, 0.05, 0.05]
+    >>> [round(lm.score(w, ("a",)), 4) for w in V_keys]
+    [0.0222, 0.0222, 0.4667, 0.2444, 0.2444]
+
+
+Here's reproducing @afourney's comment:
+https://github.com/nltk/nltk/issues/367#issuecomment-15686289
+
+    >>> sent = ['foo', 'foo', 'foo', 'foo', 'bar', 'baz']
+    >>> ngram_order = 3
+    >>> from nltk.lm.preprocessing import padded_everygram_pipeline
+    >>> train_data, vocab_data = padded_everygram_pipeline(ngram_order, [sent])
+    >>> from nltk.lm import Lidstone
+    >>> lm = Lidstone(0.2, ngram_order)
+    >>> lm.fit(train_data, vocab_data)
+
+The vocabulary includes the "UNK" symbol as well as two padding symbols.
+
+    >>> len(lm.vocab)
+    6
+    >>> word = "foo"
+    >>> context = ("bar", "baz")
+
+The raw counts.
+
+    >>> lm.context_counts(context)[word]
+    0
+    >>> lm.context_counts(context).N()
+    1
+
+Counts with Lidstone smoothing.
+
+    >>> lm.context_counts(context)[word] + lm.gamma
+    0.2
+    >>> lm.context_counts(context).N() + len(lm.vocab) * lm.gamma
+    2.2
+
+Without any backoff, just using Lidstone smoothing, P("foo" | "bar", "baz") should be:
+0.2 / 2.2 ~= 0.090909
+
+    >>> round(lm.score(word, context), 6)
+    0.090909
+
+
+Issue 380
+---------
+https://github.com/nltk/nltk/issues/380
+
+Reproducing setup akin to this comment:
+https://github.com/nltk/nltk/issues/380#issue-12879030
+
+For speed, take only the first 100 sentences of reuters; this shouldn't
+affect the test.
+
+    >>> from nltk.corpus import reuters
+    >>> sents = reuters.sents()[:100]
+    >>> ngram_order = 3
+    >>> from nltk.lm.preprocessing import padded_everygram_pipeline
+    >>> train_data, vocab_data = padded_everygram_pipeline(ngram_order, sents)
+
+    >>> from nltk.lm import Lidstone
+    >>> lm = Lidstone(0.2, ngram_order)
+    >>> lm.fit(train_data, vocab_data)
+    >>> lm.score("said", ("",)) < 1
+    True
diff --git a/nlp_resource_data/nltk/test/logic.doctest b/nlp_resource_data/nltk/test/logic.doctest
new file mode 100644 (file)
index 0000000..45c6429
--- /dev/null
@@ -0,0 +1,1098 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=======================
+Logic & Lambda Calculus
+=======================
+
+The ``nltk.sem.logic`` package allows expressions of First-Order Logic (FOL) to be
+parsed into ``Expression`` objects. In addition to FOL, the parser
+handles lambda-abstraction with variables of higher order.
+
+--------
+Overview
+--------
+
+    >>> from nltk.sem.logic import *
+
+The default inventory of logical constants is the following:
+
+    >>> boolean_ops() # doctest: +NORMALIZE_WHITESPACE
+    negation           -
+    conjunction        &
+    disjunction        |
+    implication        ->
+    equivalence        <->
+    >>> equality_preds() # doctest: +NORMALIZE_WHITESPACE
+    equality           =
+    inequality         !=
+    >>> binding_ops() # doctest: +NORMALIZE_WHITESPACE
+    existential        exists
+    universal          all
+    lambda             \
+
+----------------
+Regression Tests
+----------------
+
+
+Untyped Logic
++++++++++++++
+
+Process logical expressions conveniently:
+
+    >>> read_expr = Expression.fromstring
+
+Test for equality under alpha-conversion
+========================================
+
+    >>> e1 = read_expr('exists x.P(x)')
+    >>> print(e1)
+    exists x.P(x)
+    >>> e2 = e1.alpha_convert(Variable('z'))
+    >>> print(e2)
+    exists z.P(z)
+    >>> e1 == e2
+    True
+
+
+    >>> l = read_expr(r'\X.\X.X(X)(1)').simplify()
+    >>> id = read_expr(r'\X.X(X)')
+    >>> l == id
+    True
+
+Test numerals
+=============
+
+    >>> zero = read_expr(r'\F x.x')
+    >>> one = read_expr(r'\F x.F(x)')
+    >>> two = read_expr(r'\F x.F(F(x))')
+    >>> three = read_expr(r'\F x.F(F(F(x)))')
+    >>> four = read_expr(r'\F x.F(F(F(F(x))))')
+    >>> succ = read_expr(r'\N F x.F(N(F,x))')
+    >>> plus = read_expr(r'\M N F x.M(F,N(F,x))')
+    >>> mult = read_expr(r'\M N F.M(N(F))')
+    >>> pred = read_expr(r'\N F x.(N(\G H.H(G(F)))(\u.x)(\u.u))')
+    >>> v1 = ApplicationExpression(succ, zero).simplify()
+    >>> v1 == one
+    True
+    >>> v2 = ApplicationExpression(succ, v1).simplify()
+    >>> v2 == two
+    True
+    >>> v3 = ApplicationExpression(ApplicationExpression(plus, v1), v2).simplify()
+    >>> v3 == three
+    True
+    >>> v4 = ApplicationExpression(ApplicationExpression(mult, v2), v2).simplify()
+    >>> v4 == four
+    True
+    >>> v5 = ApplicationExpression(pred, ApplicationExpression(pred, v4)).simplify()
+    >>> v5 == two
+    True
+
+Overloaded operators also exist, for convenience.
+
+    >>> print(succ(zero).simplify() == one)
+    True
+    >>> print(plus(one,two).simplify() == three)
+    True
+    >>> print(mult(two,two).simplify() == four)
+    True
+    >>> print(pred(pred(four)).simplify() == two)
+    True
+
+    >>> john = read_expr(r'john')
+    >>> man = read_expr(r'\x.man(x)')
+    >>> walk = read_expr(r'\x.walk(x)')
+    >>> man(john).simplify()
+    <ApplicationExpression man(john)>
+    >>> print(-walk(john).simplify())
+    -walk(john)
+    >>> print((man(john) & walk(john)).simplify())
+    (man(john) & walk(john))
+    >>> print((man(john) | walk(john)).simplify())
+    (man(john) | walk(john))
+    >>> print((man(john) > walk(john)).simplify())
+    (man(john) -> walk(john))
+    >>> print((man(john) < walk(john)).simplify())
+    (man(john) <-> walk(john))
+
+Python's built-in lambda operator can also be used with Expressions
+
+    >>> john = VariableExpression(Variable('john'))
+    >>> run_var = VariableExpression(Variable('run'))
+    >>> run = lambda x: run_var(x)
+    >>> run(john)
+    <ApplicationExpression run(john)>
+
+
+``betaConversionTestSuite.pl``
+------------------------------
+
+Tests based on Blackburn & Bos' book, *Representation and Inference
+for Natural Language*.
+
+    >>> x1 = read_expr(r'\P.P(mia)(\x.walk(x))').simplify()
+    >>> x2 = read_expr(r'walk(mia)').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'exists x.(man(x) & ((\P.exists x.(woman(x) & P(x)))(\y.love(x,y))))').simplify()
+    >>> x2 = read_expr(r'exists x.(man(x) & exists y.(woman(y) & love(x,y)))').simplify()
+    >>> x1 == x2
+    True
+    >>> x1 = read_expr(r'\a.sleep(a)(mia)').simplify()
+    >>> x2 = read_expr(r'sleep(mia)').simplify()
+    >>> x1 == x2
+    True
+    >>> x1 = read_expr(r'\a.\b.like(b,a)(mia)').simplify()
+    >>> x2 = read_expr(r'\b.like(b,mia)').simplify()
+    >>> x1 == x2
+    True
+    >>> x1 = read_expr(r'\a.(\b.like(b,a)(vincent))').simplify()
+    >>> x2 = read_expr(r'\a.like(vincent,a)').simplify()
+    >>> x1 == x2
+    True
+    >>> x1 = read_expr(r'\a.((\b.like(b,a)(vincent)) & sleep(a))').simplify()
+    >>> x2 = read_expr(r'\a.(like(vincent,a) & sleep(a))').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'(\a.\b.like(b,a)(mia)(vincent))').simplify()
+    >>> x2 = read_expr(r'like(vincent,mia)').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'P((\a.sleep(a)(vincent)))').simplify()
+    >>> x2 = read_expr(r'P(sleep(vincent))').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'\A.A((\b.sleep(b)(vincent)))').simplify()
+    >>> x2 = read_expr(r'\A.A(sleep(vincent))').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'\A.A(sleep(vincent))').simplify()
+    >>> x2 = read_expr(r'\A.A(sleep(vincent))').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'(\A.A(vincent)(\b.sleep(b)))').simplify()
+    >>> x2 = read_expr(r'sleep(vincent)').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'\A.believe(mia,A(vincent))(\b.sleep(b))').simplify()
+    >>> x2 = read_expr(r'believe(mia,sleep(vincent))').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'(\A.(A(vincent) & A(mia)))(\b.sleep(b))').simplify()
+    >>> x2 = read_expr(r'(sleep(vincent) & sleep(mia))').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'\A.\B.(\C.C(A(vincent))(\d.probably(d)) & (\C.C(B(mia))(\d.improbably(d))))(\f.walk(f))(\f.talk(f))').simplify()
+    >>> x2 = read_expr(r'(probably(walk(vincent)) & improbably(talk(mia)))').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'(\a.\b.(\C.C(a,b)(\d.\f.love(d,f))))(jules)(mia)').simplify()
+    >>> x2 = read_expr(r'love(jules,mia)').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'(\A.\B.exists c.(A(c) & B(c)))(\d.boxer(d),\d.sleep(d))').simplify()
+    >>> x2 = read_expr(r'exists c.(boxer(c) & sleep(c))').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'\A.Z(A)(\c.\a.like(a,c))').simplify()
+    >>> x2 = read_expr(r'Z(\c.\a.like(a,c))').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'\A.\b.A(b)(\c.\b.like(b,c))').simplify()
+    >>> x2 = read_expr(r'\b.(\c.\b.like(b,c)(b))').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'(\a.\b.(\C.C(a,b)(\b.\a.loves(b,a))))(jules)(mia)').simplify()
+    >>> x2 = read_expr(r'loves(jules,mia)').simplify()
+    >>> x1 == x2
+    True
+
+    >>> x1 = read_expr(r'(\A.\b.(exists b.A(b) & A(b)))(\c.boxer(c))(vincent)').simplify()
+    >>> x2 = read_expr(r'((exists b.boxer(b)) & boxer(vincent))').simplify()
+    >>> x1 == x2
+    True
+
+Test Parser
+===========
+
+    >>> print(read_expr(r'john'))
+    john
+    >>> print(read_expr(r'x'))
+    x
+    >>> print(read_expr(r'-man(x)'))
+    -man(x)
+    >>> print(read_expr(r'--man(x)'))
+    --man(x)
+    >>> print(read_expr(r'(man(x))'))
+    man(x)
+    >>> print(read_expr(r'((man(x)))'))
+    man(x)
+    >>> print(read_expr(r'man(x) <-> tall(x)'))
+    (man(x) <-> tall(x))
+    >>> print(read_expr(r'(man(x) <-> tall(x))'))
+    (man(x) <-> tall(x))
+    >>> print(read_expr(r'(man(x) & tall(x) & walks(x))'))
+    (man(x) & tall(x) & walks(x))
+    >>> print(read_expr(r'(man(x) & tall(x) & walks(x))').first)
+    (man(x) & tall(x))
+    >>> print(read_expr(r'man(x) | tall(x) & walks(x)'))
+    (man(x) | (tall(x) & walks(x)))
+    >>> print(read_expr(r'((man(x) & tall(x)) | walks(x))'))
+    ((man(x) & tall(x)) | walks(x))
+    >>> print(read_expr(r'man(x) & (tall(x) | walks(x))'))
+    (man(x) & (tall(x) | walks(x)))
+    >>> print(read_expr(r'(man(x) & (tall(x) | walks(x)))'))
+    (man(x) & (tall(x) | walks(x)))
+    >>> print(read_expr(r'P(x) -> Q(x) <-> R(x) | S(x) & T(x)'))
+    ((P(x) -> Q(x)) <-> (R(x) | (S(x) & T(x))))
+    >>> print(read_expr(r'exists x.man(x)'))
+    exists x.man(x)
+    >>> print(read_expr(r'exists x.(man(x) & tall(x))'))
+    exists x.(man(x) & tall(x))
+    >>> print(read_expr(r'exists x.(man(x) & tall(x) & walks(x))'))
+    exists x.(man(x) & tall(x) & walks(x))
+    >>> print(read_expr(r'-P(x) & Q(x)'))
+    (-P(x) & Q(x))
+    >>> read_expr(r'-P(x) & Q(x)') == read_expr(r'(-P(x)) & Q(x)')
+    True
+    >>> print(read_expr(r'\x.man(x)'))
+    \x.man(x)
+    >>> print(read_expr(r'\x.man(x)(john)'))
+    \x.man(x)(john)
+    >>> print(read_expr(r'\x.man(x)(john) & tall(x)'))
+    (\x.man(x)(john) & tall(x))
+    >>> print(read_expr(r'\x.\y.sees(x,y)'))
+    \x y.sees(x,y)
+    >>> print(read_expr(r'\x  y.sees(x,y)'))
+    \x y.sees(x,y)
+    >>> print(read_expr(r'\x.\y.sees(x,y)(a)'))
+    (\x y.sees(x,y))(a)
+    >>> print(read_expr(r'\x  y.sees(x,y)(a)'))
+    (\x y.sees(x,y))(a)
+    >>> print(read_expr(r'\x.\y.sees(x,y)(a)(b)'))
+    ((\x y.sees(x,y))(a))(b)
+    >>> print(read_expr(r'\x  y.sees(x,y)(a)(b)'))
+    ((\x y.sees(x,y))(a))(b)
+    >>> print(read_expr(r'\x.\y.sees(x,y)(a,b)'))
+    ((\x y.sees(x,y))(a))(b)
+    >>> print(read_expr(r'\x  y.sees(x,y)(a,b)'))
+    ((\x y.sees(x,y))(a))(b)
+    >>> print(read_expr(r'((\x.\y.sees(x,y))(a))(b)'))
+    ((\x y.sees(x,y))(a))(b)
+    >>> print(read_expr(r'P(x)(y)(z)'))
+    P(x,y,z)
+    >>> print(read_expr(r'P(Q)'))
+    P(Q)
+    >>> print(read_expr(r'P(Q(x))'))
+    P(Q(x))
+    >>> print(read_expr(r'(\x.exists y.walks(x,y))(x)'))
+    (\x.exists y.walks(x,y))(x)
+    >>> print(read_expr(r'exists x.(x = john)'))
+    exists x.(x = john)
+    >>> print(read_expr(r'((\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x))'))
+    ((\P Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x))
+    >>> a = read_expr(r'exists c.exists b.A(b,c) & A(b,c)')
+    >>> b = read_expr(r'(exists c.(exists b.A(b,c))) & A(b,c)')
+    >>> print(a == b)
+    True
+    >>> a = read_expr(r'exists c.(exists b.A(b,c) & A(b,c))')
+    >>> b = read_expr(r'exists c.((exists b.A(b,c)) & A(b,c))')
+    >>> print(a == b)
+    True
+    >>> print(read_expr(r'exists x.x = y'))
+    exists x.(x = y)
+    >>> print(read_expr('A(B)(C)'))
+    A(B,C)
+    >>> print(read_expr('(A(B))(C)'))
+    A(B,C)
+    >>> print(read_expr('A((B)(C))'))
+    A(B(C))
+    >>> print(read_expr('A(B(C))'))
+    A(B(C))
+    >>> print(read_expr('(A)(B(C))'))
+    A(B(C))
+    >>> print(read_expr('(((A)))(((B))(((C))))'))
+    A(B(C))
+    >>> print(read_expr(r'A != B'))
+    -(A = B)
+    >>> print(read_expr('P(x) & x=y & P(y)'))
+    (P(x) & (x = y) & P(y))
+    >>> try: print(read_expr(r'\walk.walk(x)'))
+    ... except LogicalExpressionException as e: print(e)
+    'walk' is an illegal variable name.  Constants may not be abstracted.
+    \walk.walk(x)
+     ^
+    >>> try: print(read_expr(r'all walk.walk(john)'))
+    ... except LogicalExpressionException as e: print(e)
+    'walk' is an illegal variable name.  Constants may not be quantified.
+    all walk.walk(john)
+        ^
+    >>> try: print(read_expr(r'x(john)'))
+    ... except LogicalExpressionException as e: print(e)
+    'x' is an illegal predicate name.  Individual variables may not be used as predicates.
+    x(john)
+    ^
+
+    >>> from nltk.sem.logic import LogicParser # hack to give access to custom quote chars
+    >>> lpq = LogicParser()
+    >>> lpq.quote_chars = [("'", "'", "\\", False)]
+    >>> print(lpq.parse(r"(man(x) & 'tall\'s,' (x) & walks (x) )"))
+    (man(x) & tall's,(x) & walks(x))
+    >>> lpq.quote_chars = [("'", "'", "\\", True)]
+    >>> print(lpq.parse(r"'tall\'s,'"))
+    'tall\'s,'
+    >>> print(lpq.parse(r"'spaced name(x)'"))
+    'spaced name(x)'
+    >>> print(lpq.parse(r"-'tall\'s,'(x)"))
+    -'tall\'s,'(x)
+    >>> print(lpq.parse(r"(man(x) & 'tall\'s,' (x) & walks (x) )"))
+    (man(x) & 'tall\'s,'(x) & walks(x))
+
+
+Simplify
+========
+
+    >>> print(read_expr(r'\x.man(x)(john)').simplify())
+    man(john)
+    >>> print(read_expr(r'\x.((man(x)))(john)').simplify())
+    man(john)
+    >>> print(read_expr(r'\x.\y.sees(x,y)(john, mary)').simplify())
+    sees(john,mary)
+    >>> print(read_expr(r'\x  y.sees(x,y)(john, mary)').simplify())
+    sees(john,mary)
+    >>> print(read_expr(r'\x.\y.sees(x,y)(john)(mary)').simplify())
+    sees(john,mary)
+    >>> print(read_expr(r'\x  y.sees(x,y)(john)(mary)').simplify())
+    sees(john,mary)
+    >>> print(read_expr(r'\x.\y.sees(x,y)(john)').simplify())
+    \y.sees(john,y)
+    >>> print(read_expr(r'\x  y.sees(x,y)(john)').simplify())
+    \y.sees(john,y)
+    >>> print(read_expr(r'(\x.\y.sees(x,y)(john))(mary)').simplify())
+    sees(john,mary)
+    >>> print(read_expr(r'(\x  y.sees(x,y)(john))(mary)').simplify())
+    sees(john,mary)
+    >>> print(read_expr(r'exists x.(man(x) & (\x.exists y.walks(x,y))(x))').simplify())
+    exists x.(man(x) & exists y.walks(x,y))
+    >>> e1 = read_expr(r'exists x.(man(x) & (\x.exists y.walks(x,y))(y))').simplify()
+    >>> e2 = read_expr(r'exists x.(man(x) & exists z1.walks(y,z1))')
+    >>> e1 == e2
+    True
+    >>> print(read_expr(r'(\P Q.exists x.(P(x) & Q(x)))(\x.dog(x))').simplify())
+    \Q.exists x.(dog(x) & Q(x))
+    >>> print(read_expr(r'((\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x))').simplify())
+    exists x.(dog(x) & bark(x))
+    >>> print(read_expr(r'\P.(P(x)(y))(\a b.Q(a,b))').simplify())
+    Q(x,y)
+
+Replace
+=======
+
+    >>> a = read_expr(r'a')
+    >>> x = read_expr(r'x')
+    >>> y = read_expr(r'y')
+    >>> z = read_expr(r'z')
+
+    >>> print(read_expr(r'man(x)').replace(x.variable, a, False))
+    man(a)
+    >>> print(read_expr(r'(man(x) & tall(x))').replace(x.variable, a, False))
+    (man(a) & tall(a))
+    >>> print(read_expr(r'exists x.man(x)').replace(x.variable, a, False))
+    exists x.man(x)
+    >>> print(read_expr(r'exists x.man(x)').replace(x.variable, a, True))
+    exists a.man(a)
+    >>> print(read_expr(r'exists x.give(x,y,z)').replace(y.variable, a, False))
+    exists x.give(x,a,z)
+    >>> print(read_expr(r'exists x.give(x,y,z)').replace(y.variable, a, True))
+    exists x.give(x,a,z)
+    >>> e1 = read_expr(r'exists x.give(x,y,z)').replace(y.variable, x, False)
+    >>> e2 = read_expr(r'exists z1.give(z1,x,z)')
+    >>> e1 == e2
+    True
+    >>> e1 = read_expr(r'exists x.give(x,y,z)').replace(y.variable, x, True)
+    >>> e2 = read_expr(r'exists z1.give(z1,x,z)')
+    >>> e1 == e2
+    True
+    >>> print(read_expr(r'\x y z.give(x,y,z)').replace(y.variable, a, False))
+    \x y z.give(x,y,z)
+    >>> print(read_expr(r'\x y z.give(x,y,z)').replace(y.variable, a, True))
+    \x a z.give(x,a,z)
+    >>> print(read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, a, False))
+    \x y.give(x,y,a)
+    >>> print(read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, a, True))
+    \x y.give(x,y,a)
+    >>> e1 = read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, x, False)
+    >>> e2 = read_expr(r'\z1.\y.give(z1,y,x)')
+    >>> e1 == e2
+    True
+    >>> e1 = read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, x, True)
+    >>> e2 = read_expr(r'\z1.\y.give(z1,y,x)')
+    >>> e1 == e2
+    True
+    >>> print(read_expr(r'\x.give(x,y,z)').replace(z.variable, y, False))
+    \x.give(x,y,y)
+    >>> print(read_expr(r'\x.give(x,y,z)').replace(z.variable, y, True))
+    \x.give(x,y,y)
+
+    >>> from nltk.sem import logic
+    >>> logic._counter._value = 0
+    >>> e1 = read_expr('e1')
+    >>> e2 = read_expr('e2')
+    >>> print(read_expr('exists e1 e2.(walk(e1) & talk(e2))').replace(e1.variable, e2, True))
+    exists e2 e01.(walk(e2) & talk(e01))
+
+
+Variables / Free
+================
+
+    >>> examples = [r'walk(john)',
+    ...             r'walk(x)',
+    ...             r'?vp(?np)',
+    ...             r'see(john,mary)',
+    ...             r'exists x.walk(x)',
+    ...             r'\x.see(john,x)',
+    ...             r'\x.see(john,x)(mary)',
+    ...             r'P(x)',
+    ...             r'\P.P(x)',
+    ...             r'aa(x,bb(y),cc(z),P(w),u)',
+    ...             r'bo(?det(?n),@x)']
+    >>> examples = [read_expr(e) for e in examples]
+
+    >>> for e in examples:
+    ...     print('%-25s' % e, sorted(e.free()))
+    walk(john)                []
+    walk(x)                   [Variable('x')]
+    ?vp(?np)                  []
+    see(john,mary)            []
+    exists x.walk(x)          []
+    \x.see(john,x)            []
+    (\x.see(john,x))(mary)    []
+    P(x)                      [Variable('P'), Variable('x')]
+    \P.P(x)                   [Variable('x')]
+    aa(x,bb(y),cc(z),P(w),u)  [Variable('P'), Variable('u'), Variable('w'), Variable('x'), Variable('y'), Variable('z')]
+    bo(?det(?n),@x)           []
+
+    >>> for e in examples:
+    ...     print('%-25s' % e, sorted(e.constants()))
+    walk(john)                [Variable('john')]
+    walk(x)                   []
+    ?vp(?np)                  [Variable('?np')]
+    see(john,mary)            [Variable('john'), Variable('mary')]
+    exists x.walk(x)          []
+    \x.see(john,x)            [Variable('john')]
+    (\x.see(john,x))(mary)    [Variable('john'), Variable('mary')]
+    P(x)                      []
+    \P.P(x)                   []
+    aa(x,bb(y),cc(z),P(w),u)  []
+    bo(?det(?n),@x)           [Variable('?n'), Variable('@x')]
+
+    >>> for e in examples:
+    ...     print('%-25s' % e, sorted(e.predicates()))
+    walk(john)                [Variable('walk')]
+    walk(x)                   [Variable('walk')]
+    ?vp(?np)                  [Variable('?vp')]
+    see(john,mary)            [Variable('see')]
+    exists x.walk(x)          [Variable('walk')]
+    \x.see(john,x)            [Variable('see')]
+    (\x.see(john,x))(mary)    [Variable('see')]
+    P(x)                      []
+    \P.P(x)                   []
+    aa(x,bb(y),cc(z),P(w),u)  [Variable('aa'), Variable('bb'), Variable('cc')]
+    bo(?det(?n),@x)           [Variable('?det'), Variable('bo')]
+
+    >>> for e in examples:
+    ...     print('%-25s' % e, sorted(e.variables()))
+    walk(john)                []
+    walk(x)                   [Variable('x')]
+    ?vp(?np)                  [Variable('?np'), Variable('?vp')]
+    see(john,mary)            []
+    exists x.walk(x)          []
+    \x.see(john,x)            []
+    (\x.see(john,x))(mary)    []
+    P(x)                      [Variable('P'), Variable('x')]
+    \P.P(x)                   [Variable('x')]
+    aa(x,bb(y),cc(z),P(w),u)  [Variable('P'), Variable('u'), Variable('w'), Variable('x'), Variable('y'), Variable('z')]
+    bo(?det(?n),@x)           [Variable('?det'), Variable('?n'), Variable('@x')]
+
+
+
+``normalize``
+    >>> print(read_expr(r'\e083.(walk(e083, z472) & talk(e092, z938))').normalize())
+    \e01.(walk(e01,z3) & talk(e02,z4))
+
+Typed Logic
++++++++++++
+
+    >>> from nltk.sem.logic import LogicParser
+    >>> tlp = LogicParser(True)
+    >>> print(tlp.parse(r'man(x)').type)
+    ?
+    >>> print(tlp.parse(r'walk(angus)').type)
+    ?
+    >>> print(tlp.parse(r'-man(x)').type)
+    t
+    >>> print(tlp.parse(r'(man(x) <-> tall(x))').type)
+    t
+    >>> print(tlp.parse(r'exists x.(man(x) & tall(x))').type)
+    t
+    >>> print(tlp.parse(r'\x.man(x)').type)
+    <e,?>
+    >>> print(tlp.parse(r'john').type)
+    e
+    >>> print(tlp.parse(r'\x y.sees(x,y)').type)
+    <e,<e,?>>
+    >>> print(tlp.parse(r'\x.man(x)(john)').type)
+    ?
+    >>> print(tlp.parse(r'\x.\y.sees(x,y)(john)').type)
+    <e,?>
+    >>> print(tlp.parse(r'\x.\y.sees(x,y)(john)(mary)').type)
+    ?
+    >>> print(tlp.parse(r'\P.\Q.exists x.(P(x) & Q(x))').type)
+    <<e,t>,<<e,t>,t>>
+    >>> print(tlp.parse(r'\x.y').type)
+    <?,e>
+    >>> print(tlp.parse(r'\P.P(x)').type)
+    <<e,?>,?>
+
+    >>> parsed = tlp.parse('see(john,mary)')
+    >>> print(parsed.type)
+    ?
+    >>> print(parsed.function)
+    see(john)
+    >>> print(parsed.function.type)
+    <e,?>
+    >>> print(parsed.function.function)
+    see
+    >>> print(parsed.function.function.type)
+    <e,<e,?>>
+
+    >>> parsed = tlp.parse('P(x,y)')
+    >>> print(parsed)
+    P(x,y)
+    >>> print(parsed.type)
+    ?
+    >>> print(parsed.function)
+    P(x)
+    >>> print(parsed.function.type)
+    <e,?>
+    >>> print(parsed.function.function)
+    P
+    >>> print(parsed.function.function.type)
+    <e,<e,?>>
+
+    >>> print(tlp.parse(r'P').type)
+    ?
+
+    >>> print(tlp.parse(r'P', {'P': 't'}).type)
+    t
+
+    >>> a = tlp.parse(r'P(x)')
+    >>> print(a.type)
+    ?
+    >>> print(a.function.type)
+    <e,?>
+    >>> print(a.argument.type)
+    e
+
+    >>> a = tlp.parse(r'-P(x)')
+    >>> print(a.type)
+    t
+    >>> print(a.term.type)
+    t
+    >>> print(a.term.function.type)
+    <e,t>
+    >>> print(a.term.argument.type)
+    e
+
+    >>> a = tlp.parse(r'P & Q')
+    >>> print(a.type)
+    t
+    >>> print(a.first.type)
+    t
+    >>> print(a.second.type)
+    t
+
+    >>> a = tlp.parse(r'(P(x) & Q(x))')
+    >>> print(a.type)
+    t
+    >>> print(a.first.type)
+    t
+    >>> print(a.first.function.type)
+    <e,t>
+    >>> print(a.first.argument.type)
+    e
+    >>> print(a.second.type)
+    t
+    >>> print(a.second.function.type)
+    <e,t>
+    >>> print(a.second.argument.type)
+    e
+
+    >>> a = tlp.parse(r'\x.P(x)')
+    >>> print(a.type)
+    <e,?>
+    >>> print(a.term.function.type)
+    <e,?>
+    >>> print(a.term.argument.type)
+    e
+
+    >>> a = tlp.parse(r'\P.P(x)')
+    >>> print(a.type)
+    <<e,?>,?>
+    >>> print(a.term.function.type)
+    <e,?>
+    >>> print(a.term.argument.type)
+    e
+
+    >>> a = tlp.parse(r'(\x.P(x)(john)) & Q(x)')
+    >>> print(a.type)
+    t
+    >>> print(a.first.type)
+    t
+    >>> print(a.first.function.type)
+    <e,t>
+    >>> print(a.first.function.term.function.type)
+    <e,t>
+    >>> print(a.first.function.term.argument.type)
+    e
+    >>> print(a.first.argument.type)
+    e
+
+    >>> a = tlp.parse(r'\x y.P(x,y)(john)(mary) & Q(x)')
+    >>> print(a.type)
+    t
+    >>> print(a.first.type)
+    t
+    >>> print(a.first.function.type)
+    <e,t>
+    >>> print(a.first.function.function.type)
+    <e,<e,t>>
+
+    >>> a = tlp.parse(r'--P')
+    >>> print(a.type)
+    t
+    >>> print(a.term.type)
+    t
+    >>> print(a.term.term.type)
+    t
+
+    >>> tlp.parse(r'\x y.P(x,y)').type
+    <e,<e,?>>
+    >>> tlp.parse(r'\x y.P(x,y)', {'P': '<e,<e,t>>'}).type
+    <e,<e,t>>
+
+    >>> a = tlp.parse(r'\P y.P(john,y)(\x y.see(x,y))')
+    >>> a.type
+    <e,?>
+    >>> a.function.type
+    <<e,<e,?>>,<e,?>>
+    >>> a.function.term.term.function.function.type
+    <e,<e,?>>
+    >>> a.argument.type
+    <e,<e,?>>
+
+    >>> a = tlp.parse(r'exists c f.(father(c) = f)')
+    >>> a.type
+    t
+    >>> a.term.term.type
+    t
+    >>> a.term.term.first.type
+    e
+    >>> a.term.term.first.function.type
+    <e,e>
+    >>> a.term.term.second.type
+    e
+
+typecheck()
+
+    >>> a = tlp.parse('P(x)')
+    >>> b = tlp.parse('Q(x)')
+    >>> a.type
+    ?
+    >>> c = a & b
+    >>> c.first.type
+    ?
+    >>> c.typecheck() # doctest: +ELLIPSIS
+    {...}
+    >>> c.first.type
+    t
+
+    >>> a = tlp.parse('P(x)')
+    >>> b = tlp.parse('P(x) & Q(x)')
+    >>> a.type
+    ?
+    >>> typecheck([a,b]) # doctest: +ELLIPSIS
+    {...}
+    >>> a.type
+    t
+
+    >>> e = tlp.parse(r'man(x)')
+    >>> print(dict((k,str(v)) for k,v in e.typecheck().items()) == {'x': 'e', 'man': '<e,?>'})
+    True
+    >>> sig = {'man': '<e, t>'}
+    >>> e = tlp.parse(r'man(x)', sig)
+    >>> print(e.function.type)
+    <e,t>
+    >>> print(dict((k,str(v)) for k,v in e.typecheck().items()) == {'x': 'e', 'man': '<e,t>'})
+    True
+    >>> print(e.function.type)
+    <e,t>
+    >>> print(dict((k,str(v)) for k,v in e.typecheck(sig).items()) == {'x': 'e', 'man': '<e,t>'})
+    True
+
+findtype()
+
+    >>> print(tlp.parse(r'man(x)').findtype(Variable('man')))
+    <e,?>
+    >>> print(tlp.parse(r'see(x,y)').findtype(Variable('see')))
+    <e,<e,?>>
+    >>> print(tlp.parse(r'P(Q(R(x)))').findtype(Variable('Q')))
+    ?
+
+reading types from strings
+
+    >>> Type.fromstring('e')
+    e
+    >>> Type.fromstring('<e,t>')
+    <e,t>
+    >>> Type.fromstring('<<e,t>,<e,t>>')
+    <<e,t>,<e,t>>
+    >>> Type.fromstring('<<e,?>,?>')
+    <<e,?>,?>
+
+alternative type format
+
+    >>> Type.fromstring('e').str()
+    'IND'
+    >>> Type.fromstring('<e,?>').str()
+    '(IND -> ANY)'
+    >>> Type.fromstring('<<e,t>,t>').str()
+    '((IND -> BOOL) -> BOOL)'
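+
+The same convention nests for higher-order types; an added illustration
+following the pattern above (a sketch, output not checked):
+
+    >>> Type.fromstring('<e,<e,t>>').str()  # doctest: +SKIP
+    '(IND -> (IND -> BOOL))'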
+
+Type.__eq__()
+
+    >>> from nltk.sem.logic import *
+
+    >>> e = ENTITY_TYPE
+    >>> t = TRUTH_TYPE
+    >>> a = ANY_TYPE
+    >>> et = ComplexType(e,t)
+    >>> eet = ComplexType(e,ComplexType(e,t))
+    >>> at = ComplexType(a,t)
+    >>> ea = ComplexType(e,a)
+    >>> aa = ComplexType(a,a)
+
+    >>> e == e
+    True
+    >>> t == t
+    True
+    >>> e == t
+    False
+    >>> a == t
+    False
+    >>> t == a
+    False
+    >>> a == a
+    True
+    >>> et == et
+    True
+    >>> a == et
+    False
+    >>> et == a
+    False
+    >>> a == ComplexType(a,aa)
+    True
+    >>> ComplexType(a,aa) == a
+    True
+
+matches()
+
+    >>> e.matches(t)
+    False
+    >>> a.matches(t)
+    True
+    >>> t.matches(a)
+    True
+    >>> a.matches(et)
+    True
+    >>> et.matches(a)
+    True
+    >>> ea.matches(eet)
+    True
+    >>> eet.matches(ea)
+    True
+    >>> aa.matches(et)
+    True
+    >>> aa.matches(t)
+    True
+
+Type error during parsing
+=========================
+
+    >>> try: print(tlp.parse(r'exists x y.(P(x) & P(x,y))'))
+    ... except InconsistentTypeHierarchyException as e: print(e)
+    The variable 'P' was found in multiple places with different types.
+    >>> try: tlp.parse(r'\x y.see(x,y)(\x.man(x))')
+    ... except TypeException as e: print(e)
+    The function '\x y.see(x,y)' is of type '<e,<e,?>>' and cannot be applied to '\x.man(x)' of type '<e,?>'.  Its argument must match type 'e'.
+    >>> try: tlp.parse(r'\P x y.-P(x,y)(\x.-man(x))')
+    ... except TypeException as e: print(e)
+    The function '\P x y.-P(x,y)' is of type '<<e,<e,t>>,<e,<e,t>>>' and cannot be applied to '\x.-man(x)' of type '<e,t>'.  Its argument must match type '<e,<e,t>>'.
+
+    >>> a = tlp.parse(r'-talk(x)')
+    >>> signature = a.typecheck()
+    >>> try: print(tlp.parse(r'-talk(x,y)', signature))
+    ... except InconsistentTypeHierarchyException as e: print(e)
+    The variable 'talk' was found in multiple places with different types.
+
+    >>> a = tlp.parse(r'-P(x)')
+    >>> b = tlp.parse(r'-P(x,y)')
+    >>> a.typecheck() # doctest: +ELLIPSIS
+    {...}
+    >>> b.typecheck() # doctest: +ELLIPSIS
+    {...}
+    >>> try: typecheck([a,b])
+    ... except InconsistentTypeHierarchyException as e: print(e)
+    The variable 'P' was found in multiple places with different types.
+
+    >>> a = tlp.parse(r'P(x)')
+    >>> b = tlp.parse(r'P(x,y)')
+    >>> signature = {'P': '<e,t>'}
+    >>> a.typecheck(signature) # doctest: +ELLIPSIS
+    {...}
+    >>> try: typecheck([a,b], signature)
+    ... except InconsistentTypeHierarchyException as e: print(e)
+    The variable 'P' was found in multiple places with different types.
+
+Parse errors
+============
+
+    >>> try: read_expr(r'')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    <BLANKLINE>
+    ^
+    >>> try: read_expr(r'(')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    (
+     ^
+    >>> try: read_expr(r')')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    )
+    ^
+    >>> try: read_expr(r'()')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    ()
+     ^
+    >>> try: read_expr(r'(P(x) & Q(x)')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expected token ')'.
+    (P(x) & Q(x)
+                ^
+    >>> try: read_expr(r'(P(x) &')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    (P(x) &
+           ^
+    >>> try: read_expr(r'(P(x) | )')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    (P(x) | )
+            ^
+    >>> try: read_expr(r'P(x) ->')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    P(x) ->
+           ^
+    >>> try: read_expr(r'P(x')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expected token ')'.
+    P(x
+       ^
+    >>> try: read_expr(r'P(x,')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    P(x,
+        ^
+    >>> try: read_expr(r'P(x,)')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    P(x,)
+        ^
+    >>> try: read_expr(r'exists')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Variable and Expression expected following quantifier 'exists'.
+    exists
+           ^
+    >>> try: read_expr(r'exists x')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    exists x
+             ^
+    >>> try: read_expr(r'exists x.')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    exists x.
+             ^
+    >>> try: read_expr(r'\  ')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Variable and Expression expected following lambda operator.
+    \
+      ^
+    >>> try: read_expr(r'\ x')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    \ x
+        ^
+    >>> try: read_expr(r'\ x y')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    \ x y
+          ^
+    >>> try: read_expr(r'\ x.')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    \ x.
+        ^
+    >>> try: read_expr(r'P(x)Q(x)')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: 'Q'.
+    P(x)Q(x)
+        ^
+    >>> try: read_expr(r'(P(x)Q(x)')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: 'Q'.  Expected token ')'.
+    (P(x)Q(x)
+         ^
+    >>> try: read_expr(r'exists x y')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    exists x y
+               ^
+    >>> try: read_expr(r'exists x y.')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expression expected.
+    exists x y.
+               ^
+    >>> try: read_expr(r'exists x -> y')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: '->'.  Expression expected.
+    exists x -> y
+             ^
+
+
+    >>> try: read_expr(r'A -> ((P(x) & Q(x)) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expected token ')'.
+    A -> ((P(x) & Q(x)) -> Z
+                            ^
+    >>> try: read_expr(r'A -> ((P(x) &) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> ((P(x) &) -> Z
+                 ^
+    >>> try: read_expr(r'A -> ((P(x) | )) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> ((P(x) | )) -> Z
+                  ^
+    >>> try: read_expr(r'A -> (P(x) ->) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> (P(x) ->) -> Z
+                 ^
+    >>> try: read_expr(r'A -> (P(x) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    End of input found.  Expected token ')'.
+    A -> (P(x) -> Z
+                   ^
+    >>> try: read_expr(r'A -> (P(x,) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> (P(x,) -> Z
+              ^
+    >>> try: read_expr(r'A -> (P(x,)) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> (P(x,)) -> Z
+              ^
+    >>> try: read_expr(r'A -> (exists) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    ')' is an illegal variable name.  Constants may not be quantified.
+    A -> (exists) -> Z
+                ^
+    >>> try: read_expr(r'A -> (exists x) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> (exists x) -> Z
+                  ^
+    >>> try: read_expr(r'A -> (exists x.) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> (exists x.) -> Z
+                   ^
+    >>> try: read_expr(r'A -> (\  ) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    ')' is an illegal variable name.  Constants may not be abstracted.
+    A -> (\  ) -> Z
+             ^
+    >>> try: read_expr(r'A -> (\ x) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> (\ x) -> Z
+             ^
+    >>> try: read_expr(r'A -> (\ x y) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> (\ x y) -> Z
+               ^
+    >>> try: read_expr(r'A -> (\ x.) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> (\ x.) -> Z
+              ^
+    >>> try: read_expr(r'A -> (P(x)Q(x)) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: 'Q'.  Expected token ')'.
+    A -> (P(x)Q(x)) -> Z
+              ^
+    >>> try: read_expr(r'A -> ((P(x)Q(x)) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: 'Q'.  Expected token ')'.
+    A -> ((P(x)Q(x)) -> Z
+               ^
+    >>> try: read_expr(r'A -> (all x y) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> (all x y) -> Z
+                 ^
+    >>> try: read_expr(r'A -> (exists x y.) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: ')'.  Expression expected.
+    A -> (exists x y.) -> Z
+                     ^
+    >>> try: read_expr(r'A -> (exists x -> y) -> Z')
+    ... except LogicalExpressionException as e: print(e)
+    Unexpected token: '->'.  Expression expected.
+    A -> (exists x -> y) -> Z
+                   ^
+
+
diff --git a/nlp_resource_data/nltk/test/meteor.doctest b/nlp_resource_data/nltk/test/meteor.doctest
new file mode 100644 (file)
index 0000000..7544d25
--- /dev/null
@@ -0,0 +1,45 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+.. -*- coding: utf-8 -*-
+
+=============
+METEOR tests
+=============
+
+No Alignment test
+------------------
+
+    >>> from nltk.translate import meteor
+
+If the candidate has no alignment to any of the references, the METEOR score is 0.
+
+    >>> round(meteor(
+    ...     ['The candidate has no alignment to any of the references'],
+    ...     'John loves Mary'
+    ... ),4)
+    0.0
+
+Tests based on Wikipedia examples
+---------------------------------
+
+Testing on the `Wikipedia examples <https://en.wikipedia.org/wiki/METEOR#Examples>`_:
+
+    >>> same_res = round(meteor(
+    ...       ['The cat sat on the mat'], 
+    ...       'The cat sat on the mat'
+    ...       ),4)
+    >>> abs(same_res - 0.9977) < 1e-2
+    True
+
+    >>> meteor(
+    ...       ['The cat sat on the mat'], 
+    ...       'on the mat sat the cat'
+    ...       )
+    0.5
+
+    >>> round(meteor(
+    ...       ['The cat sat on the mat'], 
+    ...       'The cat was sat on the mat'
+    ...       ),4)
+    0.9654
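+
+The function also accepts multiple references. Assuming the score is taken
+against the best-matching reference, an exact match among the references
+should dominate (an illustrative sketch; the output is not checked here):
+
+    >>> round(meteor(
+    ...     ['The cat sat on the mat',
+    ...      'There is a cat on the mat'],
+    ...     'The cat sat on the mat'
+    ... ), 4)  # doctest: +SKIP
+    0.9977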
diff --git a/nlp_resource_data/nltk/test/metrics.doctest b/nlp_resource_data/nltk/test/metrics.doctest
new file mode 100644 (file)
index 0000000..5ff9877
--- /dev/null
@@ -0,0 +1,292 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=======
+Metrics
+=======
+
+The `nltk.metrics` package provides a variety of *evaluation measures*
+which can be used for a wide variety of NLP tasks.
+
+   >>> from nltk.metrics import *
+
+------------------
+Standard IR Scores
+------------------
+
+We can use standard scores from information retrieval to test the
+performance of taggers, chunkers, etc.
+
+    >>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
+    >>> test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
+    >>> print(accuracy(reference, test))
+    0.8
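+
+The score is simply the proportion of positions at which the two sequences
+agree, which we can confirm directly:
+
+    >>> sum(1 for r, t in zip(reference, test) if r == t) / len(test)
+    0.8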
+
+
+The following measures apply to sets:
+
+    >>> reference_set = set(reference)
+    >>> test_set = set(test)
+    >>> precision(reference_set, test_set)
+    1.0
+    >>> print(recall(reference_set, test_set))
+    0.8
+    >>> print(f_measure(reference_set, test_set))
+    0.88888888888...
+
+Measuring the likelihood of the data, given probability distributions:
+
+    >>> from nltk import FreqDist, MLEProbDist
+    >>> pdist1 = MLEProbDist(FreqDist("aldjfalskfjaldsf"))
+    >>> pdist2 = MLEProbDist(FreqDist("aldjfalssjjlldss"))
+    >>> print(log_likelihood(['a', 'd'], [pdist1, pdist2]))
+    -2.7075187496...
+
+
+----------------
+Distance Metrics
+----------------
+
+String edit distance (Levenshtein):
+
+    >>> edit_distance("rain", "shine")
+    3
+    >>> edit_distance_align("shine", "shine")
+    [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]
+    >>> edit_distance_align("rain", "brainy")
+    [(0, 0), (1, 1), (1, 2), (2, 3), (3, 4), (4, 5), (4, 6)]
+    >>> edit_distance_align("", "brainy")
+    [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6)]
+    >>> edit_distance_align("", "")
+    [(0, 0)]
+
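+The edit distance function also accepts optional arguments. As a sketch,
+assuming the `transpositions` flag is available in this version (outputs not
+checked), a swap of adjacent characters can count as a single edit:
+
+    >>> edit_distance("abc", "acb")  # doctest: +SKIP
+    2
+    >>> edit_distance("abc", "acb", transpositions=True)  # doctest: +SKIP
+    1
+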
+Other distance measures:
+
+    >>> s1 = set([1,2,3,4])
+    >>> s2 = set([3,4,5])
+    >>> binary_distance(s1, s2)
+    1.0
+    >>> print(jaccard_distance(s1, s2))
+    0.6
+    >>> print(masi_distance(s1, s2))
+    0.868
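+
+The Jaccard distance is the proportion of the union that lies outside the
+intersection, which we can confirm directly:
+
+    >>> (len(s1 | s2) - len(s1 & s2)) / len(s1 | s2)
+    0.6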
+
+----------------------
+Miscellaneous Measures
+----------------------
+
+Rank Correlation works with two dictionaries mapping keys to ranks.
+The dictionaries should have the same set of keys.
+
+    >>> spearman_correlation({'e':1, 't':2, 'a':3}, {'e':1, 'a':2, 't':3})
+    0.5
+
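+Identical rankings should yield a perfect score; an added sketch (output not
+checked here):
+
+    >>> spearman_correlation({'e':1, 't':2, 'a':3}, {'e':1, 't':2, 'a':3})  # doctest: +SKIP
+    1.0
+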
+Windowdiff uses a sliding window in comparing two segmentations of the same input (e.g. tokenizations, chunkings).
+Segmentations are represented using strings of zeros and ones.
+
+    >>> s1 = "000100000010"
+    >>> s2 = "000010000100"
+    >>> s3 = "100000010000"
+    >>> s4 = "000000000000"
+    >>> s5 = "111111111111"
+    >>> windowdiff(s1, s1, 3)
+    0.0
+    >>> abs(windowdiff(s1, s2, 3) - 0.3)  < 1e-6  # windowdiff(s1, s2, 3) == 0.3
+    True
+    >>> abs(windowdiff(s2, s3, 3) - 0.8)  < 1e-6  # windowdiff(s2, s3, 3) == 0.8
+    True
+    >>> windowdiff(s1, s4, 3)
+    0.5
+    >>> windowdiff(s1, s5, 3)
+    1.0
+
+----------------
+Confusion Matrix
+----------------
+
+    >>> reference = 'This is the reference data.  Testing 123.  aoaeoeoe'
+    >>> test =      'Thos iz_the rifirenci data.  Testeng 123.  aoaeoeoe'
+    >>> print(ConfusionMatrix(reference, test))
+      |   . 1 2 3 T _ a c d e f g h i n o r s t z |
+    --+-------------------------------------------+
+      |<8>. . . . . 1 . . . . . . . . . . . . . . |
+    . | .<2>. . . . . . . . . . . . . . . . . . . |
+    1 | . .<1>. . . . . . . . . . . . . . . . . . |
+    2 | . . .<1>. . . . . . . . . . . . . . . . . |
+    3 | . . . .<1>. . . . . . . . . . . . . . . . |
+    T | . . . . .<2>. . . . . . . . . . . . . . . |
+    _ | . . . . . .<.>. . . . . . . . . . . . . . |
+    a | . . . . . . .<4>. . . . . . . . . . . . . |
+    c | . . . . . . . .<1>. . . . . . . . . . . . |
+    d | . . . . . . . . .<1>. . . . . . . . . . . |
+    e | . . . . . . . . . .<6>. . . 3 . . . . . . |
+    f | . . . . . . . . . . .<1>. . . . . . . . . |
+    g | . . . . . . . . . . . .<1>. . . . . . . . |
+    h | . . . . . . . . . . . . .<2>. . . . . . . |
+    i | . . . . . . . . . . 1 . . .<1>. 1 . . . . |
+    n | . . . . . . . . . . . . . . .<2>. . . . . |
+    o | . . . . . . . . . . . . . . . .<3>. . . . |
+    r | . . . . . . . . . . . . . . . . .<2>. . . |
+    s | . . . . . . . . . . . . . . . . . .<2>. 1 |
+    t | . . . . . . . . . . . . . . . . . . .<3>. |
+    z | . . . . . . . . . . . . . . . . . . . .<.>|
+    --+-------------------------------------------+
+    (row = reference; col = test)
+    <BLANKLINE>
+
+    >>> cm = ConfusionMatrix(reference, test)
+    >>> print(cm.pretty_format(sort_by_count=True))
+      |   e a i o s t . T h n r 1 2 3 c d f g _ z |
+    --+-------------------------------------------+
+      |<8>. . . . . . . . . . . . . . . . . . 1 . |
+    e | .<6>. 3 . . . . . . . . . . . . . . . . . |
+    a | . .<4>. . . . . . . . . . . . . . . . . . |
+    i | . 1 .<1>1 . . . . . . . . . . . . . . . . |
+    o | . . . .<3>. . . . . . . . . . . . . . . . |
+    s | . . . . .<2>. . . . . . . . . . . . . . 1 |
+    t | . . . . . .<3>. . . . . . . . . . . . . . |
+    . | . . . . . . .<2>. . . . . . . . . . . . . |
+    T | . . . . . . . .<2>. . . . . . . . . . . . |
+    h | . . . . . . . . .<2>. . . . . . . . . . . |
+    n | . . . . . . . . . .<2>. . . . . . . . . . |
+    r | . . . . . . . . . . .<2>. . . . . . . . . |
+    1 | . . . . . . . . . . . .<1>. . . . . . . . |
+    2 | . . . . . . . . . . . . .<1>. . . . . . . |
+    3 | . . . . . . . . . . . . . .<1>. . . . . . |
+    c | . . . . . . . . . . . . . . .<1>. . . . . |
+    d | . . . . . . . . . . . . . . . .<1>. . . . |
+    f | . . . . . . . . . . . . . . . . .<1>. . . |
+    g | . . . . . . . . . . . . . . . . . .<1>. . |
+    _ | . . . . . . . . . . . . . . . . . . .<.>. |
+    z | . . . . . . . . . . . . . . . . . . . .<.>|
+    --+-------------------------------------------+
+    (row = reference; col = test)
+    <BLANKLINE>
+
+    >>> print(cm.pretty_format(sort_by_count=True, truncate=10))
+      |   e a i o s t . T h |
+    --+---------------------+
+      |<8>. . . . . . . . . |
+    e | .<6>. 3 . . . . . . |
+    a | . .<4>. . . . . . . |
+    i | . 1 .<1>1 . . . . . |
+    o | . . . .<3>. . . . . |
+    s | . . . . .<2>. . . . |
+    t | . . . . . .<3>. . . |
+    . | . . . . . . .<2>. . |
+    T | . . . . . . . .<2>. |
+    h | . . . . . . . . .<2>|
+    --+---------------------+
+    (row = reference; col = test)
+    <BLANKLINE>
+
+    >>> print(cm.pretty_format(sort_by_count=True, truncate=10, values_in_chart=False))
+       |                   1 |
+       | 1 2 3 4 5 6 7 8 9 0 |
+    ---+---------------------+
+     1 |<8>. . . . . . . . . |
+     2 | .<6>. 3 . . . . . . |
+     3 | . .<4>. . . . . . . |
+     4 | . 1 .<1>1 . . . . . |
+     5 | . . . .<3>. . . . . |
+     6 | . . . . .<2>. . . . |
+     7 | . . . . . .<3>. . . |
+     8 | . . . . . . .<2>. . |
+     9 | . . . . . . . .<2>. |
+    10 | . . . . . . . . .<2>|
+    ---+---------------------+
+    (row = reference; col = test)
+    Value key:
+         1:
+         2: e
+         3: a
+         4: i
+         5: o
+         6: s
+         7: t
+         8: .
+         9: T
+        10: h
+    <BLANKLINE>
+
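+Assuming `ConfusionMatrix` supports indexing by a (reference, test) pair, an
+individual cell count can be read off directly (a sketch; output not checked):
+
+    >>> cm['e', 'i']  # doctest: +SKIP
+    3
+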
+
+--------------------
+Association measures
+--------------------
+
+These measures are useful to determine whether the co-occurrence of two random
+events is meaningful. They are used, for instance, to distinguish collocations
+from other pairs of adjacent words.
+
+Here are some examples of bigram association calculations from Manning and
+Schütze's SNLP, 2nd Ed., chapter 5.
+
+    >>> n_new_companies, n_new, n_companies, N = 8, 15828, 4675, 14307668
+    >>> bam = BigramAssocMeasures
+    >>> bam.raw_freq(20, (42, 20), N) == 20. / N
+    True
+    >>> bam.student_t(n_new_companies, (n_new, n_companies), N)
+    0.999...
+    >>> bam.chi_sq(n_new_companies, (n_new, n_companies), N)
+    1.54...
+    >>> bam.likelihood_ratio(150, (12593, 932), N)
+    1291...
+
+For other associations, we ensure the ordering of the measures:
+
+    >>> bam.mi_like(20, (42, 20), N) > bam.mi_like(20, (41, 27), N)
+    True
+    >>> bam.pmi(20, (42, 20), N) > bam.pmi(20, (41, 27), N)
+    True
+    >>> bam.phi_sq(20, (42, 20), N) > bam.phi_sq(20, (41, 27), N)
+    True
+    >>> bam.poisson_stirling(20, (42, 20), N) > bam.poisson_stirling(20, (41, 27), N)
+    True
+    >>> bam.jaccard(20, (42, 20), N) > bam.jaccard(20, (41, 27), N)
+    True
+    >>> bam.dice(20, (42, 20), N) > bam.dice(20, (41, 27), N)
+    True
+    >>> bam.fisher(20, (42, 20), N) > bam.fisher(20, (41, 27), N) # doctest: +SKIP
+    False
+
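+In practice these measures are usually applied through a collocation finder.
+A minimal sketch, assuming the `BigramCollocationFinder` API (output not
+checked):
+
+    >>> from nltk.collocations import BigramCollocationFinder
+    >>> finder = BigramCollocationFinder.from_words(
+    ...     'the quick brown fox jumped over the quick brown hare'.split())
+    >>> finder.nbest(bam.pmi, 2)  # doctest: +SKIP
+    [('fox', 'jumped'), ('jumped', 'over')]
+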
+For trigrams, we have to provide more count information:
+
+    >>> n_w1_w2_w3 = 20
+    >>> n_w1_w2, n_w1_w3, n_w2_w3 = 35, 60, 40
+    >>> pair_counts = (n_w1_w2, n_w1_w3, n_w2_w3)
+    >>> n_w1, n_w2, n_w3 = 100, 200, 300
+    >>> uni_counts = (n_w1, n_w2, n_w3)
+    >>> N = 14307668
+    >>> tam = TrigramAssocMeasures
+    >>> tam.raw_freq(n_w1_w2_w3, pair_counts, uni_counts, N) == 1. * n_w1_w2_w3 / N
+    True
+    >>> uni_counts2 = (n_w1, n_w2, 100)
+    >>> tam.student_t(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.student_t(n_w1_w2_w3, pair_counts, uni_counts, N)
+    True
+    >>> tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts, N)
+    True
+    >>> tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts, N)
+    True
+    >>> tam.pmi(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.pmi(n_w1_w2_w3, pair_counts, uni_counts, N)
+    True
+    >>> tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts, N)
+    True
+    >>> tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts, N)
+    True
+    >>> tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts, N)
+    True
+
+
+For fourgrams, we have to provide more count information:
+
+    >>> n_w1_w2_w3_w4 = 5
+    >>> n_w1_w2, n_w1_w3, n_w2_w3 = 35, 60, 40
+    >>> n_w1_w2_w3, n_w2_w3_w4 = 20, 10 
+    >>> pair_counts = (n_w1_w2, n_w1_w3, n_w2_w3)
+    >>> triplet_counts = (n_w1_w2_w3, n_w2_w3_w4)
+    >>> n_w1, n_w2, n_w3, n_w4 = 100, 200, 300, 400
+    >>> uni_counts = (n_w1, n_w2, n_w3, n_w4)
+    >>> N = 14307668
+    >>> qam = QuadgramAssocMeasures
+    >>> qam.raw_freq(n_w1_w2_w3_w4, pair_counts, triplet_counts, uni_counts, N) == 1. * n_w1_w2_w3_w4 / N
+    True
diff --git a/nlp_resource_data/nltk/test/misc.doctest b/nlp_resource_data/nltk/test/misc.doctest
new file mode 100644 (file)
index 0000000..d72e0b3
--- /dev/null
@@ -0,0 +1,118 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+--------------------------------------------------------------------------------
+Unit tests for the miscellaneous sort functions.
+--------------------------------------------------------------------------------
+
+    >>> from copy import deepcopy
+    >>> from nltk.misc.sort import *
+
+A (very) small list of unsorted integers.
+
+    >>> test_data = [12, 67, 7, 28, 92, 56, 53, 720, 91, 57, 20, 20]
+
+Test each sorting method. Each method returns the number of operations
+required to sort the data, and sorts in place (destructively, hence the need
+for multiple copies).
+
+    >>> sorted_data = deepcopy(test_data)
+    >>> selection(sorted_data)
+    66
+
+    >>> sorted_data
+    [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
+
+    >>> sorted_data = deepcopy(test_data)
+    >>> bubble(sorted_data)
+    30
+
+    >>> sorted_data
+    [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
+
+    >>> sorted_data = deepcopy(test_data)
+    >>> merge(sorted_data)
+    30
+
+    >>> sorted_data
+    [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
+
+    >>> sorted_data = deepcopy(test_data)
+    >>> quick(sorted_data)
+    13
+
+    >>> sorted_data
+    [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
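+
+As an added sanity check (not part of the original suite), the in-place result
+agrees with Python's built-in sorting:
+
+    >>> sorted_data == sorted(test_data)
+    True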
+
+--------------------------------------------------------------------------------
+Unit tests for Wordfinder class
+--------------------------------------------------------------------------------
+
+    >>> import random
+
+    >>> # The following is not enough for reproducibility under Python 2/3
+    >>> # (see http://bugs.python.org/issue9025) so this test is skipped.
+    >>> random.seed(12345)
+
+    >>> from nltk.misc import wordfinder
+    >>> wordfinder.word_finder() # doctest: +SKIP
+    Word Finder
+    <BLANKLINE>
+    J V L A I R O T A T I S I V O D E R E T
+    H U U B E A R O E P O C S O R E T N E P
+    A D A U Z E E S R A P P A L L M E N T R
+    C X A D Q S Z T P E O R S N G P J A D E
+    I G Y K K T I A A R G F I D T E L C N S
+    R E C N B H T R L T N N B W N T A O A I
+    A Y I L O E I A M E I A A Y U R P L L D
+    G L T V S T S F E A D I P H D O O H N I
+    R L S E C I N I L R N N M E C G R U E A
+    A A Y G I C E N L L E O I G Q R T A E L
+    M R C E T I S T A E T L L E U A E N R L
+    O U O T A S E E C S O O N H Y P A T G Y
+    E M H O M M D R E S F P U L T H C F N V
+    L A C A I M A M A N L B R U T E D O M I
+    O R I L N E E E E E U A R S C R Y L I P
+    H T R K E S N N M S I L A S R E V I N U
+    T X T A A O U T K S E T A R R E S I B J
+    A E D L E L J I F O O R P E L K N I R W
+    K H A I D E Q O P R I C K T I M B E R P
+    Z K D O O H G N I H T U R V E Y D R O P
+    <BLANKLINE>
+    1: INTERCHANGER
+    2: TEARLESSNESS
+    3: UNIVERSALISM
+    4: DESENSITIZER
+    5: INTERMENTION
+    6: TRICHOCYSTIC
+    7: EXTRAMURALLY
+    8: VEGETOALKALI
+    9: PALMELLACEAE
+    10: AESTHETICISM
+    11: PETROGRAPHER
+    12: VISITATORIAL
+    13: OLEOMARGARIC
+    14: WRINKLEPROOF
+    15: PRICKTIMBER
+    16: PRESIDIALLY
+    17: SCITAMINEAE
+    18: ENTEROSCOPE
+    19: APPALLMENT
+    20: TURVEYDROP
+    21: THINGHOOD
+    22: BISERRATE
+    23: GREENLAND
+    24: BRUTEDOM
+    25: POLONIAN
+    26: ACOLHUAN
+    27: LAPORTEA
+    28: TENDING
+    29: TEREDO
+    30: MESOLE
+    31: UNLIMP
+    32: OSTARA
+    33: PILY
+    34: DUNT
+    35: ONYX
+    36: KATH
+    37: JUNE
diff --git a/nlp_resource_data/nltk/test/nonmonotonic.doctest b/nlp_resource_data/nltk/test/nonmonotonic.doctest
new file mode 100644 (file)
index 0000000..ea17c60
--- /dev/null
@@ -0,0 +1,286 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+======================
+Nonmonotonic Reasoning
+======================
+
+    >>> from nltk import *
+    >>> from nltk.inference.nonmonotonic import *
+    >>> from nltk.sem import logic
+    >>> logic._counter._value = 0
+    >>> read_expr = logic.Expression.fromstring
+
+------------------------
+Closed Domain Assumption
+------------------------
+
+The only entities in the domain are those found in the assumptions or goal.
+If the domain only contains "A" and "B", then the expression "exists x.P(x)" can
+be replaced with "P(A) | P(B)" and an expression "all x.P(x)" can be replaced
+with "P(A) & P(B)".
+
+    >>> p1 = read_expr(r'all x.(man(x) -> mortal(x))')
+    >>> p2 = read_expr(r'man(Socrates)')
+    >>> c = read_expr(r'mortal(Socrates)')
+    >>> prover = Prover9Command(c, [p1,p2])
+    >>> prover.prove()
+    True
+    >>> cdp = ClosedDomainProver(prover)
+    >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
+    (man(Socrates) -> mortal(Socrates))
+    man(Socrates)
+    >>> cdp.prove()
+    True
+
+    >>> p1 = read_expr(r'exists x.walk(x)')
+    >>> p2 = read_expr(r'man(Socrates)')
+    >>> c = read_expr(r'walk(Socrates)')
+    >>> prover = Prover9Command(c, [p1,p2])
+    >>> prover.prove()
+    False
+    >>> cdp = ClosedDomainProver(prover)
+    >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
+    walk(Socrates)
+    man(Socrates)
+    >>> cdp.prove()
+    True
+
+    >>> p1 = read_expr(r'exists x.walk(x)')
+    >>> p2 = read_expr(r'man(Socrates)')
+    >>> p3 = read_expr(r'-walk(Bill)')
+    >>> c = read_expr(r'walk(Socrates)')
+    >>> prover = Prover9Command(c, [p1,p2,p3])
+    >>> prover.prove()
+    False
+    >>> cdp = ClosedDomainProver(prover)
+    >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
+    (walk(Socrates) | walk(Bill))
+    man(Socrates)
+    -walk(Bill)
+    >>> cdp.prove()
+    True
+
+    >>> p1 = read_expr(r'walk(Socrates)')
+    >>> p2 = read_expr(r'walk(Bill)')
+    >>> c = read_expr(r'all x.walk(x)')
+    >>> prover = Prover9Command(c, [p1,p2])
+    >>> prover.prove()
+    False
+    >>> cdp = ClosedDomainProver(prover)
+    >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
+    walk(Socrates)
+    walk(Bill)
+    >>> print(cdp.goal()) # doctest: +SKIP
+    (walk(Socrates) & walk(Bill))
+    >>> cdp.prove()
+    True
+
+    >>> p1 = read_expr(r'girl(mary)')
+    >>> p2 = read_expr(r'dog(rover)')
+    >>> p3 = read_expr(r'all x.(girl(x) -> -dog(x))')
+    >>> p4 = read_expr(r'all x.(dog(x) -> -girl(x))')
+    >>> p5 = read_expr(r'chase(mary, rover)')
+    >>> c = read_expr(r'exists y.(dog(y) & all x.(girl(x) -> chase(x,y)))')
+    >>> prover = Prover9Command(c, [p1,p2,p3,p4,p5])
+    >>> print(prover.prove())
+    False
+    >>> cdp = ClosedDomainProver(prover)
+    >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
+    girl(mary)
+    dog(rover)
+    ((girl(rover) -> -dog(rover)) & (girl(mary) -> -dog(mary)))
+    ((dog(rover) -> -girl(rover)) & (dog(mary) -> -girl(mary)))
+    chase(mary,rover)
+    >>> print(cdp.goal()) # doctest: +SKIP
+    ((dog(rover) & (girl(rover) -> chase(rover,rover)) & (girl(mary) -> chase(mary,rover))) | (dog(mary) & (girl(rover) -> chase(rover,mary)) & (girl(mary) -> chase(mary,mary))))
+    >>> print(cdp.prove())
+    True
+
+-----------------------
+Unique Names Assumption
+-----------------------
+
+No two entities in the domain represent the same entity unless it can be
+explicitly proven that they do.  Therefore, if the domain contains "A" and "B",
+then add the assumption "-(A = B)" if it is not the case that
+"<assumptions> \|- (A = B)".
+
+    >>> p1 = read_expr(r'man(Socrates)')
+    >>> p2 = read_expr(r'man(Bill)')
+    >>> c = read_expr(r'exists x.exists y.-(x = y)')
+    >>> prover = Prover9Command(c, [p1,p2])
+    >>> prover.prove()
+    False
+    >>> unp = UniqueNamesProver(prover)
+    >>> for a in unp.assumptions(): print(a) # doctest: +SKIP
+    man(Socrates)
+    man(Bill)
+    -(Socrates = Bill)
+    >>> unp.prove()
+    True
+
+    >>> p1 = read_expr(r'all x.(walk(x) -> (x = Socrates))')
+    >>> p2 = read_expr(r'Bill = William')
+    >>> p3 = read_expr(r'Bill = Billy')
+    >>> c = read_expr(r'-walk(William)')
+    >>> prover = Prover9Command(c, [p1,p2,p3])
+    >>> prover.prove()
+    False
+    >>> unp = UniqueNamesProver(prover)
+    >>> for a in unp.assumptions(): print(a) # doctest: +SKIP
+    all x.(walk(x) -> (x = Socrates))
+    (Bill = William)
+    (Bill = Billy)
+    -(William = Socrates)
+    -(Billy = Socrates)
+    -(Socrates = Bill)
+    >>> unp.prove()
+    True
+
+-----------------------
+Closed World Assumption
+-----------------------
+
+The only entities that have certain properties are those that are stated to
+have the properties.  We accomplish this assumption by "completing" predicates.
+
+If the assumptions contain "P(A)", then "all x.(P(x) -> (x=A))" is the completion
+of "P".  If the assumptions contain "all x.(ostrich(x) -> bird(x))", then
+"all x.(bird(x) -> ostrich(x))" is the completion of "bird".  If the
+assumptions don't contain anything that is "P", then "all x.-P(x)" is the
+completion of "P".
+
+    >>> p1 = read_expr(r'walk(Socrates)')
+    >>> p2 = read_expr(r'-(Socrates = Bill)')
+    >>> c = read_expr(r'-walk(Bill)')
+    >>> prover = Prover9Command(c, [p1,p2])
+    >>> prover.prove()
+    False
+    >>> cwp = ClosedWorldProver(prover)
+    >>> for a in cwp.assumptions(): print(a) # doctest: +SKIP
+    walk(Socrates)
+    -(Socrates = Bill)
+    all z1.(walk(z1) -> (z1 = Socrates))
+    >>> cwp.prove()
+    True
+
+    >>> p1 = read_expr(r'see(Socrates, John)')
+    >>> p2 = read_expr(r'see(John, Mary)')
+    >>> p3 = read_expr(r'-(Socrates = John)')
+    >>> p4 = read_expr(r'-(John = Mary)')
+    >>> c = read_expr(r'-see(Socrates, Mary)')
+    >>> prover = Prover9Command(c, [p1,p2,p3,p4])
+    >>> prover.prove()
+    False
+    >>> cwp = ClosedWorldProver(prover)
+    >>> for a in cwp.assumptions(): print(a) # doctest: +SKIP
+    see(Socrates,John)
+    see(John,Mary)
+    -(Socrates = John)
+    -(John = Mary)
+    all z3 z4.(see(z3,z4) -> (((z3 = Socrates) & (z4 = John)) | ((z3 = John) & (z4 = Mary))))
+    >>> cwp.prove()
+    True
+
+    >>> p1 = read_expr(r'all x.(ostrich(x) -> bird(x))')
+    >>> p2 = read_expr(r'bird(Tweety)')
+    >>> p3 = read_expr(r'-ostrich(Sam)')
+    >>> p4 = read_expr(r'Sam != Tweety')
+    >>> c = read_expr(r'-bird(Sam)')
+    >>> prover = Prover9Command(c, [p1,p2,p3,p4])
+    >>> prover.prove()
+    False
+    >>> cwp = ClosedWorldProver(prover)
+    >>> for a in cwp.assumptions(): print(a) # doctest: +SKIP
+    all x.(ostrich(x) -> bird(x))
+    bird(Tweety)
+    -ostrich(Sam)
+    -(Sam = Tweety)
+    all z7.-ostrich(z7)
+    all z8.(bird(z8) -> ((z8 = Tweety) | ostrich(z8)))
+    >>> print(cwp.prove())
+    True
+
+-----------------------
+Multi-Decorator Example
+-----------------------
+
+Decorators can be nested to utilize multiple assumptions.
+
+    >>> p1 = read_expr(r'see(Socrates, John)')
+    >>> p2 = read_expr(r'see(John, Mary)')
+    >>> c = read_expr(r'-see(Socrates, Mary)')
+    >>> prover = Prover9Command(c, [p1,p2])
+    >>> print(prover.prove())
+    False
+    >>> cmd = ClosedDomainProver(UniqueNamesProver(ClosedWorldProver(prover)))
+    >>> print(cmd.prove())
+    True
+
+-----------------
+Default Reasoning
+-----------------
+    >>> logic._counter._value = 0
+    >>> premises = []
+
+define the taxonomy
+    >>> premises.append(read_expr(r'all x.(elephant(x)        -> animal(x))'))
+    >>> premises.append(read_expr(r'all x.(bird(x)            -> animal(x))'))
+    >>> premises.append(read_expr(r'all x.(dove(x)            -> bird(x))'))
+    >>> premises.append(read_expr(r'all x.(ostrich(x)         -> bird(x))'))
+    >>> premises.append(read_expr(r'all x.(flying_ostrich(x)  -> ostrich(x))'))
+
+default the properties using abnormalities
+    >>> premises.append(read_expr(r'all x.((animal(x)  & -Ab1(x)) -> -fly(x))')) #normal animals don't fly
+    >>> premises.append(read_expr(r'all x.((bird(x)    & -Ab2(x)) -> fly(x))'))  #normal birds fly
+    >>> premises.append(read_expr(r'all x.((ostrich(x) & -Ab3(x)) -> -fly(x))')) #normal ostriches don't fly
+
+specify abnormal entities
+    >>> premises.append(read_expr(r'all x.(bird(x)           -> Ab1(x))')) #flight
+    >>> premises.append(read_expr(r'all x.(ostrich(x)        -> Ab2(x))')) #non-flying bird
+    >>> premises.append(read_expr(r'all x.(flying_ostrich(x) -> Ab3(x))')) #flying ostrich
+
+define entities
+    >>> premises.append(read_expr(r'elephant(el)'))
+    >>> premises.append(read_expr(r'dove(do)'))
+    >>> premises.append(read_expr(r'ostrich(os)'))
+
+print the augmented assumptions list
+    >>> prover = Prover9Command(None, premises)
+    >>> command = UniqueNamesProver(ClosedWorldProver(prover))
+    >>> for a in command.assumptions(): print(a) # doctest: +SKIP
+    all x.(elephant(x) -> animal(x))
+    all x.(bird(x) -> animal(x))
+    all x.(dove(x) -> bird(x))
+    all x.(ostrich(x) -> bird(x))
+    all x.(flying_ostrich(x) -> ostrich(x))
+    all x.((animal(x) & -Ab1(x)) -> -fly(x))
+    all x.((bird(x) & -Ab2(x)) -> fly(x))
+    all x.((ostrich(x) & -Ab3(x)) -> -fly(x))
+    all x.(bird(x) -> Ab1(x))
+    all x.(ostrich(x) -> Ab2(x))
+    all x.(flying_ostrich(x) -> Ab3(x))
+    elephant(el)
+    dove(do)
+    ostrich(os)
+    all z1.(animal(z1) -> (elephant(z1) | bird(z1)))
+    all z2.(Ab1(z2) -> bird(z2))
+    all z3.(bird(z3) -> (dove(z3) | ostrich(z3)))
+    all z4.(dove(z4) -> (z4 = do))
+    all z5.(Ab2(z5) -> ostrich(z5))
+    all z6.(Ab3(z6) -> flying_ostrich(z6))
+    all z7.(ostrich(z7) -> ((z7 = os) | flying_ostrich(z7)))
+    all z8.-flying_ostrich(z8)
+    all z9.(elephant(z9) -> (z9 = el))
+    -(el = os)
+    -(el = do)
+    -(os = do)
+
+    >>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('-fly(el)'), premises))).prove()
+    True
+    >>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('fly(do)'), premises))).prove()
+    True
+    >>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('-fly(os)'), premises))).prove()
+    True
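+
+Conversely, a query that contradicts the defaults should not be provable; a
+sketch (skipped here, since it requires Prover9):
+
+    >>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('fly(el)'), premises))).prove() # doctest: +SKIP
+    False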
+
diff --git a/nlp_resource_data/nltk/test/nonmonotonic_fixt.py b/nlp_resource_data/nltk/test/nonmonotonic_fixt.py
new file mode 100644 (file)
index 0000000..e6bdffa
--- /dev/null
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+
+
+def setup_module(module):
+    from nose import SkipTest
+    from nltk.inference.mace import Mace
+
+    try:
+        m = Mace()
+        m._find_binary("mace4")
+    except LookupError:
+        raise SkipTest(
+            "Mace4/Prover9 is not available so nonmonotonic.doctest was skipped"
+        )
diff --git a/nlp_resource_data/nltk/test/paice.doctest b/nlp_resource_data/nltk/test/paice.doctest
new file mode 100644 (file)
index 0000000..1e3a65c
--- /dev/null
@@ -0,0 +1,35 @@
+
+=====================================================
+PAICE's evaluation statistics for stemming algorithms
+=====================================================
+
+Given a list of words with their real lemmas, and their stems according to the
+stemming algorithm under evaluation, this computes the Understemming Index (UI),
+Overstemming Index (OI), Stemming Weight (SW) and Error-Rate Relative to
+Truncation (ERRT).
+
+   >>> from nltk.metrics import Paice
+
+
+-------------------------------------
+Understemming and Overstemming values
+-------------------------------------
+
+    >>> lemmas = {'kneel': ['kneel', 'knelt'],
+    ...           'range': ['range', 'ranged'],
+    ...           'ring': ['ring', 'rang', 'rung']}
+    >>> stems = {'kneel': ['kneel'],
+    ...          'knelt': ['knelt'],
+    ...          'rang': ['rang', 'range', 'ranged'],
+    ...          'ring': ['ring'],
+    ...          'rung': ['rung']}
+    >>> p = Paice(lemmas, stems)
+    >>> p.gumt, p.gdmt, p.gwmt, p.gdnt
+    (4.0, 5.0, 2.0, 16.0)
+
+    >>> p.ui, p.oi, p.sw
+    (0.8..., 0.125..., 0.15625...)
+
+    >>> p.errt
+    1.0
+
+    >>> [('{0:.3f}'.format(a), '{0:.3f}'.format(b)) for a, b in p.coords]
+    [('0.000', '1.000'), ('0.000', '0.375'), ('0.600', '0.125'), ('0.800', '0.125')]
diff --git a/nlp_resource_data/nltk/test/parse.doctest b/nlp_resource_data/nltk/test/parse.doctest
new file mode 100644 (file)
index 0000000..c84b469
--- /dev/null
@@ -0,0 +1,884 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=========
+ Parsing
+=========
+
+Unit tests for the Context Free Grammar class
+---------------------------------------------
+
+    >>> from nltk import Nonterminal, nonterminals, Production, CFG
+
+    >>> nt1 = Nonterminal('NP')
+    >>> nt2 = Nonterminal('VP')
+
+    >>> nt1.symbol()
+    'NP'
+
+    >>> nt1 == Nonterminal('NP')
+    True
+
+    >>> nt1 == nt2
+    False
+
+    >>> S, NP, VP, PP = nonterminals('S, NP, VP, PP')
+    >>> N, V, P, DT = nonterminals('N, V, P, DT')
+
+    >>> prod1 = Production(S, [NP, VP])
+    >>> prod2 = Production(NP, [DT, NP])
+
+    >>> prod1.lhs()
+    S
+
+    >>> prod1.rhs()
+    (NP, VP)
+
+    >>> prod1 == Production(S, [NP, VP])
+    True
+
+    >>> prod1 == prod2
+    False
+
+    >>> grammar = CFG.fromstring("""
+    ... S -> NP VP
+    ... PP -> P NP
+    ... NP -> 'the' N | N PP | 'the' N PP
+    ... VP -> V NP | V PP | V NP PP
+    ... N -> 'cat'
+    ... N -> 'dog'
+    ... N -> 'rug'
+    ... V -> 'chased'
+    ... V -> 'sat'
+    ... P -> 'in'
+    ... P -> 'on'
+    ... """)
+
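+The resulting grammar object can be inspected directly; a sketch using the
+standard `CFG` accessors (outputs not checked):
+
+    >>> grammar.start()  # doctest: +SKIP
+    S
+    >>> len(grammar.productions())  # doctest: +SKIP
+    15
+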
+Unit tests for the rd (Recursive Descent Parser) class
+------------------------------------------------------
+
+Create and run a recursive descent parser over both a syntactically ambiguous
+and unambiguous sentence.
+
+    >>> from nltk.parse import RecursiveDescentParser
+    >>> rd = RecursiveDescentParser(grammar)
+
+    >>> sentence1 = 'the cat chased the dog'.split()
+    >>> sentence2 = 'the cat chased the dog on the rug'.split()
+
+    >>> for t in rd.parse(sentence1):
+    ...     print(t)
+    (S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
+
+    >>> for t in rd.parse(sentence2):
+    ...     print(t)
+    (S
+      (NP the (N cat))
+      (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
+    (S
+      (NP the (N cat))
+      (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
+
+
+
+Unit tests for the sr (Shift Reduce Parser) class
+-------------------------------------------------
+
+Create and run a shift reduce parser over both a syntactically ambiguous
+and unambiguous sentence. Note that unlike the recursive descent parser, at
+most one parse is ever returned.
+
+    >>> from nltk.parse import ShiftReduceParser
+    >>> sr = ShiftReduceParser(grammar)
+
+    >>> sentence1 = 'the cat chased the dog'.split()
+    >>> sentence2 = 'the cat chased the dog on the rug'.split()
+
+    >>> for t in sr.parse(sentence1):
+    ...     print(t)
+    (S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
+
+
+The shift reduce parser uses heuristics to decide what to do when multiple
+shift or reduce operations are possible. For the supplied grammar, it clearly
+selects the wrong operation here, so no parse is found.
+
+    >>> for t in sr.parse(sentence2):
+    ...     print(t)
+
+
+Unit tests for the Chart Parser class
+-------------------------------------
+
+We use the demo() function for testing.
+We must turn off the display of times.
+
+    >>> import nltk
+
+First we test tracing with a short sentence
+
+    >>> nltk.parse.chart.demo(2, print_times=False, trace=1,
+    ...                       sent='I saw a dog', numparses=1)
+    * Sentence:
+    I saw a dog
+    ['I', 'saw', 'a', 'dog']
+    <BLANKLINE>
+    * Strategy: Bottom-up
+    <BLANKLINE>
+    |.    I    .   saw   .    a    .   dog   .|
+    |[---------]         .         .         .| [0:1] 'I'
+    |.         [---------]         .         .| [1:2] 'saw'
+    |.         .         [---------]         .| [2:3] 'a'
+    |.         .         .         [---------]| [3:4] 'dog'
+    |>         .         .         .         .| [0:0] NP -> * 'I'
+    |[---------]         .         .         .| [0:1] NP -> 'I' *
+    |>         .         .         .         .| [0:0] S  -> * NP VP
+    |>         .         .         .         .| [0:0] NP -> * NP PP
+    |[--------->         .         .         .| [0:1] S  -> NP * VP
+    |[--------->         .         .         .| [0:1] NP -> NP * PP
+    |.         >         .         .         .| [1:1] Verb -> * 'saw'
+    |.         [---------]         .         .| [1:2] Verb -> 'saw' *
+    |.         >         .         .         .| [1:1] VP -> * Verb NP
+    |.         >         .         .         .| [1:1] VP -> * Verb
+    |.         [--------->         .         .| [1:2] VP -> Verb * NP
+    |.         [---------]         .         .| [1:2] VP -> Verb *
+    |.         >         .         .         .| [1:1] VP -> * VP PP
+    |[-------------------]         .         .| [0:2] S  -> NP VP *
+    |.         [--------->         .         .| [1:2] VP -> VP * PP
+    |.         .         >         .         .| [2:2] Det -> * 'a'
+    |.         .         [---------]         .| [2:3] Det -> 'a' *
+    |.         .         >         .         .| [2:2] NP -> * Det Noun
+    |.         .         [--------->         .| [2:3] NP -> Det * Noun
+    |.         .         .         >         .| [3:3] Noun -> * 'dog'
+    |.         .         .         [---------]| [3:4] Noun -> 'dog' *
+    |.         .         [-------------------]| [2:4] NP -> Det Noun *
+    |.         .         >         .         .| [2:2] S  -> * NP VP
+    |.         .         >         .         .| [2:2] NP -> * NP PP
+    |.         [-----------------------------]| [1:4] VP -> Verb NP *
+    |.         .         [------------------->| [2:4] S  -> NP * VP
+    |.         .         [------------------->| [2:4] NP -> NP * PP
+    |[=======================================]| [0:4] S  -> NP VP *
+    |.         [----------------------------->| [1:4] VP -> VP * PP
+    Nr edges in chart: 33
+    (S (NP I) (VP (Verb saw) (NP (Det a) (Noun dog))))
+    <BLANKLINE>
+
+Then we test the different parsing strategies.
+Note that the number of edges differs between the strategies.
+
+Top-down
+
+    >>> nltk.parse.chart.demo(1, print_times=False, trace=0,
+    ...                       sent='I saw John with a dog', numparses=2)
+    * Sentence:
+    I saw John with a dog
+    ['I', 'saw', 'John', 'with', 'a', 'dog']
+    <BLANKLINE>
+    * Strategy: Top-down
+    <BLANKLINE>
+    Nr edges in chart: 48
+    (S
+      (NP I)
+      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+    (S
+      (NP I)
+      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+    <BLANKLINE>
+
+Bottom-up
+
+    >>> nltk.parse.chart.demo(2, print_times=False, trace=0,
+    ...                       sent='I saw John with a dog', numparses=2)
+    * Sentence:
+    I saw John with a dog
+    ['I', 'saw', 'John', 'with', 'a', 'dog']
+    <BLANKLINE>
+    * Strategy: Bottom-up
+    <BLANKLINE>
+    Nr edges in chart: 53
+    (S
+      (NP I)
+      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+    (S
+      (NP I)
+      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+    <BLANKLINE>
+
+Bottom-up Left-Corner
+
+    >>> nltk.parse.chart.demo(3, print_times=False, trace=0,
+    ...                       sent='I saw John with a dog', numparses=2)
+    * Sentence:
+    I saw John with a dog
+    ['I', 'saw', 'John', 'with', 'a', 'dog']
+    <BLANKLINE>
+    * Strategy: Bottom-up left-corner
+    <BLANKLINE>
+    Nr edges in chart: 36
+    (S
+      (NP I)
+      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+    (S
+      (NP I)
+      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+    <BLANKLINE>
+
+Left-Corner with Bottom-Up Filter
+
+    >>> nltk.parse.chart.demo(4, print_times=False, trace=0,
+    ...                       sent='I saw John with a dog', numparses=2)
+    * Sentence:
+    I saw John with a dog
+    ['I', 'saw', 'John', 'with', 'a', 'dog']
+    <BLANKLINE>
+    * Strategy: Filtered left-corner
+    <BLANKLINE>
+    Nr edges in chart: 28
+    (S
+      (NP I)
+      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+    (S
+      (NP I)
+      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+    <BLANKLINE>
+
+The stepping chart parser
+
+    >>> nltk.parse.chart.demo(5, print_times=False, trace=1,
+    ...                       sent='I saw John with a dog', numparses=2)
+    * Sentence:
+    I saw John with a dog
+    ['I', 'saw', 'John', 'with', 'a', 'dog']
+    <BLANKLINE>
+    * Strategy: Stepping (top-down vs bottom-up)
+    <BLANKLINE>
+    *** SWITCH TO TOP DOWN
+    |[------]      .      .      .      .      .| [0:1] 'I'
+    |.      [------]      .      .      .      .| [1:2] 'saw'
+    |.      .      [------]      .      .      .| [2:3] 'John'
+    |.      .      .      [------]      .      .| [3:4] 'with'
+    |.      .      .      .      [------]      .| [4:5] 'a'
+    |.      .      .      .      .      [------]| [5:6] 'dog'
+    |>      .      .      .      .      .      .| [0:0] S  -> * NP VP
+    |>      .      .      .      .      .      .| [0:0] NP -> * NP PP
+    |>      .      .      .      .      .      .| [0:0] NP -> * Det Noun
+    |>      .      .      .      .      .      .| [0:0] NP -> * 'I'
+    |[------]      .      .      .      .      .| [0:1] NP -> 'I' *
+    |[------>      .      .      .      .      .| [0:1] S  -> NP * VP
+    |[------>      .      .      .      .      .| [0:1] NP -> NP * PP
+    |.      >      .      .      .      .      .| [1:1] VP -> * VP PP
+    |.      >      .      .      .      .      .| [1:1] VP -> * Verb NP
+    |.      >      .      .      .      .      .| [1:1] VP -> * Verb
+    |.      >      .      .      .      .      .| [1:1] Verb -> * 'saw'
+    |.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
+    |.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
+    |.      [------]      .      .      .      .| [1:2] VP -> Verb *
+    |[-------------]      .      .      .      .| [0:2] S  -> NP VP *
+    |.      [------>      .      .      .      .| [1:2] VP -> VP * PP
+    *** SWITCH TO BOTTOM UP
+    |.      .      >      .      .      .      .| [2:2] NP -> * 'John'
+    |.      .      .      >      .      .      .| [3:3] PP -> * 'with' NP
+    |.      .      .      >      .      .      .| [3:3] Prep -> * 'with'
+    |.      .      .      .      >      .      .| [4:4] Det -> * 'a'
+    |.      .      .      .      .      >      .| [5:5] Noun -> * 'dog'
+    |.      .      [------]      .      .      .| [2:3] NP -> 'John' *
+    |.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
+    |.      .      .      [------]      .      .| [3:4] Prep -> 'with' *
+    |.      .      .      .      [------]      .| [4:5] Det -> 'a' *
+    |.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
+    |.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
+    |[--------------------]      .      .      .| [0:3] S  -> NP VP *
+    |.      [------------->      .      .      .| [1:3] VP -> VP * PP
+    |.      .      >      .      .      .      .| [2:2] S  -> * NP VP
+    |.      .      >      .      .      .      .| [2:2] NP -> * NP PP
+    |.      .      .      .      >      .      .| [4:4] NP -> * Det Noun
+    |.      .      [------>      .      .      .| [2:3] S  -> NP * VP
+    |.      .      [------>      .      .      .| [2:3] NP -> NP * PP
+    |.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
+    |.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
+    |.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
+    |.      [----------------------------------]| [1:6] VP -> VP PP *
+    *** SWITCH TO TOP DOWN
+    |.      .      >      .      .      .      .| [2:2] NP -> * Det Noun
+    |.      .      .      .      >      .      .| [4:4] NP -> * NP PP
+    |.      .      .      >      .      .      .| [3:3] VP -> * VP PP
+    |.      .      .      >      .      .      .| [3:3] VP -> * Verb NP
+    |.      .      .      >      .      .      .| [3:3] VP -> * Verb
+    |[=========================================]| [0:6] S  -> NP VP *
+    |.      [---------------------------------->| [1:6] VP -> VP * PP
+    |.      .      [---------------------------]| [2:6] NP -> NP PP *
+    |.      .      .      .      [------------->| [4:6] NP -> NP * PP
+    |.      [----------------------------------]| [1:6] VP -> Verb NP *
+    |.      .      [--------------------------->| [2:6] S  -> NP * VP
+    |.      .      [--------------------------->| [2:6] NP -> NP * PP
+    |[=========================================]| [0:6] S  -> NP VP *
+    |.      [---------------------------------->| [1:6] VP -> VP * PP
+    |.      .      .      .      .      .      >| [6:6] VP -> * VP PP
+    |.      .      .      .      .      .      >| [6:6] VP -> * Verb NP
+    |.      .      .      .      .      .      >| [6:6] VP -> * Verb
+    *** SWITCH TO BOTTOM UP
+    |.      .      .      .      >      .      .| [4:4] S  -> * NP VP
+    |.      .      .      .      [------------->| [4:6] S  -> NP * VP
+    *** SWITCH TO TOP DOWN
+    *** SWITCH TO BOTTOM UP
+    *** SWITCH TO TOP DOWN
+    *** SWITCH TO BOTTOM UP
+    *** SWITCH TO TOP DOWN
+    *** SWITCH TO BOTTOM UP
+    Nr edges in chart: 61
+    (S
+      (NP I)
+      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+    (S
+      (NP I)
+      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+    <BLANKLINE>
+
+
+Unit tests for the Incremental Chart Parser class
+-------------------------------------------------
+
+The incremental chart parsers are defined in earleychart.py.
+We use the demo() function for testing. We must turn off the display of times.
+
+    >>> import nltk
+
+Earley Chart Parser
+
+    >>> nltk.parse.earleychart.demo(print_times=False, trace=1,
+    ...                             sent='I saw John with a dog', numparses=2)
+    * Sentence:
+    I saw John with a dog
+    ['I', 'saw', 'John', 'with', 'a', 'dog']
+    <BLANKLINE>
+    |.  I   . saw  . John . with .  a   . dog  .|
+    |[------]      .      .      .      .      .| [0:1] 'I'
+    |.      [------]      .      .      .      .| [1:2] 'saw'
+    |.      .      [------]      .      .      .| [2:3] 'John'
+    |.      .      .      [------]      .      .| [3:4] 'with'
+    |.      .      .      .      [------]      .| [4:5] 'a'
+    |.      .      .      .      .      [------]| [5:6] 'dog'
+    |>      .      .      .      .      .      .| [0:0] S  -> * NP VP
+    |>      .      .      .      .      .      .| [0:0] NP -> * NP PP
+    |>      .      .      .      .      .      .| [0:0] NP -> * Det Noun
+    |>      .      .      .      .      .      .| [0:0] NP -> * 'I'
+    |[------]      .      .      .      .      .| [0:1] NP -> 'I' *
+    |[------>      .      .      .      .      .| [0:1] S  -> NP * VP
+    |[------>      .      .      .      .      .| [0:1] NP -> NP * PP
+    |.      >      .      .      .      .      .| [1:1] VP -> * VP PP
+    |.      >      .      .      .      .      .| [1:1] VP -> * Verb NP
+    |.      >      .      .      .      .      .| [1:1] VP -> * Verb
+    |.      >      .      .      .      .      .| [1:1] Verb -> * 'saw'
+    |.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
+    |.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
+    |.      [------]      .      .      .      .| [1:2] VP -> Verb *
+    |[-------------]      .      .      .      .| [0:2] S  -> NP VP *
+    |.      [------>      .      .      .      .| [1:2] VP -> VP * PP
+    |.      .      >      .      .      .      .| [2:2] NP -> * NP PP
+    |.      .      >      .      .      .      .| [2:2] NP -> * Det Noun
+    |.      .      >      .      .      .      .| [2:2] NP -> * 'John'
+    |.      .      [------]      .      .      .| [2:3] NP -> 'John' *
+    |.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
+    |.      .      [------>      .      .      .| [2:3] NP -> NP * PP
+    |.      .      .      >      .      .      .| [3:3] PP -> * 'with' NP
+    |[--------------------]      .      .      .| [0:3] S  -> NP VP *
+    |.      [------------->      .      .      .| [1:3] VP -> VP * PP
+    |.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
+    |.      .      .      .      >      .      .| [4:4] NP -> * NP PP
+    |.      .      .      .      >      .      .| [4:4] NP -> * Det Noun
+    |.      .      .      .      >      .      .| [4:4] Det -> * 'a'
+    |.      .      .      .      [------]      .| [4:5] Det -> 'a' *
+    |.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
+    |.      .      .      .      .      >      .| [5:5] Noun -> * 'dog'
+    |.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
+    |.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
+    |.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
+    |.      .      .      .      [------------->| [4:6] NP -> NP * PP
+    |.      .      [---------------------------]| [2:6] NP -> NP PP *
+    |.      [----------------------------------]| [1:6] VP -> VP PP *
+    |[=========================================]| [0:6] S  -> NP VP *
+    |.      [---------------------------------->| [1:6] VP -> VP * PP
+    |.      [----------------------------------]| [1:6] VP -> Verb NP *
+    |.      .      [--------------------------->| [2:6] NP -> NP * PP
+    |[=========================================]| [0:6] S  -> NP VP *
+    |.      [---------------------------------->| [1:6] VP -> VP * PP
+    (S
+      (NP I)
+      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
+    (S
+      (NP I)
+      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
+
+
+Unit tests for LARGE context-free grammars
+------------------------------------------
+
+Reading the ATIS grammar.
+
+    >>> grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
+    >>> grammar
+    <Grammar with 5517 productions>
+
+Reading the test sentences.  Each extracted test sentence is a pair of a
+token list and the expected number of parse trees.
+
+    >>> sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
+    >>> sentences = nltk.parse.util.extract_test_sentences(sentences)
+    >>> len(sentences)
+    98
+    >>> testsentence = sentences[22]
+    >>> testsentence[0]
+    ['show', 'me', 'northwest', 'flights', 'to', 'detroit', '.']
+    >>> testsentence[1]
+    17
+    >>> sentence = testsentence[0]
+
+Now we test all the different parsing strategies.
+Note that the number of edges differs between the strategies,
+even though they all find the same 17 parses.
+
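+As an illustrative sketch (skipped when the suite runs), the strategies
+could also be compared programmatically:
+
+    >>> for strategy in [nltk.parse.BottomUpChartParser,  # doctest: +SKIP
+    ...                  nltk.parse.LeftCornerChartParser,
+    ...                  nltk.parse.TopDownChartParser]:
+    ...     chart = strategy(grammar).chart_parse(sentence)
+    ...     print(strategy.__name__, chart.num_edges())
+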
+Bottom-up parsing.
+
+    >>> parser = nltk.parse.BottomUpChartParser(grammar)
+    >>> chart = parser.chart_parse(sentence)
+    >>> print((chart.num_edges()))
+    7661
+    >>> print((len(list(chart.parses(grammar.start())))))
+    17
+
+Bottom-up Left-corner parsing.
+
+    >>> parser = nltk.parse.BottomUpLeftCornerChartParser(grammar)
+    >>> chart = parser.chart_parse(sentence)
+    >>> print((chart.num_edges()))
+    4986
+    >>> print((len(list(chart.parses(grammar.start())))))
+    17
+
+Left-corner parsing with bottom-up filter.
+
+    >>> parser = nltk.parse.LeftCornerChartParser(grammar)
+    >>> chart = parser.chart_parse(sentence)
+    >>> print((chart.num_edges()))
+    1342
+    >>> print((len(list(chart.parses(grammar.start())))))
+    17
+
+Top-down parsing.
+
+    >>> parser = nltk.parse.TopDownChartParser(grammar)
+    >>> chart = parser.chart_parse(sentence)
+    >>> print((chart.num_edges()))
+    28352
+    >>> print((len(list(chart.parses(grammar.start())))))
+    17
+
+Incremental Bottom-up parsing.
+
+    >>> parser = nltk.parse.IncrementalBottomUpChartParser(grammar)
+    >>> chart = parser.chart_parse(sentence)
+    >>> print((chart.num_edges()))
+    7661
+    >>> print((len(list(chart.parses(grammar.start())))))
+    17
+
+Incremental Bottom-up Left-corner parsing.
+
+    >>> parser = nltk.parse.IncrementalBottomUpLeftCornerChartParser(grammar)
+    >>> chart = parser.chart_parse(sentence)
+    >>> print((chart.num_edges()))
+    4986
+    >>> print((len(list(chart.parses(grammar.start())))))
+    17
+
+Incremental Left-corner parsing with bottom-up filter.
+
+    >>> parser = nltk.parse.IncrementalLeftCornerChartParser(grammar)
+    >>> chart = parser.chart_parse(sentence)
+    >>> print((chart.num_edges()))
+    1342
+    >>> print((len(list(chart.parses(grammar.start())))))
+    17
+
+Incremental Top-down parsing.
+
+    >>> parser = nltk.parse.IncrementalTopDownChartParser(grammar)
+    >>> chart = parser.chart_parse(sentence)
+    >>> print((chart.num_edges()))
+    28352
+    >>> print((len(list(chart.parses(grammar.start())))))
+    17
+
+Earley parsing. This is similar to the incremental top-down algorithm.
+
+    >>> parser = nltk.parse.EarleyChartParser(grammar)
+    >>> chart = parser.chart_parse(sentence)
+    >>> print((chart.num_edges()))
+    28352
+    >>> print((len(list(chart.parses(grammar.start())))))
+    17
+
+
+Unit tests for the Probabilistic CFG class
+------------------------------------------
+
+    >>> from nltk.corpus import treebank
+    >>> from itertools import islice
+    >>> from nltk.grammar import PCFG, induce_pcfg, toy_pcfg1, toy_pcfg2
+
+Create a set of PCFG productions.
+
+    >>> grammar = PCFG.fromstring("""
+    ... A -> B B [.3] | C B C [.7]
+    ... B -> B D [.5] | C [.5]
+    ... C -> 'a' [.1] | 'b' [0.9]
+    ... D -> 'b' [1.0]
+    ... """)
+    >>> prod = grammar.productions()[0]
+    >>> prod
+    A -> B B [0.3]
+
+    >>> prod.lhs()
+    A
+
+    >>> prod.rhs()
+    (B, B)
+
+    >>> print((prod.prob()))
+    0.3
+
+    >>> grammar.start()
+    A
+
+    >>> grammar.productions()
+    [A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1.0]]
+
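+Since the grammar is a PCFG, the probabilities of the productions for each
+left-hand side should sum to one.  A quick sanity check (an illustrative
+sketch, skipped when the suite runs):
+
+    >>> from collections import defaultdict
+    >>> totals = defaultdict(float)
+    >>> for p in grammar.productions():
+    ...     totals[p.lhs()] += p.prob()
+    >>> all(abs(total - 1.0) < 1e-8 for total in totals.values())  # doctest: +SKIP
+    True
+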
+Induce some productions using parsed Treebank data.
+
+    >>> productions = []
+    >>> for fileid in treebank.fileids()[:2]:
+    ...     for t in treebank.parsed_sents(fileid):
+    ...         productions += t.productions()
+
+    >>> grammar = induce_pcfg(S, productions)
+    >>> grammar
+    <Grammar with 71 productions>
+
+    >>> sorted(grammar.productions(lhs=Nonterminal('PP')))[:2]
+    [PP -> IN NP [1.0]]
+    >>> sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2]
+    [NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]]
+    >>> sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2]
+    [JJ -> 'British' [0.142857], JJ -> 'former' [0.142857]]
+    >>> sorted(grammar.productions(lhs=Nonterminal('NP')))[:2]
+    [NP -> CD NNS [0.133333], NP -> DT JJ JJ NN [0.0666667]]
+
+Unit tests for the Probabilistic Chart Parse classes
+----------------------------------------------------
+
+    >>> tokens = "Jack saw Bob with my cookie".split()
+    >>> grammar = toy_pcfg2
+    >>> print(grammar)
+    Grammar with 23 productions (start state = S)
+        S -> NP VP [1.0]
+        VP -> V NP [0.59]
+        VP -> V [0.4]
+        VP -> VP PP [0.01]
+        NP -> Det N [0.41]
+        NP -> Name [0.28]
+        NP -> NP PP [0.31]
+        PP -> P NP [1.0]
+        V -> 'saw' [0.21]
+        V -> 'ate' [0.51]
+        V -> 'ran' [0.28]
+        N -> 'boy' [0.11]
+        N -> 'cookie' [0.12]
+        N -> 'table' [0.13]
+        N -> 'telescope' [0.14]
+        N -> 'hill' [0.5]
+        Name -> 'Jack' [0.52]
+        Name -> 'Bob' [0.48]
+        P -> 'with' [0.61]
+        P -> 'under' [0.39]
+        Det -> 'the' [0.41]
+        Det -> 'a' [0.31]
+        Det -> 'my' [0.28]
+
+Create several parsers using different queuing strategies and show the
+resulting parses.
+
+    >>> from nltk.parse import pchart
+
+    >>> parser = pchart.InsideChartParser(grammar)
+    >>> for t in parser.parse(tokens):
+    ...     print(t)
+    (S
+      (NP (Name Jack))
+      (VP
+        (V saw)
+        (NP
+          (NP (Name Bob))
+          (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
+    (S
+      (NP (Name Jack))
+      (VP
+        (VP (V saw) (NP (Name Bob)))
+        (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
+
+    >>> parser = pchart.RandomChartParser(grammar)
+    >>> for t in parser.parse(tokens):
+    ...     print(t)
+    (S
+      (NP (Name Jack))
+      (VP
+        (V saw)
+        (NP
+          (NP (Name Bob))
+          (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
+    (S
+      (NP (Name Jack))
+      (VP
+        (VP (V saw) (NP (Name Bob)))
+        (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
+
+    >>> parser = pchart.UnsortedChartParser(grammar)
+    >>> for t in parser.parse(tokens):
+    ...     print(t)
+    (S
+      (NP (Name Jack))
+      (VP
+        (V saw)
+        (NP
+          (NP (Name Bob))
+          (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
+    (S
+      (NP (Name Jack))
+      (VP
+        (VP (V saw) (NP (Name Bob)))
+        (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
+
+    >>> parser = pchart.LongestChartParser(grammar)
+    >>> for t in parser.parse(tokens):
+    ...     print(t)
+    (S
+      (NP (Name Jack))
+      (VP
+        (V saw)
+        (NP
+          (NP (Name Bob))
+          (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
+    (S
+      (NP (Name Jack))
+      (VP
+        (VP (V saw) (NP (Name Bob)))
+        (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
+
+Note that a parser with a very small beam size may prune away all complete
+parses, in which case no trees are printed:
+
+    >>> parser = pchart.InsideChartParser(grammar, beam_size=len(tokens)+1)
+    >>> for t in parser.parse(tokens):
+    ...     print(t)
+
+
+Unit tests for the Viterbi Parse classes
+----------------------------------------
+
+    >>> from nltk.parse import ViterbiParser
+    >>> tokens = "Jack saw Bob with my cookie".split()
+    >>> grammar = toy_pcfg2
+
+Parse the tokenized sentence.
+
+    >>> parser = ViterbiParser(grammar)
+    >>> for t in parser.parse(tokens):
+    ...     print(t)
+    (S
+      (NP (Name Jack))
+      (VP
+        (V saw)
+        (NP
+          (NP (Name Bob))
+          (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
+
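+The parser constructor also accepts a trace argument; higher values print
+the parser's progress step by step (an illustrative sketch, skipped here):
+
+    >>> parser = ViterbiParser(grammar, trace=2)  # doctest: +SKIP
+    >>> for t in parser.parse(tokens):  # doctest: +SKIP
+    ...     print(t)
+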
+
+Unit tests for the FeatStructNonterminal class
+----------------------------------------------
+
+    >>> from nltk.grammar import FeatStructNonterminal
+    >>> FeatStructNonterminal(
+    ...     pos='n', agr=FeatStructNonterminal(number='pl', gender='f'))
+    [agr=[gender='f', number='pl'], pos='n']
+
+    >>> FeatStructNonterminal('VP[+fin]/NP[+pl]')
+    VP[+fin]/NP[+pl]
+
+
+Tracing the Feature Chart Parser
+--------------------------------
+
+We use the featurechart.demo() function for tracing the Feature Chart Parser.
+
+    >>> nltk.parse.featurechart.demo(print_times=False,
+    ...                              print_grammar=True,
+    ...                              parser=nltk.parse.featurechart.FeatureChartParser,
+    ...                              sent='I saw John with a dog')
+    <BLANKLINE>
+    Grammar with 18 productions (start state = S[])
+        S[] -> NP[] VP[]
+        PP[] -> Prep[] NP[]
+        NP[] -> NP[] PP[]
+        VP[] -> VP[] PP[]
+        VP[] -> Verb[] NP[]
+        VP[] -> Verb[]
+        NP[] -> Det[pl=?x] Noun[pl=?x]
+        NP[] -> 'John'
+        NP[] -> 'I'
+        Det[] -> 'the'
+        Det[] -> 'my'
+        Det[-pl] -> 'a'
+        Noun[-pl] -> 'dog'
+        Noun[-pl] -> 'cookie'
+        Verb[] -> 'ate'
+        Verb[] -> 'saw'
+        Prep[] -> 'with'
+        Prep[] -> 'under'
+    <BLANKLINE>
+    * FeatureChartParser
+    Sentence: I saw John with a dog
+    |.I.s.J.w.a.d.|
+    |[-] . . . . .| [0:1] 'I'
+    |. [-] . . . .| [1:2] 'saw'
+    |. . [-] . . .| [2:3] 'John'
+    |. . . [-] . .| [3:4] 'with'
+    |. . . . [-] .| [4:5] 'a'
+    |. . . . . [-]| [5:6] 'dog'
+    |[-] . . . . .| [0:1] NP[] -> 'I' *
+    |[-> . . . . .| [0:1] S[] -> NP[] * VP[] {}
+    |[-> . . . . .| [0:1] NP[] -> NP[] * PP[] {}
+    |. [-] . . . .| [1:2] Verb[] -> 'saw' *
+    |. [-> . . . .| [1:2] VP[] -> Verb[] * NP[] {}
+    |. [-] . . . .| [1:2] VP[] -> Verb[] *
+    |. [-> . . . .| [1:2] VP[] -> VP[] * PP[] {}
+    |[---] . . . .| [0:2] S[] -> NP[] VP[] *
+    |. . [-] . . .| [2:3] NP[] -> 'John' *
+    |. . [-> . . .| [2:3] S[] -> NP[] * VP[] {}
+    |. . [-> . . .| [2:3] NP[] -> NP[] * PP[] {}
+    |. [---] . . .| [1:3] VP[] -> Verb[] NP[] *
+    |. [---> . . .| [1:3] VP[] -> VP[] * PP[] {}
+    |[-----] . . .| [0:3] S[] -> NP[] VP[] *
+    |. . . [-] . .| [3:4] Prep[] -> 'with' *
+    |. . . [-> . .| [3:4] PP[] -> Prep[] * NP[] {}
+    |. . . . [-] .| [4:5] Det[-pl] -> 'a' *
+    |. . . . [-> .| [4:5] NP[] -> Det[pl=?x] * Noun[pl=?x] {?x: False}
+    |. . . . . [-]| [5:6] Noun[-pl] -> 'dog' *
+    |. . . . [---]| [4:6] NP[] -> Det[-pl] Noun[-pl] *
+    |. . . . [--->| [4:6] S[] -> NP[] * VP[] {}
+    |. . . . [--->| [4:6] NP[] -> NP[] * PP[] {}
+    |. . . [-----]| [3:6] PP[] -> Prep[] NP[] *
+    |. . [-------]| [2:6] NP[] -> NP[] PP[] *
+    |. [---------]| [1:6] VP[] -> VP[] PP[] *
+    |. [--------->| [1:6] VP[] -> VP[] * PP[] {}
+    |[===========]| [0:6] S[] -> NP[] VP[] *
+    |. . [------->| [2:6] S[] -> NP[] * VP[] {}
+    |. . [------->| [2:6] NP[] -> NP[] * PP[] {}
+    |. [---------]| [1:6] VP[] -> Verb[] NP[] *
+    |. [--------->| [1:6] VP[] -> VP[] * PP[] {}
+    |[===========]| [0:6] S[] -> NP[] VP[] *
+    (S[]
+      (NP[] I)
+      (VP[]
+        (VP[] (Verb[] saw) (NP[] John))
+        (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog)))))
+    (S[]
+      (NP[] I)
+      (VP[]
+        (Verb[] saw)
+        (NP[]
+          (NP[] John)
+          (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog))))))
+
+
+Unit tests for the Feature Chart Parser classes
+-----------------------------------------------
+
+The list of parsers we want to test.
+
+    >>> parsers = [nltk.parse.featurechart.FeatureChartParser,
+    ...            nltk.parse.featurechart.FeatureTopDownChartParser,
+    ...            nltk.parse.featurechart.FeatureBottomUpChartParser,
+    ...            nltk.parse.featurechart.FeatureBottomUpLeftCornerChartParser,
+    ...            nltk.parse.earleychart.FeatureIncrementalChartParser,
+    ...            nltk.parse.earleychart.FeatureEarleyChartParser,
+    ...            nltk.parse.earleychart.FeatureIncrementalTopDownChartParser,
+    ...            nltk.parse.earleychart.FeatureIncrementalBottomUpChartParser,
+    ...            nltk.parse.earleychart.FeatureIncrementalBottomUpLeftCornerChartParser,
+    ...            ]
+
+A helper function that tests each parser on the given grammar and sentence.
+We check that the number of trees is correct and that all parsers
+return the same trees; otherwise an error message is printed.
+
+    >>> def unittest(grammar, sentence, nr_trees):
+    ...     sentence = sentence.split()
+    ...     trees = None
+    ...     for P in parsers:
+    ...         result = P(grammar).parse(sentence)
+    ...         result = set(tree.freeze() for tree in result)
+    ...         if len(result) != nr_trees:
+    ...             print("Wrong nr of trees:", len(result))
+    ...         elif trees is None:
+    ...             trees = result
+    ...         elif result != trees:
+    ...             print("Trees differ for parser:", P.__name__)
+
+The demo grammar from before, with an ambiguous sentence.
+
+    >>> isawjohn = nltk.parse.featurechart.demo_grammar()
+    >>> unittest(isawjohn, "I saw John with a dog with my cookie", 5)
+
+This grammar tests that variables in different grammar rules are renamed
+before unification.  (The problematic variable in this case is ?X.)
+
+    >>> whatwasthat = nltk.grammar.FeatureGrammar.fromstring('''
+    ... S[] -> NP[num=?N] VP[num=?N, slash=?X]
+    ... NP[num=?X] -> "what"
+    ... NP[num=?X] -> "that"
+    ... VP[num=?P, slash=none] -> V[num=?P] NP[]
+    ... V[num=sg] -> "was"
+    ... ''')
+    >>> unittest(whatwasthat, "what was that", 1)
+
+This grammar tests that the same rule can be used in different places
+in another rule, and that the variables are properly renamed.
+
+    >>> thislovesthat = nltk.grammar.FeatureGrammar.fromstring('''
+    ... S[] -> NP[case=nom] V[] NP[case=acc]
+    ... NP[case=?X] -> Pron[case=?X]
+    ... Pron[] -> "this"
+    ... Pron[] -> "that"
+    ... V[] -> "loves"
+    ... ''')
+    >>> unittest(thislovesthat, "this loves that", 1)
+
+
+Tests for loading feature grammar files
+---------------------------------------
+
+Alternative 1: first load the grammar, then create the parser.
+
+    >>> fcfg = nltk.data.load('grammars/book_grammars/feat0.fcfg')
+    >>> fcp1 = nltk.parse.FeatureChartParser(fcfg)
+    >>> print((type(fcp1)))
+    <class 'nltk.parse.featurechart.FeatureChartParser'>
+
+Alternative 2: directly load the parser.
+
+    >>> fcp2 = nltk.parse.load_parser('grammars/book_grammars/feat0.fcfg')
+    >>> print((type(fcp2)))
+    <class 'nltk.parse.featurechart.FeatureChartParser'>
+
+
+
diff --git a/nlp_resource_data/nltk/test/portuguese_en.doctest b/nlp_resource_data/nltk/test/portuguese_en.doctest
new file mode 100644 (file)
index 0000000..84cee4a
--- /dev/null
@@ -0,0 +1,565 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==================================
+Examples for Portuguese Processing
+==================================
+
+This HOWTO contains a variety of examples relating to the Portuguese language.
+It is intended to be read in conjunction with the NLTK book
+(``http://nltk.org/book``).  For instructions on running the Python
+interpreter, please see the section *Getting Started with Python*, in Chapter 1.
+
+--------------------------------------------
+Python Programming, with Portuguese Examples
+--------------------------------------------
+
+Chapter 1 of the NLTK book contains many elementary programming examples, all
+with English texts.  In this section, we'll see some corresponding examples
+using Portuguese.  Please refer to the chapter for full discussion.  *Vamos!*
+
+    >>> from nltk.examples.pt import *
+    *** Introductory Examples for the NLTK Book ***
+    Loading ptext1, ... and psent1, ...
+    Type the name of the text or sentence to view it.
+    Type: 'texts()' or 'sents()' to list the materials.
+    ptext1: Memórias Póstumas de Brás Cubas (1881)
+    ptext2: Dom Casmurro (1899)
+    ptext3: Gênesis
+    ptext4: Folha de Sao Paulo (1994)
+
+
+Any time we want to find out about these texts, we just have
+to enter their names at the Python prompt:
+
+    >>> ptext2
+    <Text: Dom Casmurro (1899)>
+
+Searching Text
+--------------
+
+A concordance permits us to see words in context.
+
+    >>> ptext1.concordance('olhos')
+    Building index...
+    Displaying 25 of 138 matches:
+    De pé , à cabeceira da cama , com os olhos estúpidos , a boca entreaberta , a t
+    orelhas . Pela minha parte fechei os olhos e deixei - me ir à ventura . Já agor
+    xões de cérebro enfermo . Como ia de olhos fechados , não via o caminho ; lembr
+    gelos eternos . Com efeito , abri os olhos e vi que o meu animal galopava numa
+    me apareceu então , fitando - me uns olhos rutilantes como o sol . Tudo nessa f
+     mim mesmo . Então , encarei - a com olhos súplices , e pedi mais alguns anos .
+    ...
+
+For a given word, we can find words with a similar text distribution:
+
+    >>> ptext1.similar('chegar')
+    Building word-context index...
+    acabada acudir aludir avistar bramanismo casamento cheguei com contar
+    contrário corpo dali deixei desferirem dizer fazer filhos já leitor lhe
+    >>> ptext3.similar('chegar')
+    Building word-context index...
+    achar alumiar arrombar destruir governar guardar ir lavrar passar que
+    toda tomar ver vir
+
+We can search for the statistically significant collocations in a text:
+
+    >>> ptext1.collocations()
+    Building collocations list
+    Quincas Borba; Lobo Neves; alguma coisa; Brás Cubas; meu pai; dia
+    seguinte; não sei; Meu pai; alguns instantes; outra vez; outra coisa;
+    por exemplo; mim mesmo; coisa nenhuma; mesma coisa; não era; dias
+    depois; Passeio Público; olhar para; das coisas
+
+We can search for words in context, with the help of *regular expressions*, e.g.:
+
+    >>> ptext1.findall("<olhos> (<.*>)")
+    estúpidos; e; fechados; rutilantes; súplices; a; do; babavam;
+    na; moles; se; da; umas; espraiavam; chamejantes; espetados;
+    ...
+
+We can automatically generate random text based on a given text, e.g.:
+
+    >>> ptext3.generate() # doctest: +SKIP
+    No princípio , criou Deus os abençoou , dizendo : Onde { estão } e até
+    à ave dos céus , { que } será . Disse mais Abrão : Dá - me a mulher
+    que tomaste ; porque daquele poço Eseque , { tinha .} E disse : Não
+    poderemos descer ; mas , do campo ainda não estava na casa do teu
+    pescoço . E viveu Serugue , depois Simeão e Levi { são } estes ? E o
+    varão , porque habitava na terra de Node , da mão de Esaú : Jeús ,
+    Jalão e Corá
+
+Texts as List of Words
+----------------------
+
+A few sentences have been defined for you.
+
+    >>> psent1
+    ['o', 'amor', 'da', 'gl\xf3ria', 'era', 'a', 'coisa', 'mais',
+    'verdadeiramente', 'humana', 'que', 'h\xe1', 'no', 'homem', ',',
+    'e', ',', 'conseq\xfcentemente', ',', 'a', 'sua', 'mais',
+    'genu\xedna', 'fei\xe7\xe3o', '.']
+    >>>
+
+Notice that the sentence has been *tokenized*.  Each token is
+represented as a string, written using quotes, e.g. ``'coisa'``.
+Some strings contain special characters, e.g. ``\xf3``,
+the internal representation for ó.
+The tokens are combined in the form of a *list*.  How long is this list?
+
+    >>> len(psent1)
+    25
+    >>>
+
+What is the vocabulary of this sentence?
+
+    >>> sorted(set(psent1))
+    [',', '.', 'a', 'amor', 'coisa', 'conseqüentemente', 'da', 'e', 'era',
+     'feição', 'genuína', 'glória', 'homem', 'humana', 'há', 'mais', 'no',
+     'o', 'que', 'sua', 'verdadeiramente']
+    >>>
+
+Let's iterate over each item in ``psent2``, and print information for each:
+
+    >>> for w in psent2:
+    ...     print(w, len(w), w[-1])
+    ...
+    Não 3 o
+    consultes 9 s
+    dicionários 11 s
+    . 1 .
+
+Notice that we accessed the last character of a string ``w`` using ``w[-1]``.
+
+We just saw a ``for`` loop above.  Another useful control structure is a
+*list comprehension*.
+
+    >>> [w.upper() for w in psent2]
+    ['N\xc3O', 'CONSULTES', 'DICION\xc1RIOS', '.']
+    >>> [w for w in psent1 if w.endswith('a')]
+    ['da', 'gl\xf3ria', 'era', 'a', 'coisa', 'humana', 'a', 'sua', 'genu\xedna']
+    >>> [w for w in ptext4 if len(w) > 15]
+    ['norte-irlandeses', 'pan-nacionalismo', 'predominatemente', 'primeiro-ministro',
+    'primeiro-ministro', 'irlandesa-americana', 'responsabilidades', 'significativamente']
+
+We can examine the relative frequency of words in a text, using ``FreqDist``:
+
+    >>> fd1 = FreqDist(ptext1)
+    >>> fd1
+    <FreqDist with 10848 samples and 77098 outcomes>
+    >>> fd1['olhos']
+    137
+    >>> fd1.max()
+    ','
+    >>> [word for word, _ in fd1.most_common(100)]
+    [',', '.', 'a', 'que', 'de', 'e', '-', 'o', ';', 'me', 'um', 'n\xe3o',
+    '\x97', 'se', 'do', 'da', 'uma', 'com', 'os', '\xe9', 'era', 'as', 'eu',
+    'lhe', 'ao', 'em', 'para', 'mas', '...', '!', '\xe0', 'na', 'mais', '?',
+    'no', 'como', 'por', 'N\xe3o', 'dos', 'o', 'ele', ':', 'Virg\xedlia',
+    'me', 'disse', 'minha', 'das', 'O', '/', 'A', 'CAP\xcdTULO', 'muito',
+    'depois', 'coisa', 'foi', 'sem', 'olhos', 'ela', 'nos', 'tinha', 'nem',
+    'E', 'outro', 'vida', 'nada', 'tempo', 'menos', 'outra', 'casa', 'homem',
+    'porque', 'quando', 'mim', 'mesmo', 'ser', 'pouco', 'estava', 'dia',
+    't\xe3o', 'tudo', 'Mas', 'at\xe9', 'D', 'ainda', 's\xf3', 'alguma',
+    'la', 'vez', 'anos', 'h\xe1', 'Era', 'pai', 'esse', 'lo', 'dizer', 'assim',
+    'ent\xe3o', 'dizia', 'aos', 'Borba']
+
+---------------
+Reading Corpora
+---------------
+
+Accessing the Machado Text Corpus
+---------------------------------
+
+NLTK includes the complete works of Machado de Assis.
+
+    >>> from nltk.corpus import machado
+    >>> machado.fileids()
+    ['contos/macn001.txt', 'contos/macn002.txt', 'contos/macn003.txt', ...]
+
+Each file corresponds to one of the works of Machado de Assis.  To see a complete
+list of works, you can look at the corpus README file: ``print(machado.readme())``.
+Let's access the text of the *Posthumous Memories of Brás Cubas*.
+
+We can access the text as a sequence of characters, and extract 200 characters
+starting at position 10,000.
+
+    >>> raw_text = machado.raw('romance/marm05.txt')
+    >>> raw_text[10000:10200]
+    u', primou no\nEstado, e foi um dos amigos particulares do vice-rei Conde
+    da Cunha.\n\nComo este apelido de Cubas lhe\ncheirasse excessivamente a
+    tanoaria, alegava meu pai, bisneto de Dami\xe3o, que o\ndito ape'
+
+However, this is not a very useful way to work with a text.  We generally think
+of a text as a sequence of words and punctuation, not characters:
+
+    >>> text1 = machado.words('romance/marm05.txt')
+    >>> text1
+    ['Romance', ',', 'Mem\xf3rias', 'P\xf3stumas', 'de', ...]
+    >>> len(text1)
+    77098
+    >>> len(set(text1))
+    10848
+
+Here's a program that finds the most common ngrams that contain a
+particular target word.
+
+    >>> from nltk import ngrams, FreqDist
+    >>> target_word = 'olhos'
+    >>> fd = FreqDist(ng
+    ...               for ng in ngrams(text1, 5)
+    ...               if target_word in ng)
+    >>> for hit, _ in fd.most_common():
+    ...     print(' '.join(hit))
+    ...
+    , com os olhos no
+    com os olhos no ar
+    com os olhos no chão
+    e todos com os olhos
+    me estar com os olhos
+    os olhos estúpidos , a
+    os olhos na costura ,
+    os olhos no ar ,
+    , com os olhos espetados
+    , com os olhos estúpidos
+    , com os olhos fitos
+    , com os olhos naquele
+    , com os olhos para
+
+
+Accessing the MacMorpho Tagged Corpus
+-------------------------------------
+
+NLTK includes the MAC-MORPHO Brazilian Portuguese POS-tagged news text,
+with over a million words of
+journalistic texts extracted from ten sections of
+the daily newspaper *Folha de Sao Paulo*, 1994.
+
+We can access this corpus as a sequence of words or tagged words as follows:
+
+    >>> import nltk.corpus
+    >>> nltk.corpus.mac_morpho.words()
+    ['Jersei', 'atinge', 'm\xe9dia', 'de', 'Cr$', '1,4', ...]
+    >>> nltk.corpus.mac_morpho.sents() # doctest: +NORMALIZE_WHITESPACE
+    [['Jersei', 'atinge', 'm\xe9dia', 'de', 'Cr$', '1,4', 'milh\xe3o',
+    'em', 'a', 'venda', 'de', 'a', 'Pinhal', 'em', 'S\xe3o', 'Paulo'],
+    ['Programe', 'sua', 'viagem', 'a', 'a', 'Exposi\xe7\xe3o', 'Nacional',
+    'do', 'Zeb', ',', 'que', 'come\xe7a', 'dia', '25'], ...]
+    >>> nltk.corpus.mac_morpho.tagged_words()
+    [('Jersei', 'N'), ('atinge', 'V'), ('m\xe9dia', 'N'), ...]
+
+We can also access it in sentence chunks.
+
+    >>> nltk.corpus.mac_morpho.tagged_sents() # doctest: +NORMALIZE_WHITESPACE
+    [[('Jersei', 'N'), ('atinge', 'V'), ('m\xe9dia', 'N'), ('de', 'PREP'),
+      ('Cr$', 'CUR'), ('1,4', 'NUM'), ('milh\xe3o', 'N'), ('em', 'PREP|+'),
+      ('a', 'ART'), ('venda', 'N'), ('de', 'PREP|+'), ('a', 'ART'),
+      ('Pinhal', 'NPROP'), ('em', 'PREP'), ('S\xe3o', 'NPROP'),
+      ('Paulo', 'NPROP')],
+     [('Programe', 'V'), ('sua', 'PROADJ'), ('viagem', 'N'), ('a', 'PREP|+'),
+      ('a', 'ART'), ('Exposi\xe7\xe3o', 'NPROP'), ('Nacional', 'NPROP'),
+      ('do', 'NPROP'), ('Zeb', 'NPROP'), (',', ','), ('que', 'PRO-KS-REL'),
+      ('come\xe7a', 'V'), ('dia', 'N'), ('25', 'N|AP')], ...]
+
+This data can be used to train taggers (examples below for the Floresta treebank).
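+
+For example, a unigram tagger could be trained on a slice of these tagged
+sentences (an illustrative sketch, skipped here; the Floresta section below
+evaluates such taggers properly):
+
+    >>> tsents = nltk.corpus.mac_morpho.tagged_sents()  # doctest: +SKIP
+    >>> tagger = nltk.UnigramTagger(tsents[:1000])  # doctest: +SKIP
+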
+
+Accessing the Floresta Portuguese Treebank
+------------------------------------------
+
+The NLTK data distribution includes the
+"Floresta Sinta(c)tica Corpus" version 7.4, available from
+``http://www.linguateca.pt/Floresta/``.
+
+We can access this corpus as a sequence of words or tagged words as follows:
+
+    >>> from nltk.corpus import floresta
+    >>> floresta.words()
+    ['Um', 'revivalismo', 'refrescante', 'O', '7_e_Meio', ...]
+    >>> floresta.tagged_words()
+    [('Um', '>N+art'), ('revivalismo', 'H+n'), ...]
+
+The tags consist of some syntactic information, followed by a plus sign,
+followed by a conventional part-of-speech tag.  Let's strip off the material before
+the plus sign:
+
+    >>> def simplify_tag(t):
+    ...     if "+" in t:
+    ...         return t[t.index("+")+1:]
+    ...     else:
+    ...         return t
+    >>> twords = floresta.tagged_words()
+    >>> twords = [(w.lower(), simplify_tag(t)) for (w,t) in twords]
+    >>> twords[:10]
+    [('um', 'art'), ('revivalismo', 'n'), ('refrescante', 'adj'), ('o', 'art'), ('7_e_meio', 'prop'),
+    ('\xe9', 'v-fin'), ('um', 'art'), ('ex-libris', 'n'), ('de', 'prp'), ('a', 'art')]
+
+Pretty printing the tagged words:
+
+    >>> print(' '.join(word + '/' + tag for (word, tag) in twords[:10]))
+    um/art revivalismo/n refrescante/adj o/art 7_e_meio/prop é/v-fin um/art ex-libris/n de/prp a/art
+
+Count the word tokens and types, and determine the most common word:
+
+    >>> words = floresta.words()
+    >>> len(words)
+    211852
+    >>> fd = nltk.FreqDist(words)
+    >>> len(fd)
+    29421
+    >>> fd.max()
+    'de'
+
+List the 20 most frequent tags, in order of decreasing frequency:
+
+    >>> tags = [simplify_tag(tag) for (word,tag) in floresta.tagged_words()]
+    >>> fd = nltk.FreqDist(tags)
+    >>> [tag for tag, _ in fd.most_common(20)] # doctest: +NORMALIZE_WHITESPACE
+    ['n', 'prp', 'art', 'v-fin', ',', 'prop', 'adj', 'adv', '.',
+     'conj-c', 'v-inf', 'pron-det', 'v-pcp', 'num', 'pron-indp',
+     'pron-pers', '\xab', '\xbb', 'conj-s', '}']
+
+We can also access the corpus grouped by sentence:
+
+    >>> floresta.sents() # doctest: +NORMALIZE_WHITESPACE
+    [['Um', 'revivalismo', 'refrescante'],
+     ['O', '7_e_Meio', '\xe9', 'um', 'ex-libris', 'de', 'a', 'noite',
+      'algarvia', '.'], ...]
+    >>> floresta.tagged_sents() # doctest: +NORMALIZE_WHITESPACE
+    [[('Um', '>N+art'), ('revivalismo', 'H+n'), ('refrescante', 'N<+adj')],
+     [('O', '>N+art'), ('7_e_Meio', 'H+prop'), ('\xe9', 'P+v-fin'),
+      ('um', '>N+art'), ('ex-libris', 'H+n'), ('de', 'H+prp'),
+      ('a', '>N+art'), ('noite', 'H+n'), ('algarvia', 'N<+adj'), ('.', '.')],
+     ...]
+    >>> floresta.parsed_sents() # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    [Tree('UTT+np', [Tree('>N+art', ['Um']), Tree('H+n', ['revivalismo']),
+                     Tree('N<+adj', ['refrescante'])]),
+     Tree('STA+fcl',
+         [Tree('SUBJ+np', [Tree('>N+art', ['O']),
+                           Tree('H+prop', ['7_e_Meio'])]),
+          Tree('P+v-fin', ['\xe9']),
+          Tree('SC+np',
+             [Tree('>N+art', ['um']),
+              Tree('H+n', ['ex-libris']),
+              Tree('N<+pp', [Tree('H+prp', ['de']),
+                             Tree('P<+np', [Tree('>N+art', ['a']),
+                                            Tree('H+n', ['noite']),
+                                            Tree('N<+adj', ['algarvia'])])])]),
+          Tree('.', ['.'])]), ...]
+
+To view a parse tree, use the ``draw()`` method, e.g.:
+
+    >>> psents = floresta.parsed_sents()
+    >>> psents[5].draw() # doctest: +SKIP
+
+Character Encodings
+-------------------
+
+Python understands the common character encoding used for Portuguese, ISO 8859-1 (ISO Latin 1).
+
+    >>> import os, nltk.test
+    >>> testdir = os.path.split(nltk.test.__file__)[0]
+    >>> text = open(os.path.join(testdir, 'floresta.txt'), 'rb').read().decode('ISO 8859-1')
+    >>> text[:60]
+    'O 7 e Meio \xe9 um ex-libris da noite algarvia.\n\xc9 uma das mais '
+    >>> print(text[:60])
+    O 7 e Meio é um ex-libris da noite algarvia.
+    É uma das mais
+
+For more information about character encodings and Python, please see section 3.3 of the book.
+
+----------------
+Processing Tasks
+----------------
+
+
+Simple Concordancing
+--------------------
+
+Here's a function that takes a word and a specified amount of context (measured
+in characters), and generates a concordance for that word.
+
+    >>> def concordance(word, context=30):
+    ...     for sent in floresta.sents():
+    ...         if word in sent:
+    ...             pos = sent.index(word)
+    ...             left = ' '.join(sent[:pos])
+    ...             right = ' '.join(sent[pos+1:])
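+    ...             # '%*s' right-aligns the left context in a field of
+    ...             # `context` characters; '%-*s' left-aligns the right context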
+    ...             print('%*s %s %-*s' %
+    ...                 (context, left[-context:], word, context, right[:context]))
+
+    >>> concordance("dar") # doctest: +SKIP
+    anduru , foi o suficiente para dar a volta a o resultado .
+                 1. O P?BLICO veio dar a a imprensa di?ria portuguesa
+      A fartura de pensamento pode dar maus resultados e n?s n?o quer
+                          Come?a a dar resultados a pol?tica de a Uni
+    ial come?ar a incorporar- lo e dar forma a um ' site ' que tem se
+    r com Constantino para ele lhe dar tamb?m os pap?is assinados .
+    va a brincar , pois n?o lhe ia dar procura??o nenhuma enquanto n?
+    ?rica como o ant?doto capaz de dar sentido a o seu enorme poder .
+    . . .
+    >>> concordance("vender") # doctest: +SKIP
+    er recebido uma encomenda para vender 4000 blindados a o Iraque .
+    m?rico_Amorim caso conseguisse vender o lote de ac??es de o empres?r
+    mpre ter jovens simp?ticos a ? vender ? chega ! }
+           Disse que o governo vai vender ? desde autom?vel at? particip
+    ndiciou ontem duas pessoas por vender carro com ?gio .
+            A inten??o de Fleury ? vender as a??es para equilibrar as fi
+
+Part-of-Speech Tagging
+----------------------
+
+Let's begin by getting the tagged sentence data, and simplifying the tags
+as described earlier.
+
+    >>> from nltk.corpus import floresta
+    >>> tsents = floresta.tagged_sents()
+    >>> tsents = [[(w.lower(),simplify_tag(t)) for (w,t) in sent] for sent in tsents if sent]
+    >>> train = tsents[100:]
+    >>> test = tsents[:100]
+
+We already know that ``n`` is the most common tag, so we can set up a
+default tagger that tags every word as a noun, and see how well it does:
+
+    >>> tagger0 = nltk.DefaultTagger('n')
+    >>> tagger0.evaluate(test)
+    0.17697228144989338
+
+Evidently, about one in every six words is a noun.  Let's improve on this by
+training a unigram tagger:
+
+    >>> tagger1 = nltk.UnigramTagger(train, backoff=tagger0)
+    >>> tagger1.evaluate(test)
+    0.87029140014214645
+
+Next a bigram tagger:
+
+    >>> tagger2 = nltk.BigramTagger(train, backoff=tagger1)
+    >>> tagger2.evaluate(test)
+    0.89019189765458417
+
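+The backoff chain can be extended with a trigram tagger in the same way
+(an illustrative sketch, skipped here):
+
+    >>> tagger3 = nltk.TrigramTagger(train, backoff=tagger2)  # doctest: +SKIP
+    >>> tagger3.evaluate(test)  # doctest: +SKIP
+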
+
+Sentence Segmentation
+---------------------
+
+Punkt is a language-neutral sentence segmentation tool.  We load a model
+that has been pre-trained on Portuguese, and use it to split the raw text
+of a Machado novel into sentences:
+
+    >>> sent_tokenizer=nltk.data.load('tokenizers/punkt/portuguese.pickle')
+    >>> raw_text = machado.raw('romance/marm05.txt')
+    >>> sentences = sent_tokenizer.tokenize(raw_text)
+    >>> for sent in sentences[1000:1005]:
+    ...     print("<<", sent, ">>")
+    ...
+    << Em verdade, parecia ainda mais mulher do que era;
+    seria criança nos seus folgares de moça; mas assim quieta, impassível, tinha a
+    compostura da mulher casada. >>
+    << Talvez essa circunstância lhe diminuía um pouco da
+    graça virginal. >>
+    << Depressa nos familiarizamos; a mãe fazia-lhe grandes elogios, eu
+    escutava-os de boa sombra, e ela sorria com os olhos fúlgidos, como se lá dentro
+    do cérebro lhe estivesse a voar uma borboletinha de asas de ouro e olhos de
+    diamante... >>
+    << Digo lá dentro, porque cá fora o
+    que esvoaçou foi uma borboleta preta, que subitamente penetrou na varanda, e
+    começou a bater as asas em derredor de D. Eusébia. >>
+    << D. Eusébia deu um grito,
+    levantou-se, praguejou umas palavras soltas: - T'esconjuro!... >>
+
+The sentence tokenizer can be trained and evaluated on other text.
+The source text (from the Floresta Portuguese Treebank) contains one sentence per line.
+We read the text, split it into its lines, and then join these lines together using
+spaces.  Now the information about sentence breaks has been discarded.  We split this
+material into training and testing data:
+
+    >>> import os, nltk.test
+    >>> testdir = os.path.split(nltk.test.__file__)[0]
+    >>> text = open(os.path.join(testdir, 'floresta.txt'), 'rb').read().decode('ISO-8859-1')
+    >>> lines = text.split('\n')
+    >>> train = ' '.join(lines[10:])
+    >>> test = ' '.join(lines[:10])
+
+Now we train the sentence segmenter (or sentence tokenizer) and use it on our test sentences:
+
+    >>> stok = nltk.PunktSentenceTokenizer(train)
+    >>> print(stok.tokenize(test))
+    ['O 7 e Meio \xe9 um ex-libris da noite algarvia.',
+    '\xc9 uma das mais antigas discotecas do Algarve, situada em Albufeira,
+    que continua a manter os tra\xe7os decorativos e as clientelas de sempre.',
+    '\xc9 um pouco a vers\xe3o de uma esp\xe9cie de \xaboutro lado\xbb da noite,
+    a meio caminho entre os devaneios de uma fauna perif\xe9rica, seja de Lisboa,
+    Londres, Dublin ou Faro e Portim\xe3o, e a postura circunspecta dos fi\xe9is da casa,
+    que dela esperam a m\xfasica \xabgeracionista\xbb dos 60 ou dos 70.',
+    'N\xe3o deixa de ser, nos tempos que correm, um certo \xabvery typical\xbb algarvio,
+    cabe\xe7a de cartaz para os que querem fugir a algumas movimenta\xe7\xf5es nocturnas
+    j\xe1 a caminho da ritualiza\xe7\xe3o de massas, do g\xe9nero \xabvamos todos ao
+    Calypso e encontramo-nos na Locomia\xbb.',
+    'E assim, aos 2,5 milh\xf5es que o Minist\xe9rio do Planeamento e Administra\xe7\xe3o
+    do Territ\xf3rio j\xe1 gasta no pagamento do pessoal afecto a estes organismos,
+    v\xeam juntar-se os montantes das obras propriamente ditas, que os munic\xedpios,
+    j\xe1 com projectos na m\xe3o, v\xeam reivindicar junto do Executivo, como salienta
+    aquele membro do Governo.',
+    'E o dinheiro \xabn\xe3o falta s\xf3 \xe0s c\xe2maras\xbb, lembra o secret\xe1rio de Estado,
+    que considera que a solu\xe7\xe3o para as autarquias \xe9 \xabespecializarem-se em
+    fundos comunit\xe1rios\xbb.',
+    'Mas como, se muitas n\xe3o disp\xf5em, nos seus quadros, dos t\xe9cnicos necess\xe1rios?',
+    '\xabEncomendem-nos a projectistas de fora\xbb porque, se as obras vierem a ser financiadas,
+    eles at\xe9 saem de gra\xe7a, j\xe1 que, nesse caso, \xabos fundos comunit\xe1rios pagam
+    os projectos, o mesmo n\xe3o acontecendo quando eles s\xe3o feitos pelos GAT\xbb,
+    dado serem organismos do Estado.',
+    'Essa poder\xe1 vir a ser uma hip\xf3tese, at\xe9 porque, no terreno, a capacidade dos GAT
+    est\xe1 cada vez mais enfraquecida.',
+    'Alguns at\xe9 j\xe1 desapareceram, como o de Castro Verde, e outros t\xeam vindo a perder quadros.']
+
+NLTK's data collection includes a trained model for Portuguese sentence
+segmentation, which can be loaded as follows.  It is faster to load a trained model than
+to retrain it.
+
+    >>> stok = nltk.data.load('tokenizers/punkt/portuguese.pickle')
+
+Stemming
+--------
+
+NLTK includes the RSLP Portuguese stemmer.  Here we use it to stem some Portuguese text:
+
+    >>> stemmer = nltk.stem.RSLPStemmer()
+    >>> stemmer.stem("copiar")
+    'copi'
+    >>> stemmer.stem("paisagem")
+    'pais'
+
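+The stemmer can be applied to a whole token list with a comprehension
+(an illustrative sketch, skipped here):
+
+    >>> [stemmer.stem(w) for w in psent2]  # doctest: +SKIP
+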
+
+Stopwords
+---------
+
+NLTK includes Portuguese stopwords:
+
+    >>> stopwords = nltk.corpus.stopwords.words('portuguese')
+    >>> stopwords[:10]
+    ['a', 'ao', 'aos', 'aquela', 'aquelas', 'aquele', 'aqueles', 'aquilo', 'as', 'at\xe9']
+
+Now we can use these to filter text.  Let's find the most frequent words
+(other than stopwords) and print them in descending order of frequency.
+Note that the membership test checks the original token, so capitalized
+stopwords such as 'O' slip through and are counted under their lowercase
+form, and punctuation is not filtered at all; both appear in the output below:
+
+    >>> fd = nltk.FreqDist(w.lower() for w in floresta.words() if w not in stopwords)
+    >>> for word, count in fd.most_common(20):
+    ...     print(word, count)
+    , 13444
+    . 7725
+    « 2369
+    » 2310
+    é 1305
+    o 1086
+    } 1047
+    { 1044
+    a 897
+    ; 633
+    em 516
+    ser 466
+    sobre 349
+    os 313
+    anos 301
+    ontem 292
+    ainda 279
+    segundo 256
+    ter 249
+    dois 231
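+
+A stricter filter would lowercase each token before the membership test and
+keep only alphabetic tokens (an illustrative sketch, skipped here):
+
+    >>> fd2 = nltk.FreqDist(w.lower() for w in floresta.words()  # doctest: +SKIP
+    ...                     if w.isalpha() and w.lower() not in stopwords)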
+
diff --git a/nlp_resource_data/nltk/test/portuguese_en_fixt.py b/nlp_resource_data/nltk/test/portuguese_en_fixt.py
new file mode 100644 (file)
index 0000000..f417bc6
--- /dev/null
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+from nltk.corpus import teardown_module
+
+
+def setup_module(module):
+    from nose import SkipTest
+
+    raise SkipTest(
+        "portuguese_en.doctest imports nltk.examples.pt which doesn't exist!"
+    )
diff --git a/nlp_resource_data/nltk/test/probability.doctest b/nlp_resource_data/nltk/test/probability.doctest
new file mode 100644 (file)
index 0000000..ea36fe3
--- /dev/null
@@ -0,0 +1,304 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+===========
+Probability
+===========
+
+    >>> import nltk
+    >>> from nltk.probability import *
+
+FreqDist
+--------
+
+    >>> text1 = ['no', 'good', 'fish', 'goes', 'anywhere', 'without', 'a', 'porpoise', '!']
+    >>> text2 = ['no', 'good', 'porpoise', 'likes', 'to', 'fish', 'fish', 'anywhere', '.']
+
+    >>> fd1 = nltk.FreqDist(text1)
+    >>> fd1 == nltk.FreqDist(text1)
+    True
+
+Note that items are sorted in order of decreasing frequency; two items of the same frequency appear in indeterminate order.
+
+    >>> import itertools
+    >>> both = nltk.FreqDist(text1 + text2)
+    >>> both_most_common = both.most_common()
+    >>> list(itertools.chain(*(sorted(ys) for k, ys in itertools.groupby(both_most_common, key=lambda t: t[1]))))
+    [('fish', 3), ('anywhere', 2), ('good', 2), ('no', 2), ('porpoise', 2), ('!', 1), ('.', 1), ('a', 1), ('goes', 1), ('likes', 1), ('to', 1), ('without', 1)]
+
+    >>> both == fd1 + nltk.FreqDist(text2)
+    True
+    >>> fd1 == nltk.FreqDist(text1) # But fd1 is unchanged
+    True
+
+    >>> fd2 = nltk.FreqDist(text2)
+    >>> fd1.update(fd2)
+    >>> fd1 == both
+    True
+
+    >>> fd1 = nltk.FreqDist(text1)
+    >>> fd1.update(text2)
+    >>> fd1 == both
+    True
+
+    >>> fd1 = nltk.FreqDist(text1)
+    >>> fd2 = nltk.FreqDist(fd1)
+    >>> fd2 == fd1
+    True
+
+``nltk.FreqDist`` can be pickled:
+
+    >>> import pickle
+    >>> fd1 = nltk.FreqDist(text1)
+    >>> pickled = pickle.dumps(fd1)
+    >>> fd1 == pickle.loads(pickled)
+    True
+
+Mathematical operations:
+
+    >>> FreqDist('abbb') + FreqDist('bcc')
+    FreqDist({'b': 4, 'c': 2, 'a': 1})
+    >>> FreqDist('abbbc') - FreqDist('bccd')
+    FreqDist({'b': 2, 'a': 1})
+    >>> FreqDist('abbb') | FreqDist('bcc')
+    FreqDist({'b': 3, 'c': 2, 'a': 1})
+    >>> FreqDist('abbb') & FreqDist('bcc')
+    FreqDist({'b': 1})
+
+ConditionalFreqDist
+-------------------
+
+    >>> cfd1 = ConditionalFreqDist()
+    >>> cfd1[1] = FreqDist('abbbb')
+    >>> cfd1[2] = FreqDist('xxxxyy')
+    >>> cfd1
+    <ConditionalFreqDist with 2 conditions>
+
+    >>> cfd2 = ConditionalFreqDist()
+    >>> cfd2[1] = FreqDist('bbccc')
+    >>> cfd2[2] = FreqDist('xxxyyyzz')
+    >>> cfd2[3] = FreqDist('m')
+    >>> cfd2
+    <ConditionalFreqDist with 3 conditions>
+
+    >>> r = cfd1 + cfd2
+    >>> [(i,r[i]) for i in r.conditions()]
+    [(1, FreqDist({'b': 6, 'c': 3, 'a': 1})), (2, FreqDist({'x': 7, 'y': 5, 'z': 2})), (3, FreqDist({'m': 1}))]
+
+    >>> r = cfd1 - cfd2
+    >>> [(i,r[i]) for i in r.conditions()]
+    [(1, FreqDist({'b': 2, 'a': 1})), (2, FreqDist({'x': 1}))]
+
+    >>> r = cfd1 | cfd2
+    >>> [(i,r[i]) for i in r.conditions()]
+    [(1, FreqDist({'b': 4, 'c': 3, 'a': 1})), (2, FreqDist({'x': 4, 'y': 3, 'z': 2})), (3, FreqDist({'m': 1}))]
+
+    >>> r = cfd1 & cfd2
+    >>> [(i,r[i]) for i in r.conditions()]
+    [(1, FreqDist({'b': 2})), (2, FreqDist({'x': 3, 'y': 2}))]
+
+Testing some HMM estimators
+---------------------------
+
+We extract a small part (500 sentences) of the Brown corpus:
+
+    >>> corpus = nltk.corpus.brown.tagged_sents(categories='adventure')[:500]
+    >>> print(len(corpus))
+    500
+
+We create an HMM trainer.  Note that we need the tags and symbols
+from the whole corpus, not just the training corpus:
+
+    >>> from nltk.util import unique_list
+    >>> tag_set = unique_list(tag for sent in corpus for (word,tag) in sent)
+    >>> print(len(tag_set))
+    92
+    >>> symbols = unique_list(word for sent in corpus for (word,tag) in sent)
+    >>> print(len(symbols))
+    1464
+    >>> trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols)
+
+We divide the corpus into 90% training and 10% testing:
+
+    >>> train_corpus = []
+    >>> test_corpus = []
+    >>> for i in range(len(corpus)):
+    ...     if i % 10:
+    ...         train_corpus += [corpus[i]]
+    ...     else:
+    ...         test_corpus += [corpus[i]]
+    >>> print(len(train_corpus))
+    450
+    >>> print(len(test_corpus))
+    50
+
+And now we can test the estimators:
+
+    >>> def train_and_test(est):
+    ...     hmm = trainer.train_supervised(train_corpus, estimator=est)
+    ...     print('%.2f%%' % (100 * hmm.evaluate(test_corpus)))
+
+Maximum Likelihood Estimation
+-----------------------------
+- this resulted in an initialization error before r7209
+
+    >>> mle = lambda fd, bins: MLEProbDist(fd)
+    >>> train_and_test(mle)
+    22.75%
+
+Laplace (= Lidstone with gamma==1)
+
+    >>> train_and_test(LaplaceProbDist)
+    66.04%
+
+Expected Likelihood Estimation (= Lidstone with gamma==0.5)
+
+    >>> train_and_test(ELEProbDist)
+    73.01%
+
+Lidstone Estimation, for gamma==0.1, 0.5 and 1
+(the latter two should be exactly equal to the ELE and Laplace results above)
+
+    >>> def lidstone(gamma):
+    ...     return lambda fd, bins: LidstoneProbDist(fd, gamma, bins)
+    >>> train_and_test(lidstone(0.1))
+    82.51%
+    >>> train_and_test(lidstone(0.5))
+    73.01%
+    >>> train_and_test(lidstone(1.0))
+    66.04%
+
+Witten Bell Estimation
+----------------------
+- This resulted in ZeroDivisionError before r7209
+
+    >>> train_and_test(WittenBellProbDist)
+    88.12%
+
+Good Turing Estimation
+----------------------
+
+    >>> gt = lambda fd, bins: SimpleGoodTuringProbDist(fd, bins=1e5)
+    >>> train_and_test(gt)
+    86.93%
+
+Kneser Ney Estimation
+---------------------
+Since the Kneser-Ney distribution is best suited for trigrams, we must adjust
+our testing accordingly.
+
+    >>> corpus = [[((x[0],y[0],z[0]),(x[1],y[1],z[1]))
+    ...     for x, y, z in nltk.trigrams(sent)]
+    ...         for sent in corpus[:100]]
+
+We will then need to redefine the rest of the training/testing variables:
+
+    >>> tag_set = unique_list(tag for sent in corpus for (word,tag) in sent)
+    >>> len(tag_set)
+    906
+
+    >>> symbols = unique_list(word for sent in corpus for (word,tag) in sent)
+    >>> len(symbols)
+    1341
+
+    >>> trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols)
+    >>> train_corpus = []
+    >>> test_corpus = []
+
+    >>> for i in range(len(corpus)):
+    ...    if i % 10:
+    ...        train_corpus += [corpus[i]]
+    ...    else:
+    ...        test_corpus += [corpus[i]]
+
+    >>> len(train_corpus)
+    90
+    >>> len(test_corpus)
+    10
+
+    >>> kn = lambda fd, bins: KneserNeyProbDist(fd)
+    >>> train_and_test(kn)
+    0.86%
+
+Still to be added:
+
+- Tests for HeldoutProbDist, CrossValidationProbDist and MutableProbDist
+
+Squashed bugs
+-------------
+
+Issue 511: override pop and popitem to invalidate the cache
+
+    >>> fd = nltk.FreqDist('a')
+    >>> list(fd.keys())
+    ['a']
+    >>> fd.pop('a')
+    1
+    >>> list(fd.keys())
+    []
+
+Issue 533: access cumulative frequencies with no arguments
+
+    >>> fd = nltk.FreqDist('aab')
+    >>> list(fd._cumulative_frequencies(['a']))
+    [2.0]
+    >>> list(fd._cumulative_frequencies(['a', 'b']))
+    [2.0, 3.0]
+
+Issue 579: override clear to reset some variables
+
+    >>> fd = FreqDist('aab')
+    >>> fd.clear()
+    >>> fd.N()
+    0
+
+Issue 351: fix the fileids method of CategorizedCorpusReader so that it
+does not inadvertently add errant categories
+
+    >>> from nltk.corpus import brown
+    >>> brown.fileids('blah')
+    Traceback (most recent call last):
+      ...
+    ValueError: Category blah not found
+    >>> brown.categories()
+    ['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor', 'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
+
+Issue 175: add the unseen bin to SimpleGoodTuringProbDist by default;
+otherwise any unseen events get a probability of zero, i.e.,
+they don't get smoothed
+
+    >>> from nltk import SimpleGoodTuringProbDist, FreqDist
+    >>> fd = FreqDist({'a':1, 'b':1, 'c': 2, 'd': 3, 'e': 4, 'f': 4, 'g': 4, 'h': 5, 'i': 5, 'j': 6, 'k': 6, 'l': 6, 'm': 7, 'n': 7, 'o': 8, 'p': 9, 'q': 10})
+    >>> p = SimpleGoodTuringProbDist(fd)
+    >>> p.prob('a')
+    0.017649766667026317...
+    >>> p.prob('o')
+    0.08433050215340411...
+    >>> p.prob('z')
+    0.022727272727272728...
+    >>> p.prob('foobar')
+    0.022727272727272728...
+
+``MLEProbDist``, ``ConditionalProbDist``, ``DictionaryConditionalProbDist`` and
+``ConditionalFreqDist`` can be pickled:
+
+    >>> import pickle
+    >>> pd = MLEProbDist(fd)
+    >>> sorted(pd.samples()) == sorted(pickle.loads(pickle.dumps(pd)).samples())
+    True
+    >>> dpd = DictionaryConditionalProbDist({'x': pd})
+    >>> unpickled = pickle.loads(pickle.dumps(dpd))
+    >>> dpd['x'].prob('a')
+    0.011363636...
+    >>> dpd['x'].prob('a') == unpickled['x'].prob('a')
+    True
+    >>> cfd = nltk.probability.ConditionalFreqDist()
+    >>> cfd['foo']['hello'] += 1
+    >>> cfd['foo']['hello'] += 1
+    >>> cfd['bar']['hello'] += 1
+    >>> cfd2 = pickle.loads(pickle.dumps(cfd))
+    >>> cfd2 == cfd
+    True
+    >>> cpd = ConditionalProbDist(cfd, SimpleGoodTuringProbDist)
+    >>> cpd2 = pickle.loads(pickle.dumps(cpd))
+    >>> cpd['foo'].prob('hello') == cpd2['foo'].prob('hello')
+    True
+
+
diff --git a/nlp_resource_data/nltk/test/probability_fixt.py b/nlp_resource_data/nltk/test/probability_fixt.py
new file mode 100644 (file)
index 0000000..fc786c9
--- /dev/null
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+
+
+# probability.doctest uses HMM which requires numpy;
+# skip probability.doctest if numpy is not available
+
+
+def setup_module(module):
+    from nose import SkipTest
+
+    try:
+        import numpy
+    except ImportError:
+        raise SkipTest("probability.doctest requires numpy")
diff --git a/nlp_resource_data/nltk/test/propbank.doctest b/nlp_resource_data/nltk/test/propbank.doctest
new file mode 100644 (file)
index 0000000..d3e8a68
--- /dev/null
@@ -0,0 +1,176 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+========
+PropBank
+========
+
+The PropBank Corpus provides predicate-argument annotation for the
+entire Penn Treebank.  Each verb in the treebank is annotated by a single
+instance in PropBank, containing information about the location of
+the verb, and the location and identity of its arguments:
+
+    >>> from nltk.corpus import propbank
+    >>> pb_instances = propbank.instances()
+    >>> print(pb_instances) # doctest: +NORMALIZE_WHITESPACE
+    [<PropbankInstance: wsj_0001.mrg, sent 0, word 8>,
+     <PropbankInstance: wsj_0001.mrg, sent 1, word 10>, ...]
+
+Each propbank instance defines the following member variables:
+
+  - Location information: `fileid`, `sentnum`, `wordnum`
+  - Annotator information: `tagger`
+  - Inflection information: `inflection`
+  - Roleset identifier: `roleset`
+  - Verb (aka predicate) location: `predicate`
+  - Argument locations and types: `arguments`
+
+The following examples show the types of these arguments:
+
+    >>> inst = pb_instances[103]
+    >>> (inst.fileid, inst.sentnum, inst.wordnum)
+    ('wsj_0004.mrg', 8, 16)
+    >>> inst.tagger
+    'gold'
+    >>> inst.inflection
+    <PropbankInflection: vp--a>
+    >>> infl = inst.inflection
+    >>> infl.form, infl.tense, infl.aspect, infl.person, infl.voice
+    ('v', 'p', '-', '-', 'a')
+    >>> inst.roleset
+    'rise.01'
+    >>> inst.predicate
+    PropbankTreePointer(16, 0)
+    >>> inst.arguments # doctest: +NORMALIZE_WHITESPACE
+    ((PropbankTreePointer(0, 2), 'ARG1'),
+     (PropbankTreePointer(13, 1), 'ARGM-DIS'),
+     (PropbankTreePointer(17, 1), 'ARG4-to'),
+     (PropbankTreePointer(20, 1), 'ARG3-from'))
+
+The locations of the predicate and of the arguments are encoded using
+`PropbankTreePointer` objects, as well as `PropbankChainTreePointer`
+objects and `PropbankSplitTreePointer` objects.  A
+`PropbankTreePointer` consists of a `wordnum` and a `height`:
+
+    >>> print(inst.predicate.wordnum, inst.predicate.height)
+    16 0
+
+This identifies the tree constituent that is headed by the word that
+is the `wordnum`\ 'th token in the sentence, and whose span is found
+by going `height` nodes up in the tree.  This type of pointer is only
+useful if we also have the corresponding tree structure, since it
+includes empty elements such as traces in the word number count.  The
+trees for 10% of the standard PropBank Corpus are contained in the
+`treebank` corpus:
+
+    >>> tree = inst.tree
+
+    >>> from nltk.corpus import treebank
+    >>> assert tree == treebank.parsed_sents(inst.fileid)[inst.sentnum]
+
+    >>> inst.predicate.select(tree)
+    Tree('VBD', ['rose'])
+    >>> for (argloc, argid) in inst.arguments:
+    ...     print('%-10s %s' % (argid, argloc.select(tree).pformat(500)[:50]))
+    ARG1       (NP-SBJ (NP (DT The) (NN yield)) (PP (IN on) (NP (
+    ARGM-DIS   (PP (IN for) (NP (NN example)))
+    ARG4-to    (PP-DIR (TO to) (NP (CD 8.04) (NN %)))
+    ARG3-from  (PP-DIR (IN from) (NP (CD 7.90) (NN %)))
+
+Propbank tree pointers can be converted to standard tree locations,
+which are usually easier to work with, using the `treepos()` method:
+
+    >>> treepos = inst.predicate.treepos(tree)
+    >>> print(treepos, tree[treepos])
+    (4, 0) (VBD rose)
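+
+The same conversion can be applied to plain argument pointers (an
+illustrative sketch, skipped here; chain and split pointers provide
+`select()` instead):
+
+    >>> for (argloc, argid) in inst.arguments:  # doctest: +SKIP
+    ...     print(argid, argloc.treepos(tree))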
+
+In some cases, argument locations will be encoded using
+`PropbankChainTreePointer`\ s (for trace chains) or
+`PropbankSplitTreePointer`\ s (for discontinuous constituents).  Both
+of these objects contain a single member variable, `pieces`,
+containing a list of the constituent pieces.  They also define the
+method `select()`, which will return a tree containing all the
+elements of the argument.  (A new head node is created, labeled
+"*CHAIN*" or "*SPLIT*", since the argument is not a single constituent
+in the original tree).  Sentence #6 contains an example of an argument
+that is both discontinuous and contains a chain:
+
+    >>> inst = pb_instances[6]
+    >>> inst.roleset
+    'expose.01'
+    >>> argloc, argid = inst.arguments[2]
+    >>> argloc
+    <PropbankChainTreePointer: 22:1,24:0,25:1*27:0>
+    >>> argloc.pieces
+    [<PropbankSplitTreePointer: 22:1,24:0,25:1>, PropbankTreePointer(27, 0)]
+    >>> argloc.pieces[0].pieces
+    ... # doctest: +NORMALIZE_WHITESPACE
+    [PropbankTreePointer(22, 1), PropbankTreePointer(24, 0),
+     PropbankTreePointer(25, 1)]
+    >>> print(argloc.select(inst.tree))
+    (*CHAIN*
+      (*SPLIT* (NP (DT a) (NN group)) (IN of) (NP (NNS workers)))
+      (-NONE- *))
+
+The PropBank Corpus also provides access to the frameset files, which
+define the argument labels used by the annotations, on a per-verb
+basis.  Each frameset file contains one or more predicates, such as
+'turn' or 'turn_on', each of which is divided into coarse-grained word
+senses called rolesets.  For each roleset, the frameset file provides
+descriptions of the argument roles, along with examples.
+
+    >>> expose_01 = propbank.roleset('expose.01')
+    >>> turn_01 = propbank.roleset('turn.01')
+    >>> print(turn_01) # doctest: +ELLIPSIS
+    <Element 'roleset' at ...>
+    >>> for role in turn_01.findall("roles/role"):
+    ...     print(role.attrib['n'], role.attrib['descr'])
+    0 turner
+    1 thing turning
+    m direction, location
+
+    >>> from xml.etree import ElementTree
+    >>> print(ElementTree.tostring(turn_01.find('example')).decode('utf8').strip())
+    <example name="transitive agentive">
+      <text>
+      John turned the key in the lock.
+      </text>
+      <arg n="0">John</arg>
+      <rel>turned</rel>
+      <arg n="1">the key</arg>
+      <arg f="LOC" n="m">in the lock</arg>
+    </example>
+
+Note that the standard corpus distribution only contains 10% of the
+treebank, so the parse trees are not available for instances starting
+at 9353:
+
+    >>> inst = pb_instances[9352]
+    >>> inst.fileid
+    'wsj_0199.mrg'
+    >>> print(inst.tree) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
+    (S (NP-SBJ (NNP Trinity)) (VP (VBD said) (SBAR (-NONE- 0) ...))
+    >>> print(inst.predicate.select(inst.tree))
+    (VB begin)
+
+    >>> inst = pb_instances[9353]
+    >>> inst.fileid
+    'wsj_0200.mrg'
+    >>> print(inst.tree)
+    None
+    >>> print(inst.predicate.select(inst.tree))
+    Traceback (most recent call last):
+      . . .
+    ValueError: Parse tree not avaialable
+
+However, if you supply your own version of the treebank corpus (by
+putting it before the nltk-provided version on `nltk.data.path`, or
+by creating a `ptb` directory as described above and using the
+`propbank_ptb` module), then you can access the trees for all
+instances.
+
+A list of the verb lemmas contained in PropBank is returned by the
+`propbank.verbs()` method:
+
+    >>> propbank.verbs()
+    ['abandon', 'abate', 'abdicate', 'abet', 'abide', ...]
diff --git a/nlp_resource_data/nltk/test/relextract.doctest b/nlp_resource_data/nltk/test/relextract.doctest
new file mode 100644 (file)
index 0000000..6df3c1c
--- /dev/null
@@ -0,0 +1,263 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+======================
+Information Extraction
+======================
+
+Information Extraction standardly consists of three subtasks:
+
+#. Named Entity Recognition
+
+#. Relation Extraction
+
+#. Template Filling
+
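+The sections below illustrate the first two of these tasks.
+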
+Named Entities
+~~~~~~~~~~~~~~
+
+The IEER corpus is marked up for a variety of Named Entities. A `Named
+Entity`:dt: (more strictly, a Named Entity mention) is a name of an
+entity belonging to a specified class. For example, the Named Entity
+classes in IEER include PERSON, LOCATION, ORGANIZATION, DATE and so
+on. Within NLTK, Named Entities are represented as subtrees within a
+chunk structure: the class name is treated as node label, while the
+entity mention itself appears as the leaves of the subtree. This is
+illustrated below, where we show an extract of the chunk
+representation of document NYT_19980315.064:
+
+    >>> from nltk.corpus import ieer
+    >>> docs = ieer.parsed_docs('NYT_19980315')
+    >>> tree = docs[1].text
+    >>> print(tree) # doctest: +ELLIPSIS
+    (DOCUMENT
+    ...
+      ``It's
+      a
+      chance
+      to
+      think
+      about
+      first-level
+      questions,''
+      said
+      Ms.
+      (PERSON Cohn)
+      ,
+      a
+      partner
+      in
+      the
+      (ORGANIZATION McGlashan &AMP; Sarrail)
+      firm
+      in
+      (LOCATION San Mateo)
+      ,
+      (LOCATION Calif.)
+      ...)
+
+Thus, the Named Entity mentions in this example are *Cohn*, *McGlashan &AMP;
+Sarrail*, *San Mateo* and *Calif.*.
+
+The CoNLL2002 Dutch and Spanish data is treated similarly, although in
+this case, the strings are also POS tagged:
+
+    >>> from nltk.corpus import conll2002
+    >>> for doc in conll2002.chunked_sents('ned.train')[27]:
+    ...     print(doc)
+    ('Het', 'Art')
+    (ORG Hof/N van/Prep Cassatie/N)
+    ('verbrak', 'V')
+    ('het', 'Art')
+    ('arrest', 'N')
+    ('zodat', 'Conj')
+    ('het', 'Pron')
+    ('moest', 'V')
+    ('worden', 'V')
+    ('overgedaan', 'V')
+    ('door', 'Prep')
+    ('het', 'Art')
+    ('hof', 'N')
+    ('van', 'Prep')
+    ('beroep', 'N')
+    ('van', 'Prep')
+    (LOC Antwerpen/N)
+    ('.', 'Punc')
+
+Relation Extraction
+~~~~~~~~~~~~~~~~~~~
+
+Relation Extraction standardly consists of identifying specified
+relations between Named Entities. For example, assuming that we can
+recognize ORGANIZATIONs and LOCATIONs in text, we might want to also
+recognize pairs *(o, l)* of these kinds of entities such that *o* is
+located in *l*.
+
+The `sem.relextract` module provides some tools to help carry out a
+simple version of this task. The `tree2semi_rel()` function splits a chunk
+document into a list of two-member lists, each of which consists of a
+(possibly empty) string followed by a `Tree` (i.e., a Named Entity):
+
+    >>> from nltk.sem import relextract
+    >>> pairs = relextract.tree2semi_rel(tree)
+    >>> for s, tree in pairs[18:22]:
+    ...     print('("...%s", %s)' % (" ".join(s[-5:]),tree))
+    ("...about first-level questions,'' said Ms.", (PERSON Cohn))
+    ("..., a partner in the", (ORGANIZATION McGlashan &AMP; Sarrail))
+    ("...firm in", (LOCATION San Mateo))
+    ("...,", (LOCATION Calif.))
+
+The function `semi_rel2reldict()` processes triples of these pairs, i.e.,
+pairs of the form ``((string1, Tree1), (string2, Tree2), (string3,
+Tree3))`` and outputs a dictionary (a `reldict`) in which ``Tree1`` is
+the subject of the relation, ``string2`` is the filler
+and ``Tree3`` is the object of the relation. ``string1`` and ``string3`` are
+stored as left and right context respectively.
+
+    >>> reldicts = relextract.semi_rel2reldict(pairs)
+    >>> for k, v in sorted(reldicts[0].items()):
+    ...     print(k, '=>', v) # doctest: +ELLIPSIS
+    filler => of messages to their own ``Cyberia'' ...
+    lcon => transactions.'' Each week, they post
+    objclass => ORGANIZATION
+    objsym => white_house
+    objtext => White House
+    rcon => for access to its planned
+    subjclass => CARDINAL
+    subjsym => hundreds
+    subjtext => hundreds
+    untagged_filler => of messages to their own ``Cyberia'' ...
+
+The next example shows some of the values for two `reldict`\ s
+corresponding to the ``'NYT_19980315'`` text extract shown earlier.
+
+    >>> for r in reldicts[18:20]:
+    ...     print('=' * 20)
+    ...     print(r['subjtext'])
+    ...     print(r['filler'])
+    ...     print(r['objtext'])
+    ====================
+    Cohn
+    , a partner in the
+    McGlashan &AMP; Sarrail
+    ====================
+    McGlashan &AMP; Sarrail
+    firm in
+    San Mateo
+
+The function `relextract()` allows us to filter the `reldict`\ s
+according to the classes of the subject and object named entities. In
+addition, we can specify that the filler text has to match a given
+regular expression, as illustrated in the next example. Here, we are
+looking for pairs of entities in the IN relation, where IN has
+signature <ORG, LOC>.  The negative lookahead in the pattern below is
+there to exclude cases where "in" is followed by a gerund, as in
+"success in supervising".
+
+    >>> import re
+    >>> IN = re.compile(r'.*\bin\b(?!\b.+ing\b)')
+    >>> for fileid in ieer.fileids():
+    ...     for doc in ieer.parsed_docs(fileid):
+    ...         for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern = IN):
+    ...             print(relextract.rtuple(rel))  # doctest: +ELLIPSIS
+    [ORG: 'Christian Democrats'] ', the leading political forces in' [LOC: 'Italy']
+    [ORG: 'AP'] ') _ Lebanese guerrillas attacked Israeli forces in southern' [LOC: 'Lebanon']
+    [ORG: 'Security Council'] 'adopted Resolution 425. Huge yellow banners hung across intersections in' [LOC: 'Beirut']
+    [ORG: 'U.N.'] 'failures in' [LOC: 'Africa']
+    [ORG: 'U.N.'] 'peacekeeping operation in' [LOC: 'Somalia']
+    [ORG: 'U.N.'] 'partners on a more effective role in' [LOC: 'Africa']
+    [ORG: 'AP'] ') _ A bomb exploded in a mosque in central' [LOC: 'San`a']
+    [ORG: 'Krasnoye Sormovo'] 'shipyard in the Soviet city of' [LOC: 'Gorky']
+    [ORG: 'Kelab Golf Darul Ridzuan'] 'in' [LOC: 'Perak']
+    [ORG: 'U.N.'] 'peacekeeping operation in' [LOC: 'Somalia']
+    [ORG: 'WHYY'] 'in' [LOC: 'Philadelphia']
+    [ORG: 'McGlashan &AMP; Sarrail'] 'firm in' [LOC: 'San Mateo']
+    [ORG: 'Freedom Forum'] 'in' [LOC: 'Arlington']
+    [ORG: 'Brookings Institution'] ', the research group in' [LOC: 'Washington']
+    [ORG: 'Idealab'] ', a self-described business incubator based in' [LOC: 'Los Angeles']
+    [ORG: 'Open Text'] ', based in' [LOC: 'Waterloo']
+    ...
+
+The next example illustrates a case where the pattern is a disjunction
+of roles that a PERSON can occupy in an ORGANIZATION.
+
+    >>> roles = """
+    ... (.*(
+    ... analyst|
+    ... chair(wo)?man|
+    ... commissioner|
+    ... counsel|
+    ... director|
+    ... economist|
+    ... editor|
+    ... executive|
+    ... foreman|
+    ... governor|
+    ... head|
+    ... lawyer|
+    ... leader|
+    ... librarian).*)|
+    ... manager|
+    ... partner|
+    ... president|
+    ... producer|
+    ... professor|
+    ... researcher|
+    ... spokes(wo)?man|
+    ... writer|
+    ... ,\sof\sthe?\s*  # "X, of (the) Y"
+    ... """
+    >>> ROLES = re.compile(roles, re.VERBOSE)
+    >>> for fileid in ieer.fileids():
+    ...     for doc in ieer.parsed_docs(fileid):
+    ...         for rel in relextract.extract_rels('PER', 'ORG', doc, corpus='ieer', pattern=ROLES):
+    ...             print(relextract.rtuple(rel)) # doctest: +ELLIPSIS
+    [PER: 'Kivutha Kibwana'] ', of the' [ORG: 'National Convention Assembly']
+    [PER: 'Boban Boskovic'] ', chief executive of the' [ORG: 'Plastika']
+    [PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations']
+    [PER: 'Kiriyenko'] 'became a foreman at the' [ORG: 'Krasnoye Sormovo']
+    [PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations']
+    [PER: 'Mike Godwin'] ', chief counsel for the' [ORG: 'Electronic Frontier Foundation']
+    ...
+
+In the case of the CoNLL2002 data, we can include POS tags in the
+query pattern. This example also illustrates how the output can be
+presented as something that looks more like a clause in a logical language.
+
+    >>> de = """
+    ... .*
+    ... (
+    ... de/SP|
+    ... del/SP
+    ... )
+    ... """
+    >>> DE = re.compile(de, re.VERBOSE)
+    >>> rels = [rel for doc in conll2002.chunked_sents('esp.train')
+    ...         for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern = DE)]
+    >>> for r in rels[:10]:
+    ...     print(relextract.clause(r, relsym='DE'))    # doctest: +NORMALIZE_WHITESPACE
+    DE('tribunal_supremo', 'victoria')
+    DE('museo_de_arte', 'alcorc\xf3n')
+    DE('museo_de_bellas_artes', 'a_coru\xf1a')
+    DE('siria', 'l\xedbano')
+    DE('uni\xf3n_europea', 'pek\xedn')
+    DE('ej\xe9rcito', 'rogberi')
+    DE('juzgado_de_instrucci\xf3n_n\xfamero_1', 'san_sebasti\xe1n')
+    DE('psoe', 'villanueva_de_la_serena')
+    DE('ej\xe9rcito', 'l\xedbano')
+    DE('juzgado_de_lo_penal_n\xfamero_2', 'ceuta')
+    >>> vnv = """
+    ... (
+    ... is/V|
+    ... was/V|
+    ... werd/V|
+    ... wordt/V
+    ... )
+    ... .*
+    ... van/Prep
+    ... """
+    >>> VAN = re.compile(vnv, re.VERBOSE)
+    >>> for doc in conll2002.chunked_sents('ned.train'):
+    ...     for r in relextract.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN):
+    ...         print(relextract.clause(r, relsym="VAN"))
+    VAN("cornet_d'elzius", 'buitenlandse_handel')
+    VAN('johan_rottiers', 'kardinaal_van_roey_instituut')
+    VAN('annie_lennox', 'eurythmics')
diff --git a/nlp_resource_data/nltk/test/resolution.doctest b/nlp_resource_data/nltk/test/resolution.doctest
new file mode 100644 (file)
index 0000000..fc31db4
--- /dev/null
@@ -0,0 +1,221 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=========================
+Resolution Theorem Prover
+=========================
+
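+The resolution prover works by refutation: the premises and the
+negation of the goal are converted to clause form, and clauses are
+resolved against one another until the empty clause is derived.  The
+tests below first exercise the building blocks (unification and the
+clause operations) and then the prover itself.
+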
+    >>> from nltk.inference.resolution import *
+    >>> from nltk.sem import logic
+    >>> from nltk.sem.logic import *
+    >>> logic._counter._value = 0
+    >>> read_expr = logic.Expression.fromstring
+
+    >>> P = read_expr('P')
+    >>> Q = read_expr('Q')
+    >>> R = read_expr('R')
+    >>> A = read_expr('A')
+    >>> B = read_expr('B')
+    >>> x = read_expr('x')
+    >>> y = read_expr('y')
+    >>> z = read_expr('z')
+
+-------------------------------
+Test most_general_unification()
+-------------------------------
+    >>> print(most_general_unification(x, x))
+    {}
+    >>> print(most_general_unification(A, A))
+    {}
+    >>> print(most_general_unification(A, x))
+    {x: A}
+    >>> print(most_general_unification(x, A))
+    {x: A}
+    >>> print(most_general_unification(x, y))
+    {x: y}
+    >>> print(most_general_unification(P(x), P(A)))
+    {x: A}
+    >>> print(most_general_unification(P(x,B), P(A,y)))
+    {x: A, y: B}
+    >>> print(most_general_unification(P(x,B), P(B,x)))
+    {x: B}
+    >>> print(most_general_unification(P(x,y), P(A,x)))
+    {x: A, y: x}
+    >>> print(most_general_unification(P(Q(x)), P(y)))
+    {y: Q(x)}
+
+------------
+Test unify()
+------------
+    >>> print(Clause([]).unify(Clause([])))
+    []
+    >>> print(Clause([P(x)]).unify(Clause([-P(A)])))
+    [{}]
+    >>> print(Clause([P(A), Q(x)]).unify(Clause([-P(x), R(x)])))
+    [{R(A), Q(A)}]
+    >>> print(Clause([P(A), Q(x), R(x,y)]).unify(Clause([-P(x), Q(y)])))
+    [{Q(y), Q(A), R(A,y)}]
+    >>> print(Clause([P(A), -Q(y)]).unify(Clause([-P(x), Q(B)])))
+    [{}]
+    >>> print(Clause([P(x), Q(x)]).unify(Clause([-P(A), -Q(B)])))
+    [{-Q(B), Q(A)}, {-P(A), P(B)}]
+    >>> print(Clause([P(x,x), Q(x), R(x)]).unify(Clause([-P(A,z), -Q(B)])))
+    [{-Q(B), Q(A), R(A)}, {-P(A,z), R(B), P(B,B)}]
+
+    >>> a = clausify(read_expr('P(A)'))
+    >>> b = clausify(read_expr('A=B'))
+    >>> print(a[0].unify(b[0]))
+    [{P(B)}]
+
+-------------------------
+Test is_tautology()
+-------------------------
+    >>> print(Clause([P(A), -P(A)]).is_tautology())
+    True
+    >>> print(Clause([-P(A), P(A)]).is_tautology())
+    True
+    >>> print(Clause([P(x), -P(A)]).is_tautology())
+    False
+    >>> print(Clause([Q(B), -P(A), P(A)]).is_tautology())
+    True
+    >>> print(Clause([-Q(A), P(R(A)), -P(R(A)), Q(x), -R(y)]).is_tautology())
+    True
+    >>> print(Clause([P(x), -Q(A)]).is_tautology())
+    False
+
+-------------------------
+Test subsumes()
+-------------------------
+    >>> print(Clause([P(A), Q(B)]).subsumes(Clause([P(A), Q(B)])))
+    True
+    >>> print(Clause([-P(A)]).subsumes(Clause([P(A)])))
+    False
+    >>> print(Clause([P(A), Q(B)]).subsumes(Clause([Q(B), P(A)])))
+    True
+    >>> print(Clause([P(A), Q(B)]).subsumes(Clause([Q(B), R(A), P(A)])))
+    True
+    >>> print(Clause([P(A), R(A), Q(B)]).subsumes(Clause([Q(B), P(A)])))
+    False
+    >>> print(Clause([P(x)]).subsumes(Clause([P(A)])))
+    True
+    >>> print(Clause([P(A)]).subsumes(Clause([P(x)])))
+    True
+
+------------
+Test prove()
+------------
+    >>> print(ResolutionProverCommand(read_expr('man(x)')).prove())
+    False
+    >>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove())
+    True
+    >>> print(ResolutionProverCommand(read_expr('(man(x) -> --man(x))')).prove())
+    True
+    >>> print(ResolutionProverCommand(read_expr('-(man(x) & -man(x))')).prove())
+    True
+    >>> print(ResolutionProverCommand(read_expr('(man(x) | -man(x))')).prove())
+    True
+    >>> print(ResolutionProverCommand(read_expr('(man(x) <-> man(x))')).prove())
+    True
+    >>> print(ResolutionProverCommand(read_expr('-(man(x) <-> -man(x))')).prove())
+    True
+    >>> print(ResolutionProverCommand(read_expr('all x.man(x)')).prove())
+    False
+    >>> print(ResolutionProverCommand(read_expr('-all x.some y.F(x,y) & some x.all y.(-F(x,y))')).prove())
+    False
+    >>> print(ResolutionProverCommand(read_expr('some x.all y.sees(x,y)')).prove())
+    False
+
+    >>> p1 = read_expr('all x.(man(x) -> mortal(x))')
+    >>> p2 = read_expr('man(Socrates)')
+    >>> c = read_expr('mortal(Socrates)')
+    >>> ResolutionProverCommand(c, [p1,p2]).prove()
+    True
+
+    >>> p1 = read_expr('all x.(man(x) -> walks(x))')
+    >>> p2 = read_expr('man(John)')
+    >>> c = read_expr('some y.walks(y)')
+    >>> ResolutionProverCommand(c, [p1,p2]).prove()
+    True
+
+    >>> p = read_expr('some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))')
+    >>> c = read_expr('some e0.walk(e0,mary)')
+    >>> ResolutionProverCommand(c, [p]).prove()
+    True
+
+------------
+Test proof()
+------------
+    >>> p1 = read_expr('all x.(man(x) -> mortal(x))')
+    >>> p2 = read_expr('man(Socrates)')
+    >>> c = read_expr('mortal(Socrates)')
+    >>> logic._counter._value = 0
+    >>> tp = ResolutionProverCommand(c, [p1,p2])
+    >>> tp.prove()
+    True
+    >>> print(tp.proof())
+    [1] {-mortal(Socrates)}     A
+    [2] {-man(z2), mortal(z2)}  A
+    [3] {man(Socrates)}         A
+    [4] {-man(Socrates)}        (1, 2)
+    [5] {mortal(Socrates)}      (2, 3)
+    [6] {}                      (1, 5)
+    <BLANKLINE>
+
+------------------
+Question Answering
+------------------
+One answer
+    >>> p1 = read_expr('father_of(art,john)')
+    >>> p2 = read_expr('father_of(bob,kim)')
+    >>> p3 = read_expr('all x.all y.(father_of(x,y) -> parent_of(x,y))')
+    >>> c = read_expr('all x.(parent_of(x,john) -> ANSWER(x))')
+    >>> logic._counter._value = 0
+    >>> tp = ResolutionProverCommand(None, [p1,p2,p3,c])
+    >>> sorted(tp.find_answers())
+    [<ConstantExpression art>]
+    >>> print(tp.proof()) # doctest: +SKIP
+    [1] {father_of(art,john)}                  A
+    [2] {father_of(bob,kim)}                   A
+    [3] {-father_of(z3,z4), parent_of(z3,z4)}  A
+    [4] {-parent_of(z6,john), ANSWER(z6)}      A
+    [5] {parent_of(art,john)}                  (1, 3)
+    [6] {parent_of(bob,kim)}                   (2, 3)
+    [7] {ANSWER(z6), -father_of(z6,john)}      (3, 4)
+    [8] {ANSWER(art)}                          (1, 7)
+    [9] {ANSWER(art)}                          (4, 5)
+    <BLANKLINE>
+
+Multiple answers
+    >>> p1 = read_expr('father_of(art,john)')
+    >>> p2 = read_expr('mother_of(ann,john)')
+    >>> p3 = read_expr('all x.all y.(father_of(x,y) -> parent_of(x,y))')
+    >>> p4 = read_expr('all x.all y.(mother_of(x,y) -> parent_of(x,y))')
+    >>> c = read_expr('all x.(parent_of(x,john) -> ANSWER(x))')
+    >>> logic._counter._value = 0
+    >>> tp = ResolutionProverCommand(None, [p1,p2,p3,p4,c])
+    >>> sorted(tp.find_answers())
+    [<ConstantExpression ann>, <ConstantExpression art>]
+    >>> print(tp.proof()) # doctest: +SKIP
+    [ 1] {father_of(art,john)}                  A
+    [ 2] {mother_of(ann,john)}                  A
+    [ 3] {-father_of(z3,z4), parent_of(z3,z4)}  A
+    [ 4] {-mother_of(z7,z8), parent_of(z7,z8)}  A
+    [ 5] {-parent_of(z10,john), ANSWER(z10)}    A
+    [ 6] {parent_of(art,john)}                  (1, 3)
+    [ 7] {parent_of(ann,john)}                  (2, 4)
+    [ 8] {ANSWER(z10), -father_of(z10,john)}    (3, 5)
+    [ 9] {ANSWER(art)}                          (1, 8)
+    [10] {ANSWER(z10), -mother_of(z10,john)}    (4, 5)
+    [11] {ANSWER(ann)}                          (2, 10)
+    [12] {ANSWER(art)}                          (5, 6)
+    [13] {ANSWER(ann)}                          (5, 7)
+    <BLANKLINE>
+
diff --git a/nlp_resource_data/nltk/test/runtests.py b/nlp_resource_data/nltk/test/runtests.py
new file mode 100644 (file)
index 0000000..9dc06ec
--- /dev/null
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import sys
+import os
+import nose
+from nose.plugins.manager import PluginManager
+from nose.plugins.doctests import Doctest
+from nose.plugins import builtin
+
+NLTK_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+sys.path.insert(0, NLTK_ROOT)
+
+NLTK_TEST_DIR = os.path.join(NLTK_ROOT, "nltk")
+
+if __name__ == "__main__":
+    # there should be no imports from NLTK here, so that coverage works properly
+    try:
+        # Import RedNose plugin for colored test output
+        from rednose import RedNose
+
+        rednose_available = True
+    except ImportError:
+        rednose_available = False
+
+    class NltkPluginManager(PluginManager):
+        """
+        Nose plugin manager that loads the built-in nose plugins and,
+        when available, adds the RedNose plugin for colored test output.
+        """
+
+        def loadPlugins(self):
+            for plug in builtin.plugins:
+                self.addPlugin(plug())
+            if rednose_available:
+                self.addPlugin(RedNose())
+
+            super(NltkPluginManager, self).loadPlugins()
+
+    manager = NltkPluginManager()
+    manager.loadPlugins()
+
+    # allow passing extra options and running individual tests
+    # Examples:
+    #
+    #    python runtests.py semantics.doctest
+    #    python runtests.py --with-id -v
+    #    python runtests.py --with-id -v nltk.featstruct
+
+    args = sys.argv[1:]
+    if not args:
+        args = [NLTK_TEST_DIR]
+
+    if all(arg.startswith("-") for arg in args):
+        # only extra options were passed
+        args += [NLTK_TEST_DIR]
+
+    # Activate RedNose and hide skipped test messages from output
+    if rednose_available:
+        args += ["--rednose", "--hide-skips"]
+
+    arguments = [
+        "--exclude=",  # why is this needed?
+        # '--with-xunit',
+        # '--xunit-file=$WORKSPACE/nosetests.xml',
+        # '--nocapture',
+        "--with-doctest",
+        # '--doctest-tests',
+        # '--debug=nose,nose.importer,nose.inspector,nose.plugins,nose.result,nose.selector',
+        "--doctest-extension=.doctest",
+        "--doctest-fixtures=_fixt",
+        "--doctest-options=+ELLIPSIS,+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL",
+        # '--verbosity=3',
+    ] + args
+
+    nose.main(argv=arguments, plugins=manager.plugins)
diff --git a/nlp_resource_data/nltk/test/segmentation_fixt.py b/nlp_resource_data/nltk/test/segmentation_fixt.py
new file mode 100644 (file)
index 0000000..82918ba
--- /dev/null
@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+
+
+# skip segmentation.doctest if numpy is not available
+def setup_module(module):
+    from nose import SkipTest
+
+    try:
+        import numpy
+    except ImportError:
+        raise SkipTest("segmentation.doctest requires numpy")
diff --git a/nlp_resource_data/nltk/test/semantics.doctest b/nlp_resource_data/nltk/test/semantics.doctest
new file mode 100644 (file)
index 0000000..32c0f84
--- /dev/null
@@ -0,0 +1,665 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=========
+Semantics
+=========
+
+    >>> import nltk
+    >>> from nltk.sem import Valuation, Model
+    >>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
+    ... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
+    ... ('dog', set(['d1'])),
+    ... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
+    >>> val = Valuation(v)
+    >>> dom = val.domain
+    >>> m = Model(dom, val)
+
+Evaluation
+----------
+
+The top-level method of a ``Model`` instance is ``evaluate()``, which
+assigns a semantic value to expressions of the ``logic`` module, under
+an assignment ``g``:
+
+    >>> dom = val.domain
+    >>> g = nltk.sem.Assignment(dom)
+    >>> m.evaluate('all x.(boy(x) -> - girl(x))', g)
+    True
+
+
+``evaluate()`` calls a recursive function ``satisfy()``, which in turn
+calls a function ``i()`` to interpret non-logical constants and
+individual variables. ``i()`` delegates the interpretation of these to
+the model's ``Valuation`` and the variable assignment ``g``
+respectively. Any atomic expression which cannot be assigned a value
+by ``i`` raises an ``Undefined`` exception; this is caught by
+``evaluate``, which returns the string ``'Undefined'``.
+
+    >>> m.evaluate('walk(adam)', g, trace=2)
+    <BLANKLINE>
+    'walk(adam)' is undefined under M, g
+    'Undefined'
+
+Batch Processing
+----------------
+
+The utility functions ``interpret_sents()`` and ``evaluate_sents()`` are intended to
+help with processing multiple sentences. Here's an example of the first of these:
+
+    >>> sents = ['Mary walks']
+    >>> results = nltk.sem.util.interpret_sents(sents, 'grammars/sample_grammars/sem2.fcfg')
+    >>> for result in results:
+    ...     for (synrep, semrep) in result:
+    ...         print(synrep)
+    (S[SEM=<walk(mary)>]
+      (NP[-LOC, NUM='sg', SEM=<\P.P(mary)>]
+        (PropN[-LOC, NUM='sg', SEM=<\P.P(mary)>] Mary))
+      (VP[NUM='sg', SEM=<\x.walk(x)>]
+        (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks)))
+
+In order to provide backwards compatibility with 'legacy' grammars,
+where the semantic value is specified with a lowercase ``sem`` feature,
+the relevant feature name can be passed to the function using the
+``semkey`` parameter, as shown here:
+
+    >>> sents = ['raining']
+    >>> g = nltk.grammar.FeatureGrammar.fromstring("""
+    ... % start S
+    ... S[sem=<raining>] -> 'raining'
+    ... """)
+    >>> results = nltk.sem.util.interpret_sents(sents, g, semkey='sem')
+    >>> for result in results:
+    ...     for (synrep, semrep) in result:
+    ...         print(semrep)
+    raining
+
+The function ``evaluate_sents()`` works in a similar manner, but also needs to be
+passed a ``Model`` against which the semantic representations are evaluated.
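+
+For instance (a minimal sketch, skipped here; it reuses the model ``m``
+and assignment ``g`` defined at the top of this file, and sentences
+whose vocabulary the model does not cover evaluate to ``'Undefined'``):
+
+    >>> nltk.sem.util.evaluate_sents(['Mary walks'], 'grammars/sample_grammars/sem2.fcfg', m, g) # doctest: +SKIP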
+
+Unit Tests
+==========
+
+
+Unit tests for relations and valuations
+---------------------------------------
+
+    >>> from nltk.sem import *
+
+Relations are sets of tuples, all of the same length.
+
+    >>> s1 = set([('d1', 'd2'), ('d1', 'd1'), ('d2', 'd1')])
+    >>> is_rel(s1)
+    True
+    >>> s2 = set([('d1', 'd2'), ('d1', 'd2'), ('d1',)])
+    >>> is_rel(s2)
+    Traceback (most recent call last):
+      . . .
+    ValueError: Set set([('d1', 'd2'), ('d1',)]) contains sequences of different lengths
+    >>> s3 = set(['d1', 'd2'])
+    >>> is_rel(s3)
+    Traceback (most recent call last):
+      . . .
+    ValueError: Set set(['d2', 'd1']) contains sequences of different lengths
+    >>> s4 = set2rel(s3)
+    >>> is_rel(s4)
+    True
+    >>> is_rel(set())
+    True
+    >>> null_binary_rel = set([(None, None)])
+    >>> is_rel(null_binary_rel)
+    True
+
+Sets of entities are converted into sets of singleton tuples
+(containing strings).
+
+    >>> sorted(set2rel(s3))
+    [('d1',), ('d2',)]
+    >>> sorted(set2rel(set([1,3,5,])))
+    ['1', '3', '5']
+    >>> set2rel(set()) == set()
+    True
+    >>> set2rel(set2rel(s3)) == set2rel(s3)
+    True
+
+Predication is evaluated by set membership.
+
+    >>> ('d1', 'd2') in s1
+    True
+    >>> ('d2', 'd2') in s1
+    False
+    >>> ('d1',) in s1
+    False
+    >>> 'd2' in s1
+    False
+    >>> ('d1',) in s4
+    True
+    >>> ('d1',) in set()
+    False
+    >>> 'd1' in  null_binary_rel
+    False
+
+
+    >>> val = Valuation([('Fido', 'd1'), ('dog', set(['d1', 'd2'])), ('walk', set())])
+    >>> sorted(val['dog'])
+    [('d1',), ('d2',)]
+    >>> val.domain == set(['d1', 'd2'])
+    True
+    >>> print(val.symbols)
+    ['Fido', 'dog', 'walk']
+
+
+Parse a valuation from a string.
+
+    >>> v = """
+    ... john => b1
+    ... mary => g1
+    ... suzie => g2
+    ... fido => d1
+    ... tess => d2
+    ... noosa => n
+    ... girl => {g1, g2}
+    ... boy => {b1, b2}
+    ... dog => {d1, d2}
+    ... bark => {d1, d2}
+    ... walk => {b1, g2, d1}
+    ... chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}
+    ... see => {(b1, g1), (b2, d2), (g1, b1),(d2, b1), (g2, n)}
+    ... in => {(b1, n), (b2, n), (d2, n)}
+    ... with => {(b1, g1), (g1, b1), (d1, b1), (b1, d1)}
+    ... """
+    >>> val = Valuation.fromstring(v)
+
+    >>> print(val) # doctest: +SKIP
+    {'bark': set([('d1',), ('d2',)]),
+     'boy': set([('b1',), ('b2',)]),
+     'chase': set([('b1', 'g1'), ('g2', 'd2'), ('g1', 'd1'), ('b2', 'g1')]),
+     'dog': set([('d1',), ('d2',)]),
+     'fido': 'd1',
+     'girl': set([('g2',), ('g1',)]),
+     'in': set([('d2', 'n'), ('b1', 'n'), ('b2', 'n')]),
+     'john': 'b1',
+     'mary': 'g1',
+     'noosa': 'n',
+     'see': set([('b1', 'g1'), ('b2', 'd2'), ('d2', 'b1'), ('g2', 'n'), ('g1', 'b1')]),
+     'suzie': 'g2',
+     'tess': 'd2',
+     'walk': set([('d1',), ('b1',), ('g2',)]),
+     'with': set([('b1', 'g1'), ('d1', 'b1'), ('b1', 'd1'), ('g1', 'b1')])}
+
+
+Unit tests for function argument application in a Model
+-------------------------------------------------------
+
+    >>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\
+    ...      ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])),
+    ...      ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')])),
+    ...      ('kiss', null_binary_rel)]
+    >>> val = Valuation(v)
+    >>> dom = val.domain
+    >>> m = Model(dom, val)
+    >>> g = Assignment(dom)
+    >>> sorted(val['boy'])
+    [('b1',), ('b2',)]
+    >>> ('b1',) in val['boy']
+    True
+    >>> ('g1',) in val['boy']
+    False
+    >>> ('foo',) in val['boy']
+    False
+    >>> ('b1', 'g1') in val['love']
+    True
+    >>> ('b1', 'b1') in val['kiss']
+    False
+    >>> sorted(val.domain)
+    ['b1', 'b2', 'd1', 'g1', 'g2']
+
+
+Model Tests
+===========
+
+Extension of Lambda expressions
+
+    >>> v0 = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\
+    ... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
+    ... ('dog', set(['d1'])),
+    ... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
+
+    >>> val0 = Valuation(v0)
+    >>> dom0 = val0.domain
+    >>> m0 = Model(dom0, val0)
+    >>> g0 = Assignment(dom0)
+
+    >>> print(m0.evaluate(r'\x. \y. love(x, y)', g0) == {'g2': {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False}, 'b2': {'g2': True, 'b2': False, 'b1': False, 'g1': False, 'd1': False}, 'b1': {'g2': False, 'b2': False, 'b1': False, 'g1': True, 'd1': False}, 'g1': {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False}, 'd1': {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False}})
+    True
+    >>> print(m0.evaluate(r'\x. dog(x) (adam)', g0))
+    False
+    >>> print(m0.evaluate(r'\x. (dog(x) | boy(x)) (adam)', g0))
+    True
+    >>> print(m0.evaluate(r'\x. \y. love(x, y)(fido)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False})
+    True
+    >>> print(m0.evaluate(r'\x. \y. love(x, y)(adam)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': True, 'd1': False})
+    True
+    >>> print(m0.evaluate(r'\x. \y. love(x, y)(betty)', g0) == {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False})
+    True
+    >>> print(m0.evaluate(r'\x. \y. love(x, y)(betty)(adam)', g0))
+    True
+    >>> print(m0.evaluate(r'\x. \y. love(x, y)(betty, adam)', g0))
+    True
+    >>> print(m0.evaluate(r'\y. \x. love(x, y)(fido)(adam)', g0))
+    False
+    >>> print(m0.evaluate(r'\y. \x. love(x, y)(betty, adam)', g0))
+    True
+    >>> print(m0.evaluate(r'\x. exists y. love(x, y)', g0) == {'g2': True, 'b2': True, 'b1': True, 'g1': True, 'd1': False})
+    True
+    >>> print(m0.evaluate(r'\z. adam', g0) == {'g2': 'b1', 'b2': 'b1', 'b1': 'b1', 'g1': 'b1', 'd1': 'b1'})
+    True
+    >>> print(m0.evaluate(r'\z. love(x, y)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False})
+    True
+
+
+Propositional Model Test
+------------------------
+
+    >>> tests = [
+    ...     ('P & Q', True),
+    ...     ('P & R', False),
+    ...     ('- P', False),
+    ...     ('- R', True),
+    ...     ('- - P', True),
+    ...     ('- (P & R)', True),
+    ...     ('P | R', True),
+    ...     ('R | P', True),
+    ...     ('R | R', False),
+    ...     ('- P | R', False),
+    ...     ('P | - P', True),
+    ...     ('P -> Q', True),
+    ...     ('P -> R', False),
+    ...     ('R -> P', True),
+    ...     ('P <-> P', True),
+    ...     ('R <-> R', True),
+    ...     ('P <-> R', False),
+    ...     ]
+    >>> val1 = Valuation([('P', True), ('Q', True), ('R', False)])
+    >>> dom = set([])
+    >>> m = Model(dom, val1)
+    >>> g = Assignment(dom)
+    >>> for (sent, testvalue) in tests:
+    ...     semvalue = m.evaluate(sent, g)
+    ...     if semvalue == testvalue:
+    ...         print('*', end=' ')
+    * * * * * * * * * * * * * * * * *
+
+
+Test of i Function
+------------------
+
+    >>> from nltk.sem import Expression
+    >>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
+    ...      ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])),
+    ...      ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
+    >>> val = Valuation(v)
+    >>> dom = val.domain
+    >>> m = Model(dom, val)
+    >>> g = Assignment(dom, [('x', 'b1'), ('y', 'g2')])
+    >>> exprs = ['adam', 'girl', 'love', 'walks', 'x', 'y', 'z']
+    >>> parsed_exprs = [Expression.fromstring(e) for e in exprs]
+    >>> sorted_set = lambda x: sorted(x) if isinstance(x, set) else x
+    >>> for parsed in parsed_exprs:
+    ...     try:
+    ...         print("'%s' gets value %s" % (parsed, sorted_set(m.i(parsed, g))))
+    ...     except Undefined:
+    ...         print("'%s' is Undefined" % parsed)
+    'adam' gets value b1
+    'girl' gets value [('g1',), ('g2',)]
+    'love' gets value [('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]
+    'walks' is Undefined
+    'x' gets value b1
+    'y' gets value g2
+    'z' is Undefined
+
+Test for formulas in Model
+--------------------------
+
+    >>> tests = [
+    ...     ('love(adam, betty)', True),
+    ...     ('love(adam, sue)', 'Undefined'),
+    ...     ('dog(fido)', True),
+    ...     ('- dog(fido)', False),
+    ...     ('- - dog(fido)', True),
+    ...     ('- dog(sue)', 'Undefined'),
+    ...     ('dog(fido) & boy(adam)', True),
+    ...     ('- (dog(fido) & boy(adam))', False),
+    ...     ('- dog(fido) & boy(adam)', False),
+    ...     ('dog(fido) | boy(adam)', True),
+    ...     ('- (dog(fido) | boy(adam))', False),
+    ...     ('- dog(fido) | boy(adam)', True),
+    ...     ('- dog(fido) | - boy(adam)', False),
+    ...     ('dog(fido) -> boy(adam)', True),
+    ...     ('- (dog(fido) -> boy(adam))', False),
+    ...     ('- dog(fido) -> boy(adam)', True),
+    ...     ('exists x . love(adam, x)', True),
+    ...     ('all x . love(adam, x)', False),
+    ...     ('fido = fido', True),
+    ...     ('exists x . all y. love(x, y)', False),
+    ...     ('exists x . (x = fido)', True),
+    ...     ('all x . (dog(x) | - dog(x))', True),
+    ...     ('adam = mia', 'Undefined'),
+    ...     ('\\x. (boy(x) | girl(x))', {'g2': True, 'b2': True, 'b1': True, 'g1': True, 'd1': False}),
+    ...     ('\\x. exists y. (boy(x) & love(x, y))', {'g2': False, 'b2': True, 'b1': True, 'g1': False, 'd1': False}),
+    ...     ('exists z1. boy(z1)', True),
+    ...     ('exists x. (boy(x) & - (x = adam))', True),
+    ...     ('exists x. (boy(x) & all y. love(y, x))', False),
+    ...     ('all x. (boy(x) | girl(x))', False),
+    ...     ('all x. (girl(x) -> exists y. boy(y) & love(x, y))', False),
+    ...     ('exists x. (boy(x) & all y. (girl(y) -> love(y, x)))', True),
+    ...     ('exists x. (boy(x) & all y. (girl(y) -> love(x, y)))', False),
+    ...     ('all x. (dog(x) -> - girl(x))', True),
+    ...     ('exists x. exists y. (love(x, y) & love(x, y))', True),
+    ...     ]
+    >>> for (sent, testvalue) in tests:
+    ...     semvalue = m.evaluate(sent, g)
+    ...     if semvalue == testvalue:
+    ...         print('*', end=' ')
+    ...     else:
+    ...         print(sent, semvalue)
+    * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+
+
+
+Satisfier Tests
+---------------
+
+    >>> formulas = [
+    ...     'boy(x)',
+    ...     '(x = x)',
+    ...     '(boy(x) | girl(x))',
+    ...     '(boy(x) & girl(x))',
+    ...     'love(adam, x)',
+    ...     'love(x, adam)',
+    ...     '- (x = adam)',
+    ...     'exists z22. love(x, z22)',
+    ...     'exists y. love(y, x)',
+    ...     'all y. (girl(y) -> love(x, y))',
+    ...     'all y. (girl(y) -> love(y, x))',
+    ...     'all y. (girl(y) -> (boy(x) & love(y, x)))',
+    ...     'boy(x) & all y. (girl(y) -> love(x, y))',
+    ...     'boy(x) & all y. (girl(y) -> love(y, x))',
+    ...     'boy(x) & exists y. (girl(y) & love(y, x))',
+    ...     'girl(x) -> dog(x)',
+    ...     'all y. (dog(y) -> (x = y))',
+    ...     '- exists y. love(y, x)',
+    ...     'exists y. (love(adam, y) & love(y, x))'
+    ...     ]
+    >>> g.purge()
+    >>> g.add('x', 'b1')
+    {'x': 'b1'}
+    >>> for f in formulas: # doctest: +NORMALIZE_WHITESPACE
+    ...     try:
+    ...         print("'%s' gets value: %s" % (f, m.evaluate(f, g)))
+    ...     except Undefined:
+    ...         print("'%s' is Undefined" % f)
+    'boy(x)' gets value: True
+    '(x = x)' gets value: True
+    '(boy(x) | girl(x))' gets value: True
+    '(boy(x) & girl(x))' gets value: False
+    'love(adam, x)' gets value: False
+    'love(x, adam)' gets value: False
+    '- (x = adam)' gets value: False
+    'exists z22. love(x, z22)' gets value: True
+    'exists y. love(y, x)' gets value: True
+    'all y. (girl(y) -> love(x, y))' gets value: False
+    'all y. (girl(y) -> love(y, x))' gets value: True
+    'all y. (girl(y) -> (boy(x) & love(y, x)))' gets value: True
+    'boy(x) & all y. (girl(y) -> love(x, y))' gets value: False
+    'boy(x) & all y. (girl(y) -> love(y, x))' gets value: True
+    'boy(x) & exists y. (girl(y) & love(y, x))' gets value: True
+    'girl(x) -> dog(x)' gets value: True
+    'all y. (dog(y) -> (x = y))' gets value: False
+    '- exists y. love(y, x)' gets value: False
+    'exists y. (love(adam, y) & love(y, x))' gets value: True
+
+    >>> from nltk.sem import Expression
+    >>> for fmla in formulas: # doctest: +NORMALIZE_WHITESPACE
+    ...     p = Expression.fromstring(fmla)
+    ...     g.purge()
+    ...     print("Satisfiers of '%s':\n\t%s" % (p, sorted(m.satisfiers(p, 'x', g))))
+    Satisfiers of 'boy(x)':
+    ['b1', 'b2']
+    Satisfiers of '(x = x)':
+    ['b1', 'b2', 'd1', 'g1', 'g2']
+    Satisfiers of '(boy(x) | girl(x))':
+    ['b1', 'b2', 'g1', 'g2']
+    Satisfiers of '(boy(x) & girl(x))':
+    []
+    Satisfiers of 'love(adam,x)':
+    ['g1']
+    Satisfiers of 'love(x,adam)':
+    ['g1', 'g2']
+    Satisfiers of '-(x = adam)':
+    ['b2', 'd1', 'g1', 'g2']
+    Satisfiers of 'exists z22.love(x,z22)':
+    ['b1', 'b2', 'g1', 'g2']
+    Satisfiers of 'exists y.love(y,x)':
+    ['b1', 'g1', 'g2']
+    Satisfiers of 'all y.(girl(y) -> love(x,y))':
+    []
+    Satisfiers of 'all y.(girl(y) -> love(y,x))':
+    ['b1']
+    Satisfiers of 'all y.(girl(y) -> (boy(x) & love(y,x)))':
+    ['b1']
+    Satisfiers of '(boy(x) & all y.(girl(y) -> love(x,y)))':
+    []
+    Satisfiers of '(boy(x) & all y.(girl(y) -> love(y,x)))':
+    ['b1']
+    Satisfiers of '(boy(x) & exists y.(girl(y) & love(y,x)))':
+    ['b1']
+    Satisfiers of '(girl(x) -> dog(x))':
+    ['b1', 'b2', 'd1']
+    Satisfiers of 'all y.(dog(y) -> (x = y))':
+    ['d1']
+    Satisfiers of '-exists y.love(y,x)':
+    ['b2', 'd1']
+    Satisfiers of 'exists y.(love(adam,y) & love(y,x))':
+    ['b1']
+
+
+Tests based on the Blackburn & Bos testsuite
+--------------------------------------------
+
+    >>> v1 = [('jules', 'd1'), ('vincent', 'd2'), ('pumpkin', 'd3'),
+    ...       ('honey_bunny', 'd4'), ('yolanda', 'd5'),
+    ...       ('customer', set(['d1', 'd2'])),
+    ...       ('robber', set(['d3', 'd4'])),
+    ...       ('love', set([('d3', 'd4')]))]
+    >>> val1 = Valuation(v1)
+    >>> dom1 = val1.domain
+    >>> m1 = Model(dom1, val1)
+    >>> g1 = Assignment(dom1)
+
+    >>> v2 = [('jules', 'd1'), ('vincent', 'd2'), ('pumpkin', 'd3'),
+    ...       ('honey_bunny', 'd4'), ('yolanda', 'd4'),
+    ...       ('customer', set(['d1', 'd2', 'd5', 'd6'])),
+    ...       ('robber', set(['d3', 'd4'])),
+    ...       ('love', set([(None, None)]))]
+    >>> val2 = Valuation(v2)
+    >>> dom2 = set(['d1', 'd2', 'd3', 'd4', 'd5', 'd6'])
+    >>> m2 = Model(dom2, val2)
+    >>> g2 = Assignment(dom2)
+    >>> g21 = Assignment(dom2)
+    >>> g21.add('y', 'd3')
+    {'y': 'd3'}
+
+    >>> v3 = [('mia', 'd1'), ('jody', 'd2'), ('jules', 'd3'),
+    ...       ('vincent', 'd4'),
+    ...       ('woman', set(['d1', 'd2'])), ('man', set(['d3', 'd4'])),
+    ...       ('joke', set(['d5', 'd6'])), ('episode', set(['d7', 'd8'])),
+    ...       ('in', set([('d5', 'd7'), ('d5', 'd8')])),
+    ...       ('tell', set([('d1', 'd5'), ('d2', 'd6')]))]
+    >>> val3 = Valuation(v3)
+    >>> dom3 = set(['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8'])
+    >>> m3 = Model(dom3, val3)
+    >>> g3 = Assignment(dom3)
+
+    >>> tests = [
+    ...     ('exists x. robber(x)', m1, g1, True),
+    ...     ('exists x. exists y. love(y, x)', m1, g1, True),
+    ...     ('exists x0. exists x1. love(x1, x0)', m2, g2, False),
+    ...     ('all x. all y. love(y, x)', m2, g2, False),
+    ...     ('- (all x. all y. love(y, x))', m2, g2, True),
+    ...     ('all x. all y. - love(y, x)', m2, g2, True),
+    ...     ('yolanda = honey_bunny', m2, g2, True),
+    ...     ('mia = honey_bunny', m2, g2, 'Undefined'),
+    ...     ('- (yolanda = honey_bunny)', m2, g2, False),
+    ...     ('- (mia = honey_bunny)', m2, g2, 'Undefined'),
+    ...     ('all x. (robber(x) | customer(x))', m2, g2, True),
+    ...     ('- (all x. (robber(x) | customer(x)))', m2, g2, False),
+    ...     ('(robber(x) | customer(x))', m2, g2, 'Undefined'),
+    ...     ('(robber(y) | customer(y))', m2, g21, True),
+    ...     ('exists x. (man(x) & exists x. woman(x))', m3, g3, True),
+    ...     ('exists x. (man(x) & exists x. woman(x))', m3, g3, True),
+    ...     ('- exists x. woman(x)', m3, g3, False),
+    ...     ('exists x. (tasty(x) & burger(x))', m3, g3, 'Undefined'),
+    ...     ('- exists x. (tasty(x) & burger(x))', m3, g3, 'Undefined'),
+    ...     ('exists x. (man(x) & - exists y. woman(y))', m3, g3, False),
+    ...     ('exists x. (man(x) & - exists x. woman(x))', m3, g3, False),
+    ...     ('exists x. (woman(x) & - exists x. customer(x))', m2, g2, 'Undefined'),
+    ... ]
+
+    >>> for item in tests:
+    ...     sentence, model, g, testvalue = item
+    ...     semvalue = model.evaluate(sentence, g)
+    ...     if semvalue == testvalue:
+    ...         print('*', end=' ')
+    ...     g.purge()
+    * * * * * * * * * * * * * * * * * * * * * *
+
+
+Tests for mapping from syntax to semantics
+------------------------------------------
+
+Load a valuation from a file.
+
+    >>> import nltk.data
+    >>> from nltk.sem.util import parse_sents
+    >>> val = nltk.data.load('grammars/sample_grammars/valuation1.val')
+    >>> dom = val.domain
+    >>> m = Model(dom, val)
+    >>> g = Assignment(dom)
+    >>> gramfile = 'grammars/sample_grammars/sem2.fcfg'
+    >>> inputs = ['John sees a girl', 'every dog barks']
+    >>> parses = parse_sents(inputs, gramfile)
+    >>> for sent, trees in zip(inputs, parses):
+    ...     print()
+    ...     print("Sentence: %s" % sent)
+    ...     for tree in trees:
+    ...         print("Parse:\n %s" %tree)
+    ...         print("Semantics: %s" %  root_semrep(tree))
+    <BLANKLINE>
+    Sentence: John sees a girl
+    Parse:
+     (S[SEM=<exists x.(girl(x) & see(john,x))>]
+      (NP[-LOC, NUM='sg', SEM=<\P.P(john)>]
+        (PropN[-LOC, NUM='sg', SEM=<\P.P(john)>] John))
+      (VP[NUM='sg', SEM=<\y.exists x.(girl(x) & see(y,x))>]
+        (TV[NUM='sg', SEM=<\X y.X(\x.see(y,x))>, TNS='pres'] sees)
+        (NP[NUM='sg', SEM=<\Q.exists x.(girl(x) & Q(x))>]
+          (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] a)
+          (Nom[NUM='sg', SEM=<\x.girl(x)>]
+            (N[NUM='sg', SEM=<\x.girl(x)>] girl)))))
+    Semantics: exists x.(girl(x) & see(john,x))
+    <BLANKLINE>
+    Sentence: every dog barks
+    Parse:
+     (S[SEM=<all x.(dog(x) -> bark(x))>]
+      (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
+        (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
+        (Nom[NUM='sg', SEM=<\x.dog(x)>]
+          (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
+      (VP[NUM='sg', SEM=<\x.bark(x)>]
+        (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
+    Semantics: all x.(dog(x) -> bark(x))
+
+    >>> sent = "every dog barks"
+    >>> result = nltk.sem.util.interpret_sents([sent], gramfile)[0]
+    >>> for (syntree, semrep) in result:
+    ...     print(syntree)
+    ...     print()
+    ...     print(semrep)
+    (S[SEM=<all x.(dog(x) -> bark(x))>]
+      (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
+        (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
+        (Nom[NUM='sg', SEM=<\x.dog(x)>]
+          (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
+      (VP[NUM='sg', SEM=<\x.bark(x)>]
+        (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
+    <BLANKLINE>
+    all x.(dog(x) -> bark(x))
+
+    >>> result = nltk.sem.util.evaluate_sents([sent], gramfile, m, g)[0]
+    >>> for (syntree, semrep, value) in result:
+    ...     print(syntree)
+    ...     print()
+    ...     print(semrep)
+    ...     print()
+    ...     print(value)
+    (S[SEM=<all x.(dog(x) -> bark(x))>]
+      (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
+        (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
+        (Nom[NUM='sg', SEM=<\x.dog(x)>]
+          (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
+      (VP[NUM='sg', SEM=<\x.bark(x)>]
+        (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
+    <BLANKLINE>
+    all x.(dog(x) -> bark(x))
+    <BLANKLINE>
+    True
+
+    >>> sents = ['Mary walks', 'John sees a dog']
+    >>> results = nltk.sem.util.interpret_sents(sents, 'grammars/sample_grammars/sem2.fcfg')
+    >>> for result in results:
+    ...     for (synrep, semrep) in result:
+    ...         print(synrep)
+    (S[SEM=<walk(mary)>]
+      (NP[-LOC, NUM='sg', SEM=<\P.P(mary)>]
+        (PropN[-LOC, NUM='sg', SEM=<\P.P(mary)>] Mary))
+      (VP[NUM='sg', SEM=<\x.walk(x)>]
+        (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks)))
+    (S[SEM=<exists x.(dog(x) & see(john,x))>]
+      (NP[-LOC, NUM='sg', SEM=<\P.P(john)>]
+        (PropN[-LOC, NUM='sg', SEM=<\P.P(john)>] John))
+      (VP[NUM='sg', SEM=<\y.exists x.(dog(x) & see(y,x))>]
+        (TV[NUM='sg', SEM=<\X y.X(\x.see(y,x))>, TNS='pres'] sees)
+        (NP[NUM='sg', SEM=<\Q.exists x.(dog(x) & Q(x))>]
+          (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] a)
+          (Nom[NUM='sg', SEM=<\x.dog(x)>]
+            (N[NUM='sg', SEM=<\x.dog(x)>] dog)))))
+
+Cooper Storage
+--------------
+
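+In Cooper storage, the quantifiers of a sentence are kept in a "store"
+separate from the core predication; retrieving the stored binding
+operators in different orders yields the different scope readings.
+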
+    >>> from nltk.sem import cooper_storage as cs
+    >>> sentence = 'every girl chases a dog'
+    >>> trees = cs.parse_with_bindops(sentence, grammar='grammars/book_grammars/storage.fcfg')
+    >>> semrep = trees[0].label()['SEM']
+    >>> cs_semrep = cs.CooperStore(semrep)
+    >>> print(cs_semrep.core)
+    chase(z2,z4)
+    >>> for bo in cs_semrep.store:
+    ...     print(bo)
+    bo(\P.all x.(girl(x) -> P(x)),z2)
+    bo(\P.exists x.(dog(x) & P(x)),z4)
+    >>> cs_semrep.s_retrieve(trace=True)
+    Permutation 1
+       (\P.all x.(girl(x) -> P(x)))(\z2.chase(z2,z4))
+       (\P.exists x.(dog(x) & P(x)))(\z4.all x.(girl(x) -> chase(x,z4)))
+    Permutation 2
+       (\P.exists x.(dog(x) & P(x)))(\z4.chase(z2,z4))
+       (\P.all x.(girl(x) -> P(x)))(\z2.exists x.(dog(x) & chase(z2,x)))
+
+    >>> for reading in cs_semrep.readings:
+    ...     print(reading)
+    exists x.(dog(x) & all z3.(girl(z3) -> chase(z3,x)))
+    all x.(girl(x) -> exists z4.(dog(z4) & chase(x,z4)))
+
+
diff --git a/nlp_resource_data/nltk/test/semantics_fixt.py b/nlp_resource_data/nltk/test/semantics_fixt.py
new file mode 100644 (file)
index 0000000..8d67144
--- /dev/null
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+# reset the variables counter before running tests
+def setup_module(module):
+    from nltk.sem import logic
+
+    logic._counter._value = 0
diff --git a/nlp_resource_data/nltk/test/sentiment.doctest b/nlp_resource_data/nltk/test/sentiment.doctest
new file mode 100644 (file)
index 0000000..36e5b20
--- /dev/null
@@ -0,0 +1,233 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+===================
+Sentiment Analysis
+===================
+
+    >>> from nltk.classify import NaiveBayesClassifier
+    >>> from nltk.corpus import subjectivity
+    >>> from nltk.sentiment import SentimentAnalyzer
+    >>> from nltk.sentiment.util import *
+
+    >>> n_instances = 100
+    >>> subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
+    >>> obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]
+    >>> len(subj_docs), len(obj_docs)
+    (100, 100)
+
+Each document is represented by a tuple (sentence, label). The sentence is tokenized,
+so it is represented by a list of strings:
+
+    >>> subj_docs[0]
+    (['smart', 'and', 'alert', ',', 'thirteen', 'conversations', 'about', 'one',
+    'thing', 'is', 'a', 'small', 'gem', '.'], 'subj')
+
+We separately split subjective and objective instances to keep a balanced uniform
+class distribution in both train and test sets.
+
+    >>> train_subj_docs = subj_docs[:80]
+    >>> test_subj_docs = subj_docs[80:100]
+    >>> train_obj_docs = obj_docs[:80]
+    >>> test_obj_docs = obj_docs[80:100]
+    >>> training_docs = train_subj_docs+train_obj_docs
+    >>> testing_docs = test_subj_docs+test_obj_docs
+
+    >>> sentim_analyzer = SentimentAnalyzer()
+    >>> all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])
+
+We use simple unigram word features, handling negation:
+
+    >>> unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
+    >>> len(unigram_feats)
+    83
+    >>> sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
+
+We apply features to obtain a feature-value representation of our datasets:
+
+    >>> training_set = sentim_analyzer.apply_features(training_docs)
+    >>> test_set = sentim_analyzer.apply_features(testing_docs)
+
+We can now train our classifier on the training set, and subsequently output the
+evaluation results:
+
+    >>> trainer = NaiveBayesClassifier.train
+    >>> classifier = sentim_analyzer.train(trainer, training_set)
+    Training classifier
+    >>> for key,value in sorted(sentim_analyzer.evaluate(test_set).items()):
+    ...     print('{0}: {1}'.format(key, value))
+    Evaluating NaiveBayesClassifier results...
+    Accuracy: 0.8
+    F-measure [obj]: 0.8
+    F-measure [subj]: 0.8
+    Precision [obj]: 0.8
+    Precision [subj]: 0.8
+    Recall [obj]: 0.8
+    Recall [subj]: 0.8
+
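+The trained classifier can also be applied to new, tokenized sentences
+(a minimal sketch; the example sentence is arbitrary and the predicted
+label is not shown):
+
+    >>> feats = sentim_analyzer.extract_features('a gripping , well acted thriller'.split()) # doctest: +SKIP
+    >>> classifier.classify(feats) # doctest: +SKIP
+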
+
+Vader
+------
+
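+VADER is a lexicon- and rule-based sentiment analyzer attuned to
+social-media text.  Its ``polarity_scores()`` method returns the
+proportions of negative, neutral and positive content in a sentence,
+plus a normalized ``compound`` score ranging from -1 (most negative)
+to +1 (most positive).
+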
+    >>> from nltk.sentiment.vader import SentimentIntensityAnalyzer
+    >>> sentences = ["VADER is smart, handsome, and funny.", # positive sentence example
+    ...    "VADER is smart, handsome, and funny!", # punctuation emphasis handled correctly (sentiment intensity adjusted)
+    ...    "VADER is very smart, handsome, and funny.",  # booster words handled correctly (sentiment intensity adjusted)
+    ...    "VADER is VERY SMART, handsome, and FUNNY.",  # emphasis for ALLCAPS handled
+    ...    "VADER is VERY SMART, handsome, and FUNNY!!!",# combination of signals - VADER appropriately adjusts intensity
+    ...    "VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!",# booster words & punctuation make this close to ceiling for score
+    ...    "The book was good.",         # positive sentence
+    ...    "The book was kind of good.", # qualified positive sentence is handled correctly (intensity adjusted)
+    ...    "The plot was good, but the characters are uncompelling and the dialog is not great.", # mixed negation sentence
+    ...    "A really bad, horrible book.",       # negative sentence with booster words
+    ...    "At least it isn't a horrible book.", # negated negative sentence with contraction
+    ...    ":) and :D",     # emoticons handled
+    ...    "",              # an empty string is correctly handled
+    ...    "Today sux",     #  negative slang handled
+    ...    "Today sux!",    #  negative slang with punctuation emphasis handled
+    ...    "Today SUX!",    #  negative slang with capitalization emphasis
+    ...    "Today kinda sux! But I'll get by, lol" # mixed sentiment example with slang and constrastive conjunction "but"
+    ... ]
+    >>> paragraph = "It was one of the worst movies I've seen, despite good reviews. \
+    ... Unbelievably bad acting!! Poor direction. VERY poor production. \
+    ... The movie was bad. Very bad movie. VERY bad movie. VERY BAD movie. VERY BAD movie!"
+
+    >>> from nltk import tokenize
+    >>> lines_list = tokenize.sent_tokenize(paragraph)
+    >>> sentences.extend(lines_list)
+
+    >>> tricky_sentences = [
+    ...    "Most automated sentiment analysis tools are shit.",
+    ...    "VADER sentiment analysis is the shit.",
+    ...    "Sentiment analysis has never been good.",
+    ...    "Sentiment analysis with VADER has never been this good.",
+    ...    "Warren Beatty has never been so entertaining.",
+    ...    "I won't say that the movie is astounding and I wouldn't claim that \
+    ...    the movie is too banal either.",
+    ...    "I like to hate Michael Bay films, but I couldn't fault this one",
+    ...    "It's one thing to watch an Uwe Boll film, but another thing entirely \
+    ...    to pay for it",
+    ...    "The movie was too good",
+    ...    "This movie was actually neither that funny, nor super witty.",
+    ...    "This movie doesn't care about cleverness, wit or any other kind of \
+    ...    intelligent humor.",
+    ...    "Those who find ugly meanings in beautiful things are corrupt without \
+    ...    being charming.",
+    ...    "There are slow and repetitive parts, BUT it has just enough spice to \
+    ...    keep it interesting.",
+    ...    "The script is not fantastic, but the acting is decent and the cinematography \
+    ...    is EXCELLENT!",
+    ...    "Roger Dodger is one of the most compelling variations on this theme.",
+    ...    "Roger Dodger is one of the least compelling variations on this theme.",
+    ...    "Roger Dodger is at least compelling as a variation on the theme.",
+    ...    "they fall in love with the product",
+    ...    "but then it breaks",
+    ...    "usually around the time the 90 day warranty expires",
+    ...    "the twin towers collapsed today",
+    ...    "However, Mr. Carter solemnly argues, his client carried out the kidnapping \
+    ...    under orders and in the ''least offensive way possible.''"
+    ... ]
+    >>> sentences.extend(tricky_sentences)
+    >>> sid = SentimentIntensityAnalyzer()
+    >>> for sentence in sentences:
+    ...     print(sentence)
+    ...     ss = sid.polarity_scores(sentence)
+    ...     for k in sorted(ss):
+    ...         print('{0}: {1}, '.format(k, ss[k]), end='')
+    ...     print()
+    VADER is smart, handsome, and funny.
+    compound: 0.8316, neg: 0.0, neu: 0.254, pos: 0.746,
+    VADER is smart, handsome, and funny!
+    compound: 0.8439, neg: 0.0, neu: 0.248, pos: 0.752,
+    VADER is very smart, handsome, and funny.
+    compound: 0.8545, neg: 0.0, neu: 0.299, pos: 0.701,
+    VADER is VERY SMART, handsome, and FUNNY.
+    compound: 0.9227, neg: 0.0, neu: 0.246, pos: 0.754,
+    VADER is VERY SMART, handsome, and FUNNY!!!
+    compound: 0.9342, neg: 0.0, neu: 0.233, pos: 0.767,
+    VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!
+    compound: 0.9469, neg: 0.0, neu: 0.294, pos: 0.706,
+    The book was good.
+    compound: 0.4404, neg: 0.0, neu: 0.508, pos: 0.492,
+    The book was kind of good.
+    compound: 0.3832, neg: 0.0, neu: 0.657, pos: 0.343,
+    The plot was good, but the characters are uncompelling and the dialog is not great.
+    compound: -0.7042, neg: 0.327, neu: 0.579, pos: 0.094,
+    A really bad, horrible book.
+    compound: -0.8211, neg: 0.791, neu: 0.209, pos: 0.0,
+    At least it isn't a horrible book.
+    compound: 0.431, neg: 0.0, neu: 0.637, pos: 0.363,
+    :) and :D
+    compound: 0.7925, neg: 0.0, neu: 0.124, pos: 0.876,
+    <BLANKLINE>
+    compound: 0.0, neg: 0.0, neu: 0.0, pos: 0.0,
+    Today sux
+    compound: -0.3612, neg: 0.714, neu: 0.286, pos: 0.0,
+    Today sux!
+    compound: -0.4199, neg: 0.736, neu: 0.264, pos: 0.0,
+    Today SUX!
+    compound: -0.5461, neg: 0.779, neu: 0.221, pos: 0.0,
+    Today kinda sux! But I'll get by, lol
+    compound: 0.2228, neg: 0.195, neu: 0.531, pos: 0.274,
+    It was one of the worst movies I've seen, despite good reviews.
+    compound: -0.7584, neg: 0.394, neu: 0.606, pos: 0.0,
+    Unbelievably bad acting!!
+    compound: -0.6572, neg: 0.686, neu: 0.314, pos: 0.0,
+    Poor direction.
+    compound: -0.4767, neg: 0.756, neu: 0.244, pos: 0.0,
+    VERY poor production.
+    compound: -0.6281, neg: 0.674, neu: 0.326, pos: 0.0,
+    The movie was bad.
+    compound: -0.5423, neg: 0.538, neu: 0.462, pos: 0.0,
+    Very bad movie.
+    compound: -0.5849, neg: 0.655, neu: 0.345, pos: 0.0,
+    VERY bad movie.
+    compound: -0.6732, neg: 0.694, neu: 0.306, pos: 0.0,
+    VERY BAD movie.
+    compound: -0.7398, neg: 0.724, neu: 0.276, pos: 0.0,
+    VERY BAD movie!
+    compound: -0.7616, neg: 0.735, neu: 0.265, pos: 0.0,
+    Most automated sentiment analysis tools are shit.
+    compound: -0.5574, neg: 0.375, neu: 0.625, pos: 0.0,
+    VADER sentiment analysis is the shit.
+    compound: 0.6124, neg: 0.0, neu: 0.556, pos: 0.444,
+    Sentiment analysis has never been good.
+    compound: -0.3412, neg: 0.325, neu: 0.675, pos: 0.0,
+    Sentiment analysis with VADER has never been this good.
+    compound: 0.5228, neg: 0.0, neu: 0.703, pos: 0.297,
+    Warren Beatty has never been so entertaining.
+    compound: 0.5777, neg: 0.0, neu: 0.616, pos: 0.384,
+    I won't say that the movie is astounding and I wouldn't claim that the movie is too banal either.
+    compound: 0.4215, neg: 0.0, neu: 0.851, pos: 0.149,
+    I like to hate Michael Bay films, but I couldn't fault this one
+    compound: 0.3153, neg: 0.157, neu: 0.534, pos: 0.309,
+    It's one thing to watch an Uwe Boll film, but another thing entirely to pay for it
+    compound: -0.2541, neg: 0.112, neu: 0.888, pos: 0.0,
+    The movie was too good
+    compound: 0.4404, neg: 0.0, neu: 0.58, pos: 0.42,
+    This movie was actually neither that funny, nor super witty.
+    compound: -0.6759, neg: 0.41, neu: 0.59, pos: 0.0,
+    This movie doesn't care about cleverness, wit or any other kind of intelligent humor.
+    compound: -0.1338, neg: 0.265, neu: 0.497, pos: 0.239,
+    Those who find ugly meanings in beautiful things are corrupt without being charming.
+    compound: -0.3553, neg: 0.314, neu: 0.493, pos: 0.192,
+    There are slow and repetitive parts, BUT it has just enough spice to keep it interesting.
+    compound: 0.4678, neg: 0.079, neu: 0.735, pos: 0.186,
+    The script is not fantastic, but the acting is decent and the cinematography is EXCELLENT!
+    compound: 0.7565, neg: 0.092, neu: 0.607, pos: 0.301,
+    Roger Dodger is one of the most compelling variations on this theme.
+    compound: 0.2944, neg: 0.0, neu: 0.834, pos: 0.166,
+    Roger Dodger is one of the least compelling variations on this theme.
+    compound: -0.1695, neg: 0.132, neu: 0.868, pos: 0.0,
+    Roger Dodger is at least compelling as a variation on the theme.
+    compound: 0.2263, neg: 0.0, neu: 0.84, pos: 0.16,
+    they fall in love with the product
+    compound: 0.6369, neg: 0.0, neu: 0.588, pos: 0.412,
+    but then it breaks
+    compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
+    usually around the time the 90 day warranty expires
+    compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
+    the twin towers collapsed today
+    compound: -0.2732, neg: 0.344, neu: 0.656, pos: 0.0,
+    However, Mr. Carter solemnly argues, his client carried out the kidnapping under orders and in the ''least offensive way possible.''
+    compound: -0.5859, neg: 0.23, neu: 0.697, pos: 0.074,
diff --git a/nlp_resource_data/nltk/test/sentiwordnet.doctest b/nlp_resource_data/nltk/test/sentiwordnet.doctest
new file mode 100644 (file)
index 0000000..70f25ee
--- /dev/null
@@ -0,0 +1,41 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+======================
+SentiWordNet Interface
+======================
+
+SentiWordNet can be imported like this:
+
+    >>> from nltk.corpus import sentiwordnet as swn
+
+------------
+SentiSynsets
+------------
+
+    >>> breakdown = swn.senti_synset('breakdown.n.03')
+    >>> print(breakdown)
+    <breakdown.n.03: PosScore=0.0 NegScore=0.25>
+    >>> breakdown.pos_score()
+    0.0
+    >>> breakdown.neg_score()
+    0.25
+    >>> breakdown.obj_score()
+    0.75
+
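+The positivity, negativity and objectivity scores of a SentiSynset always
+sum to one; the objectivity score is simply the probability mass left over
+once the positive and negative scores are assigned, as a quick check shows:
+
+    >>> breakdown.pos_score() + breakdown.neg_score() + breakdown.obj_score()
+    1.0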
+
+------
+Lookup
+------
+
+    >>> list(swn.senti_synsets('slow')) # doctest: +NORMALIZE_WHITESPACE
+    [SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'),
+    SentiSynset('slow.v.03'), SentiSynset('slow.a.01'),
+    SentiSynset('slow.a.02'), SentiSynset('dense.s.04'),
+    SentiSynset('slow.a.04'), SentiSynset('boring.s.01'),
+    SentiSynset('dull.s.08'), SentiSynset('slowly.r.01'),
+    SentiSynset('behind.r.03')]
+
+    >>> happy = swn.senti_synsets('happy', 'a')
+
+    >>> all = swn.all_senti_synsets()
diff --git a/nlp_resource_data/nltk/test/simple.doctest b/nlp_resource_data/nltk/test/simple.doctest
new file mode 100644 (file)
index 0000000..48fdcd3
--- /dev/null
@@ -0,0 +1,84 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=================
+EasyInstall Tests
+=================
+
+This file contains some simple tests that will be run by EasyInstall in
+order to test the installation when NLTK-Data is absent.
+
+------------
+Tokenization
+------------
+
+    >>> from nltk.tokenize import wordpunct_tokenize
+    >>> s = ("Good muffins cost $3.88\nin New York.  Please buy me\n"
+    ...      "two of them.\n\nThanks.")
+    >>> wordpunct_tokenize(s) # doctest: +NORMALIZE_WHITESPACE
+    ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', '.',
+    'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
+
+-------
+Metrics
+-------
+
+    >>> from nltk.metrics import precision, recall, f_measure
+    >>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
+    >>> test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
+    >>> reference_set = set(reference)
+    >>> test_set = set(test)
+    >>> precision(reference_set, test_set)
+    1.0
+    >>> print(recall(reference_set, test_set))
+    0.8
+    >>> print(f_measure(reference_set, test_set))
+    0.88888888888...
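+
+F-measure is the harmonic mean of precision and recall, so we can recompute
+the value above by hand as a quick sanity check:
+
+    >>> print(2 * 1.0 * 0.8 / (1.0 + 0.8))
+    0.88888888888...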
+
+------------------
+Feature Structures
+------------------
+
+    >>> from nltk import FeatStruct
+    >>> fs1 = FeatStruct(PER=3, NUM='pl', GND='fem')
+    >>> fs2 = FeatStruct(POS='N', AGR=fs1)
+    >>> print(fs2)
+    [       [ GND = 'fem' ] ]
+    [ AGR = [ NUM = 'pl'  ] ]
+    [       [ PER = 3     ] ]
+    [                       ]
+    [ POS = 'N'             ]
+    >>> print(fs2['AGR'])
+    [ GND = 'fem' ]
+    [ NUM = 'pl'  ]
+    [ PER = 3     ]
+    >>> print(fs2['AGR']['PER'])
+    3
+
+-------
+Parsing
+-------
+
+    >>> from nltk.parse.recursivedescent import RecursiveDescentParser
+    >>> from nltk.grammar import CFG
+    >>> grammar = CFG.fromstring("""
+    ... S -> NP VP
+    ... PP -> P NP
+    ... NP -> 'the' N | N PP | 'the' N PP
+    ... VP -> V NP | V PP | V NP PP
+    ... N -> 'cat' | 'dog' | 'rug'
+    ... V -> 'chased'
+    ... P -> 'on'
+    ... """)
+    >>> rd = RecursiveDescentParser(grammar)
+    >>> sent = 'the cat chased the dog on the rug'.split()
+    >>> for t in rd.parse(sent):
+    ...     print(t)
+    (S
+      (NP the (N cat))
+      (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
+    (S
+      (NP the (N cat))
+      (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
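+
+The two parses differ only in where the PP "on the rug" attaches: to the
+object NP in the first tree, and directly to the VP in the second. The
+grammar licenses both attachments, so the parser returns both trees.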
+
diff --git a/nlp_resource_data/nltk/test/stem.doctest b/nlp_resource_data/nltk/test/stem.doctest
new file mode 100644 (file)
index 0000000..b80104d
--- /dev/null
@@ -0,0 +1,77 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+==========
+ Stemmers
+==========
+
+Overview
+~~~~~~~~
+
+Stemmers remove morphological affixes from words, leaving only the
+word stem.
+
+    >>> from nltk.stem import *
+
+Unit tests for the Porter stemmer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    >>> from nltk.stem.porter import *
+
+Create a new Porter stemmer.
+
+    >>> stemmer = PorterStemmer()
+
+Test the stemmer on various pluralised words.
+
+    >>> plurals = ['caresses', 'flies', 'dies', 'mules', 'denied',
+    ...            'died', 'agreed', 'owned', 'humbled', 'sized',
+    ...            'meeting', 'stating', 'siezing', 'itemization',
+    ...            'sensational', 'traditional', 'reference', 'colonizer',
+    ...            'plotted']
+
+    >>> singles = [stemmer.stem(plural) for plural in plurals]
+
+    >>> print(' '.join(singles))  # doctest: +NORMALIZE_WHITESPACE
+    caress fli die mule deni die agre own humbl size meet
+    state siez item sensat tradit refer colon plot
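+
+Note that the outputs are stems rather than lemmas: strings such as 'fli'
+and 'agre' are not dictionary words, since the Porter algorithm strips and
+rewrites suffixes without consulting a lexicon.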
+
+
+Unit tests for Snowball stemmer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    >>> from nltk.stem.snowball import SnowballStemmer
+
+See which languages are supported.
+
+    >>> print(" ".join(SnowballStemmer.languages))
+    arabic danish dutch english finnish french german hungarian italian
+    norwegian porter portuguese romanian russian spanish swedish
+
+Create a new instance of a language specific subclass.
+
+    >>> stemmer = SnowballStemmer("english")
+
+Stem a word.
+
+    >>> print(stemmer.stem("running"))
+    run
+
+Decide not to stem stopwords.
+
+    >>> stemmer2 = SnowballStemmer("english", ignore_stopwords=True)
+    >>> print(stemmer.stem("having"))
+    have
+    >>> print(stemmer2.stem("having"))
+    having
+
+The 'english' stemmer is better than the original 'porter' stemmer.
+
+    >>> print(SnowballStemmer("english").stem("generously"))
+    generous
+    >>> print(SnowballStemmer("porter").stem("generously"))
+    gener
+
+.. note::
+
+    Extra stemmer tests can be found in `nltk.test.unit.test_stem`.
diff --git a/nlp_resource_data/nltk/test/tag.doctest b/nlp_resource_data/nltk/test/tag.doctest
new file mode 100644 (file)
index 0000000..7103b41
--- /dev/null
@@ -0,0 +1,33 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+Regression Tests
+~~~~~~~~~~~~~~~~
+
+Sequential Taggers
+------------------
+
+Add tests for:
+  - make sure backoff is being done correctly.
+  - make sure ngram taggers don't use previous sentences for context.
+  - make sure ngram taggers see 'beginning of the sentence' as a
+    unique context
+  - make sure regexp tagger's regexps are tried in order
+  - train on some simple examples, & make sure that the size & contents
+    of the generated models are correct.
+  - make sure cutoff works as intended
+  - make sure that ngram models only exclude contexts covered by the
+    backoff tagger if the backoff tagger gets that context correct at
+    *all* locations.
+
+
+Regression Testing for issue #1025
+==================================
+
+We want to ensure that a RegexpTagger can be created with more than 100 patterns
+and does not fail with:
+ "AssertionError: sorry, but this version only supports 100 named groups"
+
+    >>> from nltk.tag import RegexpTagger
+    >>> patterns = [(str(i), 'NNP',) for i in range(200)]
+    >>> tagger = RegexpTagger(patterns)
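+
+As a quick sanity check (assuming RegexpTagger's usual first-match
+semantics), the resulting tagger should also be usable:
+
+    >>> tagger.tag(['1', '2'])
+    [('1', 'NNP'), ('2', 'NNP')]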
diff --git a/nlp_resource_data/nltk/test/tokenize.doctest b/nlp_resource_data/nltk/test/tokenize.doctest
new file mode 100644 (file)
index 0000000..a3a7dfa
--- /dev/null
@@ -0,0 +1,289 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+    >>> from nltk.tokenize import *
+
+Regression Tests: Treebank Tokenizer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some test strings.
+
+    >>> s1 = "On a $50,000 mortgage of 30 years at 8 percent, the monthly payment would be $366.88."
+    >>> word_tokenize(s1)
+    ['On', 'a', '$', '50,000', 'mortgage', 'of', '30', 'years', 'at', '8', 'percent', ',', 'the', 'monthly', 'payment', 'would', 'be', '$', '366.88', '.']
+    >>> s2 = "\"We beat some pretty good teams to get here,\" Slocum said."
+    >>> word_tokenize(s2)
+    ['``', 'We', 'beat', 'some', 'pretty', 'good', 'teams', 'to', 'get', 'here', ',', "''", 'Slocum', 'said', '.']
+    >>> s3 = "Well, we couldn't have this predictable, cliche-ridden, \"Touched by an Angel\" (a show creator John Masius worked on) wanna-be if she didn't."
+    >>> word_tokenize(s3)
+    ['Well', ',', 'we', 'could', "n't", 'have', 'this', 'predictable', ',', 'cliche-ridden', ',', '``', 'Touched', 'by', 'an', 'Angel', "''", '(', 'a', 'show', 'creator', 'John', 'Masius', 'worked', 'on', ')', 'wanna-be', 'if', 'she', 'did', "n't", '.']
+    >>> s4 = "I cannot cannot work under these conditions!"
+    >>> word_tokenize(s4)
+    ['I', 'can', 'not', 'can', 'not', 'work', 'under', 'these', 'conditions', '!']
+    >>> s5 = "The company spent $30,000,000 last year."
+    >>> word_tokenize(s5)
+    ['The', 'company', 'spent', '$', '30,000,000', 'last', 'year', '.']
+    >>> s6 = "The company spent 40.75% of its income last year."
+    >>> word_tokenize(s6)
+    ['The', 'company', 'spent', '40.75', '%', 'of', 'its', 'income', 'last', 'year', '.']
+    >>> s7 = "He arrived at 3:00 pm."
+    >>> word_tokenize(s7)
+    ['He', 'arrived', 'at', '3:00', 'pm', '.']
+    >>> s8 = "I bought these items: books, pencils, and pens."
+    >>> word_tokenize(s8)
+    ['I', 'bought', 'these', 'items', ':', 'books', ',', 'pencils', ',', 'and', 'pens', '.']
+    >>> s9 = "Though there were 150, 100 of them were old."
+    >>> word_tokenize(s9)
+    ['Though', 'there', 'were', '150', ',', '100', 'of', 'them', 'were', 'old', '.']
+    >>> s10 = "There were 300,000, but that wasn't enough."
+    >>> word_tokenize(s10)
+    ['There', 'were', '300,000', ',', 'but', 'that', 'was', "n't", 'enough', '.']
+
+
+Testing improvement made to the TreebankWordTokenizer
+
+    >>> sx1 = '\xabNow that I can do.\xbb'
+    >>> expected = ['\xab', 'Now', 'that', 'I', 'can', 'do', '.', '\xbb']
+    >>> word_tokenize(sx1) == expected
+    True
+    >>> sx2 = 'The unicode 201C and 201D \u201cLEFT(RIGHT) DOUBLE QUOTATION MARK\u201d is also OPEN_PUNCT and CLOSE_PUNCT.'
+    >>> expected = ['The', 'unicode', '201C', 'and', '201D', '\u201c', 'LEFT', '(', 'RIGHT', ')', 'DOUBLE', 'QUOTATION', 'MARK', '\u201d', 'is', 'also', 'OPEN_PUNCT', 'and', 'CLOSE_PUNCT', '.']
+    >>> word_tokenize(sx2) == expected
+    True
+
+
+Sentence tokenization in word_tokenize:
+
+    >>> s11 = "I called Dr. Jones. I called Dr. Jones."
+    >>> word_tokenize(s11)
+    ['I', 'called', 'Dr.', 'Jones', '.', 'I', 'called', 'Dr.', 'Jones', '.']
+    >>> s12 = ("Ich muss unbedingt daran denken, Mehl, usw. fur einen "
+    ...        "Kuchen einzukaufen. Ich muss.")
+    >>> word_tokenize(s12)
+    ['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw',
+     '.', 'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.']
+    >>> word_tokenize(s12, 'german')
+    ['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw.',
+     'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.']
+
+
+Regression Tests: Regexp Tokenizer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some additional test strings.
+
+    >>> s = ("Good muffins cost $3.88\nin New York.  Please buy me\n"
+    ...      "two of them.\n\nThanks.")
+    >>> s2 = ("Alas, it has not rained today. When, do you think, "
+    ...       "will it rain again?")
+    >>> s3 = ("<p>Although this is <b>not</b> the case here, we must "
+    ...       "not relax our vigilance!</p>")
+
+    >>> regexp_tokenize(s2, r'[,\.\?!"]\s*', gaps=False)
+    [', ', '. ', ', ', ', ', '?']
+    >>> regexp_tokenize(s2, r'[,\.\?!"]\s*', gaps=True)
+    ['Alas', 'it has not rained today', 'When', 'do you think',
+     'will it rain again']
+
+Take care to avoid using capturing groups:
+
+    >>> regexp_tokenize(s3, r'</?[bp]>', gaps=False)
+    ['<p>', '<b>', '</b>', '</p>']
+    >>> regexp_tokenize(s3, r'</?(?:b|p)>', gaps=False)
+    ['<p>', '<b>', '</b>', '</p>']
+    >>> regexp_tokenize(s3, r'</?(?:b|p)>', gaps=True)
+    ['Although this is ', 'not',
+     ' the case here, we must not relax our vigilance!']
+
+Named groups are capturing groups, and confuse the tokenizer:
+
+    >>> regexp_tokenize(s3, r'</?(?P<named>b|p)>', gaps=False)
+    ['p', 'b', 'b', 'p']
+    >>> regexp_tokenize(s3, r'</?(?P<named>b|p)>', gaps=True)
+    ['p', 'Although this is ', 'b', 'not', 'b',
+     ' the case here, we must not relax our vigilance!', 'p']
+
+Make sure that nested groups don't confuse the tokenizer:
+
+    >>> regexp_tokenize(s2, r'(?:h|r|l)a(?:s|(?:i|n0))', gaps=False)
+    ['las', 'has', 'rai', 'rai']
+    >>> regexp_tokenize(s2, r'(?:h|r|l)a(?:s|(?:i|n0))', gaps=True)
+    ['A', ', it ', ' not ', 'ned today. When, do you think, will it ',
+     'n again?']
+
+Back-references require capturing groups, and these are not supported:
+
+    >>> regexp_tokenize("aabbbcccc", r'(.)\1')
+    ['a', 'b', 'c', 'c']
+
+A simple sentence tokenizer, using the pattern '\.(?:\s+|$)':
+
+    >>> regexp_tokenize(s, pattern=r'\.(?:\s+|$)', gaps=True)
+    ['Good muffins cost $3.88\nin New York',
+     'Please buy me\ntwo of them', 'Thanks']
+
+
+Regression Tests: TweetTokenizer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+TweetTokenizer is a tokenizer specifically designed for micro-blogging tokenization tasks.
+
+    >>> from nltk.tokenize import TweetTokenizer
+    >>> tknzr = TweetTokenizer()
+    >>> s0 = "This is a cooool #dummysmiley: :-) :-P <3 and some arrows < > -> <--"
+    >>> tknzr.tokenize(s0)
+    ['This', 'is', 'a', 'cooool', '#dummysmiley', ':', ':-)', ':-P', '<3', 'and', 'some', 'arrows', '<', '>', '->', '<--']
+    >>> s1 = "@Joyster2012 @CathStaincliffe Good for you, girl!! Best wishes :-)"
+    >>> tknzr.tokenize(s1)
+    ['@Joyster2012', '@CathStaincliffe', 'Good', 'for', 'you', ',', 'girl', '!', '!', 'Best', 'wishes', ':-)']
+    >>> s2 = "3Points for #DreamTeam Gooo BAILEY! :) #PBB737Gold @PBBabscbn"
+    >>> tknzr.tokenize(s2)
+    ['3Points', 'for', '#DreamTeam', 'Gooo', 'BAILEY', '!', ':)', '#PBB737Gold', '@PBBabscbn']
+    >>> s3 = "@Insanomania They do... Their mentality doesn't :("
+    >>> tknzr.tokenize(s3)
+    ['@Insanomania', 'They', 'do', '...', 'Their', 'mentality', "doesn't", ':(']
+    >>> s4 = "RT @facugambande: Ya por arrancar a grabar !!! #TirenTirenTiren vamoo !!"
+    >>> tknzr.tokenize(s4)
+    ['RT', '@facugambande', ':', 'Ya', 'por', 'arrancar', 'a', 'grabar', '!', '!', '!', '#TirenTirenTiren', 'vamoo', '!', '!']
+    >>> tknzr = TweetTokenizer(reduce_len=True)
+    >>> s5 = "@crushinghes the summer holidays are great but I'm so bored already :("
+    >>> tknzr.tokenize(s5)
+    ['@crushinghes', 'the', 'summer', 'holidays', 'are', 'great', 'but', "I'm", 'so', 'bored', 'already', ':(']
+
+It is possible to specify the `strip_handles` and `reduce_len` parameters for a TweetTokenizer instance. With `strip_handles` set to True, the tokenizer removes Twitter handles (e.g. usernames); with `reduce_len` set to True, repeated character sequences of length 3 or greater are replaced with sequences of length 3.
+
+    >>> tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)
+    >>> s6 = '@remy: This is waaaaayyyy too much for you!!!!!!'
+    >>> tknzr.tokenize(s6)
+    [':', 'This', 'is', 'waaayyy', 'too', 'much', 'for', 'you', '!', '!', '!']
+    >>> s7 = '@_willy65: No place for @chuck tonight. Sorry.'
+    >>> tknzr.tokenize(s7)
+    [':', 'No', 'place', 'for', 'tonight', '.', 'Sorry', '.']
+    >>> s8 = '@mar_tin is a great developer. Contact him at mar_tin@email.com.'
+    >>> tknzr.tokenize(s8)
+    ['is', 'a', 'great', 'developer', '.', 'Contact', 'him', 'at', 'mar_tin@email.com', '.']
+
+The `preserve_case` parameter (default: True) controls whether tokens keep their case; setting it to False converts uppercase tokens to lowercase. Emoticons are not affected:
+
+    >>> tknzr = TweetTokenizer(preserve_case=False)
+    >>> s9 = "@jrmy: I'm REALLY HAPPYYY about that! NICEEEE :D :P"
+    >>> tknzr.tokenize(s9)
+    ['@jrmy', ':', "i'm", 'really', 'happyyy', 'about', 'that', '!', 'niceeee', ':D', ':P']
+
+It should not hang on long sequences of the same punctuation character.
+
+    >>> tknzr = TweetTokenizer()
+    >>> s10 = "Photo: Aujourd'hui sur http://t.co/0gebOFDUzn Projet... http://t.co/bKfIUbydz2.............................. http://fb.me/3b6uXpz0L"
+    >>> tknzr.tokenize(s10)
+    ['Photo', ':', "Aujourd'hui", 'sur', 'http://t.co/0gebOFDUzn', 'Projet', '...', 'http://t.co/bKfIUbydz2', '...', 'http://fb.me/3b6uXpz0L']
+
+
+Regression Tests: PunktSentenceTokenizer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The sentence splitter should remove whitespace following the sentence boundary.
+
+    >>> pst = PunktSentenceTokenizer()
+    >>> pst.tokenize('See Section 3).  Or Section 2).  ')
+    ['See Section 3).', 'Or Section 2).']
+    >>> pst.tokenize('See Section 3.)  Or Section 2.)  ')
+    ['See Section 3.)', 'Or Section 2.)']
+    >>> pst.tokenize('See Section 3.)  Or Section 2.)  ', realign_boundaries=False)
+    ['See Section 3.', ')  Or Section 2.', ')']
+
+
+Two instances of PunktSentenceTokenizer should not share PunktParameters.
+
+    >>> pst = PunktSentenceTokenizer()
+    >>> pst2 = PunktSentenceTokenizer()
+    >>> pst._params is pst2._params
+    False
+    
+Testing mutable default arguments for https://github.com/nltk/nltk/pull/2067
+
+    >>> from nltk.tokenize.punkt import PunktBaseClass, PunktTrainer, PunktSentenceTokenizer
+    >>> from nltk.tokenize.punkt import PunktLanguageVars, PunktParameters
+    >>> pbc = PunktBaseClass(lang_vars=None, params=None)
+    >>> type(pbc._params)
+    <class 'nltk.tokenize.punkt.PunktParameters'>
+    >>> type(pbc._lang_vars)
+    <class 'nltk.tokenize.punkt.PunktLanguageVars'>
+    >>> pt = PunktTrainer(lang_vars=None)
+    >>> type(pt._lang_vars)
+    <class 'nltk.tokenize.punkt.PunktLanguageVars'>
+    >>> pst = PunktSentenceTokenizer(lang_vars=None)
+    >>> type(pst._lang_vars)
+    <class 'nltk.tokenize.punkt.PunktLanguageVars'>
+    
+
+Regression Tests: align_tokens
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Post-hoc alignment of tokens with a source string
+
+    >>> from nltk.tokenize.util import align_tokens
+    >>> list(align_tokens([''], ""))
+    [(0, 0)]
+    >>> list(align_tokens([''], " "))
+    [(0, 0)]
+    >>> list(align_tokens([], ""))
+    []
+    >>> list(align_tokens([], " "))
+    []
+    >>> list(align_tokens(['a'], "a"))
+    [(0, 1)]
+    >>> list(align_tokens(['abc', 'def'], "abcdef"))
+    [(0, 3), (3, 6)]
+    >>> list(align_tokens(['abc', 'def'], "abc def"))
+    [(0, 3), (4, 7)]
+    >>> list(align_tokens(['ab', 'cd'], "ab cd ef"))
+    [(0, 2), (3, 5)]
+    >>> list(align_tokens(['ab', 'cd', 'ef'], "ab cd ef"))
+    [(0, 2), (3, 5), (6, 8)]
+    >>> list(align_tokens(['ab', 'cd', 'efg'], "ab cd ef"))
+    Traceback (most recent call last):
+    ....
+    ValueError: substring "efg" not found in "ab cd ef"
+    >>> list(align_tokens(['ab', 'cd', 'ef', 'gh'], "ab cd ef"))
+    Traceback (most recent call last):
+    ....
+    ValueError: substring "gh" not found in "ab cd ef"
+    >>> list(align_tokens(['The', 'plane', ',', 'bound', 'for', 'St', 'Petersburg', ',', 'crashed', 'in', 'Egypt', "'s", 'Sinai', 'desert', 'just', '23', 'minutes', 'after', 'take-off', 'from', 'Sharm', 'el-Sheikh', 'on', 'Saturday', '.'], "The plane, bound for St Petersburg, crashed in Egypt's Sinai desert just 23 minutes after take-off from Sharm el-Sheikh on Saturday."))
+    [(0, 3), (4, 9), (9, 10), (11, 16), (17, 20), (21, 23), (24, 34), (34, 35), (36, 43), (44, 46), (47, 52), (52, 54), (55, 60), (61, 67), (68, 72), (73, 75), (76, 83), (84, 89), (90, 98), (99, 103), (104, 109), (110, 119), (120, 122), (123, 131), (131, 132)]
+
+
+Regression Tests: MWETokenizer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Pickle an MWETokenizer
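+(MWETokenizer merges the multi-word expressions supplied at construction
+time into single tokens, joined by `separator`; pickling should preserve
+this behaviour.)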
+
+    >>> from nltk.tokenize import MWETokenizer
+    >>> import pickle
+
+    >>> tokenizer = MWETokenizer([('hors', "d'oeuvre")], separator='+')
+    >>> p = pickle.dumps(tokenizer)
+    >>> unpickled = pickle.loads(p)
+    >>> unpickled.tokenize("An hors d'oeuvre tonight, sir?".split())
+    ['An', "hors+d'oeuvre", 'tonight,', 'sir?']
+
+
+Regression Tests: TextTilingTokenizer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+TextTilingTokenizer tokenizes text into coherent subtopic chunks based upon Hearst's TextTiling algorithm.
+
+    >>> from nltk.tokenize import TextTilingTokenizer
+    >>> from nltk.corpus import brown
+    >>> tt = TextTilingTokenizer()
+    >>> tt.tokenize(brown.raw()[0:1000])
+    ["\n\n\tThe/at Fulton/np-tl County/nn-tl Grand/jj-tl Jury/nn-tl said/vbd Friday/nr an/at investigation/nn of/in Atlanta's/np$ recent/jj primary/nn election/nn produced/vbd ``/`` no/at evidence/nn ''/'' that/cs any/dti irregularities/nns took/vbd place/nn ./.\n\n\n\tThe/at jury/nn further/rbr said/vbd in/in term-end/nn presentments/nns that/cs the/at City/nn-tl Executive/jj-tl Committee/nn-tl ,/, which/wdt had/hvd over-all/jj charge/nn of/in the/at election/nn ,/, ``/`` deserves/vbz the/at praise/nn and/cc thanks/nns of/in the/at City/nn-tl of/in-tl Atlanta/np-tl ''/'' for/in the/at manner/nn in/in which/wdt the/at election/nn was/bedz conducted/vbn ./.\n\n\n\tThe/at September-October/np term/nn jury/nn had/hvd been/ben charged/vbn by/in Fulton/np-tl Superior/jj-tl Court/nn-tl Judge/nn-tl Durwood/np Pye/np to/to investigate/vb reports/nns of/in possible/jj ``/`` irregularities/nns ''/'' in/in the/at hard-fought/jj primary/nn which/wdt was/bedz won/vbn by/in Mayor-nominate/nn-tl Ivan/np Allen/np Jr./"]
+
+Test that `ValueError` exceptions are raised when illegal arguments are used.
+
+    >>> TextTilingTokenizer(similarity_method='foo').tokenize(brown.raw()[0:1000])
+    Traceback (most recent call last):
+      ...
+    ValueError: Similarity method foo not recognized
+    >>> TextTilingTokenizer(smoothing_method='bar').tokenize(brown.raw()[0:1000])
+    Traceback (most recent call last):
+      ...
+    ValueError: Smoothing method bar not recognized
+
diff --git a/nlp_resource_data/nltk/test/toolbox.doctest b/nlp_resource_data/nltk/test/toolbox.doctest
new file mode 100644 (file)
index 0000000..1e430ad
--- /dev/null
@@ -0,0 +1,307 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+===============================
+Unit test cases for ``toolbox``
+===============================
+
+    >>> from nltk import toolbox
+
+--------------------------
+``toolbox.StandardFormat``
+--------------------------
+
+    >>> f = toolbox.StandardFormat()
+
+``toolbox.StandardFormat.open()``
+---------------------------------
+    >>> import os, tempfile
+    >>> (fd, fname) = tempfile.mkstemp()
+    >>> tf = os.fdopen(fd, "w")
+    >>> _ = tf.write('\\lx a value\n\\lx another value\n')
+    >>> tf.close()
+    >>> f = toolbox.StandardFormat()
+    >>> f.open(fname)
+    >>> list(f.fields())
+    [('lx', 'a value'), ('lx', 'another value')]
+    >>> f.close()
+    >>> os.unlink(fname)
+
+``toolbox.StandardFormat.open_string()``
+----------------------------------------
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx a value\n\\lx another value\n')
+    >>> list(f.fields())
+    [('lx', 'a value'), ('lx', 'another value')]
+    >>> f.close()
+
+``toolbox.StandardFormat.close()``
+----------------------------------
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx a value\n\\lx another value\n')
+    >>> list(f.fields())
+    [('lx', 'a value'), ('lx', 'another value')]
+    >>> f.close()
+
+``toolbox.StandardFormat.line_num``
+---------------------------------------
+
+``StandardFormat.line_num`` contains the line number of the last line returned:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx a value\n\\lx another value\n\\lx a third value\n')
+    >>> line_nums = []
+    >>> for l in f.raw_fields():
+    ...     line_nums.append(f.line_num)
+    >>> line_nums
+    [1, 2, 3]
+
+``StandardFormat.line_num`` reports the last line of a field even when fields span multiple lines:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx two\nlines\n\\lx three\nlines\n\n\\lx two\nlines\n')
+    >>> line_nums = []
+    >>> for l in f.raw_fields():
+    ...     line_nums.append(f.line_num)
+    >>> line_nums
+    [2, 5, 7]
+
+``StandardFormat.line_num`` doesn't exist before opening or after closing
+a file or string:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.line_num
+    Traceback (most recent call last):
+        ...
+    AttributeError: 'StandardFormat' object has no attribute 'line_num'
+    >>> f.open_string('\\lx two\nlines\n\\lx three\nlines\n\n\\lx two\nlines\n')
+    >>> line_nums = []
+    >>> for l in f.raw_fields():
+    ...     line_nums.append(f.line_num)
+    >>> line_nums
+    [2, 5, 7]
+    >>> f.close()
+    >>> f.line_num
+    Traceback (most recent call last):
+        ...
+    AttributeError: 'StandardFormat' object has no attribute 'line_num'
+
+``toolbox.StandardFormat.raw_fields()``
+---------------------------------------
+``raw_fields()`` returns an iterator over tuples of two strings representing the
+marker and its value. The marker is given without the backslash and the value
+without its trailing newline:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx a value\n\\lx another value\n')
+    >>> list(f.raw_fields())
+    [('lx', 'a value'), ('lx', 'another value')]
+
+an empty file returns nothing:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('')
+    >>> list(f.raw_fields())
+    []
+
+a file containing only a newline returns a single field with no marker (``None``) and an empty value:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\n')
+    >>> list(f.raw_fields())
+    [(None, '')]
+
+file with only one field should be parsed ok:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx one value\n')
+    >>> list(f.raw_fields())
+    [('lx', 'one value')]
+
+file without a trailing newline should be parsed ok:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx a value\n\\lx another value')
+    >>> list(f.raw_fields())
+    [('lx', 'a value'), ('lx', 'another value')]
+
+trailing white space is preserved except for the final newline:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx trailing space \n\\lx trailing tab\t\n\\lx extra newline\n\n')
+    >>> list(f.raw_fields())
+    [('lx', 'trailing space '), ('lx', 'trailing tab\t'), ('lx', 'extra newline\n')]
+
+line wrapping is preserved:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
+    >>> list(f.raw_fields())
+    [('lx', 'a value\nmore of the value\nand still more'), ('lc', 'another val')]
+
+file beginning with a multiline record should be parsed ok:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
+    >>> list(f.raw_fields())
+    [('lx', 'a value\nmore of the value\nand still more'), ('lc', 'another val')]
+
+file ending with a multiline record should be parsed ok:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lc a value\n\\lx another value\nmore of the value\nand still more\n')
+    >>> list(f.raw_fields())
+    [('lc', 'a value'), ('lx', 'another value\nmore of the value\nand still more')]
+
+file beginning with a BOM should be parsed ok:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\xef\xbb\xbf\\lx a value\n\\lx another value\n')
+    >>> list(f.raw_fields())
+    [('lx', 'a value'), ('lx', 'another value')]
+
+file beginning with two BOMs should ignore only the first one:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\xef\xbb\xbf\xef\xbb\xbf\\lx a value\n\\lx another value\n')
+    >>> list(f.raw_fields())
+    [(None, '\xef\xbb\xbf\\lx a value'), ('lx', 'another value')]
+
+should not ignore a BOM not at the beginning of the file:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx a value\n\xef\xbb\xbf\\lx another value\n')
+    >>> list(f.raw_fields())
+    [('lx', 'a value\n\xef\xbb\xbf\\lx another value')]
+
+``toolbox.StandardFormat.fields()``
+-----------------------------------
+trailing white space is not preserved:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx trailing space \n\\lx trailing tab\t\n\\lx extra newline\n\n')
+    >>> list(f.fields())
+    [('lx', 'trailing space'), ('lx', 'trailing tab'), ('lx', 'extra newline')]
+
+multiline fields are unwrapped:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
+    >>> list(f.fields())
+    [('lx', 'a value more of the value and still more'), ('lc', 'another val')]
+
+markers
+-------
+A backslash in the first position on a new line indicates the start of a
+marker. The backslash is not part of the marker:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\mk a value\n')
+    >>> list(f.fields())
+    [('mk', 'a value')]
+
+If the backslash occurs later in the line it does not indicate the start
+of a marker:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\mk a value\n \\mk another one\n')
+    >>> list(f.raw_fields())
+    [('mk', 'a value\n \\mk another one')]
+
+There is no specific limit to the length of a marker:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\this_is_an_extremely_long_marker value\n')
+    >>> list(f.fields())
+    [('this_is_an_extremely_long_marker', 'value')]
+
+A marker can contain any non-whitespace character:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\`~!@#$%^&*()_-=+[{]}\|,<.>/?;:"0123456789 value\n')
+    >>> list(f.fields())
+    [('`~!@#$%^&*()_-=+[{]}\\|,<.>/?;:"0123456789', 'value')]
+
+A marker is terminated by any whitespace character:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\mk a value\n\\mk\tanother one\n\\mk\rthird one\n\\mk\ffourth one')
+    >>> list(f.fields())
+    [('mk', 'a value'), ('mk', 'another one'), ('mk', 'third one'), ('mk', 'fourth one')]
+
+Consecutive whitespace characters (except newline) are treated the same as one:
+
+    >>> f = toolbox.StandardFormat()
+    >>> f.open_string('\\mk \t\r\fa value\n')
+    >>> list(f.fields())
+    [('mk', 'a value')]
+
+-----------------------
+``toolbox.ToolboxData``
+-----------------------
+
+    >>> db = toolbox.ToolboxData()
+
+``toolbox.ToolboxData.parse()``
+-------------------------------
+check that normal parsing works:
+
+    >>> from xml.etree import ElementTree
+    >>> td = toolbox.ToolboxData()
+    >>> s = """\\_sh v3.0  400  Rotokas Dictionary
+    ... \\_DateStampHasFourDigitYear
+    ...
+    ... \\lx kaa
+    ... \\ps V.A
+    ... \\ge gag
+    ... \\gp nek i pas
+    ...
+    ... \\lx kaa
+    ... \\ps V.B
+    ... \\ge strangle
+    ... \\gp pasim nek
+    ... """
+    >>> td.open_string(s)
+    >>> tree = td.parse(key='lx')
+    >>> tree.tag
+    'toolbox_data'
+    >>> ElementTree.tostring(list(tree)[0]).decode('utf8')
+    '<header><_sh>v3.0  400  Rotokas Dictionary</_sh><_DateStampHasFourDigitYear /></header>'
+    >>> ElementTree.tostring(list(tree)[1]).decode('utf8')
+    '<record><lx>kaa</lx><ps>V.A</ps><ge>gag</ge><gp>nek i pas</gp></record>'
+    >>> ElementTree.tostring(list(tree)[2]).decode('utf8')
+    '<record><lx>kaa</lx><ps>V.B</ps><ge>strangle</ge><gp>pasim nek</gp></record>'
+
+check that guessing the key marker works:
+
+    >>> from xml.etree import ElementTree
+    >>> td = toolbox.ToolboxData()
+    >>> s = """\\_sh v3.0  400  Rotokas Dictionary
+    ... \\_DateStampHasFourDigitYear
+    ...
+    ... \\lx kaa
+    ... \\ps V.A
+    ... \\ge gag
+    ... \\gp nek i pas
+    ...
+    ... \\lx kaa
+    ... \\ps V.B
+    ... \\ge strangle
+    ... \\gp pasim nek
+    ... """
+    >>> td.open_string(s)
+    >>> tree = td.parse()
+    >>> ElementTree.tostring(list(tree)[0]).decode('utf8')
+    '<header><_sh>v3.0  400  Rotokas Dictionary</_sh><_DateStampHasFourDigitYear /></header>'
+    >>> ElementTree.tostring(list(tree)[1]).decode('utf8')
+    '<record><lx>kaa</lx><ps>V.A</ps><ge>gag</ge><gp>nek i pas</gp></record>'
+    >>> ElementTree.tostring(list(tree)[2]).decode('utf8')
+    '<record><lx>kaa</lx><ps>V.B</ps><ge>strangle</ge><gp>pasim nek</gp></record>'
+
+-----------------------
+``toolbox`` functions
+-----------------------
+
+``toolbox.to_sfm_string()``
+-------------------------------
+
diff --git a/nlp_resource_data/nltk/test/translate.doctest b/nlp_resource_data/nltk/test/translate.doctest
new file mode 100644 (file)
index 0000000..87966fb
--- /dev/null
@@ -0,0 +1,242 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+.. -*- coding: utf-8 -*-
+
+=========
+Alignment
+=========
+
+Corpus Reader
+-------------
+
+    >>> from nltk.corpus import comtrans
+    >>> words = comtrans.words('alignment-en-fr.txt')
+    >>> for word in words[:6]:
+    ...     print(word)
+    Resumption
+    of
+    the
+    session
+    I
+    declare
+    >>> als = comtrans.aligned_sents('alignment-en-fr.txt')[0]
+    >>> als  # doctest: +NORMALIZE_WHITESPACE
+    AlignedSent(['Resumption', 'of', 'the', 'session'],
+    ['Reprise', 'de', 'la', 'session'],
+    Alignment([(0, 0), (1, 1), (2, 2), (3, 3)]))
+
+
+Alignment Objects
+-----------------
+
+Aligned sentences are simply a mapping between words in a sentence:
+
+    >>> print(" ".join(als.words))
+    Resumption of the session
+    >>> print(" ".join(als.mots))
+    Reprise de la session
+    >>> als.alignment
+    Alignment([(0, 0), (1, 1), (2, 2), (3, 3)])
+
+
+Usually we look at them from the perspective of a source to a target language,
+but they are easily inverted:
+
+    >>> als.invert() # doctest: +NORMALIZE_WHITESPACE
+    AlignedSent(['Reprise', 'de', 'la', 'session'],
+    ['Resumption', 'of', 'the', 'session'],
+    Alignment([(0, 0), (1, 1), (2, 2), (3, 3)]))
+
+
+We can create new alignments, but these need to be in the correct range of
+the corresponding sentences:
+
+    >>> from nltk.translate import Alignment, AlignedSent
+    >>> als = AlignedSent(['Reprise', 'de', 'la', 'session'],
+    ...                   ['Resumption', 'of', 'the', 'session'],
+    ...                   Alignment([(0, 0), (1, 4), (2, 1), (3, 3)]))
+    Traceback (most recent call last):
+        ...
+    IndexError: Alignment is outside boundary of mots
+
+
+You can set alignments with any sequence of tuples, so long as the first two
+elements of each tuple are the alignment indices:
+
+    >>> als.alignment = Alignment([(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))])
+
+    >>> Alignment([(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))])
+    Alignment([(0, 0), (1, 1), (2, 2, 'boat'), (3, 3, False, (1, 2))])
+
+
+Alignment Algorithms
+--------------------
+
+EM for IBM Model 1
+~~~~~~~~~~~~~~~~~~
+
+Here is an example from Koehn, 2010:
+
+    >>> from nltk.translate import IBMModel1
+    >>> corpus = [AlignedSent(['the', 'house'], ['das', 'Haus']),
+    ...           AlignedSent(['the', 'book'], ['das', 'Buch']),
+    ...           AlignedSent(['a', 'book'], ['ein', 'Buch'])]
+    >>> em_ibm1 = IBMModel1(corpus, 20)
+    >>> print(round(em_ibm1.translation_table['the']['das'], 1))
+    1.0
+    >>> print(round(em_ibm1.translation_table['book']['das'], 1))
+    0.0
+    >>> print(round(em_ibm1.translation_table['house']['das'], 1))
+    0.0
+    >>> print(round(em_ibm1.translation_table['the']['Buch'], 1))
+    0.0
+    >>> print(round(em_ibm1.translation_table['book']['Buch'], 1))
+    1.0
+    >>> print(round(em_ibm1.translation_table['a']['Buch'], 1))
+    0.0
+    >>> print(round(em_ibm1.translation_table['book']['ein'], 1))
+    0.0
+    >>> print(round(em_ibm1.translation_table['a']['ein'], 1))
+    1.0
+    >>> print(round(em_ibm1.translation_table['the']['Haus'], 1))
+    0.0
+    >>> print(round(em_ibm1.translation_table['house']['Haus'], 1))
+    1.0
+    >>> print(round(em_ibm1.translation_table['book'][None], 1))
+    0.5
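+
+(The ``None`` key stands for the NULL token that IBM models use to account
+for words with no overt counterpart on the other side of the sentence pair.)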
+
+And using an NLTK corpus. We train on only 10 sentences, since it is so slow:
+
+    >>> from nltk.corpus import comtrans
+    >>> com_ibm1 = IBMModel1(comtrans.aligned_sents()[:10], 20)
+    >>> print(round(com_ibm1.translation_table['bitte']['Please'], 1))
+    0.2
+    >>> print(round(com_ibm1.translation_table['Sitzungsperiode']['session'], 1))
+    1.0
+
+
+Evaluation
+----------
+The evaluation metrics for alignments are usually not interested in the
+contents of the alignments themselves, but rather in how they compare to a
+"gold standard" alignment constructed by human experts. For this reason we
+often want to work just with raw set operations against the alignment points.
+This gives us a very clean form for defining our evaluation metrics.
+
+.. Note::
+    The AlignedSent class has no distinction of "possible" or "sure"
+    alignments. Thus all alignments are treated as "sure".
+
+Consider the following aligned sentence for evaluation:
+
+    >>> my_als = AlignedSent(['Resumption', 'of', 'the', 'session'],
+    ...     ['Reprise', 'de', 'la', 'session'],
+    ...     Alignment([(0, 0), (3, 3), (1, 2), (1, 1), (1, 3)]))
+
+Precision
+~~~~~~~~~
+``precision = |A∩P| / |A|``
+
+**Precision** is probably the most well known evaluation metric and it is implemented
+in `nltk.metrics.scores.precision`_.  Since precision is simply interested in the
+proportion of correct alignments, we calculate the ratio of the number of our
+test alignments (*A*) that match a possible alignment (*P*), over the number of
+test alignments provided. There is no penalty for missing a possible alignment
+in our test alignments. An easy way to game this metric is to provide just one
+test alignment that is in *P* [OCH2000]_.
+
+Here are some examples:
+
+    >>> from nltk.metrics import precision
+    >>> als.alignment = Alignment([(0,0), (1,1), (2,2), (3,3)])
+    >>> precision(Alignment([]), als.alignment)
+    0.0
+    >>> precision(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment)
+    1.0
+    >>> precision(Alignment([(0,0), (3,3)]), als.alignment)
+    0.5
+    >>> precision(Alignment.fromstring('0-0 3-3'), als.alignment)
+    0.5
+    >>> precision(Alignment([(0,0), (1,1), (2,2), (3,3), (1,2), (2,1)]), als.alignment)
+    1.0
+    >>> precision(als.alignment, my_als.alignment)
+    0.6
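+
+To see where the last value comes from: ``my_als`` proposes five alignment
+points, three of which, namely (0, 0), (1, 1) and (3, 3), also occur in
+``als``, giving 3/5 = 0.6.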
+
+
+.. _nltk.metrics.scores.precision:
+    http://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.precision
+
+
+Recall
+~~~~~~
+``recall = |A∩S| / |S|``
+
+**Recall** is another well known evaluation metric that has a set based
+implementation in NLTK as `nltk.metrics.scores.recall`_. Since recall is
+simply interested in the proportion of found alignments, we calculate the
+ratio of the number of our test alignments (*A*) that match a sure alignment
+(*S*) over the number of sure alignments. There is no penalty for producing
+a lot of test alignments. An easy way to game this metric is to include every
+possible alignment in our test alignments, regardless of whether they are
+correct [OCH2000]_.
+
+Here are some examples:
+
+    >>> from nltk.metrics import recall
+    >>> print(recall(Alignment([]), als.alignment))
+    None
+    >>> recall(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment)
+    1.0
+    >>> recall(Alignment.fromstring('0-0 3-3'), als.alignment)
+    1.0
+    >>> recall(Alignment([(0,0), (3,3)]), als.alignment)
+    1.0
+    >>> recall(Alignment([(0,0), (1,1), (2,2), (3,3), (1,2), (2,1)]), als.alignment)
+    0.66666...
+    >>> recall(als.alignment, my_als.alignment)
+    0.75
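+
+In the last call, three of the four sure alignments in ``als``, namely
+(0, 0), (1, 1) and (3, 3), are recovered by ``my_als``, giving 3/4 = 0.75.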
+
+
+.. _nltk.metrics.scores.recall:
+    http://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.recall
+
+
+Alignment Error Rate (AER)
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+``AER = 1 - (|A∩S| + |A∩P|) / (|A| + |S|)``
+
+**Alignment Error Rate** is a commonly used metric for assessing word
+alignments. It combines precision and recall together such that a
+perfect alignment must have all of the sure alignments and may have some
+possible alignments [MIHALCEA2003]_ [KOEHN2010]_.
+
+.. Note::
+    [KOEHN2010]_ defines the AER as ``AER = (|A∩S| + |A∩P|) / (|A| + |S|)``
+    in his book, but corrects it to the above in his online errata. This is
+    in line with [MIHALCEA2003]_.
+
+Here are some examples:
+
+    >>> from nltk.translate import alignment_error_rate
+    >>> alignment_error_rate(Alignment([]), als.alignment)
+    1.0
+    >>> alignment_error_rate(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment)
+    0.0
+    >>> alignment_error_rate(als.alignment, my_als.alignment)
+    0.333333...
+    >>> alignment_error_rate(als.alignment, my_als.alignment,
+    ...     als.alignment | Alignment([(1,2), (2,1)]))
+    0.222222...
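+
+For the 0.333333... value above: ``als`` serves as both the sure set S and
+the possible set P, so |A∩S| = |A∩P| = 3, while |A| = 5 and |S| = 4, giving
+AER = 1 - (3 + 3)/(5 + 4) = 1/3.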
+
+
+.. [OCH2000] Och, F. and Ney, H. (2000)
+    *Statistical Machine Translation*, EAMT Workshop
+
+.. [MIHALCEA2003] Mihalcea, R. and Pedersen, T. (2003)
+    *An evaluation exercise for word alignment*, HLT-NAACL 2003
+
+.. [KOEHN2010] Koehn, P. (2010)
+    *Statistical Machine Translation*, Cambridge University Press
+
+
diff --git a/nlp_resource_data/nltk/test/translate_fixt.py b/nlp_resource_data/nltk/test/translate_fixt.py
new file mode 100644 (file)
index 0000000..17b011b
--- /dev/null
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+
+from nltk.corpus import teardown_module
diff --git a/nlp_resource_data/nltk/test/tree.doctest b/nlp_resource_data/nltk/test/tree.doctest
new file mode 100644 (file)
index 0000000..9389417
--- /dev/null
@@ -0,0 +1,1101 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+===============================
+ Unit tests for nltk.tree.Tree
+===============================
+
+    >>> from nltk.tree import *
+
+Some trees to run tests on:
+
+    >>> dp1 = Tree('dp', [Tree('d', ['the']), Tree('np', ['dog'])])
+    >>> dp2 = Tree('dp', [Tree('d', ['the']), Tree('np', ['cat'])])
+    >>> vp = Tree('vp', [Tree('v', ['chased']), dp2])
+    >>> tree = Tree('s', [dp1, vp])
+    >>> print(tree)
+    (s (dp (d the) (np dog)) (vp (v chased) (dp (d the) (np cat))))
+
+The node label is accessed using the `label()` method:
+
+    >>> dp1.label(), dp2.label(), vp.label(), tree.label()
+    ('dp', 'dp', 'vp', 's')
+
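+Indexing with a tuple of integers walks down the tree one child index at a
+time; here the path runs s -> vp -> dp -> np -> leaf:
+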
+    >>> print(tree[1,1,1,0])
+    cat
+
+The `treepositions` method returns a list of the tree positions of
+subtrees and leaves in a tree.  By default, it gives the position of
+every tree, subtree, and leaf, in prefix order:
+
+    >>> print(tree.treepositions())
+    [(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0), (1, 1, 0, 0), (1, 1, 1), (1, 1, 1, 0)]
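+
+Passing ``order='leaves'`` restricts the result to the positions of the
+leaves of the same tree:
+
+    >>> print(tree.treepositions('leaves'))
+    [(0, 0, 0), (0, 1, 0), (1, 0, 0), (1, 1, 0, 0), (1, 1, 1, 0)]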
+
+In addition to `str` and `repr`, several methods exist to convert a
+tree object to one of several standard tree encodings:
+
+    >>> print(tree.pformat_latex_qtree())
+    \Tree [.s
+            [.dp [.d the ] [.np dog ] ]
+            [.vp [.v chased ] [.dp [.d the ] [.np cat ] ] ] ]
+
+There is also a fancy ASCII art representation:
+
+    >>> tree.pretty_print()
+                  s               
+          ________|_____           
+         |              vp        
+         |         _____|___       
+         dp       |         dp    
+      ___|___     |      ___|___   
+     d       np   v     d       np
+     |       |    |     |       |  
+    the     dog chased the     cat
+
+    >>> tree.pretty_print(unicodelines=True, nodedist=4)
+                           s                        
+            ┌──────────────┴────────┐                   
+            │                       vp              
+            │              ┌────────┴──────┐            
+            dp             │               dp       
+     ┌──────┴──────┐       │        ┌──────┴──────┐     
+     d             np      v        d             np
+     │             │       │        │             │     
+    the           dog    chased    the           cat
+
+Trees can be initialized from treebank strings:
+
+    >>> tree2 = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')
+    >>> print(tree2)
+    (S (NP I) (VP (V enjoyed) (NP my cookie)))
+
+Trees can be compared for equality:
+
+    >>> tree == Tree.fromstring(str(tree))
+    True
+    >>> tree2 == Tree.fromstring(str(tree2))
+    True
+    >>> tree == tree2
+    False
+    >>> tree == Tree.fromstring(str(tree2))
+    False
+    >>> tree2 == Tree.fromstring(str(tree))
+    False
+
+    >>> tree != Tree.fromstring(str(tree))
+    False
+    >>> tree2 != Tree.fromstring(str(tree2))
+    False
+    >>> tree != tree2
+    True
+    >>> tree != Tree.fromstring(str(tree2))
+    True
+    >>> tree2 != Tree.fromstring(str(tree))
+    True
+
+    >>> tree < tree2 or tree > tree2
+    True
+
+Tree Parsing
+============
+
+The class method `Tree.fromstring()` can be used to parse trees, and it
+provides some additional options.
+
+    >>> tree = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')
+    >>> print(tree)
+    (S (NP I) (VP (V enjoyed) (NP my cookie)))
+
+When called on a subclass of `Tree`, it will create trees of that
+type:
+
+    >>> tree = ImmutableTree.fromstring('(VP (V enjoyed) (NP my cookie))')
+    >>> print(tree)
+    (VP (V enjoyed) (NP my cookie))
+    >>> print(type(tree))
+    <class 'nltk.tree.ImmutableTree'>
+    >>> tree[1] = 'x'
+    Traceback (most recent call last):
+      . . .
+    ValueError: ImmutableTree may not be modified
+    >>> del tree[0]
+    Traceback (most recent call last):
+      . . .
+    ValueError: ImmutableTree may not be modified
+
+The ``brackets`` parameter can be used to specify two characters that
+should be used as brackets:
+
+    >>> print(Tree.fromstring('[S [NP I] [VP [V enjoyed] [NP my cookie]]]',
+    ...                  brackets='[]'))
+    (S (NP I) (VP (V enjoyed) (NP my cookie)))
+    >>> print(Tree.fromstring('<S <NP I> <VP <V enjoyed> <NP my cookie>>>',
+    ...                  brackets='<>'))
+    (S (NP I) (VP (V enjoyed) (NP my cookie)))
+
+If ``brackets`` is not a string, or is not exactly two characters,
+then `Tree.fromstring` raises an exception:
+
+    >>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets='')
+    Traceback (most recent call last):
+      . . .
+    TypeError: brackets must be a length-2 string
+    >>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets='<<>>')
+    Traceback (most recent call last):
+      . . .
+    TypeError: brackets must be a length-2 string
+    >>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets=12)
+    Traceback (most recent call last):
+      . . .
+    TypeError: brackets must be a length-2 string
+    >>> Tree.fromstring('<<NP my cookie>>', brackets=('<<','>>'))
+    Traceback (most recent call last):
+      . . .
+    TypeError: brackets must be a length-2 string
+
+(We may add support for multi-character brackets in the future, in
+which case the ``brackets=('<<','>>')`` example would start working.)
+
+Whitespace brackets are not permitted:
+
+    >>> Tree.fromstring('(NP my cookie\n', brackets='(\n')
+    Traceback (most recent call last):
+      . . .
+    TypeError: whitespace brackets not allowed
+
+If an invalid tree is given to Tree.fromstring, then it raises a
+ValueError, with a description of the problem:
+
+    >>> Tree.fromstring('(NP my cookie) (NP my milk)')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Tree.fromstring(): expected 'end-of-string' but got '(NP'
+                at index 15.
+                    "...y cookie) (NP my mil..."
+                                  ^
+    >>> Tree.fromstring(')NP my cookie(')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Tree.fromstring(): expected '(' but got ')'
+                at index 0.
+                    ")NP my coo..."
+                     ^
+    >>> Tree.fromstring('(NP my cookie))')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Tree.fromstring(): expected 'end-of-string' but got ')'
+                at index 14.
+                    "...my cookie))"
+                                  ^
+    >>> Tree.fromstring('my cookie)')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Tree.fromstring(): expected '(' but got 'my'
+                at index 0.
+                    "my cookie)"
+                     ^
+    >>> Tree.fromstring('(NP my cookie')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Tree.fromstring(): expected ')' but got 'end-of-string'
+                at index 13.
+                    "... my cookie"
+                                  ^
+    >>> Tree.fromstring('')
+    Traceback (most recent call last):
+      . . .
+    ValueError: Tree.fromstring(): expected '(' but got 'end-of-string'
+                at index 0.
+                    ""
+                     ^
+
+Trees with no children are supported:
+
+    >>> print(Tree.fromstring('(S)'))
+    (S )
+    >>> print(Tree.fromstring('(X (Y) (Z))'))
+    (X (Y ) (Z ))
+
+Trees with an empty node label and no children are supported:
+
+    >>> print(Tree.fromstring('()'))
+    ( )
+    >>> print(Tree.fromstring('(X () ())'))
+    (X ( ) ( ))
+
+Trees with an empty node label and children are supported, but only if the
+first child is not a leaf (otherwise, it will be treated as the node label).
+
+    >>> print(Tree.fromstring('((A) (B) (C))'))
+    ( (A ) (B ) (C ))
+    >>> print(Tree.fromstring('((A) leaf)'))
+    ( (A ) leaf)
+    >>> print(Tree.fromstring('(((())))'))
+    ( ( ( ( ))))
+
+The optional arguments `read_node` and `read_leaf` may be used to
+transform the string values of nodes or leaves.
+
+    >>> print(Tree.fromstring('(A b (C d e) (F (G h i)))',
+    ...                  read_node=lambda s: '<%s>' % s,
+    ...                  read_leaf=lambda s: '"%s"' % s))
+    (<A> "b" (<C> "d" "e") (<F> (<G> "h" "i")))
+
+These transformation functions are typically used when the node or
+leaf labels should be parsed to a non-string value (such as a feature
+structure).  If node and leaf labels need to be able to include
+whitespace, then you must also use the optional `node_pattern` and
+`leaf_pattern` arguments.
+
+    >>> from nltk.featstruct import FeatStruct
+    >>> tree = Tree.fromstring('([cat=NP] [lex=the] [lex=dog])',
+    ...                   read_node=FeatStruct, read_leaf=FeatStruct)
+    >>> tree.set_label(tree.label().unify(FeatStruct('[num=singular]')))
+    >>> print(tree)
+    ([cat='NP', num='singular'] [lex='the'] [lex='dog'])
+
+The optional argument ``remove_empty_top_bracketing`` can be used to
+remove any top-level empty bracketing that occurs.
+
+    >>> print(Tree.fromstring('((S (NP I) (VP (V enjoyed) (NP my cookie))))',
+    ...                  remove_empty_top_bracketing=True))
+    (S (NP I) (VP (V enjoyed) (NP my cookie)))
+
+It will not remove a top-level empty bracketing with multiple children:
+
+    >>> print(Tree.fromstring('((A a) (B b))'))
+    ( (A a) (B b))
+
+Parented Trees
+==============
+`ParentedTree` is a subclass of `Tree` that automatically maintains
+parent pointers for single-parented trees.  Parented trees can be
+created directly from a node label and a list of children:
+
+    >>> ptree = (
+    ...     ParentedTree('VP', [
+    ...         ParentedTree('VERB', ['saw']),
+    ...         ParentedTree('NP', [
+    ...             ParentedTree('DET', ['the']),
+    ...             ParentedTree('NOUN', ['dog'])])]))
+    >>> print(ptree)
+    (VP (VERB saw) (NP (DET the) (NOUN dog)))
+
+Parented trees can be created from strings using the classmethod
+`ParentedTree.fromstring`:
+
+    >>> ptree = ParentedTree.fromstring('(VP (VERB saw) (NP (DET the) (NOUN dog)))')
+    >>> print(ptree)
+    (VP (VERB saw) (NP (DET the) (NOUN dog)))
+    >>> print(type(ptree))
+    <class 'nltk.tree.ParentedTree'>
+
+Parented trees can also be created by using the classmethod
+`ParentedTree.convert` to convert another type of tree to a parented
+tree:
+
+    >>> tree = Tree.fromstring('(VP (VERB saw) (NP (DET the) (NOUN dog)))')
+    >>> ptree = ParentedTree.convert(tree)
+    >>> print(ptree)
+    (VP (VERB saw) (NP (DET the) (NOUN dog)))
+    >>> print(type(ptree))
+    <class 'nltk.tree.ParentedTree'>
+
+.. clean-up:
+
+    >>> del tree
+
+`ParentedTree`\ s should never be used in the same tree as `Tree`\ s
+or `MultiParentedTree`\ s.  Mixing tree implementations may result in
+incorrect parent pointers and in `TypeError` exceptions:
+
+    >>> # Inserting a Tree in a ParentedTree gives an exception:
+    >>> ParentedTree('NP', [
+    ...     Tree('DET', ['the']), Tree('NOUN', ['dog'])])
+    Traceback (most recent call last):
+      . . .
+    TypeError: Can not insert a non-ParentedTree into a ParentedTree
+
+    >>> # inserting a ParentedTree in a Tree gives incorrect parent pointers:
+    >>> broken_tree = Tree('NP', [
+    ...     ParentedTree('DET', ['the']), ParentedTree('NOUN', ['dog'])])
+    >>> print(broken_tree[0].parent())
+    None
+
+Parented Tree Methods
+------------------------
+In addition to all the methods defined by the `Tree` class, the
+`ParentedTree` class adds six new methods whose values are
+automatically updated whenever a parented tree is modified: `parent()`,
+`parent_index()`, `left_sibling()`, `right_sibling()`, `root()`, and
+`treeposition()`.
+
+The `parent()` method contains a `ParentedTree`\ 's parent, if it has
+one; and ``None`` otherwise.  `ParentedTree`\ s that do not have
+parents are known as "root trees."
+
+    >>> for subtree in ptree.subtrees():
+    ...     print(subtree)
+    ...     print('  Parent = %s' % subtree.parent())
+    (VP (VERB saw) (NP (DET the) (NOUN dog)))
+      Parent = None
+    (VERB saw)
+      Parent = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+    (NP (DET the) (NOUN dog))
+      Parent = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+    (DET the)
+      Parent = (NP (DET the) (NOUN dog))
+    (NOUN dog)
+      Parent = (NP (DET the) (NOUN dog))
+
+The `parent_index()` method stores the index of a tree in its parent's
+child list.  If a tree does not have a parent, then its `parent_index`
+is ``None``.
+
+    >>> for subtree in ptree.subtrees():
+    ...     print(subtree)
+    ...     print('  Parent Index = %s' % subtree.parent_index())
+    ...     assert (subtree.parent() is None or
+    ...             subtree.parent()[subtree.parent_index()] is subtree)
+    (VP (VERB saw) (NP (DET the) (NOUN dog)))
+      Parent Index = None
+    (VERB saw)
+      Parent Index = 0
+    (NP (DET the) (NOUN dog))
+      Parent Index = 1
+    (DET the)
+      Parent Index = 0
+    (NOUN dog)
+      Parent Index = 1
+
+Note that ``ptree.parent().index(ptree)`` is *not* equivalent to
+``ptree.parent_index()``.  In particular, ``ptree.parent().index(ptree)``
+will return the index of the first child of ``ptree.parent()`` that is
+equal to ``ptree`` (using ``==``); and that child may not be
+``ptree``:
+
+    >>> on_and_on = ParentedTree('CONJP', [
+    ...     ParentedTree('PREP', ['on']),
+    ...     ParentedTree('CONJ', ['and']),
+    ...     ParentedTree('PREP', ['on'])])
+    >>> second_on = on_and_on[2]
+    >>> print(second_on.parent_index())
+    2
+    >>> print(second_on.parent().index(second_on))
+    0
+
+The methods `left_sibling()` and `right_sibling()` can be used to get a
+parented tree's siblings.  If a tree does not have a left or right
+sibling, then the corresponding method returns ``None``:
+
+    >>> for subtree in ptree.subtrees():
+    ...     print(subtree)
+    ...     print('  Left Sibling  = %s' % subtree.left_sibling())
+    ...     print('  Right Sibling = %s' % subtree.right_sibling())
+    (VP (VERB saw) (NP (DET the) (NOUN dog)))
+      Left Sibling  = None
+      Right Sibling = None
+    (VERB saw)
+      Left Sibling  = None
+      Right Sibling = (NP (DET the) (NOUN dog))
+    (NP (DET the) (NOUN dog))
+      Left Sibling  = (VERB saw)
+      Right Sibling = None
+    (DET the)
+      Left Sibling  = None
+      Right Sibling = (NOUN dog)
+    (NOUN dog)
+      Left Sibling  = (DET the)
+      Right Sibling = None
+
+A parented tree's root tree can be accessed using the `root()`
+method.  This method follows the tree's parent pointers until it
+finds a tree without a parent.  If a tree does not have a parent, then
+it is its own root:
+
+    >>> for subtree in ptree.subtrees():
+    ...     print(subtree)
+    ...     print('  Root = %s' % subtree.root())
+    (VP (VERB saw) (NP (DET the) (NOUN dog)))
+      Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+    (VERB saw)
+      Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+    (NP (DET the) (NOUN dog))
+      Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+    (DET the)
+      Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+    (NOUN dog)
+      Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
+
+The `treeposition()` method can be used to find a tree's treeposition
+relative to its root:
+
+    >>> for subtree in ptree.subtrees():
+    ...     print(subtree)
+    ...     print('  Tree Position = %s' % (subtree.treeposition(),))
+    ...     assert subtree.root()[subtree.treeposition()] is subtree
+    (VP (VERB saw) (NP (DET the) (NOUN dog)))
+      Tree Position = ()
+    (VERB saw)
+      Tree Position = (0,)
+    (NP (DET the) (NOUN dog))
+      Tree Position = (1,)
+    (DET the)
+      Tree Position = (1, 0)
+    (NOUN dog)
+      Tree Position = (1, 1)
+
+Whenever a parented tree is modified, the values returned by all of the
+methods described above (`parent()`, `parent_index()`, `left_sibling()`,
+`right_sibling()`, `root()`, and `treeposition()`) are automatically
+updated.  For example,
+if we replace ``ptree``\ 's subtree for the word "dog" with a new
+subtree for "cat," the method values for both the "dog" subtree and the
+"cat" subtree get automatically updated:
+
+    >>> # Replace the dog with a cat
+    >>> dog = ptree[1,1]
+    >>> cat = ParentedTree('NOUN', ['cat'])
+    >>> ptree[1,1] = cat
+
+    >>> # the noun phrase is no longer the dog's parent:
+    >>> print(dog.parent(), dog.parent_index(), dog.left_sibling())
+    None None None
+    >>> # dog is now its own root.
+    >>> print(dog.root())
+    (NOUN dog)
+    >>> print(dog.treeposition())
+    ()
+
+    >>> # the cat's parent is now the noun phrase:
+    >>> print(cat.parent())
+    (NP (DET the) (NOUN cat))
+    >>> print(cat.parent_index())
+    1
+    >>> print(cat.left_sibling())
+    (DET the)
+    >>> print(cat.root())
+    (VP (VERB saw) (NP (DET the) (NOUN cat)))
+    >>> print(cat.treeposition())
+    (1, 1)
+
+ParentedTree Regression Tests
+-----------------------------
+Keep track of all trees that we create (including subtrees) using this
+variable:
+
+    >>> all_ptrees = []
+
+Define a helper function to create new parented trees:
+
+    >>> def make_ptree(s):
+    ...     ptree = ParentedTree.convert(Tree.fromstring(s))
+    ...     all_ptrees.extend(t for t in ptree.subtrees()
+    ...                       if isinstance(t, Tree))
+    ...     return ptree
+
+Define a test function that examines every subtree in all_ptrees and
+checks that all six of its methods are defined correctly.  Any ptrees
+passed as arguments are printed:
+
+    >>> def pcheck(*print_ptrees):
+    ...     for ptree in all_ptrees:
+    ...         # Check ptree's methods.
+    ...         if ptree.parent() is not None:
+    ...             i = ptree.parent_index()
+    ...             assert ptree.parent()[i] is ptree
+    ...             if i > 0:
+    ...                 assert ptree.left_sibling() is ptree.parent()[i-1]
+    ...             if i < (len(ptree.parent())-1):
+    ...                 assert ptree.right_sibling() is ptree.parent()[i+1]
+    ...             assert len(ptree.treeposition()) > 0
+    ...             assert (ptree.treeposition() ==
+    ...                     ptree.parent().treeposition() + (ptree.parent_index(),))
+    ...             assert ptree.root() is not ptree
+    ...             assert ptree.root() is not None
+    ...             assert ptree.root() is ptree.parent().root()
+    ...             assert ptree.root()[ptree.treeposition()] is ptree
+    ...         else:
+    ...             assert ptree.parent_index() is None
+    ...             assert ptree.left_sibling() is None
+    ...             assert ptree.right_sibling() is None
+    ...             assert ptree.root() is ptree
+    ...             assert ptree.treeposition() == ()
+    ...         # Check ptree's children's methods:
+    ...         for i, child in enumerate(ptree):
+    ...             if isinstance(child, Tree):
+    ...                 # check parent() & parent_index() methods
+    ...                 assert child.parent() is ptree
+    ...                 assert child.parent_index() == i
+    ...                 # check sibling methods
+    ...                 if i == 0:
+    ...                     assert child.left_sibling() is None
+    ...                 else:
+    ...                     assert child.left_sibling() is ptree[i-1]
+    ...                 if i == len(ptree)-1:
+    ...                     assert child.right_sibling() is None
+    ...                 else:
+    ...                     assert child.right_sibling() is ptree[i+1]
+    ...     if print_ptrees:
+    ...         print('ok!', end=' ')
+    ...         for ptree in print_ptrees: print(ptree)
+    ...     else:
+    ...         print('ok!')
+
+Run our test function on a variety of newly-created trees:
+
+    >>> pcheck(make_ptree('(A)'))
+    ok! (A )
+    >>> pcheck(make_ptree('(A (B (C (D) (E f)) g) h)'))
+    ok! (A (B (C (D ) (E f)) g) h)
+    >>> pcheck(make_ptree('(A (B) (C c) (D d d) (E e e e))'))
+    ok! (A (B ) (C c) (D d d) (E e e e))
+    >>> pcheck(make_ptree('(A (B) (C (c)) (D (d) (d)) (E (e) (e) (e)))'))
+    ok! (A (B ) (C (c )) (D (d ) (d )) (E (e ) (e ) (e )))
+
+Run our test function after performing various tree-modification
+operations:
+
+**__delitem__()**
+
+    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> e = ptree[0,0,1]
+    >>> del ptree[0,0,1]; pcheck(ptree); pcheck(e)
+    ok! (A (B (C (D ) (Q p)) g) h)
+    ok! (E f)
+    >>> del ptree[0,0,0]; pcheck(ptree)
+    ok! (A (B (C (Q p)) g) h)
+    >>> del ptree[0,1]; pcheck(ptree)
+    ok! (A (B (C (Q p))) h)
+    >>> del ptree[-1]; pcheck(ptree)
+    ok! (A (B (C (Q p))))
+    >>> del ptree[-100]
+    Traceback (most recent call last):
+      . . .
+    IndexError: index out of range
+    >>> del ptree[()]
+    Traceback (most recent call last):
+      . . .
+    IndexError: The tree position () may not be deleted.
+
+    >>> # With slices:
+    >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
+    >>> b = ptree[0]
+    >>> del ptree[0:0]; pcheck(ptree)
+    ok! (A (B c) (D e) f g (H i) j (K l))
+    >>> del ptree[:1]; pcheck(ptree); pcheck(b)
+    ok! (A (D e) f g (H i) j (K l))
+    ok! (B c)
+    >>> del ptree[-2:]; pcheck(ptree)
+    ok! (A (D e) f g (H i))
+    >>> del ptree[1:3]; pcheck(ptree)
+    ok! (A (D e) (H i))
+    >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
+    >>> del ptree[5:1000]; pcheck(ptree)
+    ok! (A (B c) (D e) f g (H i))
+    >>> del ptree[-2:1000]; pcheck(ptree)
+    ok! (A (B c) (D e) f)
+    >>> del ptree[-100:1]; pcheck(ptree)
+    ok! (A (D e) f)
+    >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
+    >>> del ptree[1:-2:2]; pcheck(ptree)
+    ok! (A (B c) f (H i) j (K l))
+
+**__setitem__()**
+
+    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> d, e, q = ptree[0,0]
+    >>> ptree[0,0,0] = 'x'; pcheck(ptree); pcheck(d)
+    ok! (A (B (C x (E f) (Q p)) g) h)
+    ok! (D )
+    >>> ptree[0,0,1] = make_ptree('(X (Y z))'); pcheck(ptree); pcheck(e)
+    ok! (A (B (C x (X (Y z)) (Q p)) g) h)
+    ok! (E f)
+    >>> ptree[1] = d; pcheck(ptree)
+    ok! (A (B (C x (X (Y z)) (Q p)) g) (D ))
+    >>> ptree[-1] = 'x'; pcheck(ptree)
+    ok! (A (B (C x (X (Y z)) (Q p)) g) x)
+    >>> ptree[-100] = 'y'
+    Traceback (most recent call last):
+      . . .
+    IndexError: index out of range
+    >>> ptree[()] = make_ptree('(X y)')
+    Traceback (most recent call last):
+      . . .
+    IndexError: The tree position () may not be assigned to.
+
+    >>> # With slices:
+    >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
+    >>> b = ptree[0]
+    >>> ptree[0:0] = ('x', make_ptree('(Y)')); pcheck(ptree)
+    ok! (A x (Y ) (B c) (D e) f g (H i) j (K l))
+    >>> ptree[2:6] = (); pcheck(ptree); pcheck(b)
+    ok! (A x (Y ) (H i) j (K l))
+    ok! (B c)
+    >>> ptree[-2:] = ('z', 'p'); pcheck(ptree)
+    ok! (A x (Y ) (H i) z p)
+    >>> ptree[1:3] = [make_ptree('(X)') for x in range(10)]; pcheck(ptree)
+    ok! (A x (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) z p)
+    >>> ptree[5:1000] = []; pcheck(ptree)
+    ok! (A x (X ) (X ) (X ) (X ))
+    >>> ptree[-2:1000] = ['n']; pcheck(ptree)
+    ok! (A x (X ) (X ) n)
+    >>> ptree[-100:1] = [make_ptree('(U v)')]; pcheck(ptree)
+    ok! (A (U v) (X ) (X ) n)
+    >>> ptree[-1:] = (make_ptree('(X)') for x in range(3)); pcheck(ptree)
+    ok! (A (U v) (X ) (X ) (X ) (X ) (X ))
+    >>> ptree[1:-2:2] = ['x', 'y']; pcheck(ptree)
+    ok! (A (U v) x (X ) y (X ) (X ))
+
+**append()**
+
+    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> ptree.append('x'); pcheck(ptree)
+    ok! (A (B (C (D ) (E f) (Q p)) g) h x)
+    >>> ptree.append(make_ptree('(X (Y z))')); pcheck(ptree)
+    ok! (A (B (C (D ) (E f) (Q p)) g) h x (X (Y z)))
+
+**extend()**
+
+    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> ptree.extend(['x', 'y', make_ptree('(X (Y z))')]); pcheck(ptree)
+    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
+    >>> ptree.extend([]); pcheck(ptree)
+    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
+    >>> ptree.extend(make_ptree('(X)') for x in range(3)); pcheck(ptree)
+    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)) (X ) (X ) (X ))
+
+**insert()**
+
+    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> ptree.insert(0, make_ptree('(X (Y z))')); pcheck(ptree)
+    ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) h)
+    >>> ptree.insert(-1, make_ptree('(X (Y z))')); pcheck(ptree)
+    ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
+    >>> ptree.insert(-4, make_ptree('(X (Y z))')); pcheck(ptree)
+    ok! (A (X (Y z)) (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
+    >>> # Note: as with ``list``, inserting at a negative index that
+    >>> # gives a position before the start of the list does *not*
+    >>> # raise an IndexError exception; it just inserts at 0.
+    >>> ptree.insert(-400, make_ptree('(X (Y z))')); pcheck(ptree)
+    ok! (A
+      (X (Y z))
+      (X (Y z))
+      (X (Y z))
+      (B (C (D ) (E f) (Q p)) g)
+      (X (Y z))
+      h)
+
+**pop()**
+
+    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> ptree[0,0].pop(1); pcheck(ptree)
+    ParentedTree('E', ['f'])
+    ok! (A (B (C (D ) (Q p)) g) h)
+    >>> ptree[0].pop(-1); pcheck(ptree)
+    'g'
+    ok! (A (B (C (D ) (Q p))) h)
+    >>> ptree.pop(); pcheck(ptree)
+    'h'
+    ok! (A (B (C (D ) (Q p))))
+    >>> ptree.pop(-100)
+    Traceback (most recent call last):
+      . . .
+    IndexError: index out of range
+
+**remove()**
+
+    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> e = ptree[0,0,1]
+    >>> ptree[0,0].remove(ptree[0,0,1]); pcheck(ptree); pcheck(e)
+    ok! (A (B (C (D ) (Q p)) g) h)
+    ok! (E f)
+    >>> ptree[0,0].remove(make_ptree('(Q p)')); pcheck(ptree)
+    ok! (A (B (C (D )) g) h)
+    >>> ptree[0,0].remove(make_ptree('(Q p)'))
+    Traceback (most recent call last):
+      . . .
+    ValueError: ParentedTree('Q', ['p']) is not in list
+    >>> ptree.remove('h'); pcheck(ptree)
+    ok! (A (B (C (D )) g))
+    >>> ptree.remove('h')
+    Traceback (most recent call last):
+      . . .
+    ValueError: 'h' is not in list
+    >>> # remove() removes the first subtree that is equal (==) to the
+    >>> # given tree, which may not be the identical tree we give it:
+    >>> ptree = make_ptree('(A (X x) (Y y) (X x))')
+    >>> x1, y, x2 = ptree
+    >>> ptree.remove(ptree[-1]); pcheck(ptree)
+    ok! (A (Y y) (X x))
+    >>> print(x1.parent()); pcheck(x1)
+    None
+    ok! (X x)
+    >>> print(x2.parent())
+    (A (Y y) (X x))
+
+Test that a tree can not be given multiple parents:
+
+    >>> ptree = make_ptree('(A (X x) (Y y) (Z z))')
+    >>> ptree[0] = ptree[1]
+    Traceback (most recent call last):
+      . . .
+    ValueError: Can not insert a subtree that already has a parent.
+    >>> pcheck()
+    ok!
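+
+A subtree can be reused elsewhere by inserting a deep copy of it, since
+a deep copy starts out with no parent (a minimal sketch of the idiom,
+continuing from the ``ptree`` above):
+
+    >>> ptree[0] = ptree[1].copy(deep=True); pcheck(ptree)
+    ok! (A (Y y) (Y y) (Z z))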
+
+[more to be written]
+
+
+ImmutableParentedTree Regression Tests
+--------------------------------------
+
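+As the tests below show, attempts to modify an immutable tree raise a
+``ValueError``:
+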
+    >>> iptree = ImmutableParentedTree.convert(ptree)
+    >>> type(iptree)
+    <class 'nltk.tree.ImmutableParentedTree'>
+    >>> del iptree[0]
+    Traceback (most recent call last):
+      . . .
+    ValueError: ImmutableParentedTree may not be modified
+    >>> iptree.set_label('newnode')
+    Traceback (most recent call last):
+      . . .
+    ValueError: ImmutableParentedTree may not be modified
+
+
+MultiParentedTree Regression Tests
+----------------------------------
+Keep track of all trees that we create (including subtrees) using this
+variable:
+
+    >>> all_mptrees = []
+
+Define a helper function to create new multi-parented trees:
+
+    >>> def make_mptree(s):
+    ...     mptree = MultiParentedTree.convert(Tree.fromstring(s))
+    ...     all_mptrees.extend(t for t in mptree.subtrees()
+    ...                       if isinstance(t, Tree))
+    ...     return mptree
+
+Define a test function that examines every subtree in all_mptrees and
+checks that all six of its methods are defined correctly.  Any mptrees
+passed as arguments are printed:
+
+    >>> def mpcheck(*print_mptrees):
+    ...     def has(seq, val): # uses identity comparison
+    ...         for item in seq:
+    ...             if item is val: return True
+    ...         return False
+    ...     for mptree in all_mptrees:
+    ...         # Check mptree's methods.
+    ...         if len(mptree.parents()) == 0:
+    ...             assert len(mptree.left_siblings()) == 0
+    ...             assert len(mptree.right_siblings()) == 0
+    ...             assert len(mptree.roots()) == 1
+    ...             assert mptree.roots()[0] is mptree
+    ...             assert mptree.treepositions(mptree) == [()]
+    ...             left_siblings = right_siblings = ()
+    ...             roots = {id(mptree): 1}
+    ...         else:
+    ...             roots = dict((id(r), 0) for r in mptree.roots())
+    ...             left_siblings = mptree.left_siblings()
+    ...             right_siblings = mptree.right_siblings()
+    ...         for parent in mptree.parents():
+    ...             for i in mptree.parent_indices(parent):
+    ...                 assert parent[i] is mptree
+    ...                 # check left siblings
+    ...                 if i > 0:
+    ...                     for j in range(len(left_siblings)):
+    ...                         if left_siblings[j] is parent[i-1]:
+    ...                             del left_siblings[j]
+    ...                             break
+    ...                     else:
+    ...                         assert 0, 'sibling not found!'
+    ...                 # check right siblings
+    ...                 if i < (len(parent)-1):
+    ...                     for j in range(len(right_siblings)):
+    ...                         if right_siblings[j] is parent[i+1]:
+    ...                             del right_siblings[j]
+    ...                             break
+    ...                     else:
+    ...                         assert 0, 'sibling not found!'
+    ...             # check roots
+    ...             for root in parent.roots():
+    ...                 assert id(root) in roots, 'missing root'
+    ...                 roots[id(root)] += 1
+    ...         # check that we don't have any unexplained values
+    ...         assert len(left_siblings)==0, 'unexpected sibling'
+    ...         assert len(right_siblings)==0, 'unexpected sibling'
+    ...         for v in roots.values(): assert v>0, roots #'unexpected root'
+    ...         # check treepositions
+    ...         for root in mptree.roots():
+    ...             for treepos in mptree.treepositions(root):
+    ...                 assert root[treepos] is mptree
+    ...         # Check mptree's children's methods:
+    ...         for i, child in enumerate(mptree):
+    ...             if isinstance(child, Tree):
+    ...                 # check parents() & parent_indices() methods
+    ...                 assert has(child.parents(), mptree)
+    ...                 assert i in child.parent_indices(mptree)
+    ...                 # check sibling methods
+    ...                 if i > 0:
+    ...                     assert has(child.left_siblings(), mptree[i-1])
+    ...                 if i < len(mptree)-1:
+    ...                     assert has(child.right_siblings(), mptree[i+1])
+    ...     if print_mptrees:
+    ...         print('ok!', end=' ')
+    ...         for mptree in print_mptrees: print(mptree)
+    ...     else:
+    ...         print('ok!')
+
+Run our test function on a variety of newly-created trees:
+
+    >>> mpcheck(make_mptree('(A)'))
+    ok! (A )
+    >>> mpcheck(make_mptree('(A (B (C (D) (E f)) g) h)'))
+    ok! (A (B (C (D ) (E f)) g) h)
+    >>> mpcheck(make_mptree('(A (B) (C c) (D d d) (E e e e))'))
+    ok! (A (B ) (C c) (D d d) (E e e e))
+    >>> mpcheck(make_mptree('(A (B) (C (c)) (D (d) (d)) (E (e) (e) (e)))'))
+    ok! (A (B ) (C (c )) (D (d ) (d )) (E (e ) (e ) (e )))
+    >>> subtree = make_mptree('(A (B (C (D) (E f)) g) h)')
+
+Including some trees that contain multiple parents:
+
+    >>> mpcheck(MultiParentedTree('Z', [subtree, subtree]))
+    ok! (Z (A (B (C (D ) (E f)) g) h) (A (B (C (D ) (E f)) g) h))
+
+Run our test function after performing various tree-modification
+operations (n.b., these are the same tests that we ran for
+`ParentedTree`, above; thus, none of these trees actually *uses*
+multiple parents).
+
+**__delitem__()**
+
+    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> e = mptree[0,0,1]
+    >>> del mptree[0,0,1]; mpcheck(mptree); mpcheck(e)
+    ok! (A (B (C (D ) (Q p)) g) h)
+    ok! (E f)
+    >>> del mptree[0,0,0]; mpcheck(mptree)
+    ok! (A (B (C (Q p)) g) h)
+    >>> del mptree[0,1]; mpcheck(mptree)
+    ok! (A (B (C (Q p))) h)
+    >>> del mptree[-1]; mpcheck(mptree)
+    ok! (A (B (C (Q p))))
+    >>> del mptree[-100]
+    Traceback (most recent call last):
+      . . .
+    IndexError: index out of range
+    >>> del mptree[()]
+    Traceback (most recent call last):
+      . . .
+    IndexError: The tree position () may not be deleted.
+
+    >>> # With slices:
+    >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
+    >>> b = mptree[0]
+    >>> del mptree[0:0]; mpcheck(mptree)
+    ok! (A (B c) (D e) f g (H i) j (K l))
+    >>> del mptree[:1]; mpcheck(mptree); mpcheck(b)
+    ok! (A (D e) f g (H i) j (K l))
+    ok! (B c)
+    >>> del mptree[-2:]; mpcheck(mptree)
+    ok! (A (D e) f g (H i))
+    >>> del mptree[1:3]; mpcheck(mptree)
+    ok! (A (D e) (H i))
+    >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
+    >>> del mptree[5:1000]; mpcheck(mptree)
+    ok! (A (B c) (D e) f g (H i))
+    >>> del mptree[-2:1000]; mpcheck(mptree)
+    ok! (A (B c) (D e) f)
+    >>> del mptree[-100:1]; mpcheck(mptree)
+    ok! (A (D e) f)
+    >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
+    >>> del mptree[1:-2:2]; mpcheck(mptree)
+    ok! (A (B c) f (H i) j (K l))
+
+**__setitem__()**
+
+    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> d, e, q = mptree[0,0]
+    >>> mptree[0,0,0] = 'x'; mpcheck(mptree); mpcheck(d)
+    ok! (A (B (C x (E f) (Q p)) g) h)
+    ok! (D )
+    >>> mptree[0,0,1] = make_mptree('(X (Y z))'); mpcheck(mptree); mpcheck(e)
+    ok! (A (B (C x (X (Y z)) (Q p)) g) h)
+    ok! (E f)
+    >>> mptree[1] = d; mpcheck(mptree)
+    ok! (A (B (C x (X (Y z)) (Q p)) g) (D ))
+    >>> mptree[-1] = 'x'; mpcheck(mptree)
+    ok! (A (B (C x (X (Y z)) (Q p)) g) x)
+    >>> mptree[-100] = 'y'
+    Traceback (most recent call last):
+      . . .
+    IndexError: index out of range
+    >>> mptree[()] = make_mptree('(X y)')
+    Traceback (most recent call last):
+      . . .
+    IndexError: The tree position () may not be assigned to.
+
+    >>> # With slices:
+    >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
+    >>> b = mptree[0]
+    >>> mptree[0:0] = ('x', make_mptree('(Y)')); mpcheck(mptree)
+    ok! (A x (Y ) (B c) (D e) f g (H i) j (K l))
+    >>> mptree[2:6] = (); mpcheck(mptree); mpcheck(b)
+    ok! (A x (Y ) (H i) j (K l))
+    ok! (B c)
+    >>> mptree[-2:] = ('z', 'p'); mpcheck(mptree)
+    ok! (A x (Y ) (H i) z p)
+    >>> mptree[1:3] = [make_mptree('(X)') for x in range(10)]; mpcheck(mptree)
+    ok! (A x (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) z p)
+    >>> mptree[5:1000] = []; mpcheck(mptree)
+    ok! (A x (X ) (X ) (X ) (X ))
+    >>> mptree[-2:1000] = ['n']; mpcheck(mptree)
+    ok! (A x (X ) (X ) n)
+    >>> mptree[-100:1] = [make_mptree('(U v)')]; mpcheck(mptree)
+    ok! (A (U v) (X ) (X ) n)
+    >>> mptree[-1:] = (make_mptree('(X)') for x in range(3)); mpcheck(mptree)
+    ok! (A (U v) (X ) (X ) (X ) (X ) (X ))
+    >>> mptree[1:-2:2] = ['x', 'y']; mpcheck(mptree)
+    ok! (A (U v) x (X ) y (X ) (X ))
+
+**append()**
+
+    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> mptree.append('x'); mpcheck(mptree)
+    ok! (A (B (C (D ) (E f) (Q p)) g) h x)
+    >>> mptree.append(make_mptree('(X (Y z))')); mpcheck(mptree)
+    ok! (A (B (C (D ) (E f) (Q p)) g) h x (X (Y z)))
+
+**extend()**
+
+    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> mptree.extend(['x', 'y', make_mptree('(X (Y z))')]); mpcheck(mptree)
+    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
+    >>> mptree.extend([]); mpcheck(mptree)
+    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
+    >>> mptree.extend(make_mptree('(X)') for x in range(3)); mpcheck(mptree)
+    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)) (X ) (X ) (X ))
+
+**insert()**
+
+    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> mptree.insert(0, make_mptree('(X (Y z))')); mpcheck(mptree)
+    ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) h)
+    >>> mptree.insert(-1, make_mptree('(X (Y z))')); mpcheck(mptree)
+    ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
+    >>> mptree.insert(-4, make_mptree('(X (Y z))')); mpcheck(mptree)
+    ok! (A (X (Y z)) (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
+    >>> # Note: as with ``list``, inserting at a negative index that
+    >>> # gives a position before the start of the list does *not*
+    >>> # raise an IndexError exception; it just inserts at 0.
+    >>> mptree.insert(-400, make_mptree('(X (Y z))')); mpcheck(mptree)
+    ok! (A
+      (X (Y z))
+      (X (Y z))
+      (X (Y z))
+      (B (C (D ) (E f) (Q p)) g)
+      (X (Y z))
+      h)
+
+**pop()**
+
+    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> mptree[0,0].pop(1); mpcheck(mptree)
+    MultiParentedTree('E', ['f'])
+    ok! (A (B (C (D ) (Q p)) g) h)
+    >>> mptree[0].pop(-1); mpcheck(mptree)
+    'g'
+    ok! (A (B (C (D ) (Q p))) h)
+    >>> mptree.pop(); mpcheck(mptree)
+    'h'
+    ok! (A (B (C (D ) (Q p))))
+    >>> mptree.pop(-100)
+    Traceback (most recent call last):
+      . . .
+    IndexError: index out of range
+
+**remove()**
+
+    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
+    >>> e = mptree[0,0,1]
+    >>> mptree[0,0].remove(mptree[0,0,1]); mpcheck(mptree); mpcheck(e)
+    ok! (A (B (C (D ) (Q p)) g) h)
+    ok! (E f)
+    >>> mptree[0,0].remove(make_mptree('(Q p)')); mpcheck(mptree)
+    ok! (A (B (C (D )) g) h)
+    >>> mptree[0,0].remove(make_mptree('(Q p)'))
+    Traceback (most recent call last):
+      . . .
+    ValueError: MultiParentedTree('Q', ['p']) is not in list
+    >>> mptree.remove('h'); mpcheck(mptree)
+    ok! (A (B (C (D )) g))
+    >>> mptree.remove('h')
+    Traceback (most recent call last):
+      . . .
+    ValueError: 'h' is not in list
+    >>> # remove() removes the first subtree that is equal (==) to the
+    >>> # given tree, which may not be the identical tree we give it:
+    >>> mptree = make_mptree('(A (X x) (Y y) (X x))')
+    >>> x1, y, x2 = mptree
+    >>> mptree.remove(mptree[-1]); mpcheck(mptree)
+    ok! (A (Y y) (X x))
+    >>> print([str(p) for p in x1.parents()])
+    []
+    >>> print([str(p) for p in x2.parents()])
+    ['(A (Y y) (X x))']
+
+
+ImmutableMultiParentedTree Regression Tests
+-------------------------------------------
+
+    >>> imptree = ImmutableMultiParentedTree.convert(mptree)
+    >>> type(imptree)
+    <class 'nltk.tree.ImmutableMultiParentedTree'>
+    >>> del imptree[0]
+    Traceback (most recent call last):
+      . . .
+    ValueError: ImmutableMultiParentedTree may not be modified
+    >>> imptree.set_label('newnode')
+    Traceback (most recent call last):
+      . . .
+    ValueError: ImmutableMultiParentedTree may not be modified
+
+
+ProbabilisticTree Regression Tests
+----------------------------------
+
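+A `ProbabilisticTree` is a `Tree` that also carries a probability.  As
+the tests below check, a shallow ``copy()`` shares child nodes with the
+original, while ``copy(deep=True)`` copies them as well:
+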
+    >>> prtree = ProbabilisticTree("S", [ProbabilisticTree("NP", ["N"], prob=0.3)], prob=0.6)
+    >>> print(prtree)
+    (S (NP N)) (p=0.6)
+    >>> import copy
+    >>> prtree == copy.deepcopy(prtree) == prtree.copy(deep=True) == prtree.copy()
+    True
+    >>> prtree[0] is prtree.copy()[0]
+    True
+    >>> prtree[0] is prtree.copy(deep=True)[0]
+    False
+
+    >>> imprtree = ImmutableProbabilisticTree.convert(prtree)
+    >>> type(imprtree)
+    <class 'nltk.tree.ImmutableProbabilisticTree'>
+    >>> del imprtree[0]
+    Traceback (most recent call last):
+      . . .
+    ValueError: ImmutableProbabilisticTree may not be modified
+    >>> imprtree.set_label('newnode')
+    Traceback (most recent call last):
+      . . .
+    ValueError: ImmutableProbabilisticTree may not be modified
+
+
+Squashed Bugs
+=============
+
+This used to discard the ``(B b)`` subtree (fixed in svn 6270):
+
+    >>> print(Tree.fromstring('((A a) (B b))'))
+    ( (A a) (B b))
+
diff --git a/nlp_resource_data/nltk/test/treeprettyprinter.doctest b/nlp_resource_data/nltk/test/treeprettyprinter.doctest
new file mode 100644 (file)
index 0000000..3c129c7
--- /dev/null
+++ b/nlp_resource_data/nltk/test/treeprettyprinter.doctest
@@ -0,0 +1,127 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+========================================================
+ Unit tests for nltk.treeprettyprinter.TreePrettyPrinter
+========================================================
+
+    >>> from nltk.tree import Tree
+    >>> from nltk.treeprettyprinter import TreePrettyPrinter
+
+Tree no. 2170 from nltk.corpus.treebank:
+
+    >>> tree = Tree.fromstring(
+    ...     '(S (NP-SBJ (PRP I)) (VP (VBP feel) (ADJP-PRD (RB pretty) '
+    ...     '(JJ good)) (PP-CLR (IN about) (NP (PRP it)))) (. .))')
+    >>> tpp = TreePrettyPrinter(tree)
+    >>> print(tpp.text())
+                                 S                       
+       __________________________|_____________________   
+      |                          VP                    | 
+      |      ____________________|___________          |  
+      |     |             |                PP-CLR      | 
+      |     |             |             _____|_____    |  
+    NP-SBJ  |          ADJP-PRD        |           NP  | 
+      |     |      _______|______      |           |   |  
+     PRP   VBP    RB             JJ    IN         PRP  . 
+      |     |     |              |     |           |   |  
+      I    feel pretty          good about         it  . 
+
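+The same tree rendered with Unicode line-drawing characters:
+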
+    >>> print(tpp.text(unicodelines=True))
+                                 S                       
+      ┌──────────────────────────┼─────────────────────┐  
+      │                          VP                    │ 
+      │     ┌─────────────┬──────┴───────────┐         │  
+      │     │             │                PP-CLR      │ 
+      │     │             │            ┌─────┴─────┐   │  
+    NP-SBJ  │          ADJP-PRD        │           NP  │ 
+      │     │     ┌───────┴──────┐     │           │   │  
+     PRP   VBP    RB             JJ    IN         PRP  . 
+      │     │     │              │     │           │   │  
+      I    feel pretty          good about         it  . 
+
+A tree with long labels:
+
+    >>> tree = Tree.fromstring(
+    ...     '(sentence (plural-noun-phrase (plural-noun Superconductors)) '
+    ...     '(verb-phrase (plural-verb conduct) '
+    ...     '(noun-phrase (singular-noun electricity))))')
+    >>> tpp = TreePrettyPrinter(tree)
+    >>> print(tpp.text(abbreviate=8, nodedist=2))
+                sentence                      
+         __________|__________                  
+        |                 verb-phr.           
+        |           __________|__________       
+    plural-n.      |                 noun-phr.
+        |          |                     |      
+    plural-n.  plural-v.             singular.
+        |          |                     |      
+    Supercon.   conduct              electric.
+
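+Instead of abbreviating long labels, ``maxwidth`` wraps them over
+multiple lines:
+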
+    >>> print(tpp.text(maxwidth=8, nodedist=2))
+              sentence                   
+        _________|________                 
+       |                verb-            
+       |                phrase           
+       |          ________|_________       
+    plural-      |                noun-  
+     noun-       |                phrase 
+     phrase      |                  |    
+       |         |                  |      
+    plural-   plural-           singular-
+      noun      verb               noun  
+       |         |                  |      
+    Supercon  conduct            electric
+    ductors                        ity   
+
+A discontinuous tree:
+
+    >>> tree = Tree.fromstring(
+    ...     '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
+    ...     '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
+    ...     '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
+    >>> sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
+    ...             ' zwemmen of terrassen .'.split())
+    >>> tpp = TreePrettyPrinter(tree, sentence)
+    >>> print(tpp.text())
+                                          top                                                
+                                      _____|______________________________________________    
+                                   smain                      |                           |  
+      _______________________________|_____                   |                           |   
+     |    |                               inf                 |                           |  
+     |    |                           _____|____              |                           |   
+     |    |                          |         inf            |                           |  
+     |    |                          |      ____|_____        |                           |   
+     |    |                          |     |         conj     |                           |  
+     |    |                    _____ | ___ | _________|______ | __________________        |   
+     |    |                  inf     |     |                  |      |     |      |       |  
+     |    |          _________|_____ | ___ | _________        |      |     |      |       |   
+     |    |         pp               |     |          |       |      |     |      |       |  
+     |    |     ____|____            |     |          |       |      |     |      |       |   
+     |    |    |         np          |     |          |       |     inf    |     inf      |  
+     |    |    |     ____|____       |     |          |       |      |     |      |       |   
+    noun verb prep det       noun   verb  verb       verb   punct   verb   vg    verb   punct
+     |    |    |    |         |      |     |          |       |      |     |      |       |   
+     Ze  had  met  haar     moeder kunnen gaan     winkelen   ,   zwemmen  of terrassen   .  
+
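+The same discontinuous tree with Unicode line-drawing characters:
+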
+    >>> print(tpp.text(unicodelines=True))
+                                          top                                                
+                                     ┌─────┴──────────────────┬───────────────────────────┐   
+                                   smain                      │                           │  
+     ┌────┬──────────────────────────┴─────┐                  │                           │   
+     │    │                               inf                 │                           │  
+     │    │                          ┌─────┴────┐             │                           │   
+     │    │                          │         inf            │                           │  
+     │    │                          │     ┌────┴─────┐       │                           │   
+     │    │                          │     │         conj     │                           │  
+     │    │                   ┌───── │ ─── │ ─────────┴────── │ ─────┬─────┬──────┐       │   
+     │    │                  inf     │     │                  │      │     │      │       │  
+     │    │         ┌─────────┴───── │ ─── │ ─────────┐       │      │     │      │       │   
+     │    │         pp               │     │          │       │      │     │      │       │  
+     │    │    ┌────┴────┐           │     │          │       │      │     │      │       │   
+     │    │    │         np          │     │          │       │     inf    │     inf      │  
+     │    │    │    ┌────┴────┐      │     │          │       │      │     │      │       │   
+    noun verb prep det       noun   verb  verb       verb   punct   verb   vg    verb   punct
+     │    │    │    │         │      │     │          │       │      │     │      │       │   
+     Ze  had  met  haar     moeder kunnen gaan     winkelen   ,   zwemmen  of terrassen   .  
+
diff --git a/nlp_resource_data/nltk/test/treetransforms.doctest b/nlp_resource_data/nltk/test/treetransforms.doctest
new file mode 100644 (file)
index 0000000..973c27d
--- /dev/null
+++ b/nlp_resource_data/nltk/test/treetransforms.doctest
@@ -0,0 +1,156 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+-------------------------------------------
+Unit tests for the TreeTransformation class
+-------------------------------------------
+
+    >>> from copy import deepcopy
+    >>> from nltk.tree import *
+    >>> from nltk.treetransforms import *
+
+    >>> tree_string = "(TOP (S (S (VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room))))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .)))"
+
+    >>> tree = Tree.fromstring(tree_string)
+    >>> print(tree)
+    (TOP
+      (S
+        (S
+          (VP
+            (VBN Turned)
+            (ADVP (RB loose))
+            (PP
+              (IN in)
+              (NP
+                (NP (NNP Shane) (NNP Longman) (POS 's))
+                (NN trading)
+                (NN room)))))
+        (, ,)
+        (NP (DT the) (NN yuppie) (NNS dealers))
+        (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
+        (. .)))
+
+Make a copy of the original tree and collapse the subtrees with only one child:
+
+    >>> collapsedTree = deepcopy(tree)
+    >>> collapse_unary(collapsedTree)
+    >>> print(collapsedTree)
+    (TOP
+      (S
+        (S+VP
+          (VBN Turned)
+          (ADVP (RB loose))
+          (PP
+            (IN in)
+            (NP
+              (NP (NNP Shane) (NNP Longman) (POS 's))
+              (NN trading)
+              (NN room))))
+        (, ,)
+        (NP (DT the) (NN yuppie) (NNS dealers))
+        (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
+        (. .)))
+
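+Passing ``collapsePOS=True`` and ``collapseRoot=True`` also collapses
+part-of-speech nodes and the root node:
+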
+    >>> collapsedTree2 = deepcopy(tree)
+    >>> collapse_unary(collapsedTree2, collapsePOS=True, collapseRoot=True)
+    >>> print(collapsedTree2)
+    (TOP+S
+      (S+VP
+        (VBN Turned)
+        (ADVP+RB loose)
+        (PP
+          (IN in)
+          (NP
+            (NP (NNP Shane) (NNP Longman) (POS 's))
+            (NN trading)
+            (NN room))))
+      (, ,)
+      (NP (DT the) (NN yuppie) (NNS dealers))
+      (VP (AUX do) (NP (NP+RB little) (ADJP+RB right)))
+      (. .))
+
+Convert the tree to Chomsky Normal Form, i.e. so that each subtree has
+either two subtree children or a single leaf value.  This conversion
+can be performed using either left- or right-factoring:
+
+    >>> cnfTree = deepcopy(collapsedTree)
+    >>> chomsky_normal_form(cnfTree, factor='left')
+    >>> print(cnfTree)
+    (TOP
+      (S
+        (S|<S+VP-,-NP-VP>
+          (S|<S+VP-,-NP>
+            (S|<S+VP-,>
+              (S+VP
+                (S+VP|<VBN-ADVP> (VBN Turned) (ADVP (RB loose)))
+                (PP
+                  (IN in)
+                  (NP
+                    (NP|<NP-NN>
+                      (NP
+                        (NP|<NNP-NNP> (NNP Shane) (NNP Longman))
+                        (POS 's))
+                      (NN trading))
+                    (NN room))))
+              (, ,))
+            (NP (NP|<DT-NN> (DT the) (NN yuppie)) (NNS dealers)))
+          (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))))
+        (. .)))
+
+    >>> cnfTree = deepcopy(collapsedTree)
+    >>> chomsky_normal_form(cnfTree, factor='right')
+    >>> print(cnfTree)
+    (TOP
+      (S
+        (S+VP
+          (VBN Turned)
+          (S+VP|<ADVP-PP>
+            (ADVP (RB loose))
+            (PP
+              (IN in)
+              (NP
+                (NP (NNP Shane) (NP|<NNP-POS> (NNP Longman) (POS 's)))
+                (NP|<NN-NN> (NN trading) (NN room))))))
+        (S|<,-NP-VP-.>
+          (, ,)
+          (S|<NP-VP-.>
+            (NP (DT the) (NP|<NN-NNS> (NN yuppie) (NNS dealers)))
+            (S|<VP-.>
+              (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
+              (. .))))))
+
+Employ some Markov smoothing to make the artificial node labels a bit more
+readable. See the treetransforms.py documentation for more details.
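+With ``horzMarkov=2``, each artificial node label keeps at most two
+sibling labels, and with ``vertMarkov=1``, each label is annotated with
+its parent's label.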
+
+    >>> markovTree = deepcopy(collapsedTree)
+    >>> chomsky_normal_form(markovTree, horzMarkov=2, vertMarkov=1)
+    >>> print(markovTree)
+    (TOP
+      (S^<TOP>
+        (S+VP^<S>
+          (VBN Turned)
+          (S+VP|<ADVP-PP>^<S>
+            (ADVP^<S+VP> (RB loose))
+            (PP^<S+VP>
+              (IN in)
+              (NP^<PP>
+                (NP^<NP>
+                  (NNP Shane)
+                  (NP|<NNP-POS>^<NP> (NNP Longman) (POS 's)))
+                (NP|<NN-NN>^<PP> (NN trading) (NN room))))))
+        (S|<,-NP>^<TOP>
+          (, ,)
+          (S|<NP-VP>^<TOP>
+            (NP^<S> (DT the) (NP|<NN-NNS>^<S> (NN yuppie) (NNS dealers)))
+            (S|<VP-.>^<TOP>
+              (VP^<S>
+                (AUX do)
+                (NP^<VP> (NP^<NP> (RB little)) (ADJP^<NP> (RB right))))
+              (. .))))))
+
+Convert the transformed tree back to its original form:
+
+    >>> un_chomsky_normal_form(markovTree)
+    >>> tree == markovTree
+    True
+
diff --git a/nlp_resource_data/nltk/test/unit/__init__.py b/nlp_resource_data/nltk/test/unit/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..76578be
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_aline.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_aline.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ece381a
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_aline.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_brill.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_brill.cpython-37.pyc
new file mode 100644 (file)
index 0000000..5a5d98c
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_brill.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_cfd_mutation.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_cfd_mutation.cpython-37.pyc
new file mode 100644 (file)
index 0000000..4d60415
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_cfd_mutation.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_cfg2chomsky.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_cfg2chomsky.cpython-37.pyc
new file mode 100644 (file)
index 0000000..eaa2fb5
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_cfg2chomsky.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_chunk.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_chunk.cpython-37.pyc
new file mode 100644 (file)
index 0000000..c6d9f89
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_chunk.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_classify.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_classify.cpython-37.pyc
new file mode 100644 (file)
index 0000000..82e8d40
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_classify.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_collocations.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_collocations.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7431372
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_collocations.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_concordance.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_concordance.cpython-37.pyc
new file mode 100644 (file)
index 0000000..fd3e1a9
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_concordance.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_corenlp.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_corenlp.cpython-37.pyc
new file mode 100644 (file)
index 0000000..06ebde8
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_corenlp.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_corpora.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_corpora.cpython-37.pyc
new file mode 100644 (file)
index 0000000..1e3fa28
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_corpora.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_corpus_views.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_corpus_views.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7110524
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_corpus_views.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_data.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_data.cpython-37.pyc
new file mode 100644 (file)
index 0000000..d5bc73f
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_data.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_disagreement.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_disagreement.cpython-37.pyc
new file mode 100644 (file)
index 0000000..fae667e
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_disagreement.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_freqdist.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_freqdist.cpython-37.pyc
new file mode 100644 (file)
index 0000000..c197f56
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_freqdist.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_hmm.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_hmm.cpython-37.pyc
new file mode 100644 (file)
index 0000000..332a391
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_hmm.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_json2csv_corpus.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_json2csv_corpus.cpython-37.pyc
new file mode 100644 (file)
index 0000000..5ceab30
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_json2csv_corpus.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_json_serialization.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_json_serialization.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ae73510
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_json_serialization.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_naivebayes.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_naivebayes.cpython-37.pyc
new file mode 100644 (file)
index 0000000..1cd2936
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_naivebayes.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_nombank.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_nombank.cpython-37.pyc
new file mode 100644 (file)
index 0000000..6f65ef6
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_nombank.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_pl196x.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_pl196x.cpython-37.pyc
new file mode 100644 (file)
index 0000000..056626f
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_pl196x.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_pos_tag.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_pos_tag.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7a4365f
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_pos_tag.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_rte_classify.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_rte_classify.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0cffd6a
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_rte_classify.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_seekable_unicode_stream_reader.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_seekable_unicode_stream_reader.cpython-37.pyc
new file mode 100644 (file)
index 0000000..3c370a5
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_seekable_unicode_stream_reader.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_senna.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_senna.cpython-37.pyc
new file mode 100644 (file)
index 0000000..3b33bc8
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_senna.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_stem.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_stem.cpython-37.pyc
new file mode 100644 (file)
index 0000000..d78e5d3
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_stem.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_tag.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_tag.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f110b10
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_tag.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_tgrep.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_tgrep.cpython-37.pyc
new file mode 100644 (file)
index 0000000..294e480
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_tgrep.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_tokenize.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_tokenize.cpython-37.pyc
new file mode 100644 (file)
index 0000000..b18da62
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_tokenize.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_twitter_auth.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_twitter_auth.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2bc7807
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_twitter_auth.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_wordnet.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_wordnet.cpython-37.pyc
new file mode 100644 (file)
index 0000000..17475b9
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/test_wordnet.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/utils.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/utils.cpython-37.pyc
new file mode 100644 (file)
index 0000000..688638f
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/__pycache__/utils.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/__init__.py b/nlp_resource_data/nltk/test/unit/lm/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/nlp_resource_data/nltk/test/unit/lm/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/lm/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..fc66cb5
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/lm/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_counter.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_counter.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7eec05f
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_counter.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_models.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_models.cpython-37.pyc
new file mode 100644 (file)
index 0000000..592d6df
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_models.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_preprocessing.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_preprocessing.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2afa322
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_preprocessing.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_vocabulary.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_vocabulary.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f85a3a9
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_vocabulary.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/test_counter.py b/nlp_resource_data/nltk/test/unit/lm/test_counter.py
new file mode 100644 (file)
index 0000000..f7182cf
--- /dev/null
+++ b/nlp_resource_data/nltk/test/unit/lm/test_counter.py
@@ -0,0 +1,132 @@
+# Natural Language Toolkit: Language Model Unit Tests
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+import unittest
+
+from nltk import FreqDist
+from nltk.lm import NgramCounter
+from nltk.util import everygrams
+
+
+class NgramCounterTests(unittest.TestCase):
+    """Tests for NgramCounter that only involve lookup, no modification."""
+
+    @classmethod
+    def setUpClass(cls):
+
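+        # Two toy training "sentences"; everygrams(sent, max_len=n)
+        # yields every ngram of length 1 through n, so each counter sees
+        # unigrams as well as the higher orders.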
+        text = [list("abcd"), list("egdbe")]
+        cls.trigram_counter = NgramCounter(
+            (everygrams(sent, max_len=3) for sent in text)
+        )
+        cls.bigram_counter = NgramCounter(
+            (everygrams(sent, max_len=2) for sent in text)
+        )
+
+    def test_N(self):
+        self.assertEqual(self.bigram_counter.N(), 16)
+        self.assertEqual(self.trigram_counter.N(), 21)
+
+    def test_counter_len_changes_with_lookup(self):
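+        # NgramCounter creates entries on demand, so merely looking up
+        # an unseen ngram order adds it and len() grows as a side effect.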
+        self.assertEqual(len(self.bigram_counter), 2)
+        _ = self.bigram_counter[50]
+        self.assertEqual(len(self.bigram_counter), 3)
+
+    def test_ngram_order_access_unigrams(self):
+        self.assertEqual(self.bigram_counter[1], self.bigram_counter.unigrams)
+
+    def test_ngram_conditional_freqdist(self):
+        expected_trigram_contexts = [
+            ("a", "b"),
+            ("b", "c"),
+            ("e", "g"),
+            ("g", "d"),
+            ("d", "b"),
+        ]
+        expected_bigram_contexts = [("a",), ("b",), ("d",), ("e",), ("c",), ("g",)]
+
+        bigrams = self.trigram_counter[2]
+        trigrams = self.trigram_counter[3]
+
+        self.assertCountEqual(expected_bigram_contexts, bigrams.conditions())
+        self.assertCountEqual(expected_trigram_contexts, trigrams.conditions())
+
+    def test_bigram_counts_seen_ngrams(self):
+        b_given_a_count = 1
+        c_given_b_count = 1
+
+        self.assertEqual(b_given_a_count, self.bigram_counter[["a"]]["b"])
+        self.assertEqual(c_given_b_count, self.bigram_counter[["b"]]["c"])
+
+    def test_bigram_counts_unseen_ngrams(self):
+        z_given_b_count = 0
+
+        self.assertEqual(z_given_b_count, self.bigram_counter[["b"]]["z"])
+
+    def test_unigram_counts_seen_words(self):
+        expected_count_b = 2
+
+        self.assertEqual(expected_count_b, self.bigram_counter["b"])
+
+    def test_unigram_counts_completely_unseen_words(self):
+        unseen_count = 0
+
+        self.assertEqual(unseen_count, self.bigram_counter["z"])
+
+
+class NgramCounterTrainingTests(unittest.TestCase):
+    def setUp(self):
+        self.counter = NgramCounter()
+
+    def test_empty_string(self):
+        test = NgramCounter("")
+        self.assertNotIn(2, test)
+        self.assertEqual(test[1], FreqDist())
+
+    def test_empty_list(self):
+        test = NgramCounter([])
+        self.assertNotIn(2, test)
+        self.assertEqual(test[1], FreqDist())
+
+    def test_None(self):
+        test = NgramCounter(None)
+        self.assertNotIn(2, test)
+        self.assertEqual(test[1], FreqDist())
+
+    def test_train_on_unigrams(self):
+        words = list("abcd")
+        counter = NgramCounter([[(w,) for w in words]])
+
+        self.assertFalse(bool(counter[3]))
+        self.assertFalse(bool(counter[2]))
+        self.assertCountEqual(words, counter[1].keys())
+
+    def test_train_on_illegal_sentences(self):
+        str_sent = ["Check", "this", "out", "!"]
+        list_sent = [["Check", "this"], ["this", "out"], ["out", "!"]]
+
+        with self.assertRaises(TypeError):
+            NgramCounter([str_sent])
+
+        with self.assertRaises(TypeError):
+            NgramCounter([list_sent])
+
+    def test_train_on_bigrams(self):
+        bigram_sent = [("a", "b"), ("c", "d")]
+        counter = NgramCounter([bigram_sent])
+
+        self.assertFalse(bool(counter[3]))
+
+    def test_train_on_mix(self):
+        mixed_sent = [("a", "b"), ("c", "d"), ("e", "f", "g"), ("h",)]
+        counter = NgramCounter([mixed_sent])
+        unigrams = ["h"]
+        bigram_contexts = [("a",), ("c",)]
+        trigram_contexts = [("e", "f")]
+
+        self.assertCountEqual(unigrams, counter[1].keys())
+        self.assertCountEqual(bigram_contexts, counter[2].keys())
+        self.assertCountEqual(trigram_contexts, counter[3].keys())
diff --git a/nlp_resource_data/nltk/test/unit/lm/test_models.py b/nlp_resource_data/nltk/test/unit/lm/test_models.py
new file mode 100644 (file)
index 0000000..f39619e
--- /dev/null
@@ -0,0 +1,433 @@
+# Natural Language Toolkit: Language Model Unit Tests
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+
+import math
+import unittest
+
+
+from nltk.lm import (
+    Vocabulary,
+    MLE,
+    Lidstone,
+    Laplace,
+    WittenBellInterpolated,
+    KneserNeyInterpolated,
+)
+from nltk.lm.preprocessing import padded_everygrams
+
+
+def _prepare_test_data(ngram_order):
+    return (
+        Vocabulary(["a", "b", "c", "d", "z", "<s>", "</s>"], unk_cutoff=1),
+        [
+            list(padded_everygrams(ngram_order, sent))
+            for sent in (list("abcd"), list("egadbe"))
+        ],
+    )
+
+
+class ParametrizeTestsMeta(type):
+    """Metaclass for generating parametrized tests."""
+
+    def __new__(cls, name, bases, dct):
+        contexts = (
+            ("a",),
+            ("c",),
+            (u"<s>",),
+            ("b",),
+            (u"<UNK>",),
+            ("d",),
+            ("e",),
+            ("r",),
+            ("w",),
+        )
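+        # For every context above, emit a test asserting that scores over
+        # the whole vocabulary sum to 1; for every triple in a subclass's
+        # score_tests, emit a test checking score() against its expectation.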
+        for i, c in enumerate(contexts):
+            dct["test_sumto1_{0}".format(i)] = cls.add_sum_to_1_test(c)
+        scores = dct.get("score_tests", [])
+        for i, (word, context, expected_score) in enumerate(scores):
+            dct["test_score_{0}".format(i)] = cls.add_score_test(
+                word, context, expected_score
+            )
+        return super().__new__(cls, name, bases, dct)
+
+    @classmethod
+    def add_score_test(cls, word, context, expected_score):
+        message = "word='{word}', context={context}"
+
+        def test_method(self):
+            score = self.model.score(word, context)
+            self.assertAlmostEqual(
+                score, expected_score, msg=message.format(**locals()), places=4
+            )
+
+        return test_method
+
+    @classmethod
+    def add_sum_to_1_test(cls, context):
+        def test(self):
+            s = sum(self.model.score(w, context) for w in self.model.vocab)
+            self.assertAlmostEqual(s, 1.0, msg="The context is {}".format(context))
+
+        return test
+
+
+class MleBigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta):
+    """Unit tests for MLE ngram model."""
+
+    score_tests = [
+        ("d", ["c"], 1),
+        # Unseen ngrams should yield 0
+        ("d", ["e"], 0),
+        # Unigrams should also be 0
+        ("z", None, 0),
+        # N unigrams = 14
+        # count('a') = 2
+        ("a", None, 2.0 / 14),
+        # "y" is out of vocabulary, so it maps to <UNK>: count('<UNK>') = 3
+        ("y", None, 3.0 / 14),
+    ]
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(2)
+        self.model = MLE(2, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    def test_logscore_zero_score(self):
+        # logscore of unseen ngrams should be -inf
+        logscore = self.model.logscore("d", ["e"])
+
+        self.assertTrue(math.isinf(logscore))
+
+    def test_entropy_perplexity_seen(self):
+        # ngrams seen during training
+        trained = [
+            ("<s>", "a"),
+            ("a", "b"),
+            ("b", "<UNK>"),
+            ("<UNK>", "a"),
+            ("a", "d"),
+            ("d", "</s>"),
+        ]
+        # Ngram = Log score
+        # <s>, a    = -1
+        # a, b      = -1
+        # b, UNK    = -1
+        # UNK, a    = -1.585
+        # a, d      = -1
+        # d, </s>   = -1
+        # TOTAL logscores   = -6.585
+        # - AVG logscores   = 1.0975
+        H = 1.0975
+        perplexity = 2.1398
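+        # (nltk.lm uses base-2 logs, so perplexity = 2 ** entropy:
+        # 2 ** 1.0975 ~= 2.1398.)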
+
+        self.assertAlmostEqual(H, self.model.entropy(trained), places=4)
+        self.assertAlmostEqual(perplexity, self.model.perplexity(trained), places=4)
+
+    def test_entropy_perplexity_unseen(self):
+        # In MLE, even one unseen ngram should make entropy and perplexity infinite
+        untrained = [("<s>", "a"), ("a", "c"), ("c", "d"), ("d", "</s>")]
+
+        self.assertTrue(math.isinf(self.model.entropy(untrained)))
+        self.assertTrue(math.isinf(self.model.perplexity(untrained)))
+
+    def test_entropy_perplexity_unigrams(self):
+        # word = score, log score
+        # <s>   = 0.1429, -2.8074
+        # a     = 0.1429, -2.8074
+        # c     = 0.0714, -3.8073
+        # UNK   = 0.2143, -2.2224
+        # d     = 0.1429, -2.8074
+        # c     = 0.0714, -3.8073
+        # </s>  = 0.1429, -2.8074
+        # TOTAL logscores = -21.6243
+        # - AVG logscores = 3.0095
+        H = 3.0095
+        perplexity = 8.0529
+
+        text = [("<s>",), ("a",), ("c",), ("-",), ("d",), ("c",), ("</s>",)]
+
+        self.assertAlmostEqual(H, self.model.entropy(text), places=4)
+        self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4)
+
+
+class MleTrigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta):
+    """MLE trigram model tests"""
+
+    score_tests = [
+        # count(d | b, c) = 1
+        # count(b, c) = 1
+        ("d", ("b", "c"), 1),
+        # count(d | c) = 1
+        # count(c) = 1
+        ("d", ["c"], 1),
+        # total number of tokens is 18, of which "a" occurred 2 times
+        ("a", None, 2.0 / 18),
+        # in vocabulary but unseen
+        ("z", None, 0),
+        # out of vocabulary should use "UNK" score
+        ("y", None, 3.0 / 18),
+    ]
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(3)
+        self.model = MLE(3, vocabulary=vocab)
+        self.model.fit(training_text)
+
+
+class LidstoneBigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta):
+    """Unit tests for Lidstone class"""
+
+    score_tests = [
+        # count(d | c) = 1
+        # *count(d | c) = 1.1
+        # Count(w | c for w in vocab) = 1
+        # *Count(w | c for w in vocab) = 1.8
+        ("d", ["c"], 1.1 / 1.8),
+        # Total unigrams: 14
+        # Vocab size: 8
+        # Denominator: 14 + 0.8 = 14.8
+        # count("a") = 2
+        # *count("a") = 2.1
+        ("a", None, 2.1 / 14.8),
+        # in vocabulary but unseen
+        # count("z") = 0
+        # *count("z") = 0.1
+        ("z", None, 0.1 / 14.8),
+        # out of vocabulary should use "UNK" score
+        # count("<UNK>") = 3
+        # *count("<UNK>") = 3.1
+        ("y", None, 3.1 / 14.8),
+    ]
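+    # The expected values above all instantiate the Lidstone estimate
+    #   score(w | ctx) = (count(ctx, w) + gamma) / (count(ctx) + gamma * V)
+    # with gamma = 0.1 and vocabulary size V = 8.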
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(2)
+        self.model = Lidstone(0.1, 2, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    def test_gamma(self):
+        self.assertEqual(0.1, self.model.gamma)
+
+    def test_entropy_perplexity(self):
+        text = [
+            ("<s>", "a"),
+            ("a", "c"),
+            ("c", "<UNK>"),
+            ("<UNK>", "d"),
+            ("d", "c"),
+            ("c", "</s>"),
+        ]
+        # Unlike MLE this should be able to handle completely novel ngrams
+        # Ngram = score, log score
+        # <s>, a    = 0.3929, -1.3479
+        # a, c      = 0.0357, -4.8074
+        # c, UNK    = 0.0(5), -4.1699
+        # UNK, d    = 0.0263, -5.2479
+        # d, c      = 0.0357, -4.8074
+        # c, </s>   = 0.0(5), -4.1699
+        # TOTAL logscore: -24.5504
+        # - AVG logscore: 4.0917
+        H = 4.0917
+        perplexity = 17.0504
+        self.assertAlmostEqual(H, self.model.entropy(text), places=4)
+        self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4)
+
+
+class LidstoneTrigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta):
+    score_tests = [
+        # Logic behind this is the same as for bigram model
+        ("d", ["c"], 1.1 / 1.8),
+        # if we choose a word that hasn't appeared after (b, c)
+        ("e", ["c"], 0.1 / 1.8),
+        # Trigram score now
+        ("d", ["b", "c"], 1.1 / 1.8),
+        ("e", ["b", "c"], 0.1 / 1.8),
+    ]
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(3)
+        self.model = Lidstone(0.1, 3, vocabulary=vocab)
+        self.model.fit(training_text)
+
+
+class LaplaceBigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta):
+    """Unit tests for Laplace class"""
+
+    score_tests = [
+        # basic sanity-check:
+        # count(d | c) = 1
+        # *count(d | c) = 2
+        # Count(w | c for w in vocab) = 1
+        # *Count(w | c for w in vocab) = 9
+        ("d", ["c"], 2.0 / 9),
+        # Total unigrams: 14
+        # Vocab size: 8
+        # Denominator: 14 + 8 = 22
+        # count("a") = 2
+        # *count("a") = 3
+        ("a", None, 3.0 / 22),
+        # in vocabulary but unseen
+        # count("z") = 0
+        # *count("z") = 1
+        ("z", None, 1.0 / 22),
+        # out of vocabulary should use "UNK" score
+        # count("<UNK>") = 3
+        # *count("<UNK>") = 4
+        ("y", None, 4.0 / 22),
+    ]
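+    # Laplace is the gamma = 1 special case of Lidstone:
+    #   score(w | ctx) = (count(ctx, w) + 1) / (count(ctx) + V)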
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(2)
+        self.model = Laplace(2, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    def test_gamma(self):
+        # Make sure the gamma is set to 1
+        self.assertEqual(1, self.model.gamma)
+
+    def test_entropy_perplexity(self):
+        text = [
+            ("<s>", "a"),
+            ("a", "c"),
+            ("c", "<UNK>"),
+            ("<UNK>", "d"),
+            ("d", "c"),
+            ("c", "</s>"),
+        ]
+        # Unlike MLE this should be able to handle completely novel ngrams
+        # Ngram = score, log score
+        # <s>, a    = 0.2, -2.3219
+        # a, c      = 0.1, -3.3219
+        # c, UNK    = 0.(1), -3.1699
+        # UNK, d    = 0.(09), -3.4594
+        # d, c      = 0.1, -3.3219
+        # c, </s>   = 0.(1), -3.1699
+        # Total logscores: -18.7651
+        # - AVG logscores: 3.1275
+        H = 3.1275
+        perplexity = 8.7393
+        self.assertAlmostEqual(H, self.model.entropy(text), places=4)
+        self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4)
+
+
+class WittenBellInterpolatedTrigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta):
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(3)
+        self.model = WittenBellInterpolated(3, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    score_tests = [
+        # For unigram scores by default revert to MLE
+        # Total unigrams: 18
+        # count('c'): 1
+        ("c", None, 1.0 / 18),
+        # in vocabulary but unseen
+        # count("z") = 0
+        ("z", None, 0.0 / 18),
+        # out of vocabulary should use "UNK" score
+        # count("<UNK>") = 3
+        ("y", None, 3.0 / 18),
+        # gamma(['b']) = 0.1111
+        # mle.score('c', ['b']) = 0.5
+        # (1 - gamma) * mle + gamma * mle('c') ~= 0.4445 + 0.1111 / 18
+        ("c", ["b"], (1 - 0.1111) * 0.5 + 0.1111 * 1 / 18),
+        # building on that, let's try 'a b c' as the trigram
+        # gamma(['a', 'b']) = 0.0667
+        # mle("c", ["a", "b"]) = 1
+        ("c", ["a", "b"], (1 - 0.0667) + 0.0667 * ((1 - 0.1111) * 0.5 + 0.1111 / 18)),
+        # The ngram 'z b c' was not seen, so we should simply revert to
+        # the score of the ngram 'b c'. See issue #2332.
+        ("c", ["z", "b"], ((1 - 0.1111) * 0.5 + 0.1111 / 18)),
+    ]
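+    # The recursion exercised above is Witten-Bell interpolation:
+    #   P(w | ctx) = (1 - gamma(ctx)) * P_MLE(w | ctx)
+    #                + gamma(ctx) * P(w | shorter ctx)
+    # where gamma(ctx) is derived from the number of distinct continuations
+    # observed after ctx.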
+
+
+class KneserNeyInterpolatedTrigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta):
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(3)
+        self.model = KneserNeyInterpolated(3, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    score_tests = [
+        # For unigram scores revert to uniform
+        # Vocab size: 8
+        # count('c'): 1
+        ("c", None, 1.0 / 8),
+        # in vocabulary but unseen, still uses uniform
+        ("z", None, 1 / 8),
+        # out of vocabulary should use "UNK" score, i.e. again uniform
+        ("y", None, 1.0 / 8),
+        # alpha = count('bc') - discount = 1 - 0.1 = 0.9
+        # gamma(['b']) = discount * number of unique words that follow ['b'] = 0.1 * 2
+        # normalizer = total number of bigrams with this context = 2
+        # the final score is: (alpha + gamma * unigram_score("c")) / normalizer
+        ("c", ["b"], (0.9 + 0.2 * (1 / 8)) / 2),
+        # building on that, let's try 'a b c' as the trigram
+        # alpha = count('abc') - discount = 1 - 0.1 = 0.9
+        # gamma(['a', 'b']) = 0.1 * 1
+        # normalizer = total number of trigrams with prefix "ab" = 1 => we can ignore it!
+        ("c", ["a", "b"], 0.9 + 0.1 * ((0.9 + 0.2 * (1 / 8)) / 2)),
+        # The ngram 'z b c' was not seen, so we should simply revert to
+        # the score of the ngram 'b c'. See issue #2332.
+        ("c", ["z", "b"], ((0.9 + 0.2 * (1 / 8)) / 2)),
+    ]
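+    # The recursion exercised above is interpolated Kneser-Ney:
+    #   P(w | ctx) = (max(count(ctx, w) - d, 0)
+    #                 + d * N1plus(ctx) * P(w | shorter ctx)) / count(ctx)
+    # with discount d = 0.1 and N1plus(ctx) = distinct words seen after ctx.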
+
+
+class NgramModelTextGenerationTests(unittest.TestCase):
+    """Using MLE model, generate some text."""
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(3)
+        self.model = MLE(3, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    def test_generate_one_no_context(self):
+        self.assertEqual(self.model.generate(random_seed=3), "<UNK>")
+
+    def test_generate_one_limiting_context(self):
+        # We don't need random_seed for contexts with only one continuation
+        self.assertEqual(self.model.generate(text_seed=["c"]), "d")
+        self.assertEqual(self.model.generate(text_seed=["b", "c"]), "d")
+        self.assertEqual(self.model.generate(text_seed=["a", "c"]), "d")
+
+    def test_generate_one_varied_context(self):
+        # When context doesn't limit our options enough, seed the random choice
+        self.assertEqual(
+            self.model.generate(text_seed=("a", "<s>"), random_seed=2), "a"
+        )
+
+    def test_generate_cycle(self):
+        # Add a cycle to the model: bd -> b, db -> d
+        more_training_text = [list(padded_everygrams(self.model.order, list("bdbdbd")))]
+        self.model.fit(more_training_text)
+        # Test that we can escape the cycle
+        self.assertEqual(
+            self.model.generate(7, text_seed=("b", "d"), random_seed=5),
+            ["b", "d", "b", "d", "b", "d", "</s>"],
+        )
+
+    def test_generate_with_text_seed(self):
+        self.assertEqual(
+            self.model.generate(5, text_seed=("<s>", "e"), random_seed=3),
+            ["<UNK>", "a", "d", "b", "<UNK>"],
+        )
+
+    def test_generate_oov_text_seed(self):
+        self.assertEqual(
+            self.model.generate(text_seed=("aliens",), random_seed=3),
+            self.model.generate(text_seed=("<UNK>",), random_seed=3),
+        )
+
+    def test_generate_None_text_seed(self):
+        # should crash with type error when we try to look it up in vocabulary
+        with self.assertRaises(TypeError):
+            self.model.generate(text_seed=(None,))
+
+        # This will work
+        self.assertEqual(
+            self.model.generate(text_seed=None, random_seed=3),
+            self.model.generate(random_seed=3),
+        )
diff --git a/nlp_resource_data/nltk/test/unit/lm/test_preprocessing.py b/nlp_resource_data/nltk/test/unit/lm/test_preprocessing.py
new file mode 100644 (file)
index 0000000..c298552
--- /dev/null
@@ -0,0 +1,30 @@
+# Natural Language Toolkit: Language Model Unit Tests
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+import unittest
+
+from nltk.lm.preprocessing import padded_everygram_pipeline
+
+
+class TestPreprocessing(unittest.TestCase):
+    def test_padded_everygram_pipeline(self):
+        expected_train = [
+            [
+                ("<s>",),
+                ("a",),
+                ("b",),
+                ("c",),
+                ("</s>",),
+                ("<s>", "a"),
+                ("a", "b"),
+                ("b", "c"),
+                ("c", "</s>"),
+            ]
+        ]
+        expected_vocab = ["<s>", "a", "b", "c", "</s>"]
+        train_data, vocab_data = padded_everygram_pipeline(2, [["a", "b", "c"]])
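+        # padded_everygram_pipeline returns lazy iterators, hence the
+        # list() materialization below.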
+        self.assertEqual([list(sent) for sent in train_data], expected_train)
+        self.assertEqual(list(vocab_data), expected_vocab)
diff --git a/nlp_resource_data/nltk/test/unit/lm/test_vocabulary.py b/nlp_resource_data/nltk/test/unit/lm/test_vocabulary.py
new file mode 100644 (file)
index 0000000..db82eb5
--- /dev/null
@@ -0,0 +1,138 @@
+# Natural Language Toolkit: Language Model Unit Tests
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+import unittest
+from collections import Counter
+
+from nltk.lm import Vocabulary
+
+
+class NgramModelVocabularyTests(unittest.TestCase):
+    """tests Vocabulary Class"""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.vocab = Vocabulary(
+            ["z", "a", "b", "c", "f", "d", "e", "g", "a", "d", "b", "e", "w"],
+            unk_cutoff=2,
+        )
+
+    def test_truthiness(self):
+        self.assertTrue(self.vocab)
+
+    def test_cutoff_value_set_correctly(self):
+        self.assertEqual(self.vocab.cutoff, 2)
+
+    def test_unable_to_change_cutoff(self):
+        with self.assertRaises(AttributeError):
+            self.vocab.cutoff = 3
+
+    def test_cutoff_setter_checks_value(self):
+        with self.assertRaises(ValueError) as exc_info:
+            Vocabulary("abc", unk_cutoff=0)
+        expected_error_msg = "Cutoff value cannot be less than 1. Got: 0"
+        self.assertEqual(expected_error_msg, str(exc_info.exception))
+
+    def test_counts_set_correctly(self):
+        self.assertEqual(self.vocab.counts["a"], 2)
+        self.assertEqual(self.vocab.counts["b"], 2)
+        self.assertEqual(self.vocab.counts["c"], 1)
+
+    def test_membership_check_respects_cutoff(self):
+        # a was seen 2 times, so it should be considered part of the vocabulary
+        self.assertTrue("a" in self.vocab)
+        # "c" was seen once, it shouldn't be considered part of the vocab
+        self.assertFalse("c" in self.vocab)
+        # "z" was never seen at all, also shouldn't be considered in the vocab
+        self.assertFalse("z" in self.vocab)
+
+    def test_vocab_len_respects_cutoff(self):
+        # Vocab size is the number of unique tokens that occur at least as often
+        # as the cutoff value, plus 1 to account for unknown words.
+        self.assertEqual(5, len(self.vocab))
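+        # Here "a", "b", "d" and "e" each occur at least twice: 4 + <UNK> = 5.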
+
+    def test_vocab_iter_respects_cutoff(self):
+        vocab_counts = ["a", "b", "c", "d", "e", "f", "g", "w", "z"]
+        vocab_items = ["a", "b", "d", "e", "<UNK>"]
+
+        self.assertCountEqual(vocab_counts, list(self.vocab.counts.keys()))
+        self.assertCountEqual(vocab_items, list(self.vocab))
+
+    def test_update_empty_vocab(self):
+        empty = Vocabulary(unk_cutoff=2)
+        self.assertEqual(len(empty), 0)
+        self.assertFalse(empty)
+        self.assertIn(empty.unk_label, empty)
+
+        empty.update(list("abcde"))
+        self.assertIn(empty.unk_label, empty)
+
+    def test_lookup(self):
+        self.assertEqual(self.vocab.lookup("a"), "a")
+        self.assertEqual(self.vocab.lookup("c"), "<UNK>")
+
+    def test_lookup_iterables(self):
+        self.assertEqual(self.vocab.lookup(["a", "b"]), ("a", "b"))
+        self.assertEqual(self.vocab.lookup(("a", "b")), ("a", "b"))
+        self.assertEqual(self.vocab.lookup(("a", "c")), ("a", "<UNK>"))
+        self.assertEqual(
+            self.vocab.lookup(map(str, range(3))), ("<UNK>", "<UNK>", "<UNK>")
+        )
+
+    def test_lookup_empty_iterables(self):
+        self.assertEqual(self.vocab.lookup(()), ())
+        self.assertEqual(self.vocab.lookup([]), ())
+        self.assertEqual(self.vocab.lookup(iter([])), ())
+        self.assertEqual(self.vocab.lookup(n for n in range(0, 0)), ())
+
+    def test_lookup_recursive(self):
+        self.assertEqual(
+            self.vocab.lookup([["a", "b"], ["a", "c"]]), (("a", "b"), ("a", "<UNK>"))
+        )
+        self.assertEqual(self.vocab.lookup([["a", "b"], "c"]), (("a", "b"), "<UNK>"))
+        self.assertEqual(self.vocab.lookup([[[[["a", "b"]]]]]), ((((("a", "b"),),),),))
+
+    def test_lookup_None(self):
+        with self.assertRaises(TypeError):
+            self.vocab.lookup(None)
+        with self.assertRaises(TypeError):
+            list(self.vocab.lookup([None, None]))
+
+    def test_lookup_int(self):
+        with self.assertRaises(TypeError):
+            self.vocab.lookup(1)
+        with self.assertRaises(TypeError):
+            list(self.vocab.lookup([1, 2]))
+
+    def test_lookup_empty_str(self):
+        self.assertEqual(self.vocab.lookup(""), "<UNK>")
+
+    def test_equality(self):
+        v1 = Vocabulary(["a", "b", "c"], unk_cutoff=1)
+        v2 = Vocabulary(["a", "b", "c"], unk_cutoff=1)
+        v3 = Vocabulary(["a", "b", "c"], unk_cutoff=1, unk_label="blah")
+        v4 = Vocabulary(["a", "b"], unk_cutoff=1)
+
+        self.assertEqual(v1, v2)
+        self.assertNotEqual(v1, v3)
+        self.assertNotEqual(v1, v4)
+
+    def test_str(self):
+        self.assertEqual(
+            str(self.vocab), "<Vocabulary with cutoff=2 unk_label='<UNK>' and 5 items>"
+        )
+
+    def test_creation_with_counter(self):
+        self.assertEqual(
+            self.vocab,
+            Vocabulary(
+                Counter(
+                    ["z", "a", "b", "c", "f", "d", "e", "g", "a", "d", "b", "e", "w"]
+                ),
+                unk_cutoff=2,
+            ),
+        )
diff --git a/nlp_resource_data/nltk/test/unit/test_aline.py b/nlp_resource_data/nltk/test/unit/test_aline.py
new file mode 100644 (file)
index 0000000..f63d211
--- /dev/null
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.metrics.aline
+"""
+
+
+import unittest
+
+from nltk.metrics import aline
+
+
+class TestAline(unittest.TestCase):
+    """
+    Test Aline algorithm for aligning phonetic sequences
+    """
+
+    def test_aline(self):
+        result = aline.align('θin', 'tenwis')
+        expected = [
+            [('θ', 't'), ('i', 'e'), ('n', 'n'), ('-', 'w'), ('-', 'i'), ('-', 's')]
+        ]
+
+        self.assertEqual(result, expected)
+
+        result = aline.align('jo', 'ʒə')
+        expected = [[('j', 'ʒ'), ('o', 'ə')]]
+
+        self.assertEqual(result, expected)
+
+        result = aline.align('pematesiweni', 'pematesewen')
+        expected = [
+            [
+                ('p', 'p'),
+                ('e', 'e'),
+                ('m', 'm'),
+                ('a', 'a'),
+                ('t', 't'),
+                ('e', 'e'),
+                ('s', 's'),
+                ('i', 'e'),
+                ('w', 'w'),
+                ('e', 'e'),
+                ('n', 'n'),
+                ('i', '-'),
+            ]
+        ]
+
+        self.assertEqual(result, expected)
+
+        result = aline.align('tuwθ', 'dentis')
+        expected = [
+            [
+                ('t', 'd'),
+                ('u', 'e'),
+                ('w', '-'),
+                ('-', 'n'),
+                ('-', 't'),
+                ('-', 'i'),
+                ('θ', 's'),
+            ]
+        ]
+
+        self.assertEqual(result, expected)
+
+    def test_aline_delta(self):
+        """
+        Test aline for computing the difference between two segments
+        """
+        result = aline.delta('p', 'q')
+        expected = 20.0
+
+        self.assertEqual(result, expected)
+
+        result = aline.delta('a', 'A')
+        expected = 0.0
+
+        self.assertEqual(result, expected)
diff --git a/nlp_resource_data/nltk/test/unit/test_brill.py b/nlp_resource_data/nltk/test/unit/test_brill.py
new file mode 100644 (file)
index 0000000..5297fe1
--- /dev/null
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for Brill tagger.
+"""
+
+import unittest
+
+from nltk.tag import UnigramTagger, brill, brill_trainer
+from nltk.tbl import Template
+from nltk.corpus import treebank
+
+from nltk.tbl import demo
+
+
+class TestBrill(unittest.TestCase):
+    def test_pos_template(self):
+        train_sents = treebank.tagged_sents()[:1000]
+        tagger = UnigramTagger(train_sents)
+        trainer = brill_trainer.BrillTaggerTrainer(
+            tagger, [brill.Template(brill.Pos([-1]))]
+        )
+        brill_tagger = trainer.train(train_sents)
+        # Example from https://github.com/nltk/nltk/issues/769
+        result = brill_tagger.tag('This is a foo bar sentence'.split())
+        expected = [
+            ('This', 'DT'),
+            ('is', 'VBZ'),
+            ('a', 'DT'),
+            ('foo', None),
+            ('bar', 'NN'),
+            ('sentence', None),
+        ]
+        self.assertEqual(result, expected)
+
+    @unittest.skip("Should be tested in __main__ of nltk.tbl.demo")
+    def test_brill_demo(self):
+        demo()
diff --git a/nlp_resource_data/nltk/test/unit/test_cfd_mutation.py b/nlp_resource_data/nltk/test/unit/test_cfd_mutation.py
new file mode 100644 (file)
index 0000000..7e21d7e
--- /dev/null
@@ -0,0 +1,39 @@
+import unittest
+from nltk import ConditionalFreqDist, tokenize
+
+
+class TestEmptyCondFreq(unittest.TestCase):
+    def test_tabulate(self):
+        empty = ConditionalFreqDist()
+        self.assertEqual(empty.conditions(), [])
+        try:
+            empty.tabulate(conditions="BUG")  # nonexistent keys shouldn't be added
+        except Exception:
+            pass
+        self.assertEqual(empty.conditions(), [])
+
+    def test_plot(self):
+        empty = ConditionalFreqDist()
+        self.assertEqual(empty.conditions(), [])
+        try:
+            empty.plot(conditions=["BUG"])  # nonexistent keys shouldn't be added
+        except Exception:
+            pass
+        self.assertEqual(empty.conditions(), [])
+
+    def test_increment(self):
+        # make sure that we can still mutate cfd normally
+        text = "cow cat mouse cat tiger"
+        cfd = ConditionalFreqDist()
+
+        # create cfd with word length as condition
+        for word in tokenize.word_tokenize(text):
+            condition = len(word)
+            cfd[condition][word] += 1
+
+        self.assertEqual(cfd.conditions(), [3, 5])
+
+        # incrementing a previously unseen key is still possible
+        cfd[2]['hi'] += 1
+        self.assertEqual(set(cfd.conditions()), set([3, 5, 2]))  # new condition added
+        self.assertEqual(cfd[2]['hi'], 1)  # frequency incremented from 0 (unseen) to 1
diff --git a/nlp_resource_data/nltk/test/unit/test_cfg2chomsky.py b/nlp_resource_data/nltk/test/unit/test_cfg2chomsky.py
new file mode 100644 (file)
index 0000000..686861e
--- /dev/null
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+import unittest
+import nltk
+from nltk.grammar import CFG
+
+
+class ChomskyNormalFormForCFGTest(unittest.TestCase):
+    def test_simple(self):
+        grammar = CFG.fromstring(
+            """
+          S -> NP VP
+          PP -> P NP
+          NP -> Det N | NP PP P
+          VP -> V NP | VP PP
+          VP -> Det
+          Det -> 'a' | 'the'
+          N -> 'dog' | 'cat'
+          V -> 'chased' | 'sat'
+          P -> 'on' | 'in'
+        """
+        )
+        self.assertFalse(grammar.is_flexible_chomsky_normal_form())
+        self.assertFalse(grammar.is_chomsky_normal_form())
+        grammar = grammar.chomsky_normal_form(flexible=True)
+        self.assertTrue(grammar.is_flexible_chomsky_normal_form())
+        self.assertFalse(grammar.is_chomsky_normal_form())
+
+        grammar2 = CFG.fromstring(
+            """
+          S -> NP VP
+          NP -> VP N P
+          VP -> P
+          N -> 'dog' | 'cat'
+          P -> 'on' | 'in'
+        """
+        )
+        self.assertFalse(grammar2.is_flexible_chomsky_normal_form())
+        self.assertFalse(grammar2.is_chomsky_normal_form())
+        grammar2 = grammar2.chomsky_normal_form()
+        self.assertTrue(grammar2.is_flexible_chomsky_normal_form())
+        self.assertTrue(grammar2.is_chomsky_normal_form())
+
+    def test_complex(self):
+        grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
+        self.assertFalse(grammar.is_flexible_chomsky_normal_form())
+        self.assertFalse(grammar.is_chomsky_normal_form())
+        grammar = grammar.chomsky_normal_form(flexible=True)
+        self.assertTrue(grammar.is_flexible_chomsky_normal_form())
+        self.assertFalse(grammar.is_chomsky_normal_form())
diff --git a/nlp_resource_data/nltk/test/unit/test_chunk.py b/nlp_resource_data/nltk/test/unit/test_chunk.py
new file mode 100644 (file)
index 0000000..7d61518
--- /dev/null
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+import unittest
+
+from nltk import RegexpParser
+
+
+class TestChunkRule(unittest.TestCase):
+    def test_tag_pattern2re_pattern_quantifier(self):
+        """Test for bug https://github.com/nltk/nltk/issues/1597
+
+        Ensures that curly bracket quantifiers can be used inside a chunk rule.
+        This type of quantifier has been used for the supplementary example
+        in http://www.nltk.org/book/ch07.html#exploring-text-corpora.
+        """
+        sent = [
+            ('The', 'AT'),
+            ('September-October', 'NP'),
+            ('term', 'NN'),
+            ('jury', 'NN'),
+            ('had', 'HVD'),
+            ('been', 'BEN'),
+            ('charged', 'VBN'),
+            ('by', 'IN'),
+            ('Fulton', 'NP-TL'),
+            ('Superior', 'JJ-TL'),
+            ('Court', 'NN-TL'),
+            ('Judge', 'NN-TL'),
+            ('Durwood', 'NP'),
+            ('Pye', 'NP'),
+            ('to', 'TO'),
+            ('investigate', 'VB'),
+            ('reports', 'NNS'),
+            ('of', 'IN'),
+            ('possible', 'JJ'),
+            ('``', '``'),
+            ('irregularities', 'NNS'),
+            ("''", "''"),
+            ('in', 'IN'),
+            ('the', 'AT'),
+            ('hard-fought', 'JJ'),
+            ('primary', 'NN'),
+            ('which', 'WDT'),
+            ('was', 'BEDZ'),
+            ('won', 'VBN'),
+            ('by', 'IN'),
+            ('Mayor-nominate', 'NN-TL'),
+            ('Ivan', 'NP'),
+            ('Allen', 'NP'),
+            ('Jr.', 'NP'),
+            ('.', '.'),
+        ]  # source: brown corpus
+        cp = RegexpParser('CHUNK: {<N.*>{4,}}')
+        tree = cp.parse(sent)
+        assert (
+            tree.pformat()
+            == """(S
+  The/AT
+  September-October/NP
+  term/NN
+  jury/NN
+  had/HVD
+  been/BEN
+  charged/VBN
+  by/IN
+  Fulton/NP-TL
+  Superior/JJ-TL
+  (CHUNK Court/NN-TL Judge/NN-TL Durwood/NP Pye/NP)
+  to/TO
+  investigate/VB
+  reports/NNS
+  of/IN
+  possible/JJ
+  ``/``
+  irregularities/NNS
+  ''/''
+  in/IN
+  the/AT
+  hard-fought/JJ
+  primary/NN
+  which/WDT
+  was/BEDZ
+  won/VBN
+  by/IN
+  (CHUNK Mayor-nominate/NN-TL Ivan/NP Allen/NP Jr./NP)
+  ./.)"""
+        )
diff --git a/nlp_resource_data/nltk/test/unit/test_classify.py b/nlp_resource_data/nltk/test/unit/test_classify.py
new file mode 100644 (file)
index 0000000..4dae5d1
--- /dev/null
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.classify. See also: nltk/test/classify.doctest
+"""
+from unittest import SkipTest
+from nltk import classify
+
+TRAIN = [
+    (dict(a=1, b=1, c=1), 'y'),
+    (dict(a=1, b=1, c=1), 'x'),
+    (dict(a=1, b=1, c=0), 'y'),
+    (dict(a=0, b=1, c=1), 'x'),
+    (dict(a=0, b=1, c=1), 'y'),
+    (dict(a=0, b=0, c=1), 'y'),
+    (dict(a=0, b=1, c=0), 'x'),
+    (dict(a=0, b=0, c=0), 'x'),
+    (dict(a=0, b=1, c=1), 'y'),
+]
+
+TEST = [
+    (dict(a=1, b=0, c=1)),  # unseen
+    (dict(a=1, b=0, c=0)),  # unseen
+    (dict(a=0, b=1, c=1)),  # seen 3 times, labels=y,y,x
+    (dict(a=0, b=1, c=0)),  # seen 1 time, label=x
+]
+
+RESULTS = [(0.16, 0.84), (0.46, 0.54), (0.41, 0.59), (0.76, 0.24)]
+
+
+def assert_classifier_correct(algorithm):
+    try:
+        classifier = classify.MaxentClassifier.train(
+            TRAIN, algorithm, trace=0, max_iter=1000
+        )
+    except (LookupError, AttributeError) as e:
+        raise SkipTest(str(e))
+
+    for (px, py), featureset in zip(RESULTS, TEST):
+        pdist = classifier.prob_classify(featureset)
+        assert abs(pdist.prob('x') - px) < 1e-2, (pdist.prob('x'), px)
+        assert abs(pdist.prob('y') - py) < 1e-2, (pdist.prob('y'), py)
+
+
+def test_megam():
+    assert_classifier_correct('MEGAM')
+
+
+def test_tadm():
+    assert_classifier_correct('TADM')
diff --git a/nlp_resource_data/nltk/test/unit/test_collocations.py b/nlp_resource_data/nltk/test/unit/test_collocations.py
new file mode 100644 (file)
index 0000000..8949411
--- /dev/null
@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+import unittest
+
+from nltk.collocations import BigramCollocationFinder
+from nltk.metrics import BigramAssocMeasures
+
+## Test bigram counters with discontinuous bigrams and repeated words
+
+_EPSILON = 1e-8
+
+
+def close_enough(x, y):
+    """Verify that two sequences of n-gram association values are within
+       _EPSILON of each other.
+    """
+
+    for (x1, y1) in zip(x, y):
+        if x1[0] != y1[0] or abs(x1[1] - y1[1]) > _EPSILON:
+            return False
+    return True
+
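+# For bigrams, NLTK computes PMI as log2(n_ii * N / (n_ix * n_xi)), scaling
+# the joint count n_ii down by (window_size - 1) for windowed finders; e.g.
+# in test_bigram2 every observed pair scores log2(1 * 8 / (2 * 2)) = 1.0.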
+
+class TestBigram(unittest.TestCase):
+    def test_bigram2(self):
+        sent = 'this this is is a a test test'.split()
+
+        b = BigramCollocationFinder.from_words(sent)
+
+        # sort both sides so the comparison is order-independent
+        self.assertEqual(
+            sorted(b.ngram_fd.items()),
+            sorted(
+                [
+                    (('a', 'a'), 1),
+                    (('a', 'test'), 1),
+                    (('is', 'a'), 1),
+                    (('is', 'is'), 1),
+                    (('test', 'test'), 1),
+                    (('this', 'is'), 1),
+                    (('this', 'this'), 1),
+                ]
+            ),
+        )
+        self.assertEqual(
+            sorted(b.word_fd.items()),
+            sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]),
+        )
+        self.assertTrue(
+            len(sent) == sum(b.word_fd.values()) == sum(b.ngram_fd.values()) + 1
+        )
+        self.assertTrue(
+            close_enough(
+                sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
+                sorted(
+                    [
+                        (('a', 'a'), 1.0),
+                        (('a', 'test'), 1.0),
+                        (('is', 'a'), 1.0),
+                        (('is', 'is'), 1.0),
+                        (('test', 'test'), 1.0),
+                        (('this', 'is'), 1.0),
+                        (('this', 'this'), 1.0),
+                    ]
+                ),
+            )
+        )
+
+    def test_bigram3(self):
+        sent = 'this this is is a a test test'.split()
+
+        b = BigramCollocationFinder.from_words(sent, window_size=3)
+        self.assertEqual(
+            sorted(b.ngram_fd.items()),
+            sorted(
+                [
+                    (('a', 'test'), 3),
+                    (('is', 'a'), 3),
+                    (('this', 'is'), 3),
+                    (('a', 'a'), 1),
+                    (('is', 'is'), 1),
+                    (('test', 'test'), 1),
+                    (('this', 'this'), 1),
+                ]
+            ),
+        )
+        self.assertEqual(
+            sorted(b.word_fd.items()),
+            sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]),
+        )
+        self.assertTrue(
+            len(sent)
+            == sum(b.word_fd.values())
+            == (sum(b.ngram_fd.values()) + 2 + 1) / 2.0
+        )
+        self.assertTrue(
+            close_enough(
+                sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
+                sorted(
+                    [
+                        (('a', 'test'), 1.584962500721156),
+                        (('is', 'a'), 1.584962500721156),
+                        (('this', 'is'), 1.584962500721156),
+                        (('a', 'a'), 0.0),
+                        (('is', 'is'), 0.0),
+                        (('test', 'test'), 0.0),
+                        (('this', 'this'), 0.0),
+                    ]
+                ),
+            )
+        )
+
+    def test_bigram5(self):
+        sent = 'this this is is a a test test'.split()
+
+        b = BigramCollocationFinder.from_words(sent, window_size=5)
+        self.assertEqual(
+            sorted(b.ngram_fd.items()),
+            sorted(
+                [
+                    (('a', 'test'), 4),
+                    (('is', 'a'), 4),
+                    (('this', 'is'), 4),
+                    (('is', 'test'), 3),
+                    (('this', 'a'), 3),
+                    (('a', 'a'), 1),
+                    (('is', 'is'), 1),
+                    (('test', 'test'), 1),
+                    (('this', 'this'), 1),
+                ]
+            ),
+        )
+        self.assertEqual(
+            sorted(b.word_fd.items()),
+            sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]),
+        )
+        self.assertTrue(
+            len(sent)
+            == sum(b.word_fd.values())
+            == (sum(b.ngram_fd.values()) + 4 + 3 + 2 + 1) / 4.0
+        )
+        self.assertTrue(
+            close_enough(
+                sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
+                sorted(
+                    [
+                        (('a', 'test'), 1.0),
+                        (('is', 'a'), 1.0),
+                        (('this', 'is'), 1.0),
+                        (('is', 'test'), 0.5849625007211562),
+                        (('this', 'a'), 0.5849625007211562),
+                        (('a', 'a'), -1.0),
+                        (('is', 'is'), -1.0),
+                        (('test', 'test'), -1.0),
+                        (('this', 'this'), -1.0),
+                    ]
+                ),
+            )
+        )
diff --git a/nlp_resource_data/nltk/test/unit/test_concordance.py b/nlp_resource_data/nltk/test/unit/test_concordance.py
new file mode 100644 (file)
index 0000000..83e407b
--- /dev/null
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+
+import unittest
+import contextlib
+import sys
+from io import StringIO
+
+from nltk.corpus import gutenberg
+from nltk.text import Text
+
+
+@contextlib.contextmanager
+def stdout_redirect(where):
+    sys.stdout = where
+    try:
+        yield where
+    finally:
+        sys.stdout = sys.__stdout__
+
+
+class TestConcordance(unittest.TestCase):
+    """Text constructed using: http://www.nltk.org/book/ch01.html"""
+
+    @classmethod
+    def setup_class(cls):
+        cls.corpus = gutenberg.words('melville-moby_dick.txt')
+
+    @classmethod
+    def teardown_class(cls):
+        pass
+
+    def setUp(self):
+        self.text = Text(TestConcordance.corpus)
+        self.query = "monstrous"
+        self.maxDiff = None
+        self.list_out = [
+            'ong the former , one was of a most monstrous size . ... This came towards us , ',
+            'ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r',
+            'll over with a heathenish array of monstrous clubs and spears . Some were thick',
+            'd as you gazed , and wondered what monstrous cannibal and savage could ever hav',
+            'that has survived the flood ; most monstrous and most mountainous ! That Himmal',
+            'they might scout at Moby Dick as a monstrous fable , or still worse and more de',
+            'th of Radney .\'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l',
+            'ing Scenes . In connexion with the monstrous pictures of whales , I am strongly',
+            'ere to enter upon those still more monstrous stories of them which are to be fo',
+            'ght have been rummaged out of this monstrous cabinet there is no telling . But ',
+            'of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u',
+        ]
+
+    def tearDown(self):
+        pass
+
+    def test_concordance_list(self):
+        concordance_out = self.text.concordance_list(self.query)
+        self.assertEqual(self.list_out, [c.line for c in concordance_out])
+
+    def test_concordance_width(self):
+        list_out = [
+            "monstrous",
+            "monstrous",
+            "monstrous",
+            "monstrous",
+            "monstrous",
+            "monstrous",
+            "Monstrous",
+            "monstrous",
+            "monstrous",
+            "monstrous",
+            "monstrous",
+        ]
+
+        concordance_out = self.text.concordance_list(self.query, width=0)
+        self.assertEqual(list_out, [c.query for c in concordance_out])
+
+    def test_concordance_lines(self):
+        concordance_out = self.text.concordance_list(self.query, lines=3)
+        self.assertEqual(self.list_out[:3], [c.line for c in concordance_out])
+
+    def test_concordance_print(self):
+        print_out = """Displaying 11 of 11 matches:
+        ong the former , one was of a most monstrous size . ... This came towards us ,
+        ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
+        ll over with a heathenish array of monstrous clubs and spears . Some were thick
+        d as you gazed , and wondered what monstrous cannibal and savage could ever hav
+        that has survived the flood ; most monstrous and most mountainous ! That Himmal
+        they might scout at Moby Dick as a monstrous fable , or still worse and more de
+        th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l
+        ing Scenes . In connexion with the monstrous pictures of whales , I am strongly
+        ere to enter upon those still more monstrous stories of them which are to be fo
+        ght have been rummaged out of this monstrous cabinet there is no telling . But
+        of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u
+        """
+
+        with stdout_redirect(StringIO()) as stdout:
+            self.text.concordance(self.query)
+
+        def strip_space(raw_str):
+            return raw_str.replace(" ", "")
+
+        self.assertEqual(strip_space(print_out), strip_space(stdout.getvalue()))
diff --git a/nlp_resource_data/nltk/test/unit/test_corenlp.py b/nlp_resource_data/nltk/test/unit/test_corenlp.py
new file mode 100644 (file)
index 0000000..966ecc6
--- /dev/null
@@ -0,0 +1,1416 @@
+# -*- coding: utf-8 -*-
+
+"""
+Mock test for Stanford CoreNLP wrappers.
+"""
+
+import sys
+from itertools import chain
+from unittest import TestCase, SkipTest
+from unittest.mock import MagicMock
+
+from nltk.tree import Tree
+from nltk.parse import corenlp
+
+
+class TestTokenizerAPI(TestCase):
+    def test_tokenize(self):
+        corenlp_tokenizer = corenlp.CoreNLPParser()
+
+        api_return_value = {
+            u'sentences': [
+                {
+                    u'index': 0,
+                    u'tokens': [
+                        {
+                            u'after': u' ',
+                            u'before': u'',
+                            u'characterOffsetBegin': 0,
+                            u'characterOffsetEnd': 4,
+                            u'index': 1,
+                            u'originalText': u'Good',
+                            u'word': u'Good',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 5,
+                            u'characterOffsetEnd': 12,
+                            u'index': 2,
+                            u'originalText': u'muffins',
+                            u'word': u'muffins',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 13,
+                            u'characterOffsetEnd': 17,
+                            u'index': 3,
+                            u'originalText': u'cost',
+                            u'word': u'cost',
+                        },
+                        {
+                            u'after': u'',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 18,
+                            u'characterOffsetEnd': 19,
+                            u'index': 4,
+                            u'originalText': u'$',
+                            u'word': u'$',
+                        },
+                        {
+                            u'after': u'\n',
+                            u'before': u'',
+                            u'characterOffsetBegin': 19,
+                            u'characterOffsetEnd': 23,
+                            u'index': 5,
+                            u'originalText': u'3.88',
+                            u'word': u'3.88',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u'\n',
+                            u'characterOffsetBegin': 24,
+                            u'characterOffsetEnd': 26,
+                            u'index': 6,
+                            u'originalText': u'in',
+                            u'word': u'in',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 27,
+                            u'characterOffsetEnd': 30,
+                            u'index': 7,
+                            u'originalText': u'New',
+                            u'word': u'New',
+                        },
+                        {
+                            u'after': u'',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 31,
+                            u'characterOffsetEnd': 35,
+                            u'index': 8,
+                            u'originalText': u'York',
+                            u'word': u'York',
+                        },
+                        {
+                            u'after': u'  ',
+                            u'before': u'',
+                            u'characterOffsetBegin': 35,
+                            u'characterOffsetEnd': 36,
+                            u'index': 9,
+                            u'originalText': u'.',
+                            u'word': u'.',
+                        },
+                    ],
+                },
+                {
+                    u'index': 1,
+                    u'tokens': [
+                        {
+                            u'after': u' ',
+                            u'before': u'  ',
+                            u'characterOffsetBegin': 38,
+                            u'characterOffsetEnd': 44,
+                            u'index': 1,
+                            u'originalText': u'Please',
+                            u'word': u'Please',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 45,
+                            u'characterOffsetEnd': 48,
+                            u'index': 2,
+                            u'originalText': u'buy',
+                            u'word': u'buy',
+                        },
+                        {
+                            u'after': u'\n',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 49,
+                            u'characterOffsetEnd': 51,
+                            u'index': 3,
+                            u'originalText': u'me',
+                            u'word': u'me',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u'\n',
+                            u'characterOffsetBegin': 52,
+                            u'characterOffsetEnd': 55,
+                            u'index': 4,
+                            u'originalText': u'two',
+                            u'word': u'two',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 56,
+                            u'characterOffsetEnd': 58,
+                            u'index': 5,
+                            u'originalText': u'of',
+                            u'word': u'of',
+                        },
+                        {
+                            u'after': u'',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 59,
+                            u'characterOffsetEnd': 63,
+                            u'index': 6,
+                            u'originalText': u'them',
+                            u'word': u'them',
+                        },
+                        {
+                            u'after': u'\n',
+                            u'before': u'',
+                            u'characterOffsetBegin': 63,
+                            u'characterOffsetEnd': 64,
+                            u'index': 7,
+                            u'originalText': u'.',
+                            u'word': u'.',
+                        },
+                    ],
+                },
+                {
+                    u'index': 2,
+                    u'tokens': [
+                        {
+                            u'after': u'',
+                            u'before': u'\n',
+                            u'characterOffsetBegin': 65,
+                            u'characterOffsetEnd': 71,
+                            u'index': 1,
+                            u'originalText': u'Thanks',
+                            u'word': u'Thanks',
+                        },
+                        {
+                            u'after': u'',
+                            u'before': u'',
+                            u'characterOffsetBegin': 71,
+                            u'characterOffsetEnd': 72,
+                            u'index': 2,
+                            u'originalText': u'.',
+                            u'word': u'.',
+                        },
+                    ],
+                },
+            ]
+        }
+        corenlp_tokenizer.api_call = MagicMock(return_value=api_return_value)
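+        # api_call is patched with a canned CoreNLP response so the test
+        # runs without a live server.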
+
+        input_string = "Good muffins cost $3.88\nin New York.  Please buy me\ntwo of them.\nThanks."
+
+        expected_output = [
+            u'Good',
+            u'muffins',
+            u'cost',
+            u'$',
+            u'3.88',
+            u'in',
+            u'New',
+            u'York',
+            u'.',
+            u'Please',
+            u'buy',
+            u'me',
+            u'two',
+            u'of',
+            u'them',
+            u'.',
+            u'Thanks',
+            u'.',
+        ]
+
+        tokenized_output = list(corenlp_tokenizer.tokenize(input_string))
+
+        corenlp_tokenizer.api_call.assert_called_once_with(
+            'Good muffins cost $3.88\nin New York.  Please buy me\ntwo of them.\nThanks.',
+            properties={'annotators': 'tokenize,ssplit'},
+        )
+        self.assertEqual(expected_output, tokenized_output)
+
+
+class TestTaggerAPI(TestCase):
+    def test_pos_tagger(self):
+        corenlp_tagger = corenlp.CoreNLPParser(tagtype='pos')
+
+        api_return_value = {
+            u'sentences': [
+                {
+                    u'basicDependencies': [
+                        {
+                            u'dep': u'ROOT',
+                            u'dependent': 1,
+                            u'dependentGloss': u'What',
+                            u'governor': 0,
+                            u'governorGloss': u'ROOT',
+                        },
+                        {
+                            u'dep': u'cop',
+                            u'dependent': 2,
+                            u'dependentGloss': u'is',
+                            u'governor': 1,
+                            u'governorGloss': u'What',
+                        },
+                        {
+                            u'dep': u'det',
+                            u'dependent': 3,
+                            u'dependentGloss': u'the',
+                            u'governor': 4,
+                            u'governorGloss': u'airspeed',
+                        },
+                        {
+                            u'dep': u'nsubj',
+                            u'dependent': 4,
+                            u'dependentGloss': u'airspeed',
+                            u'governor': 1,
+                            u'governorGloss': u'What',
+                        },
+                        {
+                            u'dep': u'case',
+                            u'dependent': 5,
+                            u'dependentGloss': u'of',
+                            u'governor': 8,
+                            u'governorGloss': u'swallow',
+                        },
+                        {
+                            u'dep': u'det',
+                            u'dependent': 6,
+                            u'dependentGloss': u'an',
+                            u'governor': 8,
+                            u'governorGloss': u'swallow',
+                        },
+                        {
+                            u'dep': u'compound',
+                            u'dependent': 7,
+                            u'dependentGloss': u'unladen',
+                            u'governor': 8,
+                            u'governorGloss': u'swallow',
+                        },
+                        {
+                            u'dep': u'nmod',
+                            u'dependent': 8,
+                            u'dependentGloss': u'swallow',
+                            u'governor': 4,
+                            u'governorGloss': u'airspeed',
+                        },
+                        {
+                            u'dep': u'punct',
+                            u'dependent': 9,
+                            u'dependentGloss': u'?',
+                            u'governor': 1,
+                            u'governorGloss': u'What',
+                        },
+                    ],
+                    u'enhancedDependencies': [
+                        {
+                            u'dep': u'ROOT',
+                            u'dependent': 1,
+                            u'dependentGloss': u'What',
+                            u'governor': 0,
+                            u'governorGloss': u'ROOT',
+                        },
+                        {
+                            u'dep': u'cop',
+                            u'dependent': 2,
+                            u'dependentGloss': u'is',
+                            u'governor': 1,
+                            u'governorGloss': u'What',
+                        },
+                        {
+                            u'dep': u'det',
+                            u'dependent': 3,
+                            u'dependentGloss': u'the',
+                            u'governor': 4,
+                            u'governorGloss': u'airspeed',
+                        },
+                        {
+                            u'dep': u'nsubj',
+                            u'dependent': 4,
+                            u'dependentGloss': u'airspeed',
+                            u'governor': 1,
+                            u'governorGloss': u'What',
+                        },
+                        {
+                            u'dep': u'case',
+                            u'dependent': 5,
+                            u'dependentGloss': u'of',
+                            u'governor': 8,
+                            u'governorGloss': u'swallow',
+                        },
+                        {
+                            u'dep': u'det',
+                            u'dependent': 6,
+                            u'dependentGloss': u'an',
+                            u'governor': 8,
+                            u'governorGloss': u'swallow',
+                        },
+                        {
+                            u'dep': u'compound',
+                            u'dependent': 7,
+                            u'dependentGloss': u'unladen',
+                            u'governor': 8,
+                            u'governorGloss': u'swallow',
+                        },
+                        {
+                            u'dep': u'nmod:of',
+                            u'dependent': 8,
+                            u'dependentGloss': u'swallow',
+                            u'governor': 4,
+                            u'governorGloss': u'airspeed',
+                        },
+                        {
+                            u'dep': u'punct',
+                            u'dependent': 9,
+                            u'dependentGloss': u'?',
+                            u'governor': 1,
+                            u'governorGloss': u'What',
+                        },
+                    ],
+                    u'enhancedPlusPlusDependencies': [
+                        {
+                            u'dep': u'ROOT',
+                            u'dependent': 1,
+                            u'dependentGloss': u'What',
+                            u'governor': 0,
+                            u'governorGloss': u'ROOT',
+                        },
+                        {
+                            u'dep': u'cop',
+                            u'dependent': 2,
+                            u'dependentGloss': u'is',
+                            u'governor': 1,
+                            u'governorGloss': u'What',
+                        },
+                        {
+                            u'dep': u'det',
+                            u'dependent': 3,
+                            u'dependentGloss': u'the',
+                            u'governor': 4,
+                            u'governorGloss': u'airspeed',
+                        },
+                        {
+                            u'dep': u'nsubj',
+                            u'dependent': 4,
+                            u'dependentGloss': u'airspeed',
+                            u'governor': 1,
+                            u'governorGloss': u'What',
+                        },
+                        {
+                            u'dep': u'case',
+                            u'dependent': 5,
+                            u'dependentGloss': u'of',
+                            u'governor': 8,
+                            u'governorGloss': u'swallow',
+                        },
+                        {
+                            u'dep': u'det',
+                            u'dependent': 6,
+                            u'dependentGloss': u'an',
+                            u'governor': 8,
+                            u'governorGloss': u'swallow',
+                        },
+                        {
+                            u'dep': u'compound',
+                            u'dependent': 7,
+                            u'dependentGloss': u'unladen',
+                            u'governor': 8,
+                            u'governorGloss': u'swallow',
+                        },
+                        {
+                            u'dep': u'nmod:of',
+                            u'dependent': 8,
+                            u'dependentGloss': u'swallow',
+                            u'governor': 4,
+                            u'governorGloss': u'airspeed',
+                        },
+                        {
+                            u'dep': u'punct',
+                            u'dependent': 9,
+                            u'dependentGloss': u'?',
+                            u'governor': 1,
+                            u'governorGloss': u'What',
+                        },
+                    ],
+                    u'index': 0,
+                    u'parse': u'(ROOT\n  (SBARQ\n    (WHNP (WP What))\n    (SQ (VBZ is)\n      (NP\n        (NP (DT the) (NN airspeed))\n        (PP (IN of)\n          (NP (DT an) (NN unladen) (NN swallow)))))\n    (. ?)))',
+                    u'tokens': [
+                        {
+                            u'after': u' ',
+                            u'before': u'',
+                            u'characterOffsetBegin': 0,
+                            u'characterOffsetEnd': 4,
+                            u'index': 1,
+                            u'lemma': u'what',
+                            u'originalText': u'What',
+                            u'pos': u'WP',
+                            u'word': u'What',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 5,
+                            u'characterOffsetEnd': 7,
+                            u'index': 2,
+                            u'lemma': u'be',
+                            u'originalText': u'is',
+                            u'pos': u'VBZ',
+                            u'word': u'is',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 8,
+                            u'characterOffsetEnd': 11,
+                            u'index': 3,
+                            u'lemma': u'the',
+                            u'originalText': u'the',
+                            u'pos': u'DT',
+                            u'word': u'the',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 12,
+                            u'characterOffsetEnd': 20,
+                            u'index': 4,
+                            u'lemma': u'airspeed',
+                            u'originalText': u'airspeed',
+                            u'pos': u'NN',
+                            u'word': u'airspeed',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 21,
+                            u'characterOffsetEnd': 23,
+                            u'index': 5,
+                            u'lemma': u'of',
+                            u'originalText': u'of',
+                            u'pos': u'IN',
+                            u'word': u'of',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 24,
+                            u'characterOffsetEnd': 26,
+                            u'index': 6,
+                            u'lemma': u'a',
+                            u'originalText': u'an',
+                            u'pos': u'DT',
+                            u'word': u'an',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 27,
+                            u'characterOffsetEnd': 34,
+                            u'index': 7,
+                            u'lemma': u'unladen',
+                            u'originalText': u'unladen',
+                            u'pos': u'JJ',
+                            u'word': u'unladen',
+                        },
+                        {
+                            u'after': u' ',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 35,
+                            u'characterOffsetEnd': 42,
+                            u'index': 8,
+                            u'lemma': u'swallow',
+                            u'originalText': u'swallow',
+                            u'pos': u'VB',
+                            u'word': u'swallow',
+                        },
+                        {
+                            u'after': u'',
+                            u'before': u' ',
+                            u'characterOffsetBegin': 43,
+                            u'characterOffsetEnd': 44,
+                            u'index': 9,
+                            u'lemma': u'?',
+                            u'originalText': u'?',
+                            u'pos': u'.',
+                            u'word': u'?',
+                        },
+                    ],
+                }
+            ]
+        }
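+        # Stub out the HTTP layer: api_call is replaced with a MagicMock so
+        # tag() is exercised against the canned JSON above rather than a
+        # live CoreNLP server.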
+        corenlp_tagger.api_call = MagicMock(return_value=api_return_value)
+
+        input_tokens = 'What is the airspeed of an unladen swallow ?'.split()
+        expected_output = [
+            ('What', 'WP'),
+            ('is', 'VBZ'),
+            ('the', 'DT'),
+            ('airspeed', 'NN'),
+            ('of', 'IN'),
+            ('an', 'DT'),
+            ('unladen', 'JJ'),
+            ('swallow', 'VB'),
+            ('?', '.'),
+        ]
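+        # tag() reduces each CoreNLP token dict to a (word, pos) pair.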
+        tagged_output = corenlp_tagger.tag(input_tokens)
+
+        corenlp_tagger.api_call.assert_called_once_with(
+            'What is the airspeed of an unladen swallow ?',
+            properties={
+                'ssplit.isOneSentence': 'true',
+                'annotators': 'tokenize,ssplit,pos',
+            },
+        )
+        self.assertEqual(expected_output, tagged_output)
+
+    def test_ner_tagger(self):
+        corenlp_tagger = corenlp.CoreNLPParser(tagtype='ner')
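+        # With tagtype='ner', tag() pairs each token with its named-entity
+        # label (PERSON, ORGANIZATION, O, ...) instead of a POS tag.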
+
+        api_return_value = {
+            'sentences': [
+                {
+                    'index': 0,
+                    'tokens': [
+                        {
+                            'after': ' ',
+                            'before': '',
+                            'characterOffsetBegin': 0,
+                            'characterOffsetEnd': 4,
+                            'index': 1,
+                            'lemma': 'Rami',
+                            'ner': 'PERSON',
+                            'originalText': 'Rami',
+                            'pos': 'NNP',
+                            'word': 'Rami',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 5,
+                            'characterOffsetEnd': 8,
+                            'index': 2,
+                            'lemma': 'Eid',
+                            'ner': 'PERSON',
+                            'originalText': 'Eid',
+                            'pos': 'NNP',
+                            'word': 'Eid',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 9,
+                            'characterOffsetEnd': 11,
+                            'index': 3,
+                            'lemma': 'be',
+                            'ner': 'O',
+                            'originalText': 'is',
+                            'pos': 'VBZ',
+                            'word': 'is',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 12,
+                            'characterOffsetEnd': 20,
+                            'index': 4,
+                            'lemma': 'study',
+                            'ner': 'O',
+                            'originalText': 'studying',
+                            'pos': 'VBG',
+                            'word': 'studying',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 21,
+                            'characterOffsetEnd': 23,
+                            'index': 5,
+                            'lemma': 'at',
+                            'ner': 'O',
+                            'originalText': 'at',
+                            'pos': 'IN',
+                            'word': 'at',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 24,
+                            'characterOffsetEnd': 29,
+                            'index': 6,
+                            'lemma': 'Stony',
+                            'ner': 'ORGANIZATION',
+                            'originalText': 'Stony',
+                            'pos': 'NNP',
+                            'word': 'Stony',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 30,
+                            'characterOffsetEnd': 35,
+                            'index': 7,
+                            'lemma': 'Brook',
+                            'ner': 'ORGANIZATION',
+                            'originalText': 'Brook',
+                            'pos': 'NNP',
+                            'word': 'Brook',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 36,
+                            'characterOffsetEnd': 46,
+                            'index': 8,
+                            'lemma': 'University',
+                            'ner': 'ORGANIZATION',
+                            'originalText': 'University',
+                            'pos': 'NNP',
+                            'word': 'University',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 47,
+                            'characterOffsetEnd': 49,
+                            'index': 9,
+                            'lemma': 'in',
+                            'ner': 'O',
+                            'originalText': 'in',
+                            'pos': 'IN',
+                            'word': 'in',
+                        },
+                        {
+                            'after': '',
+                            'before': ' ',
+                            'characterOffsetBegin': 50,
+                            'characterOffsetEnd': 52,
+                            'index': 10,
+                            'lemma': 'NY',
+                            'ner': 'O',
+                            'originalText': 'NY',
+                            'pos': 'NNP',
+                            'word': 'NY',
+                        },
+                    ],
+                }
+            ]
+        }
+
+        corenlp_tagger.api_call = MagicMock(return_value=api_return_value)
+
+        input_tokens = 'Rami Eid is studying at Stony Brook University in NY'.split()
+        expected_output = [
+            ('Rami', 'PERSON'),
+            ('Eid', 'PERSON'),
+            ('is', 'O'),
+            ('studying', 'O'),
+            ('at', 'O'),
+            ('Stony', 'ORGANIZATION'),
+            ('Brook', 'ORGANIZATION'),
+            ('University', 'ORGANIZATION'),
+            ('in', 'O'),
+            ('NY', 'O'),
+        ]
+        tagged_output = corenlp_tagger.tag(input_tokens)
+
+        corenlp_tagger.api_call.assert_called_once_with(
+            'Rami Eid is studying at Stony Brook University in NY',
+            properties={
+                'ssplit.isOneSentence': 'true',
+                'annotators': 'tokenize,ssplit,ner',
+            },
+        )
+        self.assertEqual(expected_output, tagged_output)
+
+    def test_unexpected_tagtype(self):
+        with self.assertRaises(ValueError):
+            corenlp.CoreNLPParser(tagtype='test')
+
+
+class TestParserAPI(TestCase):
+    def test_parse(self):
+        corenlp_parser = corenlp.CoreNLPParser()
+
+        api_return_value = {
+            'sentences': [
+                {
+                    'basicDependencies': [
+                        {
+                            'dep': 'ROOT',
+                            'dependent': 4,
+                            'dependentGloss': 'fox',
+                            'governor': 0,
+                            'governorGloss': 'ROOT',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 1,
+                            'dependentGloss': 'The',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 2,
+                            'dependentGloss': 'quick',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 3,
+                            'dependentGloss': 'brown',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'dep',
+                            'dependent': 5,
+                            'dependentGloss': 'jumps',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'case',
+                            'dependent': 6,
+                            'dependentGloss': 'over',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 7,
+                            'dependentGloss': 'the',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 8,
+                            'dependentGloss': 'lazy',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'nmod',
+                            'dependent': 9,
+                            'dependentGloss': 'dog',
+                            'governor': 5,
+                            'governorGloss': 'jumps',
+                        },
+                    ],
+                    'enhancedDependencies': [
+                        {
+                            'dep': 'ROOT',
+                            'dependent': 4,
+                            'dependentGloss': 'fox',
+                            'governor': 0,
+                            'governorGloss': 'ROOT',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 1,
+                            'dependentGloss': 'The',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 2,
+                            'dependentGloss': 'quick',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 3,
+                            'dependentGloss': 'brown',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'dep',
+                            'dependent': 5,
+                            'dependentGloss': 'jumps',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'case',
+                            'dependent': 6,
+                            'dependentGloss': 'over',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 7,
+                            'dependentGloss': 'the',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 8,
+                            'dependentGloss': 'lazy',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'nmod:over',
+                            'dependent': 9,
+                            'dependentGloss': 'dog',
+                            'governor': 5,
+                            'governorGloss': 'jumps',
+                        },
+                    ],
+                    'enhancedPlusPlusDependencies': [
+                        {
+                            'dep': 'ROOT',
+                            'dependent': 4,
+                            'dependentGloss': 'fox',
+                            'governor': 0,
+                            'governorGloss': 'ROOT',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 1,
+                            'dependentGloss': 'The',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 2,
+                            'dependentGloss': 'quick',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 3,
+                            'dependentGloss': 'brown',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'dep',
+                            'dependent': 5,
+                            'dependentGloss': 'jumps',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'case',
+                            'dependent': 6,
+                            'dependentGloss': 'over',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 7,
+                            'dependentGloss': 'the',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 8,
+                            'dependentGloss': 'lazy',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'nmod:over',
+                            'dependent': 9,
+                            'dependentGloss': 'dog',
+                            'governor': 5,
+                            'governorGloss': 'jumps',
+                        },
+                    ],
+                    'index': 0,
+                    'parse': '(ROOT\n  (NP\n    (NP (DT The) (JJ quick) (JJ brown) (NN fox))\n    (NP\n      (NP (NNS jumps))\n      (PP (IN over)\n        (NP (DT the) (JJ lazy) (NN dog))))))',
+                    'tokens': [
+                        {
+                            'after': ' ',
+                            'before': '',
+                            'characterOffsetBegin': 0,
+                            'characterOffsetEnd': 3,
+                            'index': 1,
+                            'lemma': 'the',
+                            'originalText': 'The',
+                            'pos': 'DT',
+                            'word': 'The',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 4,
+                            'characterOffsetEnd': 9,
+                            'index': 2,
+                            'lemma': 'quick',
+                            'originalText': 'quick',
+                            'pos': 'JJ',
+                            'word': 'quick',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 10,
+                            'characterOffsetEnd': 15,
+                            'index': 3,
+                            'lemma': 'brown',
+                            'originalText': 'brown',
+                            'pos': 'JJ',
+                            'word': 'brown',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 16,
+                            'characterOffsetEnd': 19,
+                            'index': 4,
+                            'lemma': 'fox',
+                            'originalText': 'fox',
+                            'pos': 'NN',
+                            'word': 'fox',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 20,
+                            'characterOffsetEnd': 25,
+                            'index': 5,
+                            'lemma': 'jump',
+                            'originalText': 'jumps',
+                            'pos': 'VBZ',
+                            'word': 'jumps',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 26,
+                            'characterOffsetEnd': 30,
+                            'index': 6,
+                            'lemma': 'over',
+                            'originalText': 'over',
+                            'pos': 'IN',
+                            'word': 'over',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 31,
+                            'characterOffsetEnd': 34,
+                            'index': 7,
+                            'lemma': 'the',
+                            'originalText': 'the',
+                            'pos': 'DT',
+                            'word': 'the',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 35,
+                            'characterOffsetEnd': 39,
+                            'index': 8,
+                            'lemma': 'lazy',
+                            'originalText': 'lazy',
+                            'pos': 'JJ',
+                            'word': 'lazy',
+                        },
+                        {
+                            'after': '',
+                            'before': ' ',
+                            'characterOffsetBegin': 40,
+                            'characterOffsetEnd': 43,
+                            'index': 9,
+                            'lemma': 'dog',
+                            'originalText': 'dog',
+                            'pos': 'NN',
+                            'word': 'dog',
+                        },
+                    ],
+                }
+            ]
+        }
+
+        corenlp_parser.api_call = MagicMock(return_value=api_return_value)
+
+        input_tokens = "The quick brown fox jumps over the lazy dog".split()
+        expected_output = Tree(
+            'ROOT',
+            [
+                Tree(
+                    'NP',
+                    [
+                        Tree(
+                            'NP',
+                            [
+                                Tree('DT', ['The']),
+                                Tree('JJ', ['quick']),
+                                Tree('JJ', ['brown']),
+                                Tree('NN', ['fox']),
+                            ],
+                        ),
+                        Tree(
+                            'NP',
+                            [
+                                Tree('NP', [Tree('NNS', ['jumps'])]),
+                                Tree(
+                                    'PP',
+                                    [
+                                        Tree('IN', ['over']),
+                                        Tree(
+                                            'NP',
+                                            [
+                                                Tree('DT', ['the']),
+                                                Tree('JJ', ['lazy']),
+                                                Tree('NN', ['dog']),
+                                            ],
+                                        ),
+                                    ],
+                                ),
+                            ],
+                        ),
+                    ],
+                )
+            ],
+        )
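+        # parse() rebuilds this constituency Tree from the bracketed 'parse'
+        # string in the mocked response.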
+
+        parsed_data = next(corenlp_parser.parse(input_tokens))
+
+        corenlp_parser.api_call.assert_called_once_with(
+            "The quick brown fox jumps over the lazy dog",
+            properties={'ssplit.eolonly': 'true'},
+        )
+        self.assertEqual(expected_output, parsed_data)
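+        # A rough sketch of the same call against a live server (assumes a
+        # CoreNLP instance listening on the default port 9000):
+        #   parser = corenlp.CoreNLPParser(url='http://localhost:9000')
+        #   tree = next(parser.parse('The quick brown fox ...'.split()))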
+
+    def test_dependency_parser(self):
+        corenlp_parser = corenlp.CoreNLPDependencyParser()
+
+        api_return_value = {
+            'sentences': [
+                {
+                    'basicDependencies': [
+                        {
+                            'dep': 'ROOT',
+                            'dependent': 5,
+                            'dependentGloss': 'jumps',
+                            'governor': 0,
+                            'governorGloss': 'ROOT',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 1,
+                            'dependentGloss': 'The',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 2,
+                            'dependentGloss': 'quick',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 3,
+                            'dependentGloss': 'brown',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'nsubj',
+                            'dependent': 4,
+                            'dependentGloss': 'fox',
+                            'governor': 5,
+                            'governorGloss': 'jumps',
+                        },
+                        {
+                            'dep': 'case',
+                            'dependent': 6,
+                            'dependentGloss': 'over',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 7,
+                            'dependentGloss': 'the',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 8,
+                            'dependentGloss': 'lazy',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'nmod',
+                            'dependent': 9,
+                            'dependentGloss': 'dog',
+                            'governor': 5,
+                            'governorGloss': 'jumps',
+                        },
+                    ],
+                    'enhancedDependencies': [
+                        {
+                            'dep': 'ROOT',
+                            'dependent': 5,
+                            'dependentGloss': 'jumps',
+                            'governor': 0,
+                            'governorGloss': 'ROOT',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 1,
+                            'dependentGloss': 'The',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 2,
+                            'dependentGloss': 'quick',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 3,
+                            'dependentGloss': 'brown',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'nsubj',
+                            'dependent': 4,
+                            'dependentGloss': 'fox',
+                            'governor': 5,
+                            'governorGloss': 'jumps',
+                        },
+                        {
+                            'dep': 'case',
+                            'dependent': 6,
+                            'dependentGloss': 'over',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 7,
+                            'dependentGloss': 'the',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 8,
+                            'dependentGloss': 'lazy',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'nmod:over',
+                            'dependent': 9,
+                            'dependentGloss': 'dog',
+                            'governor': 5,
+                            'governorGloss': 'jumps',
+                        },
+                    ],
+                    'enhancedPlusPlusDependencies': [
+                        {
+                            'dep': 'ROOT',
+                            'dependent': 5,
+                            'dependentGloss': 'jumps',
+                            'governor': 0,
+                            'governorGloss': 'ROOT',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 1,
+                            'dependentGloss': 'The',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 2,
+                            'dependentGloss': 'quick',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 3,
+                            'dependentGloss': 'brown',
+                            'governor': 4,
+                            'governorGloss': 'fox',
+                        },
+                        {
+                            'dep': 'nsubj',
+                            'dependent': 4,
+                            'dependentGloss': 'fox',
+                            'governor': 5,
+                            'governorGloss': 'jumps',
+                        },
+                        {
+                            'dep': 'case',
+                            'dependent': 6,
+                            'dependentGloss': 'over',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'det',
+                            'dependent': 7,
+                            'dependentGloss': 'the',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'amod',
+                            'dependent': 8,
+                            'dependentGloss': 'lazy',
+                            'governor': 9,
+                            'governorGloss': 'dog',
+                        },
+                        {
+                            'dep': 'nmod:over',
+                            'dependent': 9,
+                            'dependentGloss': 'dog',
+                            'governor': 5,
+                            'governorGloss': 'jumps',
+                        },
+                    ],
+                    'index': 0,
+                    'tokens': [
+                        {
+                            'after': ' ',
+                            'before': '',
+                            'characterOffsetBegin': 0,
+                            'characterOffsetEnd': 3,
+                            'index': 1,
+                            'lemma': 'the',
+                            'originalText': 'The',
+                            'pos': 'DT',
+                            'word': 'The',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 4,
+                            'characterOffsetEnd': 9,
+                            'index': 2,
+                            'lemma': 'quick',
+                            'originalText': 'quick',
+                            'pos': 'JJ',
+                            'word': 'quick',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 10,
+                            'characterOffsetEnd': 15,
+                            'index': 3,
+                            'lemma': 'brown',
+                            'originalText': 'brown',
+                            'pos': 'JJ',
+                            'word': 'brown',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 16,
+                            'characterOffsetEnd': 19,
+                            'index': 4,
+                            'lemma': 'fox',
+                            'originalText': 'fox',
+                            'pos': 'NN',
+                            'word': 'fox',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 20,
+                            'characterOffsetEnd': 25,
+                            'index': 5,
+                            'lemma': 'jump',
+                            'originalText': 'jumps',
+                            'pos': 'VBZ',
+                            'word': 'jumps',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 26,
+                            'characterOffsetEnd': 30,
+                            'index': 6,
+                            'lemma': 'over',
+                            'originalText': 'over',
+                            'pos': 'IN',
+                            'word': 'over',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 31,
+                            'characterOffsetEnd': 34,
+                            'index': 7,
+                            'lemma': 'the',
+                            'originalText': 'the',
+                            'pos': 'DT',
+                            'word': 'the',
+                        },
+                        {
+                            'after': ' ',
+                            'before': ' ',
+                            'characterOffsetBegin': 35,
+                            'characterOffsetEnd': 39,
+                            'index': 8,
+                            'lemma': 'lazy',
+                            'originalText': 'lazy',
+                            'pos': 'JJ',
+                            'word': 'lazy',
+                        },
+                        {
+                            'after': '',
+                            'before': ' ',
+                            'characterOffsetBegin': 40,
+                            'characterOffsetEnd': 43,
+                            'index': 9,
+                            'lemma': 'dog',
+                            'originalText': 'dog',
+                            'pos': 'NN',
+                            'word': 'dog',
+                        },
+                    ],
+                }
+            ]
+        }
+
+        corenlp_parser.api_call = MagicMock(return_value=api_return_value)
+
+        input_tokens = "The quick brown fox jumps over the lazy dog".split()
+        expected_output = Tree(
+            'jumps',
+            [
+                Tree('fox', ['The', 'quick', 'brown']),
+                Tree('dog', ['over', 'the', 'lazy']),
+            ],
+        )
+
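+        # parse() yields DependencyGraph objects; tree() below collapses the
+        # graph into an nltk.Tree rooted at the head word ('jumps').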
+        parsed_data = next(corenlp_parser.parse(input_tokens))
+
+        corenlp_parser.api_call.assert_called_once_with(
+            "The quick brown fox jumps over the lazy dog",
+            properties={'ssplit.eolonly': 'true'},
+        )
+        self.assertEqual(expected_output, parsed_data.tree())
diff --git a/nlp_resource_data/nltk/test/unit/test_corpora.py b/nlp_resource_data/nltk/test/unit/test_corpora.py
new file mode 100644 (file)
index 0000000..8b105b8
--- /dev/null
@@ -0,0 +1,270 @@
+# -*- coding: utf-8 -*-
+import unittest
+
+from nltk.corpus import (
+    sinica_treebank,
+    conll2007,
+    indian,
+    cess_cat,
+    cess_esp,
+    floresta,
+    ptb,
+    udhr,
+)  # mwa_ppdb is deliberately not imported; see the skipped TestMWAPPDB below
+
+from nltk.tree import Tree
+from nltk.test.unit.utils import skipIf
+
+
+class TestUdhr(unittest.TestCase):
+    def test_words(self):
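+        # The try/except below prints the offending fileid before re-raising,
+        # so a failure identifies which UDHR file could not be tokenized.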
+        for name in udhr.fileids():
+            try:
+                words = list(udhr.words(name))
+            except AssertionError:
+                print(name)
+                raise
+            self.assertTrue(words)
+
+    def test_raw_unicode(self):
+        for name in udhr.fileids():
+            txt = udhr.raw(name)
+            assert not isinstance(txt, bytes), name
+
+
+class TestIndian(unittest.TestCase):
+    def test_words(self):
+        words = indian.words()[:3]
+        self.assertEqual(words, ['মহিষের', 'সন্তান', ':'])
+
+    def test_tagged_words(self):
+        tagged_words = indian.tagged_words()[:3]
+        self.assertEqual(
+            tagged_words, [('মহিষের', 'NN'), ('সন্তান', 'NN'), (':', 'SYM')]
+        )
+
+
+class TestCess(unittest.TestCase):
+    def test_catalan(self):
+        words = cess_cat.words()[:15]
+        txt = "El Tribunal_Suprem -Fpa- TS -Fpt- ha confirmat la condemna a quatre anys d' inhabilitació especial"
+        self.assertEqual(words, txt.split())
+        self.assertEqual(cess_cat.tagged_sents()[0][34][0], "càrrecs")
+
+    def test_esp(self):
+        words = cess_esp.words()[:15]
+        txt = "El grupo estatal Electricité_de_France -Fpa- EDF -Fpt- anunció hoy , jueves , la compra del"
+        self.assertEqual(words, txt.split())
+        self.assertEqual(cess_esp.words()[115], "años")
+
+
+class TestFloresta(unittest.TestCase):
+    def test_words(self):
+        words = floresta.words()[:10]
+        txt = "Um revivalismo refrescante O 7_e_Meio é um ex-libris de a"
+        self.assertEqual(words, txt.split())
+
+
+class TestSinicaTreebank(unittest.TestCase):
+    def test_sents(self):
+        first_3_sents = sinica_treebank.sents()[:3]
+        self.assertEqual(
+            first_3_sents, [['一'], ['友情'], ['嘉珍', '和', '我', '住在', '同一條', '巷子']]
+        )
+
+    def test_parsed_sents(self):
+        parsed_sents = sinica_treebank.parsed_sents()[25]
+        self.assertEqual(
+            parsed_sents,
+            Tree(
+                'S',
+                [
+                    Tree('NP', [Tree('Nba', ['嘉珍'])]),
+                    Tree('V‧地', [Tree('VA11', ['不停']), Tree('DE', ['的'])]),
+                    Tree('VA4', ['哭泣']),
+                ],
+            ),
+        )
+
+
+class TestCoNLL2007(unittest.TestCase):
+    # Reading the CoNLL 2007 Dependency Treebanks
+
+    def test_sents(self):
+        sents = conll2007.sents('esp.train')[0]
+        self.assertEqual(
+            sents[:6], ['El', 'aumento', 'del', 'índice', 'de', 'desempleo']
+        )
+
+    def test_parsed_sents(self):
+
+        parsed_sents = conll2007.parsed_sents('esp.train')[0]
+
+        self.assertEqual(
+            parsed_sents.tree(),
+            Tree(
+                'fortaleció',
+                [
+                    Tree(
+                        'aumento',
+                        [
+                            'El',
+                            Tree(
+                                'del',
+                                [
+                                    Tree(
+                                        'índice',
+                                        [
+                                            Tree(
+                                                'de',
+                                                [Tree('desempleo', ['estadounidense'])],
+                                            )
+                                        ],
+                                    )
+                                ],
+                            ),
+                        ],
+                    ),
+                    'hoy',
+                    'considerablemente',
+                    Tree(
+                        'al',
+                        [
+                            Tree(
+                                'euro',
+                                [
+                                    Tree(
+                                        'cotizaba',
+                                        [
+                                            ',',
+                                            'que',
+                                            Tree('a', [Tree('15.35', ['las', 'GMT'])]),
+                                            'se',
+                                            Tree(
+                                                'en',
+                                                [
+                                                    Tree(
+                                                        'mercado',
+                                                        [
+                                                            'el',
+                                                            Tree('de', ['divisas']),
+                                                            Tree('de', ['Fráncfort']),
+                                                        ],
+                                                    )
+                                                ],
+                                            ),
+                                            Tree('a', ['0,9452_dólares']),
+                                            Tree(
+                                                'frente_a',
+                                                [
+                                                    ',',
+                                                    Tree(
+                                                        '0,9349_dólares',
+                                                        [
+                                                            'los',
+                                                            Tree(
+                                                                'de',
+                                                                [
+                                                                    Tree(
+                                                                        'mañana',
+                                                                        ['esta'],
+                                                                    )
+                                                                ],
+                                                            ),
+                                                        ],
+                                                    ),
+                                                ],
+                                            ),
+                                        ],
+                                    )
+                                ],
+                            )
+                        ],
+                    ),
+                    '.',
+                ],
+            ),
+        )
+
+
+@skipIf(not ptb.fileids(), "A full installation of the Penn Treebank is not available")
+class TestPTB(unittest.TestCase):
+    def test_fileids(self):
+        self.assertEqual(
+            ptb.fileids()[:4],
+            [
+                'BROWN/CF/CF01.MRG',
+                'BROWN/CF/CF02.MRG',
+                'BROWN/CF/CF03.MRG',
+                'BROWN/CF/CF04.MRG',
+            ],
+        )
+
+    def test_words(self):
+        self.assertEqual(
+            ptb.words('WSJ/00/WSJ_0003.MRG')[:7],
+            ['A', 'form', 'of', 'asbestos', 'once', 'used', '*'],
+        )
+
+    def test_tagged_words(self):
+        self.assertEqual(
+            ptb.tagged_words('WSJ/00/WSJ_0003.MRG')[:3],
+            [('A', 'DT'), ('form', 'NN'), ('of', 'IN')],
+        )
+
+    def test_categories(self):
+        self.assertEqual(
+            ptb.categories(),
+            [
+                'adventure',
+                'belles_lettres',
+                'fiction',
+                'humor',
+                'lore',
+                'mystery',
+                'news',
+                'romance',
+                'science_fiction',
+            ],
+        )
+
+    def test_news_fileids(self):
+        self.assertEqual(
+            ptb.fileids('news')[:3],
+            ['WSJ/00/WSJ_0001.MRG', 'WSJ/00/WSJ_0002.MRG', 'WSJ/00/WSJ_0003.MRG'],
+        )
+
+    def test_category_words(self):
+        self.assertEqual(
+            ptb.words(categories=['humor', 'fiction'])[:6],
+            ['Thirty-three', 'Scotty', 'did', 'not', 'go', 'back'],
+        )
+
+
+@unittest.skip("Skipping test for mwa_ppdb.")
+class TestMWAPPDB(unittest.TestCase):
+    def test_fileids(self):
+        self.assertEqual(
+            mwa_ppdb.fileids(), ['ppdb-1.0-xxxl-lexical.extended.synonyms.uniquepairs']
+        )
+
+    def test_entries(self):
+        self.assertEqual(
+            mwa_ppdb.entries()[:10],
+            [
+                ('10/17/01', '17/10/2001'),
+                ('102,70', '102.70'),
+                ('13,53', '13.53'),
+                ('3.2.5.3.2.1', '3.2.5.3.2.1.'),
+                ('53,76', '53.76'),
+                ('6.9.5', '6.9.5.'),
+                ('7.7.6.3', '7.7.6.3.'),
+                ('76,20', '76.20'),
+                ('79,85', '79.85'),
+                ('93,65', '93.65'),
+            ],
+        )
+
+
+# Re-export teardown_module so the test runner unloads the corpora loaded
+# above once this module's tests finish.
+from nltk.corpus import teardown_module
diff --git a/nlp_resource_data/nltk/test/unit/test_corpus_views.py b/nlp_resource_data/nltk/test/unit/test_corpus_views.py
new file mode 100644 (file)
index 0000000..29d8a3c
--- /dev/null
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+"""
+Corpus View Regression Tests
+"""
+import unittest
+import nltk.data
+from nltk.corpus.reader.util import (
+    StreamBackedCorpusView,
+    read_whitespace_block,
+    read_line_block,
+)
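+
+# StreamBackedCorpusView tokenizes a file lazily, one block at a time, via the
+# supplied read_* callback; the tests below check that the lazy view matches
+# an eager read of the same data.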
+
+
+class TestCorpusViews(unittest.TestCase):
+
+    linetok = nltk.LineTokenizer(blanklines='keep')
+    names = [
+        'corpora/inaugural/README',  # A very short file (160 chars)
+        'corpora/inaugural/1793-Washington.txt',  # A relatively short file (791 chars)
+        'corpora/inaugural/1909-Taft.txt',  # A longer file (32k chars)
+    ]
+
+    def data(self):
+        for name in self.names:
+            f = nltk.data.find(name)
+            with f.open() as fp:
+                file_data = fp.read().decode('utf8')
+            yield f, file_data
+
+    def test_correct_values(self):
+        # Check that corpus views produce the correct sequence of values.
+
+        for f, file_data in self.data():
+            v = StreamBackedCorpusView(f, read_whitespace_block)
+            self.assertEqual(list(v), file_data.split())
+
+            v = StreamBackedCorpusView(f, read_line_block)
+            self.assertEqual(list(v), self.linetok.tokenize(file_data))
+
+    def test_correct_length(self):
+        # Check that the corpus views report the correct lengths:
+
+        for f, file_data in self.data():
+            v = StreamBackedCorpusView(f, read_whitespace_block)
+            self.assertEqual(len(v), len(file_data.split()))
+
+            v = StreamBackedCorpusView(f, read_line_block)
+            self.assertEqual(len(v), len(self.linetok.tokenize(file_data)))
diff --git a/nlp_resource_data/nltk/test/unit/test_data.py b/nlp_resource_data/nltk/test/unit/test_data.py
new file mode 100644 (file)
index 0000000..b586155
--- /dev/null
@@ -0,0 +1,22 @@
+import unittest
+import nltk.data
+from nose.tools import assert_raises
+
+
+class TestData(unittest.TestCase):
+    def test_find_raises_exception(self):
+
+        with assert_raises(LookupError) as context:
+            nltk.data.find('no_such_resource/foo')
+
+        assert type(context.exception) == LookupError, 'Unexpected exception raised'
+
+    def test_find_raises_exception_with_full_resource_name(self):
+        no_such_thing = 'no_such_thing/bar'
+
+        with assert_raises(LookupError) as context:
+            nltk.data.find(no_such_thing)
+
+        assert no_such_thing in str(
+            context.exception
+        ), 'Exception message does not include full resource name'
diff --git a/nlp_resource_data/nltk/test/unit/test_disagreement.py b/nlp_resource_data/nltk/test/unit/test_disagreement.py
new file mode 100644 (file)
index 0000000..6a88868
--- /dev/null
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+import unittest
+
+from nltk.metrics.agreement import AnnotationTask
+
+class TestDisagreement(unittest.TestCase):
+
+    '''
+    Class containing unit tests for nltk.metrics.agreement.Disagreement.
+    '''
+
+    def test_easy(self):
+        '''
+        Simple test, based on
+        https://github.com/foolswood/krippendorffs_alpha/raw/master/krippendorff.pdf.
+        '''
+        data = [('coder1', 'dress1', 'YES'),
+                ('coder2', 'dress1', 'NO'),
+                ('coder3', 'dress1', 'NO'),
+                ('coder1', 'dress2', 'YES'),
+                ('coder2', 'dress2', 'NO'),
+                ('coder3', 'dress3', 'NO'),
+                ]
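+        # Hand check (a sketch of the standard nominal-alpha computation):
+        # dress3 carries a single rating and is ignored, leaving n = 5
+        # pairable values (2 YES, 3 NO).  Observed disagreement
+        # D_o = (4/2 + 2/1) / 5 = 0.8, expected disagreement
+        # D_e = (2*3 + 3*2) / (5*4) = 0.6, so alpha = 1 - 0.8/0.6 = -1/3.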
+        annotation_task = AnnotationTask(data)
+        self.assertAlmostEqual(annotation_task.alpha(), -0.3333333)
+
+    def test_easy2(self):
+        '''
+        Same simple test, with one rating removed.
+        Removing that rating should not matter: Krippendorff's alpha
+        ignores items with only one rating.
+        '''
+        data = [('coder1', 'dress1', 'YES'),
+                ('coder2', 'dress1', 'NO'),
+                ('coder3', 'dress1', 'NO'),
+                ('coder1', 'dress2', 'YES'),
+                ('coder2', 'dress2', 'NO'),
+                ]
+        annotation_task = AnnotationTask(data)
+        self.assertAlmostEqual(annotation_task.alpha(), -0.3333333)
+
+    def test_advanced(self):
+        '''
+        More advanced test, based on
+        http://www.agreestat.com/research_papers/onkrippendorffalpha.pdf
+        '''
+        data = [('A', '1', '1'),
+                ('B', '1', '1'),
+                ('D', '1', '1'),
+                ('A', '2', '2'),
+                ('B', '2', '2'),
+                ('C', '2', '3'),
+                ('D', '2', '2'),
+                ('A', '3', '3'),
+                ('B', '3', '3'),
+                ('C', '3', '3'),
+                ('D', '3', '3'),
+                ('A', '4', '3'),
+                ('B', '4', '3'),
+                ('C', '4', '3'),
+                ('D', '4', '3'),
+                ('A', '5', '2'),
+                ('B', '5', '2'),
+                ('C', '5', '2'),
+                ('D', '5', '2'),
+                ('A', '6', '1'),
+                ('B', '6', '2'),
+                ('C', '6', '3'),
+                ('D', '6', '4'),
+                ('A', '7', '4'),
+                ('B', '7', '4'),
+                ('C', '7', '4'),
+                ('D', '7', '4'),
+                ('A', '8', '1'),
+                ('B', '8', '1'),
+                ('C', '8', '2'),
+                ('D', '8', '1'),
+                ('A', '9', '2'),
+                ('B', '9', '2'),
+                ('C', '9', '2'),
+                ('D', '9', '2'),
+                ('B', '10', '5'),
+                ('C', '10', '5'),
+                ('D', '10', '5'),
+                ('C', '11', '1'),
+                ('D', '11', '1'),
+                ('C', '12', '3'),
+                ]
+        annotation_task = AnnotationTask(data)
+        self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632)
+
+    def test_advanced2(self):
+        '''
+        Same more advanced example, but with one rating removed.
+        Again, removing that one rating should not matter.
+        '''
+        data = [('A', '1', '1'),
+                ('B', '1', '1'),
+                ('D', '1', '1'),
+                ('A', '2', '2'),
+                ('B', '2', '2'),
+                ('C', '2', '3'),
+                ('D', '2', '2'),
+                ('A', '3', '3'),
+                ('B', '3', '3'),
+                ('C', '3', '3'),
+                ('D', '3', '3'),
+                ('A', '4', '3'),
+                ('B', '4', '3'),
+                ('C', '4', '3'),
+                ('D', '4', '3'),
+                ('A', '5', '2'),
+                ('B', '5', '2'),
+                ('C', '5', '2'),
+                ('D', '5', '2'),
+                ('A', '6', '1'),
+                ('B', '6', '2'),
+                ('C', '6', '3'),
+                ('D', '6', '4'),
+                ('A', '7', '4'),
+                ('B', '7', '4'),
+                ('C', '7', '4'),
+                ('D', '7', '4'),
+                ('A', '8', '1'),
+                ('B', '8', '1'),
+                ('C', '8', '2'),
+                ('D', '8', '1'),
+                ('A', '9', '2'),
+                ('B', '9', '2'),
+                ('C', '9', '2'),
+                ('D', '9', '2'),
+                ('B', '10', '5'),
+                ('C', '10', '5'),
+                ('D', '10', '5'),
+                ('C', '11', '1'),
+                ('D', '11', '1'),
+                ('C', '12', '3'),
+                ]
+        annotation_task = AnnotationTask(data)
+        self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632)
+
diff --git a/nlp_resource_data/nltk/test/unit/test_freqdist.py b/nlp_resource_data/nltk/test/unit/test_freqdist.py
new file mode 100644 (file)
index 0000000..a73fd02
--- /dev/null
@@ -0,0 +1,16 @@
+import unittest
+import nltk
+
+
+class TestFreqDist(unittest.TestCase):
+
+    def test_iterating_returns_an_iterator_ordered_by_frequency(self):
+
+        samples = ['one', 'two', 'two']
+
+        distribution = nltk.FreqDist(samples)
+
+        most_frequent, less_frequent = [entry for entry in distribution]
+
+        self.assertEqual(most_frequent, 'two')
+        self.assertEqual(less_frequent, 'one')
diff --git a/nlp_resource_data/nltk/test/unit/test_hmm.py b/nlp_resource_data/nltk/test/unit/test_hmm.py
new file mode 100644 (file)
index 0000000..b9770ca
--- /dev/null
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+from nltk.tag import hmm
+
+
+def _wikipedia_example_hmm():
+    # Example from wikipedia
+    # (http://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm)
+
+    states = ['rain', 'no rain']
+    symbols = ['umbrella', 'no umbrella']
+
+    A = [[0.7, 0.3], [0.3, 0.7]]  # transition probabilities
+    B = [[0.9, 0.1], [0.2, 0.8]]  # emission probabilities
+    pi = [0.5, 0.5]  # initial probabilities
+
+    seq = ['umbrella', 'umbrella', 'no umbrella', 'umbrella', 'umbrella']
+    seq = list(zip(seq, [None] * len(seq)))
+
+    model = hmm._create_hmm_tagger(states, symbols, A, B, pi)
+    return model, states, symbols, seq
+
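+# For reference (a sketch, not part of the NLTK API): the tests below
+# exercise the standard forward recursion
+#     alpha_1(j) = pi_j * b_j(o_1)
+#     alpha_t(j) = sum_i alpha_{t-1}(i) * a_{ij} * b_j(o_t)
+# _forward_probability and _backward_probability return log2 probabilities,
+# hence the `2 **` before comparing against plain numbers.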
+
+def test_forward_probability():
+    from numpy.testing import assert_array_almost_equal
+
+    # example from p. 385, Huang et al
+    model, states, symbols = hmm._market_hmm_example()
+    seq = [('up', None), ('up', None)]
+    expected = [[0.35, 0.02, 0.09], [0.1792, 0.0085, 0.0357]]
+
+    fp = 2 ** model._forward_probability(seq)
+
+    assert_array_almost_equal(fp, expected)
+
+
+def test_forward_probability2():
+    from numpy.testing import assert_array_almost_equal
+
+    model, states, symbols, seq = _wikipedia_example_hmm()
+    fp = 2 ** model._forward_probability(seq)
+
+    # examples in wikipedia are normalized
+    fp = (fp.T / fp.sum(axis=1)).T
+
+    wikipedia_results = [
+        [0.8182, 0.1818],
+        [0.8834, 0.1166],
+        [0.1907, 0.8093],
+        [0.7308, 0.2692],
+        [0.8673, 0.1327],
+    ]
+
+    assert_array_almost_equal(wikipedia_results, fp, 4)
+
+
+def test_backward_probability():
+    from numpy.testing import assert_array_almost_equal
+
+    model, states, symbols, seq = _wikipedia_example_hmm()
+
+    bp = 2 ** model._backward_probability(seq)
+    # examples in wikipedia are normalized
+
+    bp = (bp.T / bp.sum(axis=1)).T
+
+    wikipedia_results = [
+        # Forward-backward algorithm doesn't need b0_5,
+        # so .backward_probability doesn't compute it.
+        # [0.6469, 0.3531],
+        [0.5923, 0.4077],
+        [0.3763, 0.6237],
+        [0.6533, 0.3467],
+        [0.6273, 0.3727],
+        [0.5, 0.5],
+    ]
+
+    assert_array_almost_equal(wikipedia_results, bp, 4)
+
+
+def setup_module(module):
+    from nose import SkipTest
+
+    try:
+        import numpy
+    except ImportError:
+        raise SkipTest("numpy is required for nltk.test.test_hmm")
diff --git a/nlp_resource_data/nltk/test/unit/test_json2csv_corpus.py b/nlp_resource_data/nltk/test/unit/test_json2csv_corpus.py
new file mode 100644 (file)
index 0000000..6714d9c
--- /dev/null
@@ -0,0 +1,235 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Twitter client
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Lorenzo Rubio <lrnzcig@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Regression tests for `json2csv()` and `json2csv_entities()` in Twitter
+package.
+
+"""
+
+import os
+import unittest
+from tempfile import TemporaryDirectory
+
+from nltk.corpus import twitter_samples
+from nltk.twitter.common import json2csv, json2csv_entities
+
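+# json2csv writes one CSV row per tweet with the requested top-level fields;
+# json2csv_entities additionally unrolls one row per entity (hashtag, user
+# mention, URL, ...) attached to each tweet.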
+
+def are_files_identical(filename1, filename2, debug=False):
+    """
+    Compare two files line by line after sorting their lines, ignoring
+    line order, carriage returns, and surrounding whitespace.
+    """
+    with open(filename1, "rb") as fileA:
+        with open(filename2, "rb") as fileB:
+            result = True
+            for lineA, lineB in zip(
+                sorted(fileA.readlines()), sorted(fileB.readlines())
+            ):
+                if lineA.strip() != lineB.strip():
+                    if debug:
+                        print(
+                            "Error while comparing files. "
+                            + "First difference at line below."
+                        )
+                        print("=> Output file line: {0}".format(lineA))
+                        print("=> Refer. file line: {0}".format(lineB))
+                    result = False
+                    break
+            return result
+
+
+class TestJSON2CSV(unittest.TestCase):
+    def setUp(self):
+        # The context manager closes the file; no explicit close() is needed.
+        with open(twitter_samples.abspath("tweets.20150430-223406.json")) as infile:
+            self.infile = [next(infile) for _ in range(100)]
+        self.msg = "Test and reference files are not the same"
+        self.subdir = os.path.join(os.path.dirname(__file__), 'files')
+
+    def tearDown(self):
+        return
+
+    def test_textoutput(self):
+        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.text.csv.ref')
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.text.csv')
+            json2csv(self.infile, outfn, ['text'], gzip_compress=False)
+            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+    def test_tweet_metadata(self):
+        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.tweet.csv.ref')
+        fields = [
+            'created_at',
+            'favorite_count',
+            'id',
+            'in_reply_to_status_id',
+            'in_reply_to_user_id',
+            'retweet_count',
+            'retweeted',
+            'text',
+            'truncated',
+            'user.id',
+        ]
+
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.tweet.csv')
+            json2csv(self.infile, outfn, fields, gzip_compress=False)
+            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+    def test_user_metadata(self):
+        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.user.csv.ref')
+        fields = ['id', 'text', 'user.id', 'user.followers_count', 'user.friends_count']
+
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.user.csv')
+            json2csv(self.infile, outfn, fields, gzip_compress=False)
+            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+    def test_tweet_hashtag(self):
+        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.hashtag.csv.ref')
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.hashtag.csv')
+            json2csv_entities(
+                self.infile,
+                outfn,
+                ['id', 'text'],
+                'hashtags',
+                ['text'],
+                gzip_compress=False,
+            )
+            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+    def test_tweet_usermention(self):
+        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.usermention.csv.ref')
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.usermention.csv')
+            json2csv_entities(
+                self.infile,
+                outfn,
+                ['id', 'text'],
+                'user_mentions',
+                ['id', 'screen_name'],
+                gzip_compress=False,
+            )
+            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+    def test_tweet_media(self):
+        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.media.csv.ref')
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.media.csv')
+            json2csv_entities(
+                self.infile,
+                outfn,
+                ['id'],
+                'media',
+                ['media_url', 'url'],
+                gzip_compress=False,
+            )
+
+            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+    def test_tweet_url(self):
+        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.url.csv.ref')
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.url.csv')
+            json2csv_entities(
+                self.infile,
+                outfn,
+                ['id'],
+                'urls',
+                ['url', 'expanded_url'],
+                gzip_compress=False,
+            )
+
+            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+    def test_userurl(self):
+        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.userurl.csv.ref')
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.userurl.csv')
+            json2csv_entities(
+                self.infile,
+                outfn,
+                ['id', 'screen_name'],
+                'user.urls',
+                ['url', 'expanded_url'],
+                gzip_compress=False,
+            )
+
+            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+    def test_tweet_place(self):
+        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.place.csv.ref')
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.place.csv')
+            json2csv_entities(
+                self.infile,
+                outfn,
+                ['id', 'text'],
+                'place',
+                ['name', 'country'],
+                gzip_compress=False,
+            )
+
+            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+    def test_tweet_place_boundingbox(self):
+        ref_fn = os.path.join(
+            self.subdir, 'tweets.20150430-223406.placeboundingbox.csv.ref'
+        )
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.placeboundingbox.csv')
+            json2csv_entities(
+                self.infile,
+                outfn,
+                ['id', 'name'],
+                'place.bounding_box',
+                ['coordinates'],
+                gzip_compress=False,
+            )
+
+            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+    def test_retweet_original_tweet(self):
+        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.retweet.csv.ref')
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.retweet.csv')
+            json2csv_entities(
+                self.infile,
+                outfn,
+                ['id'],
+                'retweeted_status',
+                [
+                    'created_at',
+                    'favorite_count',
+                    'id',
+                    'in_reply_to_status_id',
+                    'in_reply_to_user_id',
+                    'retweet_count',
+                    'text',
+                    'truncated',
+                    'user.id',
+                ],
+                gzip_compress=False,
+            )
+
+            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+    def test_file_is_wrong(self):
+        """
+        Sanity check that file comparison is not giving false positives.
+        """
+        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.retweet.csv.ref')
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.text.csv')
+            json2csv(self.infile, outfn, ['text'], gzip_compress=False)
+            self.assertFalse(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/nlp_resource_data/nltk/test/unit/test_json_serialization.py b/nlp_resource_data/nltk/test/unit/test_json_serialization.py
new file mode 100644 (file)
index 0000000..4667fbf
--- /dev/null
@@ -0,0 +1,87 @@
+import unittest
+
+from nltk.corpus import brown
+from nltk.jsontags import JSONTaggedDecoder, JSONTaggedEncoder
+from nltk.tag import DefaultTagger, RegexpTagger, AffixTagger
+from nltk.tag import UnigramTagger, BigramTagger, TrigramTagger, NgramTagger
+from nltk.tag import PerceptronTagger
+from nltk.tag import BrillTaggerTrainer, BrillTagger
+from nltk.tag.brill import nltkdemo18
+
+
+class TestJSONSerialization(unittest.TestCase):
+    def setUp(self):
+        self.corpus = brown.tagged_sents()[:35]
+        self.decoder = JSONTaggedDecoder()
+        self.encoder = JSONTaggedEncoder()
+        self.default_tagger = DefaultTagger("NN")
+
+    def test_default_tagger(self):
+        encoded = self.encoder.encode(self.default_tagger)
+        decoded = self.decoder.decode(encoded)
+
+        self.assertEqual(repr(self.default_tagger), repr(decoded))
+        self.assertEqual(self.default_tagger._tag, decoded._tag)
+
+    def test_regexp_tagger(self):
+        tagger = RegexpTagger([(r".*", "NN")], backoff=self.default_tagger)
+
+        encoded = self.encoder.encode(tagger)
+        decoded = self.decoder.decode(encoded)
+
+        self.assertEqual(repr(tagger), repr(decoded))
+        self.assertEqual(repr(tagger.backoff), repr(decoded.backoff))
+        self.assertEqual(tagger._regexps, decoded._regexps)
+
+    def test_affix_tagger(self):
+        tagger = AffixTagger(self.corpus, backoff=self.default_tagger)
+
+        encoded = self.encoder.encode(tagger)
+        decoded = self.decoder.decode(encoded)
+
+        self.assertEqual(repr(tagger), repr(decoded))
+        self.assertEqual(repr(tagger.backoff), repr(decoded.backoff))
+        self.assertEqual(tagger._affix_length, decoded._affix_length)
+        self.assertEqual(tagger._min_word_length, decoded._min_word_length)
+        self.assertEqual(tagger._context_to_tag, decoded._context_to_tag)
+
+    def test_ngram_taggers(self):
+        unitagger = UnigramTagger(self.corpus, backoff=self.default_tagger)
+        bitagger = BigramTagger(self.corpus, backoff=unitagger)
+        tritagger = TrigramTagger(self.corpus, backoff=bitagger)
+        ntagger = NgramTagger(4, self.corpus, backoff=tritagger)
+
+        encoded = self.encoder.encode(ntagger)
+        decoded = self.decoder.decode(encoded)
+
+        self.assertEqual(repr(ntagger), repr(decoded))
+        self.assertEqual(repr(tritagger), repr(decoded.backoff))
+        self.assertEqual(repr(bitagger), repr(decoded.backoff.backoff))
+        self.assertEqual(repr(unitagger), repr(decoded.backoff.backoff.backoff))
+        self.assertEqual(repr(self.default_tagger),
+                         repr(decoded.backoff.backoff.backoff.backoff))
+
+    def test_perceptron_tagger(self):
+        tagger = PerceptronTagger(load=False)
+        tagger.train(self.corpus)
+
+        encoded = self.encoder.encode(tagger)
+        decoded = self.decoder.decode(encoded)
+
+        self.assertEqual(tagger.model.weights, decoded.model.weights)
+        self.assertEqual(tagger.tagdict, decoded.tagdict)
+        self.assertEqual(tagger.classes, decoded.classes)
+
+    def test_brill_tagger(self):
+        trainer = BrillTaggerTrainer(self.default_tagger, nltkdemo18(),
+                                     deterministic=True)
+        tagger = trainer.train(self.corpus, max_rules=30)
+
+        encoded = self.encoder.encode(tagger)
+        decoded = self.decoder.decode(encoded)
+
+        self.assertEqual(repr(tagger._initial_tagger),
+                         repr(decoded._initial_tagger))
+        self.assertEqual(tagger._rules, decoded._rules)
+        self.assertEqual(tagger._training_stats, decoded._training_stats)
+
diff --git a/nlp_resource_data/nltk/test/unit/test_naivebayes.py b/nlp_resource_data/nltk/test/unit/test_naivebayes.py
new file mode 100644 (file)
index 0000000..ac9ff9b
--- /dev/null
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+
+
+import unittest
+from nltk.classify.naivebayes import NaiveBayesClassifier
+
+
+class NaiveBayesClassifierTest(unittest.TestCase):
+    def test_simple(self):
+        training_features = [
+            ({'nice': True, 'good': True}, 'positive'),
+            ({'bad': True, 'mean': True}, 'negative'),
+        ]
+
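+        # NaiveBayesClassifier scores each label by P(label) * prod_f P(f | label);
+        # with one training document per label the priors are equal, so the
+        # feature likelihoods decide the outcome.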
+        classifier = NaiveBayesClassifier.train(training_features)
+
+        result = classifier.prob_classify({'nice': True})
+        self.assertTrue(result.prob('positive') > result.prob('negative'))
+        self.assertEqual(result.max(), 'positive')
+
+        result = classifier.prob_classify({'bad': True})
+        self.assertTrue(result.prob('positive') < result.prob('negative'))
+        self.assertEqual(result.max(), 'negative')
diff --git a/nlp_resource_data/nltk/test/unit/test_nombank.py b/nlp_resource_data/nltk/test/unit/test_nombank.py
new file mode 100644 (file)
index 0000000..8f2d9d8
--- /dev/null
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.corpus.nombank
+"""
+
+import unittest
+
+from nltk.corpus import nombank
+# Load the nombank once.
+nombank.nouns()
+
+class NombankDemo(unittest.TestCase):
+    def test_numbers(self):
+        # No. of instances.
+        self.assertEqual(len(nombank.instances()), 114574)
+        # No. of rolesets
+        self.assertEqual(len(nombank.rolesets()), 5577)
+        # No. of nouns.
+        self.assertEqual(len(nombank.nouns()), 4704)
+
+
+    def test_instance(self):
+        self.assertEqual(nombank.instances()[0].roleset, 'perc-sign.01')
+
+    def test_framefiles_fileids(self):
+        self.assertEqual(len(nombank.fileids()), 4705)
+        self.assertTrue(all(fileid.endswith('.xml') for fileid in nombank.fileids()))
diff --git a/nlp_resource_data/nltk/test/unit/test_pl196x.py b/nlp_resource_data/nltk/test/unit/test_pl196x.py
new file mode 100644 (file)
index 0000000..d90d94c
--- /dev/null
@@ -0,0 +1,14 @@
+import unittest
+
+import nltk
+from nltk.corpus.reader import pl196x
+
+
+class TestCorpusViews(unittest.TestCase):
+
+    def test_corpus_reader(self):
+        pl196x_dir = nltk.data.find('corpora/pl196x')
+        pl = pl196x.Pl196xCorpusReader(pl196x_dir, r'.*\.xml',
+                                       textids='textids.txt',
+                                       cat_file='cats.txt')
+        pl.tagged_words(fileids=pl.fileids(), categories='cats.txt')
diff --git a/nlp_resource_data/nltk/test/unit/test_pos_tag.py b/nlp_resource_data/nltk/test/unit/test_pos_tag.py
new file mode 100644 (file)
index 0000000..0aced19
--- /dev/null
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for nltk.pos_tag
+"""
+
+
+import unittest
+
+from nltk import word_tokenize, pos_tag
+
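+# pos_tag defaults to the English perceptron tagger; tagset='universal'
+# maps the native tags onto the universal tagset, and lang='rus' selects
+# the Russian tagger instead.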
+
+class TestPosTag(unittest.TestCase):
+    def test_pos_tag_eng(self):
+        text = "John's big idea isn't all that bad."
+        expected_tagged = [
+            ('John', 'NNP'),
+            ("'s", 'POS'),
+            ('big', 'JJ'),
+            ('idea', 'NN'),
+            ('is', 'VBZ'),
+            ("n't", 'RB'),
+            ('all', 'PDT'),
+            ('that', 'DT'),
+            ('bad', 'JJ'),
+            ('.', '.'),
+        ]
+        assert pos_tag(word_tokenize(text)) == expected_tagged
+
+    def test_pos_tag_eng_universal(self):
+        text = "John's big idea isn't all that bad."
+        expected_tagged = [
+            ('John', 'NOUN'),
+            ("'s", 'PRT'),
+            ('big', 'ADJ'),
+            ('idea', 'NOUN'),
+            ('is', 'VERB'),
+            ("n't", 'ADV'),
+            ('all', 'DET'),
+            ('that', 'DET'),
+            ('bad', 'ADJ'),
+            ('.', '.'),
+        ]
+        assert pos_tag(word_tokenize(text), tagset='universal') == expected_tagged
+
+    def test_pos_tag_rus(self):
+        text = u"Илья оторопел и дважды перечитал бумажку."
+        expected_tagged = [
+            ('Илья', 'S'),
+            ('оторопел', 'V'),
+            ('и', 'CONJ'),
+            ('дважды', 'ADV'),
+            ('перечитал', 'V'),
+            ('бумажку', 'S'),
+            ('.', 'NONLEX'),
+        ]
+        assert pos_tag(word_tokenize(text), lang='rus') == expected_tagged
+
+    def test_pos_tag_rus_universal(self):
+        text = u"Илья оторопел и дважды перечитал бумажку."
+        expected_tagged = [
+            ('Илья', 'NOUN'),
+            ('оторопел', 'VERB'),
+            ('и', 'CONJ'),
+            ('дважды', 'ADV'),
+            ('перечитал', 'VERB'),
+            ('бумажку', 'NOUN'),
+            ('.', '.'),
+        ]
+        assert (
+            pos_tag(word_tokenize(text), tagset='universal', lang='rus')
+            == expected_tagged
+        )
+
+    def test_pos_tag_unknown_lang(self):
+        text = u"모르겠 습니 다"
+        self.assertRaises(NotImplementedError, pos_tag, word_tokenize(text), lang='kor')
+        # Test for default kwarg, `lang=None`
+        self.assertRaises(NotImplementedError, pos_tag, word_tokenize(text), lang=None)
+
+    def test_unspecified_lang(self):
+        # With lang unspecified, pos_tag falls back to the English tagger,
+        # which mis-tags this Korean text.
+        text = u"모르겠 습니 다"
+        expected_but_wrong = [('모르겠', 'JJ'), ('습니', 'NNP'), ('다', 'NN')]
+        assert pos_tag(word_tokenize(text)) == expected_but_wrong
diff --git a/nlp_resource_data/nltk/test/unit/test_rte_classify.py b/nlp_resource_data/nltk/test/unit/test_rte_classify.py
new file mode 100644 (file)
index 0000000..3ba2d06
--- /dev/null
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+import unittest
+
+import nltk
+
+from nltk.corpus import rte as rte_corpus
+from nltk.classify.rte_classify import RTEFeatureExtractor, rte_features, rte_classifier
+
+expected_from_rte_feature_extraction = """
+alwayson        => True
+ne_hyp_extra    => 0
+ne_overlap      => 1
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 3
+word_overlap    => 3
+
+alwayson        => True
+ne_hyp_extra    => 0
+ne_overlap      => 1
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 2
+word_overlap    => 1
+
+alwayson        => True
+ne_hyp_extra    => 1
+ne_overlap      => 1
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 1
+word_overlap    => 2
+
+alwayson        => True
+ne_hyp_extra    => 1
+ne_overlap      => 0
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 6
+word_overlap    => 2
+
+alwayson        => True
+ne_hyp_extra    => 1
+ne_overlap      => 0
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 4
+word_overlap    => 0
+
+alwayson        => True
+ne_hyp_extra    => 1
+ne_overlap      => 0
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 3
+word_overlap    => 1
+"""
+
+
+class RTEClassifierTest(unittest.TestCase):
+    # Test the feature extraction method.
+    def test_rte_feature_extraction(self):
+        pairs = rte_corpus.pairs(['rte1_dev.xml'])[:6]
+        test_output = [
+            "%-15s => %s" % (key, rte_features(pair)[key])
+            for pair in pairs
+            for key in sorted(rte_features(pair))
+        ]
+        expected_output = expected_from_rte_feature_extraction.strip().split('\n')
+        # Remove null strings.
+        expected_output = list(filter(None, expected_output))
+        self.assertEqual(test_output, expected_output)
+
+    # Test the RTEFeatureExtractor object.
+    def test_feature_extractor_object(self):
+        rtepair = rte_corpus.pairs(['rte3_dev.xml'])[33]
+        extractor = RTEFeatureExtractor(rtepair)
+        self.assertEqual(extractor.hyp_words, {'member', 'China', 'SCO.'})
+        self.assertEqual(extractor.overlap('word'), set())
+        self.assertEqual(extractor.overlap('ne'), {'China'})
+        self.assertEqual(extractor.hyp_extra('word'), {'member'})
+
+    # Test the RTE classifier training.
+    def test_rte_classification_without_megam(self):
+        clf = rte_classifier('IIS')
+        clf = rte_classifier('GIS')
+
+    @unittest.skip("Skipping tests with dependencies on MEGAM")
+    def test_rte_classification_with_megam(self):
+        nltk.config_megam('/usr/local/bin/megam')
+        clf = rte_classifier('megam')
+        clf = rte_classifier('BFGS')
diff --git a/nlp_resource_data/nltk/test/unit/test_seekable_unicode_stream_reader.py b/nlp_resource_data/nltk/test/unit/test_seekable_unicode_stream_reader.py
new file mode 100644 (file)
index 0000000..c5d1583
--- /dev/null
@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+"""
+The following test performs a random series of reads, seeks, and
+tells, and checks that the results are consistent.
+"""
+import random
+import functools
+from io import BytesIO
+from nltk.corpus.reader import SeekableUnicodeStreamReader
+
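+# SeekableUnicodeStreamReader decodes a byte stream on the fly while keeping
+# tell()/seek() offsets in terms of the underlying bytes, so any position
+# returned by tell() can be seek()ed back to safely.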
+
+def check_reader(unicode_string, encoding, n=1000):
+    bytestr = unicode_string.encode(encoding)
+    strlen = len(unicode_string)
+    stream = BytesIO(bytestr)
+    reader = SeekableUnicodeStreamReader(stream, encoding)
+    # Find all character positions
+    chars = []
+    while True:
+        pos = reader.tell()
+        chars.append((pos, reader.read(1)))
+        if chars[-1][1] == '':
+            break
+    # For each position, build the string running from that position to
+    # the end of the stream.
+    strings = {pos: '' for (pos, c) in chars}
+    for pos1, char in chars:
+        for pos2, _ in chars:
+            if pos2 <= pos1:
+                strings[pos2] += char
+    while True:
+        op = random.choice('tsrr')  # 'r' appears twice: reads are chosen half the time
+        # Check our position?
+        if op == 't':  # tell
+            reader.tell()
+        # Perform a seek?
+        if op == 's':  # seek
+            new_pos = random.choice([p for (p, c) in chars])
+            reader.seek(new_pos)
+        # Perform a read?
+        if op == 'r':  # read
+            if random.random() < 0.3:
+                pos = reader.tell()
+            else:
+                pos = None
+            if random.random() < 0.2:
+                size = None
+            elif random.random() < 0.8:
+                size = random.randint(0, int(strlen / 6))
+            else:
+                size = random.randint(0, strlen + 20)
+            if random.random() < 0.8:
+                s = reader.read(size)
+            else:
+                s = reader.readline(size)
+            # check that everything's consistent
+            if pos is not None:
+                assert pos in strings
+                assert strings[pos].startswith(s)
+                n -= 1
+                if n == 0:
+                    return 'passed'
+
+
+# Call the randomized test function `check_reader` with a variety of
+# input strings and encodings.
+
+ENCODINGS = ['ascii', 'latin1', 'greek', 'hebrew', 'utf-16', 'utf-8']
+
+STRINGS = [
+    """
+    This is a test file.
+    It is fairly short.
+    """,
+    "This file can be encoded with latin1. \x83",
+    """\
+    This is a test file.
+    Here's a blank line:
+
+    And here's some unicode: \xee \u0123 \uffe3
+    """,
+    """\
+    This is a test file.
+    Unicode characters: \xf3 \u2222 \u3333\u4444 \u5555
+    """,
+]
+
+
+def test_reader():
+    for string in STRINGS:
+        for encoding in ENCODINGS:
+            try:
+                # skip strings that can't be encoded with the current encoding
+                string.encode(encoding)
+                yield check_reader, string, encoding
+            except UnicodeEncodeError:
+                pass
+
+
+# nose prints the full string arguments in verbose mode, which is noisy,
+# so the large-string test is kept separate.
+
+LARGE_STRING = (
+    """\
+This is a larger file.  It has some lines that are longer \
+than 72 characters.  It's got lots of repetition.  Here's \
+some unicode chars: \xee \u0123 \uffe3 \ueeee \u2345
+
+How fun!  Let's repeat it twenty times.
+"""
+    * 10
+)
+
+
+def test_reader_on_large_string():
+    for encoding in ENCODINGS:
+        try:
+            # skip strings that can't be encoded with the current encoding
+            LARGE_STRING.encode(encoding)
+
+            def _check(encoding, n=1000):
+                check_reader(LARGE_STRING, encoding, n)
+
+            yield _check, encoding
+
+        except UnicodeEncodeError:
+            pass
+
+
+def test_reader_stream_is_closed():
+    reader = SeekableUnicodeStreamReader(BytesIO(b''), 'ascii')
+    assert reader.stream.closed is False
+    reader.__del__()
+    assert reader.stream.closed is True
+
+
+def teardown_module(module=None):
+    import gc
+
+    gc.collect()
diff --git a/nlp_resource_data/nltk/test/unit/test_senna.py b/nlp_resource_data/nltk/test/unit/test_senna.py
new file mode 100644 (file)
index 0000000..be5fed0
--- /dev/null
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for Senna
+"""
+
+from os import environ, path, sep
+
+import logging
+import unittest
+
+from nltk.classify import Senna
+from nltk.tag import SennaTagger, SennaChunkTagger, SennaNERTagger
+
+# Set Senna executable path for tests if it is not specified as an environment variable
+if 'SENNA' in environ:
+    SENNA_EXECUTABLE_PATH = path.normpath(environ['SENNA']) + sep
+else:
+    SENNA_EXECUTABLE_PATH = '/usr/share/senna-v3.0'
+
+senna_is_installed = path.exists(SENNA_EXECUTABLE_PATH)
+
+
+@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
+class TestSennaPipeline(unittest.TestCase):
+    """Unittest for nltk.classify.senna"""
+
+    def test_senna_pipeline(self):
+        """Senna pipeline interface"""
+
+        pipeline = Senna(SENNA_EXECUTABLE_PATH, ['pos', 'chk', 'ner'])
+        sent = 'Dusseldorf is an international business center'.split()
+        result = [
+            (token['word'], token['chk'], token['ner'], token['pos'])
+            for token in pipeline.tag(sent)
+        ]
+        expected = [
+            ('Dusseldorf', 'B-NP', 'B-LOC', 'NNP'),
+            ('is', 'B-VP', 'O', 'VBZ'),
+            ('an', 'B-NP', 'O', 'DT'),
+            ('international', 'I-NP', 'O', 'JJ'),
+            ('business', 'I-NP', 'O', 'NN'),
+            ('center', 'I-NP', 'O', 'NN'),
+        ]
+        self.assertEqual(result, expected)
+
+
+@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
+class TestSennaTagger(unittest.TestCase):
+    """Unittest for nltk.tag.senna"""
+
+    def test_senna_tagger(self):
+        tagger = SennaTagger(SENNA_EXECUTABLE_PATH)
+        result = tagger.tag('What is the airspeed of an unladen swallow ?'.split())
+        expected = [
+            ('What', 'WP'),
+            ('is', 'VBZ'),
+            ('the', 'DT'),
+            ('airspeed', 'NN'),
+            ('of', 'IN'),
+            ('an', 'DT'),
+            ('unladen', 'NN'),
+            ('swallow', 'NN'),
+            ('?', '.'),
+        ]
+        self.assertEqual(result, expected)
+
+    def test_senna_chunk_tagger(self):
+        chktagger = SennaChunkTagger(SENNA_EXECUTABLE_PATH)
+        result_1 = chktagger.tag('What is the airspeed of an unladen swallow ?'.split())
+        expected_1 = [
+            ('What', 'B-NP'),
+            ('is', 'B-VP'),
+            ('the', 'B-NP'),
+            ('airspeed', 'I-NP'),
+            ('of', 'B-PP'),
+            ('an', 'B-NP'),
+            ('unladen', 'I-NP'),
+            ('swallow', 'I-NP'),
+            ('?', 'O'),
+        ]
+
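+        # bio_to_chunks joins each NP chunk's tokens into a string and pairs
+        # it with the '-'-joined positions of its tokens, as checked below.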
+        result_2 = list(chktagger.bio_to_chunks(result_1, chunk_type='NP'))
+        expected_2 = [
+            ('What', '0'),
+            ('the airspeed', '2-3'),
+            ('an unladen swallow', '5-6-7'),
+        ]
+        self.assertEqual(result_1, expected_1)
+        self.assertEqual(result_2, expected_2)
+
+    def test_senna_ner_tagger(self):
+        nertagger = SennaNERTagger(SENNA_EXECUTABLE_PATH)
+        result_1 = nertagger.tag('Shakespeare theatre was in London .'.split())
+        expected_1 = [
+            ('Shakespeare', 'B-PER'),
+            ('theatre', 'O'),
+            ('was', 'O'),
+            ('in', 'O'),
+            ('London', 'B-LOC'),
+            ('.', 'O'),
+        ]
+
+        result_2 = nertagger.tag('UN headquarters are in NY , USA .'.split())
+        expected_2 = [
+            ('UN', 'B-ORG'),
+            ('headquarters', 'O'),
+            ('are', 'O'),
+            ('in', 'O'),
+            ('NY', 'B-LOC'),
+            (',', 'O'),
+            ('USA', 'B-LOC'),
+            ('.', 'O'),
+        ]
+        self.assertEqual(result_1, expected_1)
+        self.assertEqual(result_2, expected_2)
diff --git a/nlp_resource_data/nltk/test/unit/test_stem.py b/nlp_resource_data/nltk/test/unit/test_stem.py
new file mode 100644 (file)
index 0000000..52a0d66
--- /dev/null
@@ -0,0 +1,145 @@
+# -*- coding: utf-8 -*-
+
+import os
+import unittest
+from contextlib import closing
+
+from nltk import data
+from nltk.stem.snowball import SnowballStemmer
+from nltk.stem.porter import PorterStemmer
+
+
+class SnowballTest(unittest.TestCase):
+    def test_arabic(self):
+        """
+        Unit test for the Snowball Arabic light stemmer,
+        which handles both prefixes and suffixes.
+        """
+        # Test with ignore_stopwords=True.
+        ar_stemmer = SnowballStemmer("arabic", True)
+        assert ar_stemmer.stem('الْعَرَبِــــــيَّة') == "عرب"
+        assert ar_stemmer.stem("العربية") == "عرب"
+        assert ar_stemmer.stem("فقالوا") == "قال"
+        assert ar_stemmer.stem("الطالبات") == "طالب"
+        assert ar_stemmer.stem("فالطالبات") == "طالب"
+        assert ar_stemmer.stem("والطالبات") == "طالب"
+        assert ar_stemmer.stem("الطالبون") == "طالب"
+        assert ar_stemmer.stem("اللذان") == "اللذان"
+        assert ar_stemmer.stem("من") == "من"
+        # Test with ignore_stopwords=False.
+        ar_stemmer = SnowballStemmer("arabic", False)
+        assert ar_stemmer.stem("اللذان") == "اللذ"  # this is a stop word
+        assert ar_stemmer.stem("الطالبات") == "طالب"
+        assert ar_stemmer.stem("الكلمات") == "كلم"
+        # Test creating the Arabic stemmer without passing ignore_stopwords.
+        ar_stemmer = SnowballStemmer("arabic")
+        assert ar_stemmer.stem('الْعَرَبِــــــيَّة') == "عرب"
+        assert ar_stemmer.stem("العربية") == "عرب"
+        assert ar_stemmer.stem("فقالوا") == "قال"
+        assert ar_stemmer.stem("الطالبات") == "طالب"
+        assert ar_stemmer.stem("الكلمات") == "كلم"
+
+    def test_russian(self):
+        stemmer_russian = SnowballStemmer("russian")
+        assert stemmer_russian.stem("авантненькая") == "авантненьк"
+
+    def test_german(self):
+        stemmer_german = SnowballStemmer("german")
+        stemmer_german2 = SnowballStemmer("german", ignore_stopwords=True)
+
+        assert stemmer_german.stem("Schr\xe4nke") == 'schrank'
+        assert stemmer_german2.stem("Schr\xe4nke") == 'schrank'
+
+        assert stemmer_german.stem("keinen") == 'kein'
+        assert stemmer_german2.stem("keinen") == 'keinen'
+
+    def test_spanish(self):
+        stemmer = SnowballStemmer('spanish')
+
+        assert stemmer.stem("Visionado") == 'vision'
+
+        # The word 'algue' used to raise an IndexError
+        assert stemmer.stem("algue") == 'algu'
+
+    def test_short_strings_bug(self):
+        stemmer = SnowballStemmer('english')
+        assert stemmer.stem("y's") == 'y'
+
+
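+# PorterStemmer supports three modes, exercised below: ORIGINAL_ALGORITHM
+# (the algorithm exactly as Porter published it), MARTIN_EXTENSIONS (Martin
+# Porter's later additions to his own implementation), and NLTK_EXTENSIONS
+# (NLTK's further tweaks, the default).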
+class PorterTest(unittest.TestCase):
+    def _vocabulary(self):
+        with closing(
+            data.find('stemmers/porter_test/porter_vocabulary.txt').open(
+                encoding='utf-8'
+            )
+        ) as fp:
+            return fp.read().splitlines()
+
+    def _test_against_expected_output(self, stemmer_mode, expected_stems):
+        stemmer = PorterStemmer(mode=stemmer_mode)
+        for word, true_stem in zip(self._vocabulary(), expected_stems):
+            our_stem = stemmer.stem(word)
+            assert our_stem == true_stem, (
+                "%s should stem to %s in %s mode but got %s"
+                % (word, true_stem, stemmer_mode, our_stem)
+            )
+
+    def test_vocabulary_martin_mode(self):
+        """Tests all words from the test vocabulary provided by M Porter
+
+        The sample vocabulary and output were sourced from:
+            http://tartarus.org/martin/PorterStemmer/voc.txt
+            http://tartarus.org/martin/PorterStemmer/output.txt
+        and are linked to from the Porter Stemmer algorithm's homepage
+        at
+            http://tartarus.org/martin/PorterStemmer/
+        """
+        with closing(
+            data.find('stemmers/porter_test/porter_martin_output.txt').open(
+                encoding='utf-8'
+            )
+        ) as fp:
+            self._test_against_expected_output(
+                PorterStemmer.MARTIN_EXTENSIONS, fp.read().splitlines()
+            )
+
+    def test_vocabulary_nltk_mode(self):
+        with closing(
+            data.find('stemmers/porter_test/porter_nltk_output.txt').open(
+                encoding='utf-8'
+            )
+        ) as fp:
+            self._test_against_expected_output(
+                PorterStemmer.NLTK_EXTENSIONS, fp.read().splitlines()
+            )
+
+    def test_vocabulary_original_mode(self):
+        # The list of stems for this test was generated by taking the
+        # Martin-blessed stemmer from
+        # http://tartarus.org/martin/PorterStemmer/c.txt
+        # and removing all the --DEPARTURE-- sections from it and
+        # running it against Martin's test vocabulary.
+
+        with closing(
+            data.find('stemmers/porter_test/porter_original_output.txt').open(
+                encoding='utf-8'
+            )
+        ) as fp:
+            self._test_against_expected_output(
+                PorterStemmer.ORIGINAL_ALGORITHM, fp.read().splitlines()
+            )
+
+    def test_oed_bug(self):
+        """Test for bug https://github.com/nltk/nltk/issues/1581
+
+        Ensures that 'oed' can be stemmed without throwing an error.
+        """
+        assert PorterStemmer().stem('oed') == 'o'
diff --git a/nlp_resource_data/nltk/test/unit/test_tag.py b/nlp_resource_data/nltk/test/unit/test_tag.py
new file mode 100644 (file)
index 0000000..b460854
--- /dev/null
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+
+
+def test_basic():
+    from nltk.tag import pos_tag
+    from nltk.tokenize import word_tokenize
+
+    result = pos_tag(word_tokenize("John's big idea isn't all that bad."))
+    assert result == [
+        ('John', 'NNP'),
+        ("'s", 'POS'),
+        ('big', 'JJ'),
+        ('idea', 'NN'),
+        ('is', 'VBZ'),
+        ("n't", 'RB'),
+        ('all', 'PDT'),
+        ('that', 'DT'),
+        ('bad', 'JJ'),
+        ('.', '.'),
+    ]
+
+
+def setup_module(module):
+    from nose import SkipTest
+
+    try:
+        import numpy
+    except ImportError:
+        raise SkipTest("numpy is required for nltk.test.test_tag")
diff --git a/nlp_resource_data/nltk/test/unit/test_tgrep.py b/nlp_resource_data/nltk/test/unit/test_tgrep.py
new file mode 100644 (file)
index 0000000..f46b4ce
--- /dev/null
@@ -0,0 +1,787 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Natural Language Toolkit: TGrep search
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Will Roberts <wildwilhelm@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+'''
+Unit tests for nltk.tgrep.
+'''
+
+
+import unittest
+
+from nltk.tree import ParentedTree
+from nltk import tgrep
+
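+# tgrep patterns pair node descriptions (literal names, /regexes/, quoted
+# "names", or N(...) tree positions) with link operators such as < (immediately
+# dominates), << (dominates), . (immediately precedes), and $ (is a sister of).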
+
+class TestSequenceFunctions(unittest.TestCase):
+
+    '''
+    Class containing unit tests for nltk.tgrep.
+    '''
+
+    def test_tokenize_simple(self):
+        '''
+        Simple test of tokenization.
+        '''
+        tokens = tgrep.tgrep_tokenize('A .. (B !< C . D) | ![<< (E , F) $ G]')
+        self.assertEqual(
+            tokens,
+            [
+                'A',
+                '..',
+                '(',
+                'B',
+                '!',
+                '<',
+                'C',
+                '.',
+                'D',
+                ')',
+                '|',
+                '!',
+                '[',
+                '<<',
+                '(',
+                'E',
+                ',',
+                'F',
+                ')',
+                '$',
+                'G',
+                ']',
+            ],
+        )
+
+    def test_tokenize_encoding(self):
+        '''
+        Test that tokenization handles bytes and strs the same way.
+        '''
+        self.assertEqual(
+            tgrep.tgrep_tokenize(b'A .. (B !< C . D) | ![<< (E , F) $ G]'),
+            tgrep.tgrep_tokenize('A .. (B !< C . D) | ![<< (E , F) $ G]'),
+        )
+
+    def test_tokenize_link_types(self):
+        '''
+        Test tokenization of basic link types.
+        '''
+        self.assertEqual(tgrep.tgrep_tokenize('A<B'), ['A', '<', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A>B'), ['A', '>', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A<3B'), ['A', '<3', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A>3B'), ['A', '>3', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A<,B'), ['A', '<,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A>,B'), ['A', '>,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A<-3B'), ['A', '<-3', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A>-3B'), ['A', '>-3', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A<-B'), ['A', '<-', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A>-B'), ['A', '>-', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A<\'B'), ['A', '<\'', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A>\'B'), ['A', '>\'', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A<:B'), ['A', '<:', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A>:B'), ['A', '>:', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A<<B'), ['A', '<<', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A>>B'), ['A', '>>', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A<<,B'), ['A', '<<,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A>>,B'), ['A', '>>,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A<<\'B'), ['A', '<<\'', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A>>\'B'), ['A', '>>\'', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A<<:B'), ['A', '<<:', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A>>:B'), ['A', '>>:', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A.B'), ['A', '.', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A,B'), ['A', ',', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A..B'), ['A', '..', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A,,B'), ['A', ',,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A$B'), ['A', '$', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A$.B'), ['A', '$.', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A$,B'), ['A', '$,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A$..B'), ['A', '$..', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A$,,B'), ['A', '$,,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!<B'), ['A', '!', '<', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!>B'), ['A', '!', '>', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!<3B'), ['A', '!', '<3', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!>3B'), ['A', '!', '>3', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!<,B'), ['A', '!', '<,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!>,B'), ['A', '!', '>,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!<-3B'), ['A', '!', '<-3', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!>-3B'), ['A', '!', '>-3', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!<-B'), ['A', '!', '<-', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!>-B'), ['A', '!', '>-', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!<\'B'), ['A', '!', '<\'', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!>\'B'), ['A', '!', '>\'', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!<:B'), ['A', '!', '<:', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!>:B'), ['A', '!', '>:', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!<<B'), ['A', '!', '<<', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!>>B'), ['A', '!', '>>', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!<<,B'), ['A', '!', '<<,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!>>,B'), ['A', '!', '>>,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!<<\'B'), ['A', '!', '<<\'', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!>>\'B'), ['A', '!', '>>\'', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!<<:B'), ['A', '!', '<<:', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!>>:B'), ['A', '!', '>>:', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!.B'), ['A', '!', '.', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!,B'), ['A', '!', ',', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!..B'), ['A', '!', '..', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!,,B'), ['A', '!', ',,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!$B'), ['A', '!', '$', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!$.B'), ['A', '!', '$.', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!$,B'), ['A', '!', '$,', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!$..B'), ['A', '!', '$..', 'B'])
+        self.assertEqual(tgrep.tgrep_tokenize('A!$,,B'), ['A', '!', '$,,', 'B'])
+
+    def test_tokenize_examples(self):
+        '''
+        Test tokenization of the TGrep2 manual example patterns.
+        '''
+        self.assertEqual(tgrep.tgrep_tokenize('NP < PP'), ['NP', '<', 'PP'])
+        self.assertEqual(tgrep.tgrep_tokenize('/^NP/'), ['/^NP/'])
+        self.assertEqual(
+            tgrep.tgrep_tokenize('NP << PP . VP'), ['NP', '<<', 'PP', '.', 'VP']
+        )
+        self.assertEqual(
+            tgrep.tgrep_tokenize('NP << PP | . VP'), ['NP', '<<', 'PP', '|', '.', 'VP']
+        )
+        self.assertEqual(
+            tgrep.tgrep_tokenize('NP !<< PP [> NP | >> VP]'),
+            ['NP', '!', '<<', 'PP', '[', '>', 'NP', '|', '>>', 'VP', ']'],
+        )
+        self.assertEqual(
+            tgrep.tgrep_tokenize('NP << (PP . VP)'),
+            ['NP', '<<', '(', 'PP', '.', 'VP', ')'],
+        )
+        self.assertEqual(
+            tgrep.tgrep_tokenize('NP <\' (PP <, (IN < on))'),
+            ['NP', '<\'', '(', 'PP', '<,', '(', 'IN', '<', 'on', ')', ')'],
+        )
+        self.assertEqual(
+            tgrep.tgrep_tokenize('S < (A < B) < C'),
+            ['S', '<', '(', 'A', '<', 'B', ')', '<', 'C'],
+        )
+        self.assertEqual(
+            tgrep.tgrep_tokenize('S < ((A < B) < C)'),
+            ['S', '<', '(', '(', 'A', '<', 'B', ')', '<', 'C', ')'],
+        )
+        self.assertEqual(
+            tgrep.tgrep_tokenize('S < (A < B < C)'),
+            ['S', '<', '(', 'A', '<', 'B', '<', 'C', ')'],
+        )
+        self.assertEqual(tgrep.tgrep_tokenize('A<B&.C'), ['A', '<', 'B', '&', '.', 'C'])
+
+    def test_tokenize_quoting(self):
+        '''
+        Test tokenization of quoting.
+        '''
+        self.assertEqual(
+            tgrep.tgrep_tokenize('"A<<:B"<<:"A $.. B"<"A>3B"<C'),
+            ['"A<<:B"', '<<:', '"A $.. B"', '<', '"A>3B"', '<', 'C'],
+        )
+
+    def test_tokenize_nodenames(self):
+        '''
+        Test tokenization of node names.
+        '''
+        self.assertEqual(tgrep.tgrep_tokenize('Robert'), ['Robert'])
+        self.assertEqual(tgrep.tgrep_tokenize('/^[Bb]ob/'), ['/^[Bb]ob/'])
+        self.assertEqual(tgrep.tgrep_tokenize('*'), ['*'])
+        self.assertEqual(tgrep.tgrep_tokenize('__'), ['__'])
+        # test tokenization of NLTK tree position syntax
+        self.assertEqual(tgrep.tgrep_tokenize('N()'), ['N(', ')'])
+        self.assertEqual(tgrep.tgrep_tokenize('N(0,)'), ['N(', '0', ',', ')'])
+        self.assertEqual(tgrep.tgrep_tokenize('N(0,0)'), ['N(', '0', ',', '0', ')'])
+        self.assertEqual(
+            tgrep.tgrep_tokenize('N(0,0,)'), ['N(', '0', ',', '0', ',', ')']
+        )
+
+    def test_tokenize_macros(self):
+        '''
+        Test tokenization of macro definitions.
+        '''
+        self.assertEqual(
+            tgrep.tgrep_tokenize(
+                '@ NP /^NP/;\n@ NN /^NN/;\n@NP [!< NP | < @NN] !$.. @NN'
+            ),
+            [
+                '@',
+                'NP',
+                '/^NP/',
+                ';',
+                '@',
+                'NN',
+                '/^NN/',
+                ';',
+                '@NP',
+                '[',
+                '!',
+                '<',
+                'NP',
+                '|',
+                '<',
+                '@NN',
+                ']',
+                '!',
+                '$..',
+                '@NN',
+            ],
+        )
+
+    def test_node_simple(self):
+        '''
+        Test a simple use of tgrep for finding nodes matching a given
+        pattern.
+        '''
+        tree = ParentedTree.fromstring(
+            '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))'
+        )
+        self.assertEqual(list(tgrep.tgrep_positions('NN', [tree])), [[(0, 2), (2, 1)]])
+        self.assertEqual(
+            list(tgrep.tgrep_nodes('NN', [tree])), [[tree[0, 2], tree[2, 1]]]
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('NN|JJ', [tree])), [[(0, 1), (0, 2), (2, 1)]]
+        )
+
+    def test_node_printing(self):
+        '''Test that the tgrep print operator ' is properly ignored.'''
+        tree = ParentedTree.fromstring('(S (n x) (N x))')
+        self.assertEqual(
+            list(tgrep.tgrep_positions('N', [tree])),
+            list(tgrep.tgrep_positions('\'N', [tree])),
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('/[Nn]/', [tree])),
+            list(tgrep.tgrep_positions('\'/[Nn]/', [tree])),
+        )
+
+    def test_node_encoding(self):
+        '''
+        Test that tgrep search strings handles bytes and strs the same
+        way.
+        '''
+        tree = ParentedTree.fromstring(
+            '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))'
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions(b'NN', [tree])),
+            list(tgrep.tgrep_positions(b'NN', [tree])),
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_nodes(b'NN', [tree])),
+            list(tgrep.tgrep_nodes('NN', [tree])),
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions(b'NN|JJ', [tree])),
+            list(tgrep.tgrep_positions('NN|JJ', [tree])),
+        )
+
+    def test_node_nocase(self):
+        '''
+        Test selecting nodes using case-insensitive node names.
+        '''
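+        # A quoted node name matches case-sensitively; the 'i@' prefix
+        # makes the match case-insensitive, so it finds both n and N.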
+        tree = ParentedTree.fromstring('(S (n x) (N x))')
+        self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[(1,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('i@"N"', [tree])), [[(0,), (1,)]])
+
+    def test_node_quoted(self):
+        '''
+        Test selecting nodes using quoted node names.
+        '''
+        tree = ParentedTree.fromstring('(N ("N" x) (N" x) ("\\" x))')
+        self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[()]])
+        self.assertEqual(list(tgrep.tgrep_positions('"\\"N\\""', [tree])), [[(0,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('"N\\""', [tree])), [[(1,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('"\\"\\\\\\""', [tree])), [[(2,)]])
+
+    def test_node_regex(self):
+        '''
+        Test regex matching on nodes.
+        '''
+        tree = ParentedTree.fromstring('(S (NP-SBJ x) (NP x) (NNP x) (VP x))')
+        # This is a regular expression that matches any node whose
+        # name starts with NP, including NP-SBJ:
+        self.assertEqual(list(tgrep.tgrep_positions('/^NP/', [tree])), [[(0,), (1,)]])
+
+    def test_node_regex_2(self):
+        '''
+        Test regex matching on nodes.
+        '''
+        tree = ParentedTree.fromstring('(S (SBJ x) (SBJ1 x) (NP-SBJ x))')
+        self.assertEqual(list(tgrep.tgrep_positions('/^SBJ/', [tree])), [[(0,), (1,)]])
+        # This is a regular expression that matches any node whose
+        # name includes SBJ, including NP-SBJ:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('/SBJ/', [tree])), [[(0,), (1,), (2,)]]
+        )
+
+    def test_node_tree_position(self):
+        '''
+        Test matching on nodes based on NLTK tree position.
+        '''
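+        # A node name of the form N(i,j,...) matches only the node at that
+        # NLTK tree position, i.e. tree[i][j]...; N() denotes the root.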
+        tree = ParentedTree.fromstring('(S (NP-SBJ x) (NP x) (NNP x) (VP x))')
+        # test all tree positions that are not leaves
+        leaf_positions = set(
+            tree.leaf_treeposition(x) for x in range(len(tree.leaves()))
+        )
+        tree_positions = [x for x in tree.treepositions() if x not in leaf_positions]
+        for position in tree_positions:
+            node_id = 'N{0}'.format(position)
+            tgrep_positions = list(tgrep.tgrep_positions(node_id, [tree]))
+            self.assertEqual(len(tgrep_positions[0]), 1)
+            self.assertEqual(tgrep_positions[0][0], position)
+
+    def test_node_noleaves(self):
+        '''
+        Test node name matching with the search_leaves flag set to False.
+        '''
+        tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))')
+        self.assertEqual(
+            list(tgrep.tgrep_positions('x', [tree])), [[(0, 0, 0), (1, 0, 0)]]
+        )
+        self.assertEqual(list(tgrep.tgrep_positions('x', [tree], False)), [[]])
+
+    def tests_rel_dominance(self):
+        '''
+        Test matching nodes based on dominance relations.
+        '''
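+        # TGrep2 dominance operators: A < B means A immediately dominates
+        # B and A > B the converse; << and >> are the transitive versions;
+        # <: requires an only child and <<: a chain of only children; a
+        # leading ! negates any relation.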
+        tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))')
+        self.assertEqual(list(tgrep.tgrep_positions('* < T', [tree])), [[(0,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* < T > S', [tree])), [[(0,)]])
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* !< T', [tree])),
+            [[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]],
+        )
+        self.assertEqual(list(tgrep.tgrep_positions('* !< T > S', [tree])), [[(1,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* > A', [tree])), [[(0, 0)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* > B', [tree])), [[(1, 0)]])
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* !> B', [tree])),
+            [[(), (0,), (0, 0), (0, 0, 0), (1,), (1, 0, 0)]],
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* !> B >> S', [tree])), [[(0,), (0, 0), (1,)]]
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* >> S', [tree])),
+            [[(0,), (0, 0), (1,), (1, 0)]],
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* >>, S', [tree])), [[(0,), (0, 0)]]
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* >>\' S', [tree])), [[(1,), (1, 0)]]
+        )
+        # Known issue:
+        # self.assertEqual(list(tgrep.tgrep_positions('* !>> S', [tree])),
+        #                 [[()]])
+        self.assertEqual(list(tgrep.tgrep_positions('* << T', [tree])), [[(), (0,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* <<\' T', [tree])), [[(0,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* <<1 N', [tree])), [[(1,)]])
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* !<< T', [tree])),
+            [[(0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]],
+        )
+        tree = ParentedTree.fromstring('(S (A (T x)) (B (T x) (N x )))')
+        self.assertEqual(list(tgrep.tgrep_positions('* <: T', [tree])), [[(0,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* < T', [tree])), [[(0,), (1,)]])
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* !<: T', [tree])),
+            [[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0)]],
+        )
+        self.assertEqual(list(tgrep.tgrep_positions('* !<: T > S', [tree])), [[(1,)]])
+        tree = ParentedTree.fromstring('(S (T (A x) (B x)) (T (C x)))')
+        self.assertEqual(list(tgrep.tgrep_positions('* >: T', [tree])), [[(1, 0)]])
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* !>: T', [tree])),
+            [[(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0, 0)]],
+        )
+        tree = ParentedTree.fromstring(
+            '(S (A (B (C (D (E (T x))))))' ' (A (B (C (D (E (T x))) (N x)))))'
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* <<: T', [tree])),
+            [
+                [
+                    (0,),
+                    (0, 0),
+                    (0, 0, 0),
+                    (0, 0, 0, 0),
+                    (0, 0, 0, 0, 0),
+                    (1, 0, 0, 0),
+                    (1, 0, 0, 0, 0),
+                ]
+            ],
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* >>: A', [tree])),
+            [
+                [
+                    (0, 0),
+                    (0, 0, 0),
+                    (0, 0, 0, 0),
+                    (0, 0, 0, 0, 0),
+                    (0, 0, 0, 0, 0, 0),
+                    (1, 0),
+                    (1, 0, 0),
+                ]
+            ],
+        )
+
+    def test_bad_operator(self):
+        '''
+        Test error handling of undefined tgrep operators.
+        '''
+        tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))')
+        self.assertRaises(
+            tgrep.TgrepException, list, tgrep.tgrep_positions('* >>> S', [tree])
+        )
+
+    def test_comments(self):
+        '''
+        Test that comments are correctly filtered out of tgrep search
+        strings.
+        '''
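+        # Everything from '#' to the end of a line is treated as a comment
+        # and stripped before the search string is parsed.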
+        tree = ParentedTree.fromstring('(S (NN x) (NP x) (NN x))')
+        search1 = '''
+        @ NP /^NP/;
+        @ NN /^NN/;
+        @NN
+        '''
+        self.assertEqual(list(tgrep.tgrep_positions(search1, [tree])), [[(0,), (2,)]])
+        search2 = '''
+        # macros
+        @ NP /^NP/;
+        @ NN /^NN/;
+
+        # search string
+        @NN
+        '''
+        self.assertEqual(list(tgrep.tgrep_positions(search2, [tree])), [[(0,), (2,)]])
+
+    def test_rel_sister_nodes(self):
+        '''
+        Test matching sister nodes in a tree.
+        '''
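+        # Sister operators: $ matches any sister of B; $. / $.. match a
+        # sister that immediately precedes / precedes B, and $, / $,, the
+        # mirror relations for following sisters.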
+        tree = ParentedTree.fromstring('(S (A x) (B x) (C x))')
+        self.assertEqual(list(tgrep.tgrep_positions('* $. B', [tree])), [[(0,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* $.. B', [tree])), [[(0,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* $, B', [tree])), [[(2,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* $,, B', [tree])), [[(2,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* $ B', [tree])), [[(0,), (2,)]])
+
+    def tests_rel_indexed_children(self):
+        '''
+        Test matching nodes based on their index in their parent node.
+        '''
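+        # A >N B means A is the Nth child of B, and >-N counts from the
+        # end; >, and >' abbreviate >1 and >-1. The <N / <-N / <, / <'
+        # forms instead say that the node has B as its Nth child.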
+        tree = ParentedTree.fromstring('(S (A x) (B x) (C x))')
+        self.assertEqual(list(tgrep.tgrep_positions('* >, S', [tree])), [[(0,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* >1 S', [tree])), [[(0,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* >2 S', [tree])), [[(1,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* >3 S', [tree])), [[(2,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* >\' S', [tree])), [[(2,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* >-1 S', [tree])), [[(2,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* >-2 S', [tree])), [[(1,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* >-3 S', [tree])), [[(0,)]])
+        tree = ParentedTree.fromstring(
+            '(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) ' '(F (C x) (A x) (B x)))'
+        )
+        self.assertEqual(list(tgrep.tgrep_positions('* <, A', [tree])), [[(0,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* <1 A', [tree])), [[(0,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* <2 A', [tree])), [[(2,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* <3 A', [tree])), [[(1,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* <\' A', [tree])), [[(1,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* <-1 A', [tree])), [[(1,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* <-2 A', [tree])), [[(2,)]])
+        self.assertEqual(list(tgrep.tgrep_positions('* <-3 A', [tree])), [[(0,)]])
+
+    def test_rel_precedence(self):
+        '''
+        Test matching nodes based on precedence relations.
+        '''
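+        # Precedence operators: A . B means A immediately precedes B and
+        # A , B that A immediately follows B; .. and ,, are the
+        # transitive versions.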
+        tree = ParentedTree.fromstring(
+            '(S (NP (NP (PP x)) (NP (AP x)))'
+            ' (VP (AP (X (PP x)) (Y (AP x))))'
+            ' (NP (RC (NP (AP x)))))'
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* . X', [tree])), [[(0,), (0, 1), (0, 1, 0)]]
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* . Y', [tree])), [[(1, 0, 0), (1, 0, 0, 0)]]
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* .. X', [tree])),
+            [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0)]],
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* .. Y', [tree])),
+            [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1, 0, 0), (1, 0, 0, 0)]],
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* , X', [tree])), [[(1, 0, 1), (1, 0, 1, 0)]]
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* , Y', [tree])),
+            [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* ,, X', [tree])),
+            [[(1, 0, 1), (1, 0, 1, 0), (2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* ,, Y', [tree])),
+            [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
+        )
+
+    def test_examples(self):
+        '''
+        Test the Basic Examples from the TGrep2 manual.
+        '''
+        tree = ParentedTree.fromstring('(S (NP (AP x)) (NP (PP x)))')
+        # This matches any NP node that immediately dominates a PP:
+        self.assertEqual(list(tgrep.tgrep_positions('NP < PP', [tree])), [[(1,)]])
+
+        tree = ParentedTree.fromstring('(S (NP x) (VP x) (NP (PP x)) (VP x))')
+        # This matches an NP that dominates a PP and is immediately
+        # followed by a VP:
+        self.assertEqual(list(tgrep.tgrep_positions('NP << PP . VP', [tree])), [[(2,)]])
+
+        tree = ParentedTree.fromstring(
+            '(S (NP (AP x)) (NP (PP x)) ' '(NP (DET x) (NN x)) (VP x))'
+        )
+        # This matches an NP that dominates a PP or is immediately
+        # followed by a VP:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('NP << PP | . VP', [tree])), [[(1,), (2,)]]
+        )
+
+        tree = ParentedTree.fromstring(
+            '(S (NP (NP (PP x)) (NP (AP x)))'
+            ' (VP (AP (NP (PP x)) (NP (AP x))))'
+            ' (NP (RC (NP (AP x)))))'
+        )
+        # This matches an NP that does not dominate a PP. Also, the NP
+        # must either have a parent that is an NP or be dominated by a
+        # VP:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('NP !<< PP [> NP | >> VP]', [tree])),
+            [[(0, 1), (1, 0, 1)]],
+        )
+
+        tree = ParentedTree.fromstring(
+            '(S (NP (AP (PP x) (VP x))) ' '(NP (AP (PP x) (NP x))) (NP x))'
+        )
+        # This matches an NP that dominates a PP which itself is
+        # immediately followed by a VP. Note the use of parentheses to
+        # group ". VP" with the PP rather than with the NP:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('NP << (PP . VP)', [tree])), [[(0,)]]
+        )
+
+        tree = ParentedTree.fromstring(
+            '(S (NP (DET a) (NN cat) (PP (IN on) (NP x)))'
+            ' (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x))'
+            ' (NP x))'
+        )
+        # This matches an NP whose last child is a PP that begins with
+        # the preposition "on":
+        self.assertEqual(
+            list(tgrep.tgrep_positions('NP <\' (PP <, (IN < on))', [tree])), [[(0,)]]
+        )
+
+        tree = ParentedTree.fromstring(
+            '(S (S (C x) (A (B x))) (S (C x) (A x)) ' '(S (D x) (A (B x))))'
+        )
+        # The following pattern matches an S which has a child A and
+        # another child C, and the A in turn has a child B:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('S < (A < B) < C', [tree])), [[(0,)]]
+        )
+
+        tree = ParentedTree.fromstring(
+            '(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))'
+        )
+        # However, this pattern means that S has child A and that A
+        # has children B and C:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('S < ((A < B) < C)', [tree])), [[(0,)]]
+        )
+
+        # It is equivalent to this:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('S < (A < B < C)', [tree])), [[(0,)]]
+        )
+
+    def test_use_macros(self):
+        '''
+        Test defining and using tgrep2 macros.
+        '''
+        tree = ParentedTree.fromstring(
+            '(VP (VB sold) (NP (DET the) '
+            '(NN heiress)) (NP (NN deed) (PREP to) '
+            '(NP (DET the) (NN school) (NN house))))'
+        )
+        self.assertEqual(
+            list(
+                tgrep.tgrep_positions(
+                    '@ NP /^NP/;\n@ NN /^NN/;\n@NP !< @NP !$.. @NN', [tree]
+                )
+            ),
+            [[(1,), (2, 2)]],
+        )
+        # use undefined macro @CNP
+        self.assertRaises(
+            tgrep.TgrepException,
+            list,
+            tgrep.tgrep_positions(
+                '@ NP /^NP/;\n@ NN /^NN/;\n@CNP !< @NP !$.. @NN', [tree]
+            ),
+        )
+
+    def test_tokenize_node_labels(self):
+        '''Test tokenization of labeled nodes.'''
+        self.assertEqual(
+            tgrep.tgrep_tokenize('S < @SBJ < (@VP < (@VB $.. @OBJ))'),
+            [
+                'S',
+                '<',
+                '@SBJ',
+                '<',
+                '(',
+                '@VP',
+                '<',
+                '(',
+                '@VB',
+                '$..',
+                '@OBJ',
+                ')',
+                ')',
+            ],
+        )
+        self.assertEqual(
+            tgrep.tgrep_tokenize('S < @SBJ=s < (@VP=v < (@VB $.. @OBJ))'),
+            [
+                'S',
+                '<',
+                '@SBJ',
+                '=',
+                's',
+                '<',
+                '(',
+                '@VP',
+                '=',
+                'v',
+                '<',
+                '(',
+                '@VB',
+                '$..',
+                '@OBJ',
+                ')',
+                ')',
+            ],
+        )
+
+    def test_tokenize_segmented_patterns(self):
+        '''Test tokenization of segmented patterns.'''
+        self.assertEqual(
+            tgrep.tgrep_tokenize('S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v'),
+            [
+                'S',
+                '<',
+                '@SBJ',
+                '=',
+                's',
+                '<',
+                '(',
+                '@VP',
+                '=',
+                'v',
+                '<',
+                '(',
+                '@VB',
+                '$..',
+                '@OBJ',
+                ')',
+                ')',
+                ':',
+                '=s',
+                '..',
+                '=v',
+            ],
+        )
+
+    def test_labeled_nodes(self):
+        '''
+        Test labeled nodes.
+
+        Test case from Emily M. Bender.
+        '''
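+        # '=s' and '=v' label the matched SBJ and VP nodes; the segment
+        # after ':' imposes the separate constraint that s precede v,
+        # which is what distinguishes sent1 from sent2 below.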
+        search = '''
+            # macros
+            @ SBJ /SBJ/;
+            @ VP /VP/;
+            @ VB /VB/;
+            @ VPoB /V[PB]/;
+            @ OBJ /OBJ/;
+
+            # 1 svo
+            S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v'''
+        sent1 = ParentedTree.fromstring(
+            '(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))'
+        )
+        sent2 = ParentedTree.fromstring(
+            '(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))'
+        )
+        search_firsthalf = search.split('\n\n')[0] + 'S < @SBJ < (@VP < (@VB $.. @OBJ))'
+        search_rewrite = 'S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))'
+
+        self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent1]))[0])
+        self.assertTrue(list(tgrep.tgrep_positions(search, [sent1]))[0])
+        self.assertTrue(list(tgrep.tgrep_positions(search_rewrite, [sent1]))[0])
+        self.assertEqual(
+            list(tgrep.tgrep_positions(search, [sent1])),
+            list(tgrep.tgrep_positions(search_rewrite, [sent1])),
+        )
+        self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent2]))[0])
+        self.assertFalse(list(tgrep.tgrep_positions(search, [sent2]))[0])
+        self.assertFalse(list(tgrep.tgrep_positions(search_rewrite, [sent2]))[0])
+        self.assertEqual(
+            list(tgrep.tgrep_positions(search, [sent2])),
+            list(tgrep.tgrep_positions(search_rewrite, [sent2])),
+        )
+
+    def test_multiple_conjs(self):
+        '''
+        Test that multiple (3 or more) conjunctions of node relations are
+        handled properly.
+        '''
+        sent = ParentedTree.fromstring('((A (B b) (C c)) (A (B b) (C c) (D d)))')
+        # search = '(A < B < C < D)'
+        # search_tworels = '(A < B < C)'
+        self.assertEqual(
+            list(tgrep.tgrep_positions('(A < B < C < D)', [sent])), [[(1,)]]
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('(A < B < C)', [sent])), [[(0,), (1,)]]
+        )
+
+    def test_trailing_semicolon(self):
+        '''
+        Test that semicolons at the end of a tgrep2 search string won't
+        cause a parse failure.
+        '''
+        tree = ParentedTree.fromstring(
+            '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))'
+        )
+        self.assertEqual(list(tgrep.tgrep_positions('NN', [tree])), [[(0, 2), (2, 1)]])
+        self.assertEqual(list(tgrep.tgrep_positions('NN;', [tree])), [[(0, 2), (2, 1)]])
+        self.assertEqual(
+            list(tgrep.tgrep_positions('NN;;', [tree])), [[(0, 2), (2, 1)]]
+        )
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/nlp_resource_data/nltk/test/unit/test_tokenize.py b/nlp_resource_data/nltk/test/unit/test_tokenize.py
new file mode 100644 (file)
index 0000000..f3b80c5
--- /dev/null
@@ -0,0 +1,425 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.tokenize.
+See also nltk/test/tokenize.doctest
+"""
+
+
+import unittest
+
+from nose import SkipTest
+from nose.tools import assert_equal
+
+from nltk.tokenize import (
+    punkt,
+    word_tokenize,
+    TweetTokenizer,
+    StanfordSegmenter,
+    TreebankWordTokenizer,
+    SyllableTokenizer,
+)
+
+
+class TestTokenize(unittest.TestCase):
+    def test_tweet_tokenizer(self):
+        """
+        Test TweetTokenizer using words with special and accented characters.
+        """
+
+        tokenizer = TweetTokenizer(strip_handles=True, reduce_len=True)
+        s9 = "@myke: Let's test these words: resumé España München français"
+        tokens = tokenizer.tokenize(s9)
+        expected = [
+            ':',
+            "Let's",
+            'test',
+            'these',
+            'words',
+            ':',
+            'resumé',
+            'España',
+            'München',
+            'français',
+        ]
+        self.assertEqual(tokens, expected)
+
+    def test_sonority_sequencing_syllable_tokenizer(self):
+        """
+        Test SyllableTokenizer tokenizer.
+        """
+        tokenizer = SyllableTokenizer()
+        tokens = tokenizer.tokenize('justification')
+        self.assertEqual(tokens, ['jus', 'ti', 'fi', 'ca', 'tion'])
+
+    def test_stanford_segmenter_arabic(self):
+        """
+        Test the Stanford Word Segmenter for Arabic (default config)
+        """
+        try:
+            seg = StanfordSegmenter()
+            seg.default_config('ar')
+            sent = u'يبحث علم الحاسوب استخدام الحوسبة بجميع اشكالها لحل المشكلات'
+            segmented_sent = seg.segment(sent.split())
+            assert segmented_sent.split() == [
+                'يبحث',
+                'علم',
+                'الحاسوب',
+                'استخدام',
+                'الحوسبة',
+                'ب',
+                'جميع',
+                'اشكال',
+                'ها',
+                'ل',
+                'حل',
+                'المشكلات',
+            ]
+        except LookupError as e:
+            raise SkipTest(str(e))
+
+    def test_stanford_segmenter_chinese(self):
+        """
+        Test the Stanford Word Segmenter for Chinese (default config)
+        """
+        try:
+            seg = StanfordSegmenter()
+            seg.default_config('zh')
+            sent = u"这是斯坦福中文分词器测试"
+            segmented_sent = seg.segment(sent.split())
+            assert segmented_sent.split() == ['这', '是', '斯坦福', '中文', '分词器', '测试']
+        except LookupError as e:
+            raise SkipTest(str(e))
+
+    def test_phone_tokenizer(self):
+        """
+        Test a string that resembles a phone number but contains a newline
+        """
+
+        # Should be recognized as a phone number, albeit one with multiple spaces
+        tokenizer = TweetTokenizer()
+        test1 = "(393)  928 -3010"
+        expected = ['(393)  928 -3010']
+        result = tokenizer.tokenize(test1)
+        self.assertEqual(result, expected)
+
+        # Due to newline, first three elements aren't part of a phone number;
+        # fourth is
+        test2 = "(393)\n928 -3010"
+        expected = ['(', '393', ')', "928 -3010"]
+        result = tokenizer.tokenize(test2)
+        self.assertEqual(result, expected)
+
+    def test_pad_asterisk(self):
+        """
+        Test padding of asterisk for word tokenization.
+        """
+        text = "This is a, *weird sentence with *asterisks in it."
+        expected = ['This', 'is', 'a', ',', '*', 'weird', 'sentence', 
+                    'with', '*', 'asterisks', 'in', 'it', '.']
+        self.assertEqual(word_tokenize(text), expected)
+
+    def test_pad_dotdot(self):
+        """
+        Test padding of dotdot* for word tokenization.
+        """
+        text = "Why did dotdot.. not get tokenized but dotdotdot... did? How about manydots....."
+        expected = ['Why', 'did', 'dotdot', '..', 'not', 'get', 
+                    'tokenized', 'but', 'dotdotdot', '...', 'did', '?', 
+                    'How', 'about', 'manydots', '.....']
+        self.assertEqual(word_tokenize(text), expected)
+
+    def test_remove_handle(self):
+        """
+        Test remove_handle() from casual.py with specially crafted edge cases
+        """
+
+        tokenizer = TweetTokenizer(strip_handles=True)
+
+        # Simple example. Handles with just numbers should be allowed
+        test1 = "@twitter hello @twi_tter_. hi @12345 @123news"
+        expected = ['hello', '.', 'hi']
+        result = tokenizer.tokenize(test1)
+        self.assertEqual(result, expected)
+
+        # Handles are allowed to follow any of the following characters
+        test2 = "@n`@n~@n(@n)@n-@n=@n+@n\\@n|@n[@n]@n{@n}@n;@n:@n'@n\"@n/@n?@n.@n,@n<@n>@n @n\n@n ñ@n.ü@n.ç@n."
+        expected = [
+            '`',
+            '~',
+            '(',
+            ')',
+            '-',
+            '=',
+            '+',
+            '\\',
+            '|',
+            '[',
+            ']',
+            '{',
+            '}',
+            ';',
+            ':',
+            "'",
+            '"',
+            '/',
+            '?',
+            '.',
+            ',',
+            '<',
+            '>',
+            'ñ',
+            '.',
+            'ü',
+            '.',
+            'ç',
+            '.',
+        ]
+        result = tokenizer.tokenize(test2)
+        self.assertEqual(result, expected)
+
+        # Handles are NOT allowed to follow any of the following characters
+        test3 = "a@n j@n z@n A@n L@n Z@n 1@n 4@n 7@n 9@n 0@n _@n !@n @@n #@n $@n %@n &@n *@n"
+        expected = [
+            'a',
+            '@n',
+            'j',
+            '@n',
+            'z',
+            '@n',
+            'A',
+            '@n',
+            'L',
+            '@n',
+            'Z',
+            '@n',
+            '1',
+            '@n',
+            '4',
+            '@n',
+            '7',
+            '@n',
+            '9',
+            '@n',
+            '0',
+            '@n',
+            '_',
+            '@n',
+            '!',
+            '@n',
+            '@',
+            '@n',
+            '#',
+            '@n',
+            '$',
+            '@n',
+            '%',
+            '@n',
+            '&',
+            '@n',
+            '*',
+            '@n',
+        ]
+        result = tokenizer.tokenize(test3)
+        self.assertEqual(result, expected)
+
+        # Handles are allowed to precede the following characters
+        test4 = "@n!a @n#a @n$a @n%a @n&a @n*a"
+        expected = ['!', 'a', '#', 'a', '$', 'a', '%', 'a', '&', 'a', '*', 'a']
+        result = tokenizer.tokenize(test4)
+        self.assertEqual(result, expected)
+
+        # Tests interactions with special symbols and multiple @
+        test5 = "@n!@n @n#@n @n$@n @n%@n @n&@n @n*@n @n@n @@n @n@@n @n_@n @n7@n @nj@n"
+        expected = [
+            '!',
+            '@n',
+            '#',
+            '@n',
+            '$',
+            '@n',
+            '%',
+            '@n',
+            '&',
+            '@n',
+            '*',
+            '@n',
+            '@n',
+            '@n',
+            '@',
+            '@n',
+            '@n',
+            '@',
+            '@n',
+            '@n_',
+            '@n',
+            '@n7',
+            '@n',
+            '@nj',
+            '@n',
+        ]
+        result = tokenizer.tokenize(test5)
+        self.assertEqual(result, expected)
+
+        # Tests that handles can have a max length of 20
+        test6 = "@abcdefghijklmnopqrstuvwxyz @abcdefghijklmnopqrst1234 @abcdefghijklmnopqrst_ @abcdefghijklmnopqrstendofhandle"
+        expected = ['uvwxyz', '1234', '_', 'endofhandle']
+        result = tokenizer.tokenize(test6)
+        self.assertEqual(result, expected)
+
+        # Edge case where an @ comes directly after a long handle
+        test7 = "@abcdefghijklmnopqrstu@abcde @abcdefghijklmnopqrst@abcde @abcdefghijklmnopqrst_@abcde @abcdefghijklmnopqrst5@abcde"
+        expected = [
+            'u',
+            '@abcde',
+            '@abcdefghijklmnopqrst',
+            '@abcde',
+            '_',
+            '@abcde',
+            '5',
+            '@abcde',
+        ]
+        result = tokenizer.tokenize(test7)
+        self.assertEqual(result, expected)
+
+    def test_treebank_span_tokenizer(self):
+        """
+        Test TreebankWordTokenizer.span_tokenize function
+        """
+
+        tokenizer = TreebankWordTokenizer()
+
+        # Test case in the docstring
+        test1 = "Good muffins cost $3.88\nin New (York).  Please (buy) me\ntwo of them.\n(Thanks)."
+        expected = [
+            (0, 4),
+            (5, 12),
+            (13, 17),
+            (18, 19),
+            (19, 23),
+            (24, 26),
+            (27, 30),
+            (31, 32),
+            (32, 36),
+            (36, 37),
+            (37, 38),
+            (40, 46),
+            (47, 48),
+            (48, 51),
+            (51, 52),
+            (53, 55),
+            (56, 59),
+            (60, 62),
+            (63, 68),
+            (69, 70),
+            (70, 76),
+            (76, 77),
+            (77, 78),
+        ]
+        result = list(tokenizer.span_tokenize(test1))
+        self.assertEqual(result, expected)
+
+        # Test case with double quotation
+        test2 = "The DUP is similar to the \"religious right\" in the United States and takes a hardline stance on social issues"
+        expected = [
+            (0, 3),
+            (4, 7),
+            (8, 10),
+            (11, 18),
+            (19, 21),
+            (22, 25),
+            (26, 27),
+            (27, 36),
+            (37, 42),
+            (42, 43),
+            (44, 46),
+            (47, 50),
+            (51, 57),
+            (58, 64),
+            (65, 68),
+            (69, 74),
+            (75, 76),
+            (77, 85),
+            (86, 92),
+            (93, 95),
+            (96, 102),
+            (103, 109),
+        ]
+        result = list(tokenizer.span_tokenize(test2))
+        self.assertEqual(result, expected)
+
+        # Test case with double quotation as well as converted quotations
+        test3 = "The DUP is similar to the \"religious right\" in the United States and takes a ``hardline'' stance on social issues"
+        expected = [
+            (0, 3),
+            (4, 7),
+            (8, 10),
+            (11, 18),
+            (19, 21),
+            (22, 25),
+            (26, 27),
+            (27, 36),
+            (37, 42),
+            (42, 43),
+            (44, 46),
+            (47, 50),
+            (51, 57),
+            (58, 64),
+            (65, 68),
+            (69, 74),
+            (75, 76),
+            (77, 79),
+            (79, 87),
+            (87, 89),
+            (90, 96),
+            (97, 99),
+            (100, 106),
+            (107, 113),
+        ]
+        result = list(tokenizer.span_tokenize(test3))
+        self.assertEqual(result, expected)
+
+    def test_word_tokenize(self):
+        """
+        Test word_tokenize function
+        """
+
+        sentence = "The 'v', I've been fooled but I'll seek revenge."
+        expected = ['The', "'", 'v', "'", ',', 'I', "'ve", 'been', 'fooled', 
+                    'but', 'I', "'ll", 'seek', 'revenge', '.']
+        self.assertEqual(word_tokenize(sentence), expected)
+
+        sentence = "'v' 're'"
+        expected = ["'", 'v', "'", "'re", "'"]
+        self.assertEqual(word_tokenize(sentence), expected)
+
+    def test_punkt_pair_iter(self):
+
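+        # punkt._pair_iter yields each element paired with its successor,
+        # using None as the successor of the final element.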
+        test_cases = [
+            ('12', [('1', '2'), ('2', None)]),
+            ('123', [('1', '2'), ('2', '3'), ('3', None)]),
+            ('1234', [('1', '2'), ('2', '3'), ('3', '4'), ('4', None)]),
+        ]
+
+        for (test_input, expected_output) in test_cases:
+            actual_output = [x for x in punkt._pair_iter(test_input)]
+
+            assert_equal(actual_output, expected_output)
+
+    def test_punkt_pair_iter_handles_stop_iteration_exception(self):
+        # test input to trigger StopIteration from next()
+        it = iter([])
+        # call method under test and produce a generator
+        gen = punkt._pair_iter(it)
+        # unpack generator, ensure that no error is raised
+        list(gen)
+
+    def test_punkt_tokenize_words_handles_stop_iteration_exception(self):
+        obj = punkt.PunktBaseClass()
+
+        class TestPunktTokenizeWordsMock:
+            def word_tokenize(self, s):
+                return iter([])
+
+        obj._lang_vars = TestPunktTokenizeWordsMock()
+        # unpack generator, ensure that no error is raised
+        list(obj._tokenize_words('test'))
diff --git a/nlp_resource_data/nltk/test/unit/test_twitter_auth.py b/nlp_resource_data/nltk/test/unit/test_twitter_auth.py
new file mode 100644 (file)
index 0000000..e0189fb
--- /dev/null
@@ -0,0 +1,181 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for static parts of Twitter package
+"""
+
+import os
+import unittest
+from nose import SkipTest
+
+try:
+    import twython
+except ImportError:
+    raise SkipTest("The twython library has not been installed.")
+
+from nltk.twitter import Authenticate
+
+
+class TestCredentials(unittest.TestCase):
+    """
+    Tests that Twitter credentials information from file is handled correctly.
+    """
+
+    def setUp(self):
+        self.subdir = os.path.join(os.path.dirname(__file__), 'files')
+        self.auth = Authenticate()
+        os.environ['TWITTER'] = 'twitter-files'
+
+    def test_environment(self):
+        """
+        Test that environment variable has been read correctly.
+        """
+        fn = os.path.basename(self.auth.creds_subdir)
+        self.assertEqual(fn, os.environ['TWITTER'])
+
+    def test_empty_subdir1(self):
+        """
+        Setting subdir to empty path should raise an error.
+        """
+        try:
+            self.auth.load_creds(subdir='')
+        # raises ValueError (zero length field name in format) for python 2.6
+        # OSError for the rest
+        except OSError:
+            pass
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('OSError exception not thrown.')
+
+    def test_empty_subdir2(self):
+        """
+        Setting subdir to `None` should raise an error.
+        """
+        self.auth.creds_subdir = None
+        try:
+            self.auth.load_creds()
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('ValueError exception not thrown.')
+
+    def test_missingdir(self):
+        """
+        Setting subdir to nonexistent directory should raise an error.
+        """
+        try:
+            self.auth.load_creds(subdir='/nosuchdir')
+        # raises ValueError (zero length field name in format) for python 2.6
+        # OSError for the rest
+        except OSError:
+            pass
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('OSError exception not thrown.')
+
+    def test_missingfile1(self):
+        """
+        Defaults for authentication will fail since 'credentials.txt' not
+        present in default subdir, as read from `os.environ['TWITTER']`.
+        """
+        try:
+            self.auth.load_creds()
+        # raises ValueError (zero length field name in format) for python 2.6
+        # OSError for the rest
+        except OSError:
+            pass
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('OSError exception not thrown.')
+
+    def test_missingfile2(self):
+        """
+        Credentials file 'foobar' cannot be found in default subdir.
+        """
+        try:
+            self.auth.load_creds(creds_file='foobar')
+        # raises ValueError (zero length field name in format) for python 2.6
+        # OSError for the rest
+        except OSError:
+            pass
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('OSError exception not thrown.')
+
+    def test_incomplete_file(self):
+        """
+        Credentials file 'bad_oauth1-1.txt' is incomplete
+        """
+        try:
+            self.auth.load_creds(creds_file='bad_oauth1-1.txt', subdir=self.subdir)
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('ValueError exception not thrown.')
+
+    def test_malformed_file1(self):
+        """
+        First key in credentials file 'bad_oauth1-2.txt' is ill-formed
+        """
+        try:
+            self.auth.load_creds(creds_file='bad_oauth1-2.txt', subdir=self.subdir)
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('ValueError exception not thrown.')
+
+    def test_malformed_file2(self):
+        """
+        A key in credentials file 'bad_oauth1-3.txt' is ill-formed
+        """
+        try:
+            self.auth.load_creds(creds_file='bad_oauth1-3.txt', subdir=self.subdir)
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('ValueError exception not thrown.')
+
+    def test_correct_path(self):
+        """
+        Path to default credentials file is well-formed, given specified
+        subdir.
+        """
+        self.auth.load_creds(subdir=self.subdir)
+        self.auth.creds_fullpath = os.path.join(self.subdir, self.auth.creds_file)
+
+    def test_correct_file1(self):
+        """
+        Default credentials file is identified
+        """
+        self.auth.load_creds(subdir=self.subdir)
+        self.assertEqual(self.auth.creds_file, 'credentials.txt')
+
+    def test_correct_file2(self):
+        """
+        Default credentials file has been read correctly
+        """
+        oauth = self.auth.load_creds(subdir=self.subdir)
+        self.assertEqual(oauth['app_key'], 'a')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/nlp_resource_data/nltk/test/unit/test_wordnet.py b/nlp_resource_data/nltk/test/unit/test_wordnet.py
new file mode 100644 (file)
index 0000000..08fd14a
--- /dev/null
@@ -0,0 +1,220 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.corpus.wordnet
+See also nltk/test/wordnet.doctest
+"""
+
+
+import collections
+import os
+import unittest
+
+from nose import SkipTest
+
+from nltk.corpus.reader.wordnet import WordNetCorpusReader
+from nltk.corpus import wordnet as wn
+from nltk.corpus import wordnet_ic as wnic
+from nltk.data import find as find_data
+
+
+wn.ensure_loaded()
+S = wn.synset
+L = wn.lemma
+
+
+class WordNetDemo(unittest.TestCase):
+    def test_retrieve_synset(self):
+        move_synset = S('go.v.21')
+        self.assertEqual(move_synset.name(), "move.v.15")
+        self.assertEqual(move_synset.lemma_names(), ['move', 'go'])
+        self.assertEqual(
+            move_synset.definition(), "have a turn; make one's move in a game"
+        )
+        self.assertEqual(move_synset.examples(), ['Can I go now?'])
+
+    def test_retrieve_synsets(self):
+        self.assertEqual(sorted(wn.synsets('zap', pos='n')), [S('zap.n.01')])
+        self.assertEqual(
+            sorted(wn.synsets('zap', pos='v')),
+            [S('microwave.v.01'), S('nuke.v.01'), S('zap.v.01'), S('zap.v.02')],
+        )
+
+    def test_hyperhyponyms(self):
+        # Not every synset has hypernyms().
+        self.assertEqual(S('travel.v.01').hypernyms(), [])
+        self.assertEqual(S('travel.v.02').hypernyms(), [S('travel.v.03')])
+        self.assertEqual(S('travel.v.03').hypernyms(), [])
+
+        # Test hyper-/hyponyms.
+        self.assertEqual(S('breakfast.n.1').hypernyms(), [S('meal.n.01')])
+        first_five_meal_hypo = [
+            S('banquet.n.02'),
+            S('bite.n.04'),
+            S('breakfast.n.01'),
+            S('brunch.n.01'),
+            S('buffet.n.02'),
+        ]
+        self.assertEqual(sorted(S('meal.n.1').hyponyms()[:5]), first_five_meal_hypo)
+        self.assertEqual(S('Austen.n.1').instance_hypernyms(), [S('writer.n.01')])
+        first_five_composer_hypo = [
+            S('ambrose.n.01'),
+            S('bach.n.01'),
+            S('barber.n.01'),
+            S('bartok.n.01'),
+            S('beethoven.n.01'),
+        ]
+        self.assertEqual(
+            S('composer.n.1').instance_hyponyms()[:5], first_five_composer_hypo
+        )
+
+        # Test root hyper-/hyponyms
+        self.assertEqual(S('person.n.01').root_hypernyms(), [S('entity.n.01')])
+        self.assertEqual(S('sail.v.01').root_hypernyms(), [S('travel.v.01')])
+        self.assertEqual(
+            S('fall.v.12').root_hypernyms(), [S('act.v.01'), S('fall.v.17')]
+        )
+
+    def test_derivationally_related_forms(self):
+        # Test `derivationally_related_forms()`
+        self.assertEqual(
+            L('zap.v.03.nuke').derivationally_related_forms(),
+            [L('atomic_warhead.n.01.nuke')],
+        )
+        self.assertEqual(
+            L('zap.v.03.atomize').derivationally_related_forms(),
+            [L('atomization.n.02.atomization')],
+        )
+        self.assertEqual(
+            L('zap.v.03.atomise').derivationally_related_forms(),
+            [L('atomization.n.02.atomisation')],
+        )
+        self.assertEqual(L('zap.v.03.zap').derivationally_related_forms(), [])
+
+    def test_meronyms_holonyms(self):
+        # Test meronyms, holonyms.
+        self.assertEqual(
+            S('dog.n.01').member_holonyms(), [S('canis.n.01'), S('pack.n.06')]
+        )
+        self.assertEqual(S('dog.n.01').part_meronyms(), [S('flag.n.07')])
+
+        self.assertEqual(S('faculty.n.2').member_meronyms(), [S('professor.n.01')])
+        self.assertEqual(S('copilot.n.1').member_holonyms(), [S('crew.n.01')])
+
+        self.assertEqual(
+            S('table.n.2').part_meronyms(),
+            [S('leg.n.03'), S('tabletop.n.01'), S('tableware.n.01')],
+        )
+        self.assertEqual(S('course.n.7').part_holonyms(), [S('meal.n.01')])
+
+        self.assertEqual(
+            S('water.n.1').substance_meronyms(), [S('hydrogen.n.01'), S('oxygen.n.01')]
+        )
+        self.assertEqual(
+            S('gin.n.1').substance_holonyms(),
+            [
+                S('gin_and_it.n.01'),
+                S('gin_and_tonic.n.01'),
+                S('martini.n.01'),
+                S('pink_lady.n.01'),
+            ],
+        )
+
+    def test_antonyms(self):
+        # Test antonyms.
+        self.assertEqual(
+            L('leader.n.1.leader').antonyms(), [L('follower.n.01.follower')]
+        )
+        self.assertEqual(
+            L('increase.v.1.increase').antonyms(), [L('decrease.v.01.decrease')]
+        )
+
+    def test_misc_relations(self):
+        # Test misc relations.
+        self.assertEqual(S('snore.v.1').entailments(), [S('sleep.v.01')])
+        self.assertEqual(
+            S('heavy.a.1').similar_tos(),
+            [
+                S('dense.s.03'),
+                S('doughy.s.01'),
+                S('heavier-than-air.s.01'),
+                S('hefty.s.02'),
+                S('massive.s.04'),
+                S('non-buoyant.s.01'),
+                S('ponderous.s.02'),
+            ],
+        )
+        self.assertEqual(S('light.a.1').attributes(), [S('weight.n.01')])
+        self.assertEqual(S('heavy.a.1').attributes(), [S('weight.n.01')])
+
+        # Test pertainyms.
+        self.assertEqual(
+            L('English.a.1.English').pertainyms(), [L('england.n.01.England')]
+        )
+
+    def test_lch(self):
+        # Test LCH.
+        self.assertEqual(
+            S('person.n.01').lowest_common_hypernyms(S('dog.n.01')),
+            [S('organism.n.01')],
+        )
+        self.assertEqual(
+            S('woman.n.01').lowest_common_hypernyms(S('girlfriend.n.02')),
+            [S('woman.n.01')],
+        )
+
+    def test_domains(self):
+        # Test domains.
+        self.assertEqual(S('code.n.03').topic_domains(), [S('computer_science.n.01')])
+        self.assertEqual(S('pukka.a.01').region_domains(), [S('india.n.01')])
+        self.assertEqual(S('freaky.a.01').usage_domains(), [S('slang.n.02')])
+
+    def test_in_topic_domains(self):
+        # Test in domains.
+        self.assertEqual(
+            S('computer_science.n.01').in_topic_domains()[0], S('access.n.05')
+        )
+        self.assertEqual(S('germany.n.01').in_region_domains()[23], S('trillion.n.02'))
+        self.assertEqual(S('slang.n.02').in_usage_domains()[1], S('airhead.n.01'))
+
+    def test_wordnet_similarities(self):
+        # Path based similarities.
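+        # path_similarity is 1 / (shortest hypernym-path distance + 1):
+        # dog.n.01 and cat.n.01 are each two edges from their common
+        # hypernym, so 1 / (4 + 1) = 0.2; lch and wup similarities are
+        # likewise derived from hypernym paths and taxonomy depths.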
+        self.assertAlmostEqual(S('cat.n.01').path_similarity(S('cat.n.01')), 1.0)
+        self.assertAlmostEqual(S('dog.n.01').path_similarity(S('cat.n.01')), 0.2)
+        self.assertAlmostEqual(
+            S('dog.n.01').lch_similarity(S('cat.n.01')), 2.028, places=3
+        )
+        self.assertAlmostEqual(
+            S('dog.n.01').wup_similarity(S('cat.n.01')), 0.8571, places=3
+        )
+        # Information Content similarities.
+        brown_ic = wnic.ic('ic-brown.dat')
+        self.assertAlmostEqual(
+            S('dog.n.01').jcn_similarity(S('cat.n.01'), brown_ic), 0.4497, places=3
+        )
+        semcor_ic = wnic.ic('ic-semcor.dat')
+        self.assertAlmostEqual(
+            S('dog.n.01').lin_similarity(S('cat.n.01'), semcor_ic), 0.8863, places=3
+        )
+
+    def test_omw_lemma_no_trailing_underscore(self):
+        expected = sorted([
+            u'popolna_sprememba_v_mišljenju',
+            u'popoln_obrat',
+            u'preobrat',
+            u'preobrat_v_mišljenju',
+        ])
+        self.assertEqual(sorted(S('about-face.n.02').lemma_names(lang='slv')), expected)
+
+    def test_iterable_type_for_all_lemma_names(self):
+        # Duck-test for iterables.
+        # See https://stackoverflow.com/a/36230057/610569
+        cat_lemmas = wn.all_lemma_names(lang='cat')
+        eng_lemmas = wn.all_lemma_names(lang='eng')
+
+        self.assertTrue(hasattr(eng_lemmas, '__iter__'))
+        self.assertTrue(hasattr(eng_lemmas, '__next__') or hasattr(eng_lemmas, 'next'))
+        self.assertTrue(eng_lemmas.__iter__() is eng_lemmas)
+
+        self.assertTrue(hasattr(cat_lemmas, '__iter__'))
+        self.assertTrue(hasattr(cat_lemmas, '__next__') or hasattr(cat_lemmas, 'next'))
+        self.assertTrue(cat_lemmas.__iter__() is cat_lemmas)
diff --git a/nlp_resource_data/nltk/test/unit/translate/__init__.py b/nlp_resource_data/nltk/test/unit/translate/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8a4115d
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_bleu.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_bleu.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f84b75a
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_bleu.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc
new file mode 100644 (file)
index 0000000..e9f61e1
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc
new file mode 100644 (file)
index 0000000..77558d2
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc
new file mode 100644 (file)
index 0000000..13b6a54
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc
new file mode 100644 (file)
index 0000000..b3c6f66
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm4.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm4.cpython-37.pyc
new file mode 100644 (file)
index 0000000..7a975f3
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm4.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm5.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm5.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a61b2c2
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm5.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc
new file mode 100644 (file)
index 0000000..2a75f20
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_nist.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_nist.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ee088b1
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_nist.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_stack_decoder.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_stack_decoder.cpython-37.pyc
new file mode 100644 (file)
index 0000000..9c024dd
Binary files /dev/null and b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_stack_decoder.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_bleu.py b/nlp_resource_data/nltk/test/unit/translate/test_bleu.py
new file mode 100644 (file)
index 0000000..a97d4de
--- /dev/null
@@ -0,0 +1,271 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for BLEU translation evaluation metric
+"""
+
+import functools
+import io
+import unittest
+
+from nltk.data import find
+from nltk.translate.bleu_score import (
+    modified_precision,
+    brevity_penalty,
+    closest_ref_length,
+)
+from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction
+
+
+class TestBLEU(unittest.TestCase):
+    def test_modified_precision(self):
+        """
+        Examples from the original BLEU paper
+        http://www.aclweb.org/anthology/P02-1040.pdf
+        """
+        # Example 1: the "the*" example.
+        # Reference sentences.
+        ref1 = 'the cat is on the mat'.split()
+        ref2 = 'there is a cat on the mat'.split()
+        # Hypothesis sentence(s).
+        hyp1 = 'the the the the the the the'.split()
+
+        references = [ref1, ref2]
+
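+        # Modified n-gram precision clips each hypothesis n-gram count at
+        # the largest count seen in any single reference: 'the' occurs 7
+        # times in hyp1 but at most twice in a reference, so 2/7 = 0.2857.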
+        # Testing modified unigram precision.
+        hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1))
+        assert round(hyp1_unigram_precision, 4) == 0.2857
+        # With assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_unigram_precision, 0.28571428, places=4)
+
+        # Testing modified bigram precision.
+        assert float(modified_precision(references, hyp1, n=2)) == 0.0
+
+        # Example 2: the "of the" example.
+        # Reference sentences
+        ref1 = str(
+            'It is a guide to action that ensures that the military '
+            'will forever heed Party commands'
+        ).split()
+        ref2 = str(
+            'It is the guiding principle which guarantees the military '
+            'forces always being under the command of the Party'
+        ).split()
+        ref3 = str(
+            'It is the practical guide for the army always to heed '
+            'the directions of the party'
+        ).split()
+        # Hypothesis sentence(s).
+        hyp1 = 'of the'.split()
+
+        references = [ref1, ref2, ref3]
+        # Testing modified unigram precision.
+        assert float(modified_precision(references, hyp1, n=1)) == 1.0
+
+        # Testing modified bigram precision.
+        assert float(modified_precision(references, hyp1, n=2)) == 1.0
+
+        # Example 3: Proper MT outputs.
+        hyp1 = str(
+            'It is a guide to action which ensures that the military '
+            'always obeys the commands of the party'
+        ).split()
+        hyp2 = str(
+            'It is to insure the troops forever hearing the activity '
+            'guidebook that party direct'
+        ).split()
+
+        references = [ref1, ref2, ref3]
+
+        # Unigram precision.
+        hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1))
+        hyp2_unigram_precision = float(modified_precision(references, hyp2, n=1))
+        # Test unigram precision with assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_unigram_precision, 0.94444444, places=4)
+        self.assertAlmostEqual(hyp2_unigram_precision, 0.57142857, places=4)
+        # Test unigram precision with rounding.
+        assert round(hyp1_unigram_precision, 4) == 0.9444
+        assert round(hyp2_unigram_precision, 4) == 0.5714
+
+        # Bigram precision
+        hyp1_bigram_precision = float(modified_precision(references, hyp1, n=2))
+        hyp2_bigram_precision = float(modified_precision(references, hyp2, n=2))
+        # Test bigram precision with assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_bigram_precision, 0.58823529, places=4)
+        self.assertAlmostEqual(hyp2_bigram_precision, 0.07692307, places=4)
+        # Test bigram precision with rounding.
+        assert round(hyp1_bigram_precision, 4) == 0.5882
+        assert round(hyp2_bigram_precision, 4) == 0.0769
+
+    def test_brevity_penalty(self):
+        # Test case from brevity_penalty_closest function in mteval-v13a.pl.
+        # Same test cases as in the doctest in nltk.translate.bleu_score.py
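+        # The penalty is 1.0 when the hypothesis is at least as long as
+        # the closest reference and exp(1 - closest_ref_len / hyp_len)
+        # otherwise; the closest reference length to 7 here is 8, giving
+        # exp(1 - 8/7) ~= 0.8669.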
+        references = [['a'] * 11, ['a'] * 8]
+        hypothesis = ['a'] * 7
+        hyp_len = len(hypothesis)
+        closest_ref_len = closest_ref_length(references, hyp_len)
+        self.assertAlmostEqual(
+            brevity_penalty(closest_ref_len, hyp_len), 0.8669, places=4
+        )
+
+        references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
+        hypothesis = ['a'] * 7
+        hyp_len = len(hypothesis)
+        closest_ref_len = closest_ref_length(references, hyp_len)
+        assert brevity_penalty(closest_ref_len, hyp_len) == 1.0
+
+    def test_zero_matches(self):
+        # Test case where there are no matches
+        references = ['The candidate has no alignment to any of the references'.split()]
+        hypothesis = 'John loves Mary'.split()
+
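+        # BLEU is the brevity penalty times a weighted geometric mean of
+        # the modified n-gram precisions, so a single zero precision
+        # (without smoothing) drives the whole score to zero.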
+        # Test BLEU to nth order of n-grams, where n is len(hypothesis).
+        for n in range(1, len(hypothesis)):
+            weights = [1.0 / n] * n  # Uniform weights.
+            assert sentence_bleu(references, hypothesis, weights) == 0
+
+    def test_full_matches(self):
+        # Test case where the hypothesis matches a reference exactly.
+        references = ['John loves Mary'.split()]
+        hypothesis = 'John loves Mary'.split()
+
+        # Test BLEU at n-gram orders from 1 to len(hypothesis) - 1.
+        for n in range(1, len(hypothesis)):
+            weights = [1.0 / n] * n  # Uniform weights.
+            assert sentence_bleu(references, hypothesis, weights) == 1.0
+
+    def test_partial_matches_hypothesis_longer_than_reference(self):
+        references = ['John loves Mary'.split()]
+        hypothesis = 'John loves Mary who loves Mike'.split()
+        # Since no matching 4-grams were found, the result should be zero:
+        # exp(w_1*log(p_1) + w_2*log(p_2) + w_3*log(p_3) + w_4*(-inf)) = 0
+        self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
+        # Checks that the warning has been raised because len(reference) < 4.
+        try:
+            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
+        except AttributeError:
+            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
+
+
+# @unittest.skip("Skipping fringe cases for BLEU.")
+class TestBLEUFringeCases(unittest.TestCase):
+    def test_case_where_n_is_bigger_than_hypothesis_length(self):
+        # Test BLEU to nth order of n-grams, where n > len(hypothesis).
+        references = ['John loves Mary ?'.split()]
+        hypothesis = 'John loves Mary'.split()
+        n = len(hypothesis) + 1
+        weights = [1.0 / n] * n  # Uniform weights.
+        # Since no matching n-grams were found, the result should be zero:
+        # exp(w_1*log(p_1) + w_2*log(p_2) + w_3*log(p_3) + w_4*(-inf)) = 0
+        self.assertAlmostEqual(
+            sentence_bleu(references, hypothesis, weights), 0.0, places=4
+        )
+        # Checks that the warning has been raised because len(hypothesis) < 4.
+        try:
+            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
+        except AttributeError:
+            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
+
+        # Test case where n > len(hypothesis) and also n > len(reference),
+        # the special case where reference == hypothesis.
+        references = ['John loves Mary'.split()]
+        hypothesis = 'John loves Mary'.split()
+        # Since no matching 4-grams were found, the result should be zero:
+        # exp(w_1*log(p_1) + w_2*log(p_2) + w_3*log(p_3) + w_4*(-inf)) = 0
+        self.assertAlmostEqual(
+            sentence_bleu(references, hypothesis, weights), 0.0, places=4
+        )
+
+    def test_empty_hypothesis(self):
+        # Test case where the hypothesis is empty.
+        references = ['The candidate has no alignment to any of the references'.split()]
+        hypothesis = []
+        assert sentence_bleu(references, hypothesis) == 0
+
+    def test_empty_references(self):
+        # Test case where the reference is empty.
+        references = [[]]
+        hypothesis = 'John loves Mary'.split()
+        assert sentence_bleu(references, hypothesis) == 0
+
+    def test_empty_references_and_hypothesis(self):
+        # Test case where both the references and the hypothesis are empty.
+        references = [[]]
+        hypothesis = []
+        assert sentence_bleu(references, hypothesis) == 0
+
+    def test_reference_or_hypothesis_shorter_than_fourgrams(self):
+        # Test case where the length of the reference or hypothesis
+        # is shorter than 4.
+        references = ['let it go'.split()]
+        hypothesis = 'let go it'.split()
+        # Checks that the hypothesis-reference pair scores 0.0:
+        # exp(w_1*log(p_1) + w_2*log(p_2) + w_3*log(p_3) + w_4*(-inf)) = 0
+        self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
+        # Checks that the warning has been raised.
+        try:
+            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
+        except AttributeError:
+            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
+
+
+class TestBLEUvsMteval13a(unittest.TestCase):
+    def test_corpus_bleu(self):
+        ref_file = find('models/wmt15_eval/ref.ru')
+        hyp_file = find('models/wmt15_eval/google.ru')
+        mteval_output_file = find('models/wmt15_eval/mteval-13a.output')
+
+        # Reads the BLEU scores from the `mteval-13a.output` file.
+        # The order of the list corresponds to the order of the ngrams.
+        with open(mteval_output_file, 'r') as mteval_fin:
+            # The numbers are located on the second-to-last line of the file.
+            # The first and last items on that line are names, not scores,
+            # hence the [1:-1] slice.
+            mteval_bleu_scores = map(float, mteval_fin.readlines()[-2].split()[1:-1])
+
+        with io.open(ref_file, 'r', encoding='utf8') as ref_fin:
+            with io.open(hyp_file, 'r', encoding='utf8') as hyp_fin:
+                # Whitespace tokenize the file.
+                # Note: split() strips surrounding whitespace automatically.
+                hypothesis = list(map(lambda x: x.split(), hyp_fin))
+                # Note that the corpus_bleu input is a list of lists of references.
+                references = list(map(lambda x: [x.split()], ref_fin))
+                # Without smoothing.
+                for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
+                    nltk_bleu = corpus_bleu(
+                        references, hypothesis, weights=(1.0 / i,) * i
+                    )
+                    # Check that the BLEU score difference is less than 0.005.
+                    # Note: This is an approximate comparison; as much as
+                    #       +/- 0.01 BLEU might be "statistically significant",
+                    #       the actual translation quality might not be.
+                    assert abs(mteval_bleu - nltk_bleu) < 0.005
+
+                # With the same smoothing method used in mteval-v13a.pl
+                chencherry = SmoothingFunction()
+                for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
+                    nltk_bleu = corpus_bleu(
+                        references,
+                        hypothesis,
+                        weights=(1.0 / i,) * i,
+                        smoothing_function=chencherry.method3,
+                    )
+                    assert abs(mteval_bleu - nltk_bleu) < 0.005
+
+
+class TestBLEUWithBadSentence(unittest.TestCase):
+    def test_corpus_bleu_with_bad_sentence(self):
+        hyp = "Teo S yb , oe uNb , R , T t , , t Tue Ar saln S , , 5istsi l , 5oe R ulO sae oR R"
+        ref = str(
+            "Their tasks include changing a pump on the faulty stokehold ."
+            "Likewise , two species that are very similar in morphology "
+            "were distinguished using genetics ."
+        )
+        references = [[ref.split()]]
+        hypotheses = [hyp.split()]
+        try:  # Check that the warning is raised since the no. of 2-gram overlaps is 0.
+            with self.assertWarns(UserWarning):
+                # Verify that the BLEU score collapses to 0.0 since the no. of 2-gram overlaps is 0.
+                self.assertAlmostEqual(
+                    corpus_bleu(references, hypotheses), 0.0, places=4
+                )
+        except AttributeError:  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
+            self.assertAlmostEqual(corpus_bleu(references, hypotheses), 0.0, places=4)
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_gdfa.py b/nlp_resource_data/nltk/test/unit/translate/test_gdfa.py
new file mode 100644 (file)
index 0000000..58db482
--- /dev/null
+++ b/nlp_resource_data/nltk/test/unit/translate/test_gdfa.py
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+"""
+Tests GDFA alignments
+"""
+
+import functools
+import io
+import unittest
+
+from nltk.translate.gdfa import grow_diag_final_and
+
+
+class TestGDFA(unittest.TestCase):
+    def test_from_eflomal_outputs(self):
+        """
+        Testing GDFA with first 10 eflomal outputs from issue #1829
+        https://github.com/nltk/nltk/issues/1829
+        """
+        # Input.
+        forwards = [
+            '0-0 1-2',
+            '0-0 1-1',
+            '0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 7-8 9-9 10-10 9-11 11-12 12-13 13-14',
+            '0-0 1-1 1-2 2-3 3-4 4-5 4-6 5-7 6-8 8-9 9-10',
+            '0-0 14-1 15-2 16-3 20-5 21-6 22-7 5-8 6-9 7-10 8-11 9-12 10-13 11-14 12-15 13-16 14-17 17-18 18-19 19-20 20-21 23-22 24-23 25-24 26-25 27-27 28-28 29-29 30-30 31-31',
+            '0-0 1-1 0-2 2-3',
+            '0-0 2-2 4-4',
+            '0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-20',
+            '3-0 4-1 6-2 5-3 6-4 7-5 8-6 9-7 10-8 11-9 16-10 9-12 10-13 12-14',
+            '1-0',
+        ]
+        backwards = [
+            '0-0 1-2',
+            '0-0 1-1',
+            '0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 9-8 10-10 11-12 12-11 13-13',
+            '0-0 1-2 2-3 3-4 4-6 6-8 7-5 8-7 9-8',
+            '0-0 1-8 2-9 3-10 4-11 5-12 6-11 8-13 9-14 10-15 11-16 12-17 13-18 14-19 15-20 16-21 17-22 18-23 19-24 20-29 21-30 22-31 23-2 24-3 25-4 26-5 27-5 28-6 29-7 30-28 31-31',
+            '0-0 1-1 2-3',
+            '0-0 1-1 2-3 4-4',
+            '0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-16 21-18',
+            '0-0 1-1 3-2 4-1 5-3 6-4 7-5 8-6 9-7 10-8 11-9 12-8 13-9 14-8 15-9 16-10',
+            '1-0',
+        ]
+        source_lens = [2, 3, 3, 15, 11, 33, 4, 6, 23, 18]
+        target_lens = [2, 4, 3, 16, 12, 33, 5, 6, 22, 16]
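+        # Alignments use the 'i-j' (Pharaoh) format: source word i is
+        # aligned to target word j.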
+        # Expected Output.
+        expected = [
+            [(0, 0), (1, 2)],
+            [(0, 0), (1, 1)],
+            [
+                (0, 0),
+                (2, 1),
+                (3, 2),
+                (4, 3),
+                (5, 4),
+                (6, 5),
+                (7, 6),
+                (8, 7),
+                (10, 10),
+                (11, 12),
+            ],
+            [
+                (0, 0),
+                (1, 1),
+                (1, 2),
+                (2, 3),
+                (3, 4),
+                (4, 5),
+                (4, 6),
+                (5, 7),
+                (6, 8),
+                (7, 5),
+                (8, 7),
+                (8, 9),
+                (9, 8),
+                (9, 10),
+            ],
+            [
+                (0, 0),
+                (1, 8),
+                (2, 9),
+                (3, 10),
+                (4, 11),
+                (5, 8),
+                (6, 9),
+                (6, 11),
+                (7, 10),
+                (8, 11),
+                (31, 31),
+            ],
+            [(0, 0), (0, 2), (1, 1), (2, 3)],
+            [(0, 0), (1, 1), (2, 2), (2, 3), (4, 4)],
+            [
+                (0, 0),
+                (1, 1),
+                (2, 3),
+                (3, 4),
+                (5, 5),
+                (7, 6),
+                (8, 7),
+                (9, 8),
+                (10, 9),
+                (11, 10),
+                (12, 11),
+                (13, 12),
+                (14, 13),
+                (15, 14),
+                (16, 16),
+                (17, 17),
+                (18, 18),
+                (19, 19),
+            ],
+            [
+                (0, 0),
+                (1, 1),
+                (3, 0),
+                (3, 2),
+                (4, 1),
+                (5, 3),
+                (6, 2),
+                (6, 4),
+                (7, 5),
+                (8, 6),
+                (9, 7),
+                (9, 12),
+                (10, 8),
+                (10, 13),
+                (11, 9),
+                (12, 8),
+                (12, 14),
+                (13, 9),
+                (14, 8),
+                (15, 9),
+                (16, 10),
+            ],
+            [(1, 0)],
+            [
+                (0, 0),
+                (1, 1),
+                (3, 2),
+                (4, 3),
+                (5, 4),
+                (6, 5),
+                (7, 6),
+                (9, 10),
+                (10, 12),
+                (11, 13),
+                (12, 14),
+                (13, 15),
+            ],
+        ]
+
+        # Iterate through all 10 examples and check for expected outputs.
+        for fw, bw, src_len, trg_len, expect in zip(
+            forwards, backwards, source_lens, target_lens, expected
+        ):
+            self.assertListEqual(expect, grow_diag_final_and(src_len, trg_len, fw, bw))
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm1.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm1.py
new file mode 100644 (file)
index 0000000..ae8c941
--- /dev/null
+++ b/nlp_resource_data/nltk/test/unit/translate/test_ibm1.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for IBM Model 1 training methods
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate import IBMModel1
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel1(unittest.TestCase):
+    def test_set_uniform_translation_probabilities(self):
+        # arrange
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model1 = IBMModel1(corpus, 0)
+
+        # act
+        model1.set_uniform_probabilities(corpus)
+
+        # assert
+        # expected_prob = 1.0 / (target vocab size + 1)
+        self.assertEqual(model1.translation_table['ham']['eier'], 1.0 / 3)
+        self.assertEqual(model1.translation_table['eggs'][None], 1.0 / 3)
+
+    def test_set_uniform_translation_probabilities_of_non_domain_values(self):
+        # arrange
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model1 = IBMModel1(corpus, 0)
+
+        # act
+        model1.set_uniform_probabilities(corpus)
+
+        # assert
+        # examine target words that are not in the training data domain
+        self.assertEqual(model1.translation_table['parrot']['eier'], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        # arrange
+        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
+        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
+        corpus = [AlignedSent(trg_sentence, src_sentence)]
+        alignment_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + src_sentence,
+            ['UNUSED'] + trg_sentence,
+            None,
+        )
+
+        translation_table = defaultdict(lambda: defaultdict(float))
+        translation_table['i']['ich'] = 0.98
+        translation_table['love']['gern'] = 0.98
+        translation_table['to'][None] = 0.98
+        translation_table['eat']['esse'] = 0.98
+        translation_table['smoked']['räucherschinken'] = 0.98
+        translation_table['ham']['räucherschinken'] = 0.98
+
+        model1 = IBMModel1(corpus, 0)
+        model1.translation_table = translation_table
+
+        # act
+        probability = model1.prob_t_a_given_s(alignment_info)
+
+        # assert
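+        # Model 1 treats every alignment as equally likely, so the score
+        # reduces to the product of the six lexical probabilities
+        # t(t | s): 0.98 ** 6 ~ 0.8858.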
+        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        expected_probability = lexical_translation
+        self.assertEqual(round(probability, 4), round(expected_probability, 4))
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm2.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm2.py
new file mode 100644 (file)
index 0000000..1d0579b
--- /dev/null
+++ b/nlp_resource_data/nltk/test/unit/translate/test_ibm2.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for IBM Model 2 training methods
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate import IBMModel2
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel2(unittest.TestCase):
+    def test_set_uniform_alignment_probabilities(self):
+        # arrange
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model2 = IBMModel2(corpus, 0)
+
+        # act
+        model2.set_uniform_probabilities(corpus)
+
+        # assert
+        # expected_prob = 1.0 / (length of source sentence + 1)
+        self.assertEqual(model2.alignment_table[0][1][3][2], 1.0 / 4)
+        self.assertEqual(model2.alignment_table[2][4][2][4], 1.0 / 3)
+
+    def test_set_uniform_alignment_probabilities_of_non_domain_values(self):
+        # arrange
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model2 = IBMModel2(corpus, 0)
+
+        # act
+        model2.set_uniform_probabilities(corpus)
+
+        # assert
+        # examine i and j values that are not in the training data domain
+        self.assertEqual(model2.alignment_table[99][1][3][2], IBMModel.MIN_PROB)
+        self.assertEqual(model2.alignment_table[2][99][2][4], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        # arrange
+        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
+        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
+        corpus = [AlignedSent(trg_sentence, src_sentence)]
+        alignment_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + src_sentence,
+            ['UNUSED'] + trg_sentence,
+            None,
+        )
+
+        translation_table = defaultdict(lambda: defaultdict(float))
+        translation_table['i']['ich'] = 0.98
+        translation_table['love']['gern'] = 0.98
+        translation_table['to'][None] = 0.98
+        translation_table['eat']['esse'] = 0.98
+        translation_table['smoked']['räucherschinken'] = 0.98
+        translation_table['ham']['räucherschinken'] = 0.98
+
+        alignment_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
+        )
+        alignment_table[0][3][5][6] = 0.97  # None -> to
+        alignment_table[1][1][5][6] = 0.97  # ich -> i
+        alignment_table[2][4][5][6] = 0.97  # esse -> eat
+        alignment_table[4][2][5][6] = 0.97  # gern -> love
+        alignment_table[5][5][5][6] = 0.96  # räucherschinken -> smoked
+        alignment_table[5][6][5][6] = 0.96  # räucherschinken -> ham
+
+        model2 = IBMModel2(corpus, 0)
+        model2.translation_table = translation_table
+        model2.alignment_table = alignment_table
+
+        # act
+        probability = model2.prob_t_a_given_s(alignment_info)
+
+        # assert
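+        # Model 2 multiplies each lexical probability t(t | s) by an
+        # alignment probability indexed as alignment_table[i][j][l][m]
+        # for source position i, target position j and sentence lengths
+        # l and m; e.g. 'to' aligned to None uses the
+        # alignment_table[0][3][5][6] entry above.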
+        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        alignment = 0.97 * 0.97 * 0.97 * 0.97 * 0.96 * 0.96
+        expected_probability = lexical_translation * alignment
+        self.assertEqual(round(probability, 4), round(expected_probability, 4))
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm3.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm3.py
new file mode 100644 (file)
index 0000000..7c42404
--- /dev/null
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for IBM Model 3 training methods
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate import IBMModel3
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel3(unittest.TestCase):
+    def test_set_uniform_distortion_probabilities(self):
+        # arrange
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model3 = IBMModel3(corpus, 0)
+
+        # act
+        model3.set_uniform_probabilities(corpus)
+
+        # assert
+        # expected_prob = 1.0 / length of target sentence
+        self.assertEqual(model3.distortion_table[1][0][3][2], 1.0 / 2)
+        self.assertEqual(model3.distortion_table[4][2][2][4], 1.0 / 4)
+
+    def test_set_uniform_distortion_probabilities_of_non_domain_values(self):
+        # arrange
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model3 = IBMModel3(corpus, 0)
+
+        # act
+        model3.set_uniform_probabilities(corpus)
+
+        # assert
+        # examine i and j values that are not in the training data domain
+        self.assertEqual(model3.distortion_table[0][0][3][2], IBMModel.MIN_PROB)
+        self.assertEqual(model3.distortion_table[9][2][2][4], IBMModel.MIN_PROB)
+        self.assertEqual(model3.distortion_table[2][9][2][4], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        # arrange
+        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
+        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
+        corpus = [AlignedSent(trg_sentence, src_sentence)]
+        alignment_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + src_sentence,
+            ['UNUSED'] + trg_sentence,
+            [[3], [1], [4], [], [2], [5, 6]],
+        )
+
+        distortion_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
+        )
+        distortion_table[1][1][5][6] = 0.97  # i -> ich
+        distortion_table[2][4][5][6] = 0.97  # love -> gern
+        distortion_table[3][0][5][6] = 0.97  # to -> NULL
+        distortion_table[4][2][5][6] = 0.97  # eat -> esse
+        distortion_table[5][5][5][6] = 0.97  # smoked -> räucherschinken
+        distortion_table[6][5][5][6] = 0.97  # ham -> räucherschinken
+
+        translation_table = defaultdict(lambda: defaultdict(float))
+        translation_table['i']['ich'] = 0.98
+        translation_table['love']['gern'] = 0.98
+        translation_table['to'][None] = 0.98
+        translation_table['eat']['esse'] = 0.98
+        translation_table['smoked']['räucherschinken'] = 0.98
+        translation_table['ham']['räucherschinken'] = 0.98
+
+        fertility_table = defaultdict(lambda: defaultdict(float))
+        fertility_table[1]['ich'] = 0.99
+        fertility_table[1]['esse'] = 0.99
+        fertility_table[0]['ja'] = 0.99
+        fertility_table[1]['gern'] = 0.99
+        fertility_table[2]['räucherschinken'] = 0.999
+        fertility_table[1][None] = 0.99
+
+        probabilities = {
+            'p1': 0.167,
+            'translation_table': translation_table,
+            'distortion_table': distortion_table,
+            'fertility_table': fertility_table,
+            'alignment_table': None,
+        }
+
+        model3 = IBMModel3(corpus, 0, probabilities)
+
+        # act
+        probability = model3.prob_t_a_given_s(alignment_info)
+
+        # assert
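+        # The factors below follow Model 3's generative story: the NULL
+        # term is choose(m - phi_0, phi_0) * p1**phi_0 * p0**(m - 2*phi_0)
+        # with m = 6, phi_0 = 1 and p0 = 1 - p1 = 0.833, and each source
+        # word contributes phi! * n(phi | s), hence the 2 * 0.999 for the
+        # fertility-2 'räucherschinken'.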
+        null_generation = 5 * pow(0.167, 1) * pow(0.833, 4)
+        fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
+        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        distortion = 0.97 * 0.97 * 0.97 * 0.97 * 0.97 * 0.97
+        expected_probability = (
+            null_generation * fertility * lexical_translation * distortion
+        )
+        self.assertEqual(round(probability, 4), round(expected_probability, 4))
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm4.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm4.py
new file mode 100644 (file)
index 0000000..c6e5398
--- /dev/null
+++ b/nlp_resource_data/nltk/test/unit/translate/test_ibm4.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for IBM Model 4 training methods
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate import IBMModel4
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel4(unittest.TestCase):
+    def test_set_uniform_distortion_probabilities_of_max_displacements(self):
+        # arrange
+        src_classes = {'schinken': 0, 'eier': 0, 'spam': 1}
+        trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2}
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model4 = IBMModel4(corpus, 0, src_classes, trg_classes)
+
+        # act
+        model4.set_uniform_probabilities(corpus)
+
+        # assert
+        # number of displacement values =
+        #     2 * (number of words in longest target sentence - 1)
+        expected_prob = 1.0 / (2 * (4 - 1))
+
+        # examine the boundary values for (displacement, src_class, trg_class)
+        self.assertEqual(model4.head_distortion_table[3][0][0], expected_prob)
+        self.assertEqual(model4.head_distortion_table[-3][1][2], expected_prob)
+        self.assertEqual(model4.non_head_distortion_table[3][0], expected_prob)
+        self.assertEqual(model4.non_head_distortion_table[-3][2], expected_prob)
+
+    def test_set_uniform_distortion_probabilities_of_non_domain_values(self):
+        # arrange
+        src_classes = {'schinken': 0, 'eier': 0, 'spam': 1}
+        trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2}
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model4 = IBMModel4(corpus, 0, src_classes, trg_classes)
+
+        # act
+        model4.set_uniform_probabilities(corpus)
+
+        # assert
+        # examine displacement values that are not in the training data domain
+        self.assertEqual(model4.head_distortion_table[4][0][0], IBMModel.MIN_PROB)
+        self.assertEqual(model4.head_distortion_table[100][1][2], IBMModel.MIN_PROB)
+        self.assertEqual(model4.non_head_distortion_table[4][0], IBMModel.MIN_PROB)
+        self.assertEqual(model4.non_head_distortion_table[100][2], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        # arrange
+        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
+        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
+        src_classes = {'räucherschinken': 0, 'ja': 1, 'ich': 2, 'esse': 3, 'gern': 4}
+        trg_classes = {'ham': 0, 'smoked': 1, 'i': 3, 'love': 4, 'to': 2, 'eat': 4}
+        corpus = [AlignedSent(trg_sentence, src_sentence)]
+        alignment_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + src_sentence,
+            ['UNUSED'] + trg_sentence,
+            [[3], [1], [4], [], [2], [5, 6]],
+        )
+
+        head_distortion_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(float))
+        )
+        head_distortion_table[1][None][3] = 0.97  # None, i
+        head_distortion_table[3][2][4] = 0.97  # ich, eat
+        head_distortion_table[-2][3][4] = 0.97  # esse, love
+        head_distortion_table[3][4][1] = 0.97  # gern, smoked
+
+        non_head_distortion_table = defaultdict(lambda: defaultdict(float))
+        non_head_distortion_table[1][0] = 0.96  # ham
+
+        translation_table = defaultdict(lambda: defaultdict(float))
+        translation_table['i']['ich'] = 0.98
+        translation_table['love']['gern'] = 0.98
+        translation_table['to'][None] = 0.98
+        translation_table['eat']['esse'] = 0.98
+        translation_table['smoked']['räucherschinken'] = 0.98
+        translation_table['ham']['räucherschinken'] = 0.98
+
+        fertility_table = defaultdict(lambda: defaultdict(float))
+        fertility_table[1]['ich'] = 0.99
+        fertility_table[1]['esse'] = 0.99
+        fertility_table[0]['ja'] = 0.99
+        fertility_table[1]['gern'] = 0.99
+        fertility_table[2]['räucherschinken'] = 0.999
+        fertility_table[1][None] = 0.99
+
+        probabilities = {
+            'p1': 0.167,
+            'translation_table': translation_table,
+            'head_distortion_table': head_distortion_table,
+            'non_head_distortion_table': non_head_distortion_table,
+            'fertility_table': fertility_table,
+            'alignment_table': None,
+        }
+
+        model4 = IBMModel4(corpus, 0, src_classes, trg_classes, probabilities)
+
+        # act
+        probability = model4.prob_t_a_given_s(alignment_info)
+
+        # assert
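+        # Model 4 scores placement by displacement: the head word of each
+        # cept draws from head_distortion_table and the remaining words
+        # from non_head_distortion_table; the factor of 1 below is for
+        # the NULL-aligned 'to', which incurs no distortion cost.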
+        null_generation = 5 * pow(0.167, 1) * pow(0.833, 4)
+        fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
+        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        distortion = 0.97 * 0.97 * 1 * 0.97 * 0.97 * 0.96
+        expected_probability = (
+            null_generation * fertility * lexical_translation * distortion
+        )
+        self.assertEqual(round(probability, 4), round(expected_probability, 4))
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm5.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm5.py
new file mode 100644 (file)
index 0000000..a3eecb3
--- /dev/null
+++ b/nlp_resource_data/nltk/test/unit/translate/test_ibm5.py
@@ -0,0 +1,164 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for IBM Model 5 training methods
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate import IBMModel4
+from nltk.translate import IBMModel5
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel5(unittest.TestCase):
+    def test_set_uniform_vacancy_probabilities_of_max_displacements(self):
+        # arrange
+        src_classes = {'schinken': 0, 'eier': 0, 'spam': 1}
+        trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2}
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model5 = IBMModel5(corpus, 0, src_classes, trg_classes)
+
+        # act
+        model5.set_uniform_probabilities(corpus)
+
+        # assert
+        # number of vacancy difference values =
+        #     2 * number of words in longest target sentence
+        expected_prob = 1.0 / (2 * 4)
+
+        # examine the boundary values for (dv, max_v, trg_class)
+        self.assertEqual(model5.head_vacancy_table[4][4][0], expected_prob)
+        self.assertEqual(model5.head_vacancy_table[-3][1][2], expected_prob)
+        self.assertEqual(model5.non_head_vacancy_table[4][4][0], expected_prob)
+        self.assertEqual(model5.non_head_vacancy_table[-3][1][2], expected_prob)
+
+    def test_set_uniform_vacancy_probabilities_of_non_domain_values(self):
+        # arrange
+        src_classes = {'schinken': 0, 'eier': 0, 'spam': 1}
+        trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2}
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model5 = IBMModel5(corpus, 0, src_classes, trg_classes)
+
+        # act
+        model5.set_uniform_probabilities(corpus)
+
+        # assert
+        # examine dv and max_v values that are not in the training data domain
+        self.assertEqual(model5.head_vacancy_table[5][4][0], IBMModel.MIN_PROB)
+        self.assertEqual(model5.head_vacancy_table[-4][1][2], IBMModel.MIN_PROB)
+        self.assertEqual(model5.head_vacancy_table[4][0][0], IBMModel.MIN_PROB)
+        self.assertEqual(model5.non_head_vacancy_table[5][4][0], IBMModel.MIN_PROB)
+        self.assertEqual(model5.non_head_vacancy_table[-4][1][2], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        # arrange
+        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
+        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
+        src_classes = {'räucherschinken': 0, 'ja': 1, 'ich': 2, 'esse': 3, 'gern': 4}
+        trg_classes = {'ham': 0, 'smoked': 1, 'i': 3, 'love': 4, 'to': 2, 'eat': 4}
+        corpus = [AlignedSent(trg_sentence, src_sentence)]
+        alignment_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + src_sentence,
+            ['UNUSED'] + trg_sentence,
+            [[3], [1], [4], [], [2], [5, 6]],
+        )
+
+        head_vacancy_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(float))
+        )
+        head_vacancy_table[1 - 0][6][3] = 0.97  # ich -> i
+        head_vacancy_table[3 - 0][5][4] = 0.97  # esse -> eat
+        head_vacancy_table[1 - 2][4][4] = 0.97  # gern -> love
+        head_vacancy_table[2 - 0][2][1] = 0.97  # räucherschinken -> smoked
+
+        non_head_vacancy_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(float))
+        )
+        non_head_vacancy_table[1 - 0][1][0] = 0.96  # räucherschinken -> ham
+
+        translation_table = defaultdict(lambda: defaultdict(float))
+        translation_table['i']['ich'] = 0.98
+        translation_table['love']['gern'] = 0.98
+        translation_table['to'][None] = 0.98
+        translation_table['eat']['esse'] = 0.98
+        translation_table['smoked']['räucherschinken'] = 0.98
+        translation_table['ham']['räucherschinken'] = 0.98
+
+        fertility_table = defaultdict(lambda: defaultdict(float))
+        fertility_table[1]['ich'] = 0.99
+        fertility_table[1]['esse'] = 0.99
+        fertility_table[0]['ja'] = 0.99
+        fertility_table[1]['gern'] = 0.99
+        fertility_table[2]['räucherschinken'] = 0.999
+        fertility_table[1][None] = 0.99
+
+        probabilities = {
+            'p1': 0.167,
+            'translation_table': translation_table,
+            'fertility_table': fertility_table,
+            'head_vacancy_table': head_vacancy_table,
+            'non_head_vacancy_table': non_head_vacancy_table,
+            'head_distortion_table': None,
+            'non_head_distortion_table': None,
+            'alignment_table': None,
+        }
+
+        model5 = IBMModel5(corpus, 0, src_classes, trg_classes, probabilities)
+
+        # act
+        probability = model5.prob_t_a_given_s(alignment_info)
+
+        # assert
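+        # Model 5 replaces raw displacements with vacancy counts so that
+        # probability mass is only assigned to unoccupied positions; as in
+        # Model 4, the NULL-aligned 'to' contributes the factor of 1.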
+        null_generation = 5 * pow(0.167, 1) * pow(0.833, 4)
+        fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
+        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        vacancy = 0.97 * 0.97 * 1 * 0.97 * 0.97 * 0.96
+        expected_probability = (
+            null_generation * fertility * lexical_translation * vacancy
+        )
+        self.assertEqual(round(probability, 4), round(expected_probability, 4))
+
+    def test_prune(self):
+        # arrange
+        alignment_infos = [
+            AlignmentInfo((1, 1), None, None, None),
+            AlignmentInfo((1, 2), None, None, None),
+            AlignmentInfo((2, 1), None, None, None),
+            AlignmentInfo((2, 2), None, None, None),
+            AlignmentInfo((0, 0), None, None, None),
+        ]
+        min_factor = IBMModel5.MIN_SCORE_FACTOR
+        best_score = 0.9
+        scores = {
+            (1, 1): min(min_factor * 1.5, 1) * best_score,  # above threshold
+            (1, 2): best_score,
+            (2, 1): min_factor * best_score,  # at threshold
+            (2, 2): min_factor * best_score * 0.5,  # low score
+            (0, 0): min(min_factor * 1.1, 1) * 1.2,  # above threshold
+        }
+        corpus = [AlignedSent(['a'], ['b'])]
+        original_prob_function = IBMModel4.model4_prob_t_a_given_s
+        # mock static method
+        IBMModel4.model4_prob_t_a_given_s = staticmethod(
+            lambda a, model: scores[a.alignment]
+        )
+        model5 = IBMModel5(corpus, 0, None, None)
+
+        # act
+        pruned_alignments = model5.prune(alignment_infos)
+
+        # assert
+        self.assertEqual(len(pruned_alignments), 3)
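+        # Only alignments scoring strictly above
+        # MIN_SCORE_FACTOR * best_score survive the pruning, so the
+        # at-threshold and low-score entries are dropped, leaving 3.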
+
+        # restore static method
+        IBMModel4.model4_prob_t_a_given_s = original_prob_function
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm_model.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm_model.py
new file mode 100644 (file)
index 0000000..31383bc
--- /dev/null
+++ b/nlp_resource_data/nltk/test/unit/translate/test_ibm_model.py
@@ -0,0 +1,279 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for common methods of IBM translation models
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel(unittest.TestCase):
+    __TEST_SRC_SENTENCE = ["j'", 'aime', 'bien', 'jambon']
+    __TEST_TRG_SENTENCE = ['i', 'love', 'ham']
+
+    def test_vocabularies_are_initialized(self):
+        parallel_corpora = [
+            AlignedSent(['one', 'two', 'three', 'four'], ['un', 'deux', 'trois']),
+            AlignedSent(['five', 'one', 'six'], ['quatre', 'cinq', 'six']),
+            AlignedSent([], ['sept']),
+        ]
+
+        ibm_model = IBMModel(parallel_corpora)
+        self.assertEqual(len(ibm_model.src_vocab), 8)
+        self.assertEqual(len(ibm_model.trg_vocab), 6)
+
+    def test_vocabularies_are_initialized_even_with_empty_corpora(self):
+        parallel_corpora = []
+
+        ibm_model = IBMModel(parallel_corpora)
+        self.assertEqual(len(ibm_model.src_vocab), 1)  # addition of NULL token
+        self.assertEqual(len(ibm_model.trg_vocab), 0)
+
+    def test_best_model2_alignment(self):
+        # arrange
+        sentence_pair = AlignedSent(
+            TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE
+        )
+        # None and 'bien' have zero fertility
+        translation_table = {
+            'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03, None: 0},
+            'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01, None: 0.03},
+            'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0},
+        }
+        alignment_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2)))
+        )
+
+        ibm_model = IBMModel([])
+        ibm_model.translation_table = translation_table
+        ibm_model.alignment_table = alignment_table
+
+        # act
+        a_info = ibm_model.best_model2_alignment(sentence_pair)
+
+        # assert
+        self.assertEqual(a_info.alignment[1:], (1, 2, 4))  # 0th element unused
+        self.assertEqual(a_info.cepts, [[], [1], [2], [], [3]])
+
+    def test_best_model2_alignment_does_not_change_pegged_alignment(self):
+        # arrange
+        sentence_pair = AlignedSent(
+            TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE
+        )
+        translation_table = {
+            'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03, None: 0},
+            'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01, None: 0.03},
+            'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0},
+        }
+        alignment_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2)))
+        )
+
+        ibm_model = IBMModel([])
+        ibm_model.translation_table = translation_table
+        ibm_model.alignment_table = alignment_table
+
+        # act: force 'love' to be pegged to 'jambon'
+        a_info = ibm_model.best_model2_alignment(sentence_pair, 2, 4)
+        # assert
+        self.assertEqual(a_info.alignment[1:], (1, 4, 4))
+        self.assertEqual(a_info.cepts, [[], [1], [], [], [2, 3]])
+
+    def test_best_model2_alignment_handles_fertile_words(self):
+        # arrange
+        sentence_pair = AlignedSent(
+            ['i', 'really', ',', 'really', 'love', 'ham'],
+            TestIBMModel.__TEST_SRC_SENTENCE,
+        )
+        # 'bien' produces 2 target words: 'really' and another 'really'
+        translation_table = {
+            'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03, None: 0},
+            'really': {"j'": 0, 'aime': 0, 'bien': 0.9, 'jambon': 0.01, None: 0.09},
+            ',': {"j'": 0, 'aime': 0, 'bien': 0.3, 'jambon': 0, None: 0.7},
+            'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01, None: 0.03},
+            'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0},
+        }
+        alignment_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2)))
+        )
+
+        ibm_model = IBMModel([])
+        ibm_model.translation_table = translation_table
+        ibm_model.alignment_table = alignment_table
+
+        # act
+        a_info = ibm_model.best_model2_alignment(sentence_pair)
+
+        # assert
+        self.assertEqual(a_info.alignment[1:], (1, 3, 0, 3, 2, 4))
+        self.assertEqual(a_info.cepts, [[3], [1], [5], [2, 4], [6]])
+
+    def test_best_model2_alignment_handles_empty_src_sentence(self):
+        # arrange
+        sentence_pair = AlignedSent(TestIBMModel.__TEST_TRG_SENTENCE, [])
+        ibm_model = IBMModel([])
+
+        # act
+        a_info = ibm_model.best_model2_alignment(sentence_pair)
+
+        # assert
+        self.assertEqual(a_info.alignment[1:], (0, 0, 0))
+        self.assertEqual(a_info.cepts, [[1, 2, 3]])
+
+    def test_best_model2_alignment_handles_empty_trg_sentence(self):
+        # arrange
+        sentence_pair = AlignedSent([], TestIBMModel.__TEST_SRC_SENTENCE)
+        ibm_model = IBMModel([])
+
+        # act
+        a_info = ibm_model.best_model2_alignment(sentence_pair)
+
+        # assert
+        self.assertEqual(a_info.alignment[1:], ())
+        self.assertEqual(a_info.cepts, [[], [], [], [], []])
+
+    def test_neighboring_finds_neighbor_alignments(self):
+        # arrange
+        a_info = AlignmentInfo(
+            (0, 3, 2),
+            (None, 'des', 'œufs', 'verts'),
+            ('UNUSED', 'green', 'eggs'),
+            [[], [], [2], [1]],
+        )
+        ibm_model = IBMModel([])
+
+        # act
+        neighbors = ibm_model.neighboring(a_info)
+
+        # assert
+        neighbor_alignments = set()
+        for neighbor in neighbors:
+            neighbor_alignments.add(neighbor.alignment)
+        expected_alignments = set(
+            [
+                # moves
+                (0, 0, 2),
+                (0, 1, 2),
+                (0, 2, 2),
+                (0, 3, 0),
+                (0, 3, 1),
+                (0, 3, 3),
+                # swaps
+                (0, 2, 3),
+                # original alignment
+                (0, 3, 2),
+            ]
+        )
+        self.assertEqual(neighbor_alignments, expected_alignments)
+
+    def test_neighboring_sets_neighbor_alignment_info(self):
+        # arrange
+        a_info = AlignmentInfo(
+            (0, 3, 2),
+            (None, 'des', 'œufs', 'verts'),
+            ('UNUSED', 'green', 'eggs'),
+            [[], [], [2], [1]],
+        )
+        ibm_model = IBMModel([])
+
+        # act
+        neighbors = ibm_model.neighboring(a_info)
+
+        # assert: select a few particular alignments
+        for neighbor in neighbors:
+            if neighbor.alignment == (0, 2, 2):
+                moved_alignment = neighbor
+            elif neighbor.alignment == (0, 3, 2):
+                swapped_alignment = neighbor
+
+        self.assertEqual(moved_alignment.cepts, [[], [], [1, 2], []])
+        self.assertEqual(swapped_alignment.cepts, [[], [], [2], [1]])
+
+    def test_neighboring_returns_neighbors_with_pegged_alignment(self):
+        # arrange
+        a_info = AlignmentInfo(
+            (0, 3, 2),
+            (None, 'des', 'œufs', 'verts'),
+            ('UNUSED', 'green', 'eggs'),
+            [[], [], [2], [1]],
+        )
+        ibm_model = IBMModel([])
+
+        # act: peg 'eggs' to align with 'œufs'
+        neighbors = ibm_model.neighboring(a_info, 2)
+
+        # assert
+        neighbor_alignments = set()
+        for neighbor in neighbors:
+            neighbor_alignments.add(neighbor.alignment)
+        expected_alignments = set(
+            [
+                # moves
+                (0, 0, 2),
+                (0, 1, 2),
+                (0, 2, 2),
+                # no swaps
+                # original alignment
+                (0, 3, 2),
+            ]
+        )
+        self.assertEqual(neighbor_alignments, expected_alignments)
+
+    def test_hillclimb(self):
+        # arrange
+        initial_alignment = AlignmentInfo((0, 3, 2), None, None, None)
+
+        def neighboring_mock(a, j):
+            if a.alignment == (0, 3, 2):
+                return set(
+                    [
+                        AlignmentInfo((0, 2, 2), None, None, None),
+                        AlignmentInfo((0, 1, 1), None, None, None),
+                    ]
+                )
+            elif a.alignment == (0, 2, 2):
+                return set(
+                    [
+                        AlignmentInfo((0, 3, 3), None, None, None),
+                        AlignmentInfo((0, 4, 4), None, None, None),
+                    ]
+                )
+            return set()
+
+        def prob_t_a_given_s_mock(a):
+            prob_values = {
+                (0, 3, 2): 0.5,
+                (0, 2, 2): 0.6,
+                (0, 1, 1): 0.4,
+                (0, 3, 3): 0.6,
+                (0, 4, 4): 0.7,
+            }
+            return prob_values.get(a.alignment, 0.01)
+
+        ibm_model = IBMModel([])
+        ibm_model.neighboring = neighboring_mock
+        ibm_model.prob_t_a_given_s = prob_t_a_given_s_mock
+
+        # act
+        best_alignment = ibm_model.hillclimb(initial_alignment)
+
+        # assert: hill climbing goes from (0, 3, 2) -> (0, 2, 2) -> (0, 4, 4)
+        self.assertEqual(best_alignment.alignment, (0, 4, 4))
+
+    def test_sample(self):
+        # arrange
+        sentence_pair = AlignedSent(
+            TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE
+        )
+        ibm_model = IBMModel([])
+        ibm_model.prob_t_a_given_s = lambda x: 0.001
+
+        # act
+        samples, best_alignment = ibm_model.sample(sentence_pair)
+
+        # assert
+        self.assertEqual(len(samples), 61)
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_nist.py b/nlp_resource_data/nltk/test/unit/translate/test_nist.py
new file mode 100644 (file)
index 0000000..84e6342
--- /dev/null
+++ b/nlp_resource_data/nltk/test/unit/translate/test_nist.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for NIST translation evaluation metric
+"""
+
+import io
+import unittest
+
+from nltk.data import find
+from nltk.translate.nist_score import sentence_nist, corpus_nist
+
+
+class TestNIST(unittest.TestCase):
+    def test_corpus_nist(self):
+        ref_file = find('models/wmt15_eval/ref.ru')
+        hyp_file = find('models/wmt15_eval/google.ru')
+        mteval_output_file = find('models/wmt15_eval/mteval-13a.output')
+
+        # Reads the NIST scores from the `mteval-13a.output` file.
+        # The order of the list corresponds to the order of the ngrams.
+        with open(mteval_output_file, 'r') as mteval_fin:
+            # The numbers are located on the fourth-to-last line of the file.
+            # The first and last items on that line are names, not scores,
+            # hence the [1:-1] slice.
+            mteval_nist_scores = map(float, mteval_fin.readlines()[-4].split()[1:-1])
+
+        with io.open(ref_file, 'r', encoding='utf8') as ref_fin:
+            with io.open(hyp_file, 'r', encoding='utf8') as hyp_fin:
+                # Whitespace tokenize the file.
+                # Note: split() strips surrounding whitespace automatically.
+                hypotheses = list(map(lambda x: x.split(), hyp_fin))
+                # Note that the corpus_nist input is a list of lists of references.
+                references = list(map(lambda x: [x.split()], ref_fin))
+                # Compute corpus-level NIST for n = 1..9.
+                for i, mteval_nist in zip(range(1, 10), mteval_nist_scores):
+                    nltk_nist = corpus_nist(references, hypotheses, i)
+                    # Check that the NIST score difference is less than 0.05.
+                    assert abs(mteval_nist - nltk_nist) < 0.05
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_stack_decoder.py b/nlp_resource_data/nltk/test/unit/translate/test_stack_decoder.py
new file mode 100644 (file)
index 0000000..fd286f6
--- /dev/null
+++ b/nlp_resource_data/nltk/test/unit/translate/test_stack_decoder.py
@@ -0,0 +1,295 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Stack decoder
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Tah Wei Hoon <hoon.tw@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Tests for stack decoder
+"""
+
+import unittest
+from collections import defaultdict
+from math import log
+from nltk.translate import PhraseTable
+from nltk.translate import StackDecoder
+from nltk.translate.stack_decoder import _Hypothesis, _Stack
+
+
+class TestStackDecoder(unittest.TestCase):
+    def test_find_all_src_phrases(self):
+        # arrange
+        phrase_table = TestStackDecoder.create_fake_phrase_table()
+        stack_decoder = StackDecoder(phrase_table, None)
+        sentence = ('my', 'hovercraft', 'is', 'full', 'of', 'eels')
+
+        # act
+        src_phrase_spans = stack_decoder.find_all_src_phrases(sentence)
+
+        # assert
+        self.assertEqual(src_phrase_spans[0], [2])  # 'my hovercraft'
+        self.assertEqual(src_phrase_spans[1], [2])  # 'hovercraft'
+        self.assertEqual(src_phrase_spans[2], [3])  # 'is'
+        self.assertEqual(src_phrase_spans[3], [5, 6])  # 'full of', 'full of eels'
+        self.assertFalse(src_phrase_spans[4])  # no entry starting with 'of'
+        self.assertEqual(src_phrase_spans[5], [6])  # 'eels'
+
+    def test_distortion_score(self):
+        # arrange
+        stack_decoder = StackDecoder(None, None)
+        stack_decoder.distortion_factor = 0.5
+        hypothesis = _Hypothesis()
+        hypothesis.src_phrase_span = (3, 5)
+
+        # act
+        score = stack_decoder.distortion_score(hypothesis, (8, 10))
+
+        # assert
+        expected_score = log(stack_decoder.distortion_factor) * (8 - 5)
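+        # The penalty scales with the jump from the end of the previous
+        # source phrase (position 5) to the start of the new one
+        # (position 8): three skipped words, each costing log(0.5).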
+        self.assertEqual(score, expected_score)
+
+    def test_distortion_score_of_first_expansion(self):
+        # arrange
+        stack_decoder = StackDecoder(None, None)
+        stack_decoder.distortion_factor = 0.5
+        hypothesis = _Hypothesis()
+
+        # act
+        score = stack_decoder.distortion_score(hypothesis, (8, 10))
+
+        # assert
+        # expansion from empty hypothesis always has zero distortion cost
+        self.assertEqual(score, 0.0)
+
+    def test_compute_future_costs(self):
+        # arrange
+        phrase_table = TestStackDecoder.create_fake_phrase_table()
+        language_model = TestStackDecoder.create_fake_language_model()
+        stack_decoder = StackDecoder(phrase_table, language_model)
+        sentence = ('my', 'hovercraft', 'is', 'full', 'of', 'eels')
+
+        # act
+        future_scores = stack_decoder.compute_future_scores(sentence)
+
+        # assert
+        self.assertEqual(
+            future_scores[1][2],
+            (
+                phrase_table.translations_for(('hovercraft',))[0].log_prob
+                + language_model.probability(('hovercraft',))
+            ),
+        )
+        self.assertEqual(
+            future_scores[0][2],
+            (
+                phrase_table.translations_for(('my', 'hovercraft'))[0].log_prob
+                + language_model.probability(('my', 'hovercraft'))
+            ),
+        )
+
+    def test_compute_future_costs_for_phrases_not_in_phrase_table(self):
+        # arrange
+        phrase_table = TestStackDecoder.create_fake_phrase_table()
+        language_model = TestStackDecoder.create_fake_language_model()
+        stack_decoder = StackDecoder(phrase_table, language_model)
+        sentence = ('my', 'hovercraft', 'is', 'full', 'of', 'eels')
+
+        # act
+        future_scores = stack_decoder.compute_future_scores(sentence)
+
+        # assert
+        self.assertEqual(
+            future_scores[1][3],  # 'hovercraft is' is not in phrase table
+            future_scores[1][2] + future_scores[2][3],
+        )  # backoff
+
+    def test_future_score(self):
+        # arrange: sentence with 8 words; words 2, 3, 4 already translated
+        hypothesis = _Hypothesis()
+        hypothesis.untranslated_spans = lambda _: [(0, 2), (5, 8)]  # mock
+        future_score_table = defaultdict(lambda: defaultdict(float))
+        future_score_table[0][2] = 0.4
+        future_score_table[5][8] = 0.5
+        stack_decoder = StackDecoder(None, None)
+
+        # act
+        future_score = stack_decoder.future_score(hypothesis, future_score_table, 8)
+
+        # assert
+        self.assertEqual(future_score, 0.4 + 0.5)
+
+    def test_valid_phrases(self):
+        # arrange
+        hypothesis = _Hypothesis()
+        # mock untranslated_spans method
+        hypothesis.untranslated_spans = lambda _: [(0, 2), (3, 6)]
+        all_phrases_from = [[1, 4], [2], [], [5], [5, 6, 7], [], [7]]
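+        # all_phrases_from[i] lists the end positions of phrases starting
+        # at source position i; a phrase is valid only if it lies entirely
+        # within an untranslated span.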
+
+        # act
+        phrase_spans = StackDecoder.valid_phrases(all_phrases_from, hypothesis)
+
+        # assert
+        self.assertEqual(phrase_spans, [(0, 1), (1, 2), (3, 5), (4, 5), (4, 6)])
+
+    @staticmethod
+    def create_fake_phrase_table():
+        phrase_table = PhraseTable()
+        phrase_table.add(('hovercraft',), ('',), 0.8)
+        phrase_table.add(('my', 'hovercraft'), ('', ''), 0.7)
+        phrase_table.add(('my', 'cheese'), ('', ''), 0.7)
+        phrase_table.add(('is',), ('',), 0.8)
+        phrase_table.add(('is',), ('',), 0.5)
+        phrase_table.add(('full', 'of'), ('', ''), 0.01)
+        phrase_table.add(('full', 'of', 'eels'), ('', '', ''), 0.5)
+        phrase_table.add(('full', 'of', 'spam'), ('', ''), 0.5)
+        phrase_table.add(('eels',), ('',), 0.5)
+        phrase_table.add(('spam',), ('',), 0.5)
+        return phrase_table
+
+    @staticmethod
+    def create_fake_language_model():
+        # nltk.model should be used here once it is implemented
+        language_prob = defaultdict(lambda: -999.0)
+        language_prob[('my',)] = log(0.1)
+        language_prob[('hovercraft',)] = log(0.1)
+        language_prob[('is',)] = log(0.1)
+        language_prob[('full',)] = log(0.1)
+        language_prob[('of',)] = log(0.1)
+        language_prob[('eels',)] = log(0.1)
+        language_prob[('my', 'hovercraft')] = log(0.3)
+        language_model = type(
+            '', (object,), {'probability': lambda _, phrase: language_prob[phrase]}
+        )()
+        return language_model
+
+
+class TestHypothesis(unittest.TestCase):
+    def setUp(self):
+        root = _Hypothesis()
+        child = _Hypothesis(
+            raw_score=0.5,
+            src_phrase_span=(3, 7),
+            trg_phrase=('hello', 'world'),
+            previous=root,
+        )
+        grandchild = _Hypothesis(
+            raw_score=0.4,
+            src_phrase_span=(1, 2),
+            trg_phrase=('and', 'goodbye'),
+            previous=child,
+        )
+        self.hypothesis_chain = grandchild
+
+    def test_translation_so_far(self):
+        # act
+        translation = self.hypothesis_chain.translation_so_far()
+
+        # assert
+        self.assertEqual(translation, ['hello', 'world', 'and', 'goodbye'])
+
+    def test_translation_so_far_for_empty_hypothesis(self):
+        # arrange
+        hypothesis = _Hypothesis()
+
+        # act
+        translation = hypothesis.translation_so_far()
+
+        # assert
+        self.assertEqual(translation, [])
+
+    def test_total_translated_words(self):
+        # act
+        total_translated_words = self.hypothesis_chain.total_translated_words()
+
+        # assert
+        self.assertEqual(total_translated_words, 5)
+
+    def test_translated_positions(self):
+        # act
+        translated_positions = self.hypothesis_chain.translated_positions()
+
+        # assert
+        translated_positions.sort()
+        self.assertEqual(translated_positions, [1, 3, 4, 5, 6])
+
+    def test_untranslated_spans(self):
+        # act
+        untranslated_spans = self.hypothesis_chain.untranslated_spans(10)
+
+        # assert
+        self.assertEqual(untranslated_spans, [(0, 1), (2, 3), (7, 10)])
+
+    def test_untranslated_spans_for_empty_hypothesis(self):
+        # arrange
+        hypothesis = _Hypothesis()
+
+        # act
+        untranslated_spans = hypothesis.untranslated_spans(10)
+
+        # assert
+        self.assertEqual(untranslated_spans, [(0, 10)])
+
+
+class TestStack(unittest.TestCase):
+    def test_push_bumps_off_worst_hypothesis_when_stack_is_full(self):
+        # arrange
+        stack = _Stack(3)
+        poor_hypothesis = _Hypothesis(0.01)
+
+        # act
+        stack.push(_Hypothesis(0.2))
+        stack.push(poor_hypothesis)
+        stack.push(_Hypothesis(0.1))
+        stack.push(_Hypothesis(0.3))
+
+        # assert
+        self.assertFalse(poor_hypothesis in stack)
+
+    def test_push_removes_hypotheses_that_fall_below_beam_threshold(self):
+        # arrange
+        stack = _Stack(3, 0.5)
+        poor_hypothesis = _Hypothesis(0.01)
+        worse_hypothesis = _Hypothesis(0.009)
+
+        # act
+        stack.push(poor_hypothesis)
+        stack.push(worse_hypothesis)
+        stack.push(_Hypothesis(0.9))  # greatly superior hypothesis
+
+        # assert
+        self.assertFalse(poor_hypothesis in stack)
+        self.assertFalse(worse_hypothesis in stack)
+
+    def test_push_does_not_add_hypothesis_that_falls_below_beam_threshold(self):
+        # arrange
+        stack = _Stack(3, 0.5)
+        poor_hypothesis = _Hypothesis(0.01)
+
+        # act
+        stack.push(_Hypothesis(0.9))  # greatly superior hypothesis
+        stack.push(poor_hypothesis)
+
+        # assert
+        self.assertFalse(poor_hypothesis in stack)
+
+    def test_best_returns_the_best_hypothesis(self):
+        # arrange
+        stack = _Stack(3)
+        best_hypothesis = _Hypothesis(0.99)
+
+        # act
+        stack.push(_Hypothesis(0.0))
+        stack.push(best_hypothesis)
+        stack.push(_Hypothesis(0.5))
+
+        # assert
+        self.assertEqual(stack.best(), best_hypothesis)
+
+    def test_best_returns_none_when_stack_is_empty(self):
+        # arrange
+        stack = _Stack(3)
+
+        # assert
+        self.assertEqual(stack.best(), None)
diff --git a/nlp_resource_data/nltk/test/unit/utils.py b/nlp_resource_data/nltk/test/unit/utils.py
new file mode 100644 (file)
index 0000000..8bd7346
--- /dev/null
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+from unittest import TestCase
+from functools import wraps
+from nose.plugins.skip import SkipTest
+from nltk.util import py26
+
+
+def skip(reason):
+    """
+    Unconditionally skip a test.
+    """
+
+    def decorator(test_item):
+        is_test_class = isinstance(test_item, type) and issubclass(test_item, TestCase)
+
+        if is_test_class and py26():
+            # Patch all test_ methods to raise a SkipTest exception.
+            # This is necessary for Python 2.6 because its unittest
+            # doesn't understand __unittest_skip__.
+            for meth_name in (m for m in dir(test_item) if m.startswith('test_')):
+                patched_method = skip(reason)(getattr(test_item, meth_name))
+                setattr(test_item, meth_name, patched_method)
+
+        if not is_test_class:
+
+            @wraps(test_item)
+            def skip_wrapper(*args, **kwargs):
+                raise SkipTest(reason)
+
+            skip_wrapper.__name__ = test_item.__name__
+            test_item = skip_wrapper
+
+        test_item.__unittest_skip__ = True
+        test_item.__unittest_skip_why__ = reason
+        return test_item
+
+    return decorator
+
+
+def skipIf(condition, reason):
+    """
+    Skip a test if the condition is true.
+    """
+    if condition:
+        return skip(reason)
+    return lambda obj: obj
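+
+
+# Illustrative usage (hypothetical example, not part of this module):
+#
+#     @skipIf(py26(), "does not work reliably under Python 2.6")
+#     class SomeTest(TestCase):
+#         def test_something(self):
+#             ...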
diff --git a/nlp_resource_data/nltk/test/util.doctest b/nlp_resource_data/nltk/test/util.doctest
new file mode 100644 (file)
index 0000000..f2360ff
--- /dev/null
@@ -0,0 +1,47 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=================
+Utility functions
+=================
+
+    >>> from nltk.util import *
+    >>> from nltk.tree import Tree
+
+    >>> print_string("This is a long string, therefore it should break", 25)
+    This is a long string,
+    therefore it should break
+
+    >>> re_show("[a-z]+", "sdf123")
+    {sdf}123
+
+    >>> tree = Tree(5,
+    ...             [Tree(4, [Tree(2, [1, 3])]),
+    ...              Tree(8, [Tree(6, [7]), 9])])
+    >>> for x in breadth_first(tree):
+    ...     if isinstance(x, int): print(x)
+    ...     else: print(x.label())
+    5
+    4
+    8
+    2
+    6
+    9
+    1
+    3
+    7
+    >>> for x in breadth_first(tree, maxdepth=2):
+    ...     if isinstance(x, int): print(x)
+    ...     else: print(x.label())
+    5
+    4
+    8
+    2
+    6
+    9
+
+    >>> invert_dict({1: 2})
+    defaultdict(<... 'list'>, {2: 1})
+
+    >>> invert_dict({1: [3, 4, 5]})
+    defaultdict(<... 'list'>, {3: [1], 4: [1], 5: [1]})
diff --git a/nlp_resource_data/nltk/test/wordnet.doctest b/nlp_resource_data/nltk/test/wordnet.doctest
new file mode 100644 (file)
index 0000000..54c5975
--- /dev/null
@@ -0,0 +1,604 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+=================
+WordNet Interface
+=================
+
+WordNet is just another NLTK corpus reader, and can be imported like this:
+    >>> from nltk.corpus import wordnet
+
+For more compact code, we recommend:
+
+    >>> from nltk.corpus import wordnet as wn
+
+-----
+Words
+-----
+
+Look up a word using ``synsets()``; this function has an optional ``pos`` argument
+which lets you constrain the part of speech of the word:
+
+    >>> wn.synsets('dog') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+    [Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'),
+    Synset('frank.n.02'), Synset('pawl.n.01'), Synset('andiron.n.01'), Synset('chase.v.01')]
+    >>> wn.synsets('dog', pos=wn.VERB)
+    [Synset('chase.v.01')]
+
+The other parts of speech are ``NOUN``, ``ADJ`` and ``ADV``.
+A synset is identified with a 3-part name of the form: word.pos.nn:
+
+    >>> wn.synset('dog.n.01')
+    Synset('dog.n.01')
+    >>> print(wn.synset('dog.n.01').definition())
+    a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
+    >>> len(wn.synset('dog.n.01').examples())
+    1
+    >>> print(wn.synset('dog.n.01').examples()[0])
+    the dog barked all night
+    >>> wn.synset('dog.n.01').lemmas()
+    [Lemma('dog.n.01.dog'), Lemma('dog.n.01.domestic_dog'), Lemma('dog.n.01.Canis_familiaris')]
+    >>> [str(lemma.name()) for lemma in wn.synset('dog.n.01').lemmas()]
+    ['dog', 'domestic_dog', 'Canis_familiaris']
+    >>> wn.lemma('dog.n.01.dog').synset()
+    Synset('dog.n.01')
+
+The WordNet corpus reader gives access to the Open Multilingual
+WordNet, using ISO-639 language codes.
+
+    >>> sorted(wn.langs()) # doctest: +NORMALIZE_WHITESPACE
+    ['als', 'arb', 'bul', 'cat', 'cmn', 'dan', 'ell', 'eng', 'eus', 'fas',
+    'fin', 'fra', 'glg', 'heb', 'hrv', 'ind', 'ita', 'jpn', 'nld', 'nno',
+    'nob', 'pol', 'por', 'qcn', 'slv', 'spa', 'swe', 'tha', 'zsm']
+    >>> wn.synsets(b'\xe7\x8a\xac'.decode('utf-8'), lang='jpn')
+    [Synset('dog.n.01'), Synset('spy.n.01')]
+
+    wn.synset('spy.n.01').lemma_names('jpn') # doctest: +NORMALIZE_WHITESPACE
+    ['\u3044\u306c', '\u307e\u308f\u3057\u8005', '\u30b9\u30d1\u30a4', '\u56de\u3057\u8005',
+    '\u56de\u8005', '\u5bc6\u5075', '\u5de5\u4f5c\u54e1', '\u5efb\u3057\u8005',
+    '\u5efb\u8005', '\u63a2', '\u63a2\u308a', '\u72ac', '\u79d8\u5bc6\u635c\u67fb\u54e1',
+    '\u8adc\u5831\u54e1', '\u8adc\u8005', '\u9593\u8005', '\u9593\u8adc', '\u96a0\u5bc6']
+
+    >>> wn.synset('dog.n.01').lemma_names('ita')
+    ['cane', 'Canis_familiaris']
+    >>> wn.lemmas('cane', lang='ita') # doctest: +NORMALIZE_WHITESPACE
+    [Lemma('dog.n.01.cane'), Lemma('cramp.n.02.cane'), Lemma('hammer.n.01.cane'), Lemma('bad_person.n.01.cane'),
+    Lemma('incompetent.n.01.cane')]
+    >>> sorted(wn.synset('dog.n.01').lemmas('dan')) # doctest: +NORMALIZE_WHITESPACE
+    [Lemma('dog.n.01.hund'), Lemma('dog.n.01.k\xf8ter'),
+    Lemma('dog.n.01.vovhund'), Lemma('dog.n.01.vovse')]
+
+    sorted(wn.synset('dog.n.01').lemmas('por'))
+       [Lemma('dog.n.01.cachorra'), Lemma('dog.n.01.cachorro'), Lemma('dog.n.01.cadela'), Lemma('dog.n.01.c\xe3o')]
+
+    >>> dog_lemma = wn.lemma(b'dog.n.01.c\xc3\xa3o'.decode('utf-8'), lang='por')
+    >>> dog_lemma
+    Lemma('dog.n.01.c\xe3o')
+    >>> dog_lemma.lang()
+    'por'
+    >>> len(list(wordnet.all_lemma_names(pos='n', lang='jpn')))
+    64797
+
+-------
+Synsets
+-------
+
+`Synset`: a set of synonyms that share a common meaning.
+
+    >>> dog = wn.synset('dog.n.01')
+    >>> dog.hypernyms()
+    [Synset('canine.n.02'), Synset('domestic_animal.n.01')]
+    >>> dog.hyponyms()  # doctest: +ELLIPSIS
+    [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), ...]
+    >>> dog.member_holonyms()
+    [Synset('canis.n.01'), Synset('pack.n.06')]
+    >>> dog.root_hypernyms()
+    [Synset('entity.n.01')]
+    >>> wn.synset('dog.n.01').lowest_common_hypernyms(wn.synset('cat.n.01'))
+    [Synset('carnivore.n.01')]
+
+Each synset contains one or more lemmas, which represent a specific
+sense of a specific word.
+
+Note that some relations are defined by WordNet only over Lemmas:
+
+    >>> good = wn.synset('good.a.01')
+    >>> good.antonyms()
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    AttributeError: 'Synset' object has no attribute 'antonyms'
+    >>> good.lemmas()[0].antonyms()
+    [Lemma('bad.a.01.bad')]
+
+The relations that are currently defined in this way are `antonyms`,
+`derivationally_related_forms` and `pertainyms`.
+
+If you know the byte offset used to identify a synset in the original
+Princeton WordNet data file, you can use that to instantiate the synset
+in NLTK:
+
+    >>> wn.synset_from_pos_and_offset('n', 4543158)
+    Synset('wagon.n.01')
+
+------
+Lemmas
+------
+
+    >>> eat = wn.lemma('eat.v.03.eat')
+    >>> eat
+    Lemma('feed.v.06.eat')
+    >>> print(eat.key())
+    eat%2:34:02::
+    >>> eat.count()
+    4
+    >>> wn.lemma_from_key(eat.key())
+    Lemma('feed.v.06.eat')
+    >>> wn.lemma_from_key(eat.key()).synset()
+    Synset('feed.v.06')
+    >>> wn.lemma_from_key('feebleminded%5:00:00:retarded:00')
+    Lemma('backward.s.03.feebleminded')
+    >>> for lemma in wn.synset('eat.v.03').lemmas():
+    ...     print(lemma, lemma.count())
+    ...
+    Lemma('feed.v.06.feed') 3
+    Lemma('feed.v.06.eat') 4
+    >>> for lemma in wn.lemmas('eat', 'v'):
+    ...     print(lemma, lemma.count())
+    ...
+    Lemma('eat.v.01.eat') 61
+    Lemma('eat.v.02.eat') 13
+    Lemma('feed.v.06.eat') 4
+    Lemma('eat.v.04.eat') 0
+    Lemma('consume.v.05.eat') 0
+    Lemma('corrode.v.01.eat') 0
+    >>> wn.lemma('jump.v.11.jump')
+    Lemma('jump.v.11.jump')
+
+Lemmas can also have relations between them:
+
+    >>> vocal = wn.lemma('vocal.a.01.vocal')
+    >>> vocal.derivationally_related_forms()
+    [Lemma('vocalize.v.02.vocalize')]
+    >>> vocal.pertainyms()
+    [Lemma('voice.n.02.voice')]
+    >>> vocal.antonyms()
+    [Lemma('instrumental.a.01.instrumental')]
+
+The three relations above exist only on lemmas, not on synsets.
+
+-----------
+Verb Frames
+-----------
+
+    >>> wn.synset('think.v.01').frame_ids()
+    [5, 9]
+    >>> for lemma in wn.synset('think.v.01').lemmas():
+    ...     print(lemma, lemma.frame_ids())
+    ...     print(" | ".join(lemma.frame_strings()))
+    ...
+    Lemma('think.v.01.think') [5, 9]
+    Something think something Adjective/Noun | Somebody think somebody
+    Lemma('think.v.01.believe') [5, 9]
+    Something believe something Adjective/Noun | Somebody believe somebody
+    Lemma('think.v.01.consider') [5, 9]
+    Something consider something Adjective/Noun | Somebody consider somebody
+    Lemma('think.v.01.conceive') [5, 9]
+    Something conceive something Adjective/Noun | Somebody conceive somebody
+    >>> wn.synset('stretch.v.02').frame_ids()
+    [8]
+    >>> for lemma in wn.synset('stretch.v.02').lemmas():
+    ...     print(lemma, lemma.frame_ids())
+    ...     print(" | ".join(lemma.frame_strings()))
+    ...
+    Lemma('stretch.v.02.stretch') [8, 2]
+    Somebody stretch something | Somebody stretch
+    Lemma('stretch.v.02.extend') [8]
+    Somebody extend something
+
+
+----------
+Similarity
+----------
+
+    >>> dog = wn.synset('dog.n.01')
+    >>> cat = wn.synset('cat.n.01')
+
+    >>> hit = wn.synset('hit.v.01')
+    >>> slap = wn.synset('slap.v.01')
+
+
+``synset1.path_similarity(synset2):``
+Return a score denoting how similar two word senses are, based on the
+shortest path that connects the senses in the is-a (hypernym/hyponym)
+taxonomy. The score is in the range 0 to 1. By default, a fake root
+node is now added for verbs, so in cases where previously no path could
+be found (and None was returned), a value is returned instead. The old
+behavior can be restored by setting simulate_root=False. A score of 1
+represents identity, i.e. comparing a sense with itself returns 1.
+
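+(Concretely, the score is computed as 1 / (shortest_path_distance + 1);
+the dog-cat distance of 4 gives the 0.2 shown below.)
+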
+    >>> dog.path_similarity(cat)  # doctest: +ELLIPSIS
+    0.2...
+
+    >>> hit.path_similarity(slap)  # doctest: +ELLIPSIS
+    0.142...
+
+    >>> wn.path_similarity(hit, slap)  # doctest: +ELLIPSIS
+    0.142...
+
+    >>> print(hit.path_similarity(slap, simulate_root=False))
+    None
+
+    >>> print(wn.path_similarity(hit, slap, simulate_root=False))
+    None
+
+``synset1.lch_similarity(synset2):``
+Leacock-Chodorow Similarity:
+Return a score denoting how similar two word senses are, based on the
+shortest path that connects the senses (as above) and the maximum depth
+of the taxonomy in which the senses occur. The relationship is given
+as -log(p/2d) where p is the shortest path length and d the taxonomy
+depth.
+
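+(A worked check, assuming WordNet 3.0: the noun taxonomy has maximum
+depth d = 19, and the shortest dog-cat path counts p = 5 nodes, so
+-log(5/38) = 2.028..., matching the first result below.)
+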
+    >>> dog.lch_similarity(cat)  # doctest: +ELLIPSIS
+    2.028...
+
+    >>> hit.lch_similarity(slap)  # doctest: +ELLIPSIS
+    1.312...
+
+    >>> wn.lch_similarity(hit, slap)  # doctest: +ELLIPSIS
+    1.312...
+
+    >>> print(hit.lch_similarity(slap, simulate_root=False))
+    None
+
+    >>> print(wn.lch_similarity(hit, slap, simulate_root=False))
+    None
+
+``synset1.wup_similarity(synset2):``
+Wu-Palmer Similarity:
+Return a score denoting how similar two word senses are, based on the
+depth of the two senses in the taxonomy and that of their Least Common
+Subsumer (most specific ancestor node). Note that at this time the
+scores given do _not_ always agree with those given by Pedersen's Perl
+implementation of WordNet::Similarity.
+
+The LCS does not necessarily feature in the shortest path connecting the
+two senses, as it is by definition the common ancestor deepest in the
+taxonomy, not the one closest to the two senses. Typically, however, it
+does. Where multiple candidates for the LCS exist, the one whose
+shortest path to the root node is the longest is selected. Where the
+LCS has multiple paths to the root, the longer path is used for the
+purposes of the calculation.
+
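+For example, the LCS of ``dog.n.01`` and ``cat.n.01`` is
+``carnivore.n.01`` (compare ``lowest_common_hypernyms()`` in the Synsets
+section above); the 0.857 score below reflects the depth of that shared
+ancestor relative to the depths of the two senses.
+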
+    >>> dog.wup_similarity(cat)  # doctest: +ELLIPSIS
+    0.857...
+
+    >>> hit.wup_similarity(slap)
+    0.25
+
+    >>> wn.wup_similarity(hit, slap)
+    0.25
+
+    >>> print(hit.wup_similarity(slap, simulate_root=False))
+    None
+
+    >>> print(wn.wup_similarity(hit, slap, simulate_root=False))
+    None
+
+``wordnet_ic``
+Information Content:
+Load an information content file from the wordnet_ic corpus.
+
+    >>> from nltk.corpus import wordnet_ic
+    >>> brown_ic = wordnet_ic.ic('ic-brown.dat')
+    >>> semcor_ic = wordnet_ic.ic('ic-semcor.dat')
+
+Or you can create an information content dictionary from a corpus (or
+anything that has a words() method).
+
+    >>> from nltk.corpus import genesis
+    >>> genesis_ic = wn.ic(genesis, False, 0.0)
+
+``synset1.res_similarity(synset2, ic):``
+Resnik Similarity:
+Return a score denoting how similar two word senses are, based on the
+Information Content (IC) of the Least Common Subsumer (most specific
+ancestor node).  Note that for any similarity measure that uses
+information content, the result is dependent on the corpus used to
+generate the information content and the specifics of how the
+information content was created.
+
+    >>> dog.res_similarity(cat, brown_ic)  # doctest: +ELLIPSIS
+    7.911...
+    >>> dog.res_similarity(cat, genesis_ic)  # doctest: +ELLIPSIS
+    7.204...
+
+``synset1.jcn_similarity(synset2, ic):``
+Jiang-Conrath Similarity
+Return a score denoting how similar two word senses are, based on the
+Information Content (IC) of the Least Common Subsumer (most specific
+ancestor node) and that of the two input Synsets. The relationship is
+given by the equation 1 / (IC(s1) + IC(s2) - 2 * IC(lcs)).
+
+    >>> dog.jcn_similarity(cat, brown_ic)  # doctest: +ELLIPSIS
+    0.449...
+    >>> dog.jcn_similarity(cat, genesis_ic)  # doctest: +ELLIPSIS
+    0.285...
+
+``synset1.lin_similarity(synset2, ic):``
+Lin Similarity:
+Return a score denoting how similar two word senses are, based on the
+Information Content (IC) of the Least Common Subsumer (most specific
+ancestor node) and that of the two input Synsets. The relationship is
+given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)).
+
+    >>> dog.lin_similarity(cat, semcor_ic)  # doctest: +ELLIPSIS
+    0.886...
+
+
+---------------------
+Access to all Synsets
+---------------------
+
+Iterate over all the noun synsets:
+
+    >>> for synset in list(wn.all_synsets('n'))[:10]:
+    ...     print(synset)
+    ...
+    Synset('entity.n.01')
+    Synset('physical_entity.n.01')
+    Synset('abstraction.n.06')
+    Synset('thing.n.12')
+    Synset('object.n.01')
+    Synset('whole.n.02')
+    Synset('congener.n.03')
+    Synset('living_thing.n.01')
+    Synset('organism.n.01')
+    Synset('benthos.n.02')
+
+Get all synsets for this word, possibly restricted by POS:
+
+    >>> wn.synsets('dog') # doctest: +ELLIPSIS
+    [Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'), ...]
+    >>> wn.synsets('dog', pos='v')
+    [Synset('chase.v.01')]
+
+Walk through the noun synsets looking at their hypernyms:
+
+    >>> from itertools import islice
+    >>> for synset in islice(wn.all_synsets('n'), 5):
+    ...     print(synset, synset.hypernyms())
+    ...
+    Synset('entity.n.01') []
+    Synset('physical_entity.n.01') [Synset('entity.n.01')]
+    Synset('abstraction.n.06') [Synset('entity.n.01')]
+    Synset('thing.n.12') [Synset('physical_entity.n.01')]
+    Synset('object.n.01') [Synset('physical_entity.n.01')]
+
+
+------
+Morphy
+------
+
+Look up forms not in WordNet, with the help of Morphy:
+
+    >>> wn.morphy('denied', wn.NOUN)
+    >>> print(wn.morphy('denied', wn.VERB))
+    deny
+    >>> wn.synsets('denied', wn.NOUN)
+    []
+    >>> wn.synsets('denied', wn.VERB) # doctest: +NORMALIZE_WHITESPACE
+    [Synset('deny.v.01'), Synset('deny.v.02'), Synset('deny.v.03'), Synset('deny.v.04'),
+    Synset('deny.v.05'), Synset('traverse.v.03'), Synset('deny.v.07')]
+
+Morphy uses a combination of inflectional ending rules and exception
+lists to handle a variety of different possibilities:
+
+    >>> print(wn.morphy('dogs'))
+    dog
+    >>> print(wn.morphy('churches'))
+    church
+    >>> print(wn.morphy('aardwolves'))
+    aardwolf
+    >>> print(wn.morphy('abaci'))
+    abacus
+    >>> print(wn.morphy('book', wn.NOUN))
+    book
+    >>> wn.morphy('hardrock', wn.ADV)
+    >>> wn.morphy('book', wn.ADJ)
+    >>> wn.morphy('his', wn.NOUN)
+    >>>
+
+---------------
+Synset Closures
+---------------
+
+Compute transitive closures of synsets
+
+    >>> dog = wn.synset('dog.n.01')
+    >>> hypo = lambda s: s.hyponyms()
+    >>> hyper = lambda s: s.hypernyms()
+    >>> list(dog.closure(hypo, depth=1)) == dog.hyponyms()
+    True
+    >>> list(dog.closure(hyper, depth=1)) == dog.hypernyms()
+    True
+    >>> list(dog.closure(hypo)) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
+    [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'),
+     Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'),
+     Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'),
+     Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'),
+     Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), ...]
+    >>> list(dog.closure(hyper)) # doctest: +NORMALIZE_WHITESPACE
+    [Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'), Synset('animal.n.01'),
+    Synset('placental.n.01'), Synset('organism.n.01'), Synset('mammal.n.01'), Synset('living_thing.n.01'),
+    Synset('vertebrate.n.01'), Synset('whole.n.02'), Synset('chordate.n.01'), Synset('object.n.01'),
+    Synset('physical_entity.n.01'), Synset('entity.n.01')]
+
+
+----------------
+Regression Tests
+----------------
+
+Bug 85: morphy returns the base form of a word if its input is given
+as a base form for a POS for which that word is not defined:
+
+    >>> wn.synsets('book', wn.NOUN)
+    [Synset('book.n.01'), Synset('book.n.02'), Synset('record.n.05'), Synset('script.n.01'), Synset('ledger.n.01'), Synset('book.n.06'), Synset('book.n.07'), Synset('koran.n.01'), Synset('bible.n.01'), Synset('book.n.10'), Synset('book.n.11')]
+    >>> wn.synsets('book', wn.ADJ)
+    []
+    >>> wn.morphy('book', wn.NOUN)
+    'book'
+    >>> wn.morphy('book', wn.ADJ)
+
+Bug 160: wup_similarity breaks when the two synsets have no common hypernym
+
+    >>> t = wn.synsets('picasso')[0]
+    >>> m = wn.synsets('male')[1]
+    >>> t.wup_similarity(m)  # doctest: +ELLIPSIS
+    0.631...
+
+    >>> t = wn.synsets('titan')[1]
+    >>> s = wn.synsets('say', wn.VERB)[0]
+    >>> print(t.wup_similarity(s))
+    None
+
+Bug 21: "instance of" not included in LCS (very similar to bug 160)
+
+    >>> a = wn.synsets("writings")[0]
+    >>> b = wn.synsets("scripture")[0]
+    >>> brown_ic = wordnet_ic.ic('ic-brown.dat')
+    >>> a.jcn_similarity(b, brown_ic)  # doctest: +ELLIPSIS
+    0.175...
+
+Bug 221: Verb root IC is zero
+
+    >>> from nltk.corpus.reader.wordnet import information_content
+    >>> s = wn.synsets('say', wn.VERB)[0]
+    >>> information_content(s, brown_ic)  # doctest: +ELLIPSIS
+    4.623...
+
+Bug 161: Comparison between WN keys/lemmas should not be case sensitive
+
+    >>> k = wn.synsets("jefferson")[0].lemmas()[0].key()
+    >>> wn.lemma_from_key(k)
+    Lemma('jefferson.n.01.Jefferson')
+    >>> wn.lemma_from_key(k.upper())
+    Lemma('jefferson.n.01.Jefferson')
+
+Bug 99: WordNet root_hypernyms gives incorrect results
+
+    >>> from nltk.corpus import wordnet as wn
+    >>> for s in wn.all_synsets(wn.NOUN):
+    ...     if s.root_hypernyms()[0] != wn.synset('entity.n.01'):
+    ...         print(s, s.root_hypernyms())
+    ...
+    >>>
+
+Bug 382: JCN Division by zero error
+
+    >>> tow = wn.synset('tow.v.01')
+    >>> shlep = wn.synset('shlep.v.02')
+    >>> from nltk.corpus import wordnet_ic
+    >>> brown_ic = wordnet_ic.ic('ic-brown.dat')
+    >>> tow.jcn_similarity(shlep, brown_ic)  # doctest: +ELLIPSIS
+    1...e+300
+
+Bug 428: Depth is zero for instance nouns
+
+    >>> s = wn.synset("lincoln.n.01")
+    >>> s.max_depth() > 0
+    True
+
+Bug 429: Information content smoothing used old reference to all_synsets
+
+    >>> genesis_ic = wn.ic(genesis, True, 1.0)
+
+Bug 430: all_synsets used wrong pos lookup when synsets were cached
+
+    >>> for ii in wn.all_synsets(): pass
+    >>> for ii in wn.all_synsets(): pass
+
+Bug 470: shortest_path_distance ignored instance hypernyms
+
+    >>> google = wordnet.synsets("google")[0]
+    >>> earth = wordnet.synsets("earth")[0]
+    >>> google.wup_similarity(earth)  # doctest: +ELLIPSIS
+    0.1...
+
+Bug 484: similarity metrics returned -1 instead of None for no LCS
+
+    >>> t = wn.synsets('fly', wn.VERB)[0]
+    >>> s = wn.synsets('say', wn.VERB)[0]
+    >>> print(s.shortest_path_distance(t))
+    None
+    >>> print(s.path_similarity(t, simulate_root=False))
+    None
+    >>> print(s.lch_similarity(t, simulate_root=False))
+    None
+    >>> print(s.wup_similarity(t, simulate_root=False))
+    None
+
+Bug 427: "pants" does not return all the senses it should
+
+    >>> from nltk.corpus import wordnet
+    >>> wordnet.synsets("pants",'n')
+    [Synset('bloomers.n.01'), Synset('pant.n.01'), Synset('trouser.n.01'), Synset('gasp.n.01')]
+
+Bug 482: Some nouns not being lemmatised by WordNetLemmatizer().lemmatize
+
+    >>> from nltk.stem.wordnet import WordNetLemmatizer
+    >>> WordNetLemmatizer().lemmatize("eggs", pos="n")
+    'egg'
+    >>> WordNetLemmatizer().lemmatize("legs", pos="n")
+    'leg'
+
+Bug 284: instance hypernyms not used in similarity calculations
+
+    >>> wn.synset('john.n.02').lch_similarity(wn.synset('dog.n.01'))  # doctest: +ELLIPSIS
+    1.335...
+    >>> wn.synset('john.n.02').wup_similarity(wn.synset('dog.n.01'))  # doctest: +ELLIPSIS
+    0.571...
+    >>> wn.synset('john.n.02').res_similarity(wn.synset('dog.n.01'), brown_ic)  # doctest: +ELLIPSIS
+    2.224...
+    >>> wn.synset('john.n.02').jcn_similarity(wn.synset('dog.n.01'), brown_ic)  # doctest: +ELLIPSIS
+    0.075...
+    >>> wn.synset('john.n.02').lin_similarity(wn.synset('dog.n.01'), brown_ic)  # doctest: +ELLIPSIS
+    0.252...
+    >>> wn.synset('john.n.02').hypernym_paths()  # doctest: +ELLIPSIS
+    [[Synset('entity.n.01'), ..., Synset('john.n.02')]]
+
+Issue 541: add domains to wordnet
+
+    >>> wn.synset('code.n.03').topic_domains()
+    [Synset('computer_science.n.01')]
+    >>> wn.synset('pukka.a.01').region_domains()
+    [Synset('india.n.01')]
+    >>> wn.synset('freaky.a.01').usage_domains()
+    [Synset('slang.n.02')]
+
+Issue 629: wordnet failures when python run with -O optimizations
+
+    >>> # Run the test suite with python -O to check this
+    >>> wn.synsets("brunch")
+    [Synset('brunch.n.01'), Synset('brunch.v.01')]
+
+Issue 395: wordnet returns incorrect result for lowest_common_hypernyms of chef and policeman
+
+    >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
+    [Synset('person.n.01')]
+
+Bug https://github.com/nltk/nltk/issues/1641: Non-English lemmas containing capital letters cannot be looked up using wordnet.lemmas() or wordnet.synsets()
+
+    >>> wn.lemmas('Londres', lang='fra')
+    [Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')]
+    >>> wn.lemmas('londres', lang='fra')
+    [Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')]
+
+Patch https://github.com/nltk/nltk/pull/2065: add three functions (relations) to the WordNet class
+
+    >>> wn.synsets("computer_science")[0].in_topic_domains()[2]
+    Synset('access_time.n.01')
+    >>> wn.synsets("France")[0].in_region_domains()[18]
+    Synset('french.n.01')
+    >>> wn.synsets("slang")[1].in_usage_domains()[18]
+    Synset('can-do.s.01')
diff --git a/nlp_resource_data/nltk/test/wordnet_fixt.py b/nlp_resource_data/nltk/test/wordnet_fixt.py
new file mode 100644 (file)
index 0000000..09ba27c
--- /dev/null
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+
+def teardown_module(module=None):
+    from nltk.corpus import wordnet
+
+    wordnet._unload()
diff --git a/nlp_resource_data/nltk/test/wordnet_lch.doctest b/nlp_resource_data/nltk/test/wordnet_lch.doctest
new file mode 100644 (file)
index 0000000..d92b5a1
--- /dev/null
@@ -0,0 +1,53 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+===============================
+WordNet Lowest Common Hypernyms
+===============================
+
+WordNet's lowest_common_hypernyms() method is used to locate the lowest
+single hypernym that is shared by two given words:
+
+    >>> from nltk.corpus import wordnet as wn
+    >>> wn.synset('kin.n.01').lowest_common_hypernyms(wn.synset('mother.n.01'))
+    [Synset('relative.n.01')]
+
+    >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
+    [Synset('person.n.01')]
+
+This method generally returns a single result, but in some cases, more than one
+valid LCH is possible:
+
+    >>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'))
+    [Synset('attribute.n.02'), Synset('measure.n.02')]
+
+In some cases, lowest_common_hypernyms() can return one of the synsets that
+was passed to it as an argument:
+
+    >>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'))
+    [Synset('woman.n.01')]
+
+In NLTK 3.0a2 the behavior of lowest_common_hypernyms() was changed to give more
+accurate results in a small set of cases, generally when dealing with nouns describing 
+social roles or jobs. To emulate the pre v3.0a2 behavior, you can set the use_min_depth=True
+flag:
+
+    >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
+    [Synset('person.n.01')]
+    >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'), use_min_depth=True)
+    [Synset('organism.n.01')]
+
+In some cases use_min_depth=True may return more or fewer results than the default
+behavior:
+
+    >>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'))
+    [Synset('woman.n.01')]
+    >>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'), use_min_depth=True)
+    [Synset('organism.n.01'), Synset('woman.n.01')]
+
+In the general case, however, they tend to return the same results:
+
+    >>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'))
+    [Synset('attribute.n.02'), Synset('measure.n.02')]
+    >>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'), use_min_depth=True)
+    [Synset('attribute.n.02'), Synset('measure.n.02')]
diff --git a/nlp_resource_data/nltk/test/wsd.doctest b/nlp_resource_data/nltk/test/wsd.doctest
new file mode 100644 (file)
index 0000000..28cf0e9
--- /dev/null
@@ -0,0 +1,68 @@
+.. Copyright (C) 2001-2020 NLTK Project
+.. For license information, see LICENSE.TXT
+
+.. -*- coding: utf-8 -*-
+
+=========================
+Word Sense Disambiguation
+=========================
+
+
+Lesk Algorithm
+--------------
+
+
+Performs the classic Lesk algorithm for Word Sense Disambiguation (WSD) using
+the definitions of the ambiguous word.
+
+Given an ambiguous word and the context in which the word occurs, Lesk returns
+a Synset with the highest number of overlapping words between the context
+sentence and different definitions from each Synset.
+
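+The idea can be sketched in a few lines (a simplified, illustrative
+version; the real ``nltk.wsd.lesk`` additionally accepts ``pos`` and
+``synsets`` arguments, as exercised below)::
+
+    def simplified_lesk(context_sentence, ambiguous_word):
+        context = set(context_sentence)
+        best_sense, best_overlap = None, -1
+        for ss in wn.synsets(ambiguous_word):
+            overlap = len(context.intersection(ss.definition().split()))
+            if overlap > best_overlap:
+                best_sense, best_overlap = ss, overlap
+        return best_sense
+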
+    >>> from nltk.wsd import lesk
+    >>> sent = ['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.']
+
+    >>> print(lesk(sent, 'bank', 'n'))
+    Synset('savings_bank.n.02')
+
+    >>> print(lesk(sent, 'bank'))
+    Synset('savings_bank.n.02')
+
+The definitions for "bank" are:
+
+    >>> from nltk.corpus import wordnet as wn
+    >>> for ss in wn.synsets('bank'):
+    ...     print(ss, ss.definition())
+    ...
+    Synset('bank.n.01') sloping land (especially the slope beside a body of water)
+    Synset('depository_financial_institution.n.01') a financial institution that accepts deposits and channels the money into lending activities
+    Synset('bank.n.03') a long ridge or pile
+    Synset('bank.n.04') an arrangement of similar objects in a row or in tiers
+    Synset('bank.n.05') a supply or stock held in reserve for future use (especially in emergencies)
+    Synset('bank.n.06') the funds held by a gambling house or the dealer in some gambling games
+    Synset('bank.n.07') a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force
+    Synset('savings_bank.n.02') a container (usually with a slot in the top) for keeping money at home
+    Synset('bank.n.09') a building in which the business of banking transacted
+    Synset('bank.n.10') a flight maneuver; aircraft tips laterally about its longitudinal axis (especially in turning)
+    Synset('bank.v.01') tip laterally
+    Synset('bank.v.02') enclose with a bank
+    Synset('bank.v.03') do business with a bank or keep an account at a bank
+    Synset('bank.v.04') act as the banker in a game or in gambling
+    Synset('bank.v.05') be in the banking business
+    Synset('deposit.v.02') put into a bank account
+    Synset('bank.v.07') cover with ashes so to control the rate of burning
+    Synset('trust.v.01') have confidence or faith in
+
+Test disambiguation of POS-tagged `able`.
+
+    >>> [(s, s.pos()) for s in wn.synsets('able')]
+    [(Synset('able.a.01'), 'a'), (Synset('able.s.02'), 's'), (Synset('able.s.03'), 's'), (Synset('able.s.04'), 's')]
+    >>> sent = 'people should be able to marry a person of their choice'.split()
+    >>> lesk(sent, 'able')
+    Synset('able.s.04')
+    >>> lesk(sent, 'able', pos='a')
+    Synset('able.a.01')
+
+Test behavior when there are no matching senses.
+
+    >>> lesk('John loves Mary'.split(), 'loves', synsets=[])
diff --git a/nlp_resource_data/nltk/text.py b/nlp_resource_data/nltk/text.py
old mode 100755 (executable)
new mode 100644 (file)
index 5563f3e..fc2731f
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Texts
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 #         Edward Loper <edloper@gmail.com>
 # URL: <http://nltk.org/>
@@ -13,22 +13,26 @@ Functionality includes: concordancing, collocation discovery,
 regular expression search over tokenized strings, and
 distributional similarity.
 """
-from __future__ import print_function, division, unicode_literals
 
 from math import log
-from collections import defaultdict, Counter
+from collections import defaultdict, Counter, namedtuple
 from functools import reduce
-from itertools import islice
 import re
+import sys
 
-from six import text_type
-
-from nltk.probability import FreqDist, LidstoneProbDist
+from nltk.lm import MLE
+from nltk.lm.preprocessing import padded_everygram_pipeline
+from nltk.probability import FreqDist
 from nltk.probability import ConditionalFreqDist as CFD
 from nltk.util import tokenwrap, LazyConcatenation
 from nltk.metrics import f_measure, BigramAssocMeasures
 from nltk.collocations import BigramCollocationFinder
-from nltk.compat import python_2_unicode_compatible
+from nltk.tokenize import sent_tokenize
+
+ConcordanceLine = namedtuple(
+    "ConcordanceLine",
+    ["left", "query", "right", "offset", "left_print", "right_print", "line"],
+)
 
 
 class ContextIndex(object):
@@ -38,14 +42,15 @@ class ContextIndex(object):
     in a fixed window around the word; but other definitions may also
     be used by providing a custom context function.
     """
+
     @staticmethod
     def _default_context(tokens, i):
         """One left token and one right token, normalized to lowercase"""
-        left = (tokens[i-1].lower() if i != 0 else '*START*')
-        right = (tokens[i+1].lower() if i != len(tokens) - 1 else '*END*')
+        left = tokens[i - 1].lower() if i != 0 else "*START*"
+        right = tokens[i + 1].lower() if i != len(tokens) - 1 else "*END*"
         return (left, right)
 
-    def __init__(self, tokens, context_func=None, filter=None, key=lambda x:x):
+    def __init__(self, tokens, context_func=None, filter=None, key=lambda x: x):
         self._key = key
         self._tokens = tokens
         if context_func:
@@ -54,10 +59,12 @@ class ContextIndex(object):
             self._context_func = self._default_context
         if filter:
             tokens = [t for t in tokens if filter(t)]
-        self._word_to_contexts = CFD((self._key(w), self._context_func(tokens, i))
-                                     for i, w in enumerate(tokens))
-        self._context_to_words = CFD((self._context_func(tokens, i), self._key(w))
-                                     for i, w in enumerate(tokens))
+        self._word_to_contexts = CFD(
+            (self._key(w), self._context_func(tokens, i)) for i, w in enumerate(tokens)
+        )
+        self._context_to_words = CFD(
+            (self._context_func(tokens, i), self._key(w)) for i, w in enumerate(tokens)
+        )
 
     def tokens(self):
         """
@@ -87,7 +94,9 @@ class ContextIndex(object):
         for c in self._word_to_contexts[self._key(word)]:
             for w in self._context_to_words[c]:
                 if w != word:
-                    scores[w] += self._context_to_words[c][word] * self._context_to_words[c][w]
+                    scores[w] += (
+                        self._context_to_words[c][word] * self._context_to_words[c][w]
+                    )
         return sorted(scores, key=scores.get, reverse=True)[:n]
 
     def common_contexts(self, words, fail_on_unknown=False):
@@ -106,24 +115,25 @@ class ContextIndex(object):
         empty = [words[i] for i in range(len(words)) if not contexts[i]]
         common = reduce(set.intersection, contexts)
         if empty and fail_on_unknown:
-            raise ValueError("The following word(s) were not found:",
-                             " ".join(words))
+            raise ValueError("The following word(s) were not found:", " ".join(words))
         elif not common:
             # nothing in common -- just return an empty freqdist.
             return FreqDist()
         else:
-            fd = FreqDist(c for w in words
-                          for c in self._word_to_contexts[w]
-                          if c in common)
+            fd = FreqDist(
+                c for w in words for c in self._word_to_contexts[w] if c in common
+            )
             return fd
 
-@python_2_unicode_compatible
+
+
 class ConcordanceIndex(object):
     """
     An index that can be used to look up the offset locations at which
     a given word occurs in a document.
     """
-    def __init__(self, tokens, key=lambda x:x):
+
+    def __init__(self, tokens, key=lambda x: x):
         """
         Construct a new concordance index.
 
@@ -143,9 +153,7 @@ class ConcordanceIndex(object):
         """Function mapping each token to an index key (or None)."""
 
         self._offsets = defaultdict(list)
-        """Dictionary mapping words (or keys) to lists of offset
-           indices."""
-
+        """Dictionary mapping words (or keys) to lists of offset indices."""
         # Initialize the index (self._offsets)
         for index, word in enumerate(tokens):
             word = self._key(word)
@@ -170,39 +178,67 @@ class ConcordanceIndex(object):
         return self._offsets[word]
 
     def __repr__(self):
-        return '<ConcordanceIndex for %d tokens (%d types)>' % (
-            len(self._tokens), len(self._offsets))
+        return "<ConcordanceIndex for %d tokens (%d types)>" % (
+            len(self._tokens),
+            len(self._offsets),
+        )
 
-    def print_concordance(self, word, width=75, lines=25):
+    def find_concordance(self, word, width=80):
+        """
+        Find all concordance lines given the query word.
         """
-        Print a concordance for ``word`` with the specified context window.
+        half_width = (width - len(word) - 2) // 2
+        context = width // 4  # approx number of words of context
 
+        # Find the instances of the word to create the ConcordanceLine
+        concordance_list = []
+        offsets = self.offsets(word)
+        if offsets:
+            for i in offsets:
+                query_word = self._tokens[i]
+                # Find the context of query word.
+                left_context = self._tokens[max(0, i - context) : i]
+                right_context = self._tokens[i + 1 : i + context]
+                # Create the pretty lines with the query_word in the middle.
+                left_print = " ".join(left_context)[-half_width:]
+                right_print = " ".join(right_context)[:half_width]
+                # The WYSIWYG line of the concordance.
+                line_print = " ".join([left_print, query_word, right_print])
+                # Create the ConcordanceLine
+                concordance_line = ConcordanceLine(
+                    left_context,
+                    query_word,
+                    right_context,
+                    i,
+                    left_print,
+                    right_print,
+                    line_print,
+                )
+                concordance_list.append(concordance_line)
+        return concordance_list
+
+    def print_concordance(self, word, width=80, lines=25):
+        """
+        Print concordance lines given the query word.
         :param word: The target word
         :type word: str
-        :param width: The width of each line, in characters (default=80)
-        :type width: int
         :param lines: The number of lines to display (default=25)
         :type lines: int
+        :param width: The width of each line, in characters (default=80)
+        :type width: int
         """
-        half_width = (width - len(word) - 2) // 2
-        context = width // 4 # approx number of words of context
+        concordance_list = self.find_concordance(word, width=width)
 
-        offsets = self.offsets(word)
-        if offsets:
-            lines = min(lines, len(offsets))
-            print("Displaying %s of %s matches:" % (lines, len(offsets)))
-            for i in offsets:
-                if lines <= 0:
-                    break
-                left = (' ' * half_width +
-                        ' '.join(self._tokens[i-context:i]))
-                right = ' '.join(self._tokens[i+1:i+context])
-                left = left[-half_width:]
-                right = right[:half_width]
-                print(left, self._tokens[i], right)
-                lines -= 1
+        if not concordance_list:
+            print("no matches")
         else:
-            print("No matches")
+            lines = min(lines, len(concordance_list))
+            print("Displaying {} of {} matches:".format(lines, len(concordance_list)))
+            for i, concordance_line in enumerate(concordance_list[:lines]):
+                print(concordance_line.line)
+
 
 class TokenSearcher(object):
     """
@@ -214,8 +250,9 @@ class TokenSearcher(object):
     brackets as non-capturing parentheses, in addition to matching the
     token boundaries; and to have ``'.'`` not match the angle brackets.
     """
+
     def __init__(self, tokens):
-        self._raw = ''.join('<'+w+'>' for w in tokens)
+        self._raw = "".join("<" + w + ">" for w in tokens)
 
     def findall(self, regexp):
         """
@@ -242,25 +279,25 @@ class TokenSearcher(object):
         :type regexp: str
         """
         # preprocess the regular expression
-        regexp = re.sub(r'\s', '', regexp)
-        regexp = re.sub(r'<', '(?:<(?:', regexp)
-        regexp = re.sub(r'>', ')>)', regexp)
-        regexp = re.sub(r'(?<!\\)\.', '[^>]', regexp)
+        regexp = re.sub(r"\s", "", regexp)
+        regexp = re.sub(r"<", "(?:<(?:", regexp)
+        regexp = re.sub(r">", ")>)", regexp)
+        regexp = re.sub(r"(?<!\\)\.", "[^>]", regexp)
 
         # perform the search
         hits = re.findall(regexp, self._raw)
 
         # Sanity check
         for h in hits:
-            if not h.startswith('<') and h.endswith('>'):
-                raise ValueError('Bad regexp for TokenSearcher.findall')
+            if not h.startswith("<") and h.endswith(">"):
+                raise ValueError("Bad regexp for TokenSearcher.findall")
 
         # postprocess the output
-        hits = [h[1:-1].split('><') for h in hits]
+        hits = [h[1:-1].split("><") for h in hits]
         return hits
 
 
-@python_2_unicode_compatible
+
 class Text(object):
     """
     A wrapper around a sequence of simple (string) tokens, which is
@@ -280,6 +317,7 @@ class Text(object):
     >>> moby = Text(nltk.corpus.gutenberg.words('melville-moby_dick.txt'))
 
     """
+
     # This defeats lazy loading, but makes things faster.  This
     # *shouldn't* be necessary because the corpus view *should* be
     # doing intelligent caching, but without this it's running slow.
@@ -299,15 +337,15 @@ class Text(object):
 
         if name:
             self.name = name
-        elif ']' in tokens[:20]:
-            end = tokens[:20].index(']')
-            self.name = " ".join(text_type(tok) for tok in tokens[1:end])
+        elif "]" in tokens[:20]:
+            end = tokens[:20].index("]")
+            self.name = " ".join(str(tok) for tok in tokens[1:end])
         else:
-            self.name = " ".join(text_type(tok) for tok in tokens[:8]) + "..."
+            self.name = " ".join(str(tok) for tok in tokens[:8]) + "..."
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Support item & slice access
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def __getitem__(self, i):
         return self.tokens[i]
@@ -315,47 +353,102 @@ class Text(object):
     def __len__(self):
         return len(self.tokens)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Interactive console methods
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def concordance(self, word, width=79, lines=25):
         """
-        Print a concordance for ``word`` with the specified context window.
+        Prints a concordance for ``word`` with the specified context window.
         Word matching is not case-sensitive.
+
+        :param word: The target word
+        :type word: str
+        :param width: The width of each line, in characters (default=80)
+        :type width: int
+        :param lines: The number of lines to display (default=25)
+        :type lines: int
+
         :seealso: ``ConcordanceIndex``
         """
-        if '_concordance_index' not in self.__dict__:
-            #print("Building index...")
-            self._concordance_index = ConcordanceIndex(self.tokens,
-                                                       key=lambda s:s.lower())
+        if "_concordance_index" not in self.__dict__:
+            self._concordance_index = ConcordanceIndex(
+                self.tokens, key=lambda s: s.lower()
+            )
 
-        self._concordance_index.print_concordance(word, width, lines)
+        return self._concordance_index.print_concordance(word, width, lines)
 
-    def collocations(self, num=20, window_size=2):
+    def concordance_list(self, word, width=79, lines=25):
         """
-        Print collocations derived from the text, ignoring stopwords.
+        Generate a concordance for ``word`` with the specified context window.
+        Word matching is not case-sensitive.
 
-        :seealso: find_collocations
-        :param num: The maximum number of collocations to print.
+        :param word: The target word
+        :type word: str
+        :param width: The width of each line, in characters (default=80)
+        :type width: int
+        :param lines: The number of lines to display (default=25)
+        :type lines: int
+
+        :seealso: ``ConcordanceIndex``
+        """
+        if "_concordance_index" not in self.__dict__:
+            self._concordance_index = ConcordanceIndex(
+                self.tokens, key=lambda s: s.lower()
+            )
+        return self._concordance_index.find_concordance(word, width)[:lines]
+
+    def collocation_list(self, num=20, window_size=2):
+        """
+        Return collocations derived from the text, ignoring stopwords.
+        
+            >>> from nltk.book import text4
+            >>> text4.collocation_list()[:2]
+            [('United', 'States'), ('fellow', 'citizens')]
+
+        :param num: The maximum number of collocations to return.
         :type num: int
         :param window_size: The number of tokens spanned by a collocation (default=2)
         :type window_size: int
+        :rtype: list(tuple(str, str))
         """
-        if not ('_collocations' in self.__dict__ and self._num == num and self._window_size == window_size):
+        if not (
+            "_collocations" in self.__dict__
+            and self._num == num
+            and self._window_size == window_size
+        ):
             self._num = num
             self._window_size = window_size
 
-            #print("Building collocations list")
+            # print("Building collocations list")
             from nltk.corpus import stopwords
-            ignored_words = stopwords.words('english')
+
+            ignored_words = stopwords.words("english")
             finder = BigramCollocationFinder.from_words(self.tokens, window_size)
             finder.apply_freq_filter(2)
             finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words)
             bigram_measures = BigramAssocMeasures()
-            self._collocations = finder.nbest(bigram_measures.likelihood_ratio, num)
-        colloc_strings = [w1+' '+w2 for w1, w2 in self._collocations]
-        print(tokenwrap(colloc_strings, separator="; "))
+            self._collocations = list(finder.nbest(bigram_measures.likelihood_ratio, num))
+        return self._collocations
+
+    def collocations(self, num=20, window_size=2):
+        """
+        Print collocations derived from the text, ignoring stopwords.
+        
+            >>> from nltk.book import text4
+            >>> text4.collocations() # doctest: +ELLIPSIS
+            United States; fellow citizens; four years; ...
+
+        :param num: The maximum number of collocations to print.
+        :type num: int
+        :param window_size: The number of tokens spanned by a collocation (default=2)
+        :type window_size: int
+        """
+
+        collocation_strings = [
+            w1 + " " + w2 for w1, w2 in self.collocation_list(num, window_size)
+        ]
+        print(tokenwrap(collocation_strings, separator="; "))
 
     def count(self, word):
         """
@@ -384,41 +477,45 @@ class Text(object):
         :type num: int
         :seealso: ContextIndex.similar_words()
         """
-        if '_word_context_index' not in self.__dict__:
-            #print('Building word-context index...')
-            self._word_context_index = ContextIndex(self.tokens,
-                                                    filter=lambda x:x.isalpha(),
-                                                    key=lambda s:s.lower())
+        if "_word_context_index" not in self.__dict__:
+            # print('Building word-context index...')
+            self._word_context_index = ContextIndex(
+                self.tokens, filter=lambda x: x.isalpha(), key=lambda s: s.lower()
+            )
 
-#        words = self._word_context_index.similar_words(word, num)
+        # words = self._word_context_index.similar_words(word, num)
 
         word = word.lower()
         wci = self._word_context_index._word_to_contexts
         if word in wci.conditions():
             contexts = set(wci[word])
-            fd = Counter(w for w in wci.conditions() for c in wci[w]
-                          if c in contexts and not w == word)
+            fd = Counter(
+                w
+                for w in wci.conditions()
+                for c in wci[w]
+                if c in contexts and not w == word
+            )
             words = [w for w, _ in fd.most_common(num)]
             print(tokenwrap(words))
         else:
             print("No matches")
 
-
     def common_contexts(self, words, num=20):
         """
         Find contexts where the specified words appear; list
         most frequent common contexts first.
 
-        :param word: The word used to seed the similarity search
-        :type word: str
+        :param words: The words used to seed the similarity search
+        :type words: str
         :param num: The number of words to generate (default=20)
         :type num: int
         :seealso: ContextIndex.common_contexts()
         """
-        if '_word_context_index' not in self.__dict__:
-            #print('Building word-context index...')
-            self._word_context_index = ContextIndex(self.tokens,
-                                                    key=lambda s:s.lower())
+        if "_word_context_index" not in self.__dict__:
+            # print('Building word-context index...')
+            self._word_context_index = ContextIndex(
+                self.tokens, key=lambda s: s.lower()
+            )
 
         try:
             fd = self._word_context_index.common_contexts(words, True)
@@ -426,7 +523,7 @@ class Text(object):
                 print("No common contexts were found")
             else:
                 ranked_contexts = [w for w, _ in fd.most_common(num)]
-                print(tokenwrap(w1+"_"+w2 for w1,w2 in ranked_contexts))
+                print(tokenwrap(w1 + "_" + w2 for w1, w2 in ranked_contexts))
 
         except ValueError as e:
             print(e)
@@ -441,14 +538,61 @@ class Text(object):
         :seealso: nltk.draw.dispersion_plot()
         """
         from nltk.draw import dispersion_plot
+
         dispersion_plot(self, words)
 
-    def generate(self, words):
-        """
-        Issues a reminder to users following the book online
-        """
-        import warnings
-        warnings.warn('The generate() method is no longer available.', DeprecationWarning)
+    def _train_default_ngram_lm(self, tokenized_sents, n=3):
+        train_data, padded_sents = padded_everygram_pipeline(n, tokenized_sents)
+        model = MLE(order=n)
+        model.fit(train_data, padded_sents)
+        return model
+
+    def generate(self, length=100, text_seed=None, random_seed=42):
+        """
+        Print random text, generated using a trigram language model.
+        See also `help(nltk.lm)`.
+
+        :param length: The length of text to generate (default=100)
+        :type length: int
+
+        :param text_seed: Generation can be conditioned on preceding context.
+        :type text_seed: list(str)
+
+        :param random_seed: A random seed or an instance of `random.Random`. If provided,
+            makes the random sampling part of generation reproducible. (default=42)
+        :type random_seed: int
+
+        """
+        # Create the model when using it the first time.
+        self._tokenized_sents = [
+            sent.split(" ") for sent in sent_tokenize(" ".join(self.tokens))
+        ]
+        if not hasattr(self, "_trigram_model"):
+            print("Building ngram index...", file=sys.stderr)
+            self._trigram_model = self._train_default_ngram_lm(
+                self._tokenized_sents, n=3
+            )
+
+        generated_tokens = []
+
+        assert length > 0, "The `length` must be more than 0."
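+        # Keep sampling (bumping the seed each round) until enough tokens
+        # have been generated; the <s>/</s> sentence padding is dropped.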
+        while len(generated_tokens) < length:
+            for idx, token in enumerate(
+                self._trigram_model.generate(
+                    length, text_seed=text_seed, random_seed=random_seed
+                )
+            ):
+                if token == "<s>":
+                    continue
+                if token == "</s>":
+                    break
+                generated_tokens.append(token)
+            random_seed += 1
+
+        prefix = " ".join(text_seed) + " " if text_seed else ""
+        output_str = prefix + tokenwrap(generated_tokens[:length])
+        print(output_str)
+        return output_str
 
     def plot(self, *args):
         """
@@ -462,7 +606,7 @@ class Text(object):
         :seealso: nltk.prob.FreqDist
         """
         if "_vocab" not in self.__dict__:
-            #print("Building vocabulary index...")
+            # print("Building vocabulary index...")
             self._vocab = FreqDist(self)
         return self._vocab
 
@@ -494,14 +638,15 @@ class Text(object):
             self._token_searcher = TokenSearcher(self)
 
         hits = self._token_searcher.findall(regexp)
-        hits = [' '.join(h) for h in hits]
+        hits = [" ".join(h) for h in hits]
         print(tokenwrap(hits, "; "))
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Helper Methods
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+
+    _CONTEXT_RE = re.compile(r"\w+|[\.\!\?]")
 
-    _CONTEXT_RE = re.compile('\w+|[\.\!\?]')
     def _context(self, tokens, i):
         """
         One left & one right token, both case-normalized.  Skip over
@@ -509,28 +654,28 @@ class Text(object):
         that is created for ``similar()`` and ``common_contexts()``.
         """
         # Left context
-        j = i-1
-        while j>=0 and not self._CONTEXT_RE.match(tokens[j]):
+        j = i - 1
+        while j >= 0 and not self._CONTEXT_RE.match(tokens[j]):
             j -= 1
-        left = (tokens[j] if j != 0 else '*START*')
+        left = tokens[j] if j != 0 else "*START*"
 
         # Right context
-        j = i+1
-        while j<len(tokens) and not self._CONTEXT_RE.match(tokens[j]):
+        j = i + 1
+        while j < len(tokens) and not self._CONTEXT_RE.match(tokens[j]):
             j += 1
-        right = (tokens[j] if j != len(tokens) else '*END*')
+        right = tokens[j] if j != len(tokens) else "*END*"
 
         return (left, right)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # String Display
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def __str__(self):
-        return '<Text: %s>' % self.name
+        return "<Text: %s>" % self.name
 
     def __repr__(self):
-        return '<Text: %s>' % self.name
+        return "<Text: %s>" % self.name
 
 
 # Prototype only; this approach will be slow to load
@@ -550,8 +695,9 @@ class TextCollection(Text):
     Iterating over a TextCollection produces all the tokens of all the
     texts in order.
     """
+
     def __init__(self, source):
-        if hasattr(source, 'words'): # bridge to the text corpus reader
+        if hasattr(source, "words"):  # bridge to the text corpus reader
             source = [source.words(f) for f in source.fileids()]
 
         self._texts = source
@@ -570,33 +716,36 @@ class TextCollection(Text):
         idf = self._idf_cache.get(term)
         if idf is None:
             matches = len([True for text in self._texts if term in text])
-            # FIXME Should this raise some kind of error instead?
-            idf = (log(len(self._texts) / matches) if matches else 0.0)
+            if len(self._texts) == 0:
+                raise ValueError("IDF undefined for empty document collection")
+            idf = log(len(self._texts) / matches) if matches else 0.0
             self._idf_cache[term] = idf
         return idf
 
     def tf_idf(self, term, text):
         return self.tf(term, text) * self.idf(term)
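A worked example of the tf-idf arithmetic above (illustrative; tf is the raw count divided by the document length, and idf uses the natural log):

    from nltk.text import TextCollection

    docs = [["a", "b", "c"], ["a", "b"], ["a"]]
    tc = TextCollection(docs)
    tc.tf("b", docs[0])       # 1/3: one "b" among three tokens
    tc.idf("b")               # log(3/2): "b" occurs in 2 of the 3 documents
    tc.tf_idf("b", docs[0])   # (1/3) * log(3/2), about 0.135
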
 
+
 def demo():
     from nltk.corpus import brown
-    text = Text(brown.words(categories='news'))
+
+    text = Text(brown.words(categories="news"))
     print(text)
     print()
     print("Concordance:")
-    text.concordance('news')
+    text.concordance("news")
     print()
     print("Distributionally similar words:")
-    text.similar('news')
+    text.similar("news")
     print()
     print("Collocations:")
     text.collocations()
     print()
-    #print("Automatically generated text:")
-    #text.generate()
-    #print()
+    # print("Automatically generated text:")
+    # text.generate()
+    # print()
     print("Dispersion plot:")
-    text.dispersion_plot(['news', 'report', 'said', 'announced'])
+    text.dispersion_plot(["news", "report", "said", "announced"])
     print()
     print("Vocabulary plot:")
     text.plot(50)
@@ -604,13 +753,16 @@ def demo():
     print("Indexing:")
     print("text[3]:", text[3])
     print("text[3:5]:", text[3:5])
-    print("text.vocab()['news']:", text.vocab()['news'])
+    print("text.vocab()['news']:", text.vocab()["news"])
+
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
 
-__all__ = ["ContextIndex",
-           "ConcordanceIndex",
-           "TokenSearcher",
-           "Text",
-           "TextCollection"]
+__all__ = [
+    "ContextIndex",
+    "ConcordanceIndex",
+    "TokenSearcher",
+    "Text",
+    "TextCollection",
+]
diff --git a/nlp_resource_data/nltk/text.pyc b/nlp_resource_data/nltk/text.pyc
deleted file mode 100755 (executable)
index e0c4eb8..0000000
Binary files a/nlp_resource_data/nltk/text.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 81c7e2b..84df549
@@ -3,12 +3,12 @@
 #
 # Natural Language Toolkit: TGrep search
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Will Roberts <wildwilhelm@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-'''
+"""
 ============================================
  TGrep search implementation for NLTK trees
 ============================================
@@ -108,33 +108,32 @@ specified in a call to a predicate.  Predicates which call other
 predicates must always pass the value of these arguments on.  The
 top-level predicate (constructed by ``_tgrep_exprs_action``) binds the
 macro definitions to ``m`` and initialises ``l`` to an empty dictionary.
-'''
-
-from __future__ import absolute_import, print_function, unicode_literals
+"""
 
 import functools
 import re
 
-from six import binary_type, text_type
-
 try:
     import pyparsing
 except ImportError:
-    print('Warning: nltk.tgrep will not work without the `pyparsing` package')
-    print('installed.')
+    print("Warning: nltk.tgrep will not work without the `pyparsing` package")
+    print("installed.")
 
 import nltk.tree
 
+
 class TgrepException(Exception):
-    '''Tgrep exception type.'''
+    """Tgrep exception type."""
+
     pass
 
+
 def ancestors(node):
-    '''
+    """
     Returns the list of all nodes dominating the given tree node.
     This method will not work with leaf nodes, since there is no way
     to recover the parent.
-    '''
+    """
     results = []
     try:
         current = node.parent()
@@ -146,11 +145,12 @@ def ancestors(node):
         current = current.parent()
     return results
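A quick illustration (the helper needs a ParentedTree, since plain Tree nodes cannot recover their parent):

    from nltk.tgrep import ancestors
    from nltk.tree import ParentedTree

    t = ParentedTree.fromstring("(S (NP (DT the) (NN dog)) (VP (VBD ran)))")
    np = t[0]                            # the NP subtree
    [a.label() for a in ancestors(np)]   # ['S']
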
 
+
 def unique_ancestors(node):
-    '''
+    """
     Returns the list of all nodes dominating the given node, where
     there is only a single path of descent.
-    '''
+    """
     results = []
     try:
         current = node.parent()
@@ -162,48 +162,53 @@ def unique_ancestors(node):
         current = current.parent()
     return results
 
+
 def _descendants(node):
-    '''
+    """
     Returns the list of all nodes which are descended from the given
     tree node in some way.
-    '''
+    """
     try:
         treepos = node.treepositions()
     except AttributeError:
         return []
     return [node[x] for x in treepos[1:]]
 
+
 def _leftmost_descendants(node):
-    '''
+    """
     Returns the set of all nodes descended in some way through
     left branches from this node.
-    '''
+    """
     try:
         treepos = node.treepositions()
     except AttributeError:
         return []
     return [node[x] for x in treepos[1:] if all(y == 0 for y in x)]
 
+
 def _rightmost_descendants(node):
-    '''
+    """
     Returns the set of all nodes descended in some way through
     right branches from this node.
-    '''
+    """
     try:
         rightmost_leaf = max(node.treepositions())
     except AttributeError:
         return []
     return [node[rightmost_leaf[:i]] for i in range(1, len(rightmost_leaf) + 1)]
 
+
 def _istree(obj):
-    '''Predicate to check whether `obj` is a nltk.tree.Tree.'''
+    """Predicate to check whether `obj` is a nltk.tree.Tree."""
     return isinstance(obj, nltk.tree.Tree)
 
+
 def _unique_descendants(node):
-    '''
+    """
     Returns the list of all nodes descended from the given node, where
     there is only a single path of descent.
-    '''
+    """
     results = []
     current = node
     while current and _istree(current) and len(current) == 1:
@@ -211,27 +216,28 @@ def _unique_descendants(node):
         results.append(current)
     return results
 
+
 def _before(node):
-    '''
+    """
     Returns the set of all nodes that are before the given node.
-    '''
+    """
     try:
         pos = node.treeposition()
         tree = node.root()
     except AttributeError:
         return []
-    return [tree[x] for x in tree.treepositions()
-            if x[:len(pos)] < pos[:len(x)]]
+    return [tree[x] for x in tree.treepositions() if x[: len(pos)] < pos[: len(x)]]
+
 
 def _immediately_before(node):
-    '''
+    """
     Returns the set of all nodes that are immediately before the given
     node.
 
     Tree node A immediately precedes node B if the last terminal
     symbol (word) produced by A immediately precedes the first
     terminal symbol produced by B.
-    '''
+    """
     try:
         pos = node.treeposition()
         tree = node.root()
@@ -243,32 +249,33 @@ def _immediately_before(node):
         idx -= 1
     if idx < 0:
         return []
-    pos = list(pos[:idx + 1])
+    pos = list(pos[: idx + 1])
     pos[-1] -= 1
     before = tree[pos]
     return [before] + _rightmost_descendants(before)
 
+
 def _after(node):
-    '''
+    """
     Returns the set of all nodes that are after the given node.
-    '''
+    """
     try:
         pos = node.treeposition()
         tree = node.root()
     except AttributeError:
         return []
-    return [tree[x] for x in tree.treepositions()
-            if x[:len(pos)] > pos[:len(x)]]
+    return [tree[x] for x in tree.treepositions() if x[: len(pos)] > pos[: len(x)]]
+
 
 def _immediately_after(node):
-    '''
+    """
     Returns the set of all nodes that are immediately after the given
     node.
 
     Tree node A immediately follows node B if the first terminal
     symbol (word) produced by A immediately follows the last
     terminal symbol produced by B.
-    '''
+    """
     try:
         pos = node.treeposition()
         tree = node.root()
@@ -283,283 +290,347 @@ def _immediately_after(node):
         current = current.parent()
     if idx < 0:
         return []
-    pos = list(pos[:idx + 1])
+    pos = list(pos[: idx + 1])
     pos[-1] += 1
     after = tree[pos]
     return [after] + _leftmost_descendants(after)
 
+
 def _tgrep_node_literal_value(node):
-    '''
+    """
     Gets the string value of a given parse tree node, for comparison
     using the tgrep node literal predicates.
-    '''
-    return (node.label() if _istree(node) else text_type(node))
+    """
+    return node.label() if _istree(node) else str(node)
+
 
 def _tgrep_macro_use_action(_s, _l, tokens):
-    '''
+    """
     Builds a lambda function which looks up the macro name used.
-    '''
+    """
     assert len(tokens) == 1
-    assert tokens[0][0] == '@'
+    assert tokens[0][0] == "@"
     macro_name = tokens[0][1:]
+
     def macro_use(n, m=None, l=None):
         if m is None or macro_name not in m:
-            raise TgrepException('macro {0} not defined'.format(macro_name))
+            raise TgrepException("macro {0} not defined".format(macro_name))
         return m[macro_name](n, m, l)
+
     return macro_use
 
+
 def _tgrep_node_action(_s, _l, tokens):
-    '''
+    """
     Builds a lambda function representing a predicate on a tree node
     depending on the name of its node.
-    '''
-    # print 'node tokens: ', tokens
+    """
     if tokens[0] == "'":
         # strip initial apostrophe (tgrep2 print command)
         tokens = tokens[1:]
     if len(tokens) > 1:
         # disjunctive definition of a node name
-        assert list(set(tokens[1::2])) == ['|']
+        assert list(set(tokens[1::2])) == ["|"]
         # recursively call self to interpret each node name definition
-        tokens = [_tgrep_node_action(None, None, [node])
-                  for node in tokens[::2]]
+        tokens = [_tgrep_node_action(None, None, [node]) for node in tokens[::2]]
         # capture tokens and return the disjunction
         return (lambda t: lambda n, m=None, l=None: any(f(n, m, l) for f in t))(tokens)
     else:
-        if hasattr(tokens[0], '__call__'):
+        if hasattr(tokens[0], "__call__"):
             # this is a previously interpreted parenthetical node
             # definition (lambda function)
             return tokens[0]
-        elif tokens[0] == '*' or tokens[0] == '__':
+        elif tokens[0] == "*" or tokens[0] == "__":
             return lambda n, m=None, l=None: True
         elif tokens[0].startswith('"'):
             assert tokens[0].endswith('"')
-            node_lit = tokens[0][1:-1].replace('\\"', '"').replace('\\\\', '\\')
-            return (lambda s: lambda n, m=None, l=None: _tgrep_node_literal_value(n) == s)(node_lit)
-        elif tokens[0].startswith('/'):
-            assert tokens[0].endswith('/')
+            node_lit = tokens[0][1:-1].replace('\\"', '"').replace("\\\\", "\\")
+            return (
+                lambda s: lambda n, m=None, l=None: _tgrep_node_literal_value(n) == s
+            )(node_lit)
+        elif tokens[0].startswith("/"):
+            assert tokens[0].endswith("/")
             node_lit = tokens[0][1:-1]
-            return (lambda r: lambda n, m=None, l=None:
-                    r.search(_tgrep_node_literal_value(n)))(re.compile(node_lit))
-        elif tokens[0].startswith('i@'):
+            return (
+                lambda r: lambda n, m=None, l=None: r.search(
+                    _tgrep_node_literal_value(n)
+                )
+            )(re.compile(node_lit))
+        elif tokens[0].startswith("i@"):
             node_func = _tgrep_node_action(_s, _l, [tokens[0][2:].lower()])
-            return (lambda f: lambda n, m=None, l=None:
-                    f(_tgrep_node_literal_value(n).lower()))(node_func)
+            return (
+                lambda f: lambda n, m=None, l=None: f(
+                    _tgrep_node_literal_value(n).lower()
+                )
+            )(node_func)
         else:
-            return (lambda s: lambda n, m=None, l=None:
-                    _tgrep_node_literal_value(n) == s)(tokens[0])
+            return (
+                lambda s: lambda n, m=None, l=None: _tgrep_node_literal_value(n) == s
+            )(tokens[0])
+
 
 def _tgrep_parens_action(_s, _l, tokens):
-    '''
+    """
     Builds a lambda function representing a predicate on a tree node
     from a parenthetical notation.
-    '''
-    # print 'parenthetical tokens: ', tokens
+    """
     assert len(tokens) == 3
-    assert tokens[0] == '('
-    assert tokens[2] == ')'
+    assert tokens[0] == "("
+    assert tokens[2] == ")"
     return tokens[1]
 
+
 def _tgrep_nltk_tree_pos_action(_s, _l, tokens):
-    '''
+    """
     Builds a lambda function representing a predicate on a tree node
     which returns true if the node is located at a specific tree
     position.
-    '''
+    """
     # recover the tuple from the parsed string
     node_tree_position = tuple(int(x) for x in tokens if x.isdigit())
     # capture the node's tree position
-    return (lambda i: lambda n, m=None, l=None: (hasattr(n, 'treeposition') and
-                                                 n.treeposition() == i))(node_tree_position)
+    return (
+        lambda i: lambda n, m=None, l=None: (
+            hasattr(n, "treeposition") and n.treeposition() == i
+        )
+    )(node_tree_position)
+
 
 def _tgrep_relation_action(_s, _l, tokens):
-    '''
+    """
     Builds a lambda function representing a predicate on a tree node
     depending on its relation to other nodes in the tree.
-    '''
-    # print 'relation tokens: ', tokens
+    """
     # process negation first if needed
     negated = False
-    if tokens[0] == '!':
+    if tokens[0] == "!":
         negated = True
         tokens = tokens[1:]
-    if tokens[0] == '[':
+    if tokens[0] == "[":
         # process square-bracketed relation expressions
         assert len(tokens) == 3
-        assert tokens[2] == ']'
+        assert tokens[2] == "]"
         retval = tokens[1]
     else:
         # process operator-node relation expressions
         assert len(tokens) == 2
         operator, predicate = tokens
         # A < B       A is the parent of (immediately dominates) B.
-        if operator == '<':
-            retval = lambda n, m=None, l=None: (_istree(n) and
-                                                any(predicate(x, m, l) for x in n))
+        if operator == "<":
+            retval = lambda n, m=None, l=None: (
+                _istree(n) and any(predicate(x, m, l) for x in n)
+            )
         # A > B       A is the child of B.
-        elif operator == '>':
-            retval = lambda n, m=None, l=None: (hasattr(n, 'parent') and
-                                                bool(n.parent()) and
-                                                predicate(n.parent(), m, l))
+        elif operator == ">":
+            retval = lambda n, m=None, l=None: (
+                hasattr(n, "parent")
+                and bool(n.parent())
+                and predicate(n.parent(), m, l)
+            )
         # A <, B      Synonymous with A <1 B.
-        elif operator == '<,' or operator == '<1':
-            retval = lambda n, m=None, l=None: (_istree(n) and
-                                                bool(list(n)) and
-                                                predicate(n[0], m, l))
+        elif operator == "<," or operator == "<1":
+            retval = lambda n, m=None, l=None: (
+                _istree(n) and bool(list(n)) and predicate(n[0], m, l)
+            )
         # A >, B      Synonymous with A >1 B.
-        elif operator == '>,' or operator == '>1':
-            retval = lambda n, m=None, l=None: (hasattr(n, 'parent') and
-                                                bool(n.parent()) and
-                                                (n is n.parent()[0]) and
-                                                predicate(n.parent(), m, l))
+        elif operator == ">," or operator == ">1":
+            retval = lambda n, m=None, l=None: (
+                hasattr(n, "parent")
+                and bool(n.parent())
+                and (n is n.parent()[0])
+                and predicate(n.parent(), m, l)
+            )
         # A <N B      B is the Nth child of A (the first child is <1).
-        elif operator[0] == '<' and operator[1:].isdigit():
+        elif operator[0] == "<" and operator[1:].isdigit():
             idx = int(operator[1:])
             # capture the index parameter
-            retval = (lambda i: lambda n, m=None, l=None: (_istree(n) and
-                                                           bool(list(n)) and
-                                                           0 <= i < len(n) and
-                                                           predicate(n[i], m, l)))(idx - 1)
+            retval = (
+                lambda i: lambda n, m=None, l=None: (
+                    _istree(n)
+                    and bool(list(n))
+                    and 0 <= i < len(n)
+                    and predicate(n[i], m, l)
+                )
+            )(idx - 1)
         # A >N B      A is the Nth child of B (the first child is >1).
-        elif operator[0] == '>' and operator[1:].isdigit():
+        elif operator[0] == ">" and operator[1:].isdigit():
             idx = int(operator[1:])
             # capture the index parameter
-            retval = (lambda i: lambda n, m=None, l=None: (hasattr(n, 'parent') and
-                                                           bool(n.parent()) and
-                                                           0 <= i < len(n.parent()) and
-                                                           (n is n.parent()[i]) and
-                                                           predicate(n.parent(), m, l)))(idx - 1)
+            retval = (
+                lambda i: lambda n, m=None, l=None: (
+                    hasattr(n, "parent")
+                    and bool(n.parent())
+                    and 0 <= i < len(n.parent())
+                    and (n is n.parent()[i])
+                    and predicate(n.parent(), m, l)
+                )
+            )(idx - 1)
         # A <' B      B is the last child of A (also synonymous with A <-1 B).
         # A <- B      B is the last child of A (synonymous with A <-1 B).
-        elif operator == '<\'' or operator == '<-' or operator == '<-1':
-            retval = lambda n, m=None, l=None: (_istree(n) and bool(list(n))
-                                                and predicate(n[-1], m, l))
+        elif operator == "<'" or operator == "<-" or operator == "<-1":
+            retval = lambda n, m=None, l=None: (
+                _istree(n) and bool(list(n)) and predicate(n[-1], m, l)
+            )
         # A >' B      A is the last child of B (also synonymous with A >-1 B).
         # A >- B      A is the last child of B (synonymous with A >-1 B).
-        elif operator == '>\'' or operator == '>-' or operator == '>-1':
-            retval = lambda n, m=None, l=None: (hasattr(n, 'parent') and
-                                                bool(n.parent()) and
-                                                (n is n.parent()[-1]) and
-                                                predicate(n.parent(), m, l))
+        elif operator == ">'" or operator == ">-" or operator == ">-1":
+            retval = lambda n, m=None, l=None: (
+                hasattr(n, "parent")
+                and bool(n.parent())
+                and (n is n.parent()[-1])
+                and predicate(n.parent(), m, l)
+            )
         # A <-N B        B is the Nth-to-last child of A (the last child is <-1).
-        elif operator[:2] == '<-' and operator[2:].isdigit():
+        elif operator[:2] == "<-" and operator[2:].isdigit():
             idx = -int(operator[2:])
             # capture the index parameter
-            retval = (lambda i: lambda n, m=None, l=None: (_istree(n) and
-                                                           bool(list(n)) and
-                                                           0 <= (i + len(n)) < len(n) and
-                                                           predicate(n[i + len(n)], m, l)))(idx)
+            retval = (
+                lambda i: lambda n, m=None, l=None: (
+                    _istree(n)
+                    and bool(list(n))
+                    and 0 <= (i + len(n)) < len(n)
+                    and predicate(n[i + len(n)], m, l)
+                )
+            )(idx)
         # A >-N B        A is the Nth-to-last child of B (the last child is >-1).
-        elif operator[:2] == '>-' and operator[2:].isdigit():
+        elif operator[:2] == ">-" and operator[2:].isdigit():
             idx = -int(operator[2:])
             # capture the index parameter
-            retval = (lambda i: lambda n, m=None, l=None:
-                          (hasattr(n, 'parent') and
-                           bool(n.parent()) and
-                           0 <= (i + len(n.parent())) < len(n.parent()) and
-                           (n is n.parent()[i + len(n.parent())]) and
-                           predicate(n.parent(), m, l)))(idx)
+            retval = (
+                lambda i: lambda n, m=None, l=None: (
+                    hasattr(n, "parent")
+                    and bool(n.parent())
+                    and 0 <= (i + len(n.parent())) < len(n.parent())
+                    and (n is n.parent()[i + len(n.parent())])
+                    and predicate(n.parent(), m, l)
+                )
+            )(idx)
         # A <: B      B is the only child of A
-        elif operator == '<:':
-            retval = lambda n, m=None, l=None: (_istree(n) and
-                                                len(n) == 1 and
-                                                predicate(n[0], m, l))
+        elif operator == "<:":
+            retval = lambda n, m=None, l=None: (
+                _istree(n) and len(n) == 1 and predicate(n[0], m, l)
+            )
         # A >: B      A is the only child of B.
-        elif operator == '>:':
-            retval = lambda n, m=None, l=None: (hasattr(n, 'parent') and
-                                                bool(n.parent()) and
-                                                len(n.parent()) == 1 and
-                                                predicate(n.parent(), m, l))
+        elif operator == ">:":
+            retval = lambda n, m=None, l=None: (
+                hasattr(n, "parent")
+                and bool(n.parent())
+                and len(n.parent()) == 1
+                and predicate(n.parent(), m, l)
+            )
         # A << B      A dominates B (A is an ancestor of B).
-        elif operator == '<<':
-            retval = lambda n, m=None, l=None: (_istree(n) and
-                                                any(predicate(x, m, l) for x in _descendants(n)))
+        elif operator == "<<":
+            retval = lambda n, m=None, l=None: (
+                _istree(n) and any(predicate(x, m, l) for x in _descendants(n))
+            )
         # A >> B      A is dominated by B (A is a descendant of B).
-        elif operator == '>>':
-            retval = lambda n, m=None, l=None: any(predicate(x, m, l) for x in ancestors(n))
+        elif operator == ">>":
+            retval = lambda n, m=None, l=None: any(
+                predicate(x, m, l) for x in ancestors(n)
+            )
         # A <<, B     B is a left-most descendant of A.
-        elif operator == '<<,' or operator == '<<1':
-            retval = lambda n, m=None, l=None: (_istree(n) and
-                                                any(predicate(x, m, l)
-                                                    for x in _leftmost_descendants(n)))
+        elif operator == "<<," or operator == "<<1":
+            retval = lambda n, m=None, l=None: (
+                _istree(n) and any(predicate(x, m, l) for x in _leftmost_descendants(n))
+            )
         # A >>, B     A is a left-most descendant of B.
-        elif operator == '>>,':
-            retval = lambda n, m=None, l=None: any((predicate(x, m, l) and
-                                                    n in _leftmost_descendants(x))
-                                                   for x in ancestors(n))
+        elif operator == ">>,":
+            retval = lambda n, m=None, l=None: any(
+                (predicate(x, m, l) and n in _leftmost_descendants(x))
+                for x in ancestors(n)
+            )
         # A <<' B     B is a right-most descendant of A.
-        elif operator == '<<\'':
-            retval = lambda n, m=None, l=None: (_istree(n) and
-                                                any(predicate(x, m, l)
-                                                    for x in _rightmost_descendants(n)))
+        elif operator == "<<'":
+            retval = lambda n, m=None, l=None: (
+                _istree(n)
+                and any(predicate(x, m, l) for x in _rightmost_descendants(n))
+            )
         # A >>' B     A is a right-most descendant of B.
-        elif operator == '>>\'':
-            retval = lambda n, m=None, l=None: any((predicate(x, m, l) and
-                                                    n in _rightmost_descendants(x))
-                                                   for x in ancestors(n))
+        elif operator == ">>'":
+            retval = lambda n, m=None, l=None: any(
+                (predicate(x, m, l) and n in _rightmost_descendants(x))
+                for x in ancestors(n)
+            )
         # A <<: B     There is a single path of descent from A and B is on it.
-        elif operator == '<<:':
-            retval = lambda n, m=None, l=None: (_istree(n) and
-                                                any(predicate(x, m, l)
-                                                    for x in _unique_descendants(n)))
+        elif operator == "<<:":
+            retval = lambda n, m=None, l=None: (
+                _istree(n) and any(predicate(x, m, l) for x in _unique_descendants(n))
+            )
         # A >>: B     There is a single path of descent from B and A is on it.
-        elif operator == '>>:':
-            retval = lambda n, m=None, l=None: any(predicate(x, m, l) for x in unique_ancestors(n))
+        elif operator == ">>:":
+            retval = lambda n, m=None, l=None: any(
+                predicate(x, m, l) for x in unique_ancestors(n)
+            )
         # A . B       A immediately precedes B.
-        elif operator == '.':
-            retval = lambda n, m=None, l=None: any(predicate(x, m, l)
-                                                   for x in _immediately_after(n))
+        elif operator == ".":
+            retval = lambda n, m=None, l=None: any(
+                predicate(x, m, l) for x in _immediately_after(n)
+            )
         # A , B       A immediately follows B.
-        elif operator == ',':
-            retval = lambda n, m=None, l=None: any(predicate(x, m, l)
-                                                   for x in _immediately_before(n))
+        elif operator == ",":
+            retval = lambda n, m=None, l=None: any(
+                predicate(x, m, l) for x in _immediately_before(n)
+            )
         # A .. B      A precedes B.
-        elif operator == '..':
-            retval = lambda n, m=None, l=None: any(predicate(x, m, l) for x in _after(n))
+        elif operator == "..":
+            retval = lambda n, m=None, l=None: any(
+                predicate(x, m, l) for x in _after(n)
+            )
         # A ,, B      A follows B.
-        elif operator == ',,':
-            retval = lambda n, m=None, l=None: any(predicate(x, m, l) for x in _before(n))
+        elif operator == ",,":
+            retval = lambda n, m=None, l=None: any(
+                predicate(x, m, l) for x in _before(n)
+            )
         # A $ B       A is a sister of B (and A != B).
-        elif operator == '$' or operator == '%':
-            retval = lambda n, m=None, l=None: (hasattr(n, 'parent') and
-                                                bool(n.parent()) and
-                                                any(predicate(x, m, l)
-                                                    for x in n.parent() if x is not n))
+        elif operator == "$" or operator == "%":
+            retval = lambda n, m=None, l=None: (
+                hasattr(n, "parent")
+                and bool(n.parent())
+                and any(predicate(x, m, l) for x in n.parent() if x is not n)
+            )
         # A $. B      A is a sister of and immediately precedes B.
-        elif operator == '$.' or operator == '%.':
-            retval = lambda n, m=None, l=None: (hasattr(n, 'right_sibling') and
-                                                bool(n.right_sibling()) and
-                                                predicate(n.right_sibling(), m, l))
+        elif operator == "$." or operator == "%.":
+            retval = lambda n, m=None, l=None: (
+                hasattr(n, "right_sibling")
+                and bool(n.right_sibling())
+                and predicate(n.right_sibling(), m, l)
+            )
         # A $, B      A is a sister of and immediately follows B.
-        elif operator == '$,' or operator == '%,':
-            retval = lambda n, m=None, l=None: (hasattr(n, 'left_sibling') and
-                                                bool(n.left_sibling()) and
-                                                predicate(n.left_sibling(), m, l))
+        elif operator == "$," or operator == "%,":
+            retval = lambda n, m=None, l=None: (
+                hasattr(n, "left_sibling")
+                and bool(n.left_sibling())
+                and predicate(n.left_sibling(), m, l)
+            )
         # A $.. B     A is a sister of and precedes B.
-        elif operator == '$..' or operator == '%..':
-            retval = lambda n, m=None, l=None: (hasattr(n, 'parent') and
-                                                hasattr(n, 'parent_index') and
-                                                bool(n.parent()) and
-                                                any(predicate(x, m, l) for x in
-                                                    n.parent()[n.parent_index() + 1:]))
+        elif operator == "$.." or operator == "%..":
+            retval = lambda n, m=None, l=None: (
+                hasattr(n, "parent")
+                and hasattr(n, "parent_index")
+                and bool(n.parent())
+                and any(predicate(x, m, l) for x in n.parent()[n.parent_index() + 1 :])
+            )
         # A $,, B     A is a sister of and follows B.
-        elif operator == '$,,' or operator == '%,,':
-            retval = lambda n, m=None, l=None: (hasattr(n, 'parent') and
-                                                hasattr(n, 'parent_index') and
-                                                bool(n.parent()) and
-                                                any(predicate(x, m, l) for x in
-                                                    n.parent()[:n.parent_index()]))
+        elif operator == "$,," or operator == "%,,":
+            retval = lambda n, m=None, l=None: (
+                hasattr(n, "parent")
+                and hasattr(n, "parent_index")
+                and bool(n.parent())
+                and any(predicate(x, m, l) for x in n.parent()[: n.parent_index()])
+            )
         else:
             raise TgrepException(
-                'cannot interpret tgrep operator "{0}"'.format(operator))
+                'cannot interpret tgrep operator "{0}"'.format(operator)
+            )
     # now return the built function
     if negated:
         return (lambda r: (lambda n, m=None, l=None: not r(n, m, l)))(retval)
     else:
         return retval
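To make the operator table above concrete, a small sketch using the public API defined later in this file (the pattern and tree are illustrative):

    from nltk.tgrep import tgrep_compile
    from nltk.tree import ParentedTree

    t = ParentedTree.fromstring("(S (NP (DT the) (NN dog)) (VP (VBD barked)))")
    pred = tgrep_compile("NP < NN")   # an NP that immediately dominates an NN
    [t[pos] for pos in t.treepositions() if pred(t[pos])]
    # -> [the NP subtree]; parent-based operators such as > and >> also need ParentedTree
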
 
-def _tgrep_conjunction_action(_s, _l, tokens, join_char = '&'):
-    '''
+
+def _tgrep_conjunction_action(_s, _l, tokens, join_char="&"):
+    """
     Builds a lambda function representing a predicate on a tree node
     from the conjunction of several other such lambda functions.
 
@@ -580,18 +651,21 @@ def _tgrep_conjunction_action(_s, _l, tokens, join_char = '&'):
     tokens[0] is a tgrep_expr predicate; tokens[1:] are an (optional)
     list of segmented patterns (`tgrep_expr_labeled`, processed by
     `_tgrep_segmented_pattern_action`).
-    '''
+    """
     # filter out the ampersand
     tokens = [x for x in tokens if x != join_char]
-    # print 'relation conjunction tokens: ', tokens
     if len(tokens) == 1:
         return tokens[0]
     else:
-        return (lambda ts: lambda n, m=None, l=None: all(predicate(n, m, l)
-                                                         for predicate in ts))(tokens)
+        return (
+            lambda ts: lambda n, m=None, l=None: all(
+                predicate(n, m, l) for predicate in ts
+            )
+        )(tokens)
+
 
 def _tgrep_segmented_pattern_action(_s, _l, tokens):
-    '''
+    """
     Builds a lambda function representing a segmented pattern.
 
     Called for expressions like (`tgrep_expr_labeled`)::
@@ -613,25 +687,29 @@ def _tgrep_segmented_pattern_action(_s, _l, tokens):
     parse action to the pred use inside a node_expr.  See
     `_tgrep_node_label_use_action` and
     `_tgrep_node_label_pred_use_action`.
-    '''
+    """
     # tokens[0] is a string containing the node label
     node_label = tokens[0]
     # tokens[1:] is an (optional) list of predicates which must all
     # hold of the bound node
     reln_preds = tokens[1:]
+
     def pattern_segment_pred(n, m=None, l=None):
-        '''This predicate function ignores its node argument.'''
+        """This predicate function ignores its node argument."""
         # look up the bound node using its label
         if l is None or node_label not in l:
-            raise TgrepException('node_label ={0} not bound in pattern'.format(
-                node_label))
+            raise TgrepException(
+                "node_label ={0} not bound in pattern".format(node_label)
+            )
         node = l[node_label]
         # match the relation predicates against the node
         return all(pred(node, m, l) for pred in reln_preds)
+
     return pattern_segment_pred
 
+
 def _tgrep_node_label_use_action(_s, _l, tokens):
-    '''
+    """
     Returns the node label used to begin a tgrep_expr_labeled.  See
     `_tgrep_segmented_pattern_action`.
 
@@ -643,13 +721,14 @@ def _tgrep_node_label_use_action(_s, _l, tokens):
     expression (see `_tgrep_segmented_pattern_action`).
 
     It returns the node label.
-    '''
+    """
     assert len(tokens) == 1
-    assert tokens[0].startswith('=')
+    assert tokens[0].startswith("=")
     return tokens[0][1:]
 
+
 def _tgrep_node_label_pred_use_action(_s, _l, tokens):
-    '''
+    """
     Builds a lambda function representing a predicate on a tree node
     which describes the use of a previously bound node label.
 
@@ -661,22 +740,26 @@ def _tgrep_node_label_pred_use_action(_s, _l, tokens):
     relation).  The predicate returns true if and only if its node
     argument is identical to the node looked up in the node label
     dictionary using the node's label.
-    '''
+    """
     assert len(tokens) == 1
-    assert tokens[0].startswith('=')
+    assert tokens[0].startswith("=")
     node_label = tokens[0][1:]
+
     def node_label_use_pred(n, m=None, l=None):
         # look up the bound node using its label
         if l is None or node_label not in l:
-            raise TgrepException('node_label ={0} not bound in pattern'.format(
-                node_label))
+            raise TgrepException(
+                "node_label ={0} not bound in pattern".format(node_label)
+            )
         node = l[node_label]
         # truth means the given node is this node
         return n is node
+
     return node_label_use_pred
 
+
 def _tgrep_bind_node_label_action(_s, _l, tokens):
-    '''
+    """
     Builds a lambda function representing a predicate on a tree node
     which can optionally bind a matching node into the tgrep2 string's
     label_dict.
@@ -685,7 +768,7 @@ def _tgrep_bind_node_label_action(_s, _l, tokens):
 
         /NP/
         @NP=n
-    '''
+    """
     # tokens[0] is a tgrep_node_expr
     if len(tokens) == 1:
         return tokens[0]
@@ -693,46 +776,53 @@ def _tgrep_bind_node_label_action(_s, _l, tokens):
         # if present, tokens[1] is the character '=', and tokens[2] is
         # a tgrep_node_label, a string value containing the node label
         assert len(tokens) == 3
-        assert tokens[1] == '='
+        assert tokens[1] == "="
         node_pred = tokens[0]
         node_label = tokens[2]
+
         def node_label_bind_pred(n, m=None, l=None):
             if node_pred(n, m, l):
                 # bind `n` into the dictionary `l`
                 if l is None:
                     raise TgrepException(
-                        'cannot bind node_label {0}: label_dict is None'.format(
-                            node_label))
+                        "cannot bind node_label {0}: label_dict is None".format(
+                            node_label
+                        )
+                    )
                 l[node_label] = n
                 return True
             else:
                 return False
+
         return node_label_bind_pred
 
+
 def _tgrep_rel_disjunction_action(_s, _l, tokens):
-    '''
+    """
     Builds a lambda function representing a predicate on a tree node
     from the disjunction of several other such lambda functions.
-    '''
+    """
     # filter out the pipe
-    tokens = [x for x in tokens if x != '|']
-    # print 'relation disjunction tokens: ', tokens
+    tokens = [x for x in tokens if x != "|"]
     if len(tokens) == 1:
         return tokens[0]
     elif len(tokens) == 2:
-        return (lambda a, b: lambda n, m=None, l=None:
-                a(n, m, l) or b(n, m, l))(tokens[0], tokens[1])
+        return (lambda a, b: lambda n, m=None, l=None: a(n, m, l) or b(n, m, l))(
+            tokens[0], tokens[1]
+        )
+
 
 def _macro_defn_action(_s, _l, tokens):
-    '''
+    """
     Builds a dictionary structure which defines the given macro.
-    '''
+    """
     assert len(tokens) == 3
-    assert tokens[0] == '@'
+    assert tokens[0] == "@"
     return {tokens[1]: tokens[2]}
 
+
 def _tgrep_exprs_action(_s, _l, tokens):
-    '''
+    """
     This is the top-level node in a tgrep2 search string; the
     predicate function it returns binds together all the state of a
     tgrep2 search string.
@@ -741,11 +831,11 @@ def _tgrep_exprs_action(_s, _l, tokens):
     from the disjunction of several tgrep expressions.  Also handles
     macro definitions and macro name binding, and node label
     definitions and node label binding.
-    '''
+    """
     if len(tokens) == 1:
         return lambda n, m=None, l=None: tokens[0](n, None, {})
     # filter out all the semicolons
-    tokens = [x for x in tokens if x != ';']
+    tokens = [x for x in tokens if x != ";"]
     # collect all macro definitions
     macro_dict = {}
     macro_defs = [tok for tok in tokens if isinstance(tok, dict)]
@@ -758,76 +848,90 @@ def _tgrep_exprs_action(_s, _l, tokens):
         label_dict = {}
         # bind macro definitions and OR together all tgrep_exprs
         return any(predicate(n, m, label_dict) for predicate in tgrep_exprs)
+
     return top_level_pred
 
-def _build_tgrep_parser(set_parse_actions = True):
-    '''
+
+def _build_tgrep_parser(set_parse_actions=True):
+    """
     Builds a pyparsing-based parser object for tokenizing and
     interpreting tgrep search strings.
-    '''
-    tgrep_op = (pyparsing.Optional('!') +
-                pyparsing.Regex('[$%,.<>][%,.<>0-9-\':]*'))
-    tgrep_qstring = pyparsing.QuotedString(quoteChar='"', escChar='\\',
-                                           unquoteResults=False)
-    tgrep_node_regex = pyparsing.QuotedString(quoteChar='/', escChar='\\',
-                                              unquoteResults=False)
-    tgrep_qstring_icase = pyparsing.Regex(
-        'i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"')
-    tgrep_node_regex_icase = pyparsing.Regex(
-        'i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/')
-    tgrep_node_literal = pyparsing.Regex('[^][ \r\t\n;:.,&|<>()$!@%\'^=]+')
+    """
+    tgrep_op = pyparsing.Optional("!") + pyparsing.Regex("[$%,.<>][%,.<>0-9-':]*")
+    tgrep_qstring = pyparsing.QuotedString(
+        quoteChar='"', escChar="\\", unquoteResults=False
+    )
+    tgrep_node_regex = pyparsing.QuotedString(
+        quoteChar="/", escChar="\\", unquoteResults=False
+    )
+    tgrep_qstring_icase = pyparsing.Regex('i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"')
+    tgrep_node_regex_icase = pyparsing.Regex("i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/")
+    tgrep_node_literal = pyparsing.Regex("[^][ \r\t\n;:.,&|<>()$!@%'^=]+")
     tgrep_expr = pyparsing.Forward()
     tgrep_relations = pyparsing.Forward()
-    tgrep_parens = pyparsing.Literal('(') + tgrep_expr + ')'
+    tgrep_parens = pyparsing.Literal("(") + tgrep_expr + ")"
     tgrep_nltk_tree_pos = (
-        pyparsing.Literal('N(') +
-        pyparsing.Optional(pyparsing.Word(pyparsing.nums) + ',' +
-                           pyparsing.Optional(pyparsing.delimitedList(
-                    pyparsing.Word(pyparsing.nums), delim=',') +
-                                              pyparsing.Optional(','))) + ')')
-    tgrep_node_label = pyparsing.Regex('[A-Za-z0-9]+')
-    tgrep_node_label_use = pyparsing.Combine('=' + tgrep_node_label)
+        pyparsing.Literal("N(")
+        + pyparsing.Optional(
+            pyparsing.Word(pyparsing.nums)
+            + ","
+            + pyparsing.Optional(
+                pyparsing.delimitedList(pyparsing.Word(pyparsing.nums), delim=",")
+                + pyparsing.Optional(",")
+            )
+        )
+        + ")"
+    )
+    tgrep_node_label = pyparsing.Regex("[A-Za-z0-9]+")
+    tgrep_node_label_use = pyparsing.Combine("=" + tgrep_node_label)
     # see _tgrep_segmented_pattern_action
     tgrep_node_label_use_pred = tgrep_node_label_use.copy()
-    macro_name = pyparsing.Regex('[^];:.,&|<>()[$!@%\'^=\r\t\n ]+')
-    macro_name.setWhitespaceChars('')
-    macro_use = pyparsing.Combine('@' + macro_name)
-    tgrep_node_expr = (tgrep_node_label_use_pred |
-                       macro_use |
-                       tgrep_nltk_tree_pos |
-                       tgrep_qstring_icase |
-                       tgrep_node_regex_icase |
-                       tgrep_qstring |
-                       tgrep_node_regex |
-                       '*' |
-                       tgrep_node_literal)
-    tgrep_node_expr2 = ((tgrep_node_expr +
-                         pyparsing.Literal('=').setWhitespaceChars('') +
-                         tgrep_node_label.copy().setWhitespaceChars('')) |
-                        tgrep_node_expr)
-    tgrep_node = (tgrep_parens |
-                  (pyparsing.Optional("'") +
-                   tgrep_node_expr2 +
-                   pyparsing.ZeroOrMore("|" + tgrep_node_expr)))
-    tgrep_brackets = pyparsing.Optional('!') + '[' + tgrep_relations + ']'
+    macro_name = pyparsing.Regex("[^];:.,&|<>()[$!@%'^=\r\t\n ]+")
+    macro_name.setWhitespaceChars("")
+    macro_use = pyparsing.Combine("@" + macro_name)
+    tgrep_node_expr = (
+        tgrep_node_label_use_pred
+        | macro_use
+        | tgrep_nltk_tree_pos
+        | tgrep_qstring_icase
+        | tgrep_node_regex_icase
+        | tgrep_qstring
+        | tgrep_node_regex
+        | "*"
+        | tgrep_node_literal
+    )
+    tgrep_node_expr2 = (
+        tgrep_node_expr
+        + pyparsing.Literal("=").setWhitespaceChars("")
+        + tgrep_node_label.copy().setWhitespaceChars("")
+    ) | tgrep_node_expr
+    tgrep_node = tgrep_parens | (
+        pyparsing.Optional("'")
+        + tgrep_node_expr2
+        + pyparsing.ZeroOrMore("|" + tgrep_node_expr)
+    )
+    tgrep_brackets = pyparsing.Optional("!") + "[" + tgrep_relations + "]"
     tgrep_relation = tgrep_brackets | (tgrep_op + tgrep_node)
     tgrep_rel_conjunction = pyparsing.Forward()
-    tgrep_rel_conjunction << (tgrep_relation +
-                              pyparsing.ZeroOrMore(pyparsing.Optional('&') +
-                                                   tgrep_rel_conjunction))
+    tgrep_rel_conjunction << (
+        tgrep_relation
+        + pyparsing.ZeroOrMore(pyparsing.Optional("&") + tgrep_rel_conjunction)
+    )
     tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore(
-        "|" + tgrep_relations)
+        "|" + tgrep_relations
+    )
     tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations)
     tgrep_expr_labeled = tgrep_node_label_use + pyparsing.Optional(tgrep_relations)
-    tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(':' + tgrep_expr_labeled)
-    macro_defn = (pyparsing.Literal('@') +
-                  pyparsing.White().suppress() +
-                  macro_name +
-                  tgrep_expr2)
-    tgrep_exprs = (pyparsing.Optional(macro_defn + pyparsing.ZeroOrMore(';' + macro_defn) + ';') +
-                   tgrep_expr2 +
-                   pyparsing.ZeroOrMore(';' + (macro_defn | tgrep_expr2)) +
-                   pyparsing.ZeroOrMore(';').suppress())
+    tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(":" + tgrep_expr_labeled)
+    macro_defn = (
+        pyparsing.Literal("@") + pyparsing.White().suppress() + macro_name + tgrep_expr2
+    )
+    tgrep_exprs = (
+        pyparsing.Optional(macro_defn + pyparsing.ZeroOrMore(";" + macro_defn) + ";")
+        + tgrep_expr2
+        + pyparsing.ZeroOrMore(";" + (macro_defn | tgrep_expr2))
+        + pyparsing.ZeroOrMore(";").suppress()
+    )
     if set_parse_actions:
         tgrep_node_label_use.setParseAction(_tgrep_node_label_use_action)
         tgrep_node_label_use_pred.setParseAction(_tgrep_node_label_pred_use_action)
@@ -845,35 +949,39 @@ def _build_tgrep_parser(set_parse_actions = True):
         # relation predicates
         tgrep_expr.setParseAction(_tgrep_conjunction_action)
         tgrep_expr_labeled.setParseAction(_tgrep_segmented_pattern_action)
-        tgrep_expr2.setParseAction(functools.partial(_tgrep_conjunction_action,
-                                                     join_char = ':'))
+        tgrep_expr2.setParseAction(
+            functools.partial(_tgrep_conjunction_action, join_char=":")
+        )
         tgrep_exprs.setParseAction(_tgrep_exprs_action)
-    return tgrep_exprs.ignore('#' + pyparsing.restOfLine)
+    return tgrep_exprs.ignore("#" + pyparsing.restOfLine)
+
 
 def tgrep_tokenize(tgrep_string):
-    '''
+    """
     Tokenizes a TGrep search string into separate tokens.
-    '''
+    """
     parser = _build_tgrep_parser(False)
-    if isinstance(tgrep_string, binary_type):
+    if isinstance(tgrep_string, bytes):
         tgrep_string = tgrep_string.decode()
     return list(parser.parseString(tgrep_string))
 
+
 def tgrep_compile(tgrep_string):
-    '''
+    """
     Parses (and tokenizes, if necessary) a TGrep search string into a
     lambda function.
-    '''
+    """
     parser = _build_tgrep_parser(True)
-    if isinstance(tgrep_string, binary_type):
+    if isinstance(tgrep_string, bytes):
         tgrep_string = tgrep_string.decode()
     return list(parser.parseString(tgrep_string, parseAll=True))[0]
 
+
 def treepositions_no_leaves(tree):
-    '''
+    """
     Returns all the tree positions in the given tree which are not
     leaf nodes.
-    '''
+    """
     treepositions = tree.treepositions()
     # leaves are treeposition tuples that are not prefixes of any
     # other treeposition
@@ -883,6 +991,7 @@ def treepositions_no_leaves(tree):
             prefixes.add(pos[:length])
     return [pos for pos in treepositions if pos in prefixes]
 
+
 def tgrep_positions(pattern, trees, search_leaves=True):
     """
     Return the tree positions in the trees which match the given pattern.
@@ -896,7 +1005,7 @@ def tgrep_positions(pattern, trees, search_leaves=True):
     :rtype: iter(tree positions)
     """
 
-    if isinstance(pattern, (binary_type, text_type)):
+    if isinstance(pattern, (bytes, str)):
         pattern = tgrep_compile(pattern)
 
     for tree in trees:
@@ -905,11 +1014,11 @@ def tgrep_positions(pattern, trees, search_leaves=True):
                 positions = tree.treepositions()
             else:
                 positions = treepositions_no_leaves(tree)
-            yield [position for position in positions
-                      if pattern(tree[position])]
+            yield [position for position in positions if pattern(tree[position])]
         except AttributeError:
             yield []
 
+
 def tgrep_nodes(pattern, trees, search_leaves=True):
     """
     Return the tree nodes in the trees which match the given pattern.
@@ -923,7 +1032,7 @@ def tgrep_nodes(pattern, trees, search_leaves=True):
     :rtype: iter(tree nodes)
     """
 
-    if isinstance(pattern, (binary_type, text_type)):
+    if isinstance(pattern, (bytes, str)):
         pattern = tgrep_compile(pattern)
 
     for tree in trees:
@@ -932,7 +1041,6 @@ def tgrep_nodes(pattern, trees, search_leaves=True):
                 positions = tree.treepositions()
             else:
                 positions = treepositions_no_leaves(tree)
-            yield [tree[position] for position in positions
-                      if pattern(tree[position])]
+            yield [tree[position] for position in positions if pattern(tree[position])]
         except AttributeError:
             yield []
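End-to-end usage of the two generator entry points (illustrative):

    from nltk.tgrep import tgrep_nodes, tgrep_positions
    from nltk.tree import ParentedTree

    trees = [ParentedTree.fromstring("(S (NP (NNP Alice)) (VP (VBD slept)))")]
    list(tgrep_positions("NP", trees))     # [[(0,)]]: one list of positions per tree
    list(tgrep_nodes("VP < VBD", trees))   # one list of matching subtrees per tree
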
diff --git a/nlp_resource_data/nltk/tgrep.pyc b/nlp_resource_data/nltk/tgrep.pyc
deleted file mode 100755 (executable)
index aa8d5eb..0000000
Binary files a/nlp_resource_data/nltk/tgrep.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 6c16781..241b9f3
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com> (minor additions)
 # Contributors: matthewmc, clouds56
@@ -62,26 +62,38 @@ For further information, please see Chapter 3 of the NLTK book.
 
 import re
 
-from nltk.data              import load
-from nltk.tokenize.casual   import (TweetTokenizer, casual_tokenize)
-from nltk.tokenize.mwe      import MWETokenizer
-from nltk.tokenize.punkt    import PunktSentenceTokenizer
-from nltk.tokenize.regexp   import (RegexpTokenizer, WhitespaceTokenizer,
-                                    BlanklineTokenizer, WordPunctTokenizer,
-                                    wordpunct_tokenize, regexp_tokenize,
-                                    blankline_tokenize)
-from nltk.tokenize.repp     import ReppTokenizer
-from nltk.tokenize.sexpr    import SExprTokenizer, sexpr_tokenize
-from nltk.tokenize.simple   import (SpaceTokenizer, TabTokenizer, LineTokenizer,
-                                    line_tokenize)
+from nltk.data import load
+from nltk.tokenize.casual import TweetTokenizer, casual_tokenize
+from nltk.tokenize.mwe import MWETokenizer
+from nltk.tokenize.destructive import NLTKWordTokenizer
+from nltk.tokenize.punkt import PunktSentenceTokenizer
+from nltk.tokenize.regexp import (
+    RegexpTokenizer,
+    WhitespaceTokenizer,
+    BlanklineTokenizer,
+    WordPunctTokenizer,
+    wordpunct_tokenize,
+    regexp_tokenize,
+    blankline_tokenize,
+)
+from nltk.tokenize.repp import ReppTokenizer
+from nltk.tokenize.sexpr import SExprTokenizer, sexpr_tokenize
+from nltk.tokenize.simple import (
+    SpaceTokenizer,
+    TabTokenizer,
+    LineTokenizer,
+    line_tokenize,
+)
 from nltk.tokenize.texttiling import TextTilingTokenizer
-from nltk.tokenize.toktok   import ToktokTokenizer
+from nltk.tokenize.toktok import ToktokTokenizer
 from nltk.tokenize.treebank import TreebankWordTokenizer
-from nltk.tokenize.util     import string_span_tokenize, regexp_span_tokenize
+from nltk.tokenize.util import string_span_tokenize, regexp_span_tokenize
 from nltk.tokenize.stanford_segmenter import StanfordSegmenter
+from nltk.tokenize.sonority_sequencing import SyllableTokenizer
+
 
 # Standard sentence tokenizer.
-def sent_tokenize(text, language='english'):
+def sent_tokenize(text, language="english"):
     """
     Return a sentence-tokenized copy of *text*,
     using NLTK's recommended sentence tokenizer
@@ -91,26 +103,15 @@ def sent_tokenize(text, language='english'):
     :param text: text to split into sentences
     :param language: the model name in the Punkt corpus
     """
-    tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language))
+    tokenizer = load("tokenizers/punkt/{0}.pickle".format(language))
     return tokenizer.tokenize(text)
 
-# Standard word tokenizer.
-_treebank_word_tokenizer = TreebankWordTokenizer()
-
-# See discussion on https://github.com/nltk/nltk/pull/1437
-# Adding to TreebankWordTokenizer, the splits on
-# - chervon quotes u'\xab' and u'\xbb' .
-# - unicode quotes u'\u2018', u'\u2019', u'\u201c' and u'\u201d'
 
-improved_open_quote_regex = re.compile(u'([«“‘])', re.U)
-improved_close_quote_regex = re.compile(u'([»”’])', re.U)
-improved_punct_regex = re.compile(r'([^\.])(\.)([\]\)}>"\'' u'»”’ ' r']*)\s*$', re.U)
-_treebank_word_tokenizer.STARTING_QUOTES.insert(0, (improved_open_quote_regex, r' \1 '))
-_treebank_word_tokenizer.ENDING_QUOTES.insert(0, (improved_close_quote_regex, r' \1 '))
-_treebank_word_tokenizer.PUNCTUATION.insert(0, (improved_punct_regex, r'\1 \2 \3 '))
+# Standard word tokenizer.
+_treebank_word_tokenizer = NLTKWordTokenizer()
 
 
-def word_tokenize(text, language='english', preserve_line=False):
+def word_tokenize(text, language="english", preserve_line=False):
     """
     Return a tokenized copy of *text*,
     using NLTK's recommended word tokenizer
@@ -123,8 +124,9 @@ def word_tokenize(text, language='english', preserve_line=False):
     :param language: the model name in the Punkt corpus
     :type language: str
     :param preserve_line: An option to keep the sentence intact and not sentence tokenize it.
-    :type preserver_line: bool
+    :type preserve_line: bool
     """
     sentences = [text] if preserve_line else sent_tokenize(text, language)
-    return [token for sent in sentences
-            for token in _treebank_word_tokenizer.tokenize(sent)]
+    return [
+        token for sent in sentences for token in _treebank_word_tokenizer.tokenize(sent)
+    ]
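The two convenience functions should behave as before after the NLTKWordTokenizer swap (illustrative; requires the punkt model):

    from nltk.tokenize import sent_tokenize, word_tokenize

    s = "Good muffins cost $3.88 in New York. Please buy me two of them."
    sent_tokenize(s)
    # ['Good muffins cost $3.88 in New York.', 'Please buy me two of them.']
    word_tokenize(s)[:6]
    # ['Good', 'muffins', 'cost', '$', '3.88', 'in']
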
old mode 100755 (executable)
new mode 100644 (file)
similarity index 51%
rename from nlp_resource_data/nltk/tokenize/__init__.pyc
rename to nlp_resource_data/nltk/tokenize/__pycache__/__init__.cpython-37.pyc
index 5a37a5a..3d7a120
Binary files a/nlp_resource_data/nltk/tokenize/__init__.pyc and b/nlp_resource_data/nltk/tokenize/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..6adc126
Binary files /dev/null and b/nlp_resource_data/nltk/tokenize/__pycache__/api.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
similarity index 51%
rename from nlp_resource_data/nltk/tokenize/casual.pyc
rename to nlp_resource_data/nltk/tokenize/__pycache__/casual.cpython-37.pyc
index 30abc56..5598df2
Binary files a/nlp_resource_data/nltk/tokenize/casual.pyc and b/nlp_resource_data/nltk/tokenize/__pycache__/casual.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/destructive.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/destructive.cpython-37.pyc
new file mode 100644 (file)
index 0000000..bfdd50a
Binary files /dev/null and b/nlp_resource_data/nltk/tokenize/__pycache__/destructive.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
similarity index 52%
rename from nlp_resource_data/nltk/tokenize/mwe.pyc
rename to nlp_resource_data/nltk/tokenize/__pycache__/mwe.cpython-37.pyc
index e9bd5f3..00bd19a
Binary files a/nlp_resource_data/nltk/tokenize/mwe.pyc and b/nlp_resource_data/nltk/tokenize/__pycache__/mwe.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/nist.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/nist.cpython-37.pyc
new file mode 100644 (file)
index 0000000..673b0cd
Binary files /dev/null and b/nlp_resource_data/nltk/tokenize/__pycache__/nist.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/punkt.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/punkt.cpython-37.pyc
new file mode 100644 (file)
index 0000000..a488cf5
Binary files /dev/null and b/nlp_resource_data/nltk/tokenize/__pycache__/punkt.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
similarity index 51%
rename from nlp_resource_data/nltk/tokenize/regexp.pyc
rename to nlp_resource_data/nltk/tokenize/__pycache__/regexp.cpython-37.pyc
index 35643f5..6f449ab
Binary files a/nlp_resource_data/nltk/tokenize/regexp.pyc and b/nlp_resource_data/nltk/tokenize/__pycache__/regexp.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
similarity index 50%
rename from nlp_resource_data/nltk/tokenize/repp.pyc
rename to nlp_resource_data/nltk/tokenize/__pycache__/repp.cpython-37.pyc
index b2e140a..a4277c4
Binary files a/nlp_resource_data/nltk/tokenize/repp.pyc and b/nlp_resource_data/nltk/tokenize/__pycache__/repp.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
similarity index 63%
rename from nlp_resource_data/nltk/tokenize/sexpr.pyc
rename to nlp_resource_data/nltk/tokenize/__pycache__/sexpr.cpython-37.pyc
index 1594542..59ecdf1
Binary files a/nlp_resource_data/nltk/tokenize/sexpr.pyc and b/nlp_resource_data/nltk/tokenize/__pycache__/sexpr.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/simple.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/simple.cpython-37.pyc
new file mode 100644 (file)
index 0000000..fd0882f
Binary files /dev/null and b/nlp_resource_data/nltk/tokenize/__pycache__/simple.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/sonority_sequencing.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/sonority_sequencing.cpython-37.pyc
new file mode 100644 (file)
index 0000000..aa2a6dd
Binary files /dev/null and b/nlp_resource_data/nltk/tokenize/__pycache__/sonority_sequencing.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/stanford.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/stanford.cpython-37.pyc
new file mode 100644 (file)
index 0000000..eaa06cf
Binary files /dev/null and b/nlp_resource_data/nltk/tokenize/__pycache__/stanford.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/stanford_segmenter.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/stanford_segmenter.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ba109a5
Binary files /dev/null and b/nlp_resource_data/nltk/tokenize/__pycache__/stanford_segmenter.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/texttiling.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/texttiling.cpython-37.pyc
new file mode 100644 (file)
index 0000000..1c15047
Binary files /dev/null and b/nlp_resource_data/nltk/tokenize/__pycache__/texttiling.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/toktok.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/toktok.cpython-37.pyc
new file mode 100644 (file)
index 0000000..d0901ee
Binary files /dev/null and b/nlp_resource_data/nltk/tokenize/__pycache__/toktok.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/treebank.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/treebank.cpython-37.pyc
new file mode 100644 (file)
index 0000000..04e5241
Binary files /dev/null and b/nlp_resource_data/nltk/tokenize/__pycache__/treebank.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
similarity index 57%
rename from nlp_resource_data/nltk/tokenize/util.pyc
rename to nlp_resource_data/nltk/tokenize/__pycache__/util.cpython-37.pyc
index 7d17b93..119bd0e
Binary files a/nlp_resource_data/nltk/tokenize/util.pyc and b/nlp_resource_data/nltk/tokenize/__pycache__/util.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index f38ce86..316e385
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizer Interface
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 Tokenizer Interface
 """
 
-from abc import ABCMeta, abstractmethod
-from six import add_metaclass
+from abc import ABC, abstractmethod
 
 from nltk.internals import overridden
 from nltk.tokenize.util import string_span_tokenize
 
 
-@add_metaclass(ABCMeta)
-class TokenizerI(object):
+class TokenizerI(ABC):
     """
     A processing interface for tokenizing a string.
     Subclasses must define ``tokenize()`` or ``tokenize_sents()`` (or both).
     """
+
     @abstractmethod
     def tokenize(self, s):
         """
@@ -69,6 +68,11 @@ class StringTokenizer(TokenizerI):
     on the specified string (defined in subclasses).
     """
 
+    @property
+    @abstractmethod
+    def _string(self):
+        raise NotImplementedError
+
     def tokenize(self, s):
         return s.split(self._string)
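Since _string is now an abstract property, concrete subclasses must supply it.
A minimal sketch of a conforming subclass (CommaTokenizer is a hypothetical
name, not part of NLTK):

    >>> class CommaTokenizer(StringTokenizer):
    ...     _string = ','
    >>> CommaTokenizer().tokenize('a,b,c')
    ['a', 'b', 'c']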
 
diff --git a/nlp_resource_data/nltk/tokenize/api.pyc b/nlp_resource_data/nltk/tokenize/api.pyc
deleted file mode 100755 (executable)
index 5e5da6d..0000000
Binary files a/nlp_resource_data/nltk/tokenize/api.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 4a44233..9187cd1
@@ -2,7 +2,7 @@
 #
 # Natural Language Toolkit: Twitter Tokenizer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Christopher Potts <cgpotts@stanford.edu>
 #         Ewan Klein <ewan@inf.ed.ac.uk> (modifications)
 #         Pierpaolo Pantone <> (modifications)
@@ -33,14 +33,10 @@ domains and tasks. The basic logic is this:
 """
 
 
-
 ######################################################################
 
-from __future__ import unicode_literals
-import re
-
-from six import int2byte, unichr
-from six.moves import html_entities
+import regex  # https://github.com/nltk/nltk/issues/2409
+import html
 
 ######################################################################
 # The following strings are components in the regular expression
@@ -127,36 +123,29 @@ REGEXPS = (
     (?:
       (?:            # (international)
         \+?[01]
-        [\-\s.]*
+        [ *\-.\)]*
       )?
       (?:            # (area code)
         [\(]?
         \d{3}
-        [\-\s.\)]*
+        [ *\-.\)]*
       )?
       \d{3}          # exchange
-      [\-\s.]*
+      [ *\-.\)]*
       \d{4}          # base
-    )"""
-    ,
+    )""",
     # ASCII Emoticons
-    EMOTICONS
-    ,
+    EMOTICONS,
     # HTML tags:
-    r"""<[^>\s]+>"""
-    ,
+    r"""<[^>\s]+>""",
     # ASCII Arrows
-    r"""[\-]+>|<[\-]+"""
-    ,
+    r"""[\-]+>|<[\-]+""",
     # Twitter username:
-    r"""(?:@[\w_]+)"""
-    ,
+    r"""(?:@[\w_]+)""",
     # Twitter hashtags:
-    r"""(?:\#+[\w_]+[\w\'_\-]*[\w_]+)"""
-    ,
+    r"""(?:\#+[\w_]+[\w\'_\-]*[\w_]+)""",
     # email addresses
-    r"""[\w.+-]+@[\w-]+\.(?:[\w-]\.?)+[\w-]"""
-    ,
+    r"""[\w.+-]+@[\w-]+\.(?:[\w-]\.?)+[\w-]""",
     # Remaining word types:
     r"""
     (?:[^\W\d_](?:[^\W\d_]|['\-_])+[^\W\d_]) # Words with apostrophes or dashes.
@@ -168,38 +157,39 @@ REGEXPS = (
     (?:\.(?:\s*\.){1,})            # Ellipsis dots.
     |
     (?:\S)                         # Everything else that isn't whitespace.
-    """
-    )
+    """,
+)
 
 ######################################################################
 # This is the core tokenizing regex:
 
-WORD_RE = re.compile(r"""(%s)""" % "|".join(REGEXPS), re.VERBOSE | re.I
-                     | re.UNICODE)
+WORD_RE = regex.compile(r"""(%s)""" % "|".join(REGEXPS), regex.VERBOSE | regex.I | regex.UNICODE)
 
 # WORD_RE performs poorly on these patterns:
-HANG_RE = re.compile(r'([^a-zA-Z0-9])\1{3,}')
+HANG_RE = regex.compile(r"([^a-zA-Z0-9])\1{3,}")
 
 # The emoticon string gets its own regex so that we can preserve case for
 # them as needed:
-EMOTICON_RE = re.compile(EMOTICONS, re.VERBOSE | re.I | re.UNICODE)
+EMOTICON_RE = regex.compile(EMOTICONS, regex.VERBOSE | regex.I | regex.UNICODE)
 
 # These are for regularizing HTML entities to Unicode:
-ENT_RE = re.compile(r'&(#?(x?))([^&;\s]+);')
+ENT_RE = regex.compile(r"&(#?(x?))([^&;\s]+);")
 
 
 ######################################################################
 # Functions for converting html entities
 ######################################################################
 
-def _str_to_unicode(text, encoding=None, errors='strict'):
+
+def _str_to_unicode(text, encoding=None, errors="strict"):
     if encoding is None:
-        encoding = 'utf-8'
+        encoding = "utf-8"
     if isinstance(text, bytes):
         return text.decode(encoding, errors)
     return text
 
-def _replace_html_entities(text, keep=(), remove_illegal=True, encoding='utf-8'):
+
+def _replace_html_entities(text, keep=(), remove_illegal=True, encoding="utf-8"):
     """
     Remove entities from text by converting them to their
     corresponding unicode character.
@@ -238,19 +228,19 @@ def _replace_html_entities(text, keep=(), remove_illegal=True, encoding='utf-8')
                 # Numeric character references in the 80-9F range are typically
                 # interpreted by browsers as representing the characters mapped
                 # to bytes 80-9F in the Windows-1252 encoding. For more info
-                # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML
-                if 0x80 <= number <= 0x9f:
-                    return int2byte(number).decode('cp1252')
+                # see: https://en.wikipedia.org/wiki/ISO/IEC_8859-1#Similar_character_sets
+                if 0x80 <= number <= 0x9F:
+                    return bytes((number,)).decode("cp1252")
             except ValueError:
                 number = None
         else:
             if entity_body in keep:
                 return match.group(0)
             else:
-                number = html_entities.name2codepoint.get(entity_body)
+                number = html.entities.name2codepoint.get(entity_body)
         if number is not None:
             try:
-                return unichr(number)
+                return chr(number)
             except ValueError:
                 pass
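The intended behaviour of _replace_html_entities, as a sketch (this mirrors the
doctest shipped with casual.py; \xa3 is the pound sign resolved from &pound;):

    >>> from nltk.tokenize.casual import _replace_html_entities
    >>> _replace_html_entities(b'Price: &pound;100')
    'Price: \xa3100'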
 
@@ -261,6 +251,7 @@ def _replace_html_entities(text, keep=(), remove_illegal=True, encoding='utf-8')
 
 ######################################################################
 
+
 class TweetTokenizer:
     r"""
     Tokenizer for tweets.
@@ -300,44 +291,54 @@ class TweetTokenizer:
         if self.reduce_len:
             text = reduce_lengthening(text)
         # Shorten problematic sequences of characters
-        safe_text = HANG_RE.sub(r'\1\1\1', text)
+        safe_text = HANG_RE.sub(r"\1\1\1", text)
         # Tokenize:
         words = WORD_RE.findall(safe_text)
         # Possibly alter the case, but avoid changing emoticons like :D into :d:
         if not self.preserve_case:
-            words = list(map((lambda x : x if EMOTICON_RE.search(x) else
-                              x.lower()), words))
+            words = list(
+                map((lambda x: x if EMOTICON_RE.search(x) else x.lower()), words)
+            )
         return words
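An illustrative sketch of the case handling above: lowercasing applies to
ordinary words but leaves emoticons intact (output hedged against the exact
emoticon regex):

    >>> TweetTokenizer(preserve_case=False).tokenize('GREAT day :D')
    ['great', 'day', ':D']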
 
+
 ######################################################################
 # Normalization Functions
 ######################################################################
 
+
 def reduce_lengthening(text):
     """
     Replace repeated character sequences of length 3 or greater with sequences
     of length 3.
     """
-    pattern = re.compile(r"(.)\1{2,}")
+    pattern = regex.compile(r"(.)\1{2,}")
     return pattern.sub(r"\1\1\1", text)
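For example (illustrative):

    >>> reduce_lengthening("waaaaayyyyy")
    'waaayyy'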
 
+
 def remove_handles(text):
     """
     Remove Twitter username handles from text.
     """
-    pattern = re.compile(r"(?<![A-Za-z0-9_!@#\$%&*])@(([A-Za-z0-9_]){20}(?!@))|(?<![A-Za-z0-9_!@#\$%&*])@(([A-Za-z0-9_]){1,19})(?![A-Za-z0-9_]*@)")
-    # Substitute hadnles with ' ' to ensure that text on either side of removed handles are tokenized correctly
-    return pattern.sub(' ', text)
+    pattern = regex.compile(
+        r"(?<![A-Za-z0-9_!@#\$%&*])@(([A-Za-z0-9_]){20}(?!@))|(?<![A-Za-z0-9_!@#\$%&*])@(([A-Za-z0-9_]){1,19})(?![A-Za-z0-9_]*@)"
+    )
+    # Substitute handles with ' ' to ensure that text on either side of removed handles is tokenized correctly
+    return pattern.sub(" ", text)
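For example (illustrative; the leading space is the substituted handle):

    >>> remove_handles('@remy: This is waaaaayyyy too much for you!!!!!!')
    ' : This is waaaaayyyy too much for you!!!!!!'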
+
 
 ######################################################################
 # Tokenization Function
 ######################################################################
 
+
 def casual_tokenize(text, preserve_case=True, reduce_len=False, strip_handles=False):
     """
     Convenience function for wrapping the tokenizer.
     """
-    return TweetTokenizer(preserve_case=preserve_case, reduce_len=reduce_len,
-                          strip_handles=strip_handles).tokenize(text)
+    return TweetTokenizer(
+        preserve_case=preserve_case, reduce_len=reduce_len, strip_handles=strip_handles
+    ).tokenize(text)
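An end-to-end sketch, matching the doctest published with TweetTokenizer:

    >>> casual_tokenize('@remy: This is waaaaayyyy too much for you!!!!!!',
    ...                 reduce_len=True, strip_handles=True)
    [':', 'This', 'is', 'waaayyy', 'too', 'much', 'for', 'you', '!', '!', '!']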
+
 
 ###############################################################################
diff --git a/nlp_resource_data/nltk/tokenize/destructive.py b/nlp_resource_data/nltk/tokenize/destructive.py
new file mode 100644 (file)
index 0000000..5cb524f
--- /dev/null
@@ -0,0 +1,141 @@
+# Natural Language Toolkit: NLTK's very own tokenizer.
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author:
+# URL: <http://nltk.sourceforge.net>
+# For license information, see LICENSE.TXT
+
+
+import re
+from nltk.tokenize.api import TokenizerI
+
+
+class MacIntyreContractions:
+    """
+    List of contractions adapted from Robert MacIntyre's tokenizer.
+    """
+
+    CONTRACTIONS2 = [
+        r"(?i)\b(can)(?#X)(not)\b",
+        r"(?i)\b(d)(?#X)('ye)\b",
+        r"(?i)\b(gim)(?#X)(me)\b",
+        r"(?i)\b(gon)(?#X)(na)\b",
+        r"(?i)\b(got)(?#X)(ta)\b",
+        r"(?i)\b(lem)(?#X)(me)\b",
+        r"(?i)\b(mor)(?#X)('n)\b",
+        r"(?i)\b(wan)(?#X)(na)\s",
+    ]
+    CONTRACTIONS3 = [r"(?i) ('t)(?#X)(is)\b", r"(?i) ('t)(?#X)(was)\b"]
+    CONTRACTIONS4 = [r"(?i)\b(whad)(dd)(ya)\b", r"(?i)\b(wha)(t)(cha)\b"]
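These patterns are compiled below and applied near the end of
NLTKWordTokenizer.tokenize. An illustrative sketch of one of them in isolation:

    >>> import re
    >>> re.sub(r"(?i)\b(gon)(?#X)(na)\b", r" \1 \2 ", "I'm gonna go")
    "I'm  gon na  go"

(The doubled spaces are harmless: tokenize() finishes with text.split().)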
+
+
+class NLTKWordTokenizer(TokenizerI):
+    """
+    The NLTK tokenizer, which improves upon the TreebankWordTokenizer.
+
+    The tokenizer is "destructive" in that the regexes applied will munge the
+    input string to a state beyond reconstruction. It is possible to apply
+    `TreebankWordDetokenizer.detokenize` to the tokenized outputs of
+    `NLTKWordTokenizer.tokenize`, but there is no guarantee of recovering
+    the original string.
+    """
+
+    # Starting quotes.
+    STARTING_QUOTES = [
+        (re.compile(u"([«“‘„]|[`]+)", re.U), r" \1 "),
+        (re.compile(r"^\""), r"``"),
+        (re.compile(r"(``)"), r" \1 "),
+        (re.compile(r"([ \(\[{<])(\"|\'{2})"), r"\1 `` "),
+        (re.compile(r"(?i)(\')(?!re|ve|ll|m|t|s|d)(\w)\b", re.U), r"\1 \2"),
+    ]
+
+    # Ending quotes.
+    ENDING_QUOTES = [
+        (re.compile(u"([»”’])", re.U), r" \1 "),
+        (re.compile(r'"'), " '' "),
+        (re.compile(r"(\S)(\'\')"), r"\1 \2 "),
+        (re.compile(r"([^' ])('[sS]|'[mM]|'[dD]|') "), r"\1 \2 "),
+        (re.compile(r"([^' ])('ll|'LL|'re|'RE|'ve|'VE|n't|N'T) "), r"\1 \2 "),
+    ]
+
+    # For improvements for starting/closing quotes from TreebankWordTokenizer,
+    # see discussion on https://github.com/nltk/nltk/pull/1437
+    # Adding to TreebankWordTokenizer, nltk.word_tokenize now splits on
+    # - chevron quotes u'\xab' and u'\xbb'.
+    # - unicode quotes u'\u2018', u'\u2019', u'\u201c' and u'\u201d'
+    # See https://github.com/nltk/nltk/issues/1995#issuecomment-376741608
+    # Also, behavior of splitting on clitics now follows Stanford CoreNLP
+    # - clitics covered (?!re|ve|ll|m|t|s|d)(\w)\b
+
+    # Punctuation.
+    PUNCTUATION = [
+        (re.compile(r'([^\.])(\.)([\]\)}>"\'' u"»”’ " r"]*)\s*$", re.U), r"\1 \2 \3 "),
+        (re.compile(r"([:,])([^\d])"), r" \1 \2"),
+        (re.compile(r"([:,])$"), r" \1 "),
+        (re.compile(r"\.{2,}", re.U), r" \g<0> "), # See https://github.com/nltk/nltk/pull/2322
+        (re.compile(r"[;@#$%&]"), r" \g<0> "),
+        (
+            re.compile(r'([^\.])(\.)([\]\)}>"\']*)\s*$'),
+            r"\1 \2\3 ",
+        ),  # Handles the final period.
+        (re.compile(r"[?!]"), r" \g<0> "),
+        (re.compile(r"([^'])' "), r"\1 ' "),
+        (re.compile(r"[*]", re.U), r" \g<0> "), # See https://github.com/nltk/nltk/pull/2322
+    ]
+
+    # Pads parentheses
+    PARENS_BRACKETS = (re.compile(r"[\]\[\(\)\{\}\<\>]"), r" \g<0> ")
+
+    # Optionally converts parentheses and brackets to PTB symbols.
+    CONVERT_PARENTHESES = [
+        (re.compile(r"\("), "-LRB-"),
+        (re.compile(r"\)"), "-RRB-"),
+        (re.compile(r"\["), "-LSB-"),
+        (re.compile(r"\]"), "-RSB-"),
+        (re.compile(r"\{"), "-LCB-"),
+        (re.compile(r"\}"), "-RCB-"),
+    ]
+
+    DOUBLE_DASHES = (re.compile(r"--"), r" -- ")
+
+    # List of contractions adapted from Robert MacIntyre's tokenizer.
+    _contractions = MacIntyreContractions()
+    CONTRACTIONS2 = list(map(re.compile, _contractions.CONTRACTIONS2))
+    CONTRACTIONS3 = list(map(re.compile, _contractions.CONTRACTIONS3))
+
+    def tokenize(self, text, convert_parentheses=False, return_str=False):
+        for regexp, substitution in self.STARTING_QUOTES:
+            text = regexp.sub(substitution, text)
+
+        for regexp, substitution in self.PUNCTUATION:
+            text = regexp.sub(substitution, text)
+
+        # Handles parentheses.
+        regexp, substitution = self.PARENS_BRACKETS
+        text = regexp.sub(substitution, text)
+        # Optionally convert parentheses
+        if convert_parentheses:
+            for regexp, substitution in self.CONVERT_PARENTHESES:
+                text = regexp.sub(substitution, text)
+
+        # Handles double dash.
+        regexp, substitution = self.DOUBLE_DASHES
+        text = regexp.sub(substitution, text)
+
+        # add extra space to make things easier
+        text = " " + text + " "
+
+        for regexp, substitution in self.ENDING_QUOTES:
+            text = regexp.sub(substitution, text)
+
+        for regexp in self.CONTRACTIONS2:
+            text = regexp.sub(r" \1 \2 ", text)
+        for regexp in self.CONTRACTIONS3:
+            text = regexp.sub(r" \1 \2 ", text)
+
+        # We are not using CONTRACTIONS4 since
+        # they are also commented out in the SED scripts
+        # for regexp in self._contractions.CONTRACTIONS4:
+        #     text = regexp.sub(r' \1 \2 \3 ', text)
+
+        return text if return_str else text.split()
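A hedged usage sketch of the class defined above (expected tokens derived by
hand from the rules listed here, so treat them as indicative):

    >>> tokenizer = NLTKWordTokenizer()
    >>> tokenizer.tokenize("I cannot wait... *really*!")
    ['I', 'can', 'not', 'wait', '...', '*', 'really', '*', '!']
    >>> tokenizer.tokenize("A (simple) test.", convert_parentheses=True)
    ['A', '-LRB-', 'simple', '-RRB-', 'test', '.']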
diff --git a/nlp_resource_data/nltk/tokenize/moses.py b/nlp_resource_data/nltk/tokenize/moses.py
deleted file mode 100755 (executable)
index 0f7d31d..0000000
+++ /dev/null
@@ -1,634 +0,0 @@
-# -*- coding: utf-8 -*-
-# Natural Language Toolkit:
-#
-# Copyright (C) 2001-2015 NLTK Project
-# Author: Pidong Wang, Josh Schroeder, Ondrej Bojar, based on code by Philipp Koehn
-# Contributors: Liling Tan, Martijn Pieters, Wiktor Stribizew
-#
-# URL: <http://nltk.sourceforge.net>
-# For license information, see LICENSE.TXT
-
-from __future__ import print_function
-import re
-from six import text_type
-
-from nltk.tokenize.api import TokenizerI
-from nltk.tokenize.util import is_cjk
-from nltk.corpus import perluniprops, nonbreaking_prefixes
-
-
-class MosesTokenizer(TokenizerI):
-    """
-    This is a Python port of the Moses Tokenizer from
-    https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl
-
-    >>> tokenizer = MosesTokenizer()
-    >>> text = u'This, is a sentence with weird\xbb symbols\u2026 appearing everywhere\xbf'
-    >>> expected_tokenized = u'This , is a sentence with weird \xbb symbols \u2026 appearing everywhere \xbf'
-    >>> tokenized_text = tokenizer.tokenize(text, return_str=True)
-    >>> tokenized_text == expected_tokenized
-    True
-    >>> tokenizer.tokenize(text) == [u'This', u',', u'is', u'a', u'sentence', u'with', u'weird', u'\xbb', u'symbols', u'\u2026', u'appearing', u'everywhere', u'\xbf']
-    True
-
-    The nonbreaking prefixes should tokenize the final fullstop.
-
-    >>> m = MosesTokenizer()
-    >>> m.tokenize('abc def.')
-    [u'abc', u'def', u'.']
-
-    The nonbreaking prefixes should deal the situation when numeric only prefix is the last token.
-    In below example, "pp" is the last element, and there is no digit after it.
-
-    >>> m = MosesTokenizer()
-    >>> m.tokenize('2016, pp.')
-    [u'2016', u',', u'pp', u'.']
-    
-    >>> sent = "This ain't funny. It's actually hillarious, yet double Ls. | [] < > [ ] & You're gonna shake it off? Don't?"
-    >>> m.tokenize(sent, escape=True)
-    ['This', 'ain', '&apos;t', 'funny', '.', 'It', '&apos;s', 'actually', 'hillarious', ',', 'yet', 'double', 'Ls', '.', '&#124;', '&#91;', '&#93;', '&lt;', '&gt;', '&#91;', '&#93;', '&amp;', 'You', '&apos;re', 'gonna', 'shake', 'it', 'off', '?', 'Don', '&apos;t', '?']
-    >>> m.tokenize(sent, escape=False)
-    ['This', 'ain', "'t", 'funny', '.', 'It', "'s", 'actually', 'hillarious', ',', 'yet', 'double', 'Ls', '.', '|', '[', ']', '<', '>', '[', ']', '&', 'You', "'re", 'gonna', 'shake', 'it', 'off', '?', 'Don', "'t", '?']
-    """
-
-    # Perl Unicode Properties character sets.
-    IsN = text_type(''.join(perluniprops.chars('IsN')))
-    IsAlnum = text_type(''.join(perluniprops.chars('IsAlnum')))
-    IsSc = text_type(''.join(perluniprops.chars('IsSc')))
-    IsSo = text_type(''.join(perluniprops.chars('IsSo')))
-    IsAlpha = text_type(''.join(perluniprops.chars('IsAlpha')))
-    IsLower = text_type(''.join(perluniprops.chars('IsLower')))
-
-    # Remove ASCII junk.
-    DEDUPLICATE_SPACE = r'\s+', r' '
-    ASCII_JUNK = r'[\000-\037]', r''
-
-    # Neurotic Perl heading space, multi-space and trailing space chomp.
-    # These regexes are kept for reference purposes and shouldn't be used!!
-    MID_STRIP = r" +", r" "     # Use DEDUPLICATE_SPACE instead.
-    LEFT_STRIP = r"^ ", r""     # Uses text.lstrip() instead.
-    RIGHT_STRIP = r" $", r""    # Uses text.rstrip() instead.
-
-    # Pad all "other" special characters not in IsAlnum.
-    PAD_NOT_ISALNUM = u'([^{}\s\.\'\`\,\-])'.format(IsAlnum), r' \1 '
-
-    # Splits all hypens (regardless of circumstances), e.g.
-    # 'foo -- bar' -> 'foo @-@ @-@ bar' , 'foo-bar' -> 'foo @-@ bar'
-    AGGRESSIVE_HYPHEN_SPLIT = u'([{alphanum}])\-(?=[{alphanum}])'.format(alphanum=IsAlnum), r'\1 \@-\@ '
-
-    # Make multi-dots stay together.
-    REPLACE_DOT_WITH_LITERALSTRING_1 = r'\.([\.]+)', ' DOTMULTI\1'
-    REPLACE_DOT_WITH_LITERALSTRING_2 = r'DOTMULTI\.([^\.])', 'DOTDOTMULTI \1'
-    REPLACE_DOT_WITH_LITERALSTRING_3 = r'DOTMULTI\.', 'DOTDOTMULTI'
-
-    # Separate out "," except if within numbers (5,300)
-    # e.g.  A,B,C,D,E > A , B,C , D,E
-    # First application uses up B so rule can't see B,C
-    # two-step version here may create extra spaces but these are removed later
-    # will also space digit,letter or letter,digit forms (redundant with next section)
-    COMMA_SEPARATE_1 = u'([^{}])[,]'.format(IsN), r'\1 , '
-    COMMA_SEPARATE_2 = u'[,]([^{}])'.format(IsN), r' , \1'
-
-    # Attempt to get correct directional quotes.
-    DIRECTIONAL_QUOTE_1 = r'^``',               r'`` '
-    DIRECTIONAL_QUOTE_2 = r'^"',                r'`` '
-    DIRECTIONAL_QUOTE_3 = r'^`([^`])',          r'` \1'
-    DIRECTIONAL_QUOTE_4 = r"^'",                r'`  '
-    DIRECTIONAL_QUOTE_5 = r'([ ([{<])"',        r'\1 `` '
-    DIRECTIONAL_QUOTE_6 = r'([ ([{<])``',       r'\1 `` '
-    DIRECTIONAL_QUOTE_7 = r'([ ([{<])`([^`])',  r'\1 ` \2'
-    DIRECTIONAL_QUOTE_8 = r"([ ([{<])'",        r'\1 ` '
-
-    # Replace ... with _ELLIPSIS_
-    REPLACE_ELLIPSIS = r'\.\.\.',       r' _ELLIPSIS_ '
-    # Restore _ELLIPSIS_ with ...
-    RESTORE_ELLIPSIS = r'_ELLIPSIS_',   r'\.\.\.'
-
-    # Pad , with tailing space except if within numbers, e.g. 5,300
-    # These are used in nltk.tokenize.moses.penn_tokenize()
-    COMMA_1 = u'([^{numbers}])[,]([^{numbers}])'.format(numbers=IsN), r'\1 , \2'
-    COMMA_2 = u'([{numbers}])[,]([^{numbers}])'.format(numbers=IsN), r'\1 , \2'
-    COMMA_3 = u'([^{numbers}])[,]([{numbers}])'.format(numbers=IsN), r'\1 , \2'
-
-    # Pad unicode symbols with spaces.
-    SYMBOLS = u'([;:@#\$%&{}{}])'.format(IsSc, IsSo), r' \1 '
-
-    # Separate out intra-token slashes.  PTB tokenization doesn't do this, so
-    # the tokens should be merged prior to parsing with a PTB-trained parser.
-    # e.g. "and/or" -> "and @/@ or"
-    INTRATOKEN_SLASHES = u'([{alphanum}])\/([{alphanum}])'.format(alphanum=IsAlnum), r'$1 \@\/\@ $2'
-
-    # Splits final period at end of string.
-    FINAL_PERIOD = r"""([^.])([.])([\]\)}>"']*) ?$""", r'\1 \2\3'
-    # Pad all question marks and exclamation marks with spaces.
-    PAD_QUESTION_EXCLAMATION_MARK = r'([?!])', r' \1 '
-
-    # Handles parentheses, brackets and converts them to PTB symbols.
-    PAD_PARENTHESIS = r'([\]\[\(\){}<>])', r' \1 '
-    CONVERT_PARENTHESIS_1 = r'\(', '-LRB-'
-    CONVERT_PARENTHESIS_2 = r'\)', '-RRB-'
-    CONVERT_PARENTHESIS_3 = r'\[', '-LSB-'
-    CONVERT_PARENTHESIS_4 = r'\]', '-RSB-'
-    CONVERT_PARENTHESIS_5 = r'\{', '-LCB-'
-    CONVERT_PARENTHESIS_6 = r'\}', '-RCB-'
-
-    # Pads double dashes with spaces.
-    PAD_DOUBLE_DASHES = r'--', ' -- '
-
-    # Adds spaces to start and end of string to simplify further regexps.
-    PAD_START_OF_STR = r'^', ' '
-    PAD_END_OF_STR = r'$', ' '
-
-    # Converts double quotes to two single quotes and pad with spaces.
-    CONVERT_DOUBLE_TO_SINGLE_QUOTES = r'"', " '' "
-    # Handles single quote in possessives or close-single-quote.
-    HANDLES_SINGLE_QUOTES = r"([^'])' ", r"\1 ' "
-
-    # Pad apostrophe in possessive or close-single-quote.
-    APOSTROPHE = r"([^'])'", r"\1 ' "
-
-    # Prepend space on contraction apostrophe.
-    CONTRACTION_1 = r"'([sSmMdD]) ", r" '\1 "
-    CONTRACTION_2 = r"'ll ", r" 'll "
-    CONTRACTION_3 = r"'re ", r" 're "
-    CONTRACTION_4 = r"'ve ", r" 've "
-    CONTRACTION_5 = r"n't ", r" n't "
-    CONTRACTION_6 = r"'LL ", r" 'LL "
-    CONTRACTION_7 = r"'RE ", r" 'RE "
-    CONTRACTION_8 = r"'VE ", r" 'VE "
-    CONTRACTION_9 = r"N'T ", r" N'T "
-
-    # Informal Contractions.
-    CONTRACTION_10 = r" ([Cc])annot ",  r" \1an not "
-    CONTRACTION_11 = r" ([Dd])'ye ",    r" \1' ye "
-    CONTRACTION_12 = r" ([Gg])imme ",   r" \1im me "
-    CONTRACTION_13 = r" ([Gg])onna ",   r" \1on na "
-    CONTRACTION_14 = r" ([Gg])otta ",   r" \1ot ta "
-    CONTRACTION_15 = r" ([Ll])emme ",   r" \1em me "
-    CONTRACTION_16 = r" ([Mm])ore$text =~ s='n ",  r" \1ore 'n "
-    CONTRACTION_17 = r" '([Tt])is ",    r" '\1 is "
-    CONTRACTION_18 = r" '([Tt])was ",   r" '\1 was "
-    CONTRACTION_19 = r" ([Ww])anna ",   r" \1an na "
-
-    # Clean out extra spaces
-    CLEAN_EXTRA_SPACE_1 = r'  *', r' '
-    CLEAN_EXTRA_SPACE_2 = r'^ *', r''
-    CLEAN_EXTRA_SPACE_3 = r' *$', r''
-
-    # Neurotic Perl regexes to escape special characters.
-    # These XML escaping regexes are kept such that tokens generated from
-    # NLTK's implementation is consistent with Moses' tokenizer's output.
-    # Outside of the MosesTokenizer function, it's strongly encouraged to use
-    # nltk.tokenize.util.xml_escape() function instead.
-    ESCAPE_AMPERSAND = r'&', r'&amp;'
-    ESCAPE_PIPE = r'\|', r'&#124;'
-    ESCAPE_LEFT_ANGLE_BRACKET = r'<', r'&lt;'
-    ESCAPE_RIGHT_ANGLE_BRACKET = r'>', r'&gt;'
-    ESCAPE_SINGLE_QUOTE = r"\'", r"&apos;"
-    ESCAPE_DOUBLE_QUOTE = r'\"', r'&quot;'
-    ESCAPE_LEFT_SQUARE_BRACKET = r"\[", r"&#91;"
-    ESCAPE_RIGHT_SQUARE_BRACKET = r"]", r"&#93;"
-
-    EN_SPECIFIC_1 = u"([^{alpha}])[']([^{alpha}])".format(alpha=IsAlpha), r"\1 ' \2"
-    EN_SPECIFIC_2 = u"([^{alpha}{isn}])[']([{alpha}])".format(alpha=IsAlpha, isn=IsN), r"\1 ' \2"
-    EN_SPECIFIC_3 = u"([{alpha}])[']([^{alpha}])".format(alpha=IsAlpha), r"\1 ' \2"
-    EN_SPECIFIC_4 = u"([{alpha}])[']([{alpha}])".format(alpha=IsAlpha), r"\1 '\2"
-    EN_SPECIFIC_5 = u"([{isn}])[']([s])".format(isn=IsN), r"\1 '\2"
-
-    ENGLISH_SPECIFIC_APOSTROPHE = [EN_SPECIFIC_1, EN_SPECIFIC_2, EN_SPECIFIC_3,
-                                   EN_SPECIFIC_4, EN_SPECIFIC_5]
-
-    FR_IT_SPECIFIC_1 = u"([^{alpha}])[']([^{alpha}])".format(alpha=IsAlpha), r"\1 ' \2"
-    FR_IT_SPECIFIC_2 = u"([^{alpha}])[']([{alpha}])".format(alpha=IsAlpha), r"\1 ' \2"
-    FR_IT_SPECIFIC_3 = u"([{alpha}])[']([^{alpha}])".format(alpha=IsAlpha), r"\1 ' \2"
-    FR_IT_SPECIFIC_4 = u"([{alpha}])[']([{alpha}])".format(alpha=IsAlpha), r"\1' \2"
-
-    FR_IT_SPECIFIC_APOSTROPHE = [FR_IT_SPECIFIC_1, FR_IT_SPECIFIC_2,
-                                 FR_IT_SPECIFIC_3, FR_IT_SPECIFIC_4]
-
-    NON_SPECIFIC_APOSTROPHE = r"\'", r" \' "
-
-    MOSES_PENN_REGEXES_1 = [DEDUPLICATE_SPACE, ASCII_JUNK, DIRECTIONAL_QUOTE_1,
-                              DIRECTIONAL_QUOTE_2, DIRECTIONAL_QUOTE_3,
-                              DIRECTIONAL_QUOTE_4, DIRECTIONAL_QUOTE_5,
-                              DIRECTIONAL_QUOTE_6, DIRECTIONAL_QUOTE_7,
-                              DIRECTIONAL_QUOTE_8, REPLACE_ELLIPSIS, COMMA_1,
-                              COMMA_2, COMMA_3, SYMBOLS, INTRATOKEN_SLASHES,
-                              FINAL_PERIOD, PAD_QUESTION_EXCLAMATION_MARK,
-                              PAD_PARENTHESIS, CONVERT_PARENTHESIS_1,
-                              CONVERT_PARENTHESIS_2, CONVERT_PARENTHESIS_3,
-                              CONVERT_PARENTHESIS_4, CONVERT_PARENTHESIS_5,
-                              CONVERT_PARENTHESIS_6, PAD_DOUBLE_DASHES,
-                              PAD_START_OF_STR, PAD_END_OF_STR,
-                              CONVERT_DOUBLE_TO_SINGLE_QUOTES,
-                              HANDLES_SINGLE_QUOTES, APOSTROPHE, CONTRACTION_1,
-                              CONTRACTION_2, CONTRACTION_3, CONTRACTION_4,
-                              CONTRACTION_5, CONTRACTION_6, CONTRACTION_7,
-                              CONTRACTION_8, CONTRACTION_9, CONTRACTION_10,
-                              CONTRACTION_11, CONTRACTION_12, CONTRACTION_13,
-                              CONTRACTION_14, CONTRACTION_15, CONTRACTION_16,
-                              CONTRACTION_17, CONTRACTION_18, CONTRACTION_19]
-
-    MOSES_PENN_REGEXES_2 = [RESTORE_ELLIPSIS, CLEAN_EXTRA_SPACE_1,
-                        CLEAN_EXTRA_SPACE_2, CLEAN_EXTRA_SPACE_3,
-                        ESCAPE_AMPERSAND, ESCAPE_PIPE,
-                        ESCAPE_LEFT_ANGLE_BRACKET, ESCAPE_RIGHT_ANGLE_BRACKET,
-                        ESCAPE_SINGLE_QUOTE, ESCAPE_DOUBLE_QUOTE]
-
-    MOSES_ESCAPE_XML_REGEXES = [ESCAPE_AMPERSAND, ESCAPE_PIPE,
-                                ESCAPE_LEFT_ANGLE_BRACKET,
-                                ESCAPE_RIGHT_ANGLE_BRACKET,
-                                ESCAPE_SINGLE_QUOTE, ESCAPE_DOUBLE_QUOTE,
-                                ESCAPE_LEFT_SQUARE_BRACKET,
-                                ESCAPE_RIGHT_SQUARE_BRACKET]
-
-    def __init__(self, lang='en'):
-        # Initialize the object.
-        super(MosesTokenizer, self).__init__()
-        self.lang = lang
-        # Initialize the language specific nonbreaking prefixes.
-        self.NONBREAKING_PREFIXES = [_nbp.strip() for _nbp in nonbreaking_prefixes.words(lang)]
-        self.NUMERIC_ONLY_PREFIXES = [w.rpartition(' ')[0] for w in
-                                      self.NONBREAKING_PREFIXES if
-                                      self.has_numeric_only(w)]
-
-
-
-    def replace_multidots(self, text):
-        text = re.sub(r'\.([\.]+)', r' DOTMULTI\1', text)
-        while re.search(r'DOTMULTI\.', text):
-            text = re.sub(r'DOTMULTI\.([^\.])', r'DOTDOTMULTI \1', text)
-            text = re.sub(r'DOTMULTI\.', 'DOTDOTMULTI', text)
-        return text
-
-    def restore_multidots(self, text):
-        while re.search(r'DOTDOTMULTI', text):
-            text = re.sub(r'DOTDOTMULTI', r'DOTMULTI.', text)
-        return re.sub(r'DOTMULTI', r'.', text)
-
-    def islower(self, text):
-        return not set(text).difference(set(self.IsLower))
-
-    def isalpha(self, text):
-        return not set(text).difference(set(self.IsAlpha))
-
-    def has_numeric_only(self, text):
-        return bool(re.search(r'(.*)[\s]+(\#NUMERIC_ONLY\#)', text))
-
-    def handles_nonbreaking_prefixes(self, text):
-        # Splits the text into tokens to check for nonbreaking prefixes.
-        tokens = text.split()
-        num_tokens = len(tokens)
-        for i, token in enumerate(tokens):
-            # Checks if token ends with a fullstop.
-            token_ends_with_period = re.search(r'^(\S+)\.$', token)
-            if token_ends_with_period:
-                prefix = token_ends_with_period.group(1)
-                # Checks for 3 conditions if
-                # i.   the prefix contains a fullstop and
-                #      any char in the prefix is within the IsAlpha charset
-                # ii.  the prefix is in the list of nonbreaking prefixes and
-                #      does not contain #NUMERIC_ONLY#
-                # iii. the token is not the last token and that the
-                #      next token contains all lowercase.
-                if ( ('.' in prefix and self.isalpha(prefix)) or
-                     (prefix in self.NONBREAKING_PREFIXES and
-                      prefix not in self.NUMERIC_ONLY_PREFIXES) or
-                     (i != num_tokens-1 and self.islower(tokens[i+1])) ):
-                    pass # No change to the token.
-                # Checks if the prefix is in NUMERIC_ONLY_PREFIXES
-                # and ensures that the next word is a digit.
-                elif (prefix in self.NUMERIC_ONLY_PREFIXES and
-                      (i + 1) < num_tokens and
-                      re.search(r'^[0-9]+', tokens[i+1])):
-                    pass # No change to the token.
-                else: # Otherwise, adds a space after the tokens before a dot.
-                    tokens[i] = prefix + ' .'
-        return " ".join(tokens) # Stitch the tokens back.
-
-    def escape_xml(self, text):
-        for regexp, substitution in self.MOSES_ESCAPE_XML_REGEXES:
-            text = re.sub(regexp, substitution, text)
-        return text
-
-    def penn_tokenize(self, text, return_str=False):
-        """
-        This is a Python port of the Penn treebank tokenizer adapted by the Moses
-        machine translation community. It's a little different from the
-        version in nltk.tokenize.treebank.
-        """
-        # Converts input string into unicode.
-        text = text_type(text)
-        # Perform a chain of regex substituitions using MOSES_PENN_REGEXES_1
-        for regexp, substitution in self.MOSES_PENN_REGEXES_1:
-            text = re.sub(regexp, substitution, text)
-        # Handles nonbreaking prefixes.
-        text = self.handles_nonbreaking_prefixes(text)
-        # Restore ellipsis, clean extra spaces, escape XML symbols.
-        for regexp, substitution in self.MOSES_PENN_REGEXES_2:
-            text = re.sub(regexp, substitution, text)
-        return text if return_str else text.split()
-
-    def tokenize(self, text, agressive_dash_splits=False, return_str=False, escape=True):
-        """
-        Python port of the Moses tokenizer.
-
-        >>> mtokenizer = MosesTokenizer()
-        >>> text = u'Is 9.5 or 525,600 my favorite number?'
-        >>> print (mtokenizer.tokenize(text, return_str=True))
-        Is 9.5 or 525,600 my favorite number ?
-        >>> text = u'The https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl is a website with/and/or slashes and sort of weird : things'
-        >>> print (mtokenizer.tokenize(text, return_str=True))
-        The https : / / github.com / jonsafari / tok-tok / blob / master / tok-tok.pl is a website with / and / or slashes and sort of weird : things
-        >>> text = u'This, is a sentence with weird\xbb symbols\u2026 appearing everywhere\xbf'
-        >>> expected = u'This , is a sentence with weird \xbb symbols \u2026 appearing everywhere \xbf'
-        >>> assert mtokenizer.tokenize(text, return_str=True) == expected
-
-        :param tokens: A single string, i.e. sentence text.
-        :type tokens: str
-        :param agressive_dash_splits: Option to trigger dash split rules .
-        :type agressive_dash_splits: bool
-        """
-        # Converts input string into unicode.
-        text = text_type(text)
-
-        # De-duplicate spaces and clean ASCII junk
-        for regexp, substitution in [self.DEDUPLICATE_SPACE, self.ASCII_JUNK]:
-            text = re.sub(regexp, substitution, text)
-        # Strips heading and trailing spaces.
-        text = text.strip()
-        # Separate special characters outside of IsAlnum character set.
-        regexp, substitution = self.PAD_NOT_ISALNUM
-        text = re.sub(regexp, substitution, text)
-        # Aggressively splits dashes
-        if agressive_dash_splits:
-            regexp, substitution = self.AGGRESSIVE_HYPHEN_SPLIT
-            text = re.sub(regexp, substitution, text)
-        # Replaces multidots with "DOTDOTMULTI" literal strings.
-        text = self.replace_multidots(text)
-        # Separate out "," except if within numbers e.g. 5,300
-        for regexp, substitution in [self.COMMA_SEPARATE_1, self.COMMA_SEPARATE_2]:
-            text = re.sub(regexp, substitution, text)
-
-        # (Language-specific) apostrophe tokenization.
-        if self.lang == 'en':
-            for regexp, substitution in self.ENGLISH_SPECIFIC_APOSTROPHE:
-                 text = re.sub(regexp, substitution, text)
-        elif self.lang in ['fr', 'it']:
-            for regexp, substitution in self.FR_IT_SPECIFIC_APOSTROPHE:
-                text = re.sub(regexp, substitution, text)
-        else:
-            regexp, substitution = self.NON_SPECIFIC_APOSTROPHE
-            text = re.sub(regexp, substitution, text)
-
-        # Handles nonbreaking prefixes.
-        text = self.handles_nonbreaking_prefixes(text)
-        # Cleans up extraneous spaces.
-        regexp, substitution = self.DEDUPLICATE_SPACE
-        text = re.sub(regexp,substitution, text).strip()
-        # Restore multidots.
-        text = self.restore_multidots(text)
-        if escape:
-            # Escape XML symbols.
-            text = self.escape_xml(text)
-
-        return text if return_str else text.split()
-
-
-class MosesDetokenizer(TokenizerI):
-    """
-    This is a Python port of the Moses Detokenizer from
-    https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/detokenizer.perl
-
-    >>> tokenizer = MosesTokenizer()
-    >>> text = u'This, is a sentence with weird\xbb symbols\u2026 appearing everywhere\xbf'
-    >>> expected_tokenized = u'This , is a sentence with weird \xbb symbols \u2026 appearing everywhere \xbf'
-    >>> tokenized_text = tokenizer.tokenize(text, return_str=True)
-    >>> tokenized_text == expected_tokenized
-    True
-    >>> detokenizer = MosesDetokenizer()
-    >>> expected_detokenized = u'This, is a sentence with weird \xbb symbols \u2026 appearing everywhere \xbf'
-    >>> detokenized_text = detokenizer.detokenize(tokenized_text.split(), return_str=True)
-    >>> detokenized_text == expected_detokenized
-    True
-
-    >>> from nltk.tokenize.moses import MosesTokenizer, MosesDetokenizer
-    >>> t, d = MosesTokenizer(), MosesDetokenizer()
-    >>> sent = "This ain't funny. It's actually hillarious, yet double Ls. | [] < > [ ] & You're gonna shake it off? Don't?"
-    >>> expected_tokens = [u'This', u'ain', u'&apos;t', u'funny', u'.', u'It', u'&apos;s', u'actually', u'hillarious', u',', u'yet', u'double', u'Ls', u'.', u'&#124;', u'&#91;', u'&#93;', u'&lt;', u'&gt;', u'&#91;', u'&#93;', u'&amp;', u'You', u'&apos;re', u'gonna', u'shake', u'it', u'off', u'?', u'Don', u'&apos;t', u'?']
-    >>> expected_detokens = "This ain't funny. It's actually hillarious, yet double Ls. | [] < > [] & You're gonna shake it off? Don't?"
-    >>> tokens = t.tokenize(sent)
-    >>> tokens == expected_tokens
-    True
-    >>> detokens = d.detokenize(tokens)
-    >>> " ".join(detokens) == expected_detokens
-    True
-    
-    >>> d.detokenize(expected_tokens, unescape=True)
-    ['This', "ain't", 'funny.', "It's", 'actually', 'hillarious,', 'yet', 'double', 'Ls.', '|', '[]', '<', '>', '[]', '&', "You're", 'gonna', 'shake', 'it', 'off?', "Don't?"]
-    >>> d.detokenize(expected_tokens, unescape=False)
-    ['This', 'ain', '&apos;t', 'funny.', 'It', '&apos;s', 'actually', 'hillarious,', 'yet', 'double', 'Ls.', '&#124;', '&#91;', '&#93;', '&lt;', '&gt;', '&#91;', '&#93;', '&amp;', 'You', '&apos;re', 'gonna', 'shake', 'it', 'off?', 'Don', '&apos;t?']
-    """
-    # Currency Symbols.
-    IsAlnum = text_type(''.join(perluniprops.chars('IsAlnum')))
-    IsAlpha = text_type(''.join(perluniprops.chars('IsAlpha')))
-    IsSc = text_type(''.join(perluniprops.chars('IsSc')))
-
-    AGGRESSIVE_HYPHEN_SPLIT = r' \@\-\@ ', r'-'
-
-    # Merge multiple spaces.
-    ONE_SPACE = re.compile(r' {2,}'), ' '
-
-    # Unescape special characters.
-    UNESCAPE_FACTOR_SEPARATOR = r'&#124;', r'|'
-    UNESCAPE_LEFT_ANGLE_BRACKET = r'&lt;', r'<'
-    UNESCAPE_RIGHT_ANGLE_BRACKET = r'&gt;', r'>'
-    UNESCAPE_DOUBLE_QUOTE = r'&quot;', r'"'
-    UNESCAPE_SINGLE_QUOTE = r"&apos;", r"'"
-    UNESCAPE_SYNTAX_NONTERMINAL_LEFT = r'&#91;', r'['
-    UNESCAPE_SYNTAX_NONTERMINAL_RIGHT = r'&#93;', r']'
-    UNESCAPE_AMPERSAND = r'&amp;', r'&'
-    # The legacy regexes are used to support outputs from older Moses versions.
-    UNESCAPE_FACTOR_SEPARATOR_LEGACY = r'&bar;', r'|'
-    UNESCAPE_SYNTAX_NONTERMINAL_LEFT_LEGACY = r'&bra;', r'['
-    UNESCAPE_SYNTAX_NONTERMINAL_RIGHT_LEGACY = r'&ket;', r']'
-
-
-    MOSES_UNESCAPE_XML_REGEXES = [UNESCAPE_FACTOR_SEPARATOR_LEGACY,
-                        UNESCAPE_FACTOR_SEPARATOR, UNESCAPE_LEFT_ANGLE_BRACKET,
-                        UNESCAPE_RIGHT_ANGLE_BRACKET,
-                        UNESCAPE_SYNTAX_NONTERMINAL_LEFT_LEGACY,
-                        UNESCAPE_SYNTAX_NONTERMINAL_RIGHT_LEGACY,
-                        UNESCAPE_DOUBLE_QUOTE, UNESCAPE_SINGLE_QUOTE,
-                        UNESCAPE_SYNTAX_NONTERMINAL_LEFT,
-                        UNESCAPE_SYNTAX_NONTERMINAL_RIGHT, UNESCAPE_AMPERSAND]
-
-    FINNISH_MORPHSET_1 = [u'N', u'n', u'A', u'a', u'\xc4', u'\xe4', u'ssa',
-                         u'Ssa', u'ss\xe4', u'Ss\xe4', u'sta', u'st\xe4',
-                         u'Sta', u'St\xe4', u'hun', u'Hun', u'hyn', u'Hyn',
-                         u'han', u'Han', u'h\xe4n', u'H\xe4n', u'h\xf6n',
-                         u'H\xf6n', u'un', u'Un', u'yn', u'Yn', u'an', u'An',
-                         u'\xe4n', u'\xc4n', u'\xf6n', u'\xd6n', u'seen',
-                         u'Seen', u'lla', u'Lla', u'll\xe4', u'Ll\xe4', u'lta',
-                         u'Lta', u'lt\xe4', u'Lt\xe4', u'lle', u'Lle', u'ksi',
-                         u'Ksi', u'kse', u'Kse', u'tta', u'Tta', u'ine', u'Ine']
-
-    FINNISH_MORPHSET_2 = [u'ni', u'si', u'mme', u'nne', u'nsa']
-
-    FINNISH_MORPHSET_3 = [u'ko', u'k\xf6', u'han', u'h\xe4n', u'pa', u'p\xe4',
-                         u'kaan', u'k\xe4\xe4n', u'kin']
-
-    FINNISH_REGEX = u'^({})({})?({})$'.format(text_type('|'.join(FINNISH_MORPHSET_1)),
-                                               text_type('|'.join(FINNISH_MORPHSET_2)),
-                                               text_type('|'.join(FINNISH_MORPHSET_3)))
-
-
-    def __init__(self, lang='en'):
-        super(MosesDetokenizer, self).__init__()
-        self.lang = lang
-
-
-    def unescape_xml(self, text):
-        for regexp, substitution in self.MOSES_UNESCAPE_XML_REGEXES:
-            text = re.sub(regexp, substitution, text)
-        return text
-
-
-    def tokenize(self, tokens, return_str=False, unescape=True):
-        """
-        Python port of the Moses detokenizer.
-
-        :param tokens: A list of strings, i.e. tokenized text.
-        :type tokens: list(str)
-        :return: str
-        """
-        # Convert the list of tokens into a string and pad it with spaces.
-        text = u" {} ".format(" ".join(tokens))
-        # Converts input string into unicode.
-        text = text_type(text)
-        # Detokenize the agressive hyphen split.
-        regexp, substitution = self.AGGRESSIVE_HYPHEN_SPLIT
-        text = re.sub(regexp, substitution, text)
-        if unescape:
-            # Unescape the XML symbols.
-            text = self.unescape_xml(text)
-        # Keep track of no. of quotation marks.
-        quote_counts = {u"'":0 , u'"':0, u"``":0, u"`":0, u"''":0}
-
-        # The *prepend_space* variable is used to control the "effects" of
-        # detokenization as the function loops through the list of tokens and
-        # changes the *prepend_space* accordingly as it sequentially checks
-        # through the language specific and language independent conditions.
-        prepend_space = " "
-        detokenized_text = ""
-        tokens = text.split()
-        # Iterate through every token and apply language specific detokenization rule(s).
-        for i, token in enumerate(iter(tokens)):
-            # Check if the first char is CJK.
-            if is_cjk(token[0]):
-                # Perform left shift if this is a second consecutive CJK word.
-                if i > 0 and is_cjk(token[-1]):
-                    detokenized_text += token
-                # But do nothing special if this is a CJK word that doesn't follow a CJK word
-                else:
-                    detokenized_text += prepend_space + token
-                prepend_space = " "
-
-            # If it's a currency symbol.
-            elif token in self.IsSc:
-                # Perform right shift on currency and other random punctuation items
-                detokenized_text += prepend_space + token
-                prepend_space = ""
-
-            elif re.search(r'^[\,\.\?\!\:\;\\\%\}\]\)]+$', token):
-                # In French, these punctuations are prefixed with a non-breakable space.
-                if self.lang == 'fr' and re.search(r'^[\?\!\:\;\\\%]$', token):
-                    detokenized_text += " "
-                # Perform left shift on punctuation items.
-                detokenized_text += token
-                prepend_space = " "
-
-            elif (self.lang == 'en' and i > 0
-                  and re.search(u"^[\'][{}]".format(self.IsAlpha), token)):
-                  #and re.search(u'[{}]$'.format(self.IsAlnum), tokens[i-1])):
-                # For English, left-shift the contraction.
-                detokenized_text += token
-                prepend_space = " "
-
-            elif (self.lang == 'cs' and i > 1
-                  and re.search(r'^[0-9]+$', tokens[-2]) # If the previous previous token is a number.
-                  and re.search(r'^[.,]$', tokens[-1]) # If previous token is a dot.
-                  and re.search(r'^[0-9]+$', token)): # If the current token is a number.
-                # In Czech, left-shift floats that are decimal numbers.
-                detokenized_text += token
-                prepend_space = " "
-
-            elif (self.lang in ['fr', 'it'] and i <= len(tokens)-2
-                  and re.search(u'[{}][\']$'.format(self.IsAlpha), token)
-                  and re.search(u'^[{}]$'.format(self.IsAlpha), tokens[i+1])): # If the next token is alpha.
-                # For French and Italian, right-shift the contraction.
-                detokenized_text += prepend_space + token
-                prepend_space = ""
-
-            elif (self.lang == 'cs' and i <= len(tokens)-3
-                  and re.search(u'[{}][\']$'.format(self.IsAlpha), token)
-                  and re.search(u'^[-–]$', tokens[i+1])
-                  and re.search(u'^li$|^mail.*', tokens[i+2], re.IGNORECASE)): # In Perl, ($words[$i+2] =~ /^li$|^mail.*/i)
-                # In Czech, right-shift "-li" and a few Czech dashed words (e.g. e-mail)
-                detokenized_text += prepend_space + token + tokens[i+1]
-                next(tokens, None) # Advance over the dash
-                prepend_space = ""
-
-            # Combine punctuation smartly.
-            elif re.search(r'''^[\'\"„“`]+$''', token):
-                normalized_quo = token
-                if re.search(r'^[„“”]+$', token):
-                    normalized_quo = '"'
-                quote_counts[normalized_quo] = quote_counts.get(normalized_quo, 0)
-
-                if self.lang == 'cs' and token == u"„":
-                    quote_counts[normalized_quo] = 0
-                if self.lang == 'cs' and token == u"“":
-                    quote_counts[normalized_quo] = 1
-
-
-                if quote_counts[normalized_quo] % 2 == 0:
-                    if (self.lang == 'en' and token == u"'" and i > 0
-                        and re.search(r'[s]$', tokens[i-1]) ):
-                        # Left shift on single quote for possessives ending
-                        # in "s", e.g. "The Jones' house"
-                        detokenized_text += token
-                        prepend_space = " "
-                    else:
-                        # Right shift.
-                        detokenized_text += prepend_space + token
-                        prepend_space = ""
-                        quote_counts[normalized_quo] += 1
-                else:
-                    # Left shift.
-                    detokenized_text += token
-                    prepend_space = " "
-                    quote_counts[normalized_quo] += 1
-
-            elif (self.lang == 'fi' and re.search(r':$', tokens[i-1])
-                  and re.search(self.FINNISH_REGEX, token)):
-                # Finnish : without intervening space if followed by case suffix
-                # EU:N EU:n EU:ssa EU:sta EU:hun EU:iin ...
-                detokenized_text += prepend_space + token
-                prepend_space = " "
-
-            else:
-                detokenized_text += prepend_space + token
-                prepend_space = " "
-
-        # Merge multiple spaces.
-        regexp, substitution = self.ONE_SPACE
-        detokenized_text = re.sub(regexp, substitution, detokenized_text)
-        # Removes heading and trailing spaces.
-        detokenized_text = detokenized_text.strip()
-
-        return detokenized_text if return_str else detokenized_text.split()
-
-    def detokenize(self, tokens, return_str=False, unescape=True):
-        """ Duck-typing the abstract *tokenize()*."""
-        return self.tokenize(tokens, return_str, unescape)
diff --git a/nlp_resource_data/nltk/tokenize/moses.pyc b/nlp_resource_data/nltk/tokenize/moses.pyc
deleted file mode 100755 (executable)
index e002ded..0000000
Binary files a/nlp_resource_data/nltk/tokenize/moses.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 40b3705..9e4b991
@@ -1,6 +1,6 @@
 # Multi-Word Expression tokenizer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Rob Malouf <rmalouf@mail.sdsu.edu>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -38,7 +38,7 @@ class MWETokenizer(TokenizerI):
     into single tokens.
     """
 
-    def __init__(self, mwes=None, separator='_'):
+    def __init__(self, mwes=None, separator="_"):
         """Initialize the multi-word tokenizer with a list of expressions and a
         separator
 
@@ -71,7 +71,7 @@ class MWETokenizer(TokenizerI):
         >>> tokenizer.add_mwe(('a', 'b', 'c'))
         >>> tokenizer.add_mwe(('a', 'x'))
         >>> expected = {'a': {'x': {True: None}, 'b': {True: None, 'c': {True: None}}}}
-        >>> tokenizer._mwes.as_dict() == expected
+        >>> tokenizer._mwes == expected
         True
 
         """
old mode 100755 (executable)
new mode 100644 (file)
index 419732f..e6b7491
@@ -15,11 +15,9 @@ which was also ported into Python in
 https://github.com/lium-lst/nmtpy/blob/master/nmtpy/metrics/mtevalbleu.py#L162
 """
 
-from __future__ import unicode_literals
 
 import io
 import re
-from six import text_type
 
 from nltk.corpus import perluniprops
 from nltk.tokenize.api import TokenizerI
@@ -32,7 +30,6 @@ class NISTTokenizer(TokenizerI):
     paragraph-based tokenization from mteval-14.pl; the sentence-based
     tokenization is consistent with the other tokenizers available in NLTK.
 
-    >>> from six import text_type
     >>> from nltk.tokenize.nist import NISTTokenizer
     >>> nist = NISTTokenizer()
     >>> s = "Good muffins cost $3.88 in New York."
@@ -65,49 +62,66 @@ class NISTTokenizer(TokenizerI):
     True
     >>> nist.international_tokenize(rkt)[:10] == expected_rkt
     True
+
+    # Doctest for patching issue #1926
+    >>> sent = u'this is a foo\u2604sentence.'
+    >>> expected_sent = [u'this', u'is', u'a', u'foo', u'\u2604', u'sentence', u'.']
+    >>> nist.international_tokenize(sent) == expected_sent
+    True
     """
+
     # Strip "skipped" tags
-    STRIP_SKIP = re.compile('<skipped>'), ''
+    STRIP_SKIP = re.compile("<skipped>"), ""
     #  Strip end-of-line hyphenation and join lines
-    STRIP_EOL_HYPHEN = re.compile(u'\u2028'), ' '
+    STRIP_EOL_HYPHEN = re.compile("\u2028"), " "
     # Tokenize punctuation.
-    PUNCT = re.compile('([\{-\~\[-\` -\&\(-\+\:-\@\/])'), ' \\1 '
+    PUNCT = re.compile("([\{-\~\[-\` -\&\(-\+\:-\@\/])"), " \\1 "
     # Tokenize period and comma unless preceded by a digit.
-    PERIOD_COMMA_PRECEED = re.compile('([^0-9])([\.,])'), '\\1 \\2 '
+    PERIOD_COMMA_PRECEED = re.compile("([^0-9])([\.,])"), "\\1 \\2 "
     # Tokenize period and comma unless followed by a digit.
-    PERIOD_COMMA_FOLLOW = re.compile('([\.,])([^0-9])'), ' \\1 \\2'
+    PERIOD_COMMA_FOLLOW = re.compile("([\.,])([^0-9])"), " \\1 \\2"
     # Tokenize dash when preceded by a digit
-    DASH_PRECEED_DIGIT = re.compile('([0-9])(-)'), '\\1 \\2 '
+    DASH_PRECEED_DIGIT = re.compile("([0-9])(-)"), "\\1 \\2 "
 
-    LANG_DEPENDENT_REGEXES = [PUNCT, PERIOD_COMMA_PRECEED,
-                              PERIOD_COMMA_FOLLOW, DASH_PRECEED_DIGIT]
+    LANG_DEPENDENT_REGEXES = [
+        PUNCT,
+        PERIOD_COMMA_PRECEED,
+        PERIOD_COMMA_FOLLOW,
+        DASH_PRECEED_DIGIT,
+    ]
 
     # Perluniprops characters used in NIST tokenizer.
-    pup_number = text_type(''.join(set(perluniprops.chars('Number')))) # i.e. \p{N}
-    pup_punct = text_type(''.join(set(perluniprops.chars('Punctuation')))) # i.e. \p{P}
-    pup_symbol = text_type(''.join(set(perluniprops.chars('Symbol')))) # i.e. \p{S}
+    pup_number = str("".join(set(perluniprops.chars("Number"))))  # i.e. \p{N}
+    pup_punct = str("".join(set(perluniprops.chars("Punctuation"))))  # i.e. \p{P}
+    pup_symbol = str("".join(set(perluniprops.chars("Symbol"))))  # i.e. \p{S}
 
     # Python regexes needs to escape some special symbols, see
     # see https://stackoverflow.com/q/45670950/610569
-    number_regex = re.sub(r'[]^\\-]', r'\\\g<0>', pup_number)
-    punct_regex = re.sub(r'[]^\\-]', r'\\\g<0>', pup_punct)
-    symbol_regex = re.sub(r'[]^\\-]', r'\\\g<0>', pup_symbol)
+    number_regex = re.sub(r"[]^\\-]", r"\\\g<0>", pup_number)
+    punct_regex = re.sub(r"[]^\\-]", r"\\\g<0>", pup_punct)
+    symbol_regex = re.sub(r"[]^\\-]", r"\\\g<0>", pup_symbol)
 
     # Note: In the original perl implementation, \p{Z} and \p{Zl} were used to
     #       (i) strip trailing and heading spaces  and
     #       (ii) de-duplicate spaces.
     #       In Python, this would do: ' '.join(str.strip().split())
     # Thus, the next two lines were commented out.
-    #Line_Separator = text_type(''.join(perluniprops.chars('Line_Separator'))) # i.e. \p{Zl}
-    #Separator = text_type(''.join(perluniprops.chars('Separator'))) # i.e. \p{Z}
+    # Line_Separator = str(''.join(perluniprops.chars('Line_Separator'))) # i.e. \p{Zl}
+    # Separator = str(''.join(perluniprops.chars('Separator'))) # i.e. \p{Z}
 
     # Pads runs of ASCII characters with spaces, splitting them off from non-ASCII text.
-    NONASCII = re.compile('([\x00-\x7f]+)'), r' \1 '
+    NONASCII = re.compile("([\x00-\x7f]+)"), r" \1 "
     #  Tokenize any punctuation unless followed AND preceded by a digit.
-    PUNCT_1 = re.compile(u"([{n}])([{p}])".format(n=number_regex, p=punct_regex)), '\\1 \\2 '
-    PUNCT_2 = re.compile(u"([{p}])([{n}])".format(n=number_regex, p=punct_regex)), ' \\1 \\2'
+    PUNCT_1 = (
+        re.compile("([{n}])([{p}])".format(n=number_regex, p=punct_regex)),
+        "\\1 \\2 ",
+    )
+    PUNCT_2 = (
+        re.compile("([{p}])([{n}])".format(n=number_regex, p=punct_regex)),
+        " \\1 \\2",
+    )
     # Tokenize symbols
-    SYMBOLS = re.compile(u"({s})".format(s=symbol_regex)), ' \\1 '
+    SYMBOLS = re.compile("([{s}])".format(s=symbol_regex)), " \\1 "
 
     INTERNATIONAL_REGEXES = [NONASCII, PUNCT_1, PUNCT_2, SYMBOLS]
 
@@ -123,30 +137,29 @@ class NISTTokenizer(TokenizerI):
         text = regexp.sub(substitution, text)
         return text
 
-    def tokenize(self, text, lowercase=False,
-                 western_lang=True, return_str=False):
-        text = text_type(text)
+    def tokenize(self, text, lowercase=False, western_lang=True, return_str=False):
+        text = str(text)
         # Language independent regex.
         text = self.lang_independent_sub(text)
         # Language dependent regex.
         if western_lang:
             # Pad string with whitespace.
-            text = ' ' + text + ' '
+            text = " " + text + " "
             if lowercase:
                 text = text.lower()
             for regexp, substitution in self.LANG_DEPENDENT_REGEXES:
                 text = regexp.sub(substitution, text)
         # Remove contiguous whitespaces.
-        text = ' '.join(text.split())
+        text = " ".join(text.split())
         # Finally, strips leading and trailing spaces
         # and converts the output string to unicode.
-        text = text_type(text.strip())
+        text = str(text.strip())
         return text if return_str else text.split()
 
-    def international_tokenize(self, text, lowercase=False,
-                               split_non_ascii=True,
-                               return_str=False):
-        text = text_type(text)
+    def international_tokenize(
+        self, text, lowercase=False, split_non_ascii=True, return_str=False
+    ):
+        text = str(text)
         # Different from the 'normal' tokenize(), STRIP_EOL_HYPHEN is applied
         # first before unescaping.
         regexp, substitution = self.STRIP_SKIP
@@ -163,5 +176,5 @@ class NISTTokenizer(TokenizerI):
 
         # Make sure that there's only one space between words.
         # Strip leading and trailing spaces.
-        text = ' '.join(text.strip().split())
+        text = " ".join(text.strip().split())
         return text if return_str else text.split()
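A quick smoke test of the patched tokenizer, mirroring the new doctest above (assumes an NLTK setup with the perluniprops data package installed, e.g. via nltk.download('perluniprops')):

    from nltk.tokenize.nist import NISTTokenizer

    nist = NISTTokenizer()
    print(nist.international_tokenize("this is a foo\u2604sentence."))
    # per the doctest: ['this', 'is', 'a', 'foo', '\u2604', 'sentence', '.']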
diff --git a/nlp_resource_data/nltk/tokenize/nist.pyc b/nlp_resource_data/nltk/tokenize/nist.pyc
deleted file mode 100755 (executable)
index 86f2f30..0000000
Binary files a/nlp_resource_data/nltk/tokenize/nist.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tokenize/punkt.py b/nlp_resource_data/nltk/tokenize/punkt.py
old mode 100755 (executable)
new mode 100644 (file)
index afd73a1..408ce27
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Punkt sentence tokenizer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Algorithm: Kiss & Strunk (2006)
 # Author: Willy <willy@csse.unimelb.edu.au> (original Python port)
 #         Steven Bird <stevenbird1@gmail.com> (additions)
@@ -13,7 +13,7 @@
 r"""
 Punkt Sentence Tokenizer
 
-This tokenizer divides a text into a list of sentences,
+This tokenizer divides a text into a list of sentences
 by using an unsupervised algorithm to build a model for abbreviation
 words, collocations, and words that start sentences.  It must be
 trained on a large collection of plaintext in the target language
@@ -99,7 +99,6 @@ The algorithm for this tokenizer is described in::
   Kiss, Tibor and Strunk, Jan (2006): Unsupervised Multilingual Sentence
     Boundary Detection.  Computational Linguistics 32: 485-525.
 """
-from __future__ import print_function, unicode_literals, division
 
 # TODO: Make orthographic heuristic less susceptible to overtraining
 # TODO: Frequent sentence starters optionally exclude always-capitalised words
@@ -109,35 +108,32 @@ import re
 import math
 from collections import defaultdict
 
-from six import string_types
-
-from nltk.compat import unicode_repr, python_2_unicode_compatible
 from nltk.probability import FreqDist
 from nltk.tokenize.api import TokenizerI
 
 ######################################################################
-#{ Orthographic Context Constants
+# { Orthographic Context Constants
 ######################################################################
 # The following constants are used to describe the orthographic
 # contexts in which a word can occur.  BEG=beginning, MID=middle,
 # UNK=unknown, UC=uppercase, LC=lowercase, NC=no case.
 
-_ORTHO_BEG_UC    = 1 << 1
+_ORTHO_BEG_UC = 1 << 1
 """Orthographic context: beginning of a sentence with upper case."""
 
-_ORTHO_MID_UC    = 1 << 2
+_ORTHO_MID_UC = 1 << 2
 """Orthographic context: middle of a sentence with upper case."""
 
-_ORTHO_UNK_UC    = 1 << 3
+_ORTHO_UNK_UC = 1 << 3
 """Orthographic context: unknown position in a sentence with upper case."""
 
-_ORTHO_BEG_LC    = 1 << 4
+_ORTHO_BEG_LC = 1 << 4
 """Orthographic context: beginning of a sentence with lower case."""
 
-_ORTHO_MID_LC    = 1 << 5
+_ORTHO_MID_LC = 1 << 5
 """Orthographic context: middle of a sentence with lower case."""
 
-_ORTHO_UNK_LC    = 1 << 6
+_ORTHO_UNK_LC = 1 << 6
 """Orthographic context: unknown position in a sentence with lower case."""
 
 _ORTHO_UC = _ORTHO_BEG_UC + _ORTHO_MID_UC + _ORTHO_UNK_UC
@@ -147,38 +143,42 @@ _ORTHO_LC = _ORTHO_BEG_LC + _ORTHO_MID_LC + _ORTHO_UNK_LC
 """Orthographic context: occurs with lower case."""
 
 _ORTHO_MAP = {
-        ('initial',  'upper'): _ORTHO_BEG_UC,
-        ('internal', 'upper'): _ORTHO_MID_UC,
-        ('unknown',  'upper'): _ORTHO_UNK_UC,
-        ('initial',  'lower'): _ORTHO_BEG_LC,
-        ('internal', 'lower'): _ORTHO_MID_LC,
-        ('unknown',  'lower'): _ORTHO_UNK_LC,
+    ("initial", "upper"): _ORTHO_BEG_UC,
+    ("internal", "upper"): _ORTHO_MID_UC,
+    ("unknown", "upper"): _ORTHO_UNK_UC,
+    ("initial", "lower"): _ORTHO_BEG_LC,
+    ("internal", "lower"): _ORTHO_MID_LC,
+    ("unknown", "lower"): _ORTHO_UNK_LC,
 }
 """A map from context position and first-letter case to the
 appropriate orthographic context flag."""
 
-#} (end orthographic context constants)
+# } (end orthographic context constants)
 ######################################################################
 
 ######################################################################
-#{ Decision reasons for debugging
+# { Decision reasons for debugging
 ######################################################################
 
-REASON_DEFAULT_DECISION = 'default decision'
-REASON_KNOWN_COLLOCATION = 'known collocation (both words)'
-REASON_ABBR_WITH_ORTHOGRAPHIC_HEURISTIC = 'abbreviation + orthographic heuristic'
-REASON_ABBR_WITH_SENTENCE_STARTER = 'abbreviation + frequent sentence starter'
-REASON_INITIAL_WITH_ORTHOGRAPHIC_HEURISTIC = 'initial + orthographic heuristic'
-REASON_NUMBER_WITH_ORTHOGRAPHIC_HEURISTIC = 'initial + orthographic heuristic'
-REASON_INITIAL_WITH_SPECIAL_ORTHOGRAPHIC_HEURISTIC = 'initial + special orthographic heuristic'
+REASON_DEFAULT_DECISION = "default decision"
+REASON_KNOWN_COLLOCATION = "known collocation (both words)"
+REASON_ABBR_WITH_ORTHOGRAPHIC_HEURISTIC = "abbreviation + orthographic heuristic"
+REASON_ABBR_WITH_SENTENCE_STARTER = "abbreviation + frequent sentence starter"
+REASON_INITIAL_WITH_ORTHOGRAPHIC_HEURISTIC = "initial + orthographic heuristic"
+REASON_NUMBER_WITH_ORTHOGRAPHIC_HEURISTIC = "initial + orthographic heuristic"
+REASON_INITIAL_WITH_SPECIAL_ORTHOGRAPHIC_HEURISTIC = (
+    "initial + special orthographic heuristic"
+)
+
 
-#} (end decision reasons for debugging)
+# } (end decision reasons for debugging)
 ######################################################################
 
 ######################################################################
-#{ Language-dependent variables
+# { Language-dependent variables
 ######################################################################
 
+
 class PunktLanguageVars(object):
     """
     Stores variables, mostly regular expressions, which may be
@@ -189,7 +189,7 @@ class PunktLanguageVars(object):
     constructors.
     """
 
-    __slots__ = ('_re_period_context', '_re_word_tokenizer')
+    __slots__ = ("_re_period_context", "_re_word_tokenizer")
 
     def __getstate__(self):
         # All modifications to the class are performed by inheritance.
@@ -200,32 +200,31 @@ class PunktLanguageVars(object):
     def __setstate__(self, state):
         return 1
 
-    sent_end_chars = ('.', '?', '!')
+    sent_end_chars = (".", "?", "!")
     """Characters which are candidates for sentence boundaries"""
 
     @property
     def _re_sent_end_chars(self):
-        return '[%s]' % re.escape(''.join(self.sent_end_chars))
+        return "[%s]" % re.escape("".join(self.sent_end_chars))
 
-    internal_punctuation = ',:;' # might want to extend this..
+    internal_punctuation = ",:;"  # might want to extend this..
     """sentence internal punctuation, which indicates an abbreviation if
     preceded by a period-final token."""
 
-    re_boundary_realignment = re.compile(r'["\')\]}]+?(?:\s+|(?=--)|$)',
-            re.MULTILINE)
+    re_boundary_realignment = re.compile(r'["\')\]}]+?(?:\s+|(?=--)|$)', re.MULTILINE)
     """Used to realign punctuation that should be included in a sentence
     although it follows the period (or ?, !)."""
 
-    _re_word_start    = r"[^\(\"\`{\[:;&\#\*@\)}\]\-,]"
+    _re_word_start = r"[^\(\"\`{\[:;&\#\*@\)}\]\-,]"
     """Excludes some characters from starting word tokens"""
 
-    _re_non_word_chars   = r"(?:[?!)\";}\]\*:@\'\({\[])"
+    _re_non_word_chars = r"(?:[?!)\";}\]\*:@\'\({\[])"
     """Characters that cannot appear within words"""
 
     _re_multi_char_punct = r"(?:\-{2,}|\.{2,}|(?:\.\s){2,}\.)"
     """Hyphen and ellipsis are multi-character punctuation"""
 
-    _word_tokenize_fmt = r'''(
+    _word_tokenize_fmt = r"""(
         %(MultiChar)s
         |
         (?=%(WordStart)s)\S+?  # Accept word characters until end is found
@@ -237,7 +236,7 @@ class PunktLanguageVars(object):
         )
         |
         \S
-    )'''
+    )"""
     """Format of a regular expression to split punctuation from words,
     excluding period."""
 
@@ -247,13 +246,13 @@ class PunktLanguageVars(object):
             return self._re_word_tokenizer
         except AttributeError:
             self._re_word_tokenizer = re.compile(
-                self._word_tokenize_fmt %
-                {
-                    'NonWord':   self._re_non_word_chars,
-                    'MultiChar': self._re_multi_char_punct,
-                    'WordStart': self._re_word_start,
+                self._word_tokenize_fmt
+                % {
+                    "NonWord": self._re_non_word_chars,
+                    "MultiChar": self._re_multi_char_punct,
+                    "WordStart": self._re_word_start,
                 },
-                re.UNICODE | re.VERBOSE
+                re.UNICODE | re.VERBOSE,
             )
             return self._re_word_tokenizer
 
@@ -280,27 +279,29 @@ class PunktLanguageVars(object):
             return self._re_period_context
         except:
             self._re_period_context = re.compile(
-                self._period_context_fmt %
-                {
-                    'NonWord':      self._re_non_word_chars,
-                    'SentEndChars': self._re_sent_end_chars,
+                self._period_context_fmt
+                % {
+                    "NonWord": self._re_non_word_chars,
+                    "SentEndChars": self._re_sent_end_chars,
                 },
-                re.UNICODE | re.VERBOSE)
+                re.UNICODE | re.VERBOSE,
+            )
             return self._re_period_context
 
 
-_re_non_punct = re.compile(r'[^\W\d]', re.UNICODE)
+_re_non_punct = re.compile(r"[^\W\d]", re.UNICODE)
 """Matches token types that are not merely punctuation. (Types for
 numeric tokens are changed to ##number## and hence contain alpha.)"""
 
-#}
+
+# }
 ######################################################################
 
 
+# ////////////////////////////////////////////////////////////
+# { Helper Functions
+# ////////////////////////////////////////////////////////////
 
-#////////////////////////////////////////////////////////////
-#{ Helper Functions
-#////////////////////////////////////////////////////////////
 
 def _pair_iter(it):
     """
@@ -309,16 +310,21 @@ def _pair_iter(it):
     pair will have None as its second element.
     """
     it = iter(it)
-    prev = next(it)
+    try:
+        prev = next(it)
+    except StopIteration:
+        return
     for el in it:
         yield (prev, el)
         prev = el
     yield (prev, None)
 
+
 ######################################################################
-#{ Punkt Parameters
+# { Punkt Parameters
 ######################################################################
 
+
 class PunktParameters(object):
     """Stores data used to perform sentence boundary detection with Punkt."""
 
@@ -360,66 +366,64 @@ class PunktParameters(object):
     def _debug_ortho_context(self, typ):
         c = self.ortho_context[typ]
         if c & _ORTHO_BEG_UC:
-            yield 'BEG-UC'
+            yield "BEG-UC"
         if c & _ORTHO_MID_UC:
-            yield 'MID-UC'
+            yield "MID-UC"
         if c & _ORTHO_UNK_UC:
-            yield 'UNK-UC'
+            yield "UNK-UC"
         if c & _ORTHO_BEG_LC:
-            yield 'BEG-LC'
+            yield "BEG-LC"
         if c & _ORTHO_MID_LC:
-            yield 'MID-LC'
+            yield "MID-LC"
         if c & _ORTHO_UNK_LC:
-            yield 'UNK-LC'
+            yield "UNK-LC"
+
 
 ######################################################################
-#{ PunktToken
+# { PunktToken
 ######################################################################
 
-@python_2_unicode_compatible
+
 class PunktToken(object):
     """Stores a token of text with annotations produced during
     sentence boundary detection."""
 
-    _properties = [
-        'parastart', 'linestart',
-        'sentbreak', 'abbr', 'ellipsis'
-    ]
-    __slots__ = ['tok', 'type', 'period_final'] + _properties
+    _properties = ["parastart", "linestart", "sentbreak", "abbr", "ellipsis"]
+    __slots__ = ["tok", "type", "period_final"] + _properties
 
     def __init__(self, tok, **params):
         self.tok = tok
         self.type = self._get_type(tok)
-        self.period_final = tok.endswith('.')
+        self.period_final = tok.endswith(".")
 
         for p in self._properties:
             setattr(self, p, None)
         for k in params:
             setattr(self, k, params[k])
 
-    #////////////////////////////////////////////////////////////
-    #{ Regular expressions for properties
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Regular expressions for properties
+    # ////////////////////////////////////////////////////////////
     # Note: [A-Za-z] is approximated by [^\W\d] in the general case.
-    _RE_ELLIPSIS = re.compile(r'\.\.+$')
-    _RE_NUMERIC = re.compile(r'^-?[\.,]?\d[\d,\.-]*\.?$')
-    _RE_INITIAL = re.compile(r'[^\W\d]\.$', re.UNICODE)
-    _RE_ALPHA = re.compile(r'[^\W\d]+$', re.UNICODE)
+    _RE_ELLIPSIS = re.compile(r"\.\.+$")
+    _RE_NUMERIC = re.compile(r"^-?[\.,]?\d[\d,\.-]*\.?$")
+    _RE_INITIAL = re.compile(r"[^\W\d]\.$", re.UNICODE)
+    _RE_ALPHA = re.compile(r"[^\W\d]+$", re.UNICODE)
 
-    #////////////////////////////////////////////////////////////
-    #{ Derived properties
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Derived properties
+    # ////////////////////////////////////////////////////////////
 
     def _get_type(self, tok):
         """Returns a case-normalized representation of the token."""
-        return self._RE_NUMERIC.sub('##number##', tok.lower())
+        return self._RE_NUMERIC.sub("##number##", tok.lower())
 
     @property
     def type_no_period(self):
         """
         The type with its final period removed if it has one.
         """
-        if len(self.type) > 1 and self.type[-1] == '.':
+        if len(self.type) > 1 and self.type[-1] == ".":
             return self.type[:-1]
         return self.type
 
@@ -446,10 +450,10 @@ class PunktToken(object):
     @property
     def first_case(self):
         if self.first_lower:
-            return 'lower'
+            return "lower"
         elif self.first_upper:
-            return 'upper'
-        return 'none'
+            return "upper"
+        return "none"
 
     @property
     def is_ellipsis(self):
@@ -459,7 +463,7 @@ class PunktToken(object):
     @property
     def is_number(self):
         """True if the token text is that of a number."""
-        return self.type.startswith('##number##')
+        return self.type.startswith("##number##")
 
     @property
     def is_initial(self):
@@ -476,9 +480,9 @@ class PunktToken(object):
         """True if the token is either a number or is alphabetic."""
         return _re_non_punct.search(self.type)
 
-    #////////////////////////////////////////////////////////////
-    #{ String representation
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { String representation
+    # ////////////////////////////////////////////////////////////
 
     def __repr__(self):
         """
@@ -486,17 +490,20 @@ class PunktToken(object):
         with eval(), which lists all the token's non-default
         annotations.
         """
-        typestr = (' type=%s,' % unicode_repr(self.type)
-                   if self.type != self.tok else '')
+        typestr = " type=%s," % repr(self.type) if self.type != self.tok else ""
 
-        propvals = ', '.join(
-            '%s=%s' % (p, unicode_repr(getattr(self, p)))
+        propvals = ", ".join(
+            "%s=%s" % (p, repr(getattr(self, p)))
             for p in self._properties
             if getattr(self, p)
         )
 
-        return '%s(%s,%s %s)' % (self.__class__.__name__,
-            unicode_repr(self.tok), typestr, propvals)
+        return "%s(%s,%s %s)" % (
+            self.__class__.__name__,
+            repr(self.tok),
+            typestr,
+            propvals,
+        )
 
     def __str__(self):
         """
@@ -504,35 +511,38 @@ class PunktToken(object):
         """
         res = self.tok
         if self.abbr:
-            res += '<A>'
+            res += "<A>"
         if self.ellipsis:
-            res += '<E>'
+            res += "<E>"
         if self.sentbreak:
-            res += '<S>'
+            res += "<S>"
         return res
 
+
 ######################################################################
-#{ Punkt base class
+# { Punkt base class
 ######################################################################
 
+
 class PunktBaseClass(object):
     """
     Includes common components of PunktTrainer and PunktSentenceTokenizer.
     """
 
-    def __init__(self, lang_vars=PunktLanguageVars(), token_cls=PunktToken,
-            params=None):
+    def __init__(self, lang_vars=None, token_cls=PunktToken, params=None):
+        if lang_vars is None:
+            lang_vars = PunktLanguageVars()
         if params is None:
-            params = PunktParameters() 
+            params = PunktParameters()
         self._params = params
         self._lang_vars = lang_vars
         self._Token = token_cls
         """The collection of parameters that determines the behavior
         of the punkt tokenizer."""
 
-    #////////////////////////////////////////////////////////////
-    #{ Word tokenization
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Word tokenization
+    # ////////////////////////////////////////////////////////////
 
     def _tokenize_words(self, plaintext):
         """
@@ -543,12 +553,16 @@ class PunktBaseClass(object):
         respectively.
         """
         parastart = False
-        for line in plaintext.split('\n'):
+        for line in plaintext.split("\n"):
             if line.strip():
                 line_toks = iter(self._lang_vars.word_tokenize(line))
 
-                yield self._Token(next(line_toks),
-                        parastart=parastart, linestart=True)
+                try:
+                    tok = next(line_toks)
+                except StopIteration:
+                    continue
+
+                yield self._Token(tok, parastart=parastart, linestart=True)
                 parastart = False
 
                 for t in line_toks:
@@ -556,10 +570,9 @@ class PunktBaseClass(object):
             else:
                 parastart = True
 
-
-    #////////////////////////////////////////////////////////////
-    #{ Annotation Procedures
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Annotation Procedures
+    # ////////////////////////////////////////////////////////////
 
     def _annotate_first_pass(self, tokens):
         """
@@ -593,9 +606,11 @@ class PunktBaseClass(object):
             aug_tok.sentbreak = True
         elif aug_tok.is_ellipsis:
             aug_tok.ellipsis = True
-        elif aug_tok.period_final and not tok.endswith('..'):
-            if (tok[:-1].lower() in self._params.abbrev_types or
-                tok[:-1].lower().split('-')[-1] in self._params.abbrev_types):
+        elif aug_tok.period_final and not tok.endswith(".."):
+            if (
+                tok[:-1].lower() in self._params.abbrev_types
+                or tok[:-1].lower().split("-")[-1] in self._params.abbrev_types
+            ):
 
                 aug_tok.abbr = True
             else:
@@ -603,19 +618,20 @@ class PunktBaseClass(object):
 
         return
 
+
 ######################################################################
-#{ Punkt Trainer
+# { Punkt Trainer
 ######################################################################
 
 
 class PunktTrainer(PunktBaseClass):
     """Learns parameters used in Punkt sentence boundary detection."""
 
-    def __init__(self, train_text=None, verbose=False,
-            lang_vars=PunktLanguageVars(), token_cls=PunktToken):
+    def __init__(
+        self, train_text=None, verbose=False, lang_vars=None, token_cls=PunktToken
+    ):
 
-        PunktBaseClass.__init__(self, lang_vars=lang_vars,
-                token_cls=token_cls)
+        PunktBaseClass.__init__(self, lang_vars=lang_vars, token_cls=token_cls)
 
         self._type_fdist = FreqDist()
         """A frequency distribution giving the frequency of each
@@ -660,9 +676,9 @@ class PunktTrainer(PunktBaseClass):
             self.finalize_training()
         return self._params
 
-    #////////////////////////////////////////////////////////////
-    #{ Customization Variables
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Customization Variables
+    # ////////////////////////////////////////////////////////////
 
     ABBREV = 0.3
     """cut-off value whether a 'token' is an abbreviation"""
@@ -701,9 +717,9 @@ class PunktTrainer(PunktBaseClass):
     appear before it can be considered a collocation, in addition to log
     likelihood statistics. This is useful when INCLUDE_ALL_COLLOCS is True."""
 
-    #////////////////////////////////////////////////////////////
-    #{ Training..
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Training..
+    # ////////////////////////////////////////////////////////////
 
     def train(self, text, verbose=False, finalize=True):
         """
@@ -747,14 +763,12 @@ class PunktTrainer(PunktBaseClass):
                 if is_add:
                     self._params.abbrev_types.add(abbr)
                     if verbose:
-                        print(('  Abbreviation: [%6.4f] %s' %
-                               (score, abbr)))
+                        print(("  Abbreviation: [%6.4f] %s" % (score, abbr)))
             else:
                 if not is_add:
                     self._params.abbrev_types.remove(abbr)
                     if verbose:
-                        print(('  Removed abbreviation: [%6.4f] %s' %
-                               (score, abbr)))
+                        print(("  Removed abbreviation: [%6.4f] %s" % (score, abbr)))
 
         # Make a preliminary pass through the document, marking likely
         # sentence breaks, abbreviations, and ellipsis tokens.
@@ -777,7 +791,7 @@ class PunktTrainer(PunktBaseClass):
             if self._is_rare_abbrev_type(aug_tok1, aug_tok2):
                 self._params.abbrev_types.add(aug_tok1.type_no_period)
                 if verbose:
-                    print(('  Rare Abbrev: %s' % aug_tok1.type))
+                    print(("  Rare Abbrev: %s" % aug_tok1.type))
 
             # Does second token have a high likelihood of starting a sentence?
             if self._is_potential_sent_starter(aug_tok2, aug_tok1):
@@ -786,7 +800,8 @@ class PunktTrainer(PunktBaseClass):
             # Is this bigram a potential collocation?
             if self._is_potential_collocation(aug_tok1, aug_tok2):
                 self._collocation_fdist[
-                    (aug_tok1.type_no_period, aug_tok2.type_no_sentperiod)] += 1
+                    (aug_tok1.type_no_period, aug_tok2.type_no_sentperiod)
+                ] += 1
 
     def _unique_types(self, tokens):
         return set(aug_tok.type for aug_tok in tokens)
@@ -800,23 +815,23 @@ class PunktTrainer(PunktBaseClass):
         for typ, ll in self._find_sent_starters():
             self._params.sent_starters.add(typ)
             if verbose:
-                print(('  Sent Starter: [%6.4f] %r' % (ll, typ)))
+                print(("  Sent Starter: [%6.4f] %r" % (ll, typ)))
 
         self._params.clear_collocations()
         for (typ1, typ2), ll in self._find_collocations():
-            self._params.collocations.add( (typ1,typ2) )
+            self._params.collocations.add((typ1, typ2))
             if verbose:
-                print(('  Collocation: [%6.4f] %r+%r' %
-                       (ll, typ1, typ2)))
+                print(("  Collocation: [%6.4f] %r+%r" % (ll, typ1, typ2)))
 
         self._finalized = True
 
-    #////////////////////////////////////////////////////////////
-    #{ Overhead reduction
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Overhead reduction
+    # ////////////////////////////////////////////////////////////
 
-    def freq_threshold(self, ortho_thresh=2, type_thresh=2, colloc_thres=2,
-            sentstart_thresh=2):
+    def freq_threshold(
+        self, ortho_thresh=2, type_thresh=2, colloc_thres=2, sentstart_thresh=2
+    ):
         """
         Allows memory use to be reduced after much training by removing data
         about rare tokens that are unlikely to have a statistical effect with
@@ -833,9 +848,11 @@ class PunktTrainer(PunktBaseClass):
 
         self._type_fdist = self._freq_threshold(self._type_fdist, type_thresh)
         self._collocation_fdist = self._freq_threshold(
-                self._collocation_fdist, colloc_thres)
+            self._collocation_fdist, colloc_thres
+        )
         self._sent_starter_fdist = self._freq_threshold(
-                self._sent_starter_fdist, sentstart_thresh)
+            self._sent_starter_fdist, sentstart_thresh
+        )
 
     def _freq_threshold(self, fdist, threshold):
         """
@@ -855,9 +872,9 @@ class PunktTrainer(PunktBaseClass):
         res[None] += num_removed
         return res
 
-    #////////////////////////////////////////////////////////////
-    #{ Orthographic data
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Orthographic data
+    # ////////////////////////////////////////////////////////////
 
     def _get_orthography_data(self, tokens):
         """
@@ -867,7 +884,7 @@ class PunktTrainer(PunktBaseClass):
         positions.
         """
         # 'initial' or 'internal' or 'unknown'
-        context = 'internal'
+        context = "internal"
         tokens = list(tokens)
 
         for aug_tok in tokens:
@@ -875,13 +892,13 @@ class PunktTrainer(PunktBaseClass):
             # that it's a sentence break.  But err on the side of
             # caution (by not positing a sentence break) if we just
             # saw an abbreviation.
-            if aug_tok.parastart and context != 'unknown':
-                context = 'initial'
+            if aug_tok.parastart and context != "unknown":
+                context = "initial"
 
             # If we're at the beginning of a line, then we can't decide
             # between 'internal' and 'initial'.
-            if aug_tok.linestart and context == 'internal':
-                context = 'unknown'
+            if aug_tok.linestart and context == "internal":
+                context = "unknown"
 
             # Find the case-normalized type of the token.  If it's a
             # sentence-final token, strip off the period.
@@ -895,17 +912,17 @@ class PunktTrainer(PunktBaseClass):
             # Decide whether the next word is at a sentence boundary.
             if aug_tok.sentbreak:
                 if not (aug_tok.is_number or aug_tok.is_initial):
-                    context = 'initial'
+                    context = "initial"
                 else:
-                    context = 'unknown'
+                    context = "unknown"
             elif aug_tok.ellipsis or aug_tok.abbr:
-                context = 'unknown'
+                context = "unknown"
             else:
-                context = 'internal'
+                context = "internal"
 
-    #////////////////////////////////////////////////////////////
-    #{ Abbreviations
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Abbreviations
+    # ////////////////////////////////////////////////////////////
 
     def _reclassify_abbrev_types(self, types):
         """
@@ -928,10 +945,10 @@ class PunktTrainer(PunktBaseClass):
         for typ in types:
             # Check some basic conditions, to rule out words that are
             # clearly not abbrev_types.
-            if not _re_non_punct.search(typ) or typ == '##number##':
+            if not _re_non_punct.search(typ) or typ == "##number##":
                 continue
 
-            if typ.endswith('.'):
+            if typ.endswith("."):
                 if typ in self._params.abbrev_types:
                     continue
                 typ = typ[:-1]
@@ -943,7 +960,7 @@ class PunktTrainer(PunktBaseClass):
 
             # Count how many periods & nonperiods are in the
             # candidate.
-            num_periods = typ.count('.') + 1
+            num_periods = typ.count(".") + 1
             num_nonperiods = len(typ) - num_periods + 1
 
             # Let <a> be the candidate without the period, and <b>
@@ -951,12 +968,14 @@ class PunktTrainer(PunktBaseClass):
             # indicates whether <ab> occurs as a single unit (high
             # value of ll), or as two independent units <a> and
             # <b> (low value of ll).
-            count_with_period = self._type_fdist[typ + '.']
+            count_with_period = self._type_fdist[typ + "."]
             count_without_period = self._type_fdist[typ]
             ll = self._dunning_log_likelihood(
                 count_with_period + count_without_period,
-                self._num_period_toks, count_with_period,
-                self._type_fdist.N())
+                self._num_period_toks,
+                count_with_period,
+                self._type_fdist.N(),
+            )
 
             # Apply three scaling factors to 'tweak' the basic log
             # likelihood ratio:
@@ -965,8 +984,9 @@ class PunktTrainer(PunktBaseClass):
             #   F_penalty: penalize occurrences w/o a period
             f_length = math.exp(-num_nonperiods)
             f_periods = num_periods
-            f_penalty = (int(self.IGNORE_ABBREV_PENALTY)
-                    or math.pow(num_nonperiods, -count_without_period))
+            f_penalty = int(self.IGNORE_ABBREV_PENALTY) or math.pow(
+                num_nonperiods, -count_without_period
+            )
             score = ll * f_length * f_periods * f_penalty
 
             yield typ, score, is_add
@@ -978,7 +998,7 @@ class PunktTrainer(PunktBaseClass):
         This fails to include abbreviations otherwise found as "rare".
         """
         self._params.clear_abbrevs()
-        tokens = (typ for typ in self._type_fdist if typ and typ.endswith('.'))
+        tokens = (typ for typ in self._type_fdist if typ and typ.endswith("."))
         for abbr, score, is_add in self._reclassify_abbrev_types(tokens):
             if score >= self.ABBREV:
                 self._params.abbrev_types.add(abbr)
@@ -1006,7 +1026,7 @@ class PunktTrainer(PunktBaseClass):
         # Proceed only if the type hasn't been categorized as an
         # abbreviation already, and is sufficiently rare...
         count = self._type_fdist[typ] + self._type_fdist[typ[:-1]]
-        if (typ in self._params.abbrev_types or count >= self.ABBREV_BACKOFF):
+        if typ in self._params.abbrev_types or count >= self.ABBREV_BACKOFF:
             return False
 
         # Record this token as an abbreviation if the next
@@ -1024,13 +1044,14 @@ class PunktTrainer(PunktBaseClass):
         elif next_tok.first_lower:
             typ2 = next_tok.type_no_sentperiod
             typ2ortho_context = self._params.ortho_context[typ2]
-            if ( (typ2ortho_context & _ORTHO_BEG_UC) and
-                 not (typ2ortho_context & _ORTHO_MID_UC) ):
+            if (typ2ortho_context & _ORTHO_BEG_UC) and not (
+                typ2ortho_context & _ORTHO_MID_UC
+            ):
                 return True
 
-    #////////////////////////////////////////////////////////////
-    #{ Log Likelihoods
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Log Likelihoods
+    # ////////////////////////////////////////////////////////////
 
     # helper for _reclassify_abbrev_types:
     @staticmethod
@@ -1043,14 +1064,12 @@ class PunktTrainer(PunktBaseClass):
         p1 = count_b / N
         p2 = 0.99
 
-        null_hypo = (count_ab * math.log(p1) +
-                     (count_a - count_ab) * math.log(1.0 - p1))
-        alt_hypo  = (count_ab * math.log(p2) +
-                     (count_a - count_ab) * math.log(1.0 - p2))
+        null_hypo = count_ab * math.log(p1) + (count_a - count_ab) * math.log(1.0 - p1)
+        alt_hypo = count_ab * math.log(p2) + (count_a - count_ab) * math.log(1.0 - p2)
 
         likelihood = null_hypo - alt_hypo
 
-        return (-2.0 * likelihood)
+        return -2.0 * likelihood
 
     @staticmethod
     def _col_log_likelihood(count_a, count_b, count_ab, N):
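The hunk above only reflows _dunning_log_likelihood; its math is unchanged. For reference, the same computation as a standalone function with illustrative counts (count_a: total occurrences of a type, count_b: period-final tokens in the corpus, count_ab: occurrences of the type with a period, N: corpus size):

    import math

    def dunning_log_likelihood(count_a, count_b, count_ab, N):
        p1 = count_b / N
        p2 = 0.99  # hard-coded alternative hypothesis, as in the source
        null_hypo = count_ab * math.log(p1) + (count_a - count_ab) * math.log(1.0 - p1)
        alt_hypo = count_ab * math.log(p2) + (count_a - count_ab) * math.log(1.0 - p2)
        return -2.0 * (null_hypo - alt_hypo)

    # A type seen 10 times, always period-final, in a 1000-token corpus with
    # 12 period-final tokens: a large positive ratio (this raw value is then
    # scaled by the length/period/penalty factors before the ABBREV cut-off).
    print(dunning_log_likelihood(10, 12, 10, 1000))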
@@ -1070,48 +1089,53 @@ class PunktTrainer(PunktBaseClass):
             p2 = 1
 
         try:
-            summand1 = (count_ab * math.log(p) +
-                        (count_a - count_ab) * math.log(1.0 - p))
+            summand1 = count_ab * math.log(p) + (count_a - count_ab) * math.log(1.0 - p)
         except ValueError as e:
             summand1 = 0
 
         try:
-            summand2 = ((count_b - count_ab) * math.log(p) +
-                        (N - count_a - count_b + count_ab) * math.log(1.0 - p))
+            summand2 = (count_b - count_ab) * math.log(p) + (
+                N - count_a - count_b + count_ab
+            ) * math.log(1.0 - p)
         except ValueError as e:
             summand2 = 0
 
         if count_a == count_ab or p1 <= 0 or p1 >= 1:
             summand3 = 0
         else:
-            summand3 = (count_ab * math.log(p1) +
-                        (count_a - count_ab) * math.log(1.0 - p1))
+            summand3 = count_ab * math.log(p1) + (count_a - count_ab) * math.log(
+                1.0 - p1
+            )
 
         if count_b == count_ab or p2 <= 0 or p2 >= 1:
             summand4 = 0
         else:
-            summand4 = ((count_b - count_ab) * math.log(p2) +
-                        (N - count_a - count_b + count_ab) * math.log(1.0 - p2))
+            summand4 = (count_b - count_ab) * math.log(p2) + (
+                N - count_a - count_b + count_ab
+            ) * math.log(1.0 - p2)
 
         likelihood = summand1 + summand2 - summand3 - summand4
 
-        return (-2.0 * likelihood)
+        return -2.0 * likelihood
 
-    #////////////////////////////////////////////////////////////
-    #{ Collocation Finder
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Collocation Finder
+    # ////////////////////////////////////////////////////////////
 
     def _is_potential_collocation(self, aug_tok1, aug_tok2):
         """
         Returns True if the pair of tokens may form a collocation given
         log-likelihood statistics.
         """
-        return ((self.INCLUDE_ALL_COLLOCS or
-                (self.INCLUDE_ABBREV_COLLOCS and aug_tok1.abbr) or
-                (aug_tok1.sentbreak and
-                    (aug_tok1.is_number or aug_tok1.is_initial)))
-                and aug_tok1.is_non_punct
-                and aug_tok2.is_non_punct)
+        return (
+            (
+                self.INCLUDE_ALL_COLLOCS
+                or (self.INCLUDE_ABBREV_COLLOCS and aug_tok1.abbr)
+                or (aug_tok1.sentbreak and (aug_tok1.is_number or aug_tok1.is_initial))
+            )
+            and aug_tok1.is_non_punct
+            and aug_tok2.is_non_punct
+        )
 
     def _find_collocations(self):
         """
@@ -1127,23 +1151,26 @@ class PunktTrainer(PunktBaseClass):
                 continue
 
             col_count = self._collocation_fdist[types]
-            typ1_count = self._type_fdist[typ1]+self._type_fdist[typ1+'.']
-            typ2_count = self._type_fdist[typ2]+self._type_fdist[typ2+'.']
-            if (typ1_count > 1 and typ2_count > 1
-                    and self.MIN_COLLOC_FREQ <
-                        col_count <= min(typ1_count, typ2_count)):
-
-                ll = self._col_log_likelihood(typ1_count, typ2_count,
-                                              col_count, self._type_fdist.N())
+            typ1_count = self._type_fdist[typ1] + self._type_fdist[typ1 + "."]
+            typ2_count = self._type_fdist[typ2] + self._type_fdist[typ2 + "."]
+            if (
+                typ1_count > 1
+                and typ2_count > 1
+                and self.MIN_COLLOC_FREQ < col_count <= min(typ1_count, typ2_count)
+            ):
+
+                ll = self._col_log_likelihood(
+                    typ1_count, typ2_count, col_count, self._type_fdist.N()
+                )
                 # Filter out the not-so-collocative
-                if (ll >= self.COLLOCATION and
-                    (self._type_fdist.N()/typ1_count >
-                     typ2_count/col_count)):
+                if ll >= self.COLLOCATION and (
+                    self._type_fdist.N() / typ1_count > typ2_count / col_count
+                ):
                     yield (typ1, typ2), ll
 
-    #////////////////////////////////////////////////////////////
-    #{ Sentence-Starter Finder
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Sentence-Starter Finder
+    # ////////////////////////////////////////////////////////////
 
     def _is_potential_sent_starter(self, cur_tok, prev_tok):
         """
@@ -1153,9 +1180,11 @@ class PunktTrainer(PunktBaseClass):
         # If a token (i) is preceded by a sentence break that is
         # not a potential ordinal number or initial, and (ii) is
         # alphabetic, then it is a sentence-starter.
-        return ( prev_tok.sentbreak and
-             not (prev_tok.is_number or prev_tok.is_initial) and
-             cur_tok.is_alpha )
+        return (
+            prev_tok.sentbreak
+            and not (prev_tok.is_number or prev_tok.is_initial)
+            and cur_tok.is_alpha
+        )
 
     def _find_sent_starters(self):
         """
@@ -1167,19 +1196,23 @@ class PunktTrainer(PunktBaseClass):
                 continue
 
             typ_at_break_count = self._sent_starter_fdist[typ]
-            typ_count = self._type_fdist[typ]+self._type_fdist[typ+'.']
+            typ_count = self._type_fdist[typ] + self._type_fdist[typ + "."]
             if typ_count < typ_at_break_count:
                 # needed after freq_threshold
                 continue
 
-            ll = self._col_log_likelihood(self._sentbreak_count, typ_count,
-                                         typ_at_break_count,
-                                          self._type_fdist.N())
-
-            if (ll >= self.SENT_STARTER and
-                self._type_fdist.N()/self._sentbreak_count >
-                typ_count/typ_at_break_count):
+            ll = self._col_log_likelihood(
+                self._sentbreak_count,
+                typ_count,
+                typ_at_break_count,
+                self._type_fdist.N(),
+            )
 
+            if (
+                ll >= self.SENT_STARTER
+                and self._type_fdist.N() / self._sentbreak_count
+                > typ_count / typ_at_break_count
+            ):
                 yield typ, ll
 
     def _get_sentbreak_count(self, tokens):
@@ -1191,11 +1224,11 @@ class PunktTrainer(PunktBaseClass):
 
 
 ######################################################################
-#{ Punkt Sentence Tokenizer
+# { Punkt Sentence Tokenizer
 ######################################################################
 
 
-class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
+class PunktSentenceTokenizer(PunktBaseClass, TokenizerI):
     """
     A sentence tokenizer which uses an unsupervised algorithm to build
     a model for abbreviation words, collocations, and words that start
@@ -1203,14 +1236,15 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
     This approach has been shown to work well for many European
     languages.
     """
-    def __init__(self, train_text=None, verbose=False,
-            lang_vars=PunktLanguageVars(), token_cls=PunktToken):
+
+    def __init__(
+        self, train_text=None, verbose=False, lang_vars=None, token_cls=PunktToken
+    ):
         """
         train_text can either be the sole training text for this sentence
         boundary detector, or can be a PunktParameters object.
         """
-        PunktBaseClass.__init__(self, lang_vars=lang_vars,
-                token_cls=token_cls)
+        PunktBaseClass.__init__(self, lang_vars=lang_vars, token_cls=token_cls)
 
         if train_text:
             self._params = self.train(train_text, verbose)
@@ -1221,14 +1255,15 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
         given. Repeated calls to this method destroy previous parameters. For
         incremental training, instantiate a separate PunktTrainer instance.
         """
-        if not isinstance(train_text, string_types):
+        if not isinstance(train_text, str):
             return train_text
-        return PunktTrainer(train_text, lang_vars=self._lang_vars,
-                token_cls=self._Token).get_params()
+        return PunktTrainer(
+            train_text, lang_vars=self._lang_vars, token_cls=self._Token
+        ).get_params()
 
-    #////////////////////////////////////////////////////////////
-    #{ Tokenization
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Tokenization
+    # ////////////////////////////////////////////////////////////
 
     def tokenize(self, text, realign_boundaries=True):
         """
@@ -1245,35 +1280,41 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
         """
 
         for match in self._lang_vars.period_context_re().finditer(text):
-            decision_text = match.group() + match.group('after_tok')
+            decision_text = match.group() + match.group("after_tok")
             tokens = self._tokenize_words(decision_text)
             tokens = list(self._annotate_first_pass(tokens))
             while not tokens[0].period_final:
                 tokens.pop(0)
-            yield dict(period_index=match.end() - 1,
+            yield dict(
+                period_index=match.end() - 1,
                 text=decision_text,
                 type1=tokens[0].type,
                 type2=tokens[1].type,
                 type1_in_abbrs=bool(tokens[0].abbr),
                 type1_is_initial=bool(tokens[0].is_initial),
-                type2_is_sent_starter=tokens[1].type_no_sentperiod in self._params.sent_starters,
+                type2_is_sent_starter=tokens[1].type_no_sentperiod
+                in self._params.sent_starters,
                 type2_ortho_heuristic=self._ortho_heuristic(tokens[1]),
-                type2_ortho_contexts=set(self._params._debug_ortho_context(tokens[1].type_no_sentperiod)),
-                collocation=(tokens[0].type_no_sentperiod, tokens[1].type_no_sentperiod) in self._params.collocations,
-
-                reason=self._second_pass_annotation(tokens[0], tokens[1]) or REASON_DEFAULT_DECISION,
+                type2_ortho_contexts=set(
+                    self._params._debug_ortho_context(tokens[1].type_no_sentperiod)
+                ),
+                collocation=(tokens[0].type_no_sentperiod, tokens[1].type_no_sentperiod)
+                in self._params.collocations,
+                reason=self._second_pass_annotation(tokens[0], tokens[1])
+                or REASON_DEFAULT_DECISION,
                 break_decision=tokens[0].sentbreak,
             )
 
     def span_tokenize(self, text, realign_boundaries=True):
         """
-        Given a text, returns a list of the (start, end) spans of sentences
+        Given a text, generates (start, end) spans of sentences
         in the text.
         """
         slices = self._slices_from_text(text)
         if realign_boundaries:
             slices = self._realign_boundaries(text, slices)
-        return [(sl.start, sl.stop) for sl in slices]
+        for sl in slices:
+            yield (sl.start, sl.stop)
 
     def sentences_from_text(self, text, realign_boundaries=True):
         """
@@ -1287,12 +1328,12 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
     def _slices_from_text(self, text):
         last_break = 0
         for match in self._lang_vars.period_context_re().finditer(text):
-            context = match.group() + match.group('after_tok')
+            context = match.group() + match.group("after_tok")
             if self.text_contains_sentbreak(context):
                 yield slice(last_break, match.end())
-                if match.group('next_tok'):
+                if match.group("next_tok"):
                     # next sentence starts after whitespace
-                    last_break = match.start('next_tok')
+                    last_break = match.start("next_tok")
                 else:
                     # next sentence starts at following punctuation
                     last_break = match.end()
@@ -1333,7 +1374,7 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
         """
         Returns True if the given text includes a sentence break.
         """
-        found = False # used to ignore last token
+        found = False  # used to ignore last token
         for t in self._annotate_tokens(self._tokenize_words(text)):
             if found:
                 return True
@@ -1381,8 +1422,8 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
         tokens = self._annotate_second_pass(tokens)
 
         ## [XX] TESTING
-        #tokens = list(tokens)
-        #self.dump(tokens)
+        # tokens = list(tokens)
+        # self.dump(tokens)
 
         return tokens
 
@@ -1399,9 +1440,9 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
         pos = 0
 
         # A regular expression that finds pieces of whitespace:
-        WS_REGEXP = re.compile(r'\s*')
+        WS_REGEXP = re.compile(r"\s*")
 
-        sentence = ''
+        sentence = ""
         for aug_tok in tokens:
             tok = aug_tok.tok
 
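The following hunk reformats the token-to-text realignment loop without changing it: a token produced from cleaned text may be split by whitespace in the original, so the loop falls back to a pattern that permits optional whitespace between every character. A standalone sketch of that trick (the offset 7 is hand-picked for the example):

    import re

    tok = "U.S."
    text = "in the U. S. today"
    pat = r"\s*".join(re.escape(c) for c in tok)  # 'U\s*\.\s*S\s*\.'
    m = re.compile(pat).match(text, 7)            # 7 = current scan position
    print(m.group() if m else None)               # 'U. S.'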
@@ -1414,13 +1455,14 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
             # that contain whitespace in the source text.  If our
             # token doesn't match, see if adding whitespace helps.
             # If so, then use the version with whitespace.
-            if text[pos:pos+len(tok)] != tok:
-                pat = '\s*'.join(re.escape(c) for c in tok)
-                m = re.compile(pat).match(text,pos)
-                if m: tok = m.group()
+            if text[pos : pos + len(tok)] != tok:
+                pat = "\s*".join(re.escape(c) for c in tok)
+                m = re.compile(pat).match(text, pos)
+                if m:
+                    tok = m.group()
 
             # Move our position pointer to the end of the token.
-            assert text[pos:pos+len(tok)] == tok
+            assert text[pos : pos + len(tok)] == tok
             pos += len(tok)
 
             # Add this token.  If it's not at the beginning of the
@@ -1433,7 +1475,7 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
             # If we're at a sentence break, then start a new sentence.
             if aug_tok.sentbreak:
                 yield sentence
-                sentence = ''
+                sentence = ""
 
         # If the last sentence is empty, discard it.
         if sentence:
@@ -1441,27 +1483,27 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
 
     # [XX] TESTING
     def dump(self, tokens):
-        print('writing to /tmp/punkt.new...')
-        with open('/tmp/punkt.new', 'w') as outfile:
+        print("writing to /tmp/punkt.new...")
+        with open("/tmp/punkt.new", "w") as outfile:
             for aug_tok in tokens:
                 if aug_tok.parastart:
-                    outfile.write('\n\n')
+                    outfile.write("\n\n")
                 elif aug_tok.linestart:
-                    outfile.write('\n')
+                    outfile.write("\n")
                 else:
-                    outfile.write(' ')
+                    outfile.write(" ")
 
                 outfile.write(str(aug_tok))
 
-    #////////////////////////////////////////////////////////////
-    #{ Customization Variables
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Customization Variables
+    # ////////////////////////////////////////////////////////////
 
-    PUNCTUATION = tuple(';:,.!?')
+    PUNCTUATION = tuple(";:,.!?")
 
-    #////////////////////////////////////////////////////////////
-    #{ Annotation Procedures
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
+    # { Annotation Procedures
+    # ////////////////////////////////////////////////////////////
 
     def _annotate_second_pass(self, tokens):
         """
@@ -1506,8 +1548,7 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
         # [4.2. Token-Based Reclassification of Abbreviations] If
         # the token is an abbreviation or an ellipsis, then decide
         # whether we should *also* classify it as a sentbreak.
-        if ( (aug_tok1.abbr or aug_tok1.ellipsis) and
-             (not tok_is_initial) ):
+        if (aug_tok1.abbr or aug_tok1.ellipsis) and (not tok_is_initial):
             # [4.1.1. Orthographic Heuristic] Check if there's
             # orthographic evidence about whether the next word
             # starts a sentence or not.
@@ -1520,15 +1561,14 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
             # next word is capitalized, and is a member of the
             # frequent-sentence-starters list, then label tok as a
             # sentence break.
-            if ( aug_tok2.first_upper and
-                 next_typ in self._params.sent_starters):
+            if aug_tok2.first_upper and next_typ in self._params.sent_starters:
                 aug_tok1.sentbreak = True
                 return REASON_ABBR_WITH_SENTENCE_STARTER
 
         # [4.3. Token-Based Detection of Initials and Ordinals]
         # Check if any initials or ordinals tokens that are marked
         # as sentbreaks should be reclassified as abbreviations.
-        if tok_is_initial or typ == '##number##':
+        if tok_is_initial or typ == "##number##":
 
             # [4.1.1. Orthographic Heuristic] Check if there's
             # orthographic evidence about whether the next word
@@ -1546,9 +1586,12 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
             # Special heuristic for initials: if the orthographic
             # heuristic is unknown, and the next word is always
             # capitalized, then mark as abbrev (eg: J. Bach).
-            if ( is_sent_starter == 'unknown' and tok_is_initial and
-                 aug_tok2.first_upper and
-                 not (self._params.ortho_context[next_typ] & _ORTHO_LC) ):
+            if (
+                is_sent_starter == "unknown"
+                and tok_is_initial
+                and aug_tok2.first_upper
+                and not (self._params.ortho_context[next_typ] & _ORTHO_LC)
+            ):
                 aug_tok1.sentbreak = False
                 aug_tok1.abbr = True
                 return REASON_INITIAL_WITH_SPECIAL_ORTHOGRAPHIC_HEURISTIC
@@ -1568,25 +1611,27 @@ class PunktSentenceTokenizer(PunktBaseClass,TokenizerI):
         # If the word is capitalized, occurs at least once with a
         # lower case first letter, and never occurs with an upper case
         # first letter sentence-internally, then it's a sentence starter.
-        if ( aug_tok.first_upper and
-             (ortho_context & _ORTHO_LC) and
-             not (ortho_context & _ORTHO_MID_UC) ):
+        if (
+            aug_tok.first_upper
+            and (ortho_context & _ORTHO_LC)
+            and not (ortho_context & _ORTHO_MID_UC)
+        ):
             return True
 
         # If the word is lower case, and either (a) we've seen it used
         # with upper case, or (b) we've never seen it used
         # sentence-initially with lower case, then it's not a sentence
         # starter.
-        if ( aug_tok.first_lower and
-             ((ortho_context & _ORTHO_UC) or
-              not (ortho_context & _ORTHO_BEG_LC)) ):
+        if aug_tok.first_lower and (
+            (ortho_context & _ORTHO_UC) or not (ortho_context & _ORTHO_BEG_LC)
+        ):
             return False
 
         # Otherwise, we're not sure.
-        return 'unknown'
+        return "unknown"
 
 
-DEBUG_DECISION_FMT = '''Text: %(text)r (at offset %(period_index)d)
+DEBUG_DECISION_FMT = """Text: %(text)r (at offset %(period_index)d)
 Sentence break? %(break_decision)s (%(reason)s)
 Collocation? %(collocation)s
 %(type1)r:
@@ -1596,13 +1641,18 @@ Collocation? %(collocation)s
     known sentence starter: %(type2_is_sent_starter)s
     orthographic heuristic suggests is a sentence starter? %(type2_ortho_heuristic)s
     orthographic contexts in training: %(type2_ortho_contexts)s
-'''
+"""
+
+
 def format_debug_decision(d):
     return DEBUG_DECISION_FMT % d
 
+
 def demo(text, tok_cls=PunktSentenceTokenizer, train_cls=PunktTrainer):
     """Builds a punkt model and applies it to the same text"""
-    cleanup = lambda s: re.compile(r'(?:\r|^\s+)', re.MULTILINE).sub('', s).replace('\n', ' ')
+    cleanup = (
+        lambda s: re.compile(r"(?:\r|^\s+)", re.MULTILINE).sub("", s).replace("\n", " ")
+    )
     trainer = train_cls()
     trainer.INCLUDE_ALL_COLLOCS = True
     trainer.train(text)
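
For reference, a minimal sketch of the usage the demo above exercises (the
sample text is made up, and a corpus this small yields only a toy model):

    from nltk.tokenize.punkt import PunktSentenceTokenizer

    corpus = "Dr. Smith arrived at 9 a.m. He was early. Mrs. Jones was not."
    tokenizer = PunktSentenceTokenizer(corpus)  # trains on the given text
    print(tokenizer.tokenize("Mr. Brown left. He said goodbye."))
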
diff --git a/nlp_resource_data/nltk/tokenize/punkt.pyc b/nlp_resource_data/nltk/tokenize/punkt.pyc
deleted file mode 100755 (executable)
index 0ced09b..0000000
Binary files a/nlp_resource_data/nltk/tokenize/punkt.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index cb0b61d..dd4630e
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 #         Trevor Cohn <tacohn@csse.unimelb.edu.au>
@@ -65,15 +65,13 @@ argument.  This differs from the conventions used by Python's
 ``re`` functions, where the pattern is always the first argument.
 (This is for consistency with the other NLTK tokenizers.)
 """
-from __future__ import unicode_literals
 
 import re
 
 from nltk.tokenize.api import TokenizerI
 from nltk.tokenize.util import regexp_span_tokenize
-from nltk.compat import python_2_unicode_compatible
 
-@python_2_unicode_compatible
+
 class RegexpTokenizer(TokenizerI):
     """
     A tokenizer that splits a string using a regular expression, which
@@ -100,21 +98,27 @@ class RegexpTokenizer(TokenizerI):
         used: `re.UNICODE | re.MULTILINE | re.DOTALL`.
 
     """
-    def __init__(self, pattern, gaps=False, discard_empty=True,
-                 flags=re.UNICODE | re.MULTILINE | re.DOTALL):
+
+    def __init__(
+        self,
+        pattern,
+        gaps=False,
+        discard_empty=True,
+        flags=re.UNICODE | re.MULTILINE | re.DOTALL,
+    ):
         # If they gave us a regexp object, extract the pattern.
-        pattern = getattr(pattern, 'pattern', pattern)
+        pattern = getattr(pattern, "pattern", pattern)
 
         self._pattern = pattern
         self._gaps = gaps
         self._discard_empty = discard_empty
         self._flags = flags
         self._regexp = None
-        
+
     def _check_regexp(self):
         if self._regexp is None:
             self._regexp = re.compile(self._pattern, self._flags)
-        
+
     def tokenize(self, text):
         self._check_regexp()
         # If our regexp matches gaps, use re.split:
@@ -140,9 +144,14 @@ class RegexpTokenizer(TokenizerI):
                 yield m.span()
 
     def __repr__(self):
-        return ('%s(pattern=%r, gaps=%r, discard_empty=%r, flags=%r)' %
-                (self.__class__.__name__, self._pattern, self._gaps,
-                 self._discard_empty, self._flags))
+        return "%s(pattern=%r, gaps=%r, discard_empty=%r, flags=%r)" % (
+            self.__class__.__name__,
+            self._pattern,
+            self._gaps,
+            self._discard_empty,
+            self._flags,
+        )
+
 
 class WhitespaceTokenizer(RegexpTokenizer):
     r"""
@@ -157,7 +166,8 @@ class WhitespaceTokenizer(RegexpTokenizer):
     """
 
     def __init__(self):
-        RegexpTokenizer.__init__(self, r'\s+', gaps=True)
+        RegexpTokenizer.__init__(self, r"\s+", gaps=True)
+
 
 class BlanklineTokenizer(RegexpTokenizer):
     """
@@ -165,8 +175,10 @@ class BlanklineTokenizer(RegexpTokenizer):
     Blank lines are defined as lines containing no characters, except for
     space or tab characters.
     """
+
     def __init__(self):
-        RegexpTokenizer.__init__(self, r'\s*\n\s*\n\s*', gaps=True)
+        RegexpTokenizer.__init__(self, r"\s*\n\s*\n\s*", gaps=True)
+
 
 class WordPunctTokenizer(RegexpTokenizer):
     """
@@ -179,15 +191,23 @@ class WordPunctTokenizer(RegexpTokenizer):
         ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York',
         '.', 'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
     """
+
     def __init__(self):
-        RegexpTokenizer.__init__(self, r'\w+|[^\w\s]+')
+        RegexpTokenizer.__init__(self, r"\w+|[^\w\s]+")
+
 
 ######################################################################
-#{ Tokenization Functions
+# { Tokenization Functions
 ######################################################################
 
-def regexp_tokenize(text, pattern, gaps=False, discard_empty=True,
-                    flags=re.UNICODE | re.MULTILINE | re.DOTALL):
+
+def regexp_tokenize(
+    text,
+    pattern,
+    gaps=False,
+    discard_empty=True,
+    flags=re.UNICODE | re.MULTILINE | re.DOTALL,
+):
     """
     Return a tokenized copy of *text*.  See :class:`.RegexpTokenizer`
     for descriptions of the arguments.
@@ -195,8 +215,6 @@ def regexp_tokenize(text, pattern, gaps=False, discard_empty=True,
     tokenizer = RegexpTokenizer(pattern, gaps, discard_empty, flags)
     return tokenizer.tokenize(text)
 
+
 blankline_tokenize = BlanklineTokenizer().tokenize
 wordpunct_tokenize = WordPunctTokenizer().tokenize
-
-
-
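
As a quick check of the reformatted API above, a sketch of RegexpTokenizer in
both matching and gap-splitting modes (the sample string mirrors the class
docstring):

    from nltk.tokenize import RegexpTokenizer, regexp_tokenize

    s = "Good muffins cost $3.88\nin New York."
    print(RegexpTokenizer(r"\w+|\$[\d\.]+|\S+").tokenize(s))
    # With gaps=True the pattern marks the separators instead of the tokens:
    print(regexp_tokenize(s, pattern=r"\s+", gaps=True))
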
old mode 100755 (executable)
new mode 100644 (file)
index aa2aa6c..49b5139
@@ -8,26 +8,23 @@
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import unicode_literals, print_function
-from six import text_type
-
 import os
 import re
 import sys
 import subprocess
 import tempfile
 
-
 from nltk.data import ZipFilePathPointer
 from nltk.internals import find_dir
 
 from nltk.tokenize.api import TokenizerI
 
+
 class ReppTokenizer(TokenizerI):
     """
     A class for word tokenization using the REPP parser described in
-    Rebecca Dridan and Stephan Oepen (2012) Tokenization: Returning to a 
-    Long Solved Problem - A Survey, Contrastive  Experiment, Recommendations, 
+    Rebecca Dridan and Stephan Oepen (2012) Tokenization: Returning to a
+    Long Solved Problem - A Survey, Contrastive  Experiment, Recommendations,
     and Toolkit. In ACL. http://anthology.aclweb.org/P/P12/P12-2.pdf#page=406
 
     >>> sents = ['Tokenization is widely regarded as a solved problem due to the high accuracy that rulebased tokenizers achieve.' ,
@@ -37,115 +34,118 @@ class ReppTokenizer(TokenizerI):
     >>> tokenizer = ReppTokenizer('/home/alvas/repp/') # doctest: +SKIP
     >>> for sent in sents:                             # doctest: +SKIP
     ...     tokenizer.tokenize(sent)                   # doctest: +SKIP
-    ... 
+    ...
     (u'Tokenization', u'is', u'widely', u'regarded', u'as', u'a', u'solved', u'problem', u'due', u'to', u'the', u'high', u'accuracy', u'that', u'rulebased', u'tokenizers', u'achieve', u'.')
     (u'But', u'rule-based', u'tokenizers', u'are', u'hard', u'to', u'maintain', u'and', u'their', u'rules', u'language', u'specific', u'.')
     (u'We', u'evaluated', u'our', u'method', u'on', u'three', u'languages', u'and', u'obtained', u'error', u'rates', u'of', u'0.27', u'%', u'(', u'English', u')', u',', u'0.35', u'%', u'(', u'Dutch', u')', u'and', u'0.76', u'%', u'(', u'Italian', u')', u'for', u'our', u'best', u'models', u'.')
 
     >>> for sent in tokenizer.tokenize_sents(sents): # doctest: +SKIP
-    ...     print sent                               # doctest: +SKIP
-    ... 
+    ...     print(sent)                              # doctest: +SKIP
+    ...
     (u'Tokenization', u'is', u'widely', u'regarded', u'as', u'a', u'solved', u'problem', u'due', u'to', u'the', u'high', u'accuracy', u'that', u'rulebased', u'tokenizers', u'achieve', u'.')
     (u'But', u'rule-based', u'tokenizers', u'are', u'hard', u'to', u'maintain', u'and', u'their', u'rules', u'language', u'specific', u'.')
     (u'We', u'evaluated', u'our', u'method', u'on', u'three', u'languages', u'and', u'obtained', u'error', u'rates', u'of', u'0.27', u'%', u'(', u'English', u')', u',', u'0.35', u'%', u'(', u'Dutch', u')', u'and', u'0.76', u'%', u'(', u'Italian', u')', u'for', u'our', u'best', u'models', u'.')
     >>> for sent in tokenizer.tokenize_sents(sents, keep_token_positions=True): # doctest: +SKIP
-    ...     print sent                                                          # doctest: +SKIP
-    ... 
+    ...     print(sent)                                                         # doctest: +SKIP
+    ...
     [(u'Tokenization', 0, 12), (u'is', 13, 15), (u'widely', 16, 22), (u'regarded', 23, 31), (u'as', 32, 34), (u'a', 35, 36), (u'solved', 37, 43), (u'problem', 44, 51), (u'due', 52, 55), (u'to', 56, 58), (u'the', 59, 62), (u'high', 63, 67), (u'accuracy', 68, 76), (u'that', 77, 81), (u'rulebased', 82, 91), (u'tokenizers', 92, 102), (u'achieve', 103, 110), (u'.', 110, 111)]
     [(u'But', 0, 3), (u'rule-based', 4, 14), (u'tokenizers', 15, 25), (u'are', 26, 29), (u'hard', 30, 34), (u'to', 35, 37), (u'maintain', 38, 46), (u'and', 47, 50), (u'their', 51, 56), (u'rules', 57, 62), (u'language', 63, 71), (u'specific', 72, 80), (u'.', 80, 81)]
     [(u'We', 0, 2), (u'evaluated', 3, 12), (u'our', 13, 16), (u'method', 17, 23), (u'on', 24, 26), (u'three', 27, 32), (u'languages', 33, 42), (u'and', 43, 46), (u'obtained', 47, 55), (u'error', 56, 61), (u'rates', 62, 67), (u'of', 68, 70), (u'0.27', 71, 75), (u'%', 75, 76), (u'(', 77, 78), (u'English', 78, 85), (u')', 85, 86), (u',', 86, 87), (u'0.35', 88, 92), (u'%', 92, 93), (u'(', 94, 95), (u'Dutch', 95, 100), (u')', 100, 101), (u'and', 102, 105), (u'0.76', 106, 110), (u'%', 110, 111), (u'(', 112, 113), (u'Italian', 113, 120), (u')', 120, 121), (u'for', 122, 125), (u'our', 126, 129), (u'best', 130, 134), (u'models', 135, 141), (u'.', 141, 142)]
     """
-    def __init__(self, repp_dir, encoding='utf8'):
+
+    def __init__(self, repp_dir, encoding="utf8"):
         self.repp_dir = self.find_repptokenizer(repp_dir)
-        # Set a directory to store the temporary files. 
+        # Set a directory to store the temporary files.
         self.working_dir = tempfile.gettempdir()
         # Set an encoding for the input strings.
         self.encoding = encoding
-        
+
     def tokenize(self, sentence):
         """
-        Use Repp to tokenize a single sentence.  
-        
+        Use Repp to tokenize a single sentence.
+
         :param sentence: A single sentence string.
         :type sentence: str
-        :return: A tuple of tokens. 
+        :return: A tuple of tokens.
         :rtype: tuple(str)
         """
         return next(self.tokenize_sents([sentence]))
-    
+
     def tokenize_sents(self, sentences, keep_token_positions=False):
         """
         Tokenize multiple sentences using Repp.
-                
+
         :param sentences: A list of sentence strings.
         :type sentences: list(str)
         :return: A list of tuples of tokens
         :rtype: iter(tuple(str))
         """
-        with tempfile.NamedTemporaryFile(prefix='repp_input.', 
-            dir=self.working_dir, mode='w', delete=False) as input_file:
+        with tempfile.NamedTemporaryFile(
+            prefix="repp_input.", dir=self.working_dir, mode="w", delete=False
+        ) as input_file:
             # Write sentences to temporary input file.
             for sent in sentences:
-                input_file.write(text_type(sent) + '\n')
+                input_file.write(str(sent) + "\n")
             input_file.close()
-            # Generate command to run REPP. 
-            cmd =self.generate_repp_command(input_file.name)
+            # Generate command to run REPP.
+            cmd = self.generate_repp_command(input_file.name)
             # Decode the stdout and strips the ending newline.
             repp_output = self._execute(cmd).decode(self.encoding).strip()
             for tokenized_sent in self.parse_repp_outputs(repp_output):
                 if not keep_token_positions:
                     # Removes token position information.
                     tokenized_sent, starts, ends = zip(*tokenized_sent)
-                yield tokenized_sent      
-        
+                yield tokenized_sent
+
     def generate_repp_command(self, inputfilename):
         """
         This method generates the REPP command to be run at the terminal.
-        
+
         :param inputfilename: path to the input file
         :type inputfilename: str
         """
-        cmd = [self.repp_dir + '/src/repp']
-        cmd+= ['-c', self.repp_dir + '/erg/repp.set']
-        cmd+= ['--format', 'triple']
-        cmd+= [inputfilename]
-        return cmd  
+        cmd = [self.repp_dir + "/src/repp"]
+        cmd += ["-c", self.repp_dir + "/erg/repp.set"]
+        cmd += ["--format", "triple"]
+        cmd += [inputfilename]
+        return cmd
 
     @staticmethod
     def _execute(cmd):
         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         stdout, stderr = p.communicate()
         return stdout
-    
-    @staticmethod    
+
+    @staticmethod
     def parse_repp_outputs(repp_output):
         """
         This method parses the tri-tuple format that REPP outputs using the
         "--format triple" option and returns a generator of tuples of string
         tokens.
-        
+
         :param repp_output:
         :type repp_output: type
         :return: an iterable of the tokenized sentences as tuples of strings
         :rtype: iter(tuple)
         """
-        line_regex = re.compile('^\((\d+), (\d+), (.+)\)$', re.MULTILINE)
-        for section in repp_output.split('\n\n'):
-            words_with_positions = [(token, int(start), int(end))
-                                    for start, end, token in 
-                                    line_regex.findall(section)]
+        line_regex = re.compile(r"^\((\d+), (\d+), (.+)\)$", re.MULTILINE)
+        for section in repp_output.split("\n\n"):
+            words_with_positions = [
+                (token, int(start), int(end))
+                for start, end, token in line_regex.findall(section)
+            ]
             words = tuple(t[2] for t in words_with_positions)
             yield words_with_positions
-    
+
     def find_repptokenizer(self, repp_dirname):
         """
         A method to find the REPP tokenizer binary and its *repp.set* config file.
         """
-        if os.path.exists(repp_dirname): # If a full path is given.
+        if os.path.exists(repp_dirname):  # If a full path is given.
             _repp_dir = repp_dirname
-        else: # Try to find path to REPP directory in environment variables.
-            _repp_dir = find_dir(repp_dirname, env_vars=('REPP_TOKENIZER',))
+        else:  # Try to find path to REPP directory in environment variables.
+            _repp_dir = find_dir(repp_dirname, env_vars=("REPP_TOKENIZER",))
         # Checks for the REPP binary and erg/repp.set config file.
-        assert os.path.exists(_repp_dir+'/src/repp')
-        assert os.path.exists(_repp_dir+'/erg/repp.set')
+        assert os.path.exists(_repp_dir + "/src/repp")
+        assert os.path.exists(_repp_dir + "/erg/repp.set")
         return _repp_dir
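
Since parse_repp_outputs() is a staticmethod, the triple-format parsing can be
sketched without a REPP installation (the sample output string below is
synthetic):

    from nltk.tokenize.repp import ReppTokenizer

    fake_output = "(0, 5, Hello)\n(6, 11, world)\n(11, 12, .)"
    for sent in ReppTokenizer.parse_repp_outputs(fake_output):
        print(sent)
    # [('Hello', 0, 5), ('world', 6, 11), ('.', 11, 12)]
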
old mode 100755 (executable)
new mode 100644 (file)
index d1bdb4e..9313a94
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Yoav Goldberg <yoavg@cs.bgu.ac.il>
 #         Steven Bird <stevenbird1@gmail.com> (minor edits)
 # URL: <http://nltk.sourceforge.net>
@@ -53,6 +53,7 @@ import re
 
 from nltk.tokenize.api import TokenizerI
 
+
 class SExprTokenizer(TokenizerI):
     """
     A tokenizer that divides strings into s-expressions.
@@ -75,14 +76,15 @@ class SExprTokenizer(TokenizerI):
     :param strict: If true, then raise an exception when tokenizing an ill-formed sexpr.
     """
 
-    def __init__(self, parens='()', strict=True):
+    def __init__(self, parens="()", strict=True):
         if len(parens) != 2:
-            raise ValueError('parens must contain exactly two strings')
+            raise ValueError("parens must contain exactly two strings")
         self._strict = strict
         self._open_paren = parens[0]
         self._close_paren = parens[1]
-        self._paren_regexp = re.compile('%s|%s' % (re.escape(parens[0]),
-                                                   re.escape(parens[1])))
+        self._paren_regexp = re.compile(
+            "%s|%s" % (re.escape(parens[0]), re.escape(parens[1]))
+        )
 
     def tokenize(self, text):
         """
@@ -117,26 +119,22 @@ class SExprTokenizer(TokenizerI):
         for m in self._paren_regexp.finditer(text):
             paren = m.group()
             if depth == 0:
-                result += text[pos:m.start()].split()
+                result += text[pos : m.start()].split()
                 pos = m.start()
             if paren == self._open_paren:
                 depth += 1
             if paren == self._close_paren:
                 if self._strict and depth == 0:
-                    raise ValueError('Un-matched close paren at char %d'
-                                     % m.start())
-                depth = max(0, depth-1)
+                    raise ValueError("Un-matched close paren at char %d" % m.start())
+                depth = max(0, depth - 1)
                 if depth == 0:
-                    result.append(text[pos:m.end()])
+                    result.append(text[pos : m.end()])
                     pos = m.end()
         if self._strict and depth > 0:
-            raise ValueError('Un-matched open paren at char %d' % pos)
+            raise ValueError("Un-matched open paren at char %d" % pos)
         if pos < len(text):
             result.append(text[pos:])
         return result
 
-sexpr_tokenize = SExprTokenizer().tokenize
-
-
-
 
+sexpr_tokenize = SExprTokenizer().tokenize
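
A short sketch of the tokenizer above, including the non-default parens option
(the first call mirrors the class docstring; the second string is made up):

    from nltk.tokenize.sexpr import SExprTokenizer, sexpr_tokenize

    print(sexpr_tokenize("(a b (c d)) e f (g)"))
    # ['(a b (c d))', 'e', 'f', '(g)']
    print(SExprTokenizer(parens="{}").tokenize("{a b {c}} d {e}"))
    # ['{a b {c}}', 'd', '{e}']
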
old mode 100755 (executable)
new mode 100644 (file)
index 2b7ffe4..ac1e400
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Simple Tokenizers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.sourceforge.net>
@@ -34,10 +34,11 @@ that expects a tokenizer.  For example, these tokenizers can be used
 to specify the tokenization conventions when building a `CorpusReader`.
 
 """
-from __future__ import unicode_literals
+
 from nltk.tokenize.api import TokenizerI, StringTokenizer
 from nltk.tokenize.util import string_span_tokenize, regexp_span_tokenize
 
+
 class SpaceTokenizer(StringTokenizer):
     r"""Tokenize a string using the space character as a delimiter,
     which is the same as ``s.split(' ')``.
@@ -49,7 +50,8 @@ class SpaceTokenizer(StringTokenizer):
         'Please', 'buy', 'me\ntwo', 'of', 'them.\n\nThanks.']
     """
 
-    _string = ' '
+    _string = " "
+
 
 class TabTokenizer(StringTokenizer):
     r"""Tokenize a string use the tab character as a delimiter,
@@ -60,7 +62,8 @@ class TabTokenizer(StringTokenizer):
         ['a', 'b c\n', ' d']
     """
 
-    _string = '\t'
+    _string = "\t"
+
 
 class CharTokenizer(StringTokenizer):
     """Tokenize a string into individual characters.  If this functionality
@@ -74,6 +77,7 @@ class CharTokenizer(StringTokenizer):
         for i, j in enumerate(range(1, len(s) + 1)):
             yield i, j
 
+
 class LineTokenizer(TokenizerI):
     r"""Tokenize a string into its lines, optionally discarding blank lines.
     This is similar to ``s.split('\n')``.
@@ -97,40 +101,40 @@ class LineTokenizer(TokenizerI):
            a corresponding token ``''`` after that newline.
     """
 
-    def __init__(self, blanklines='discard'):
-        valid_blanklines = ('discard', 'keep', 'discard-eof')
+    def __init__(self, blanklines="discard"):
+        valid_blanklines = ("discard", "keep", "discard-eof")
         if blanklines not in valid_blanklines:
-            raise ValueError('Blank lines must be one of: %s' %
-                             ' '.join(valid_blanklines))
+            raise ValueError(
+                "Blank lines must be one of: %s" % " ".join(valid_blanklines)
+            )
 
         self._blanklines = blanklines
 
     def tokenize(self, s):
         lines = s.splitlines()
         # If requested, strip off blank lines.
-        if self._blanklines == 'discard':
+        if self._blanklines == "discard":
             lines = [l for l in lines if l.rstrip()]
-        elif self._blanklines == 'discard-eof':
+        elif self._blanklines == "discard-eof":
             if lines and not lines[-1].strip():
                 lines.pop()
         return lines
 
     # discard-eof not implemented
     def span_tokenize(self, s):
-        if self._blanklines == 'keep':
-            for span in string_span_tokenize(s, r'\n'):
+        if self._blanklines == "keep":
+            for span in string_span_tokenize(s, r"\n"):
                 yield span
         else:
-            for span in regexp_span_tokenize(s, r'\n(\s+\n)*'):
+            for span in regexp_span_tokenize(s, r"\n(\s+\n)*"):
                 yield span
 
+
 ######################################################################
-#{ Tokenization Functions
+# { Tokenization Functions
 ######################################################################
 # XXX: it is stated in module docs that there is no function versions
 
-def line_tokenize(text, blanklines='discard'):
-    return LineTokenizer(blanklines).tokenize(text)
-
-
 
+def line_tokenize(text, blanklines="discard"):
+    return LineTokenizer(blanklines).tokenize(text)
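
A sketch exercising the simple tokenizers after the reformat (the sample text
is the one used in the docstrings above):

    from nltk.tokenize.simple import SpaceTokenizer, LineTokenizer, line_tokenize

    s = "Good muffins cost $3.88\nin New York.  Please buy me\ntwo of them.\n\nThanks."
    print(SpaceTokenizer().tokenize(s))              # same as s.split(' ')
    print(line_tokenize(s))                          # blank lines discarded
    print(LineTokenizer(blanklines="keep").tokenize(s))
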
diff --git a/nlp_resource_data/nltk/tokenize/simple.pyc b/nlp_resource_data/nltk/tokenize/simple.pyc
deleted file mode 100755 (executable)
index 29678ec..0000000
Binary files a/nlp_resource_data/nltk/tokenize/simple.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tokenize/sonority_sequencing.py b/nlp_resource_data/nltk/tokenize/sonority_sequencing.py
new file mode 100644 (file)
index 0000000..fb6b080
--- /dev/null
@@ -0,0 +1,192 @@
+# Natural Language Toolkit: Tokenizers
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Christopher Hench <chris.l.hench@gmail.com>
+#         Alex Estes
+# URL: <http://nltk.sourceforge.net>
+# For license information, see LICENSE.TXT
+
+"""
+The Sonority Sequencing Principle (SSP) is a language-agnostic algorithm proposed
+by Otto Jespersen in 1904. The sonorous quality of a phoneme is judged by the
+openness of the lips. Syllable breaks occur before troughs in sonority. For more
+on the SSP see Selkirk (1984).
+
+The default implementation uses the English alphabet, but the `sonority_hierarchy`
+can be modified to IPA or any other alphabet for the use-case. The SSP is a
+universal syllabification algorithm, but that does not mean it performs equally
+across languages. Bartlett et al. (2009) is a good benchmark for English accuracy
+if utilizing IPA (pg. 311).
+
+Importantly, if a custom hierarchy is supplied and vowels span across more than
+one level, they should be given separately to the `vowels` class attribute.
+
+References:
+- Otto Jespersen. 1904. Lehrbuch der Phonetik.
+  Leipzig, Teubner. Chapter 13, Silbe, pp. 185-203.
+- Elisabeth Selkirk. 1984. On the major class features and syllable theory.
+  In Aronoff & Oehrle (eds.) Language Sound Structure: Studies in Phonology.
+  Cambridge, MIT Press. pp. 107-136.
+- Susan Bartlett, et al. 2009. On the Syllabification of Phonemes.
+  In HLT-NAACL. pp. 308-316.
+"""
+
+import warnings
+
+import re
+from string import punctuation
+
+from nltk.tokenize.api import TokenizerI
+from nltk.util import ngrams
+
+
+class SyllableTokenizer(TokenizerI):
+    """
+    Syllabifies words based on the Sonority Sequencing Principle (SSP).
+
+        >>> from nltk.tokenize import SyllableTokenizer
+        >>> from nltk import word_tokenize
+        >>> SSP = SyllableTokenizer()
+        >>> SSP.tokenize('justification')
+        ['jus', 'ti', 'fi', 'ca', 'tion']
+        >>> text = "This is a foobar-like sentence."
+        >>> [SSP.tokenize(token) for token in word_tokenize(text)]
+        [['This'], ['is'], ['a'], ['foo', 'bar', '-', 'li', 'ke'], ['sen', 'ten', 'ce'], ['.']]
+    """
+
+    def __init__(self, lang="en", sonority_hierarchy=False):
+        """
+        :param lang: Language parameter, default is English, 'en'
+        :type lang: str
+        :param sonority_hierarchy: Sonority hierarchy according to the
+                                   Sonority Sequencing Principle.
+        :type sonority_hierarchy: list(str)
+        """
+        # Sonority hierarchy should be provided in descending order.
+        # If vowels are spread across multiple levels, they should all be
+        # assigned to the self.vowels attribute together; otherwise they
+        # should be placed at the first index of the hierarchy.
+        if not sonority_hierarchy and lang == "en":
+            sonority_hierarchy = [
+                "aeiouy",  # vowels.
+                "lmnrw",  # nasals.
+                "zvsf",  # fricatives.
+                "bcdgtkpqxhj",  # stops.
+            ]
+
+        self.vowels = sonority_hierarchy[0]
+        self.phoneme_map = {}
+        for i, level in enumerate(sonority_hierarchy):
+            for c in level:
+                sonority_level = len(sonority_hierarchy) - i
+                self.phoneme_map[c] = sonority_level
+                self.phoneme_map[c.upper()] = sonority_level
+
+    def assign_values(self, token):
+        """
+        Assigns each phoneme its value from the sonority hierarchy.
+        Note: Sentence/text has to be tokenized first.
+
+        :param token: Single word or token
+        :type token: str
+        :return: List of tuples, where the first element is the character/phoneme
+                 and the second is the sonority value.
+        :rtype: list(tuple(str, int))
+        """
+        syllables_values = []
+        for c in token:
+            try:
+                syllables_values.append((c, self.phoneme_map[c]))
+            except KeyError:
+                if c not in punctuation:
+                    warnings.warn(
+                        "Character not defined in sonority_hierarchy,"
+                        " assigning as vowel: '{}'".format(c)
+                    )
+                    syllables_values.append((c, max(self.phoneme_map.values())))
+                    self.vowels += c
+                else:  # If it's punctuation, assign -1.
+                    syllables_values.append((c, -1))
+        return syllables_values
+
+    def validate_syllables(self, syllable_list):
+        """
+        Ensures each syllable has at least one vowel.
+        If the following syllable doesn't have a vowel, add it to the current one.
+
+        :param syllable_list: Single word or token broken up into syllables.
+        :type syllable_list: list(str)
+        :return: Single word or token broken up into syllables
+                 (with added syllables if necessary)
+        :rtype: list(str)
+        """
+        valid_syllables = []
+        front = ""
+        for i, syllable in enumerate(syllable_list):
+            if syllable in punctuation:
+                valid_syllables.append(syllable)
+                continue
+            if not re.search("|".join(self.vowels), syllable):
+                if len(valid_syllables) == 0:
+                    front += syllable
+                else:
+                    valid_syllables = valid_syllables[:-1] + [
+                        valid_syllables[-1] + syllable
+                    ]
+            else:
+                if len(valid_syllables) == 0:
+                    valid_syllables.append(front + syllable)
+                else:
+                    valid_syllables.append(syllable)
+
+        return valid_syllables
+
+    def tokenize(self, token):
+        """
+        Apply the SSP to return a list of syllables.
+        Note: Sentence/text has to be tokenized first.
+
+        :param token: Single word or token
+        :type token: str
+        :return syllable_list: Single word or token broken up into syllables.
+        :rtype: list(str)
+        """
+        # assign values from hierarchy
+        syllables_values = self.assign_values(token)
+
+        # if only one vowel return word
+        if sum(token.count(x) for x in self.vowels) <= 1:
+            return [token]
+
+        syllable_list = []
+        syllable = syllables_values[0][0]  # start syllable with first phoneme
+        for trigram in ngrams(syllables_values, n=3):
+            phonemes, values = zip(*trigram)
+            # Sonority of previous, focal and following phoneme
+            prev_value, focal_value, next_value = values
+            # Focal phoneme.
+            focal_phoneme = phonemes[1]
+
+            # These cases trigger syllable break.
+            if focal_value == -1:  # If it's punctuation, just break.
+                syllable_list.append(syllable)
+                syllable_list.append(focal_phoneme)
+                syllable = ""
+            elif prev_value >= focal_value == next_value:
+                syllable += focal_phoneme
+                syllable_list.append(syllable)
+                syllable = ""
+
+            elif prev_value > focal_value < next_value:
+                syllable_list.append(syllable)
+                syllable = ""
+                syllable += focal_phoneme
+
+            # no syllable break
+            else:
+                syllable += focal_phoneme
+
+        syllable += syllables_values[-1][0]  # append last phoneme
+        syllable_list.append(syllable)
+
+        return self.validate_syllables(syllable_list)
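
Because the hierarchy is a plain constructor argument, the new module can be
driven with a custom alphabet; a hypothetical sketch (the level split below is
illustrative, not a linguistic recommendation):

    from nltk.tokenize.sonority_sequencing import SyllableTokenizer

    custom = [
        "aeiouy",       # vowels (must occupy the first level)
        "lrw",          # approximants
        "mn",           # nasals
        "zvsf",         # fricatives
        "bcdgtkpqxhj",  # stops
    ]
    ssp = SyllableTokenizer(sonority_hierarchy=custom)
    print(ssp.tokenize("justification"))
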
old mode 100755 (executable)
new mode 100644 (file)
index 9ac8352..b17f591
@@ -1,27 +1,24 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Interface to the Stanford Tokenizer
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Xu <xxu@student.unimelb.edu.au>
 #
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import unicode_literals, print_function
-
 import tempfile
 import os
 import json
 from subprocess import PIPE
 import warnings
 
-from six import text_type
-
 from nltk.internals import find_jar, config_java, java, _java_options
 from nltk.tokenize.api import TokenizerI
 from nltk.parse.corenlp import CoreNLPParser
 
-_stanford_url = 'https://nlp.stanford.edu/software/tokenizer.shtml'
+_stanford_url = "https://nlp.stanford.edu/software/tokenizer.shtml"
+
 
 class StanfordTokenizer(TokenizerI):
     r"""
@@ -36,28 +33,43 @@ class StanfordTokenizer(TokenizerI):
     ['The', 'color', 'of', 'the', 'wall', 'is', 'blue', '.']
     """
 
-    _JAR = 'stanford-postagger.jar'
+    _JAR = "stanford-postagger.jar"
 
-    def __init__(self, path_to_jar=None, encoding='utf8', options=None, verbose=False, java_options='-mx1000m'):
+    def __init__(
+        self,
+        path_to_jar=None,
+        encoding="utf8",
+        options=None,
+        verbose=False,
+        java_options="-mx1000m",
+    ):
         # Raise deprecation warning.
-        warnings.simplefilter('always', DeprecationWarning)
-        warnings.warn(str("\nThe StanfordTokenizer will "
-                          "be deprecated in version 3.2.5.\n"
-                          "Please use \033[91mnltk.parse.corenlp.CoreNLPTokenizer\033[0m instead.'"),
-                      DeprecationWarning, stacklevel=2)
-        warnings.simplefilter('ignore', DeprecationWarning)
+        warnings.warn(
+            str(
+                "\nThe StanfordTokenizer will "
+                "be deprecated in version 3.2.5.\n"
+                "Please use \033[91mnltk.parse.corenlp.CoreNLPParser\033[0m instead.'"
+            ),
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
         self._stanford_jar = find_jar(
-            self._JAR, path_to_jar,
-            env_vars=('STANFORD_POSTAGGER',),
-            searchpath=(), url=_stanford_url,
-            verbose=verbose
+            self._JAR,
+            path_to_jar,
+            env_vars=("STANFORD_POSTAGGER",),
+            searchpath=(),
+            url=_stanford_url,
+            verbose=verbose,
         )
 
         self._encoding = encoding
         self.java_options = java_options
 
         options = {} if options is None else options
-        self._options_cmd = ','.join('{0}={1}'.format(key, val) for key, val in options.items())
+        self._options_cmd = ",".join(
+            "{0}={1}".format(key, val) for key, val in options.items()
+        )
 
     @staticmethod
     def _parse_tokenized_output(s):
@@ -67,27 +79,25 @@ class StanfordTokenizer(TokenizerI):
         """
         Use stanford tokenizer's PTBTokenizer to tokenize multiple sentences.
         """
-        cmd = [
-            'edu.stanford.nlp.process.PTBTokenizer',
-        ]
+        cmd = ["edu.stanford.nlp.process.PTBTokenizer"]
         return self._parse_tokenized_output(self._execute(cmd, s))
 
     def _execute(self, cmd, input_, verbose=False):
         encoding = self._encoding
-        cmd.extend(['-charset', encoding])
+        cmd.extend(["-charset", encoding])
         _options_cmd = self._options_cmd
         if _options_cmd:
-            cmd.extend(['-options', self._options_cmd])
+            cmd.extend(["-options", self._options_cmd])
 
-        default_options = ' '.join(_java_options)
+        default_options = " ".join(_java_options)
 
         # Configure java.
         config_java(options=self.java_options, verbose=verbose)
 
         # Windows is incompatible with NamedTemporaryFile() without passing in delete=False.
-        with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file:
+        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as input_file:
             # Write the actual sentences to the temporary input file
-            if isinstance(input_, text_type) and encoding:
+            if isinstance(input_, str) and encoding:
                 input_ = input_.encode(encoding)
             input_file.write(input_)
             input_file.flush()
@@ -95,8 +105,9 @@ class StanfordTokenizer(TokenizerI):
             cmd.append(input_file.name)
 
             # Run the tagger and get the output.
-            stdout, stderr = java(cmd, classpath=self._stanford_jar,
-                                  stdout=PIPE, stderr=PIPE)
+            stdout, stderr = java(
+                cmd, classpath=self._stanford_jar, stdout=PIPE, stderr=PIPE
+            )
             stdout = stdout.decode(encoding)
 
         os.unlink(input_file.name)
@@ -107,38 +118,12 @@ class StanfordTokenizer(TokenizerI):
         return stdout
 
 
-class CoreNLPTokenizer(CoreNLPParser):
-    def __init__(self, url='http://localhost:9000', encoding='utf8'):
-        r"""
-        This is a duck-type of CoreNLPParser that has the tokenizing
-        functionality similar to the original Stanford POS tagger.
-
-            >>> from nltk.tokenize.stanford import CoreNLPTokenizer
-            >>> s = "Good muffins cost $3.88\nin New York.  Please buy me\ntwo of them.\nThanks."
-            >>> CoreNLPTokenizer(url='http://localhost:9000').tokenize(s) == expected # doctest: +SKIP
-            [u'Good', u'muffins', u'cost', u'$', u'3.88', u'in', u'New', u'York', u'.', u'Please', u'buy', u'me', u'two', u'of', u'them', u'.', u'Thanks', u'.']
-        """
-        super(CoreNLPTokenizer, self).__init__(url, encoding)
-
-    def tokenize(self, text, properties=None):
-        """
-        Tokenize a string of text. Consistent with the StanfordTokenizer, This
-        function returns a list of string. The orignal CoreNLPParser.tokenize()
-        returns a generator of string.
-        """
-        return list(super(CoreNLPTokenizer, self).tokenize(text, properties))
-
-
 def setup_module(module):
     from nose import SkipTest
 
     try:
         StanfordTokenizer()
     except LookupError:
-        raise SkipTest('doctests from nltk.tokenize.stanford are skipped because the stanford postagger jar doesn\'t exist')
-
-    try:
-        CoreNLPTokenizer()
-    except LookupError:
-        raise SkipTest('doctests from nltk.tokenize.stanford.CoreNLPTokenizer are skipped because the '
-                       'stanford corenlp server not started')
+        raise SkipTest(
+            "doctests from nltk.tokenize.stanford are skipped because the stanford postagger jar doesn't exist"
+        )
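
The deprecation message now points at CoreNLPParser; a minimal sketch of the
replacement (assumes a CoreNLP server is already listening on localhost:9000):

    from nltk.parse.corenlp import CoreNLPParser

    parser = CoreNLPParser(url="http://localhost:9000")
    print(list(parser.tokenize("Good muffins cost $3.88 in New York.")))
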
diff --git a/nlp_resource_data/nltk/tokenize/stanford.pyc b/nlp_resource_data/nltk/tokenize/stanford.pyc
deleted file mode 100755 (executable)
index 42e119a..0000000
Binary files a/nlp_resource_data/nltk/tokenize/stanford.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 077cbef..2595945
@@ -3,7 +3,7 @@
 # Natural Language Toolkit: Interface to the Stanford Segmenter
 # for Chinese and Arabic
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: 52nlp <52nlpcn@gmail.com>
 #         Casper Lehmann-Strøm <casperlehmann@gmail.com>
 #         Alex Constantin <alex@keyworder.ch>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import unicode_literals, print_function
-
 import tempfile
 import os
 import json
-from subprocess import PIPE
 import warnings
+from subprocess import PIPE
 
-from nltk import compat
-from nltk.internals import find_jar, find_file, find_dir, \
-                           config_java, java, _java_options
+from nltk.internals import (
+    find_jar,
+    find_file,
+    find_dir,
+    config_java,
+    java,
+    _java_options,
+)
 from nltk.tokenize.api import TokenizerI
 
-from six import text_type
 
-_stanford_url = 'https://nlp.stanford.edu/software'
+_stanford_url = "https://nlp.stanford.edu/software"
 
 
 class StanfordSegmenter(TokenizerI):
@@ -51,38 +53,53 @@ class StanfordSegmenter(TokenizerI):
     <BLANKLINE>
     """
 
-    _JAR = 'stanford-segmenter.jar'
-
-    def __init__(self,
-                 path_to_jar=None,
-                 path_to_slf4j=None,
-                 java_class=None,
-                 path_to_model=None,
-                 path_to_dict=None,
-                 path_to_sihan_corpora_dict=None,
-                 sihan_post_processing='false',
-                 keep_whitespaces='false',
-                 encoding='UTF-8', options=None,
-                 verbose=False, java_options='-mx2g'):
+    _JAR = "stanford-segmenter.jar"
+
+    def __init__(
+        self,
+        path_to_jar=None,
+        path_to_slf4j=None,
+        java_class=None,
+        path_to_model=None,
+        path_to_dict=None,
+        path_to_sihan_corpora_dict=None,
+        sihan_post_processing="false",
+        keep_whitespaces="false",
+        encoding="UTF-8",
+        options=None,
+        verbose=False,
+        java_options="-mx2g",
+    ):
         # Raise deprecation warning.
-        warnings.simplefilter('always', DeprecationWarning)
-        warnings.warn(str("\nThe StanfordTokenizer will "
-                          "be deprecated in version 3.2.5.\n"
-                          "Please use \033[91mnltk.parse.corenlp.CoreNLPTokenizer\033[0m instead.'"),
-                      DeprecationWarning, stacklevel=2)
-        warnings.simplefilter('ignore', DeprecationWarning)
+        warnings.simplefilter("always", DeprecationWarning)
+        warnings.warn(
+            str(
+                "\nThe StanfordTokenizer will "
+                "be deprecated in version 3.2.5.\n"
+                "Please use \033[91mnltk.parse.corenlp.CoreNLPTokenizer\033[0m instead.'"
+            ),
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        warnings.simplefilter("ignore", DeprecationWarning)
 
         stanford_segmenter = find_jar(
-                self._JAR, path_to_jar,
-                env_vars=('STANFORD_SEGMENTER',),
-                searchpath=(), url=_stanford_url,
-                verbose=verbose)
+            self._JAR,
+            path_to_jar,
+            env_vars=("STANFORD_SEGMENTER",),
+            searchpath=(),
+            url=_stanford_url,
+            verbose=verbose,
+        )
         if path_to_slf4j is not None:
             slf4j = find_jar(
-                'slf4j-api.jar', path_to_slf4j,
-                env_vars=('SLF4J', 'STANFORD_SEGMENTER',),
-                searchpath=(), url=_stanford_url,
-                verbose=verbose)
+                "slf4j-api.jar",
+                path_to_slf4j,
+                env_vars=("SLF4J", "STANFORD_SEGMENTER"),
+                searchpath=(),
+                url=_stanford_url,
+                verbose=verbose,
+            )
         else:
             slf4j = None
 
@@ -102,7 +119,9 @@ class StanfordSegmenter(TokenizerI):
         self._encoding = encoding
         self.java_options = java_options
         options = {} if options is None else options
-        self._options_cmd = ','.join('{0}={1}'.format(key, json.dumps(val)) for key, val in options.items())
+        self._options_cmd = ",".join(
+            "{0}={1}".format(key, json.dumps(val)) for key, val in options.items()
+        )
 
     def default_config(self, lang):
         """
@@ -111,51 +130,71 @@ class StanfordSegmenter(TokenizerI):
         """
 
         search_path = ()
-        if os.environ.get('STANFORD_SEGMENTER'):
-            search_path = {os.path.join(os.environ.get('STANFORD_SEGMENTER'), 'data')}
+        if os.environ.get("STANFORD_SEGMENTER"):
+            search_path = {os.path.join(os.environ.get("STANFORD_SEGMENTER"), "data")}
 
         # init for Chinese-specific files
         self._dict = None
         self._sihan_corpora_dict = None
-        self._sihan_post_processing = 'false'
+        self._sihan_post_processing = "false"
 
-        if lang == 'ar':
-            self._java_class = 'edu.stanford.nlp.international.arabic.process.ArabicSegmenter'
-            model = 'arabic-segmenter-atb+bn+arztrain.ser.gz'
+        if lang == "ar":
+            self._java_class = (
+                "edu.stanford.nlp.international.arabic.process.ArabicSegmenter"
+            )
+            model = "arabic-segmenter-atb+bn+arztrain.ser.gz"
 
-        elif lang == 'zh':
-            self._java_class = 'edu.stanford.nlp.ie.crf.CRFClassifier'
-            model = 'pku.gz'
-            self._sihan_post_processing = 'true'
+        elif lang == "zh":
+            self._java_class = "edu.stanford.nlp.ie.crf.CRFClassifier"
+            model = "pku.gz"
+            self._sihan_post_processing = "true"
 
-            path_to_dict = 'dict-chris6.ser.gz'
+            path_to_dict = "dict-chris6.ser.gz"
             try:
-                self._dict = find_file(path_to_dict, searchpath=search_path,
-                                       url=_stanford_url, verbose=False,
-                                       env_vars=('STANFORD_MODELS',))
+                self._dict = find_file(
+                    path_to_dict,
+                    searchpath=search_path,
+                    url=_stanford_url,
+                    verbose=False,
+                    env_vars=("STANFORD_MODELS",),
+                )
             except LookupError:
-                raise LookupError("Could not find '%s' (tried using env. "
-                    "variables STANFORD_MODELS and <STANFORD_SEGMENTER>/data/)" % path_to_dict)
+                raise LookupError(
+                    "Could not find '%s' (tried using env. "
+                    "variables STANFORD_MODELS and <STANFORD_SEGMENTER>/data/)"
+                    % path_to_dict
+                )
 
-            sihan_dir = './data/'
+            sihan_dir = "./data/"
             try:
-                path_to_sihan_dir = find_dir(sihan_dir,
-                                             url=_stanford_url, verbose=False,
-                                             env_vars=('STANFORD_SEGMENTER',))
+                path_to_sihan_dir = find_dir(
+                    sihan_dir,
+                    url=_stanford_url,
+                    verbose=False,
+                    env_vars=("STANFORD_SEGMENTER",),
+                )
                 self._sihan_corpora_dict = os.path.join(path_to_sihan_dir, sihan_dir)
             except LookupError:
-                raise LookupError("Could not find '%s' (tried using the "
-                    "STANFORD_SEGMENTER environment variable)" % sihan_dir)
+                raise LookupError(
+                    "Could not find '%s' (tried using the "
+                    "STANFORD_SEGMENTER environment variable)" % sihan_dir
+                )
         else:
-            raise LookupError("Unsupported language '%'" % lang)
+            raise LookupError("Unsupported language {}".format(lang))
 
         try:
-            self._model = find_file(model, searchpath=search_path,
-                                    url=_stanford_url, verbose=False,
-                                    env_vars=('STANFORD_MODELS', 'STANFORD_SEGMENTER',))
+            self._model = find_file(
+                model,
+                searchpath=search_path,
+                url=_stanford_url,
+                verbose=False,
+                env_vars=("STANFORD_MODELS", "STANFORD_SEGMENTER"),
+            )
         except LookupError:
-            raise LookupError("Could not find '%s' (tried using env. "
-                "variables STANFORD_MODELS and <STANFORD_SEGMENTER>/data/)" % model)
+            raise LookupError(
+                "Could not find '%s' (tried using env. "
+                "variables STANFORD_MODELS and <STANFORD_SEGMENTER>/data/)" % model
+            )
 
     def tokenize(self, s):
         super().tokenize(s)
@@ -165,14 +204,24 @@ class StanfordSegmenter(TokenizerI):
         """
         cmd = [
             self._java_class,
-            '-loadClassifier', self._model,
-            '-keepAllWhitespaces', self._keep_whitespaces,
-            '-textFile', input_file_path
+            "-loadClassifier",
+            self._model,
+            "-keepAllWhitespaces",
+            self._keep_whitespaces,
+            "-textFile",
+            input_file_path,
         ]
         if self._sihan_corpora_dict is not None:
-            cmd.extend(['-serDictionary', self._dict,
-                        '-sighanCorporaDict', self._sihan_corpora_dict,
-                        '-sighanPostProcessing', self._sihan_post_processing])
+            cmd.extend(
+                [
+                    "-serDictionary",
+                    self._dict,
+                    "-sighanCorporaDict",
+                    self._sihan_corpora_dict,
+                    "-sighanPostProcessing",
+                    self._sihan_post_processing,
+                ]
+            )
 
         stdout = self._execute(cmd)
 
@@ -189,23 +238,33 @@ class StanfordSegmenter(TokenizerI):
         _input_fh, self._input_file_path = tempfile.mkstemp(text=True)
 
         # Write the actual sentences to the temporary input file
-        _input_fh = os.fdopen(_input_fh, 'wb')
-        _input = '\n'.join((' '.join(x) for x in sentences))
-        if isinstance(_input, text_type) and encoding:
+        _input_fh = os.fdopen(_input_fh, "wb")
+        _input = "\n".join((" ".join(x) for x in sentences))
+        if isinstance(_input, str) and encoding:
             _input = _input.encode(encoding)
         _input_fh.write(_input)
         _input_fh.close()
 
         cmd = [
             self._java_class,
-            '-loadClassifier', self._model,
-            '-keepAllWhitespaces', self._keep_whitespaces,
-            '-textFile', self._input_file_path
+            "-loadClassifier",
+            self._model,
+            "-keepAllWhitespaces",
+            self._keep_whitespaces,
+            "-textFile",
+            self._input_file_path,
         ]
         if self._sihan_corpora_dict is not None:
-            cmd.extend(['-serDictionary', self._dict,
-                        '-sighanCorporaDict', self._sihan_corpora_dict,
-                        '-sighanPostProcessing', self._sihan_post_processing])
+            cmd.extend(
+                [
+                    "-serDictionary",
+                    self._dict,
+                    "-sighanCorporaDict",
+                    self._sihan_corpora_dict,
+                    "-sighanPostProcessing",
+                    self._sihan_post_processing,
+                ]
+            )
 
         stdout = self._execute(cmd)
 
@@ -216,17 +275,19 @@ class StanfordSegmenter(TokenizerI):
 
     def _execute(self, cmd, verbose=False):
         encoding = self._encoding
-        cmd.extend(['-inputEncoding', encoding])
+        cmd.extend(["-inputEncoding", encoding])
         _options_cmd = self._options_cmd
         if _options_cmd:
-            cmd.extend(['-options', self._options_cmd])
+            cmd.extend(["-options", self._options_cmd])
 
-        default_options = ' '.join(_java_options)
+        default_options = " ".join(_java_options)
 
         # Configure java.
         config_java(options=self.java_options, verbose=verbose)
 
-        stdout, _stderr = java(cmd, classpath=self._stanford_jar, stdout=PIPE, stderr=PIPE)
+        stdout, _stderr = java(
+            cmd, classpath=self._stanford_jar, stdout=PIPE, stderr=PIPE
+        )
         stdout = stdout.decode(encoding)
 
         # Return java configurations to their default values.
@@ -240,7 +301,9 @@ def setup_module(module):
 
     try:
         seg = StanfordSegmenter()
-        seg.default_config('ar')
-        seg.default_config('zh')
+        seg.default_config("ar")
+        seg.default_config("zh")
     except LookupError as e:
-        raise SkipTest('Tests for nltk.tokenize.stanford_segmenter skipped: %s' % str(e))
+        raise SkipTest(
+            "Tests for nltk.tokenize.stanford_segmenter skipped: %s" % str(e)
+        )
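
For completeness, a sketch of the segmenter's intended use (assumes the
STANFORD_SEGMENTER and STANFORD_MODELS environment variables point at a local
installation, as in the doctests above):

    from nltk.tokenize.stanford_segmenter import StanfordSegmenter

    seg = StanfordSegmenter()
    seg.default_config("zh")
    print(seg.segment("这是斯坦福中文分词器测试"))
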
diff --git a/nlp_resource_data/nltk/tokenize/stanford_segmenter.pyc b/nlp_resource_data/nltk/tokenize/stanford_segmenter.pyc
deleted file mode 100755 (executable)
index c67cfdb..0000000
Binary files a/nlp_resource_data/nltk/tokenize/stanford_segmenter.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index cf2e46e..dbcc980
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: TextTiling
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: George Boutsioukis
 #
 # URL: <http://nltk.org/>
@@ -61,23 +61,25 @@ class TextTilingTokenizer(TokenizerI):
     [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0]
     """
 
-    def __init__(self,
-                 w=20,
-                 k=10,
-                 similarity_method=BLOCK_COMPARISON,
-                 stopwords=None,
-                 smoothing_method=DEFAULT_SMOOTHING,
-                 smoothing_width=2,
-                 smoothing_rounds=1,
-                 cutoff_policy=HC,
-                 demo_mode=False):
-
+    def __init__(
+        self,
+        w=20,
+        k=10,
+        similarity_method=BLOCK_COMPARISON,
+        stopwords=None,
+        smoothing_method=DEFAULT_SMOOTHING,
+        smoothing_width=2,
+        smoothing_rounds=1,
+        cutoff_policy=HC,
+        demo_mode=False,
+    ):
 
         if stopwords is None:
             from nltk.corpus import stopwords
-            stopwords = stopwords.words('english')
+
+            stopwords = stopwords.words("english")
         self.__dict__.update(locals())
-        del self.__dict__['self']
+        del self.__dict__["self"]
 
     def tokenize(self, text):
         """Return a tokenized copy of *text*, where each "token" represents
@@ -90,8 +92,9 @@ class TextTilingTokenizer(TokenizerI):
         # Tokenization step starts here
 
         # Remove punctuation
-        nopunct_text = ''.join(c for c in lowercase_text
-                               if re.match("[a-z\-\' \n\t]", c))
+        nopunct_text = "".join(
+            c for c in lowercase_text if re.match(r"[a-z\-' \n\t]", c)
+        )
         nopunct_par_breaks = self._mark_paragraph_breaks(nopunct_text)
 
         tokseqs = self._divide_to_tokensequences(nopunct_text)
@@ -101,12 +104,13 @@ class TextTilingTokenizer(TokenizerI):
         # implementation states that it offers no benefit to the
         # process. It might be interesting to test the existing
         # stemmers though.
-        #words = _stem_words(words)
+        # words = _stem_words(words)
 
         # Filter stopwords
         for ts in tokseqs:
-            ts.wrdindex_list = [wi for wi in ts.wrdindex_list
-                                if wi[0] not in self.stopwords]
+            ts.wrdindex_list = [
+                wi for wi in ts.wrdindex_list if wi[0] not in self.stopwords
+            ]
 
         token_table = self._create_token_table(tokseqs, nopunct_par_breaks)
         # End of the Tokenization step
@@ -116,18 +120,26 @@ class TextTilingTokenizer(TokenizerI):
             gap_scores = self._block_comparison(tokseqs, token_table)
         elif self.similarity_method == VOCABULARY_INTRODUCTION:
             raise NotImplementedError("Vocabulary introduction not implemented")
+        else:
+            raise ValueError(
+                "Similarity method {} not recognized".format(self.similarity_method)
+            )
 
         if self.smoothing_method == DEFAULT_SMOOTHING:
             smooth_scores = self._smooth_scores(gap_scores)
+        else:
+            raise ValueError(
+                "Smoothing method {} not recognized".format(self.smoothing_method)
+            )
         # End of Lexical score Determination
 
         # Boundary identification
         depth_scores = self._depth_scores(smooth_scores)
         segment_boundaries = self._identify_boundaries(depth_scores)
 
-        normalized_boundaries = self._normalize_boundaries(text,
-                                                           segment_boundaries,
-                                                           paragraph_breaks)
+        normalized_boundaries = self._normalize_boundaries(
+            text, segment_boundaries, paragraph_breaks
+        )
         # End of Boundary Identification
         segmented_text = []
         prevb = 0
@@ -138,7 +150,7 @@ class TextTilingTokenizer(TokenizerI):
             segmented_text.append(text[prevb:b])
             prevb = b
 
-        if prevb < text_length: # append any text that may be remaining
+        if prevb < text_length:  # append any text that may be remaining
             segmented_text.append(text[prevb:])
 
         if not segmented_text:
@@ -149,41 +161,38 @@ class TextTilingTokenizer(TokenizerI):
         return segmented_text
 
     def _block_comparison(self, tokseqs, token_table):
-        "Implements the block comparison method"
+        """Implements the block comparison method"""
+
         def blk_frq(tok, block):
-            ts_occs = filter(lambda o: o[0] in block,
-                             token_table[tok].ts_occurences)
+            ts_occs = filter(lambda o: o[0] in block, token_table[tok].ts_occurences)
             freq = sum([tsocc[1] for tsocc in ts_occs])
             return freq
 
         gap_scores = []
-        numgaps = len(tokseqs)-1
+        numgaps = len(tokseqs) - 1
 
         for curr_gap in range(numgaps):
             score_dividend, score_divisor_b1, score_divisor_b2 = 0.0, 0.0, 0.0
             score = 0.0
-            #adjust window size for boundary conditions
-            if curr_gap < self.k-1:
+            # adjust window size for boundary conditions
+            if curr_gap < self.k - 1:
                 window_size = curr_gap + 1
-            elif curr_gap > numgaps-self.k:
+            elif curr_gap > numgaps - self.k:
                 window_size = numgaps - curr_gap
             else:
                 window_size = self.k
 
-            b1 = [ts.index
-                  for ts in tokseqs[curr_gap-window_size+1 : curr_gap+1]]
-            b2 = [ts.index
-                  for ts in tokseqs[curr_gap+1 : curr_gap+window_size+1]]
+            b1 = [ts.index for ts in tokseqs[curr_gap - window_size + 1 : curr_gap + 1]]
+            b2 = [ts.index for ts in tokseqs[curr_gap + 1 : curr_gap + window_size + 1]]
 
             for t in token_table:
-                score_dividend += blk_frq(t, b1)*blk_frq(t, b2)
-                score_divisor_b1 += blk_frq(t, b1)**2
-                score_divisor_b2 += blk_frq(t, b2)**2
+                score_dividend += blk_frq(t, b1) * blk_frq(t, b2)
+                score_divisor_b1 += blk_frq(t, b1) ** 2
+                score_divisor_b2 += blk_frq(t, b2) ** 2
             try:
-                score = score_dividend/math.sqrt(score_divisor_b1*
-                                                 score_divisor_b2)
+                score = score_dividend / math.sqrt(score_divisor_b1 * score_divisor_b2)
             except ZeroDivisionError:
-                pass # score += 0.0
+                pass  # score += 0.0
 
             gap_scores.append(score)
 
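
The gap score computed above is a cosine-style similarity between the two
blocks flanking each gap; a worked numeric sketch with made-up frequencies:

    import math

    b1 = {"whale": 2, "sea": 1, "ship": 1}     # block before the gap
    b2 = {"whale": 1, "sea": 1, "harpoon": 2}  # block after the gap
    vocab = set(b1) | set(b2)

    dividend = sum(b1.get(t, 0) * b2.get(t, 0) for t in vocab)
    divisor = math.sqrt(
        sum(b1.get(t, 0) ** 2 for t in vocab)
        * sum(b2.get(t, 0) ** 2 for t in vocab)
    )
    print(dividend / divisor)  # 0.5; lower scores suggest a topic boundary
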
@@ -191,8 +200,9 @@ class TextTilingTokenizer(TokenizerI):
 
     def _smooth_scores(self, gap_scores):
         "Wraps the smooth function from the SciPy Cookbook"
-        return list(smooth(numpy.array(gap_scores[:]),
-                           window_len = self.smoothing_width+1))
+        return list(
+            smooth(numpy.array(gap_scores[:]), window_len=self.smoothing_width + 1)
+        )
 
     def _mark_paragraph_breaks(self, text):
         """Identifies indented text or line breaks as the beginning of
@@ -204,7 +214,7 @@ class TextTilingTokenizer(TokenizerI):
         last_break = 0
         pbreaks = [0]
         for pb in matches:
-            if pb.start()-last_break < MIN_PARAGRAPH:
+            if pb.start() - last_break < MIN_PARAGRAPH:
                 continue
             else:
                 pbreaks.append(pb.start())
@@ -219,8 +229,10 @@ class TextTilingTokenizer(TokenizerI):
         matches = re.finditer(r"\w+", text)
         for match in matches:
             wrdindex_list.append((match.group(), match.start()))
-        return [TokenSequence(i/w, wrdindex_list[i:i+w])
-                for i in range(0, len(wrdindex_list), w)]
+        return [
+            TokenSequence(i / w, wrdindex_list[i : i + w])
+            for i in range(0, len(wrdindex_list), w)
+        ]
 
     def _create_token_table(self, token_sequences, par_breaks):
         "Creates a table of TokenTableFields"
@@ -231,11 +243,11 @@ class TextTilingTokenizer(TokenizerI):
         current_par_break = next(pb_iter)
         if current_par_break == 0:
             try:
-                current_par_break = next(pb_iter) #skip break at 0
+                current_par_break = next(pb_iter)  # skip break at 0
             except StopIteration:
                 raise ValueError(
                     "No paragraph breaks were found(text too short perhaps?)"
-                    )
+                )
         for ts in token_sequences:
             for word, index in ts.wrdindex_list:
                 try:
@@ -243,7 +255,7 @@ class TextTilingTokenizer(TokenizerI):
                         current_par_break = next(pb_iter)
                         current_par += 1
                 except StopIteration:
-                    #hit bottom
+                    # hit bottom
                     pass
 
                 if word in token_table:
@@ -255,19 +267,18 @@ class TextTilingTokenizer(TokenizerI):
 
                     if token_table[word].last_tok_seq != current_tok_seq:
                         token_table[word].last_tok_seq = current_tok_seq
-                        token_table[word]\
-                                .ts_occurences.append([current_tok_seq,1])
+                        token_table[word].ts_occurences.append([current_tok_seq, 1])
                     else:
                         token_table[word].ts_occurences[-1][1] += 1
-                else: #new word
-                    token_table[word] = TokenTableField(first_pos=index,
-                                                        ts_occurences= \
-                                                          [[current_tok_seq,1]],
-                                                        total_count=1,
-                                                        par_count=1,
-                                                        last_par=current_par,
-                                                        last_tok_seq= \
-                                                          current_tok_seq)
+                else:  # new word
+                    token_table[word] = TokenTableField(
+                        first_pos=index,
+                        ts_occurences=[[current_tok_seq, 1]],
+                        total_count=1,
+                        par_count=1,
+                        last_par=current_par,
+                        last_tok_seq=current_tok_seq,
+                    )
 
             current_tok_seq += 1
 
@@ -279,24 +290,27 @@ class TextTilingTokenizer(TokenizerI):
 
         boundaries = [0 for x in depth_scores]
 
-        avg = sum(depth_scores)/len(depth_scores)
+        avg = sum(depth_scores) / len(depth_scores)
         stdev = numpy.std(depth_scores)
 
-        #SB: what is the purpose of this conditional?
+        # SB: what is the purpose of this conditional?
         if self.cutoff_policy == LC:
-            cutoff = avg-stdev/2.0
+            cutoff = avg - stdev / 2.0
         else:
-            cutoff = avg-stdev/2.0
+            cutoff = avg - stdev / 2.0
 
         depth_tuples = sorted(zip(depth_scores, range(len(depth_scores))))
         depth_tuples.reverse()
-        hp = list(filter(lambda x:x[0]>cutoff, depth_tuples))
+        hp = list(filter(lambda x: x[0] > cutoff, depth_tuples))
 
         for dt in hp:
             boundaries[dt[1]] = 1
-            for dt2 in hp: #undo if there is a boundary close already
-                if dt[1] != dt2[1] and abs(dt2[1]-dt[1]) < 4 \
-                       and boundaries[dt2[1]] == 1:
+            for dt2 in hp:  # undo if there is a boundary close already
+                if (
+                    dt[1] != dt2[1]
+                    and abs(dt2[1] - dt[1]) < 4
+                    and boundaries[dt2[1]] == 1
+                ):
                     boundaries[dt[1]] = 0
         return boundaries
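
The rule implemented above: a gap becomes a candidate boundary when its depth
score exceeds avg - stdev/2, and a candidate is discarded when another marked
boundary lies within four gaps of it. A minimal sketch of the cutoff on
hypothetical scores (omitting the descending-order suppression pass):

    import numpy

    depth_scores = [0.10, 0.85, 0.20, 0.70, 0.05, 0.60]  # hypothetical gaps
    cutoff = sum(depth_scores) / len(depth_scores) - numpy.std(depth_scores) / 2.0
    boundaries = [1 if score > cutoff else 0 for score in depth_scores]
    # _identify_boundaries() additionally walks candidates from strongest to
    # weakest and unsets any boundary within four gaps of an already-kept one.
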
 
@@ -305,9 +319,9 @@ class TextTilingTokenizer(TokenizerI):
         between the left and right peaks and the gap's score"""
 
         depth_scores = [0 for x in scores]
-        #clip boundaries: this holds on the rule of thumb(my thumb)
-        #that a section shouldn't be smaller than at least 2
-        #pseudosentences for small texts and around 5 for larger ones.
+        # clip boundaries: this relies on the rule of thumb (my thumb)
+        # that a section shouldn't be smaller than 2 pseudosentences
+        # for small texts, or around 5 for larger ones.
 
         clip = min(max(len(scores) // 10, 2), 5)
         index = clip
@@ -344,19 +358,20 @@ class TextTilingTokenizer(TokenizerI):
                 seen_word = False
                 word_count += 1
             if char not in " \t\n" and not seen_word:
-                seen_word=True
-            if gaps_seen < len(boundaries) and word_count > \
-                                               (max(gaps_seen*self.w, self.w)):
+                seen_word = True
+            if gaps_seen < len(boundaries) and word_count > (
+                max(gaps_seen * self.w, self.w)
+            ):
                 if boundaries[gaps_seen] == 1:
-                    #find closest paragraph break
+                    # find closest paragraph break
                     best_fit = len(text)
                     for br in paragraph_breaks:
-                        if best_fit > abs(br-char_count):
-                            best_fit = abs(br-char_count)
+                        if best_fit > abs(br - char_count):
+                            best_fit = abs(br - char_count)
                             bestbr = br
                         else:
                             break
-                    if bestbr not in norm_boundaries: #avoid duplicates
+                    if bestbr not in norm_boundaries:  # avoid duplicates
                         norm_boundaries.append(bestbr)
                 gaps_seen += 1
 
@@ -366,29 +381,31 @@ class TextTilingTokenizer(TokenizerI):
 class TokenTableField(object):
     """A field in the token table holding parameters for each token,
     used later in the process"""
-    def __init__(self,
-                 first_pos,
-                 ts_occurences,
-                 total_count=1,
-                 par_count=1,
-                 last_par=0,
-                 last_tok_seq=None):
+
+    def __init__(
+        self,
+        first_pos,
+        ts_occurences,
+        total_count=1,
+        par_count=1,
+        last_par=0,
+        last_tok_seq=None,
+    ):
         self.__dict__.update(locals())
-        del self.__dict__['self']
+        del self.__dict__["self"]
+
 
 class TokenSequence(object):
     "A token list with its original length and its index"
-    def __init__(self,
-                 index,
-                 wrdindex_list,
-                 original_length=None):
-        original_length=original_length or len(wrdindex_list)
+
+    def __init__(self, index, wrdindex_list, original_length=None):
+        original_length = original_length or len(wrdindex_list)
         self.__dict__.update(locals())
-        del self.__dict__['self']
+        del self.__dict__["self"]
 
 
-#Pasted from the SciPy cookbook: http://www.scipy.org/Cookbook/SignalSmooth
-def smooth(x,window_len=11,window='flat'):
+# Pasted from the SciPy cookbook: http://www.scipy.org/Cookbook/SignalSmooth
+def smooth(x, window_len=11, window="flat"):
     """smooth the data using a window with requested size.
 
     This method is based on the convolution of a scaled window with the signal.
@@ -424,27 +441,31 @@ def smooth(x,window_len=11,window='flat'):
     if window_len < 3:
         return x
 
-    if not window in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
-        raise ValueError("Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
+    if window not in ["flat", "hanning", "hamming", "bartlett", "blackman"]:
+        raise ValueError(
+            "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'"
+        )
 
-    s=numpy.r_[2*x[0]-x[window_len:1:-1],x,2*x[-1]-x[-1:-window_len:-1]]
+    s = numpy.r_[2 * x[0] - x[window_len:1:-1], x, 2 * x[-1] - x[-1:-window_len:-1]]
 
-    #print(len(s))
-    if window == 'flat': #moving average
-        w = numpy.ones(window_len,'d')
+    # print(len(s))
+    if window == "flat":  # moving average
+        w = numpy.ones(window_len, "d")
     else:
-        w = eval('numpy.' + window + '(window_len)')
+        w = eval("numpy." + window + "(window_len)")
 
-    y = numpy.convolve(w/w.sum(), s, mode='same')
+    y = numpy.convolve(w / w.sum(), s, mode="same")
 
-    return y[window_len-1:-window_len+1]
+    return y[window_len - 1 : -window_len + 1]
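
Because the signal is extended by reflected copies at both ends before the
convolution, the slice returned above has the same length as the input. A
quick sketch of the flat (moving-average) case on made-up gap scores:

    import numpy
    from nltk.tokenize.texttiling import smooth

    gap_scores = numpy.array([0.2, 0.9, 0.1, 0.8, 0.3])
    smoothed = smooth(gap_scores, window_len=3, window="flat")
    assert len(smoothed) == len(gap_scores)  # reflection + slicing keep length
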
 
 
 def demo(text=None):
     from nltk.corpus import brown
     from matplotlib import pylab
+
     tt = TextTilingTokenizer(demo_mode=True)
-    if text is None: text = brown.raw()[:10000]
+    if text is None:
+        text = brown.raw()[:10000]
     s, ss, d, b = tt.tokenize(text)
     pylab.xlabel("Sentence Gap index")
     pylab.ylabel("Gap Scores")
@@ -454,5 +475,3 @@ def demo(text=None):
     pylab.stem(range(len(b)), b)
     pylab.legend()
     pylab.show()
-
-
diff --git a/nlp_resource_data/nltk/tokenize/texttiling.pyc b/nlp_resource_data/nltk/tokenize/texttiling.pyc
deleted file mode 100755 (executable)
index 025f18a..0000000
Binary files a/nlp_resource_data/nltk/tokenize/texttiling.pyc and /dev/null differ
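
For reference, a minimal usage sketch of the class this file implements
(assuming nltk and its corpus data, brown and stopwords, are available;
the sample mirrors the demo() above):

    from nltk.corpus import brown
    from nltk.tokenize.texttiling import TextTilingTokenizer

    tt = TextTilingTokenizer()            # default w=20, k=10
    text = brown.raw()[:10000]
    for i, tile in enumerate(tt.tokenize(text)):
        print(i, len(tile))               # one multi-paragraph segment per tile
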
diff --git a/nlp_resource_data/nltk/tokenize/toktok.py b/nlp_resource_data/nltk/tokenize/toktok.py
old mode 100755 (executable)
new mode 100644 (file)
index 3c46373..0c595b2
@@ -9,34 +9,34 @@
 # For license information, see LICENSE.TXT
 
 """
-The tok-tok tokenizer is a simple, general tokenizer, where the input has one 
+The tok-tok tokenizer is a simple, general tokenizer, where the input has one
 sentence per line; thus only the final period is tokenized.
 
-Tok-tok has been tested on, and gives reasonably good results for English, 
-Persian, Russian, Czech, French, German, Vietnamese, Tajik, and a few others. 
+Tok-tok has been tested on, and gives reasonably good results for English,
+Persian, Russian, Czech, French, German, Vietnamese, Tajik, and a few others.
 The input should be in UTF-8 encoding.
 
 Reference:
-Jon Dehdari. 2014. A Neurophysiologically-Inspired Statistical Language 
-Model (Doctoral dissertation). Columbus, OH, USA: The Ohio State University. 
+Jon Dehdari. 2014. A Neurophysiologically-Inspired Statistical Language
+Model (Doctoral dissertation). Columbus, OH, USA: The Ohio State University.
 """
 
 import re
-from six import text_type
 
 from nltk.tokenize.api import TokenizerI
 
+
 class ToktokTokenizer(TokenizerI):
     """
     This is a Python port of the tok-tok.pl from
     https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl
-    
+
     >>> toktok = ToktokTokenizer()
     >>> text = u'Is 9.5 or 525,600 my favorite number?'
-    >>> print (toktok.tokenize(text, return_str=True))
+    >>> print(toktok.tokenize(text, return_str=True))
     Is 9.5 or 525,600 my favorite number ?
     >>> text = u'The https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl is a website with/and/or slashes and sort of weird : things'
-    >>> print (toktok.tokenize(text, return_str=True))
+    >>> print(toktok.tokenize(text, return_str=True))
     The https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl is a website with/and/or slashes and sort of weird : things
     >>> text = u'\xa1This, is a sentence with weird\xbb symbols\u2026 appearing everywhere\xbf'
     >>> expected = u'\xa1 This , is a sentence with weird \xbb symbols \u2026 appearing everywhere \xbf'
@@ -44,112 +44,137 @@ class ToktokTokenizer(TokenizerI):
     >>> toktok.tokenize(text) == [u'\xa1', u'This', u',', u'is', u'a', u'sentence', u'with', u'weird', u'\xbb', u'symbols', u'\u2026', u'appearing', u'everywhere', u'\xbf']
     True
     """
+
     # Replace non-breaking spaces with normal spaces.
     NON_BREAKING = re.compile(u"\u00A0"), " "
-    
+
     # Pad some funky punctuation.
     FUNKY_PUNCT_1 = re.compile(u'([،;؛¿!"\])}»›”؟¡%٪°±©®।॥…])'), r" \1 "
     # Pad more funky punctuation.
-    FUNKY_PUNCT_2 = re.compile(u'([({\[“‘„‚«‹「『])'), r" \1 "
+    FUNKY_PUNCT_2 = re.compile(u"([({\[“‘„‚«‹「『])"), r" \1 "
     # Pad En dash and em dash
-    EN_EM_DASHES = re.compile(u'([–—])'), r" \1 "
-    
+    EN_EM_DASHES = re.compile(u"([–—])"), r" \1 "
+
     # Replace problematic characters with numeric character references.
-    AMPERCENT = re.compile('& '), '&amp; '
-    TAB = re.compile('\t'), ' &#9; '
-    PIPE = re.compile('\|'), ' &#124; '
-    
-    # Pad numbers with commas to keep them from further tokenization. 
-    COMMA_IN_NUM = re.compile(r'(?<!,)([,،])(?![,\d])'), r' \1 '
-    
+    AMPERCENT = re.compile("& "), "&amp; "
+    TAB = re.compile("\t"), " &#9; "
+    PIPE = re.compile("\|"), " &#124; "
+
+    # Pad numbers with commas to keep them from further tokenization.
+    COMMA_IN_NUM = re.compile(r"(?<!,)([,،])(?![,\d])"), r" \1 "
+
     # Just pad problematic (often neurotic) hyphen/single quote, etc.
-    PROB_SINGLE_QUOTES = re.compile(r"(['’`])"), r' \1 '
+    PROB_SINGLE_QUOTES = re.compile(r"(['’`])"), r" \1 "
     # Group ` ` stupid quotes ' ' into a single token.
     STUPID_QUOTES_1 = re.compile(r" ` ` "), r" `` "
     STUPID_QUOTES_2 = re.compile(r" ' ' "), r" '' "
-    
-    # Don't tokenize period unless it ends the line and that it isn't 
-    # preceded by another period, e.g.  
-    # "something ..." -> "something ..." 
-    # "something." -> "something ." 
+
+    # Don't tokenize period unless it ends the line and that it isn't
+    # preceded by another period, e.g.
+    # "something ..." -> "something ..."
+    # "something." -> "something ."
     FINAL_PERIOD_1 = re.compile(r"(?<!\.)\.$"), r" ."
-    # Don't tokenize period unless it ends the line eg. 
+    # Don't tokenize a period unless it ends the line, e.g.
     # " ... stuff." ->  "... stuff ."
     FINAL_PERIOD_2 = re.compile(r"""(?<!\.)\.\s*(["'’»›”]) *$"""), r" . \1"
 
     # Treat continuous commas as fake German, Czech, etc.: „
-    MULTI_COMMAS = re.compile(r'(,{2,})'), r' \1 '
+    MULTI_COMMAS = re.compile(r"(,{2,})"), r" \1 "
     # Treat continuous dashes as fake en-dash, etc.
-    MULTI_DASHES = re.compile(r'(-{2,})'), r' \1 '
+    MULTI_DASHES = re.compile(r"(-{2,})"), r" \1 "
     # Treat multiple periods as a thing (eg. ellipsis)
-    MULTI_DOTS = re.compile(r'(\.{2,})'), r' \1 '
+    MULTI_DOTS = re.compile(r"(\.{2,})"), r" \1 "
 
     # This is the \p{Open_Punctuation} from Perl's perluniprops
     # see http://perldoc.perl.org/perluniprops.html
-    OPEN_PUNCT = text_type(u'([{\u0f3a\u0f3c\u169b\u201a\u201e\u2045\u207d'
-                            u'\u208d\u2329\u2768\u276a\u276c\u276e\u2770\u2772'
-                            u'\u2774\u27c5\u27e6\u27e8\u27ea\u27ec\u27ee\u2983'
-                            u'\u2985\u2987\u2989\u298b\u298d\u298f\u2991\u2993'
-                            u'\u2995\u2997\u29d8\u29da\u29fc\u2e22\u2e24\u2e26'
-                            u'\u2e28\u3008\u300a\u300c\u300e\u3010\u3014\u3016'
-                            u'\u3018\u301a\u301d\ufd3e\ufe17\ufe35\ufe37\ufe39'
-                            u'\ufe3b\ufe3d\ufe3f\ufe41\ufe43\ufe47\ufe59\ufe5b'
-                            u'\ufe5d\uff08\uff3b\uff5b\uff5f\uff62')
+    OPEN_PUNCT = str(
+        u"([{\u0f3a\u0f3c\u169b\u201a\u201e\u2045\u207d"
+        u"\u208d\u2329\u2768\u276a\u276c\u276e\u2770\u2772"
+        u"\u2774\u27c5\u27e6\u27e8\u27ea\u27ec\u27ee\u2983"
+        u"\u2985\u2987\u2989\u298b\u298d\u298f\u2991\u2993"
+        u"\u2995\u2997\u29d8\u29da\u29fc\u2e22\u2e24\u2e26"
+        u"\u2e28\u3008\u300a\u300c\u300e\u3010\u3014\u3016"
+        u"\u3018\u301a\u301d\ufd3e\ufe17\ufe35\ufe37\ufe39"
+        u"\ufe3b\ufe3d\ufe3f\ufe41\ufe43\ufe47\ufe59\ufe5b"
+        u"\ufe5d\uff08\uff3b\uff5b\uff5f\uff62"
+    )
     # This is the \p{Close_Punctuation} from Perl's perluniprops
-    CLOSE_PUNCT = text_type(u')]}\u0f3b\u0f3d\u169c\u2046\u207e\u208e\u232a'
-                            u'\u2769\u276b\u276d\u276f\u2771\u2773\u2775\u27c6'
-                            u'\u27e7\u27e9\u27eb\u27ed\u27ef\u2984\u2986\u2988'
-                            u'\u298a\u298c\u298e\u2990\u2992\u2994\u2996\u2998'
-                            u'\u29d9\u29db\u29fd\u2e23\u2e25\u2e27\u2e29\u3009'
-                            u'\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b'
-                            u'\u301e\u301f\ufd3f\ufe18\ufe36\ufe38\ufe3a\ufe3c'
-                            u'\ufe3e\ufe40\ufe42\ufe44\ufe48\ufe5a\ufe5c\ufe5e'
-                            u'\uff09\uff3d\uff5d\uff60\uff63')
+    CLOSE_PUNCT = str(
+        u")]}\u0f3b\u0f3d\u169c\u2046\u207e\u208e\u232a"
+        u"\u2769\u276b\u276d\u276f\u2771\u2773\u2775\u27c6"
+        u"\u27e7\u27e9\u27eb\u27ed\u27ef\u2984\u2986\u2988"
+        u"\u298a\u298c\u298e\u2990\u2992\u2994\u2996\u2998"
+        u"\u29d9\u29db\u29fd\u2e23\u2e25\u2e27\u2e29\u3009"
+        u"\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b"
+        u"\u301e\u301f\ufd3f\ufe18\ufe36\ufe38\ufe3a\ufe3c"
+        u"\ufe3e\ufe40\ufe42\ufe44\ufe48\ufe5a\ufe5c\ufe5e"
+        u"\uff09\uff3d\uff5d\uff60\uff63"
+    )
     # This is the \p{Currency_Symbol} from Perl's perluniprops
-    CURRENCY_SYM = text_type(u'$\xa2\xa3\xa4\xa5\u058f\u060b\u09f2\u09f3\u09fb'
-                             u'\u0af1\u0bf9\u0e3f\u17db\u20a0\u20a1\u20a2\u20a3'
-                             u'\u20a4\u20a5\u20a6\u20a7\u20a8\u20a9\u20aa\u20ab'
-                             u'\u20ac\u20ad\u20ae\u20af\u20b0\u20b1\u20b2\u20b3'
-                             u'\u20b4\u20b5\u20b6\u20b7\u20b8\u20b9\u20ba\ua838'
-                             u'\ufdfc\ufe69\uff04\uffe0\uffe1\uffe5\uffe6')
-    
+    CURRENCY_SYM = str(
+        u"$\xa2\xa3\xa4\xa5\u058f\u060b\u09f2\u09f3\u09fb"
+        u"\u0af1\u0bf9\u0e3f\u17db\u20a0\u20a1\u20a2\u20a3"
+        u"\u20a4\u20a5\u20a6\u20a7\u20a8\u20a9\u20aa\u20ab"
+        u"\u20ac\u20ad\u20ae\u20af\u20b0\u20b1\u20b2\u20b3"
+        u"\u20b4\u20b5\u20b6\u20b7\u20b8\u20b9\u20ba\ua838"
+        u"\ufdfc\ufe69\uff04\uffe0\uffe1\uffe5\uffe6"
+    )
+
     # Pad spaces after opening punctuations.
-    OPEN_PUNCT_RE = re.compile(u'([{}])'.format(OPEN_PUNCT)), r'\1 '
+    OPEN_PUNCT_RE = re.compile(u"([{}])".format(OPEN_PUNCT)), r"\1 "
     # Pad spaces before closing punctuations.
-    CLOSE_PUNCT_RE = re.compile(u'([{}])'.format(CLOSE_PUNCT)), r'\1 '
+    CLOSE_PUNCT_RE = re.compile(u"([{}])".format(CLOSE_PUNCT)), r"\1 "
     # Pad spaces after currency symbols.
-    CURRENCY_SYM_RE = re.compile(u'([{}])'.format(CURRENCY_SYM)), r'\1 '
-    
+    CURRENCY_SYM_RE = re.compile(u"([{}])".format(CURRENCY_SYM)), r"\1 "
+
     # Use for tokenizing URL-unfriendly characters: [:/?#]
-    URL_FOE_1 = re.compile(r':(?!//)'), r' : ' # in perl s{:(?!//)}{ : }g;
-    URL_FOE_2 = re.compile(r'\?(?!\S)'), r' ? ' # in perl s{\?(?!\S)}{ ? }g;
+    URL_FOE_1 = re.compile(r":(?!//)"), r" : "  # in perl s{:(?!//)}{ : }g;
+    URL_FOE_2 = re.compile(r"\?(?!\S)"), r" ? "  # in perl s{\?(?!\S)}{ ? }g;
     # in perl: m{://} or m{\S+\.\S+/\S+} or s{/}{ / }g;
-    URL_FOE_3 = re.compile(r'(:\/\/)[\S+\.\S+\/\S+][\/]'), ' / '
-    URL_FOE_4 = re.compile(r' /'), r' / ' # s{ /}{ / }g;
-    
+    URL_FOE_3 = re.compile(r"(:\/\/)[\S+\.\S+\/\S+][\/]"), " / "
+    URL_FOE_4 = re.compile(r" /"), r" / "  # s{ /}{ / }g;
+
     # Left/Right strip, i.e. remove heading/trailing spaces.
     # These strip regexes should NOT be used,
-    # instead use str.lstrip(), str.rstrip() or str.strip() 
-    # (They are kept for reference purposes to the original toktok.pl code)  
-    LSTRIP = re.compile(r'^ +'), ''
-    RSTRIP = re.compile(r'\s+$'),'\n' 
+    # instead use str.lstrip(), str.rstrip() or str.strip()
+    # (They are kept for reference purposes to the original toktok.pl code)
+    LSTRIP = re.compile(r"^ +"), ""
+    RSTRIP = re.compile(r"\s+$"), "\n"
     # Merge multiple spaces.
-    ONE_SPACE = re.compile(r' {2,}'), ' '
-    
-    TOKTOK_REGEXES = [NON_BREAKING, FUNKY_PUNCT_1, 
-                      URL_FOE_1, URL_FOE_2, URL_FOE_3, URL_FOE_4,
-                      AMPERCENT, TAB, PIPE,
-                      OPEN_PUNCT_RE, CLOSE_PUNCT_RE, 
-                      MULTI_COMMAS, COMMA_IN_NUM, FINAL_PERIOD_2,
-                      PROB_SINGLE_QUOTES, STUPID_QUOTES_1, STUPID_QUOTES_2,
-                      CURRENCY_SYM_RE, EN_EM_DASHES, MULTI_DASHES, MULTI_DOTS,
-                      FINAL_PERIOD_1, FINAL_PERIOD_2, ONE_SPACE]
-    
+    ONE_SPACE = re.compile(r" {2,}"), " "
+
+    TOKTOK_REGEXES = [
+        NON_BREAKING,
+        FUNKY_PUNCT_1,
+        URL_FOE_1,
+        URL_FOE_2,
+        URL_FOE_3,
+        URL_FOE_4,
+        AMPERCENT,
+        TAB,
+        PIPE,
+        OPEN_PUNCT_RE,
+        CLOSE_PUNCT_RE,
+        MULTI_COMMAS,
+        COMMA_IN_NUM,
+        FINAL_PERIOD_2,
+        PROB_SINGLE_QUOTES,
+        STUPID_QUOTES_1,
+        STUPID_QUOTES_2,
+        CURRENCY_SYM_RE,
+        EN_EM_DASHES,
+        MULTI_DASHES,
+        MULTI_DOTS,
+        FINAL_PERIOD_1,
+        FINAL_PERIOD_2,
+        ONE_SPACE,
+    ]
+
     def tokenize(self, text, return_str=False):
-        text = text_type(text) # Converts input string into unicode.
+        text = str(text)  # Ensure the input is a (unicode) str.
         for regexp, substitution in self.TOKTOK_REGEXES:
             text = regexp.sub(substitution, text)
         # Finally, strips leading and trailing spaces
         # and converts output string into unicode.
-        text = text_type(text.strip()) 
-        return text if return_str else text.split()
\ No newline at end of file
+        text = str(text.strip())
+        return text if return_str else text.split()
diff --git a/nlp_resource_data/nltk/tokenize/toktok.pyc b/nlp_resource_data/nltk/tokenize/toktok.pyc
deleted file mode 100755 (executable)
index 1ff7f90..0000000
Binary files a/nlp_resource_data/nltk/tokenize/toktok.pyc and /dev/null differ
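
The tokenizer above is an ordered cascade of (compiled regex, substitution)
pairs applied by tokenize(). A standalone sketch of the same pattern with two
hypothetical rules (not the real TOKTOK_REGEXES list):

    import re

    RULES = [
        (re.compile(r"([?!])"), r" \1 "),  # pad sentence-final punctuation
        (re.compile(r" {2,}"), " "),       # then merge runs of spaces
    ]

    def mini_tokenize(text):
        for regexp, substitution in RULES:
            text = regexp.sub(substitution, text)
        return text.strip().split()

    print(mini_tokenize("Is this it?!"))   # ['Is', 'this', 'it', '?', '!']
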
diff --git a/nlp_resource_data/nltk/tokenize/treebank.py b/nlp_resource_data/nltk/tokenize/treebank.py
old mode 100755 (executable)
new mode 100644 (file)
index f3ae637..593ff05
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Tokenizers
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Michael Heilman <mheilman@cmu.edu> (re-port from http://www.cis.upenn.edu/~treebank/tokenizer.sed)
 #
@@ -19,23 +19,7 @@ and available at http://www.cis.upenn.edu/~treebank/tokenizer.sed.
 import re
 from nltk.tokenize.api import TokenizerI
 from nltk.tokenize.util import align_tokens
-
-
-class MacIntyreContractions:
-    """
-    List of contractions adapted from Robert MacIntyre's tokenizer.
-    """
-    CONTRACTIONS2 = [r"(?i)\b(can)(?#X)(not)\b",
-                     r"(?i)\b(d)(?#X)('ye)\b",
-                     r"(?i)\b(gim)(?#X)(me)\b",
-                     r"(?i)\b(gon)(?#X)(na)\b",
-                     r"(?i)\b(got)(?#X)(ta)\b",
-                     r"(?i)\b(lem)(?#X)(me)\b",
-                     r"(?i)\b(mor)(?#X)('n)\b",
-                     r"(?i)\b(wan)(?#X)(na)\s"]
-    CONTRACTIONS3 = [r"(?i) ('t)(?#X)(is)\b", r"(?i) ('t)(?#X)(was)\b"]
-    CONTRACTIONS4 = [r"(?i)\b(whad)(dd)(ya)\b",
-                     r"(?i)\b(wha)(t)(cha)\b"]
+from nltk.tokenize.destructive import MacIntyreContractions
 
 
 class TreebankWordTokenizer(TokenizerI):
@@ -63,41 +47,46 @@ class TreebankWordTokenizer(TokenizerI):
         ['hi', ',', 'my', 'name', 'ca', "n't", 'hello', ',']
     """
 
-    #starting quotes
+    # starting quotes
     STARTING_QUOTES = [
-        (re.compile(r'^\"'), r'``'),
-        (re.compile(r'(``)'), r' \1 '),
-        (re.compile(r'([ (\[{<])"'), r'\1 `` '),
+        (re.compile(r"^\""), r"``"),
+        (re.compile(r"(``)"), r" \1 "),
+        (re.compile(r"([ \(\[{<])(\"|\'{2})"), r"\1 `` "),
     ]
 
-    #punctuation
+    # punctuation
     PUNCTUATION = [
-        (re.compile(r'([:,])([^\d])'), r' \1 \2'),
-        (re.compile(r'([:,])$'), r' \1 '),
-        (re.compile(r'\.\.\.'), r' ... '),
-        (re.compile(r'[;@#$%&]'), r' \g<0> '),
-        (re.compile(r'([^\.])(\.)([\]\)}>"\']*)\s*$'), r'\1 \2\3 '), # Handles the final period.
-        (re.compile(r'[?!]'), r' \g<0> '),
-
+        (re.compile(r"([:,])([^\d])"), r" \1 \2"),
+        (re.compile(r"([:,])$"), r" \1 "),
+        (re.compile(r"\.\.\."), r" ... "),
+        (re.compile(r"[;@#$%&]"), r" \g<0> "),
+        (
+            re.compile(r'([^\.])(\.)([\]\)}>"\']*)\s*$'),
+            r"\1 \2\3 ",
+        ),  # Handles the final period.
+        (re.compile(r"[?!]"), r" \g<0> "),
         (re.compile(r"([^'])' "), r"\1 ' "),
     ]
 
     # Pads parentheses
-    PARENS_BRACKETS = (re.compile(r'[\]\[\(\)\{\}\<\>]'), r' \g<0> ')
+    PARENS_BRACKETS = (re.compile(r"[\]\[\(\)\{\}\<\>]"), r" \g<0> ")
 
     # Optionally: Convert parentheses, brackets and converts them to PTB symbols.
     CONVERT_PARENTHESES = [
-        (re.compile(r'\('), '-LRB-'), (re.compile(r'\)'), '-RRB-'),
-        (re.compile(r'\['), '-LSB-'), (re.compile(r'\]'), '-RSB-'),
-        (re.compile(r'\{'), '-LCB-'), (re.compile(r'\}'), '-RCB-')
+        (re.compile(r"\("), "-LRB-"),
+        (re.compile(r"\)"), "-RRB-"),
+        (re.compile(r"\["), "-LSB-"),
+        (re.compile(r"\]"), "-RSB-"),
+        (re.compile(r"\{"), "-LCB-"),
+        (re.compile(r"\}"), "-RCB-"),
     ]
 
-    DOUBLE_DASHES = (re.compile(r'--'), r' -- ')
+    DOUBLE_DASHES = (re.compile(r"--"), r" -- ")
 
-    #ending quotes
+    # ending quotes
     ENDING_QUOTES = [
         (re.compile(r'"'), " '' "),
-        (re.compile(r'(\S)(\'\')'), r'\1 \2 '),
+        (re.compile(r"(\S)(\'\')"), r"\1 \2 "),
         (re.compile(r"([^' ])('[sS]|'[mM]|'[dD]|') "), r"\1 \2 "),
         (re.compile(r"([^' ])('ll|'LL|'re|'RE|'ve|'VE|n't|N'T) "), r"\1 \2 "),
     ]
@@ -126,16 +115,16 @@ class TreebankWordTokenizer(TokenizerI):
         regexp, substitution = self.DOUBLE_DASHES
         text = regexp.sub(substitution, text)
 
-        #add extra space to make things easier
+        # add extra space to make things easier
         text = " " + text + " "
 
         for regexp, substitution in self.ENDING_QUOTES:
             text = regexp.sub(substitution, text)
 
         for regexp in self.CONTRACTIONS2:
-            text = regexp.sub(r' \1 \2 ', text)
+            text = regexp.sub(r" \1 \2 ", text)
         for regexp in self.CONTRACTIONS3:
-            text = regexp.sub(r' \1 \2 ', text)
+            text = regexp.sub(r" \1 \2 ", text)
 
         # We are not using CONTRACTIONS4 since
         # they are also commented out in the SED scripts
@@ -154,7 +143,7 @@ class TreebankWordTokenizer(TokenizerI):
             ... (24, 26), (27, 30), (31, 32), (32, 36), (36, 37), (37, 38),
             ... (40, 46), (47, 48), (48, 51), (51, 52), (53, 55), (56, 59),
             ... (60, 62), (63, 68), (69, 70), (70, 76), (76, 77), (77, 78)]
-            >>> TreebankWordTokenizer().span_tokenize(s) == expected
+            >>> list(TreebankWordTokenizer().span_tokenize(s)) == expected
             True
             >>> expected = ['Good', 'muffins', 'cost', '$', '3.88', 'in',
             ... 'New', '(', 'York', ')', '.', 'Please', '(', 'buy', ')',
@@ -162,21 +151,43 @@ class TreebankWordTokenizer(TokenizerI):
             >>> [s[start:end] for start, end in TreebankWordTokenizer().span_tokenize(s)] == expected
             True
 
+            An additional example:
+            >>> from nltk.tokenize import TreebankWordTokenizer
+            >>> s = '''I said, "I'd like to buy some ''good muffins" which cost $3.88\\n each in New (York)."'''
+            >>> expected = [(0, 1), (2, 6), (6, 7), (8, 9), (9, 10), (10, 12),
+            ... (13, 17), (18, 20), (21, 24), (25, 29), (30, 32), (32, 36),
+            ... (37, 44), (44, 45), (46, 51), (52, 56), (57, 58), (58, 62),
+            ... (64, 68), (69, 71), (72, 75), (76, 77), (77, 81), (81, 82),
+            ... (82, 83), (83, 84)]
+            >>> list(TreebankWordTokenizer().span_tokenize(s)) == expected
+            True
+            >>> expected = ['I', 'said', ',', '"', 'I', "'d", 'like', 'to',
+            ... 'buy', 'some', "''", "good", 'muffins', '"', 'which', 'cost',
+            ... '$', '3.88', 'each', 'in', 'New', '(', 'York', ')', '.', '"']
+            >>> [s[start:end] for start, end in TreebankWordTokenizer().span_tokenize(s)] == expected
+            True
+
         """
         raw_tokens = self.tokenize(text)
 
         # Convert converted quotes back to original double quotes
-        # Do this only if original text contains double quote(s)
-        if '"' in text:
+        # Do this only if original text contains double quote(s) or double
+        # single-quotes (because '' might be transformed to `` if it is
+        # treated as starting quotes).
+        if ('"' in text) or ("''" in text):
             # Find double quotes and converted quotes
-            matched = [m.group() for m in re.finditer(r'[(``)(\'\')(")]+', text)]
-            
+            matched = [m.group() for m in re.finditer(r"``|'{2}|\"", text)]
+
             # Replace converted quotes back to double quotes
-            tokens = [matched.pop(0) if tok in ['"', "``", "''"] else tok for tok in raw_tokens]
+            tokens = [
+                matched.pop(0) if tok in ['"', "``", "''"] else tok
+                for tok in raw_tokens
+            ]
         else:
             tokens = raw_tokens
 
-        return align_tokens(tokens, text)
+        for tok in align_tokens(tokens, text):
+            yield tok
 
 
 class TreebankWordDetokenizer(TokenizerI):
@@ -216,7 +227,7 @@ class TreebankWordDetokenizer(TokenizerI):
     True
 
     During tokenization it's safe to add more spaces but during detokenization,
-    simply undoing the padding doesn't really help. 
+    simply undoing the padding doesn't really help.
 
     - During tokenization, left and right pad is added to [!?], when
       detokenizing, only left shift the [!?] is needed.
@@ -238,76 +249,90 @@ class TreebankWordDetokenizer(TokenizerI):
     >>> twd.detokenize(toks)
     "hello, i can't feel; my feet! Help!! He said: Help, help?!"
     """
+
     _contractions = MacIntyreContractions()
-    CONTRACTIONS2 = [re.compile(pattern.replace('(?#X)', '\s'))
-                    for pattern in _contractions.CONTRACTIONS2]
-    CONTRACTIONS3 = [re.compile(pattern.replace('(?#X)', '\s'))
-                    for pattern in _contractions.CONTRACTIONS3]
+    CONTRACTIONS2 = [
+        re.compile(pattern.replace("(?#X)", "\s"))
+        for pattern in _contractions.CONTRACTIONS2
+    ]
+    CONTRACTIONS3 = [
+        re.compile(pattern.replace("(?#X)", "\s"))
+        for pattern in _contractions.CONTRACTIONS3
+    ]
 
-    #ending quotes
+    # ending quotes
     ENDING_QUOTES = [
         (re.compile(r"([^' ])\s('ll|'LL|'re|'RE|'ve|'VE|n't|N'T) "), r"\1\2 "),
         (re.compile(r"([^' ])\s('[sS]|'[mM]|'[dD]|') "), r"\1\2 "),
-        (re.compile(r'(\S)(\'\')'), r'\1\2 '),
-        (re.compile(r" '' "), '"')
-        ]
+        (re.compile(r"(\S)(\'\')"), r"\1\2 "),
+        (re.compile(r" '' "), '"'),
+    ]
 
     # Handles double dashes
-    DOUBLE_DASHES = (re.compile(r' -- '), r'--')
+    DOUBLE_DASHES = (re.compile(r" -- "), r"--")
 
     # Optionally: Convert parentheses, brackets and converts them from PTB symbols.
     CONVERT_PARENTHESES = [
-        (re.compile('-LRB-'), '('), (re.compile('-RRB-'), ')'),
-        (re.compile('-LSB-'), '['), (re.compile('-RSB-'), ']'),
-        (re.compile('-LCB-'), '{'), (re.compile('-RCB-'), '}')
+        (re.compile("-LRB-"), "("),
+        (re.compile("-RRB-"), ")"),
+        (re.compile("-LSB-"), "["),
+        (re.compile("-RSB-"), "]"),
+        (re.compile("-LCB-"), "{"),
+        (re.compile("-RCB-"), "}"),
     ]
 
     # Undo padding on parentheses.
-    PARENS_BRACKETS = [(re.compile(r'\s([\[\(\{\<])\s'), r' \g<1>'),
-                       (re.compile(r'\s([\]\)\}\>])\s'), r'\g<1> '),
-                       (re.compile(r'([\]\)\}\>])\s([:;,.])'), r'\1\2')]
+    PARENS_BRACKETS = [
+        (re.compile(r"\s([\[\(\{\<])\s"), r" \g<1>"),
+        (re.compile(r"\s([\]\)\}\>])\s"), r"\g<1> "),
+        (re.compile(r"([\]\)\}\>])\s([:;,.])"), r"\1\2"),
+    ]
 
-    #punctuation
+    # punctuation
     PUNCTUATION = [
         (re.compile(r"([^'])\s'\s"), r"\1' "),
-        (re.compile(r'\s([?!])'), r'\g<1>'), # Strip left pad for [?!]
-        #(re.compile(r'\s([?!])\s'), r'\g<1>'),
-        (re.compile(r'([^\.])\s(\.)([\]\)}>"\']*)\s*$'), r'\1\2\3'),
+        (re.compile(r"\s([?!])"), r"\g<1>"),  # Strip left pad for [?!]
+        # (re.compile(r'\s([?!])\s'), r'\g<1>'),
+        (re.compile(r'([^\.])\s(\.)([\]\)}>"\']*)\s*$'), r"\1\2\3"),
         # When tokenizing, [;@#$%&] are padded with whitespace regardless of
         # whether there are spaces before or after them.
         # But during detokenization, we need to distinguish between left/right
         # pad, so we split this up.
-        (re.compile(r'\s([#$])\s'), r' \g<1>'), # Left pad.
-        (re.compile(r'\s([;%])\s'), r'\g<1> '), # Right pad.
-        (re.compile(r'\s([&])\s'), r' \g<1> '), # Unknown pad.
-        (re.compile(r'\s\.\.\.\s'), r'...'),
-        (re.compile(r'\s([:,])\s$'), r'\1'),
-        (re.compile(r'\s([:,])\s([^\d])'), r'\1 \2') # Keep right pad after comma/colon before non-digits.
-        #(re.compile(r'\s([:,])\s([^\d])'), r'\1\2')
-        ]
-
-    #starting quotes
+        (re.compile(r"\s([#$])\s"), r" \g<1>"),  # Left pad.
+        (re.compile(r"\s([;%])\s"), r"\g<1> "),  # Right pad.
+        (re.compile(r"\s([&*])\s"), r" \g<1> "),  # Unknown pad.
+        (re.compile(r"\s\.\.\.\s"), r"..."),
+        (re.compile(r"\s([:,])\s$"), r"\1"),
+        (
+            re.compile(r"\s([:,])\s([^\d])"),
+            r"\1 \2",
+        )  # Keep right pad after comma/colon before non-digits.
+        # (re.compile(r'\s([:,])\s([^\d])'), r'\1\2')
+    ]
+
+    # starting quotes
     STARTING_QUOTES = [
-        (re.compile(r'([ (\[{<])\s``'), r'\1"'),
-        (re.compile(r'\s(``)\s'), r'\1'),
-        (re.compile(r'^``'), r'\"'),
+        (re.compile(r"([ (\[{<])\s``"), r'\1"'),
+        (re.compile(r"\s(``)\s"), r"\1"),
+        (re.compile(r"^``"), r"\""),
     ]
 
     def tokenize(self, tokens, convert_parentheses=False):
         """
-        Python port of the Moses detokenizer.
+        Treebank detokenizer, created by undoing the regexes from
+        TreebankWordTokenizer.tokenize().
 
         :param tokens: A list of strings, i.e. tokenized text.
         :type tokens: list(str)
         :return: str
         """
-        text = ' '.join(tokens)
+        text = " ".join(tokens)
         # Reverse the contractions regexes.
         # Note: CONTRACTIONS4 are not used in tokenization.
         for regexp in self.CONTRACTIONS3:
-            text = regexp.sub(r'\1\2', text)
+            text = regexp.sub(r"\1\2", text)
         for regexp in self.CONTRACTIONS2:
-            text = regexp.sub(r'\1\2', text)
+            text = regexp.sub(r"\1\2", text)
 
         # Reverse the regexes applied for ending quotes.
         for regexp, substitution in self.ENDING_QUOTES:
diff --git a/nlp_resource_data/nltk/tokenize/treebank.pyc b/nlp_resource_data/nltk/tokenize/treebank.pyc
deleted file mode 100755 (executable)
index 8ae93b3..0000000
Binary files a/nlp_resource_data/nltk/tokenize/treebank.pyc and /dev/null differ
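
Note the behavioural change above: span_tokenize() now yields spans lazily
instead of returning a list, which is why the updated doctests wrap the call
in list(). A short consumption sketch (hypothetical sentence):

    from nltk.tokenize import TreebankWordTokenizer

    s = "Good muffins cost $3.88."
    spans = list(TreebankWordTokenizer().span_tokenize(s))  # materialize
    tokens = [s[start:end] for start, end in spans]
    # tokens -> ['Good', 'muffins', 'cost', '$', '3.88', '.']
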
diff --git a/nlp_resource_data/nltk/tokenize/util.py b/nlp_resource_data/nltk/tokenize/util.py
old mode 100755 (executable)
new mode 100644 (file)
index f19894b..be7c12b
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Tokenizer Utilities
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.sourceforge.net>
 # For license information, see LICENSE.TXT
@@ -9,6 +9,7 @@
 from re import finditer
 from xml.sax.saxutils import escape, unescape
 
+
 def string_span_tokenize(s, sep):
     r"""
     Return the offsets of the tokens in *s*, as a sequence of ``(start, end)``
@@ -42,6 +43,7 @@ def string_span_tokenize(s, sep):
 
         left = right + len(sep)
 
+
 def regexp_span_tokenize(s, regexp):
     r"""
     Return the offsets of the tokens in *s*, as a sequence of ``(start, end)``
@@ -68,6 +70,7 @@ def regexp_span_tokenize(s, regexp):
         left = next
     yield left, len(s)
 
+
 def spans_to_relative(spans):
     r"""
     Return a sequence of relative spans, given a sequence of spans.
@@ -98,8 +101,9 @@ class CJKChars(object):
     This is a Python port of the CJK code point enumerations of Moses tokenizer:
     https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/detokenizer.perl#L309
     """
+
     # Hangul Jamo (1100–11FF)
-    Hangul_Jamo = (4352, 4607) # (ord(u"\u1100"), ord(u"\u11ff"))
+    Hangul_Jamo = (4352, 4607)  # (ord(u"\u1100"), ord(u"\u11ff"))
 
     # CJK Radicals Supplement (2E80–2EFF)
     # Kangxi Radicals (2F00–2FDF)
@@ -120,30 +124,39 @@ class CJKChars(object):
     # CJK Unified Ideographs (4E00–9FFF)
     # Yi Syllables (A000–A48F)
     # Yi Radicals (A490–A4CF)
-    CJK_Radicals = (11904, 42191) # (ord(u"\u2e80"), ord(u"\ua4cf"))
+    CJK_Radicals = (11904, 42191)  # (ord(u"\u2e80"), ord(u"\ua4cf"))
 
     # Phags-pa (A840–A87F)
-    Phags_Pa = (43072, 43135) # (ord(u"\ua840"), ord(u"\ua87f"))
+    Phags_Pa = (43072, 43135)  # (ord(u"\ua840"), ord(u"\ua87f"))
 
     # Hangul Syllables (AC00–D7AF)
-    Hangul_Syllables = (44032, 55215) # (ord(u"\uAC00"), ord(u"\uD7AF"))
+    Hangul_Syllables = (44032, 55215)  # (ord(u"\uAC00"), ord(u"\uD7AF"))
 
     # CJK Compatibility Ideographs (F900–FAFF)
-    CJK_Compatibility_Ideographs = (63744, 64255) # (ord(u"\uF900"), ord(u"\uFAFF"))
+    CJK_Compatibility_Ideographs = (63744, 64255)  # (ord(u"\uF900"), ord(u"\uFAFF"))
 
     # CJK Compatibility Forms (FE30–FE4F)
-    CJK_Compatibility_Forms = (65072, 65103) # (ord(u"\uFE30"), ord(u"\uFE4F"))
+    CJK_Compatibility_Forms = (65072, 65103)  # (ord(u"\uFE30"), ord(u"\uFE4F"))
 
     # Range U+FF65–FFDC encodes halfwidth forms, of Katakana and Hangul characters
-    Katakana_Hangul_Halfwidth = (65381, 65500) # (ord(u"\uFF65"), ord(u"\uFFDC"))
+    Katakana_Hangul_Halfwidth = (65381, 65500)  # (ord(u"\uFF65"), ord(u"\uFFDC"))
 
     # Supplementary Ideographic Plane 20000–2FFFF
-    Supplementary_Ideographic_Plane = (131072, 196607) # (ord(u"\U00020000"), ord(u"\U0002FFFF"))
-
-    ranges = [Hangul_Jamo, CJK_Radicals, Phags_Pa, Hangul_Syllables,
-              CJK_Compatibility_Ideographs, CJK_Compatibility_Forms,
-              Katakana_Hangul_Halfwidth, Supplementary_Ideographic_Plane]
-
+    Supplementary_Ideographic_Plane = (
+        131072,
+        196607,
+    )  # (ord(u"\U00020000"), ord(u"\U0002FFFF"))
+
+    ranges = [
+        Hangul_Jamo,
+        CJK_Radicals,
+        Phags_Pa,
+        Hangul_Syllables,
+        CJK_Compatibility_Ideographs,
+        CJK_Compatibility_Forms,
+        Katakana_Hangul_Halfwidth,
+        Supplementary_Ideographic_Plane,
+    ]
 
 
 def is_cjk(character):
@@ -161,11 +174,21 @@ def is_cjk(character):
     :type character: char
     :return: bool
     """
-    return any([start <= ord(character) <= end for start, end in
-                [(4352, 4607), (11904, 42191), (43072, 43135), (44032, 55215),
-                 (63744, 64255), (65072, 65103), (65381, 65500),
-                 (131072, 196607)]
-                ])
+    return any(
+        [
+            start <= ord(character) <= end
+            for start, end in [
+                (4352, 4607),
+                (11904, 42191),
+                (43072, 43135),
+                (44032, 55215),
+                (63744, 64255),
+                (65072, 65103),
+                (65381, 65500),
+                (131072, 196607),
+            ]
+        ]
+    )
 
 
 def xml_escape(text):
@@ -188,9 +211,16 @@ def xml_escape(text):
     :type text: str
     :rtype: str
     """
-    return escape(text, entities={ r"'": r"&apos;", r'"': r"&quot;",
-                                   r"|": r"&#124;",
-                                   r"[": r"&#91;",  r"]": r"&#93;", })
+    return escape(
+        text,
+        entities={
+            r"'": r"&apos;",
+            r'"': r"&quot;",
+            r"|": r"&#124;",
+            r"[": r"&#91;",
+            r"]": r"&#93;",
+        },
+    )
 
 
 def xml_unescape(text):
@@ -212,9 +242,16 @@ def xml_unescape(text):
     :type text: str
     :rtype: str
     """
-    return unescape(text, entities={ r"&apos;":r"'", r"&quot;":r'"',
-                                     r"&#124;":r"|",
-                                     r"&#91;":r"[",  r"&#93;":r"]", })
+    return unescape(
+        text,
+        entities={
+            r"&apos;": r"'",
+            r"&quot;": r'"',
+            r"&#124;": r"|",
+            r"&#91;": r"[",
+            r"&#93;": r"]",
+        },
+    )
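
A quick round-trip sketch of the helpers above (is_cjk uses the code-point
ranges listed in CJKChars; the escape tables are the ones shown):

    from nltk.tokenize.util import is_cjk, xml_escape, xml_unescape

    assert is_cjk("\u4e00") and not is_cjk("A")  # U+4E00 falls in CJK_Radicals
    raw = 'a "quoted" [span] | etc.'
    assert xml_unescape(xml_escape(raw)) == raw
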
 
 
 def align_tokens(tokens, sentence):
diff --git a/nlp_resource_data/nltk/toolbox.py b/nlp_resource_data/nltk/toolbox.py
old mode 100755 (executable)
new mode 100644 (file)
index a02ecbd..f9b5caa
@@ -1,7 +1,7 @@
 # coding: utf-8
 # Natural Language Toolkit: Toolbox Reader
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Greg Aumann <greg_aumann@sil.org>
 # URL: <http://nltk.org>
 # For license information, see LICENSE.TXT
 
 """
 Module for reading, writing and manipulating
 Toolbox databases and settings files.
 """
-from __future__ import print_function
 
-import os, re, codecs
+import re, codecs
 from xml.etree.ElementTree import ElementTree, TreeBuilder, Element, SubElement
+from io import StringIO
 
-from six import u
-
-from nltk.compat import StringIO, PY3
-from nltk.data import PathPointer, ZipFilePathPointer, find
+from nltk.data import PathPointer, find
 
 
 class StandardFormat(object):
     """
     Class for reading and processing standard format marker files and strings.
     """
+
     def __init__(self, filename=None, encoding=None):
         self._encoding = encoding
         if filename is not None:
@@ -42,7 +40,7 @@ class StandardFormat(object):
             #      (PathPointer.open doesn't take a mode option)
             self._file = sfm_file.open(self._encoding)
         else:
-            self._file = codecs.open(sfm_file, 'rU', self._encoding)
+            self._file = codecs.open(sfm_file, "rU", self._encoding)
 
     def open_string(self, s):
         """
@@ -61,18 +59,23 @@ class StandardFormat(object):
 
         :rtype: iter(tuple(str, str))
         """
-        join_string = '\n'
-        line_regexp = r'^%s(?:\\(\S+)\s*)?(.*)$'
+        join_string = "\n"
+        line_regexp = r"^%s(?:\\(\S+)\s*)?(.*)$"
         # discard a BOM in the first line
-        first_line_pat = re.compile(line_regexp % '(?:\xef\xbb\xbf)?')
-        line_pat = re.compile(line_regexp % '')
+        first_line_pat = re.compile(line_regexp % "(?:\xef\xbb\xbf)?")
+        line_pat = re.compile(line_regexp % "")
         # need to get first line outside the loop for correct handling
         # of the first marker if it spans multiple lines
         file_iter = iter(self._file)
-        line = next(file_iter)
+        # PEP 479: prevent RuntimeError when StopIteration is raised inside a generator
+        try:
+            line = next(file_iter)
+        except StopIteration:
+            # no more data is available, terminate the generator
+            return
         mobj = re.match(first_line_pat, line)
         mkr, line_value = mobj.groups()
-        value_lines = [line_value,]
+        value_lines = [line_value]
         self.line_num = 0
         for line in file_iter:
             self.line_num += 1
@@ -81,13 +84,20 @@ class StandardFormat(object):
             if line_mkr:
                 yield (mkr, join_string.join(value_lines))
                 mkr = line_mkr
-                value_lines = [line_value,]
+                value_lines = [line_value]
             else:
                 value_lines.append(line_value)
         self.line_num += 1
         yield (mkr, join_string.join(value_lines))
 
-    def fields(self, strip=True, unwrap=True, encoding=None, errors='strict', unicode_fields=None):
+    def fields(
+        self,
+        strip=True,
+        unwrap=True,
+        encoding=None,
+        errors="strict",
+        unicode_fields=None,
+    ):
         """
         Return an iterator that returns the next field in a ``(marker, value)``
         tuple, where ``marker`` and ``value`` are unicode strings if an ``encoding``
@@ -112,17 +122,11 @@ class StandardFormat(object):
         :rtype: iter(tuple(str, str))
         """
         if encoding is None and unicode_fields is not None:
-            raise ValueError('unicode_fields is set but not encoding.')
-        unwrap_pat = re.compile(r'\n+')
+            raise ValueError("unicode_fields is set but not encoding.")
+        unwrap_pat = re.compile(r"\n+")
         for mkr, val in self.raw_fields():
-            if encoding and not PY3: # kludge - already decoded in PY3?
-                if unicode_fields is not None and mkr in unicode_fields:
-                    val = val.decode('utf8', errors)
-                else:
-                    val = val.decode(encoding, errors)
-                mkr = mkr.decode(encoding, errors)
             if unwrap:
-                val = unwrap_pat.sub(' ', val)
+                val = unwrap_pat.sub(" ", val)
             if strip:
                 val = val.rstrip()
             yield (mkr, val)
@@ -135,10 +139,11 @@ class StandardFormat(object):
         except AttributeError:
             pass
 
+
 class ToolboxData(StandardFormat):
-    def parse(self, grammar=None,  **kwargs):
+    def parse(self, grammar=None, **kwargs):
         if grammar:
-            return self._chunk_parse(grammar=grammar,  **kwargs)
+            return self._chunk_parse(grammar=grammar, **kwargs)
         else:
             return self._record_parse(**kwargs)
 
@@ -195,27 +200,27 @@ class ToolboxData(StandardFormat):
         :return: contents of toolbox data divided into header and records
         """
         builder = TreeBuilder()
-        builder.start('toolbox_data', {})
-        builder.start('header', {})
+        builder.start("toolbox_data", {})
+        builder.start("header", {})
         in_records = False
         for mkr, value in self.fields(**kwargs):
-            if key is None and not in_records and mkr[0] != '_':
+            if key is None and not in_records and mkr[0] != "_":
                 key = mkr
             if mkr == key:
                 if in_records:
-                    builder.end('record')
+                    builder.end("record")
                 else:
-                    builder.end('header')
+                    builder.end("header")
                     in_records = True
-                builder.start('record', {})
+                builder.start("record", {})
             builder.start(mkr, {})
             builder.data(value)
             builder.end(mkr)
         if in_records:
-            builder.end('record')
+            builder.end("record")
         else:
-            builder.end('header')
-        builder.end('toolbox_data')
+            builder.end("header")
+        builder.end("toolbox_data")
         return builder.close()
 
     def _tree2etree(self, parent):
@@ -231,7 +236,7 @@ class ToolboxData(StandardFormat):
                 e.text = text
         return root
 
-    def _chunk_parse(self, grammar=None, root_label='record', trace=0, **kwargs):
+    def _chunk_parse(self, grammar=None, root_label="record", trace=0, **kwargs):
         """
         Returns an element tree structure corresponding to a toolbox data file
         parsed according to the chunk grammar.
@@ -256,17 +261,19 @@ class ToolboxData(StandardFormat):
 
         cp = chunk.RegexpParser(grammar, root_label=root_label, trace=trace)
         db = self.parse(**kwargs)
-        tb_etree = Element('toolbox_data')
-        header = db.find('header')
+        tb_etree = Element("toolbox_data")
+        header = db.find("header")
         tb_etree.append(header)
-        for record in db.findall('record'):
+        for record in db.findall("record"):
             parsed = cp.parse([(elem.text, elem.tag) for elem in record])
             tb_etree.append(self._tree2etree(parsed))
         return tb_etree
 
+
 _is_value = re.compile(r"\S")
 
-def to_sfm_string(tree, encoding=None, errors='strict', unicode_fields=None):
+
+def to_sfm_string(tree, encoding=None, errors="strict", unicode_fields=None):
     """
     Return a string with a standard format representation of the toolbox
     data in tree (tree can be a toolbox database or a single record).
@@ -282,36 +289,43 @@ def to_sfm_string(tree, encoding=None, errors='strict', unicode_fields=None):
     :type unicode_fields: dict(str) or set(str)
     :rtype: str
     """
-    if tree.tag == 'record':
-        root = Element('toolbox_data')
+    if tree.tag == "record":
+        root = Element("toolbox_data")
         root.append(tree)
         tree = root
 
-    if tree.tag != 'toolbox_data':
+    if tree.tag != "toolbox_data":
         raise ValueError("not a toolbox_data element structure")
     if encoding is None and unicode_fields is not None:
-        raise ValueError("if encoding is not specified then neither should unicode_fields")
+        raise ValueError(
+            "if encoding is not specified then neither should unicode_fields"
+        )
     l = []
     for rec in tree:
-        l.append('\n')
+        l.append("\n")
         for field in rec:
             mkr = field.tag
             value = field.text
             if encoding is not None:
                 if unicode_fields is not None and mkr in unicode_fields:
-                    cur_encoding = 'utf8'
+                    cur_encoding = "utf8"
                 else:
                     cur_encoding = encoding
                 if re.search(_is_value, value):
-                    l.append((u("\\%s %s\n") % (mkr, value)).encode(cur_encoding, errors))
+                    l.append(
+                        ("\\%s %s\n" % (mkr, value)).encode(cur_encoding, errors)
+                    )
                 else:
-                    l.append((u("\\%s%s\n") % (mkr, value)).encode(cur_encoding, errors))
+                    l.append(
+                        ("\\%s%s\n" % (mkr, value)).encode(cur_encoding, errors)
+                    )
             else:
                 if re.search(_is_value, value):
                     l.append("\\%s %s\n" % (mkr, value))
                 else:
                     l.append("\\%s%s\n" % (mkr, value))
-    return ''.join(l[1:])
+    return "".join(l[1:])
+
 
 class ToolboxSettings(StandardFormat):
     """This class is the base class for settings files."""
@@ -319,7 +333,7 @@ class ToolboxSettings(StandardFormat):
     def __init__(self):
         super(ToolboxSettings, self).__init__()
 
-    def parse(self, encoding=None, errors='strict', **kwargs):
+    def parse(self, encoding=None, errors="strict", **kwargs):
         """
         Return the contents of toolbox settings file with a nested structure.
 
@@ -335,16 +349,16 @@ class ToolboxSettings(StandardFormat):
         for mkr, value in self.fields(encoding=encoding, errors=errors, **kwargs):
             # Check whether the first char of the field marker
             # indicates a block start (+) or end (-)
-            block=mkr[0]
+            block = mkr[0]
             if block in ("+", "-"):
-                mkr=mkr[1:]
+                mkr = mkr[1:]
             else:
-                block=None
+                block = None
             # Build tree on the basis of block char
             if block == "+":
                 builder.start(mkr, {})
                 builder.data(value)
-            elif block == '-':
+            elif block == "-":
                 builder.end(mkr)
             else:
                 builder.start(mkr, {})
@@ -352,11 +366,19 @@ class ToolboxSettings(StandardFormat):
                 builder.end(mkr)
         return builder.close()
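
In the settings format parsed above, a marker starting with '+' opens a nested
block and one starting with '-' closes it. A minimal sketch on a hypothetical
three-line settings fragment:

    from nltk.toolbox import ToolboxSettings

    settings = ToolboxSettings()
    settings.open_string("\\+expset\n\\expMDF 1\n\\-expset\n")
    root = settings.parse()
    assert root.tag == "expset" and root[0].tag == "expMDF"
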
 
-def to_settings_string(tree, encoding=None, errors='strict', unicode_fields=None):
+
+def to_settings_string(tree, encoding=None, errors="strict", unicode_fields=None):
     # write XML to file
     l = list()
-    _to_settings_string(tree.getroot(), l, encoding=encoding, errors=errors, unicode_fields=unicode_fields)
-    return ''.join(l)
+    _to_settings_string(
+        tree.getroot(),
+        l,
+        encoding=encoding,
+        errors=errors,
+        unicode_fields=unicode_fields,
+    )
+    return "".join(l)
+
 
 def _to_settings_string(node, l, **kwargs):
     # write XML to file
@@ -364,19 +386,20 @@ def _to_settings_string(node, l, **kwargs):
     text = node.text
     if len(node) == 0:
         if text:
-            l.append('\\%s %s\n' % (tag, text))
+            l.append("\\%s %s\n" % (tag, text))
         else:
-            l.append('\\%s\n' % tag)
+            l.append("\\%s\n" % tag)
     else:
         if text:
-            l.append('\\+%s %s\n' % (tag, text))
+            l.append("\\+%s %s\n" % (tag, text))
         else:
-            l.append('\\+%s\n' % tag)
+            l.append("\\+%s\n" % tag)
         for n in node:
             _to_settings_string(n, l, **kwargs)
-        l.append('\\-%s\n' % tag)
+        l.append("\\-%s\n" % tag)
     return
 
+
 def remove_blanks(elem):
     """
     Remove all elements and subelements with no text and no child elements.
@@ -391,6 +414,7 @@ def remove_blanks(elem):
             out.append(child)
     elem[:] = out
 
+
 def add_default_fields(elem, default_fields):
     """
     Add blank elements and subelements specified in default_fields.
@@ -400,12 +424,13 @@ def add_default_fields(elem, default_fields):
     :param default_fields: fields to add to each type of element and subelement
     :type default_fields: dict(tuple)
     """
-    for field in default_fields.get(elem.tag,  []):
+    for field in default_fields.get(elem.tag, []):
         if elem.find(field) is None:
             SubElement(elem, field)
     for child in elem:
         add_default_fields(child, default_fields)
 
+
 def sort_fields(elem, field_orders):
     """
     Sort the elements and subelements in order specified in field_orders.
@@ -422,6 +447,7 @@ def sort_fields(elem, field_orders):
             order_key[subfield] = i
     _sort_fields(elem, order_dicts)
 
+
 def _sort_fields(elem, orders_dicts):
     """sort the children of elem"""
     try:
@@ -429,12 +455,15 @@ def _sort_fields(elem, orders_dicts):
     except KeyError:
         pass
     else:
-        tmp = sorted([((order.get(child.tag, 1e9), i), child) for i, child in enumerate(elem)])
+        tmp = sorted(
+            [((order.get(child.tag, 1e9), i), child) for i, child in enumerate(elem)]
+        )
         elem[:] = [child for key, child in tmp]
     for child in elem:
         if len(child):
             _sort_fields(child, orders_dicts)
 
+
 def add_blank_lines(tree, blanks_before, blanks_between):
     """
     Add blank lines before all elements and subelements specified in blank_before.
@@ -467,33 +496,35 @@ def add_blank_lines(tree, blanks_before, blanks_between):
                 add_blank_lines(elem, blanks_before, blanks_between)
             last_elem = elem
 
+
 def demo():
     from itertools import islice
 
-#    zip_path = find('corpora/toolbox.zip')
-#    lexicon = ToolboxData(ZipFilePathPointer(zip_path, 'toolbox/rotokas.dic')).parse()
-    file_path = find('corpora/toolbox/rotokas.dic')
+    #    zip_path = find('corpora/toolbox.zip')
+    #    lexicon = ToolboxData(ZipFilePathPointer(zip_path, 'toolbox/rotokas.dic')).parse()
+    file_path = find("corpora/toolbox/rotokas.dic")
     lexicon = ToolboxData(file_path).parse()
-    print('first field in fourth record:')
+    print("first field in fourth record:")
     print(lexicon[3][0].tag)
     print(lexicon[3][0].text)
 
-    print('\nfields in sequential order:')
-    for field in islice(lexicon.find('record'), 10):
+    print("\nfields in sequential order:")
+    for field in islice(lexicon.find("record"), 10):
         print(field.tag, field.text)
 
-    print('\nlx fields:')
-    for field in islice(lexicon.findall('record/lx'), 10):
+    print("\nlx fields:")
+    for field in islice(lexicon.findall("record/lx"), 10):
         print(field.text)
 
     settings = ToolboxSettings()
-    file_path = find('corpora/toolbox/MDF/MDF_AltH.typ')
+    file_path = find("corpora/toolbox/MDF/MDF_AltH.typ")
     settings.open(file_path)
-#    settings.open(ZipFilePathPointer(zip_path, entry='toolbox/MDF/MDF_AltH.typ'))
-    tree = settings.parse(unwrap=False, encoding='cp1252')
-    print(tree.find('expset/expMDF/rtfPageSetup/paperSize').text)
+    #    settings.open(ZipFilePathPointer(zip_path, entry='toolbox/MDF/MDF_AltH.typ'))
+    tree = settings.parse(unwrap=False, encoding="cp1252")
+    print(tree.find("expset/expMDF/rtfPageSetup/paperSize").text)
     settings_tree = ElementTree(tree)
-    print(to_settings_string(settings_tree).encode('utf8'))
+    print(to_settings_string(settings_tree).encode("utf8"))
+
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     demo()
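
For reference, the reformatted toolbox helpers above can be exercised outside demo(); a minimal sketch (the record structure and field names are hypothetical, chosen only to illustrate add_default_fields and sort_fields):

    from xml.etree.ElementTree import Element, SubElement
    from nltk.toolbox import add_default_fields, sort_fields

    # A tiny Toolbox-like record holding a single \lx field (hypothetical data).
    record = Element("record")
    SubElement(record, "lx").text = "kaa"
    # Append the missing \ge and \ps fields, then restore the canonical order.
    add_default_fields(record, {"record": ("ge", "ps")})
    sort_fields(record, {"record": ("lx", "ps", "ge")})
    print([child.tag for child in record])  # expected: ['lx', 'ps', 'ge']
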
diff --git a/nlp_resource_data/nltk/toolbox.pyc b/nlp_resource_data/nltk/toolbox.pyc
deleted file mode 100755 (executable)
index 73931c0..0000000
Binary files a/nlp_resource_data/nltk/toolbox.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/translate/__init__.py b/nlp_resource_data/nltk/translate/__init__.py
old mode 100755 (executable)
new mode 100644 (file)
index 9e243e4..21ddf8a
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Machine Translation
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>, Tah Wei Hoon <hoon.tw@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -20,5 +20,6 @@ from nltk.translate.ibm4 import IBMModel4
 from nltk.translate.ibm5 import IBMModel5
 from nltk.translate.bleu_score import sentence_bleu as bleu
 from nltk.translate.ribes_score import sentence_ribes as ribes
+from nltk.translate.meteor_score import meteor_score as meteor
 from nltk.translate.metrics import alignment_error_rate
 from nltk.translate.stack_decoder import StackDecoder
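
The meteor alias added above can be used like the other scorers; a minimal sketch (assumes the WordNet corpus is available via nltk.download('wordnet'); NLTK releases of this era accept plain strings, while later ones expect pre-tokenized lists):

    from nltk.translate import meteor

    reference = ('It is a guide to action which ensures that the military '
                 'always obeys the commands of the party')
    hypothesis = ('It is a guide to action that ensures that the military '
                  'will forever heed Party commands')
    print(round(meteor([reference], hypothesis), 4))
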
diff --git a/nlp_resource_data/nltk/translate/__init__.pyc b/nlp_resource_data/nltk/translate/__init__.pyc
deleted file mode 100755 (executable)
index 9f905c4..0000000
Binary files a/nlp_resource_data/nltk/translate/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..6e89142
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..51ca6de
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/bleu_score.pyc b/nlp_resource_data/nltk/translate/__pycache__/bleu_score.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 59%
rename from nlp_resource_data/nltk/translate/bleu_score.pyc
rename to nlp_resource_data/nltk/translate/__pycache__/bleu_score.cpython-37.pyc
index 9bc1c65..4ab78a5
Binary files a/nlp_resource_data/nltk/translate/bleu_score.pyc and b/nlp_resource_data/nltk/translate/__pycache__/bleu_score.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/chrf_score.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/chrf_score.cpython-37.pyc
new file mode 100644 (file)
index 0000000..431f99e
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/chrf_score.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/gale_church.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/gale_church.cpython-37.pyc
new file mode 100644 (file)
index 0000000..0be5c76
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/gale_church.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/gdfa.pyc b/nlp_resource_data/nltk/translate/__pycache__/gdfa.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 53%
rename from nlp_resource_data/nltk/translate/gdfa.pyc
rename to nlp_resource_data/nltk/translate/__pycache__/gdfa.cpython-37.pyc
index 4015011..c41e3e3
Binary files a/nlp_resource_data/nltk/translate/gdfa.pyc and b/nlp_resource_data/nltk/translate/__pycache__/gdfa.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/gleu_score.pyc b/nlp_resource_data/nltk/translate/__pycache__/gleu_score.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 76%
rename from nlp_resource_data/nltk/translate/gleu_score.pyc
rename to nlp_resource_data/nltk/translate/__pycache__/gleu_score.cpython-37.pyc
index 3e4710f..c82a872
Binary files a/nlp_resource_data/nltk/translate/gleu_score.pyc and b/nlp_resource_data/nltk/translate/__pycache__/gleu_score.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/ibm1.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/ibm1.cpython-37.pyc
new file mode 100644 (file)
index 0000000..42b670d
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/ibm1.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/ibm2.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/ibm2.cpython-37.pyc
new file mode 100644 (file)
index 0000000..1c38ba0
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/ibm2.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/ibm3.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/ibm3.cpython-37.pyc
new file mode 100644 (file)
index 0000000..b85e973
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/ibm3.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/ibm4.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/ibm4.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f36c4dc
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/ibm4.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/ibm5.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/ibm5.cpython-37.pyc
new file mode 100644 (file)
index 0000000..758fd67
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/ibm5.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/ibm_model.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/ibm_model.cpython-37.pyc
new file mode 100644 (file)
index 0000000..86f8784
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/ibm_model.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/meteor_score.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/meteor_score.cpython-37.pyc
new file mode 100644 (file)
index 0000000..9afe132
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/meteor_score.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/metrics.pyc b/nlp_resource_data/nltk/translate/__pycache__/metrics.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 55%
rename from nlp_resource_data/nltk/translate/metrics.pyc
rename to nlp_resource_data/nltk/translate/__pycache__/metrics.cpython-37.pyc
index efff81f..3cc8a31
Binary files a/nlp_resource_data/nltk/translate/metrics.pyc and b/nlp_resource_data/nltk/translate/__pycache__/metrics.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/nist_score.pyc b/nlp_resource_data/nltk/translate/__pycache__/nist_score.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 51%
rename from nlp_resource_data/nltk/translate/nist_score.pyc
rename to nlp_resource_data/nltk/translate/__pycache__/nist_score.cpython-37.pyc
index 7b5afe4..a22b4ea
Binary files a/nlp_resource_data/nltk/translate/nist_score.pyc and b/nlp_resource_data/nltk/translate/__pycache__/nist_score.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/phrase_based.pyc b/nlp_resource_data/nltk/translate/__pycache__/phrase_based.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 62%
rename from nlp_resource_data/nltk/translate/phrase_based.pyc
rename to nlp_resource_data/nltk/translate/__pycache__/phrase_based.cpython-37.pyc
index c2e198f..c76d2fc
Binary files a/nlp_resource_data/nltk/translate/phrase_based.pyc and b/nlp_resource_data/nltk/translate/__pycache__/phrase_based.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/ribes_score.pyc b/nlp_resource_data/nltk/translate/__pycache__/ribes_score.cpython-37.pyc
old mode 100755 (executable)
new mode 100644 (file)
similarity index 56%
rename from nlp_resource_data/nltk/translate/ribes_score.pyc
rename to nlp_resource_data/nltk/translate/__pycache__/ribes_score.cpython-37.pyc
index 8761542..85b6ca2
Binary files a/nlp_resource_data/nltk/translate/ribes_score.pyc and b/nlp_resource_data/nltk/translate/__pycache__/ribes_score.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/__pycache__/stack_decoder.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/stack_decoder.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ae9377f
Binary files /dev/null and b/nlp_resource_data/nltk/translate/__pycache__/stack_decoder.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/translate/api.py b/nlp_resource_data/nltk/translate/api.py
old mode 100755 (executable)
new mode 100644 (file)
index c05db53..9efcbde
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: API for alignment and translation objects
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Will Zhang <wilzzha@gmail.com>
 #         Guan Gui <ggui@student.unimelb.edu.au>
 #         Steven Bird <stevenbird1@gmail.com>
@@ -8,39 +8,39 @@
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-from __future__ import print_function, unicode_literals
 import subprocess
 from collections import namedtuple
 
-from nltk.compat import python_2_unicode_compatible
 
-@python_2_unicode_compatible
 class AlignedSent(object):
     """
     Return an aligned sentence object, which encapsulates two sentences
     along with an ``Alignment`` between them.
 
+    Typically used in machine translation to represent a sentence and
+    its translation.
+
         >>> from nltk.translate import AlignedSent, Alignment
         >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
-        ...     ['the', 'house', 'is', 'small'], Alignment.fromstring('0-2 1-3 2-1 3-0'))
+        ...     ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1'))
         >>> algnsent.words
         ['klein', 'ist', 'das', 'Haus']
         >>> algnsent.mots
         ['the', 'house', 'is', 'small']
         >>> algnsent.alignment
-        Alignment([(0, 2), (1, 3), (2, 1), (3, 0)])
+        Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])
         >>> from nltk.corpus import comtrans
         >>> print(comtrans.aligned_sents()[54])
         <AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
         >>> print(comtrans.aligned_sents()[54].alignment)
         0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13
 
-    :param words: source language words
+    :param words: Words in the target language sentence
     :type words: list(str)
-    :param mots: target language words
+    :param mots: Words in the source language sentence
     :type mots: list(str)
-    :param alignment: the word-level alignments between the source
-        and target language
+    :param alignment: Word-level alignments between ``words`` and ``mots``.
+        Each alignment is represented as a 2-tuple (words_index, mots_index).
     :type alignment: Alignment
     """
 
@@ -67,6 +67,7 @@ class AlignedSent(object):
     def _set_alignment(self, alignment):
         _check_alignment(len(self.words), len(self.mots), alignment)
         self._alignment = alignment
+
     alignment = property(_get_alignment, _set_alignment)
 
     def __repr__(self):
@@ -84,8 +85,8 @@ class AlignedSent(object):
         """
         Dot representation of the aligned sentence
         """
-        s = 'graph align {\n'
-        s += 'node[shape=plaintext]\n'
+        s = "graph align {\n"
+        s += "node[shape=plaintext]\n"
 
         # Declare node
         for w in self._words:
@@ -95,22 +96,28 @@ class AlignedSent(object):
             s += '"%s_target" [label="%s"] \n' % (w, w)
 
         # Alignment
-        for u,v in self._alignment:
-            s += '"%s_source" -- "%s_target" \n' % (self._words[u] , self._mots[v] )
+        for u, v in self._alignment:
+            s += '"%s_source" -- "%s_target" \n' % (self._words[u], self._mots[v])
 
         # Connect the source words
-        for i in range(len(self._words)-1) :
-            s += '"%s_source" -- "%s_source" [style=invis]\n' % (self._words[i] , self._words[i+1])
+        for i in range(len(self._words) - 1):
+            s += '"%s_source" -- "%s_source" [style=invis]\n' % (
+                self._words[i],
+                self._words[i + 1],
+            )
 
         # Connect the target words
-        for i in range(len(self._mots)-1) :
-            s += '"%s_target" -- "%s_target" [style=invis]\n' % (self._mots[i] , self._mots[i+1])
+        for i in range(len(self._mots) - 1):
+            s += '"%s_target" -- "%s_target" [style=invis]\n' % (
+                self._mots[i],
+                self._mots[i + 1],
+            )
 
         # Put it in the same rank
-        s  += '{rank = same; %s}\n' % (' '.join('"%s_source"' % w for w in self._words))
-        s  += '{rank = same; %s}\n' % (' '.join('"%s_target"' % w for w in self._mots))
+        s += "{rank = same; %s}\n" % (" ".join('"%s_source"' % w for w in self._words))
+        s += "{rank = same; %s}\n" % (" ".join('"%s_target"' % w for w in self._mots))
 
-        s += '}'
+        s += "}"
 
         return s
 
@@ -118,17 +125,20 @@ class AlignedSent(object):
         """
         Ipython magic : show SVG representation of this ``AlignedSent``.
         """
-        dot_string = self._to_dot().encode('utf8')
-        output_format = 'svg'
+        dot_string = self._to_dot().encode("utf8")
+        output_format = "svg"
         try:
-            process = subprocess.Popen(['dot', '-T%s' % output_format], stdin=subprocess.PIPE,
-                                       stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            process = subprocess.Popen(
+                ["dot", "-T%s" % output_format],
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
         except OSError:
-            raise Exception('Cannot find the dot binary from Graphviz package')
+            raise Exception("Cannot find the dot binary from Graphviz package")
         out, err = process.communicate(dot_string)
 
-        return out.decode('utf8')
-
+        return out.decode("utf8")
 
     def __str__(self):
         """
@@ -146,10 +156,9 @@ class AlignedSent(object):
 
         :rtype: AlignedSent
         """
-        return AlignedSent(self._mots, self._words,
-                               self._alignment.invert())
+        return AlignedSent(self._mots, self._words, self._alignment.invert())
+
 
-@python_2_unicode_compatible
 class Alignment(frozenset):
     """
     A storage class for representing alignment between two sequences, s1, s2.
@@ -178,7 +187,7 @@ class Alignment(frozenset):
 
     def __new__(cls, pairs):
         self = frozenset.__new__(cls, pairs)
-        self._len = (max(p[0] for p in self) if self != frozenset([]) else 0)
+        self._len = max(p[0] for p in self) if self != frozenset([]) else 0
         self._index = None
         return self
 
@@ -223,7 +232,7 @@ class Alignment(frozenset):
         if not positions:
             positions = list(range(len(self._index)))
         for p in positions:
-            image.update(f for _,f in self._index[p])
+            image.update(f for _, f in self._index[p])
         return sorted(image)
 
     def __repr__(self):
@@ -252,10 +261,12 @@ def _giza2pair(pair_string):
     i, j = pair_string.split("-")
     return int(i), int(j)
 
+
 def _naacl2pair(pair_string):
     i, j, p = pair_string.split("-")
     return int(i), int(j)
 
+
 def _check_alignment(num_words, num_mots, alignment):
     """
     Check whether the alignments are legal.
@@ -277,12 +288,15 @@ def _check_alignment(num_words, num_mots, alignment):
         raise IndexError("Alignment is outside boundary of mots")
 
 
-PhraseTableEntry = namedtuple('PhraseTableEntry', ['trg_phrase', 'log_prob'])
+PhraseTableEntry = namedtuple("PhraseTableEntry", ["trg_phrase", "log_prob"])
+
+
 class PhraseTable(object):
     """
     In-memory store of translations for a given phrase, and the log
     probability of those translations
     """
+
     def __init__(self):
         self.src_phrases = dict()
 
@@ -314,8 +328,7 @@ class PhraseTable(object):
         if src_phrase not in self.src_phrases:
             self.src_phrases[src_phrase] = []
         self.src_phrases[src_phrase].append(entry)
-        self.src_phrases[src_phrase].sort(key=lambda e: e.log_prob,
-                                          reverse=True)
+        self.src_phrases[src_phrase].sort(key=lambda e: e.log_prob, reverse=True)
 
     def __contains__(self, src_phrase):
         return src_phrase in self.src_phrases
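
The corrected alignment doctest above can be sanity-checked directly; a minimal sketch using only the values shown in this hunk:

    from nltk.translate import AlignedSent, Alignment

    algnsent = AlignedSent(
        ["klein", "ist", "das", "Haus"],
        ["the", "house", "is", "small"],
        Alignment.fromstring("0-3 1-2 2-0 3-1"),
    )
    print(algnsent.alignment)       # 0-3 1-2 2-0 3-1 (Giza-style str())
    print(algnsent.invert().words)  # ['the', 'house', 'is', 'small']
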
diff --git a/nlp_resource_data/nltk/translate/api.pyc b/nlp_resource_data/nltk/translate/api.pyc
deleted file mode 100755 (executable)
index af1d614..0000000
Binary files a/nlp_resource_data/nltk/translate/api.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/translate/bleu_score.py b/nlp_resource_data/nltk/translate/bleu_score.py
old mode 100755 (executable)
new mode 100644 (file)
index e30d112..a6a79a1
@@ -1,33 +1,30 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: BLEU Score
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
-# Contributors: Dmitrijs Milajevs, Liling Tan
+# Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 """BLEU score implementation."""
-from __future__ import division
 
 import math
 import sys
-import fractions
+from fractions import Fraction
 import warnings
 from collections import Counter
 
 from nltk.util import ngrams
 
-try:
-    fractions.Fraction(0, 1000, _normalize=False)
-    from fractions import Fraction
-except TypeError:
-    from nltk.compat import Fraction
 
-
-def sentence_bleu(references, hypothesis, weights=(0.25, 0.25, 0.25, 0.25),
-                  smoothing_function=None, auto_reweigh=False,
-                  emulate_multibleu=False):
+def sentence_bleu(
+    references,
+    hypothesis,
+    weights=(0.25, 0.25, 0.25, 0.25),
+    smoothing_function=None,
+    auto_reweigh=False,
+):
     """
     Calculate BLEU score (Bilingual Evaluation Understudy) from
     Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
@@ -58,17 +55,32 @@ def sentence_bleu(references, hypothesis, weights=(0.25, 0.25, 0.25, 0.25),
     >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
     0.5045...
 
-    >>> sentence_bleu([reference1, reference2, reference3], hypothesis2) # doctest: +ELLIPSIS
-    0.3969...
+    If there is no n-gram overlap for any order of n-grams, BLEU returns the
+    value 0. This is because the precision for the order of n-grams without
+    overlap is 0, and the geometric mean in the final BLEU score computation
+    multiplies the 0 with the precision of the other n-grams. This results in 0
+    (independently of the precision of the other n-gram orders). The following
+    example has zero 3-gram and 4-gram overlaps:
+
+    >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS
+    0.0
+
+    To avoid this harsh behaviour when no n-gram overlaps are found, a
+    smoothing function can be used.
+
+    >>> chencherry = SmoothingFunction()
+    >>> sentence_bleu([reference1, reference2, reference3], hypothesis2,
+    ...     smoothing_function=chencherry.method1) # doctest: +ELLIPSIS
+    0.0370...
 
-    The default BLEU calculates a score for up to 4grams using uniform
-    weights. To evaluate your translations with higher/lower order ngrams,
-    use customized weights. E.g. when accounting for up to 6grams with uniform
-    weights:
+    The default BLEU calculates a score for up to 4-grams using uniform
+    weights (this is called BLEU-4). To evaluate your translations with
+    higher/lower order ngrams, use customized weights. E.g. when accounting
+    for up to 5-grams with uniform weights (this is called BLEU-5) use:
 
-    >>> weights = (0.1666, 0.1666, 0.1666, 0.1666, 0.1666)
+    >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.)
     >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
-    0.4583...
+    0.3920...
 
     :param references: reference sentences
     :type references: list(list(str))
@@ -78,20 +90,23 @@ def sentence_bleu(references, hypothesis, weights=(0.25, 0.25, 0.25, 0.25),
     :type weights: list(float)
     :param smoothing_function:
     :type smoothing_function: SmoothingFunction
-    :param auto_reweigh:
+    :param auto_reweigh: Option to re-normalize the weights uniformly.
     :type auto_reweigh: bool
-    :param emulate_multibleu: bool
     :return: The sentence-level BLEU score.
     :rtype: float
     """
-    return corpus_bleu([references], [hypothesis],
-                        weights, smoothing_function, auto_reweigh,
-                        emulate_multibleu)
-
-
-def corpus_bleu(list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25),
-                smoothing_function=None, auto_reweigh=False,
-                emulate_multibleu=False):
+    return corpus_bleu(
+        [references], [hypothesis], weights, smoothing_function, auto_reweigh
+    )
+
+
+def corpus_bleu(
+    list_of_references,
+    hypotheses,
+    weights=(0.25, 0.25, 0.25, 0.25),
+    smoothing_function=None,
+    auto_reweigh=False,
+):
     """
     Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
     the hypotheses and their respective references.
@@ -132,27 +147,28 @@ def corpus_bleu(list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25)
     >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
     0.6223...
 
-    :param references: a corpus of lists of reference sentences, w.r.t. hypotheses
-    :type references: list(list(list(str)))
+    :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
+    :type list_of_references: list(list(list(str)))
     :param hypotheses: a list of hypothesis sentences
     :type hypotheses: list(list(str))
     :param weights: weights for unigrams, bigrams, trigrams and so on
     :type weights: list(float)
     :param smoothing_function:
     :type smoothing_function: SmoothingFunction
-    :param auto_reweigh:
+    :param auto_reweigh: Option to re-normalize the weights uniformly.
     :type auto_reweigh: bool
-    :param emulate_multibleu: bool
     :return: The corpus-level BLEU score.
     :rtype: float
     """
     # Before proceeding to compute BLEU, perform sanity checks.
 
-    p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches.
-    p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref.
+    p_numerators = Counter()  # Key = ngram order, and value = no. of ngram matches.
+    p_denominators = Counter()  # Key = ngram order, and value = no. of ngram in ref.
     hyp_lengths, ref_lengths = 0, 0
 
-    assert len(list_of_references) == len(hypotheses), "The number of hypotheses and their reference(s) should be the same"
+    assert len(list_of_references) == len(hypotheses), (
+        "The number of hypotheses and their reference(s) should be the same"
+    )
 
     # Iterate through each hypothesis and their corresponding references.
     for references, hypothesis in zip(list_of_references, hypotheses):
@@ -165,7 +181,7 @@ def corpus_bleu(list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25)
 
         # Calculate the hypothesis length and the closest reference length.
         # Adds them to the corpus-level hypothesis and reference counts.
-        hyp_len =  len(hypothesis)
+        hyp_len = len(hypothesis)
         hyp_lengths += hyp_len
         ref_lengths += closest_ref_length(references, hyp_len)
 
@@ -176,11 +192,13 @@ def corpus_bleu(list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25)
     # order of n-grams < 4 and weights is set at default.
     if auto_reweigh:
         if hyp_lengths < 4 and weights == (0.25, 0.25, 0.25, 0.25):
-            weights = ( 1 / hyp_lengths ,) * hyp_lengths
+            weights = (1 / hyp_lengths,) * hyp_lengths
 
     # Collects the various precision values for the different ngram orders.
-    p_n = [Fraction(p_numerators[i], p_denominators[i], _normalize=False)
-           for i, _ in enumerate(weights, start=1)]
+    p_n = [
+        Fraction(p_numerators[i], p_denominators[i], _normalize=False)
+        for i, _ in enumerate(weights, start=1)
+    ]
 
     # Returns 0 if there's no matching n-grams
     # We only need to check for p_numerators[1] == 0, since if there's
@@ -195,11 +213,12 @@ def corpus_bleu(list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25)
     # Note: smoothing_function() may convert values into floats;
     #       it tries to retain the Fraction object as much as the
     #       smoothing method allows.
-    p_n = smoothing_function(p_n, references=references, hypothesis=hypothesis,
-                             hyp_len=hyp_len, emulate_multibleu=emulate_multibleu)
-    s = (w * math.log(p_i) for i, (w, p_i) in enumerate(zip(weights, p_n)))
-    s =  bp * math.exp(math.fsum(s))
-    return round(s, 4) if emulate_multibleu else s
+    p_n = smoothing_function(
+        p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
+    )
+    s = (w_i * math.log(p_i) for w_i, p_i in zip(weights, p_n))
+    s = bp * math.exp(math.fsum(s))
+    return s
 
 
 def modified_precision(references, hypothesis, n):
@@ -291,17 +310,19 @@ def modified_precision(references, hypothesis, n):
     # Set an empty Counter if hypothesis is empty.
     counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()
     # Extract a union of references' counts.
-    ## max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
+    # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
     max_counts = {}
     for reference in references:
-        reference_counts = Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
+        reference_counts = (
+            Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
+        )
         for ngram in counts:
-            max_counts[ngram] = max(max_counts.get(ngram, 0),
-                                    reference_counts[ngram])
+            max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
 
     # Assigns the intersection between hypothesis and references' counts.
-    clipped_counts = {ngram: min(count, max_counts[ngram])
-                      for ngram, count in counts.items()}
+    clipped_counts = {
+        ngram: min(count, max_counts[ngram]) for ngram, count in counts.items()
+    }
 
     numerator = sum(clipped_counts.values())
     # Ensures that denominator is minimum 1 to avoid ZeroDivisionError.
@@ -319,14 +340,15 @@ def closest_ref_length(references, hyp_len):
 
     :param references: A list of reference translations.
     :type references: list(list(str))
-    :param hypothesis: The length of the hypothesis.
-    :type hypothesis: int
+    :param hyp_len: The length of the hypothesis.
+    :type hyp_len: int
     :return: The length of the reference that's closest to the hypothesis.
     :rtype: int
     """
     ref_lens = (len(reference) for reference in references)
-    closest_ref_len = min(ref_lens, key=lambda ref_len:
-                          (abs(ref_len - hyp_len), ref_len))
+    closest_ref_len = min(
+        ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len)
+    )
     return closest_ref_len
 
 
@@ -409,7 +431,7 @@ def brevity_penalty(closest_ref_len, hyp_len):
     :type hyp_len: int
     :param closest_ref_len: The length of the closest reference for a single
     hypothesis OR the sum of all the closest references for every hypotheses.
-    :type closest_reference_len: int
+    :type closest_ref_len: int
     :return: BLEU's brevity penalty.
     :rtype: float
     """
@@ -430,6 +452,7 @@ class SmoothingFunction:
     Smoothing Techniques for Sentence-Level BLEU. In WMT14.
     http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
     """
+
     def __init__(self, epsilon=0.1, alpha=5, k=5):
         """
         This will initialize the parameters required for the various smoothing
@@ -444,23 +467,23 @@ class SmoothingFunction:
         ...               'Party', 'commands']
 
         >>> chencherry = SmoothingFunction()
-        >>> print (sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
+        >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
         0.4118...
-        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
         0.4118...
-        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
         0.4118...
-        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
         0.4489...
-        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
         0.4118...
-        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
         0.4118...
-        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
         0.4905...
-        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
         0.4135...
-        >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
+        >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
         0.4905...
 
         :param epsilon: the epsilon value use in method 1
@@ -475,30 +498,40 @@ class SmoothingFunction:
         self.k = k
 
     def method0(self, p_n, *args, **kwargs):
-        """ No smoothing. """
+        """
+        No smoothing.
+        """
         p_n_new = []
-        _emulate_multibleu = kwargs['emulate_multibleu']
         for i, p_i in enumerate(p_n):
             if p_i.numerator != 0:
                 p_n_new.append(p_i)
-            elif _emulate_multibleu and i < 5:
-                return [sys.float_info.min]
             else:
-                _msg = str("\nCorpus/Sentence contains 0 counts of {}-gram overlaps.\n"
-                           "BLEU scores might be undesirable; "
-                           "use SmoothingFunction().").format(i+1)
+                _msg = str(
+                    "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n"
+                    "Therefore the BLEU score evaluates to 0, independently of\n"
+                    "how many N-gram overlaps of lower order it contains.\n"
+                    "Consider using lower n-gram order or use "
+                    "SmoothingFunction()"
+                ).format(i + 1)
                 warnings.warn(_msg)
-                # If this order of n-gram returns 0 counts, the higher order
-                # n-gram would also return 0, thus breaking the loop here.
-                break
+                # When numerator==0 (whether the denominator is 0 or not), the
+                # precision for this order is 0 or undefined. Because BLEU's
+                # geometric mean is computed in logarithm space, we return
+                # sys.float_info.min instead, so that
+                # math.log(sys.float_info.min) yields a near-0 precision score.
+                p_n_new.append(sys.float_info.min)
         return p_n_new
 
     def method1(self, p_n, *args, **kwargs):
         """
         Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
         """
-        return [(p_i.numerator + self.epsilon)/ p_i.denominator
-                if p_i.numerator == 0 else p_i for p_i in p_n]
+        return [
+            (p_i.numerator + self.epsilon) / p_i.denominator
+            if p_i.numerator == 0
+            else p_i
+            for p_i in p_n
+        ]
 
     def method2(self, p_n, *args, **kwargs):
         """
@@ -507,7 +540,10 @@ class SmoothingFunction:
         machine translation quality using longest common subsequence and
         skip-bigram statistics. In ACL04.
         """
-        return [Fraction(p_i.numerator + 1, p_i.denominator + 1, _normalize=False) for p_i in p_n]
+        return [
+            Fraction(p_i.numerator + 1, p_i.denominator + 1, _normalize=False)
+            for p_i in p_n
+        ]
 
     def method3(self, p_n, *args, **kwargs):
         """
@@ -524,14 +560,14 @@ class SmoothingFunction:
          - n=3  =>  prec_count = 1/2   (no trigram,  taking 'smoothed' value of 1 / ( 2^k ), with k=1)
          - n=4  =>  prec_count = 1/4   (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
         """
-        incvnt = 1 # From the mteval-v13a.pl, it's referred to as k.
+        incvnt = 1  # From the mteval-v13a.pl, it's referred to as k.
         for i, p_i in enumerate(p_n):
             if p_i.numerator == 0:
-                p_n[i] = 1 / (2**incvnt * p_i.denominator)
-                incvnt+=1
+                p_n[i] = 1 / (2 ** incvnt * p_i.denominator)
+                incvnt += 1
         return p_n
 
-    def method4(self, p_n, references, hypothesis, hyp_len, *args, **kwargs):
+    def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
         """
         Smoothing method 4:
         Shorter translations may have inflated precision values due to having
@@ -539,30 +575,33 @@ class SmoothingFunction:
         smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
         suggests dividing by 1/ln(len(T)), where T is the length of the translation.
         """
+        hyp_len = hyp_len if hyp_len else len(hypothesis)
         for i, p_i in enumerate(p_n):
             if p_i.numerator == 0 and hyp_len != 0:
-                incvnt = i+1 * self.k / math.log(hyp_len) # Note that this K is different from the K from NIST.
-                p_n[i] = 1 / incvnt
+                incvnt = i + 1 * self.k / math.log(
+                    hyp_len
+                )  # Note that this K is different from the K from NIST.
+                p_n[i] = incvnt / p_i.denominator
         return p_n
 
-
-    def method5(self, p_n, references, hypothesis, hyp_len, *args, **kwargs):
+    def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
         """
         Smoothing method 5:
         The matched counts for similar values of n should be similar. To
         calculate the n-gram matched count, it averages the n−1, n and n+1 gram
         matched counts.
         """
+        hyp_len = hyp_len if hyp_len else len(hypothesis)
         m = {}
         # Requires a precision value for an additional ngram order.
         p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
         m[-1] = p_n[0] + 1
         for i, p_i in enumerate(p_n):
-            p_n[i] = (m[i-1] + p_i + p_n_plus1[i+1]) / 3
+            p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3
             m[i] = p_n[i]
         return p_n
 
-    def method6(self, p_n, references, hypothesis, hyp_len, *args, **kwargs):
+    def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
         """
         Smoothing method 6:
         Interpolates the maximum likelihood estimate of the precision *p_n* with
@@ -571,30 +610,30 @@ class SmoothingFunction:
         Gao and He (2013) Training MRF-Based Phrase Translation Models using
         Gradient Ascent. In NAACL.
         """
+        hyp_len = hyp_len if hyp_len else len(hypothesis)
         # This smoothing only works when p_1 and p_2 are non-zero.
         # Raise an error with an appropriate message when the input is too short
         # to use this smoothing technique.
         assert p_n[2], "This smoothing method requires non-zero precision for bigrams."
         for i, p_i in enumerate(p_n):
-            if i in [0,1]: # Skips the first 2 orders of ngrams.
+            if i in [0, 1]:  # Skips the first 2 orders of ngrams.
                 continue
             else:
-                pi0 = 0 if p_n[i-2] == 0 else p_n[i-1]**2 / p_n[i-2]
+                pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2]
                 # No. of ngrams in translation that matches the reference.
                 m = p_i.numerator
                 # No. of ngrams in translation.
-                l = sum(1 for _ in ngrams(hypothesis, i+1))
+                l = sum(1 for _ in ngrams(hypothesis, i + 1))
                 # Calculates the interpolated precision.
                 p_n[i] = (m + self.alpha * pi0) / (l + self.alpha)
         return p_n
 
-    def method7(self, p_n, references, hypothesis, hyp_len, *args, **kwargs):
+    def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
         """
-        Smoothing method 6:
-        Interpolates the maximum likelihood estimate of the precision *p_n* with
-        a prior estimate *pi0*. The prior is estimated by assuming that the ratio
-        between pn and pn−1 will be the same as that between pn−1 and pn−2.
+        Smoothing method 7:
+        Interpolates methods 4 and 5.
         """
+        hyp_len = hyp_len if hyp_len else len(hypothesis)
         p_n = self.method4(p_n, references, hypothesis, hyp_len)
         p_n = self.method5(p_n, references, hypothesis, hyp_len)
         return p_n
diff --git a/nlp_resource_data/nltk/translate/chrf_score.py b/nlp_resource_data/nltk/translate/chrf_score.py
old mode 100755 (executable)
new mode 100644 (file)
index 1748633..ef5fb90
@@ -1,19 +1,22 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: ChrF score
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors: Maja Popovic
-# Contributors: Liling Tan
+# Contributors: Liling Tan, Aleš Tamchyna (Memsource)
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 """ ChrF score implementation """
-from __future__ import division
-from collections import Counter
+from collections import Counter, defaultdict
+import re
 
-from nltk.util import ngrams, everygrams
+from nltk.util import ngrams
 
-def sentence_chrf(reference, hypothesis, min_len=1, max_len=6, beta=3.0):
+
+def sentence_chrf(
+    reference, hypothesis, min_len=1, max_len=6, beta=3.0, ignore_whitespace=True
+):
     """
     Calculates the sentence level CHRF (Character n-gram F-score) described in
      - Maja Popovic. 2015. CHRF: Character n-gram F-score for Automatic MT Evaluation.
@@ -23,7 +26,13 @@ def sentence_chrf(reference, hypothesis, min_len=1, max_len=6, beta=3.0):
        In Proceedings of the 1st Conference on Machine Translation.
        http://www.statmt.org/wmt16/pdf/W16-2341.pdf
 
-    Unlike multi-reference BLEU, CHRF only supports a single reference.
+    This implementation of CHRF only supports a single reference at the moment.
+
+    For details not reported in the paper, consult Maja Popovic's original
+    implementation: https://github.com/m-popovic/chrF
+
+    The code should output results equivalent to running CHRF++ with the
+    following options: -nw 0 -b 3
 
     An example from the original BLEU paper
     http://www.aclweb.org/anthology/P02-1040.pdf
@@ -35,16 +44,16 @@ def sentence_chrf(reference, hypothesis, min_len=1, max_len=6, beta=3.0):
         >>> hyp2 = str('It is to insure the troops forever hearing the activity '
         ...            'guidebook that party direct').split()
         >>> sentence_chrf(ref1, hyp1) # doctest: +ELLIPSIS
-        0.6768...
+        0.6349...
         >>> sentence_chrf(ref1, hyp2) # doctest: +ELLIPSIS
-        0.4201...
+        0.3330...
 
     The infamous "the the the ... " example
 
         >>> ref = 'the cat is on the mat'.split()
         >>> hyp = 'the the the the the the the'.split()
         >>> sentence_chrf(ref, hyp)  # doctest: +ELLIPSIS
-        0.2530...
+        0.1468...
 
     An example to show that this function allows users to use strings instead of
     tokens, i.e. list(str) as inputs.
@@ -54,16 +63,16 @@ def sentence_chrf(reference, hypothesis, min_len=1, max_len=6, beta=3.0):
         >>> hyp1 = str('It is a guide to action which ensures that the military '
         ...            'always obeys the commands of the party')
         >>> sentence_chrf(ref1, hyp1) # doctest: +ELLIPSIS
-        0.6768...
+        0.6349...
         >>> type(ref1) == type(hyp1) == str
         True
         >>> sentence_chrf(ref1.split(), hyp1.split()) # doctest: +ELLIPSIS
-        0.6768...
+        0.6349...
 
     To skip the unigrams and only use 2- to 3-grams:
 
         >>> sentence_chrf(ref1, hyp1, min_len=2, max_len=3) # doctest: +ELLIPSIS
-        0.7018...
+        0.6617...
 
     :param references: reference sentence
     :type references: list(str) / str
@@ -75,18 +84,82 @@ def sentence_chrf(reference, hypothesis, min_len=1, max_len=6, beta=3.0):
     :type max_len: int
     :param beta: the parameter to assign more importance to recall over precision
     :type beta: float
+    :param ignore_whitespace: ignore whitespace characters in scoring
+    :type ignore_whitespace: bool
     :return: the sentence level CHRF score.
     :rtype: float
     """
-    return corpus_chrf([reference], [hypothesis], min_len, max_len, beta=beta)
+    return corpus_chrf(
+        [reference],
+        [hypothesis],
+        min_len,
+        max_len,
+        beta=beta,
+        ignore_whitespace=ignore_whitespace,
+    )
+
 
+def _preprocess(sent, ignore_whitespace):
+    if type(sent) != str:
+        # turn list of tokens into a string
+        sent = " ".join(sent)
 
-def corpus_chrf(list_of_references, hypotheses, min_len=1, max_len=6, beta=3.0):
+    if ignore_whitespace:
+        sent = re.sub(r"\s+", "", sent)
+    return sent
+
+
+def chrf_precision_recall_fscore_support(
+    reference, hypothesis, n, beta=3.0, epsilon=1e-16
+):
+    """
+    This function computes the precision, recall and fscore from the ngram
+    overlaps. It returns the `support`, which is the true-positive count.
+
+    By underspecifying the input type, the function is agnostic as to how
+    the ngrams are computed and simply takes whichever elements are in the
+    list; they could be either tokens or characters.
+
+    :param reference: The reference sentence.
+    :type reference: list
+    :param hypothesis: The hypothesis sentence.
+    :type hypothesis: list
+    :param n: Extract up to the n-th order ngrams
+    :type n: int
+    :param beta: The parameter to assign more importance to recall over precision.
+    :type beta: float
+    :param epsilon: The fallback value if the hypothesis or reference is empty.
+    :type epsilon: float
+    :return: Returns the precision, recall and f-score and support (true positive).
+    :rtype: tuple(float)
+    """
+    ref_ngrams = Counter(ngrams(reference, n))
+    hyp_ngrams = Counter(ngrams(hypothesis, n))
+
+    # calculate the number of ngram matches
+    overlap_ngrams = ref_ngrams & hyp_ngrams
+    tp = sum(overlap_ngrams.values())  # True positives.
+    tpfp = sum(hyp_ngrams.values())  # True positives + False positives.
+    tpfn = sum(ref_ngrams.values())  # True positives + False negatives.
+
+    try:
+        prec = tp / tpfp  # precision
+        rec = tp / tpfn  # recall
+        factor = beta ** 2
+        fscore = (1 + factor) * (prec * rec) / (factor * prec + rec)
+    except ZeroDivisionError:
+        prec = rec = fscore = epsilon
+    return prec, rec, fscore, tp
+
+
+def corpus_chrf(
+    references, hypotheses, min_len=1, max_len=6, beta=3.0, ignore_whitespace=True
+):
     """
     Calculates the corpus level CHRF (Character n-gram F-score), it is the
-    micro-averaged value of the sentence/segment level CHRF score.
+    macro-averaged value of the sentence/segment level CHRF score.
 
-    CHRF only supports a single reference.
+    This implementation of CHRF only supports a single reference at the moment.
 
         >>> ref1 = str('It is a guide to action that ensures that the military '
         ...            'will forever heed Party commands').split()
@@ -98,40 +171,53 @@ def corpus_chrf(list_of_references, hypotheses, min_len=1, max_len=6, beta=3.0):
         >>> hyp2 = str('It is to insure the troops forever hearing the activity '
         ...            'guidebook that party direct')
         >>> corpus_chrf([ref1, ref2, ref1, ref2], [hyp1, hyp2, hyp2, hyp1]) # doctest: +ELLIPSIS
-        0.4915...
+        0.3910...
 
     :param references: a corpus of list of reference sentences, w.r.t. hypotheses
-    :type references: list(list(str)) / list(str)
+    :type references: list(list(str))
     :param hypotheses: a list of hypothesis sentences
-    :type hypotheses: list(list(str)) / list(str)
+    :type hypotheses: list(list(str))
     :param min_len: The minimum order of n-gram this function should extract.
     :type min_len: int
     :param max_len: The maximum order of n-gram this function should extract.
     :type max_len: int
     :param beta: the parameter to assign more importance to recall over precision
     :type beta: float
+    :param ignore_whitespace: ignore whitespace characters in scoring
+    :type ignore_whitespace: bool
     :return: the sentence level CHRF score.
     :rtype: float
     """
 
-    assert len(list_of_references) == len(hypotheses), "The number of hypotheses and their references should be the same"
+    assert len(references) == len(
+        hypotheses
+    ), "The number of hypotheses and their references should be the same"
+    num_sents = len(hypotheses)
+
+    # Keep f-scores for each n-gram order separate
+    ngram_fscores = defaultdict(lambda: list())
 
     # Iterate through each hypothesis and their corresponding references.
-    for reference, hypothesis in zip(list_of_references, hypotheses):
-        # Cheating condition to allow users to input strings instead of tokens.
-        if type(reference) and type(hypothesis) != str:
-            reference, hypothesis = ' '.join(reference), ' '.join(hypothesis)
-        # For each order of ngram, calculate the no. of ngram matches and
-        # keep track of no. of ngram in references.
-        ref_ngrams = Counter(everygrams(reference, min_len, max_len))
-        hyp_ngrams = Counter(everygrams(hypothesis, min_len, max_len))
-        overlap_ngrams = ref_ngrams & hyp_ngrams
-        tp = sum(overlap_ngrams.values()) # True positives.
-        tpfp = sum(hyp_ngrams.values()) # True positives + False positives.
-        tffn = sum(ref_ngrams.values()) # True posities + False negatives.
-
-    precision = tp / tpfp
-    recall = tp / tffn
-    factor = beta**2
-    score = (1+ factor ) * (precision * recall) / ( factor * precision + recall)
-    return score
+    for reference, hypothesis in zip(references, hypotheses):
+
+        # preprocess both reference and hypothesis
+        reference = _preprocess(reference, ignore_whitespace)
+        hypothesis = _preprocess(hypothesis, ignore_whitespace)
+
+        # Calculate f-scores for each sentence and for each n-gram order
+        # separately.
+        for n in range(min_len, max_len + 1):
+            # Compute the precision, recall, fscore and support.
+            prec, rec, fscore, tp = chrf_precision_recall_fscore_support(
+                reference, hypothesis, n, beta=beta
+            )
+            ngram_fscores[n].append(fscore)
+
+    # how many n-gram sizes
+    num_ngram_sizes = len(ngram_fscores)
+
+    # sum of f-scores over all sentences for each n-gram order
+    total_scores = [sum(fscores) for n, fscores in ngram_fscores.items()]
+
+    # macro-average over n-gram orders and over all sentences
+    return (sum(total_scores) / num_ngram_sizes) / num_sents
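
The macro-averaged chrF rewrite above can be checked against its updated doctests; a minimal sketch reusing the "the the the ..." example:

    from nltk.translate.chrf_score import sentence_chrf

    ref = "the cat is on the mat".split()
    hyp = "the the the the the the the".split()
    print(sentence_chrf(ref, hyp))  # ~0.1468, per the doctest above
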
diff --git a/nlp_resource_data/nltk/translate/chrf_score.pyc b/nlp_resource_data/nltk/translate/chrf_score.pyc
deleted file mode 100755 (executable)
index 13fa6a1..0000000
Binary files a/nlp_resource_data/nltk/translate/chrf_score.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/translate/gale_church.py b/nlp_resource_data/nltk/translate/gale_church.py
old mode 100755 (executable)
new mode 100644 (file)
index a543b4c..80aa4c1
@@ -2,7 +2,7 @@
 
 # Natural Language Toolkit: Gale-Church Aligner
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Torsten Marek <marek@ifi.uzh.ch>
 # Contributor: Cassidy Laidlaw, Liling Tan
 # URL: <http://nltk.org/>
@@ -17,43 +17,61 @@ http://aclweb.org/anthology/J93-1004.pdf
 
 """
 
-from __future__ import division
 import math
 
 try:
     from scipy.stats import norm
     from norm import logsf as norm_logsf
 except ImportError:
+
     def erfcc(x):
         """Complementary error function."""
         z = abs(x)
         t = 1 / (1 + 0.5 * z)
-        r = t * math.exp(-z * z -
-                         1.26551223 + t *
-                         (1.00002368 + t *
-                          (.37409196 + t *
-                           (.09678418 + t *
-                            (-.18628806 + t *
-                             (.27886807 + t *
-                              (-1.13520398 + t *
-                               (1.48851587 + t *
-                                (-.82215223 + t * .17087277)))))))))
-        if x >= 0.:
+        r = t * math.exp(
+            -z * z
+            - 1.26551223
+            + t
+            * (
+                1.00002368
+                + t
+                * (
+                    0.37409196
+                    + t
+                    * (
+                        0.09678418
+                        + t
+                        * (
+                            -0.18628806
+                            + t
+                            * (
+                                0.27886807
+                                + t
+                                * (
+                                    -1.13520398
+                                    + t
+                                    * (1.48851587 + t * (-0.82215223 + t * 0.17087277))
+                                )
+                            )
+                        )
+                    )
+                )
+            )
+        )
+        if x >= 0.0:
             return r
         else:
-            return 2. - r
-
+            return 2.0 - r
 
     def norm_cdf(x):
         """Return the area under the normal distribution from M{-∞..x}."""
         return 1 - 0.5 * erfcc(x / math.sqrt(2))
 
-
     def norm_logsf(x):
         try:
             return math.log(1 - norm_cdf(x))
         except ValueError:
-            return float('-inf')
+            return float("-inf")
 
 
 LOG2 = math.log(2)
@@ -80,8 +98,8 @@ class LanguageIndependent(object):
 def trace(backlinks, source_sents_lens, target_sents_lens):
     """
     Traverses the alignment cost from the tracebacks and retrieves
-    appropriate sentence pairs. 
-    
+    appropriate sentence pairs.
+
     :param backlinks: A dictionary where the keys are alignment points and the values are costs (referencing the LanguageIndependent.PRIORS)
     :type backlinks: dict
     :param source_sents_lens: A list of source sentences' lengths
@@ -91,11 +109,11 @@ def trace(backlinks, source_sents_lens, target_sents_lens):
     """
     links = []
     position = (len(source_sents_lens), len(target_sents_lens))
-    while position != (0, 0) and all(p >=0 for p in position):
+    while position != (0, 0) and all(p >= 0 for p in position):
         try:
             s, t = backlinks[position]
         except TypeError:
-            position = (position[0]-1 , position[1]-1)
+            position = (position[0] - 1, position[1] - 1)
             continue
         for i in range(s):
             for j in range(t):
@@ -124,14 +142,16 @@ def align_log_prob(i, j, source_sents, target_sents, alignment, params):
         # actually, the paper says l_s * params.VARIANCE_CHARACTERS, this is based on the C
         # reference implementation. With l_s in the denominator, insertions are impossible.
         m = (l_s + l_t / params.AVERAGE_CHARACTERS) / 2
-        delta = (l_s * params.AVERAGE_CHARACTERS - l_t) / math.sqrt(m * params.VARIANCE_CHARACTERS)
+        delta = (l_s * params.AVERAGE_CHARACTERS - l_t) / math.sqrt(
+            m * params.VARIANCE_CHARACTERS
+        )
     except ZeroDivisionError:
-        return float('-inf')
+        return float("-inf")
 
-    return - (LOG2 + norm_logsf(abs(delta)) + math.log(params.PRIORS[alignment]))
+    return -(LOG2 + norm_logsf(abs(delta)) + math.log(params.PRIORS[alignment]))
 
 
-def align_blocks(source_sents_lens, target_sents_lens, params = LanguageIndependent):
+def align_blocks(source_sents_lens, target_sents_lens, params=LanguageIndependent):
     """Return the sentence alignment of two text blocks (usually paragraphs).
 
         >>> align_blocks([5,5,5], [7,7,7])
@@ -156,22 +176,23 @@ def align_blocks(source_sents_lens, target_sents_lens, params = LanguageIndepend
 
     backlinks = {}
 
-    for i in range(len(source_sents_lens) + 1): 
+    for i in range(len(source_sents_lens) + 1):
         for j in range(len(target_sents_lens) + 1):
-            min_dist = float('inf')
+            min_dist = float("inf")
             min_align = None
             for a in alignment_types:
-                prev_i = - 1 - a[0]
+                prev_i = -1 - a[0]
                 prev_j = j - a[1]
                 if prev_i < -len(D) or prev_j < 0:
                     continue
-                p = D[prev_i][prev_j] + align_log_prob(i, j, source_sents_lens, 
-                                                       target_sents_lens, a, params)
+                p = D[prev_i][prev_j] + align_log_prob(
+                    i, j, source_sents_lens, target_sents_lens, a, params
+                )
                 if p < min_dist:
                     min_dist = p
                     min_align = a
 
-            if min_dist == float('inf'):
+            if min_dist == float("inf"):
                 min_dist = 0
 
             backlinks[(i, j)] = min_align
@@ -180,19 +201,19 @@ def align_blocks(source_sents_lens, target_sents_lens, params = LanguageIndepend
         if len(D) > 2:
             D.pop(0)
         D.append([])
-    
+
     return trace(backlinks, source_sents_lens, target_sents_lens)
 
 
-def align_texts(source_blocks, target_blocks, params = LanguageIndependent):
+def align_texts(source_blocks, target_blocks, params=LanguageIndependent):
     """Creates the sentence alignment of two texts.
 
-    Texts can consist of several blocks. Block boundaries cannot be crossed by sentence 
-    alignment links. 
+    Texts can consist of several blocks. Block boundaries cannot be crossed by sentence
+    alignment links.
 
     Each block consists of a list that contains the lengths (in characters) of the sentences
     in this block.
-    
+
     @param source_blocks: The list of blocks in the source text.
     @param target_blocks: The list of blocks in the target text.
     @param params: the sentence alignment parameters.
@@ -200,49 +221,46 @@ def align_texts(source_blocks, target_blocks, params = LanguageIndependent):
     @returns: A list of sentence alignment lists
     """
     if len(source_blocks) != len(target_blocks):
-        raise ValueError("Source and target texts do not have the same number of blocks.")
-    
-    return [align_blocks(source_block, target_block, params) 
-            for source_block, target_block in zip(source_blocks, target_blocks)]
+        raise ValueError(
+            "Source and target texts do not have the same number of blocks."
+        )
+
+    return [
+        align_blocks(source_block, target_block, params)
+        for source_block, target_block in zip(source_blocks, target_blocks)
+    ]
 
 
 # File I/O functions; may belong in a corpus reader
 
+
 def split_at(it, split_value):
-    """Splits an iterator C{it} at values of C{split_value}. 
+    """Splits an iterator C{it} at values of C{split_value}.
 
     Each instance of C{split_value} is swallowed. The iterator produces
     subiterators which need to be consumed fully before the next subiterator
     can be used.
     """
+
     def _chunk_iterator(first):
         v = first
         while v != split_value:
             yield v
-            v = it.next()
+            v = next(it)
-    
+
     while True:
-        yield _chunk_iterator(it.next())
+        yield _chunk_iterator(next(it))
-        
+
 
 def parse_token_stream(stream, soft_delimiter, hard_delimiter):
-    """Parses a stream of tokens and splits it into sentences (using C{soft_delimiter} tokens) 
+    """Parses a stream of tokens and splits it into sentences (using C{soft_delimiter} tokens)
     and blocks (using C{hard_delimiter} tokens) for use with the L{align_texts} function.
     """
     return [
-        [sum(len(token) for token in sentence_it) 
-         for sentence_it in split_at(block_it, soft_delimiter)]
-        for block_it in split_at(stream, hard_delimiter)]
-
-
-
-
-#    Code for test files in nltk_contrib/align/data/*.tok
-#    import sys
-#    from contextlib import nested
-#    with nested(open(sys.argv[1], "r"), open(sys.argv[2], "r")) as (s, t):
-#        source = parse_token_stream((l.strip() for l in s), ".EOS", ".EOP")
-#        target = parse_token_stream((l.strip() for l in t), ".EOS", ".EOP")
-#        print align_texts(source, target)
-
+        [
+            sum(len(token) for token in sentence_it)
+            for sentence_it in split_at(block_it, soft_delimiter)
+        ]
+        for block_it in split_at(stream, hard_delimiter)
+    ]
 
diff --git a/nlp_resource_data/nltk/translate/gale_church.pyc b/nlp_resource_data/nltk/translate/gale_church.pyc
deleted file mode 100755 (executable)
index c90be27..0000000
Binary files a/nlp_resource_data/nltk/translate/gale_church.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/translate/gdfa.py b/nlp_resource_data/nltk/translate/gdfa.py
old mode 100755 (executable)
new mode 100644 (file)
index e0e7f04..bc0e91b
@@ -1,29 +1,29 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: GDFA word alignment symmetrization
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors: Liling Tan
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-import codecs
 from collections import defaultdict
 
+
 def grow_diag_final_and(srclen, trglen, e2f, f2e):
     """
     This function symmetrizes the source-to-target and target-to-source word
     alignment outputs using the grow-diag-final-and (GDFA) algorithm (Koehn, 2005).
-    
+
     Step 1: Find the intersection of the bidirectional alignment.
-    
+
     Step 2: Search for additional neighbor alignment points to be added, given
             these criteria: (i) neighbor alignments points are not in the
             intersection and (ii) neighbor alignments are in the union.
-            
+
     Step 3: Add all other alignment points that are not in the intersection
             and not in the neighboring alignments that met the criteria, but
             are in the original forward/backward alignment outputs.
-    
+
         >>> forw = ('0-0 2-1 9-2 21-3 10-4 7-5 11-6 9-7 12-8 1-9 3-10 '
         ...         '4-11 17-12 17-13 25-14 13-15 24-16 11-17 28-18')
         >>> back = ('0-0 1-9 2-9 3-10 4-11 5-12 6-6 7-5 8-6 9-7 10-4 '
@@ -39,17 +39,17 @@ def grow_diag_final_and(srclen, trglen, e2f, f2e):
         >>> trglen = len(trgtext.split())
         >>>
         >>> gdfa = grow_diag_final_and(srclen, trglen, forw, back)
-        >>> gdfa == set([(28, 18), (6, 6), (24, 17), (2, 1), (15, 12), (13, 12),
+        >>> gdfa == sorted(set([(28, 18), (6, 6), (24, 17), (2, 1), (15, 12), (13, 12),
         ...         (2, 9), (3, 10), (26, 17), (25, 15), (8, 6), (9, 7), (20,
         ...         13), (18, 13), (0, 0), (10, 4), (13, 15), (23, 14), (7, 5),
         ...         (25, 14), (1, 9), (17, 13), (4, 11), (11, 17), (9, 2), (22,
         ...         12), (27, 18), (24, 16), (21, 3), (19, 12), (17, 12), (5,
-        ...         12), (11, 6), (12, 8)])
+        ...         12), (11, 6), (12, 8)]))
         True
-    
+
     References:
-    Koehn, P., A. Axelrod, A. Birch, C. Callison, M. Osborne, and D. Talbot. 
-    2005. Edinburgh System Description for the 2005 IWSLT Speech 
+    Koehn, P., A. Axelrod, A. Birch, C. Callison, M. Osborne, and D. Talbot.
+    2005. Edinburgh System Description for the 2005 IWSLT Speech
     Translation Evaluation. In MT Eval Workshop.
 
     :type srclen: int
@@ -67,19 +67,19 @@ def grow_diag_final_and(srclen, trglen, e2f, f2e):
     """
 
     # Converts pharaoh text format into list of tuples.
-    e2f = [tuple(map(int,a.split('-'))) for a in e2f.split()]
-    f2e = [tuple(map(int,a.split('-'))) for a in f2e.split()]
-    
-    neighbors = [(-1,0),(0,-1),(1,0),(0,1),(-1,-1),(-1,1),(1,-1),(1,1)]
-    alignment = set(e2f).intersection(set(f2e)) # Find the intersection.
+    e2f = [tuple(map(int, a.split("-"))) for a in e2f.split()]
+    f2e = [tuple(map(int, a.split("-"))) for a in f2e.split()]
+
+    neighbors = [(-1, 0), (0, -1), (1, 0), (0, 1), (-1, -1), (-1, 1), (1, -1), (1, 1)]
+    alignment = set(e2f).intersection(set(f2e))  # Find the intersection.
     union = set(e2f).union(set(f2e))
 
     # *aligned* is used to check if neighbors are aligned in grow_diag()
     aligned = defaultdict(set)
-    for i,j in alignment:
-        aligned['e'].add(i)
-        aligned['j'].add(j)
-    
+    for i, j in alignment:
+        aligned["e"].add(i)
+        aligned["f"].add(j)
+
     def grow_diag():
         """
         Search for the neighbor points and add them to the intersected alignment
@@ -88,44 +88,52 @@ def grow_diag_final_and(srclen, trglen, e2f, f2e):
         prev_len = len(alignment) - 1
         # iterate until no new points added
         while prev_len < len(alignment):
+            no_new_points = True
             # for english word e = 0 ... en
             for e in range(srclen):
                 # for foreign word f = 0 ... fn
-                for f in range(trglen): 
+                for f in range(trglen):
                     # if ( e aligned with f)
-                    if (e,f) in alignment:
+                    if (e, f) in alignment:
                         # for each neighboring point (e-new, f-new)
                         for neighbor in neighbors:
-                            neighbor = tuple(i+j for i,j in zip((e,f),neighbor))
+                            neighbor = tuple(i + j for i, j in zip((e, f), neighbor))
                             e_new, f_new = neighbor
-                            # if ( ( e-new not aligned and f-new not aligned) 
+                            # if ( ( e-new not aligned and f-new not aligned)
                             # and (e-new, f-new in union(e2f, f2e) )
-                            if (e_new not in aligned and f_new not in aligned)\
-                            and neighbor in union:
+                            if (
+                                e_new not in aligned and f_new not in aligned
+                            ) and neighbor in union:
                                 alignment.add(neighbor)
-                                aligned['e'].add(e_new); aligned['f'].add(f_new)
-                                prev_len+=1
-                                                                    
+                                aligned["e"].add(e_new)
+                                aligned["f"].add(f_new)
+                                prev_len += 1
+                                no_new_points = False
+            # iterate until no new points added
+            if no_new_points:
+                break
+
     def final_and(a):
         """
-        Adds remaining points that are not in the intersection, not in the 
+        Adds remaining points that are not in the intersection, not in the
         neighboring alignments but in the original *e2f* and *f2e* alignments
         """
         # for english word e = 0 ... en
         for e_new in range(srclen):
             # for foreign word f = 0 ... fn
             for f_new in range(trglen):
-                # if ( ( e-new not aligned and f-new not aligned) 
+                # if ( ( e-new not aligned and f-new not aligned)
                 # and (e-new, f-new in union(e2f, f2e) )
-                if (e_new not in aligned
+                if (
+                    e_new not in aligned
                     and f_new not in aligned
-                    and (e_new, f_new) in a):
-
+                    and (e_new, f_new) in union
+                ):
                     alignment.add((e_new, f_new))
-                    aligned['e'].add(e_new); aligned['f'].add(f_new)
+                    aligned["e"].add(e_new)
+                    aligned["f"].add(f_new)
 
     grow_diag()
     final_and(e2f)
     final_and(f2e)
-    return alignment
-
+    return sorted(alignment)
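
A hedged sketch of the symmetrization entry point after the change to a sorted return value; the Pharaoh-format strings below are invented toy data, not the doctest input:

    # grow-diag-final-and over toy bidirectional alignments.
    from nltk.translate.gdfa import grow_diag_final_and

    forw = "0-0 1-1 2-2"      # source-to-target, Pharaoh "i-j" format
    back = "0-0 1-1 1-2 2-2"  # target-to-source
    gdfa = grow_diag_final_and(srclen=3, trglen=3, e2f=forw, f2e=back)
    print(gdfa)  # now a sorted list of (src, trg) pairs
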
diff --git a/nlp_resource_data/nltk/translate/gleu_score.py b/nlp_resource_data/nltk/translate/gleu_score.py
old mode 100755 (executable)
new mode 100644 (file)
index e73be4e..9fe7214
@@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: GLEU Score
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors:
 # Contributors: Mike Schuster, Michael Wayne Goodman, Liling Tan
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 """ GLEU score implementation. """
-from __future__ import division
+
 from collections import Counter
 
 from nltk.util import ngrams, everygrams
@@ -81,12 +81,8 @@ def sentence_gleu(references, hypothesis, min_len=1, max_len=4):
     :return: the sentence level GLEU score.
     :rtype: float
     """
-    return corpus_gleu(
-        [references],
-        [hypothesis],
-        min_len=min_len,
-        max_len=max_len
-    )
+    return corpus_gleu([references], [hypothesis], min_len=min_len, max_len=max_len)
+
 
 def corpus_gleu(list_of_references, hypotheses, min_len=1, max_len=4):
     """
@@ -148,7 +144,9 @@ def corpus_gleu(list_of_references, hypotheses, min_len=1, max_len=4):
     :rtype: float
     """
     # sanity check
-    assert len(list_of_references) == len(hypotheses), "The number of hypotheses and their reference(s) should be the same"
+    assert len(list_of_references) == len(
+        hypotheses
+    ), "The number of hypotheses and their reference(s) should be the same"
 
     # sum matches and max-token-lengths over all sentences
     corpus_n_match = 0
@@ -157,7 +155,7 @@ def corpus_gleu(list_of_references, hypotheses, min_len=1, max_len=4):
     for references, hypothesis in zip(list_of_references, hypotheses):
         hyp_ngrams = Counter(everygrams(hypothesis, min_len, max_len))
         tpfp = sum(hyp_ngrams.values())  # True positives + False positives.
-        
+
         hyp_counts = []
         for reference in references:
             ref_ngrams = Counter(everygrams(reference, min_len, max_len))
@@ -180,7 +178,7 @@ def corpus_gleu(list_of_references, hypotheses, min_len=1, max_len=4):
 
         # use the reference yielding the highest score
         if hyp_counts:
-            n_match, n_all = max(hyp_counts, key=lambda hc: hc[0]/hc[1])
+            n_match, n_all = max(hyp_counts, key=lambda hc: hc[0] / hc[1])
             corpus_n_match += n_match
             corpus_n_all += n_all
 
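
For reference, a small sketch of the GLEU entry points touched here; the token lists are invented toy data:

    # Sentence- and corpus-level GLEU on a toy hypothesis/reference pair.
    from nltk.translate.gleu_score import sentence_gleu, corpus_gleu

    ref = ["the", "cat", "sat", "on", "the", "mat"]
    hyp = ["the", "cat", "sat", "on", "mat"]
    print(sentence_gleu([ref], hyp))    # one hypothesis, one or more references
    print(corpus_gleu([[ref]], [hyp]))  # parallel lists over a whole corpus
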
diff --git a/nlp_resource_data/nltk/translate/ibm1.py b/nlp_resource_data/nltk/translate/ibm1.py
old mode 100755 (executable)
new mode 100644 (file)
index 35e0420..013f5e4
 """
 Lexical translation model that ignores word order.
 
-In IBM Model 1, word order is ignored for simplicity. Thus, the
-following three alignments are equally likely. As long as the word
-alignments are equivalent, it doesn't matter where the word
-occurs in the source or target sentence.
+In IBM Model 1, word order is ignored for simplicity. As long as the
+word alignments are equivalent, it doesn't matter where the word occurs
+in the source or target sentence. Thus, the following three alignments
+are equally likely.
 
 Source: je mange du jambon
 Target: i eat some ham
-Alignment: (1,1) (2,2) (3,3) (4,4)
+Alignment: (0,0) (1,1) (2,2) (3,3)
 
 Source: je mange du jambon
 Target: some ham eat i
-Alignment: (1,4) (2,3) (3,1) (4,2)
+Alignment: (0,2) (1,3) (2,1) (3,0)
 
 Source: du jambon je mange
 Target: eat i some ham
-Alignment: (1,3) (2,4) (3,2) (4,1)
+Alignment: (0,3) (1,2) (2,0) (3,1)
+
+Note that an alignment is represented here as
+(word_index_in_target, word_index_in_source).
 
 The EM algorithm used in Model 1 is:
 E step - In the training data, count how many times a source language
@@ -60,7 +63,6 @@ Translation: Parameter Estimation. Computational Linguistics, 19 (2),
 263-311.
 """
 
-from __future__ import division
 from collections import defaultdict
 from nltk.translate import AlignedSent
 from nltk.translate import Alignment
@@ -102,8 +104,7 @@ class IBMModel1(IBMModel):
 
     """
 
-    def __init__(self, sentence_aligned_corpus, iterations,
-                 probability_tables=None):
+    def __init__(self, sentence_aligned_corpus, iterations, probability_tables=None):
         """
         Train on ``sentence_aligned_corpus`` and create a lexical
         translation model.
@@ -131,19 +132,22 @@ class IBMModel1(IBMModel):
             self.set_uniform_probabilities(sentence_aligned_corpus)
         else:
             # Set user-defined probabilities
-            self.translation_table = probability_tables['translation_table']
+            self.translation_table = probability_tables["translation_table"]
 
         for n in range(0, iterations):
             self.train(sentence_aligned_corpus)
 
-        self.__align_all(sentence_aligned_corpus)
+        self.align_all(sentence_aligned_corpus)
 
     def set_uniform_probabilities(self, sentence_aligned_corpus):
         initial_prob = 1 / len(self.trg_vocab)
         if initial_prob < IBMModel.MIN_PROB:
-            warnings.warn("Target language vocabulary is too large (" +
-                          str(len(self.trg_vocab)) + " words). "
-                          "Results may be less accurate.")
+            warnings.warn(
+                "Target language vocabulary is too large ("
+                + str(len(self.trg_vocab))
+                + " words). "
+                "Results may be less accurate."
+            )
 
         for t in self.trg_vocab:
             self.translation_table[t] = defaultdict(lambda: initial_prob)
@@ -211,11 +215,11 @@ class IBMModel1(IBMModel):
 
         return max(prob, IBMModel.MIN_PROB)
 
-    def __align_all(self, parallel_corpus):
+    def align_all(self, parallel_corpus):
         for sentence_pair in parallel_corpus:
-            self.__align(sentence_pair)
+            self.align(sentence_pair)
 
-    def __align(self, sentence_pair):
+    def align(self, sentence_pair):
         """
         Determines the best word alignment for one sentence pair from
         the corpus that the model was trained on.
@@ -233,8 +237,7 @@ class IBMModel1(IBMModel):
 
         for j, trg_word in enumerate(sentence_pair.words):
             # Initialize trg_word to align with the NULL token
-            best_prob = max(self.translation_table[trg_word][None],
-                            IBMModel.MIN_PROB)
+            best_prob = max(self.translation_table[trg_word][None], IBMModel.MIN_PROB)
             best_alignment_point = None
             for i, src_word in enumerate(sentence_pair.mots):
                 align_prob = self.translation_table[trg_word][src_word]
diff --git a/nlp_resource_data/nltk/translate/ibm1.pyc b/nlp_resource_data/nltk/translate/ibm1.pyc
deleted file mode 100755 (executable)
index a72258f..0000000
Binary files a/nlp_resource_data/nltk/translate/ibm1.pyc and /dev/null differ
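
A minimal training sketch for the converted Model 1, using an invented two-pair toy corpus (note the method renamed to the public align_all above):

    # IBM Model 1: lexical translation probabilities via EM (toy data).
    from nltk.translate import AlignedSent, IBMModel1

    corpus = [
        AlignedSent(["i", "eat", "ham"], ["je", "mange", "jambon"]),
        AlignedSent(["i", "eat"], ["je", "mange"]),
    ]
    ibm1 = IBMModel1(corpus, 5)               # 5 EM iterations
    print(ibm1.translation_table["i"]["je"])  # P(target word | source word)
    print(corpus[0].alignment)                # best alignment written back
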
diff --git a/nlp_resource_data/nltk/translate/ibm2.py b/nlp_resource_data/nltk/translate/ibm2.py
old mode 100755 (executable)
new mode 100644 (file)
index f2c17a7..a806d41
@@ -46,14 +46,14 @@ Translation: Parameter Estimation. Computational Linguistics, 19 (2),
 263-311.
 """
 
-from __future__ import division
+import warnings
 from collections import defaultdict
+
 from nltk.translate import AlignedSent
 from nltk.translate import Alignment
 from nltk.translate import IBMModel
 from nltk.translate import IBMModel1
 from nltk.translate.ibm_model import Counts
-import warnings
 
 
 class IBMModel2(IBMModel):
@@ -96,8 +96,7 @@ class IBMModel2(IBMModel):
 
     """
 
-    def __init__(self, sentence_aligned_corpus, iterations,
-                 probability_tables=None):
+    def __init__(self, sentence_aligned_corpus, iterations, probability_tables=None):
         """
         Train on ``sentence_aligned_corpus`` and create a lexical
         translation model and an alignment model.
@@ -130,13 +129,13 @@ class IBMModel2(IBMModel):
             self.set_uniform_probabilities(sentence_aligned_corpus)
         else:
             # Set user-defined probabilities
-            self.translation_table = probability_tables['translation_table']
-            self.alignment_table = probability_tables['alignment_table']
+            self.translation_table = probability_tables["translation_table"]
+            self.alignment_table = probability_tables["alignment_table"]
 
         for n in range(0, iterations):
             self.train(sentence_aligned_corpus)
 
-        self.__align_all(sentence_aligned_corpus)
+        self.align_all(sentence_aligned_corpus)
 
     def set_uniform_probabilities(self, sentence_aligned_corpus):
         # a(i | j,l,m) = 1 / (l+1) for all i, j, l, m
@@ -148,8 +147,11 @@ class IBMModel2(IBMModel):
                 l_m_combinations.add((l, m))
                 initial_prob = 1 / (l + 1)
                 if initial_prob < IBMModel.MIN_PROB:
-                    warnings.warn("A source sentence is too long (" + str(l) +
-                                  " words). Results may be less accurate.")
+                    warnings.warn(
+                        "A source sentence is too long ("
+                        + str(l)
+                        + " words). Results may be less accurate."
+                    )
 
                 for i in range(0, l + 1):
                     for j in range(1, m + 1):
@@ -159,7 +161,7 @@ class IBMModel2(IBMModel):
         counts = Model2Counts()
         for aligned_sentence in parallel_corpus:
             src_sentence = [None] + aligned_sentence.mots
-            trg_sentence = ['UNUSED'] + aligned_sentence.words  # 1-indexed
+            trg_sentence = ["UNUSED"] + aligned_sentence.words  # 1-indexed
             l = len(aligned_sentence.mots)
             m = len(aligned_sentence.words)
 
@@ -171,8 +173,7 @@ class IBMModel2(IBMModel):
                 t = trg_sentence[j]
                 for i in range(0, l + 1):
                     s = src_sentence[i]
-                    count = self.prob_alignment_point(
-                        i, j, src_sentence, trg_sentence)
+                    count = self.prob_alignment_point(i, j, src_sentence, trg_sentence)
                     normalized_count = count / total_count[t]
 
                     counts.update_lexical_translation(normalized_count, s, t)
@@ -188,10 +189,11 @@ class IBMModel2(IBMModel):
             for j, src_sentence_lengths in j_s.items():
                 for l, trg_sentence_lengths in src_sentence_lengths.items():
                     for m in trg_sentence_lengths:
-                        estimate = (counts.alignment[i][j][l][m] /
-                                    counts.alignment_for_any_i[j][l][m])
-                        self.alignment_table[i][j][l][m] = max(estimate,
-                                                               MIN_PROB)
+                        estimate = (
+                            counts.alignment[i][j][l][m]
+                            / counts.alignment_for_any_i[j][l][m]
+                        )
+                        self.alignment_table[i][j][l][m] = max(estimate, MIN_PROB)
 
     def prob_all_alignments(self, src_sentence, trg_sentence):
         """
@@ -212,7 +214,8 @@ class IBMModel2(IBMModel):
             t = trg_sentence[j]
             for i in range(0, len(src_sentence)):
                 alignment_prob_for_t[t] += self.prob_alignment_point(
-                    i, j, src_sentence, trg_sentence)
+                    i, j, src_sentence, trg_sentence
+                )
         return alignment_prob_for_t
 
     def prob_alignment_point(self, i, j, src_sentence, trg_sentence):
@@ -240,16 +243,18 @@ class IBMModel2(IBMModel):
                 continue  # skip the dummy zeroeth element
             trg_word = alignment_info.trg_sentence[j]
             src_word = alignment_info.src_sentence[i]
-            prob *= (self.translation_table[trg_word][src_word] *
-                     self.alignment_table[i][j][l][m])
+            prob *= (
+                self.translation_table[trg_word][src_word]
+                * self.alignment_table[i][j][l][m]
+            )
 
         return max(prob, IBMModel.MIN_PROB)
 
-    def __align_all(self, parallel_corpus):
+    def align_all(self, parallel_corpus):
         for sentence_pair in parallel_corpus:
-            self.__align(sentence_pair)
+            self.align(sentence_pair)
 
-    def __align(self, sentence_pair):
+    def align(self, sentence_pair):
         """
         Determines the best word alignment for one sentence pair from
         the corpus that the model was trained on.
@@ -270,13 +275,17 @@ class IBMModel2(IBMModel):
 
         for j, trg_word in enumerate(sentence_pair.words):
             # Initialize trg_word to align with the NULL token
-            best_prob = (self.translation_table[trg_word][None] *
-                         self.alignment_table[0][j + 1][l][m])
+            best_prob = (
+                self.translation_table[trg_word][None]
+                * self.alignment_table[0][j + 1][l][m]
+            )
             best_prob = max(best_prob, IBMModel.MIN_PROB)
             best_alignment_point = None
             for i, src_word in enumerate(sentence_pair.mots):
-                align_prob = (self.translation_table[trg_word][src_word] *
-                              self.alignment_table[i + 1][j + 1][l][m])
+                align_prob = (
+                    self.translation_table[trg_word][src_word]
+                    * self.alignment_table[i + 1][j + 1][l][m]
+                )
                 if align_prob >= best_prob:
                     best_prob = align_prob
                     best_alignment_point = i
@@ -291,13 +300,15 @@ class Model2Counts(Counts):
     Data object to store counts of various parameters during training.
     Includes counts for alignment.
     """
+
     def __init__(self):
         super(Model2Counts, self).__init__()
         self.alignment = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(
-                lambda: 0.0))))
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
+        )
         self.alignment_for_any_i = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
+            lambda: defaultdict(lambda: defaultdict(lambda: 0.0))
+        )
 
     def update_lexical_translation(self, count, s, t):
         self.t_given_s[t][s] += count
diff --git a/nlp_resource_data/nltk/translate/ibm2.pyc b/nlp_resource_data/nltk/translate/ibm2.pyc
deleted file mode 100755 (executable)
index df9809f..0000000
Binary files a/nlp_resource_data/nltk/translate/ibm2.pyc and /dev/null differ
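
Model 2 layers a positional alignment table over the Model 1 lexical table; a toy sketch under the same invented corpus:

    # IBM Model 2: alignment table a(i | j, l, m), accessed as
    # alignment_table[i][j][l][m] (toy data).
    from nltk.translate import AlignedSent, IBMModel2

    corpus = [AlignedSent(["i", "eat", "ham"], ["je", "mange", "jambon"])]
    ibm2 = IBMModel2(corpus, 5)
    # P(source position i=1 | target position j=1, l=3, m=3); i=0 is NULL.
    print(ibm2.alignment_table[1][1][3][3])
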
diff --git a/nlp_resource_data/nltk/translate/ibm3.py b/nlp_resource_data/nltk/translate/ibm3.py
old mode 100755 (executable)
new mode 100644 (file)
index 8af6059..ed491f9
@@ -73,15 +73,15 @@ Translation: Parameter Estimation. Computational Linguistics, 19 (2),
 263-311.
 """
 
-from __future__ import division
+import warnings
 from collections import defaultdict
 from math import factorial
+
 from nltk.translate import AlignedSent
 from nltk.translate import Alignment
 from nltk.translate import IBMModel
 from nltk.translate import IBMModel2
 from nltk.translate.ibm_model import Counts
-import warnings
 
 
 class IBMModel3(IBMModel):
@@ -134,8 +134,7 @@ class IBMModel3(IBMModel):
 
     """
 
-    def __init__(self, sentence_aligned_corpus, iterations,
-                 probability_tables=None):
+    def __init__(self, sentence_aligned_corpus, iterations, probability_tables=None):
         """
         Train on ``sentence_aligned_corpus`` and create a lexical
         translation model, a distortion model, a fertility model, and a
@@ -170,11 +169,11 @@ class IBMModel3(IBMModel):
             self.set_uniform_probabilities(sentence_aligned_corpus)
         else:
             # Set user-defined probabilities
-            self.translation_table = probability_tables['translation_table']
-            self.alignment_table = probability_tables['alignment_table']
-            self.fertility_table = probability_tables['fertility_table']
-            self.p1 = probability_tables['p1']
-            self.distortion_table = probability_tables['distortion_table']
+            self.translation_table = probability_tables["translation_table"]
+            self.alignment_table = probability_tables["alignment_table"]
+            self.fertility_table = probability_tables["fertility_table"]
+            self.p1 = probability_tables["p1"]
+            self.distortion_table = probability_tables["distortion_table"]
 
         for n in range(0, iterations):
             self.train(sentence_aligned_corpus)
@@ -182,8 +181,10 @@ class IBMModel3(IBMModel):
     def reset_probabilities(self):
         super(IBMModel3, self).reset_probabilities()
         self.distortion_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(
-                lambda: self.MIN_PROB))))
+            lambda: defaultdict(
+                lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB))
+            )
+        )
         """
         dict[int][int][int][int]: float. Probability(j | i,l,m).
         Values accessed as ``distortion_table[j][i][l][m]``.
@@ -199,8 +200,11 @@ class IBMModel3(IBMModel):
                 l_m_combinations.add((l, m))
                 initial_prob = 1 / m
                 if initial_prob < IBMModel.MIN_PROB:
-                    warnings.warn("A target sentence is too long (" + str(m) +
-                                  " words). Results may be less accurate.")
+                    warnings.warn(
+                        "A target sentence is too long ("
+                        + str(m)
+                        + " words). Results may be less accurate."
+                    )
                 for j in range(1, m + 1):
                     for i in range(0, l + 1):
                         self.distortion_table[j][i][l][m] = initial_prob
@@ -227,7 +231,8 @@ class IBMModel3(IBMModel):
             sampled_alignments, best_alignment = self.sample(aligned_sentence)
             # Record the most probable alignment
             aligned_sentence.alignment = Alignment(
-                best_alignment.zero_indexed_alignment())
+                best_alignment.zero_indexed_alignment()
+            )
 
             # E step (a): Compute normalization factors to weigh counts
             total_count = self.prob_of_alignments(sampled_alignments)
@@ -239,9 +244,9 @@ class IBMModel3(IBMModel):
 
                 for j in range(1, m + 1):
                     counts.update_lexical_translation(
-                        normalized_count, alignment_info, j)
-                    counts.update_distortion(
-                        normalized_count, alignment_info, j, l, m)
+                        normalized_count, alignment_info, j
+                    )
+                    counts.update_distortion(normalized_count, alignment_info, j, l, m)
 
                 counts.update_null_generation(normalized_count, alignment_info)
                 counts.update_fertility(normalized_count, alignment_info)
@@ -263,10 +268,11 @@ class IBMModel3(IBMModel):
             for i, src_sentence_lengths in i_s.items():
                 for l, trg_sentence_lengths in src_sentence_lengths.items():
                     for m in trg_sentence_lengths:
-                        estimate = (counts.distortion[j][i][l][m] /
-                                    counts.distortion_for_any_j[i][l][m])
-                        self.distortion_table[j][i][l][m] = max(estimate,
-                                                                MIN_PROB)
+                        estimate = (
+                            counts.distortion[j][i][l][m]
+                            / counts.distortion_for_any_j[i][l][m]
+                        )
+                        self.distortion_table[j][i][l][m] = max(estimate, MIN_PROB)
 
     def prob_t_a_given_s(self, alignment_info):
         """
@@ -285,8 +291,7 @@ class IBMModel3(IBMModel):
 
         # Combine NULL insertion probability
         null_fertility = alignment_info.fertility_of_i(0)
-        probability *= (pow(p1, null_fertility) *
-                        pow(p0, m - 2 * null_fertility))
+        probability *= pow(p1, null_fertility) * pow(p0, m - 2 * null_fertility)
         if probability < MIN_PROB:
             return MIN_PROB
 
@@ -299,8 +304,9 @@ class IBMModel3(IBMModel):
         # Combine fertility probabilities
         for i in range(1, l + 1):
             fertility = alignment_info.fertility_of_i(i)
-            probability *= (factorial(fertility) *
-                self.fertility_table[fertility][src_sentence[i]])
+            probability *= (
+                factorial(fertility) * self.fertility_table[fertility][src_sentence[i]]
+            )
             if probability < MIN_PROB:
                 return MIN_PROB
 
@@ -310,8 +316,9 @@ class IBMModel3(IBMModel):
             i = alignment_info.alignment[j]
             s = src_sentence[i]
 
-            probability *= (self.translation_table[t][s] *
-                self.distortion_table[j][i][l][m])
+            probability *= (
+                self.translation_table[t][s] * self.distortion_table[j][i][l][m]
+            )
             if probability < MIN_PROB:
                 return MIN_PROB
 
@@ -323,13 +330,15 @@ class Model3Counts(Counts):
     Data object to store counts of various parameters during training.
     Includes counts for distortion.
     """
+
     def __init__(self):
         super(Model3Counts, self).__init__()
         self.distortion = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(
-                lambda: 0.0))))
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
+        )
         self.distortion_for_any_j = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
+            lambda: defaultdict(lambda: defaultdict(lambda: 0.0))
+        )
 
     def update_distortion(self, count, alignment_info, j, l, m):
         i = alignment_info.alignment[j]
diff --git a/nlp_resource_data/nltk/translate/ibm3.pyc b/nlp_resource_data/nltk/translate/ibm3.pyc
deleted file mode 100755 (executable)
index 073a93a..0000000
Binary files a/nlp_resource_data/nltk/translate/ibm3.pyc and /dev/null differ
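
Model 3 adds fertility and distortion on top of Model 2; a toy sketch of the two tables trained above (same invented corpus):

    # IBM Model 3: distortion d(j | i, l, m) and fertility n(phi | word).
    from nltk.translate import AlignedSent, IBMModel3

    corpus = [AlignedSent(["i", "eat", "ham"], ["je", "mange", "jambon"])]
    ibm3 = IBMModel3(corpus, 5)
    print(ibm3.distortion_table[1][1][3][3])  # distortion_table[j][i][l][m]
    print(ibm3.fertility_table[1]["je"])      # fertility_table[phi][src_word]
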
diff --git a/nlp_resource_data/nltk/translate/ibm4.py b/nlp_resource_data/nltk/translate/ibm4.py
old mode 100755 (executable)
new mode 100644 (file)
index bb8d913..fc6c295
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: IBM Model 4
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Tah Wei Hoon <hoon.tw@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -101,16 +101,16 @@ Translation: Parameter Estimation. Computational Linguistics, 19 (2),
 263-311.
 """
 
-from __future__ import division
+import warnings
 from collections import defaultdict
 from math import factorial
+
 from nltk.translate import AlignedSent
 from nltk.translate import Alignment
 from nltk.translate import IBMModel
 from nltk.translate import IBMModel3
 from nltk.translate.ibm_model import Counts
 from nltk.translate.ibm_model import longest_target_sentence_length
-import warnings
 
 
 class IBMModel4(IBMModel):
@@ -165,9 +165,14 @@ class IBMModel4(IBMModel):
 
     """
 
-    def __init__(self, sentence_aligned_corpus, iterations,
-                 source_word_classes, target_word_classes,
-                 probability_tables=None):
+    def __init__(
+        self,
+        sentence_aligned_corpus,
+        iterations,
+        source_word_classes,
+        target_word_classes,
+        probability_tables=None,
+    ):
         """
         Train on ``sentence_aligned_corpus`` and create a lexical
         translation model, distortion models, a fertility model, and a
@@ -215,14 +220,14 @@ class IBMModel4(IBMModel):
             self.set_uniform_probabilities(sentence_aligned_corpus)
         else:
             # Set user-defined probabilities
-            self.translation_table = probability_tables['translation_table']
-            self.alignment_table = probability_tables['alignment_table']
-            self.fertility_table = probability_tables['fertility_table']
-            self.p1 = probability_tables['p1']
-            self.head_distortion_table = probability_tables[
-                'head_distortion_table']
+            self.translation_table = probability_tables["translation_table"]
+            self.alignment_table = probability_tables["alignment_table"]
+            self.fertility_table = probability_tables["fertility_table"]
+            self.p1 = probability_tables["p1"]
+            self.head_distortion_table = probability_tables["head_distortion_table"]
             self.non_head_distortion_table = probability_tables[
-                'non_head_distortion_table']
+                "non_head_distortion_table"
+            ]
 
         for n in range(0, iterations):
             self.train(sentence_aligned_corpus)
@@ -230,7 +235,8 @@ class IBMModel4(IBMModel):
     def reset_probabilities(self):
         super(IBMModel4, self).reset_probabilities()
         self.head_distortion_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB)))
+            lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB))
+        )
         """
         dict[int][int][int]: float. Probability(displacement of head
         word | word class of previous cept,target word class).
@@ -238,7 +244,8 @@ class IBMModel4(IBMModel):
         """
 
         self.non_head_distortion_table = defaultdict(
-            lambda: defaultdict(lambda: self.MIN_PROB))
+            lambda: defaultdict(lambda: self.MIN_PROB)
+        )
         """
         dict[int][int]: float. Probability(displacement of non-head
         word | target word class).
@@ -263,18 +270,21 @@ class IBMModel4(IBMModel):
         else:
             initial_prob = 1 / (2 * (max_m - 1))
         if initial_prob < IBMModel.MIN_PROB:
-            warnings.warn("A target sentence is too long (" + str(max_m) +
-                          " words). Results may be less accurate.")
+            warnings.warn(
+                "A target sentence is too long ("
+                + str(max_m)
+                + " words). Results may be less accurate."
+            )
 
         for dj in range(1, max_m):
             self.head_distortion_table[dj] = defaultdict(
-                lambda: defaultdict(lambda: initial_prob))
+                lambda: defaultdict(lambda: initial_prob)
+            )
             self.head_distortion_table[-dj] = defaultdict(
-                lambda: defaultdict(lambda: initial_prob))
-            self.non_head_distortion_table[dj] = defaultdict(
-                lambda: initial_prob)
-            self.non_head_distortion_table[-dj] = defaultdict(
-                lambda: initial_prob)
+                lambda: defaultdict(lambda: initial_prob)
+            )
+            self.non_head_distortion_table[dj] = defaultdict(lambda: initial_prob)
+            self.non_head_distortion_table[-dj] = defaultdict(lambda: initial_prob)
 
     def train(self, parallel_corpus):
         counts = Model4Counts()
@@ -285,7 +295,8 @@ class IBMModel4(IBMModel):
             sampled_alignments, best_alignment = self.sample(aligned_sentence)
             # Record the most probable alignment
             aligned_sentence.alignment = Alignment(
-                best_alignment.zero_indexed_alignment())
+                best_alignment.zero_indexed_alignment()
+            )
 
             # E step (a): Compute normalization factors to weigh counts
             total_count = self.prob_of_alignments(sampled_alignments)
@@ -297,10 +308,15 @@ class IBMModel4(IBMModel):
 
                 for j in range(1, m + 1):
                     counts.update_lexical_translation(
-                        normalized_count, alignment_info, j)
+                        normalized_count, alignment_info, j
+                    )
                     counts.update_distortion(
-                        normalized_count, alignment_info, j,
-                        self.src_classes, self.trg_classes)
+                        normalized_count,
+                        alignment_info,
+                        j,
+                        self.src_classes,
+                        self.trg_classes,
+                    )
 
                 counts.update_null_generation(normalized_count, alignment_info)
                 counts.update_fertility(normalized_count, alignment_info)
@@ -321,16 +337,19 @@ class IBMModel4(IBMModel):
         for dj, src_classes in counts.head_distortion.items():
             for s_cls, trg_classes in src_classes.items():
                 for t_cls in trg_classes:
-                    estimate = (counts.head_distortion[dj][s_cls][t_cls] /
-                                counts.head_distortion_for_any_dj[s_cls][t_cls])
-                    head_d_table[dj][s_cls][t_cls] = max(estimate,
-                                                         IBMModel.MIN_PROB)
+                    estimate = (
+                        counts.head_distortion[dj][s_cls][t_cls]
+                        / counts.head_distortion_for_any_dj[s_cls][t_cls]
+                    )
+                    head_d_table[dj][s_cls][t_cls] = max(estimate, IBMModel.MIN_PROB)
 
         non_head_d_table = self.non_head_distortion_table
         for dj, trg_classes in counts.non_head_distortion.items():
             for t_cls in trg_classes:
-                estimate = (counts.non_head_distortion[dj][t_cls] /
-                            counts.non_head_distortion_for_any_dj[t_cls])
+                estimate = (
+                    counts.non_head_distortion[dj][t_cls]
+                    / counts.non_head_distortion_for_any_dj[t_cls]
+                )
                 non_head_d_table[dj][t_cls] = max(estimate, IBMModel.MIN_PROB)
 
     def prob_t_a_given_s(self, alignment_info):
@@ -352,7 +371,7 @@ class IBMModel4(IBMModel):
             p0 = 1 - p1
             null_fertility = alignment_info.fertility_of_i(0)
             m = len(alignment_info.trg_sentence) - 1
-            value *= (pow(p1, null_fertility) * pow(p0, m - 2 * null_fertility))
+            value *= pow(p1, null_fertility) * pow(p0, m - 2 * null_fertility)
             if value < MIN_PROB:
                 return MIN_PROB
 
@@ -366,8 +385,10 @@ class IBMModel4(IBMModel):
             src_sentence = alignment_info.src_sentence
             for i in range(1, len(src_sentence)):
                 fertility = alignment_info.fertility_of_i(i)
-                value *= (factorial(fertility) *
-                          ibm_model.fertility_table[fertility][src_sentence[i]])
+                value *= (
+                    factorial(fertility)
+                    * ibm_model.fertility_table[fertility][src_sentence[i]]
+                )
                 if value < MIN_PROB:
                     return MIN_PROB
             return value
@@ -400,6 +421,7 @@ class IBMModel4(IBMModel):
             trg_class = ibm_model.trg_classes[t]
             dj = j - previous_position
             return ibm_model.non_head_distortion_table[dj][trg_class]
+
         # end nested functions
 
         # Abort computation whenever probability falls below MIN_PROB at
@@ -429,18 +451,17 @@ class Model4Counts(Counts):
     Data object to store counts of various parameters during training.
     Includes counts for distortion.
     """
+
     def __init__(self):
         super(Model4Counts, self).__init__()
         self.head_distortion = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
-        self.head_distortion_for_any_dj = defaultdict(
-            lambda: defaultdict(lambda: 0.0))
-        self.non_head_distortion = defaultdict(
-            lambda: defaultdict(lambda: 0.0))
+            lambda: defaultdict(lambda: defaultdict(lambda: 0.0))
+        )
+        self.head_distortion_for_any_dj = defaultdict(lambda: defaultdict(lambda: 0.0))
+        self.non_head_distortion = defaultdict(lambda: defaultdict(lambda: 0.0))
         self.non_head_distortion_for_any_dj = defaultdict(lambda: 0.0)
 
-    def update_distortion(self, count, alignment_info, j,
-                          src_classes, trg_classes):
+    def update_distortion(self, count, alignment_info, j, src_classes, trg_classes):
         i = alignment_info.alignment[j]
         t = alignment_info.trg_sentence[j]
         if i == 0:
diff --git a/nlp_resource_data/nltk/translate/ibm4.pyc b/nlp_resource_data/nltk/translate/ibm4.pyc
deleted file mode 100755 (executable)
index 144ecf1..0000000
Binary files a/nlp_resource_data/nltk/translate/ibm4.pyc and /dev/null differ
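
Model 4 conditions distortion on word classes, so its constructor takes two class maps; a toy sketch that puts every invented word in class 0:

    # IBM Model 4: head/non-head distortion conditioned on word classes.
    from nltk.translate import AlignedSent, IBMModel4

    corpus = [AlignedSent(["i", "eat", "ham"], ["je", "mange", "jambon"])]
    src_classes = {"je": 0, "mange": 0, "jambon": 0}
    trg_classes = {"i": 0, "eat": 0, "ham": 0}
    ibm4 = IBMModel4(corpus, 5, src_classes, trg_classes)
    # head_distortion_table[dj][src_class][trg_class]
    print(ibm4.head_distortion_table[1][0][0])
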
diff --git a/nlp_resource_data/nltk/translate/ibm5.py b/nlp_resource_data/nltk/translate/ibm5.py
old mode 100755 (executable)
new mode 100644 (file)
index df34afc..88a64f2
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: IBM Model 5
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Tah Wei Hoon <hoon.tw@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -111,16 +111,16 @@ Translation: Parameter Estimation. Computational Linguistics, 19 (2),
 263-311.
 """
 
-from __future__ import division
+import warnings
 from collections import defaultdict
 from math import factorial
+
 from nltk.translate import AlignedSent
 from nltk.translate import Alignment
 from nltk.translate import IBMModel
 from nltk.translate import IBMModel4
 from nltk.translate.ibm_model import Counts
 from nltk.translate.ibm_model import longest_target_sentence_length
-import warnings
 
 
 class IBMModel5(IBMModel):
@@ -167,14 +167,20 @@ class IBMModel5(IBMModel):
     Alignment([(0, 0), (1, 1), (2, 2), (3, None), (4, 3)])
 
     """
+
     MIN_SCORE_FACTOR = 0.2
     """
     Alignments with scores below this factor are pruned during sampling
     """
 
-    def __init__(self, sentence_aligned_corpus, iterations,
-                 source_word_classes, target_word_classes,
-                 probability_tables=None):
+    def __init__(
+        self,
+        sentence_aligned_corpus,
+        iterations,
+        source_word_classes,
+        target_word_classes,
+        probability_tables=None,
+    ):
         """
         Train on ``sentence_aligned_corpus`` and create a lexical
         translation model, vacancy models, a fertility model, and a
@@ -215,8 +221,12 @@ class IBMModel5(IBMModel):
 
         if probability_tables is None:
             # Get probabilities from IBM model 4
-            ibm4 = IBMModel4(sentence_aligned_corpus, iterations,
-                             source_word_classes, target_word_classes)
+            ibm4 = IBMModel4(
+                sentence_aligned_corpus,
+                iterations,
+                source_word_classes,
+                target_word_classes,
+            )
             self.translation_table = ibm4.translation_table
             self.alignment_table = ibm4.alignment_table
             self.fertility_table = ibm4.fertility_table
@@ -226,18 +236,16 @@ class IBMModel5(IBMModel):
             self.set_uniform_probabilities(sentence_aligned_corpus)
         else:
             # Set user-defined probabilities
-            self.translation_table = probability_tables['translation_table']
-            self.alignment_table = probability_tables['alignment_table']
-            self.fertility_table = probability_tables['fertility_table']
-            self.p1 = probability_tables['p1']
-            self.head_distortion_table = probability_tables[
-                'head_distortion_table']
+            self.translation_table = probability_tables["translation_table"]
+            self.alignment_table = probability_tables["alignment_table"]
+            self.fertility_table = probability_tables["fertility_table"]
+            self.p1 = probability_tables["p1"]
+            self.head_distortion_table = probability_tables["head_distortion_table"]
             self.non_head_distortion_table = probability_tables[
-                'non_head_distortion_table']
-            self.head_vacancy_table = probability_tables[
-                'head_vacancy_table']
-            self.non_head_vacancy_table = probability_tables[
-                'non_head_vacancy_table']
+                "non_head_distortion_table"
+            ]
+            self.head_vacancy_table = probability_tables["head_vacancy_table"]
+            self.non_head_vacancy_table = probability_tables["non_head_vacancy_table"]
 
         for n in range(0, iterations):
             self.train(sentence_aligned_corpus)
@@ -245,7 +253,8 @@ class IBMModel5(IBMModel):
     def reset_probabilities(self):
         super(IBMModel5, self).reset_probabilities()
         self.head_vacancy_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB)))
+            lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB))
+        )
         """
         dict[int][int][int]: float. Probability(vacancy difference |
         number of remaining valid positions,target word class).
@@ -253,7 +262,8 @@ class IBMModel5(IBMModel):
         """
 
         self.non_head_vacancy_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB)))
+            lambda: defaultdict(lambda: defaultdict(lambda: self.MIN_PROB))
+        )
         """
         dict[int][int][int]: float. Probability(vacancy difference |
         number of remaining valid positions,target word class).
@@ -276,20 +286,25 @@ class IBMModel5(IBMModel):
         # Thus, the number of possible vacancy difference values is
         # (max_v) - (1-max_v) + 1 = 2 * max_v.
         if max_m > 0 and (1 / (2 * max_m)) < IBMModel.MIN_PROB:
-            warnings.warn("A target sentence is too long (" + str(max_m) +
-                          " words). Results may be less accurate.")
+            warnings.warn(
+                "A target sentence is too long ("
+                + str(max_m)
+                + " words). Results may be less accurate."
+            )
 
         for max_v in range(1, max_m + 1):
             for dv in range(1, max_m + 1):
                 initial_prob = 1 / (2 * max_v)
-                self.head_vacancy_table[dv][max_v] = defaultdict(
-                    lambda: initial_prob)
-                self.head_vacancy_table[-(dv-1)][max_v] = defaultdict(
-                    lambda: initial_prob)
+                self.head_vacancy_table[dv][max_v] = defaultdict(lambda: initial_prob)
+                self.head_vacancy_table[-(dv - 1)][max_v] = defaultdict(
+                    lambda: initial_prob
+                )
                 self.non_head_vacancy_table[dv][max_v] = defaultdict(
-                    lambda: initial_prob)
-                self.non_head_vacancy_table[-(dv-1)][max_v] = defaultdict(
-                    lambda: initial_prob)
+                    lambda: initial_prob
+                )
+                self.non_head_vacancy_table[-(dv - 1)][max_v] = defaultdict(
+                    lambda: initial_prob
+                )
 
     def train(self, parallel_corpus):
         counts = Model5Counts()
@@ -301,7 +316,8 @@ class IBMModel5(IBMModel):
             sampled_alignments, best_alignment = self.sample(aligned_sentence)
             # Record the most probable alignment
             aligned_sentence.alignment = Alignment(
-                best_alignment.zero_indexed_alignment())
+                best_alignment.zero_indexed_alignment()
+            )
 
             # E step (a): Compute normalization factors to weigh counts
             total_count = self.prob_of_alignments(sampled_alignments)
@@ -313,13 +329,14 @@ class IBMModel5(IBMModel):
 
                 for j in range(1, m + 1):
                     counts.update_lexical_translation(
-                        normalized_count, alignment_info, j)
+                        normalized_count, alignment_info, j
+                    )
 
                 slots = Slots(m)
                 for i in range(1, l + 1):
                     counts.update_vacancy(
-                        normalized_count, alignment_info, i,
-                        self.trg_classes, slots)
+                        normalized_count, alignment_info, i, self.trg_classes, slots
+                    )
 
                 counts.update_null_generation(normalized_count, alignment_info)
                 counts.update_fertility(normalized_count, alignment_info)
@@ -359,8 +376,9 @@ class IBMModel5(IBMModel):
             and the best alignment of the set for convenience
         :rtype: set(AlignmentInfo), AlignmentInfo
         """
-        sampled_alignments, best_alignment = super(
-            IBMModel5, self).sample(sentence_pair)
+        sampled_alignments, best_alignment = super(IBMModel5, self).sample(
+            sentence_pair
+        )
         return self.prune(sampled_alignments), best_alignment
 
     def prune(self, alignment_infos):
@@ -410,7 +428,8 @@ class IBMModel5(IBMModel):
             old_alignment = alignment
             for neighbor_alignment in self.neighboring(alignment, j_pegged):
                 neighbor_probability = IBMModel4.model4_prob_t_a_given_s(
-                    neighbor_alignment, self)
+                    neighbor_alignment, self
+                )
 
                 if neighbor_probability > max_probability:
                     alignment = neighbor_alignment
@@ -439,7 +458,7 @@ class IBMModel5(IBMModel):
             p0 = 1 - p1
             null_fertility = alignment_info.fertility_of_i(0)
             m = len(alignment_info.trg_sentence) - 1
-            value *= (pow(p1, null_fertility) * pow(p0, m - 2 * null_fertility))
+            value *= pow(p1, null_fertility) * pow(p0, m - 2 * null_fertility)
             if value < MIN_PROB:
                 return MIN_PROB
 
@@ -453,8 +472,10 @@ class IBMModel5(IBMModel):
             src_sentence = alignment_info.src_sentence
             for i in range(1, len(src_sentence)):
                 fertility = alignment_info.fertility_of_i(i)
-                value *= (factorial(fertility) *
-                          self.fertility_table[fertility][src_sentence[i]])
+                value *= (
+                    factorial(fertility)
+                    * self.fertility_table[fertility][src_sentence[i]]
+                )
                 if value < MIN_PROB:
                     return MIN_PROB
             return value
@@ -494,8 +515,7 @@ class IBMModel5(IBMModel):
                 previous_vacancies = slots.vacancies_at(previous_position)
                 j = tablet[k]
                 dv = slots.vacancies_at(j) - previous_vacancies
-                max_v = (total_vacancies - tablet_length + k + 1 -
-                         previous_vacancies)
+                max_v = total_vacancies - tablet_length + k + 1 - previous_vacancies
                 trg_class = self.trg_classes[alignment_info.trg_sentence[j]]
                 value *= self.non_head_vacancy_table[dv][max_v][trg_class]
                 slots.occupy(j)  # mark position as occupied
@@ -504,6 +524,7 @@ class IBMModel5(IBMModel):
                     return MIN_PROB
 
             return value
+
         # end nested functions
 
         # Abort computation whenever probability falls below MIN_PROB at
@@ -534,20 +555,21 @@ class IBMModel5(IBMModel):
         for dv, max_vs in counts.head_vacancy.items():
             for max_v, trg_classes in max_vs.items():
                 for t_cls in trg_classes:
-                    estimate = (counts.head_vacancy[dv][max_v][t_cls] /
-                                counts.head_vacancy_for_any_dv[max_v][t_cls])
-                    head_vacancy_table[dv][max_v][t_cls] = max(estimate,
-                                                               MIN_PROB)
+                    estimate = (
+                        counts.head_vacancy[dv][max_v][t_cls]
+                        / counts.head_vacancy_for_any_dv[max_v][t_cls]
+                    )
+                    head_vacancy_table[dv][max_v][t_cls] = max(estimate, MIN_PROB)
 
         non_head_vacancy_table = self.non_head_vacancy_table
         for dv, max_vs in counts.non_head_vacancy.items():
             for max_v, trg_classes in max_vs.items():
                 for t_cls in trg_classes:
                     estimate = (
-                        counts.non_head_vacancy[dv][max_v][t_cls] /
-                        counts.non_head_vacancy_for_any_dv[max_v][t_cls])
-                    non_head_vacancy_table[dv][max_v][t_cls] = max(estimate,
-                                                                   MIN_PROB)
+                        counts.non_head_vacancy[dv][max_v][t_cls]
+                        / counts.non_head_vacancy_for_any_dv[max_v][t_cls]
+                    )
+                    non_head_vacancy_table[dv][max_v][t_cls] = max(estimate, MIN_PROB)
 
 
 class Model5Counts(Counts):
@@ -555,16 +577,17 @@ class Model5Counts(Counts):
     Data object to store counts of various parameters during training.
     Includes counts for vacancies.
     """
+
     def __init__(self):
         super(Model5Counts, self).__init__()
         self.head_vacancy = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
-        self.head_vacancy_for_any_dv = defaultdict(
-            lambda: defaultdict(lambda: 0.0))
+            lambda: defaultdict(lambda: defaultdict(lambda: 0.0))
+        )
+        self.head_vacancy_for_any_dv = defaultdict(lambda: defaultdict(lambda: 0.0))
         self.non_head_vacancy = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
-        self.non_head_vacancy_for_any_dv = defaultdict(
-            lambda: defaultdict(lambda: 0.0))
+            lambda: defaultdict(lambda: defaultdict(lambda: 0.0))
+        )
+        self.non_head_vacancy_for_any_dv = defaultdict(lambda: defaultdict(lambda: 0.0))
 
     def update_vacancy(self, count, alignment_info, i, trg_classes, slots):
         """
@@ -602,8 +625,7 @@ class Model5Counts(Counts):
             previous_vacancies = slots.vacancies_at(previous_position)
             j = tablet[k]
             dv = slots.vacancies_at(j) - previous_vacancies
-            max_v = (total_vacancies - tablet_length + k + 1 -
-                     previous_vacancies)
+            max_v = total_vacancies - tablet_length + k + 1 - previous_vacancies
             trg_class = trg_classes[alignment_info.trg_sentence[j]]
             self.non_head_vacancy[dv][max_v][trg_class] += count
             self.non_head_vacancy_for_any_dv[max_v][trg_class] += count
@@ -616,6 +638,7 @@ class Slots(object):
     Represents positions in a target sentence. Used to keep track of
     which slot (position) is occupied.
     """
+
     def __init__(self, target_sentence_length):
         self._slots = [False] * (target_sentence_length + 1)  # 1-indexed
 
diff --git a/nlp_resource_data/nltk/translate/ibm5.pyc b/nlp_resource_data/nltk/translate/ibm5.pyc
deleted file mode 100755 (executable)
index b597bf4..0000000
Binary files a/nlp_resource_data/nltk/translate/ibm5.pyc and /dev/null differ
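
Model 5 replaces distortion with a vacancy model over open target positions; the same toy setup from the Model 4 sketch applies:

    # IBM Model 5: vacancy-based reordering on top of Model 4 tables.
    from nltk.translate import AlignedSent, IBMModel5

    corpus = [AlignedSent(["i", "eat", "ham"], ["je", "mange", "jambon"])]
    src_classes = {"je": 0, "mange": 0, "jambon": 0}
    trg_classes = {"i": 0, "eat": 0, "ham": 0}
    ibm5 = IBMModel5(corpus, 5, src_classes, trg_classes)
    # head_vacancy_table[dv][max_v][trg_class]
    print(ibm5.head_vacancy_table[1][1][0])
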
diff --git a/nlp_resource_data/nltk/translate/ibm_model.py b/nlp_resource_data/nltk/translate/ibm_model.py
old mode 100755 (executable)
new mode 100644 (file)
index 4dfe4e6..3b9b913
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: IBM Model Core
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Tah Wei Hoon <hoon.tw@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -37,7 +37,7 @@ Robert L. Mercer. 1993. The Mathematics of Statistical Machine
 Translation: Parameter Estimation. Computational Linguistics, 19 (2),
 263-311.
 """
-from __future__ import division
+
 from bisect import insort_left
 from collections import defaultdict
 from copy import deepcopy
@@ -62,6 +62,7 @@ class IBMModel(object):
     """
     Abstract base class for all IBM models
     """
+
     # Avoid division by zero and precision errors by imposing a minimum
     # value for probabilities. Note that this approach is theoretically
     # incorrect, since it may create probabilities that sum to more
@@ -75,23 +76,25 @@ class IBMModel(object):
 
     def reset_probabilities(self):
         self.translation_table = defaultdict(
-            lambda: defaultdict(lambda: IBMModel.MIN_PROB))
+            lambda: defaultdict(lambda: IBMModel.MIN_PROB)
+        )
         """
         dict[str][str]: float. Probability(target word | source word).
         Values accessed as ``translation_table[target_word][source_word]``.
         """
 
         self.alignment_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(
-                lambda: IBMModel.MIN_PROB))))
+            lambda: defaultdict(
+                lambda: defaultdict(lambda: defaultdict(lambda: IBMModel.MIN_PROB))
+            )
+        )
         """
         dict[int][int][int][int]: float. Probability(i | j,l,m).
         Values accessed as ``alignment_table[i][j][l][m]``.
         Used in model 2 and hill climbing in models 3 and above
         """
 
-        self.fertility_table = defaultdict(
-            lambda: defaultdict(lambda: self.MIN_PROB))
+        self.fertility_table = defaultdict(lambda: defaultdict(lambda: self.MIN_PROB))
         """
         dict[int][str]: float. Probability(fertility | source word).
         Values accessed as ``fertility_table[fertility][source_word]``.
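
Because the tables are built from nested defaultdicts, looking up an unseen pair quietly yields the floor probability instead of raising KeyError (and materialises the entry on access). A small sketch of the idiom, with an assumed MIN_PROB value:

    from collections import defaultdict

    MIN_PROB = 1.0e-12
    translation_table = defaultdict(lambda: defaultdict(lambda: MIN_PROB))

    translation_table["maison"]["house"] = 0.8
    print(translation_table["maison"]["house"])  # 0.8
    print(translation_table["maison"]["cat"])    # 1e-12, created on first access
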
@@ -169,8 +172,7 @@ class IBMModel(object):
         # with the constraint that j is aligned (pegged) to i
         for j in range(1, m + 1):
             for i in range(0, l + 1):
-                initial_alignment = self.best_model2_alignment(
-                    sentence_pair, j, i)
+                initial_alignment = self.best_model2_alignment(sentence_pair, j, i)
                 potential_alignment = self.hillclimb(initial_alignment, j)
                 neighbors = self.neighboring(potential_alignment, j)
                 sampled_alignments.update(neighbors)
@@ -199,7 +201,7 @@ class IBMModel(object):
         :type i_pegged: int
         """
         src_sentence = [None] + sentence_pair.mots
-        trg_sentence = ['UNUSED'] + sentence_pair.words  # 1-indexed
+        trg_sentence = ["UNUSED"] + sentence_pair.words  # 1-indexed
 
         l = len(src_sentence) - 1  # exclude NULL
         m = len(trg_sentence) - 1
@@ -218,8 +220,9 @@ class IBMModel(object):
 
                 for i in range(0, l + 1):
                     s = src_sentence[i]
-                    alignment_prob = (self.translation_table[t][s] *
-                                      self.alignment_table[i][j][l][m])
+                    alignment_prob = (
+                        self.translation_table[t][s] * self.alignment_table[i][j][l][m]
+                    )
 
                     if alignment_prob >= max_alignment_prob:
                         max_alignment_prob = alignment_prob
@@ -228,8 +231,9 @@ class IBMModel(object):
             alignment[j] = best_i
             cepts[best_i].append(j)
 
-        return AlignmentInfo(tuple(alignment), tuple(src_sentence),
-                             tuple(trg_sentence), cepts)
+        return AlignmentInfo(
+            tuple(alignment), tuple(src_sentence), tuple(trg_sentence), cepts
+        )
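
The loop above is a per-position argmax: for each target position j it keeps the source position i maximising t(t_j | s_i) * a(i | j, l, m). A toy, self-contained version of the same decision rule (the probability values here are made up; the real method reads them from translation_table and alignment_table):

    translation = {("house", "maison"): 0.8, ("house", None): 0.1}  # t(t | s)
    alignment = {0: 0.3, 1: 0.7}  # a(i | j=1, l=1, m=1)

    src_sentence = [None, "maison"]  # NULL + source words, 1-indexed
    t = "house"
    best_i = max(
        range(len(src_sentence)),
        key=lambda i: translation[(t, src_sentence[i])] * alignment[i],
    )
    print(best_i)  # 1: "maison" explains "house" better than NULL
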
 
     def hillclimb(self, alignment_info, j_pegged=None):
         """
@@ -302,8 +306,11 @@ class IBMModel(object):
                     new_cepts[old_i].remove(j)
 
                     new_alignment_info = AlignmentInfo(
-                        tuple(new_alignment), alignment_info.src_sentence,
-                        alignment_info.trg_sentence, new_cepts)
+                        tuple(new_alignment),
+                        alignment_info.src_sentence,
+                        alignment_info.trg_sentence,
+                        new_cepts,
+                    )
                     neighbors.add(new_alignment_info)
 
         for j in range(1, m + 1):
@@ -327,8 +334,11 @@ class IBMModel(object):
                         insort_left(new_cepts[i], other_j)
 
                         new_alignment_info = AlignmentInfo(
-                            tuple(new_alignment), alignment_info.src_sentence,
-                            alignment_info.trg_sentence, new_cepts)
+                            tuple(new_alignment),
+                            alignment_info.src_sentence,
+                            alignment_info.trg_sentence,
+                            new_cepts,
+                        )
                         neighbors.add(new_alignment_info)
 
         return neighbors
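
The neighborhood built above contains "move" alignments (re-attach one target word to a different source position) and "swap" alignments (exchange the source positions of two target words). A compact sketch of the same neighborhood on a bare 1-indexed alignment tuple, ignoring the pegging and cept bookkeeping the real method maintains:

    def neighbors_of(alignment, l):
        """Yield move- and swap-neighbors of a 1-indexed alignment tuple."""
        m = len(alignment) - 1
        for j in range(1, m + 1):  # moves: re-align word j to a new i
            for i in range(0, l + 1):
                if i != alignment[j]:
                    moved = list(alignment)
                    moved[j] = i
                    yield tuple(moved)
        for j in range(1, m + 1):  # swaps: exchange alignments of j and k
            for k in range(j + 1, m + 1):
                if alignment[j] != alignment[k]:
                    swapped = list(alignment)
                    swapped[j], swapped[k] = swapped[k], swapped[j]
                    yield tuple(swapped)

    print(sorted(set(neighbors_of((0, 1, 2), l=2))))
    # [(0, 0, 2), (0, 1, 0), (0, 1, 1), (0, 2, 1), (0, 2, 2)]
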
@@ -342,8 +352,7 @@ class IBMModel(object):
     def maximize_fertility_probabilities(self, counts):
         for phi, src_words in counts.fertility.items():
             for s in src_words:
-                estimate = (counts.fertility[phi][s] /
-                            counts.fertility_for_any_phi[s])
+                estimate = counts.fertility[phi][s] / counts.fertility_for_any_phi[s]
                 self.fertility_table[phi][s] = max(estimate, IBMModel.MIN_PROB)
 
     def maximize_null_generation_probabilities(self, counts):
@@ -387,8 +396,10 @@ class AlignmentInfo(object):
 
     def __init__(self, alignment, src_sentence, trg_sentence, cepts):
         if not isinstance(alignment, tuple):
-            raise TypeError("The alignment must be a tuple because it is used "
-                            "to uniquely identify AlignmentInfo objects.")
+            raise TypeError(
+                "The alignment must be a tuple because it is used "
+                "to uniquely identify AlignmentInfo objects."
+            )
 
         self.alignment = alignment
         """
@@ -457,8 +468,10 @@ class AlignmentInfo(object):
         """
         i = self.alignment[j]
         if i == 0:
-            raise ValueError("Words aligned to NULL cannot have a previous "
-                             "cept because NULL has no position")
+            raise ValueError(
+                "Words aligned to NULL cannot have a previous "
+                "cept because NULL has no position"
+            )
         previous_cept = i - 1
         while previous_cept > 0 and self.fertility_of_i(previous_cept) == 0:
             previous_cept -= 1
@@ -507,6 +520,7 @@ class Counts(object):
     """
     Data object to store counts of various parameters during training
     """
+
     def __init__(self):
         self.t_given_s = defaultdict(lambda: defaultdict(lambda: 0.0))
         self.any_t_given_s = defaultdict(lambda: 0.0)
diff --git a/nlp_resource_data/nltk/translate/ibm_model.pyc b/nlp_resource_data/nltk/translate/ibm_model.pyc
deleted file mode 100755 (executable)
index 249238b..0000000
Binary files a/nlp_resource_data/nltk/translate/ibm_model.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/translate/meteor_score.py b/nlp_resource_data/nltk/translate/meteor_score.py
new file mode 100644 (file)
index 0000000..008836f
--- /dev/null
@@ -0,0 +1,434 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Machine Translation
+#
+# Copyright (C) 2001-2020 NLTK Project
+# Author: Uday Krishna <udaykrishna5@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+
+from nltk.stem.porter import PorterStemmer
+from nltk.corpus import wordnet
+from itertools import chain, product
+
+
+def _generate_enums(hypothesis, reference, preprocess=str.lower):
+    """
+    Takes in string inputs for hypothesis and reference and returns
+    enumerated word lists for each of them
+
+    :param hypothesis: hypothesis string
+    :type hypothesis: str
+    :param reference: reference string
+    :type reference: str
+    :param preprocess: preprocessing method (default str.lower)
+    :type preprocess: method
+    :return: enumerated word lists
+    :rtype: list of 2D tuples, list of 2D tuples
+    """
+    hypothesis_list = list(enumerate(preprocess(hypothesis).split()))
+    reference_list = list(enumerate(preprocess(reference).split()))
+    return hypothesis_list, reference_list
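
A quick usage sketch of the enumeration step (hypothesis list first, reference list second):

    hyp, ref = _generate_enums("It is a cat", "It was a cat")
    print(hyp)  # [(0, 'it'), (1, 'is'), (2, 'a'), (3, 'cat')]
    print(ref)  # [(0, 'it'), (1, 'was'), (2, 'a'), (3, 'cat')]
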
+
+
+def exact_match(hypothesis, reference):
+    """
+    Matches exact words in hypothesis and reference
+    and returns a word mapping based on the enumerated
+    word id between hypothesis and reference
+
+    :param hypothesis: hypothesis string
+    :type hypothesis: str
+    :param reference: reference string
+    :type reference: str
+    :return: enumerated matched tuples, enumerated unmatched hypothesis tuples,
+             enumerated unmatched reference tuples
+    :rtype: list of 2D tuples, list of 2D tuples,  list of 2D tuples
+    """
+    hypothesis_list, reference_list = _generate_enums(hypothesis, reference)
+    return _match_enums(hypothesis_list, reference_list)
+
+
+def _match_enums(enum_hypothesis_list, enum_reference_list):
+    """
+    Matches exact words in hypothesis and reference and returns
+    a word mapping between enum_hypothesis_list and enum_reference_list
+    based on the enumerated word id.
+
+    :param enum_hypothesis_list: enumerated hypothesis list
+    :type enum_hypothesis_list: list of tuples
+    :param enum_reference_list: enumerated reference list
+    :type enum_reference_list: list of 2D tuples
+    :return: enumerated matched tuples, enumerated unmatched hypothesis tuples,
+             enumerated unmatched reference tuples
+    :rtype: list of 2D tuples, list of 2D tuples,  list of 2D tuples
+    """
+    word_match = []
+    for i in range(len(enum_hypothesis_list))[::-1]:
+        for j in range(len(enum_reference_list))[::-1]:
+            if enum_hypothesis_list[i][1] == enum_reference_list[j][1]:
+                word_match.append(
+                    (enum_hypothesis_list[i][0], enum_reference_list[j][0])
+                )
+                (enum_hypothesis_list.pop(i)[1], enum_reference_list.pop(j)[1])
+                break
+    return word_match, enum_hypothesis_list, enum_reference_list
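
Note that _match_enums consumes matched entries from both input lists (they are popped in place) and returns the leftovers. For example:

    hyp = [(0, "it"), (1, "is"), (2, "a"), (3, "cat")]
    ref = [(0, "it"), (1, "was"), (2, "a"), (3, "cat")]
    matches, hyp_rest, ref_rest = _match_enums(hyp, ref)
    print(matches)   # [(3, 3), (2, 2), (0, 0)] -- scanned right to left
    print(hyp_rest)  # [(1, 'is')]
    print(ref_rest)  # [(1, 'was')]
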
+
+
+def _enum_stem_match(
+    enum_hypothesis_list, enum_reference_list, stemmer=PorterStemmer()
+):
+    """
+    Stems each word and matches them in hypothesis and reference
+    and returns a word mapping between enum_hypothesis_list and
+    enum_reference_list based on the enumerated word id. The function also
+    returns an enumerated list of unmatched words for hypothesis and reference.
+
+    :param enum_hypothesis_list: enumerated hypothesis list
+    :type enum_hypothesis_list: list of 2D tuples
+    :param enum_reference_list: enumerated reference list
+    :type enum_reference_list: list of 2D tuples
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method
+    :return: enumerated matched tuples, enumerated unmatched hypothesis tuples,
+             enumerated unmatched reference tuples
+    :rtype: list of 2D tuples, list of 2D tuples,  list of 2D tuples
+    """
+    stemmed_enum_list1 = [
+        (word_pair[0], stemmer.stem(word_pair[1])) for word_pair in enum_hypothesis_list
+    ]
+
+    stemmed_enum_list2 = [
+        (word_pair[0], stemmer.stem(word_pair[1])) for word_pair in enum_reference_list
+    ]
+
+    word_match, enum_unmat_hypo_list, enum_unmat_ref_list = _match_enums(
+        stemmed_enum_list1, stemmed_enum_list2
+    )
+
+    enum_unmat_hypo_list = (
+        list(zip(*enum_unmat_hypo_list)) if len(enum_unmat_hypo_list) > 0 else []
+    )
+
+    enum_unmat_ref_list = (
+        list(zip(*enum_unmat_ref_list)) if len(enum_unmat_ref_list) > 0 else []
+    )
+
+    enum_hypothesis_list = list(
+        filter(lambda x: x[0] not in enum_unmat_hypo_list, enum_hypothesis_list)
+    )
+
+    enum_reference_list = list(
+        filter(lambda x: x[0] not in enum_unmat_ref_list, enum_reference_list)
+    )
+
+    return word_match, enum_hypothesis_list, enum_reference_list
+
+
+def stem_match(hypothesis, reference, stemmer=PorterStemmer()):
+    """
+    Stems each word and matches them in hypothesis and reference
+    and returns a word mapping between hypothesis and reference
+
+    :param hypothesis: hypothesis string
+    :type hypothesis: str
+    :param reference: reference string
+    :type reference: str
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :type stemmer: nltk.stem.api.StemmerI or any class that
+                   implements a stem method
+    :return: enumerated matched tuples, enumerated unmatched hypothesis tuples,
+             enumerated unmatched reference tuples
+    :rtype: list of 2D tuples, list of 2D tuples,  list of 2D tuples
+    """
+    enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference)
+    return _enum_stem_match(enum_hypothesis_list, enum_reference_list, stemmer=stemmer)
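
A usage sketch (the Porter stemmer maps "cats" to "cat", so that pair aligns even though the surface forms differ; "running" vs. "ran" does not, since Porter is not a lemmatizer):

    matches, _, _ = stem_match("running cats", "ran cat")
    print(matches)  # [(1, 1)] -- "cats" ~ "cat" via their shared stem
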
+
+
+def _enum_wordnetsyn_match(enum_hypothesis_list, enum_reference_list, wordnet=wordnet):
+    """
+    Matches each word in reference to a word in hypothesis
+    if any synonym of a hypothesis word is the exact match
+    to the reference word.
+
+    :param enum_hypothesis_list: enumerated hypothesis list
+    :param enum_reference_list: enumerated reference list
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :type wordnet: WordNetCorpusReader
+    :return: list of matched tuples, unmatched hypothesis list, unmatched reference list
+    :rtype:  list of tuples, list of tuples, list of tuples
+
+    """
+    word_match = []
+    for i in range(len(enum_hypothesis_list))[::-1]:
+        hypothesis_syns = set(
+            chain(
+                *[
+                    [
+                        lemma.name()
+                        for lemma in synset.lemmas()
+                        if lemma.name().find("_") < 0
+                    ]
+                    for synset in wordnet.synsets(enum_hypothesis_list[i][1])
+                ]
+            )
+        ).union({enum_hypothesis_list[i][1]})
+        for j in range(len(enum_reference_list))[::-1]:
+            if enum_reference_list[j][1] in hypothesis_syns:
+                word_match.append(
+                    (enum_hypothesis_list[i][0], enum_reference_list[j][0])
+                )
+                enum_hypothesis_list.pop(i), enum_reference_list.pop(j)
+                break
+    return word_match, enum_hypothesis_list, enum_reference_list
+
+
+def wordnetsyn_match(hypothesis, reference, wordnet=wordnet):
+    """
+    Matches each word in reference to a word in hypothesis if any synonym
+    of a hypothesis word is the exact match to the reference word.
+
+    :param hypothesis: hypothesis string
+    :param reference: reference string
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :type wordnet: WordNetCorpusReader
+    :return: list of mapped tuples
+    :rtype: list of tuples
+    """
+    enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference)
+    return _enum_wordnetsyn_match(
+        enum_hypothesis_list, enum_reference_list, wordnet=wordnet
+    )
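
A usage sketch, assuming the WordNet corpus is installed (e.g. via nltk.download('wordnet')); in the standard database "guy" appears as a lemma of a synset that also contains "cat", so the two words align:

    matches, _, _ = wordnetsyn_match("the cat", "the guy")
    print(matches)  # [(1, 1), (0, 0)] -- matched right to left
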
+
+
+def _enum_allign_words(
+    enum_hypothesis_list, enum_reference_list, stemmer=PorterStemmer(), wordnet=wordnet
+):
+    """
+    Aligns/matches words in the hypothesis to the reference by sequentially
+    applying exact match, stemmed match and wordnet based synonym match.
+    In case there are multiple matches, the match with the least number
+    of crossings is chosen. Takes enumerated lists as input instead of
+    string inputs.
+
+    :param enum_hypothesis_list: enumerated hypothesis list
+    :param enum_reference_list: enumerated reference list
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :type wordnet: WordNetCorpusReader
+    :return: sorted list of matched tuples, unmatched hypothesis list,
+             unmatched reference list
+    :rtype: list of tuples, list of tuples, list of tuples
+    """
+    exact_matches, enum_hypothesis_list, enum_reference_list = _match_enums(
+        enum_hypothesis_list, enum_reference_list
+    )
+
+    stem_matches, enum_hypothesis_list, enum_reference_list = _enum_stem_match(
+        enum_hypothesis_list, enum_reference_list, stemmer=stemmer
+    )
+
+    wns_matches, enum_hypothesis_list, enum_reference_list = _enum_wordnetsyn_match(
+        enum_hypothesis_list, enum_reference_list, wordnet=wordnet
+    )
+
+    return (
+        sorted(
+            exact_matches + stem_matches + wns_matches, key=lambda wordpair: wordpair[0]
+        ),
+        enum_hypothesis_list,
+        enum_reference_list,
+    )
+
+
+def allign_words(hypothesis, reference, stemmer=PorterStemmer(), wordnet=wordnet):
+    """
+    Aligns/matches words in the hypothesis to the reference by sequentially
+    applying exact match, stemmed match and wordnet based synonym match.
+    In case there are multiple matches, the match with the least number
+    of crossings is chosen.
+
+    :param hypothesis: hypothesis string
+    :param reference: reference string
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :type wordnet: WordNetCorpusReader
+    :return: sorted list of matched tuples, unmatched hypothesis list, unmatched reference list
+    :rtype: list of tuples, list of tuples, list of tuples
+    """
+    enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference)
+    return _enum_allign_words(
+        enum_hypothesis_list, enum_reference_list, stemmer=stemmer, wordnet=wordnet
+    )
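
A usage sketch of the combined matcher (this also requires WordNet, since the synonym stage runs last); the result is sorted by hypothesis index:

    matches, unmatched_hyp, unmatched_ref = allign_words("it is a cat", "it was a cat")
    print(matches)  # [(0, 0), (2, 2), (3, 3)]
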
+
+
+def _count_chunks(matches):
+    """
+    Counts the fewest possible number of chunks such that matched unigrams
+    of each chunk are adjacent to each other. This is used to calculate the
+    fragmentation part of the metric.
+
+    :param matches: list containing a mapping of matched words (output of allign_words)
+    :return: Number of chunks a sentence is divided into post alignment
+    :rtype: int
+    """
+    i = 0
+    chunks = 1
+    while i < len(matches) - 1:
+        if (matches[i + 1][0] == matches[i][0] + 1) and (
+            matches[i + 1][1] == matches[i][1] + 1
+        ):
+            i += 1
+            continue
+        i += 1
+        chunks += 1
+    return chunks
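
For example, with three matches where the first two are contiguous in both sentences and the third is not:

    print(_count_chunks([(0, 0), (1, 1), (3, 5)]))  # 2 chunks
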
+
+
+def single_meteor_score(
+    reference,
+    hypothesis,
+    preprocess=str.lower,
+    stemmer=PorterStemmer(),
+    wordnet=wordnet,
+    alpha=0.9,
+    beta=3,
+    gamma=0.5,
+):
+    """
+    Calculates METEOR score for single hypothesis and reference as per
+    "Meteor: An Automatic Metric for MT Evaluation with HighLevels of
+    Correlation with Human Judgments" by Alon Lavie and Abhaya Agarwal,
+    in Proceedings of ACL.
+    http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf
+
+
+    >>> hypothesis1 = 'It is a guide to action which ensures that the military always obeys the commands of the party'
+
+    >>> reference1 = 'It is a guide to action that ensures that the military will forever heed Party commands'
+
+
+    >>> round(single_meteor_score(reference1, hypothesis1),4)
+    0.7398
+
+        If no words match during the alignment, the method returns a
+        score of 0. We can safely return zero instead of raising a
+        division by zero error, as no match usually implies a bad translation.
+
+    >>> round(single_meteor_score('this is a cat', 'non matching hypothesis'),4)
+    0.0
+
+    :param reference: a reference sentence
+    :type reference: str
+    :param hypothesis: a hypothesis sentence
+    :type hypothesis: str
+    :param preprocess: preprocessing function (default str.lower)
+    :type preprocess: method
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :type wordnet: WordNetCorpusReader
+    :param alpha: parameter for controlling relative weights of precision and recall.
+    :type alpha: float
+    :param beta: parameter for controlling shape of penalty as a
+                 function of fragmentation.
+    :type beta: float
+    :param gamma: relative weight assigned to fragmentation penalty.
+    :type gamma: float
+    :return: The sentence-level METEOR score.
+    :rtype: float
+    """
+    enum_hypothesis, enum_reference = _generate_enums(
+        hypothesis, reference, preprocess=preprocess
+    )
+    translation_length = len(enum_hypothesis)
+    reference_length = len(enum_reference)
+    matches, _, _ = _enum_allign_words(enum_hypothesis, enum_reference, stemmer=stemmer)
+    matches_count = len(matches)
+    try:
+        precision = float(matches_count) / translation_length
+        recall = float(matches_count) / reference_length
+        fmean = (precision * recall) / (alpha * precision + (1 - alpha) * recall)
+        chunk_count = float(_count_chunks(matches))
+        frag_frac = chunk_count / matches_count
+    except ZeroDivisionError:
+        return 0.0
+    penalty = gamma * frag_frac ** beta
+    return (1 - penalty) * fmean
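
A worked sketch of the arithmetic above with assumed counts (6 matches, hypothesis length 7, reference length 8, 2 chunks) and the default parameters:

    alpha, beta, gamma = 0.9, 3, 0.5
    matches_count, translation_length, reference_length, chunk_count = 6, 7, 8, 2

    precision = matches_count / translation_length  # ~0.8571
    recall = matches_count / reference_length       # 0.75
    fmean = (precision * recall) / (alpha * precision + (1 - alpha) * recall)
    penalty = gamma * (chunk_count / matches_count) ** beta  # 0.5 * (1/3)**3
    print(round((1 - penalty) * fmean, 4))  # 0.7454
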
+
+
+def meteor_score(
+    references,
+    hypothesis,
+    preprocess=str.lower,
+    stemmer=PorterStemmer(),
+    wordnet=wordnet,
+    alpha=0.9,
+    beta=3,
+    gamma=0.5,
+):
+    """
+    Calculates METEOR score for hypothesis with multiple references as
+    described in "Meteor: An Automatic Metric for MT Evaluation with
+    High Levels of Correlation with Human Judgments" by Alon Lavie and
+    Abhaya Agarwal, in Proceedings of ACL.
+    http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf
+
+
+    In case of multiple references, the best score is chosen. This method
+    iterates over single_meteor_score and picks the best pairing among all
+    the references for a given hypothesis.
+
+    >>> hypothesis1 = 'It is a guide to action which ensures that the military always obeys the commands of the party'
+    >>> hypothesis2 = 'It is to insure the troops forever hearing the activity guidebook that party direct'
+
+    >>> reference1 = 'It is a guide to action that ensures that the military will forever heed Party commands'
+    >>> reference2 = 'It is the guiding principle which guarantees the military forces always being under the command of the Party'
+    >>> reference3 = 'It is the practical guide for the army always to heed the directions of the party'
+
+    >>> round(meteor_score([reference1, reference2, reference3], hypothesis1),4)
+    0.7398
+
+        If no words match during the alignment, the method returns a
+        score of 0. We can safely return zero instead of raising a
+        division by zero error, as no match usually implies a bad translation.
+
+    >>> round(meteor_score(['this is a cat'], 'non matching hypothesis'),4)
+    0.0
+
+    :param references: reference sentences
+    :type references: list(str)
+    :param hypothesis: a hypothesis sentence
+    :type hypothesis: str
+    :param preprocess: preprocessing function (default str.lower)
+    :type preprocess: method
+    :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer())
+    :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method
+    :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet)
+    :type wordnet: WordNetCorpusReader
+    :param alpha: parameter for controlling relative weights of precision and recall.
+    :type alpha: float
+    :param beta: parameter for controlling shape of penalty as a function
+                 of fragmentation.
+    :type beta: float
+    :param gamma: relative weight assigned to fragmentation penalty.
+    :type gamma: float
+    :return: The sentence-level METEOR score.
+    :rtype: float
+    """
+    return max(
+        [
+            single_meteor_score(
+                reference,
+                hypothesis,
+                preprocess=preprocess,
+                stemmer=stemmer,
+                wordnet=wordnet,
+                alpha=alpha,
+                beta=beta,
+                gamma=gamma,
+            )
+            for reference in references
+        ]
+    )
old mode 100755 (executable)
new mode 100644 (file)
index e9fef3e..d11addb
@@ -1,12 +1,12 @@
 # Natural Language Toolkit: Translation metrics
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Will Zhang <wilzzha@gmail.com>
 #         Guan Gui <ggui@student.unimelb.edu.au>
 #         Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import division
+
 
 def alignment_error_rate(reference, hypothesis, possible=None):
     """
@@ -34,7 +34,8 @@ def alignment_error_rate(reference, hypothesis, possible=None):
     if possible is None:
         possible = reference
     else:
-        assert(reference.issubset(possible)) # sanity check
+        assert reference.issubset(possible)  # sanity check
 
-    return (1.0 - (len(hypothesis & reference) + len(hypothesis & possible)) /
-            float(len(hypothesis) + len(reference)))
+    return 1.0 - (len(hypothesis & reference) + len(hypothesis & possible)) / float(
+        len(hypothesis) + len(reference)
+    )
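
A quick worked example of the formula above, with sure alignments only so that possible defaults to the reference (plain sets of index pairs work here, since only set operations are used):

    reference = {(0, 0), (1, 1)}
    hypothesis = {(0, 0), (1, 2)}
    # AER = 1 - (|A & S| + |A & P|) / (|A| + |S|) = 1 - (1 + 1) / (2 + 2)
    print(alignment_error_rate(reference, hypothesis))  # 0.5
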
old mode 100755 (executable)
new mode 100644 (file)
index 1bedf65..ca9ac2b
@@ -1,27 +1,19 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: NIST Score
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors:
 # Contributors:
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
 """NIST score implementation."""
-from __future__ import division
 
 import math
 import fractions
 from collections import Counter
 
 from nltk.util import ngrams
-from nltk.translate.bleu_score import modified_precision, closest_ref_length
-
-try:
-    fractions.Fraction(0, 1000, _normalize=False)
-    from fractions import Fraction
-except TypeError:
-    from nltk.compat import Fraction
 
 
 def sentence_nist(references, hypothesis, n=5):
@@ -64,10 +56,10 @@ def sentence_nist(references, hypothesis, n=5):
     ...               'of', 'the', 'party']
 
     >>> sentence_nist([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
-    0.0854...
+    3.3709...
 
     >>> sentence_nist([reference1, reference2, reference3], hypothesis2) # doctest: +ELLIPSIS
-    0.1485...
+    1.4619...
 
     :param references: reference sentences
     :type references: list(list(str))
@@ -78,6 +70,7 @@ def sentence_nist(references, hypothesis, n=5):
     """
     return corpus_nist([references], [hypothesis], n)
 
+
 def corpus_nist(list_of_references, hypotheses, n=5):
     """
     Calculate a single corpus-level NIST score (aka. system-level BLEU) for all
@@ -91,46 +84,94 @@ def corpus_nist(list_of_references, hypotheses, n=5):
     :type n: int
     """
     # Before proceeding to compute NIST, perform sanity checks.
-    assert len(list_of_references) == len(hypotheses), "The number of hypotheses and their reference(s) should be the same"
-
-    p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches.
-    p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref.
-    sysoutput_lengths = Counter() # Key = ngram order, and value = no. of ngram in hyp.
-    hyp_lengths, ref_lengths = 0, 0
-
-    # Iterate through each hypothesis and their corresponding references.
-    for references, hypothesis in zip(list_of_references, hypotheses):
-        # For each order of ngram, calculate the numerator and
-        # denominator for the corpus-level modified precision.
-        for i, _ in enumerate(range(1,n+1)):
-            p_i = modified_precision(references, hypothesis, i)
-            p_numerators[i] += p_i.numerator
-            p_denominators[i] += p_i.denominator
-            # Adds the no. of ngrams in the hypothesis.
-            sysoutput_lengths[i] += len(hypothesis) - (i - 1)
-
-        # Calculate the hypothesis length and the closest reference length.
-        # Adds them to the corpus-level hypothesis and reference counts.
-        hyp_len =  len(hypothesis)
-        hyp_lengths += hyp_len
-        ref_lengths += closest_ref_length(references, hyp_len)
-
-    # Calculate corpus-level brevity penalty.
-    bp = nist_length_penalty(ref_lengths, hyp_lengths)
-
-    # Collects the various precision values for the different ngram orders.
-    p_n = [Fraction(p_numerators[i], p_denominators[i], _normalize=False)
-           for i, _ in enumerate(range(1,n+1))]
-
+    assert len(list_of_references) == len(
+        hypotheses
+    ), "The number of hypotheses and their reference(s) should be the same"
+
+    # Collect the ngram counts from the reference sentences.
+    ngram_freq = Counter()
+    total_reference_words = 0
+    for (
+        references
+    ) in list_of_references:  # For each source sent, there's a list of reference sents.
+        for reference in references:
+            # For each order of ngram, count the ngram occurrences.
+            for i in range(1, n + 1):
+                ngram_freq.update(ngrams(reference, i))
+            total_reference_words += len(reference)
+
+    # Compute the information weights based on the reference sentences.
     # Eqn 2 in Doddington (2002):
     # Info(w_1 ... w_n) = log_2 [ (# of occurrences of w_1 ... w_n-1) / (# of occurrences of w_1 ... w_n) ]
-    info = [0 if p_n[i].numerator == 0 or p_n[i+1].numerator == 0 # Handles math domain and zero division errors.
-            else math.log(p_n[i].numerator / p_n[i+1].numerator)
-            for i in range(len(p_n)-1)]
-    return sum(info_i/sysoutput_lengths[i] for i, info_i in enumerate(info)) * bp
-
-
-def nist_length_penalty(closest_ref_len, hyp_len):
+    information_weights = {}
+    for _ngram in ngram_freq:  # w_1 ... w_n
+        _mgram = _ngram[:-1]  #  w_1 ... w_n-1
+        # From https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v13a.pl#L546
+        # it's computed as such:
+        #     denominator = ngram_freq[_mgram] if _mgram and _mgram in ngram_freq else total_reference_words
+        #     information_weights[_ngram] = -1 * math.log(ngram_freq[_ngram]/denominator) / math.log(2)
+        #
+        # Mathematically, this is equivalent to our implementation:
+        if _mgram and _mgram in ngram_freq:
+            numerator = ngram_freq[_mgram]
+        else:
+            numerator = total_reference_words
+        information_weights[_ngram] = math.log(numerator / ngram_freq[_ngram], 2)
+
+    # Micro-average.
+    nist_precision_numerator_per_ngram = Counter()
+    nist_precision_denominator_per_ngram = Counter()
+    l_ref, l_sys = 0, 0
+    # For each order of ngram.
+    for i in range(1, n + 1):
+        # Iterate through each hypothesis and their corresponding references.
+        for references, hypothesis in zip(list_of_references, hypotheses):
+            hyp_len = len(hypothesis)
+
+            # Find reference with the best NIST score.
+            nist_score_per_ref = []
+            for reference in references:
+                _ref_len = len(reference)
+                # Counter of ngrams in hypothesis.
+                hyp_ngrams = (
+                    Counter(ngrams(hypothesis, i))
+                    if len(hypothesis) >= i
+                    else Counter()
+                )
+                ref_ngrams = (
+                    Counter(ngrams(reference, i)) if len(reference) >= i else Counter()
+                )
+                ngram_overlaps = hyp_ngrams & ref_ngrams
+                # Precision part of the score in Eqn 3
+                _numerator = sum(
+                    information_weights[_ngram] * count
+                    for _ngram, count in ngram_overlaps.items()
+                )
+                _denominator = sum(hyp_ngrams.values())
+                _precision = 0 if _denominator == 0 else _numerator / _denominator
+                nist_score_per_ref.append(
+                    (_precision, _numerator, _denominator, _ref_len)
+                )
+            # Best reference.
+            precision, numerator, denominator, ref_len = max(nist_score_per_ref)
+            nist_precision_numerator_per_ngram[i] += numerator
+            nist_precision_denominator_per_ngram[i] += denominator
+            l_ref += ref_len
+            l_sys += hyp_len
+
+    # Final NIST micro-average mean aggregation.
+    nist_precision = 0
+    for i in nist_precision_numerator_per_ngram:
+        precision = (
+            nist_precision_numerator_per_ngram[i]
+            / nist_precision_denominator_per_ngram[i]
+        )
+        nist_precision += precision
+    # Eqn 3 in Doddington (2002)
+    return nist_precision * nist_length_penalty(l_ref, l_sys)
+
+
+def nist_length_penalty(ref_len, hyp_len):
     """
     Calculates the NIST length penalty, from Eq. 3 in Doddington (2002)
 
@@ -146,10 +187,10 @@ def nist_length_penalty(closest_ref_len, hyp_len):
     of the score of small variations in the length of a translation.
     See Fig. 4 in  Doddington (2002)
     """
-    ratio = closest_ref_len / hyp_len
+    ratio = hyp_len / ref_len
     if 0 < ratio < 1:
         ratio_x, score_x = 1.5, 0.5
-        beta = math.log(score_x) / math.log(score_x)**2
-        return math.exp(beta * math.log(ratio)**2)
-    else: # ratio <= 0 or ratio >= 1
+        beta = math.log(score_x) / math.log(ratio_x) ** 2
+        return math.exp(beta * math.log(ratio) ** 2)
+    else:  # ratio <= 0 or ratio >= 1
         return max(min(ratio, 1.0), 0.0)
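
Two small checks of the pieces above: the Eqn 2 information weight, and the behaviour of the length penalty (the helper below mirrors the implementation above so the snippet is self-contained):

    import math

    # Info(w_1 .. w_n) = log2(count(w_1 .. w_n-1) / count(w_1 .. w_n)):
    # an ngram seen 2 times under a history seen 8 times carries 2 bits.
    print(math.log(8 / 2, 2))  # 2.0

    def length_penalty(ref_len, hyp_len):  # mirrors nist_length_penalty above
        ratio = hyp_len / ref_len
        if 0 < ratio < 1:
            ratio_x, score_x = 1.5, 0.5
            beta = math.log(score_x) / math.log(ratio_x) ** 2
            return math.exp(beta * math.log(ratio) ** 2)
        return max(min(ratio, 1.0), 0.0)

    print(length_penalty(10, 10))           # 1.0 -- no penalty at equal length
    print(round(length_penalty(10, 5), 4))  # ~0.1319 -- short output is punished
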
old mode 100755 (executable)
new mode 100644 (file)
index 5bbc094..a50887e
@@ -1,40 +1,51 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Phrase Extraction Algorithm
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Authors: Liling Tan, Fredrik Hedman, Petra Barancikova
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
-def extract(f_start, f_end, e_start, e_end, 
-            alignment, f_aligned,
-            srctext, trgtext, srclen, trglen, max_phrase_length):
+
+def extract(
+    f_start,
+    f_end,
+    e_start,
+    e_end,
+    alignment,
+    f_aligned,
+    srctext,
+    trgtext,
+    srclen,
+    trglen,
+    max_phrase_length,
+):
     """
-    This function checks for alignment point consistency and extracts 
+    This function checks for alignment point consistency and extracts
     phrases using the chunk of consistent phrases.
-    
+
     A phrase pair (e, f ) is consistent with an alignment A if and only if:
 
     (i) No English words in the phrase pair are aligned to words outside it.
-    
+
            ∀ e_i ∈ ē, (e_i, f_j) ∈ A ⇒ f_j ∈ f̄
-    
-    (ii) No Foreign words in the phrase pair are aligned to words outside it. 
-            
+
+    (ii) No Foreign words in the phrase pair are aligned to words outside it.
+
             ∀f j ∈ f , (e i , f j ) ∈ A ⇒ e i ∈ e
-    
-    (iii) The phrase pair contains at least one alignment point. 
-            
+
+    (iii) The phrase pair contains at least one alignment point.
+
             ∃ e_i ∈ ē, f_j ∈ f̄ s.t. (e_i, f_j) ∈ A
-    
+
     :type f_start: int
     :param f_start: Starting index of the possible foreign language phrases
     :type f_end: int
-    :param f_end: Starting index of the possible foreign language phrases
+    :param f_end: End index of the possible foreign language phrases
     :type e_start: int
     :param e_start: Starting index of the possible source language phrases
     :type e_end: int
-    :param e_end: Starting index of the possible source language phrases
+    :param e_end: End index of the possible source language phrases
     :type srctext: list
     :param srctext: The source language tokens, a list of string.
     :type trgtext: list
@@ -48,8 +59,8 @@ def extract(f_start, f_end, e_start, e_end,
     if f_end < 0:  # 0-based indexing.
         return {}
     # Check if alignment points are consistent.
-    for e,f in alignment:
-        if ((f_start <= f <= f_end) and (e < e_start or e > e_end)):
+    for e, f in alignment:
+        if (f_start <= f <= f_end) and (e < e_start or e > e_end):
             return {}
 
     # Add phrase pairs (incl. additional unaligned f)
@@ -60,37 +71,39 @@ def extract(f_start, f_end, e_start, e_end,
         while True:
             # add phrase pair ([e_start, e_end], [fs, fe]) to set E
             # Need to +1 in range  to include the end-point.
-            src_phrase = " ".join(srctext[e_start:e_end+1])
-            trg_phrase = " ".join(trgtext[fs:fe+1])
+            src_phrase = " ".join(srctext[e_start : e_end + 1])
+            trg_phrase = " ".join(trgtext[fs : fe + 1])
             # Include more data for later ordering.
-            phrases.add(((e_start, e_end+1), (f_start, f_end+1), 
-                         src_phrase, trg_phrase))
+            phrases.add(
+                ((e_start, e_end + 1), (fs, fe + 1), src_phrase, trg_phrase)
+            )
             fe += 1
-            if fe in f_aligned or fe == trglen:
+            if fe in f_aligned or fe >= trglen:
                 break
-        fs -=1
+        fs -= 1
         if fs in f_aligned or fs < 0:
             break
     return phrases
 
+
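
A standalone sketch of just the consistency test above: a candidate span is rejected as soon as any alignment point links a foreign word inside [f_start, f_end] to a source word outside [e_start, e_end]:

    def is_consistent(alignment, e_start, e_end, f_start, f_end):
        """True iff no foreign word in the span aligns outside the source span."""
        return all(
            e_start <= e <= e_end
            for e, f in alignment
            if f_start <= f <= f_end
        )

    alignment = [(0, 0), (1, 1), (1, 2)]
    print(is_consistent(alignment, 0, 1, 0, 2))  # True
    print(is_consistent(alignment, 0, 0, 0, 2))  # False: f=1,2 align to e=1
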
 def phrase_extraction(srctext, trgtext, alignment, max_phrase_length=0):
     """
-    Phrase extraction algorithm extracts all consistent phrase pairs from 
+    Phrase extraction algorithm extracts all consistent phrase pairs from
     a word-aligned sentence pair.
 
-    The idea is to loop over all possible source language (e) phrases and find 
-    the minimal foreign phrase (f) that matches each of them. Matching is done 
-    by identifying all alignment points for the source phrase and finding the 
-    shortest foreign phrase that includes all the foreign counterparts for the 
+    The idea is to loop over all possible source language (e) phrases and find
+    the minimal foreign phrase (f) that matches each of them. Matching is done
+    by identifying all alignment points for the source phrase and finding the
+    shortest foreign phrase that includes all the foreign counterparts for the
     source words.
 
-    In short, a phrase alignment has to 
+    In short, a phrase alignment has to
     (a) contain all alignment points for all covered words
     (b) contain at least one alignment point
-            
+
     >>> srctext = "michael assumes that he will stay in the house"
     >>> trgtext = "michael geht davon aus , dass er im haus bleibt"
-    >>> alignment = [(0,0), (1,1), (1,2), (1,3), (2,5), (3,6), (4,9), 
+    >>> alignment = [(0,0), (1,1), (1,2), (1,3), (2,5), (3,6), (4,9),
     ... (5,9), (6,7), (7,7), (8,8)]
     >>> phrases = phrase_extraction(srctext, trgtext, alignment)
     >>> for i in sorted(phrases):
@@ -98,20 +111,20 @@ def phrase_extraction(srctext, trgtext, alignment, max_phrase_length=0):
     ...
     ((0, 1), (0, 1), 'michael', 'michael')
     ((0, 2), (0, 4), 'michael assumes', 'michael geht davon aus')
-    ((0, 2), (0, 4), 'michael assumes', 'michael geht davon aus ,')
+    ((0, 2), (0, 5), 'michael assumes', 'michael geht davon aus ,')
     ((0, 3), (0, 6), 'michael assumes that', 'michael geht davon aus , dass')
     ((0, 4), (0, 7), 'michael assumes that he', 'michael geht davon aus , dass er')
     ((0, 9), (0, 10), 'michael assumes that he will stay in the house', 'michael geht davon aus , dass er im haus bleibt')
     ((1, 2), (1, 4), 'assumes', 'geht davon aus')
-    ((1, 2), (1, 4), 'assumes', 'geht davon aus ,')
+    ((1, 2), (1, 5), 'assumes', 'geht davon aus ,')
     ((1, 3), (1, 6), 'assumes that', 'geht davon aus , dass')
     ((1, 4), (1, 7), 'assumes that he', 'geht davon aus , dass er')
     ((1, 9), (1, 10), 'assumes that he will stay in the house', 'geht davon aus , dass er im haus bleibt')
-    ((2, 3), (5, 6), 'that', ', dass')
+    ((2, 3), (4, 6), 'that', ', dass')
     ((2, 3), (5, 6), 'that', 'dass')
-    ((2, 4), (5, 7), 'that he', ', dass er')
+    ((2, 4), (4, 7), 'that he', ', dass er')
     ((2, 4), (5, 7), 'that he', 'dass er')
-    ((2, 9), (5, 10), 'that he will stay in the house', ', dass er im haus bleibt')
+    ((2, 9), (4, 10), 'that he will stay in the house', ', dass er im haus bleibt')
     ((2, 9), (5, 10), 'that he will stay in the house', 'dass er im haus bleibt')
     ((3, 4), (6, 7), 'he', 'er')
     ((3, 9), (6, 10), 'he will stay in the house', 'er im haus bleibt')
@@ -120,34 +133,34 @@ def phrase_extraction(srctext, trgtext, alignment, max_phrase_length=0):
     ((6, 8), (7, 8), 'in the', 'im')
     ((6, 9), (7, 9), 'in the house', 'im haus')
     ((8, 9), (8, 9), 'house', 'haus')
-    
+
     :type srctext: str
     :param srctext: The sentence string from the source language.
     :type trgtext: str
     :param trgtext: The sentence string from the target language.
-    :type alignment: str
+    :type alignment: list(tuple)
     :param alignment: The word alignment outputs as list of tuples, where
         the first elements of tuples are the source words' indices and
         second elements are the target words' indices. This is also the output
         format of nltk.translate.ibm1
     :rtype: list(tuple)
-    :return: A list of tuples, each element in a list is a phrase and each 
-        phrase is a tuple made up of (i) its source location, (ii) its target 
+    :return: A list of tuples, each element in a list is a phrase and each
+        phrase is a tuple made up of (i) its source location, (ii) its target
         location, (iii) the source phrase and (iii) the target phrase. The phrase
-        list of tuples represents all the possible phrases extracted from the 
-        word alignments. 
+        list of tuples represents all the possible phrases extracted from the
+        word alignments.
     :type max_phrase_length: int
     :param max_phrase_length: maximal phrase length, if 0 or not specified
         it is set to a length of the longer sentence (srctext or trgtext).
     """
 
-    srctext = srctext.split()   # e
-    trgtext = trgtext.split()   # f
-    srclen = len(srctext)       # len(e)
-    trglen = len(trgtext)       # len(f)
+    srctext = srctext.split()  # e
+    trgtext = trgtext.split()  # f
+    srclen = len(srctext)  # len(e)
+    trglen = len(trgtext)  # len(f)
     # Keeps an index of which source/target words that are aligned.
-    f_aligned = [j for _,j in alignment]
-    max_phrase_length = max_phrase_length or max(srclen,trglen)
+    f_aligned = [j for _, j in alignment]
+    max_phrase_length = max_phrase_length or max(srclen, trglen)
 
     # set of phrase pairs BP
     bp = set()
@@ -158,18 +171,26 @@ def phrase_extraction(srctext, trgtext, alignment, max_phrase_length=0):
             # // find the minimally matching foreign phrase
             # (f start , f end ) = ( length(f), 0 )
             # f_start ∈ [0, len(f) - 1]; f_end ∈ [0, len(f) - 1]
-            f_start, f_end = trglen-1 , -1  #  0-based indexing
-            for e,f in alignment:
+            f_start, f_end = trglen - 1, -1  #  0-based indexing
+
+            for e, f in alignment:
                 if e_start <= e <= e_end:
                     f_start = min(f, f_start)
                     f_end = max(f, f_end)
             # add extract (f start , f end , e start , e end ) to set BP
-            phrases = extract(f_start, f_end, e_start, e_end, 
-                              alignment, f_aligned,
-                              srctext, trgtext, srclen, trglen,
-                              max_phrase_length)
+            phrases = extract(
+                f_start,
+                f_end,
+                e_start,
+                e_end,
+                alignment,
+                f_aligned,
+                srctext,
+                trgtext,
+                srclen,
+                trglen,
+                max_phrase_length,
+            )
             if phrases:
                 bp.update(phrases)
     return bp
-
old mode 100755 (executable)
new mode 100644 (file)
index 553e68f..912084f
@@ -1,13 +1,13 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: RIBES Score
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Contributors: Katsuhito Sudoh, Liling Tan, Kasramvd, J.F.Sebastian
 #               Mark Byers, ekhumoro, P. Ortiz
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 """ RIBES score implementation """
-from __future__ import division
+
 from itertools import islice
 import math
 
@@ -16,26 +16,26 @@ from nltk.util import ngrams, choose
 
 def sentence_ribes(references, hypothesis, alpha=0.25, beta=0.10):
     """
-    The RIBES (Rank-based Intuitive Bilingual Evaluation Score) from 
-    Hideki Isozaki, Tsutomu Hirao, Kevin Duh, Katsuhito Sudoh and 
-    Hajime Tsukada. 2010. "Automatic Evaluation of Translation Quality for 
-    Distant Language Pairs". In Proceedings of EMNLP. 
-    http://www.aclweb.org/anthology/D/D10/D10-1092.pdf 
-    
-    The generic RIBES scores used in shared task, e.g. Workshop for 
+    The RIBES (Rank-based Intuitive Bilingual Evaluation Score) from
+    Hideki Isozaki, Tsutomu Hirao, Kevin Duh, Katsuhito Sudoh and
+    Hajime Tsukada. 2010. "Automatic Evaluation of Translation Quality for
+    Distant Language Pairs". In Proceedings of EMNLP.
+    http://www.aclweb.org/anthology/D/D10/D10-1092.pdf
+
+    The generic RIBES scores used in shared task, e.g. Workshop for
     Asian Translation (WAT) uses the following RIBES calculations:
-    
+
+        RIBES = kendall_tau * (p1**alpha) * (bp**beta)
-    
+
     Please note that this re-implementation differs from the official
+    RIBES implementation and though it emulates the results as described
+    in the original paper, there are further optimizations implemented
+    in the original paper, there are further optimization implemented
     in the official RIBES script.
-    
-    Users are encouraged to use the official RIBES script instead of this 
+
+    Users are encouraged to use the official RIBES script instead of this
+    implementation when evaluating a machine translation system. Refer
     to http://www.kecl.ntt.co.jp/icl/lirg/ribes/ for the official script.
-    
+
     :param references: a list of reference sentences
     :type references: list(list(str))
     :param hypothesis: a hypothesis sentence
@@ -53,31 +53,31 @@ def sentence_ribes(references, hypothesis, alpha=0.25, beta=0.10):
         # Collects the *worder* from the ranked correlation alignments.
         worder = word_rank_alignment(reference, hypothesis)
         nkt = kendall_tau(worder)
-            
+
         # Calculates the brevity penalty
-        bp = min(1.0, math.exp(1.0 - len(reference)/len(hypothesis)))
-        
+        bp = min(1.0, math.exp(1.0 - len(reference) / len(hypothesis)))
+
         # Calculates the unigram precision, *p1*
         p1 = len(worder) / len(hypothesis)
-        
-        _ribes = nkt * (p1 ** alpha) *  (bp ** beta)
-        
-        if _ribes > best_ribes: # Keeps the best score.
+
+        _ribes = nkt * (p1 ** alpha) * (bp ** beta)
+
+        if _ribes > best_ribes:  # Keeps the best score.
             best_ribes = _ribes
-        
+
     return best_ribes
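
A worked sketch of the combination above with assumed intermediate values (nkt, p1 and bp normally come from kendall_tau, the unigram precision and the brevity penalty computed in the loop; the numbers here are hypothetical):

    alpha, beta = 0.25, 0.10
    nkt, p1, bp = 0.382, 0.8, 1.0  # hypothetical intermediates
    print(round(nkt * (p1 ** alpha) * (bp ** beta), 4))  # 0.3613
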
 
 
 def corpus_ribes(list_of_references, hypotheses, alpha=0.25, beta=0.10):
     """
-    This function "calculates RIBES for a system output (hypothesis) with 
-    multiple references, and returns "best" score among multi-references and 
-    individual scores. The scores are corpus-wise, i.e., averaged by the number 
+    This function "calculates RIBES for a system output (hypothesis) with
+    multiple references, and returns "best" score among multi-references and
+    individual scores. The scores are corpus-wise, i.e., averaged by the number
     of sentences." (c.f. RIBES version 1.03.1 code).
-    
-    Different from BLEU's micro-average precision, RIBES calculates the 
-    macro-average precision by averaging the best RIBES score for each pair of 
-    hypothesis and its corresponding references 
+
+    Different from BLEU's micro-average precision, RIBES calculates the
+    macro-average precision by averaging the best RIBES score for each pair of
+    hypothesis and its corresponding references
 
     >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
     ...         'ensures', 'that', 'the', 'military', 'always',
@@ -91,17 +91,17 @@ def corpus_ribes(list_of_references, hypotheses, alpha=0.25, beta=0.10):
     >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
     ...          'army', 'always', 'to', 'heed', 'the', 'directions',
     ...          'of', 'the', 'party']
-    
-    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', 
+
+    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
     ...         'interested', 'in', 'world', 'history']
-    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', 
+    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
     ...          'because', 'he', 'read', 'the', 'book']
-    
+
     >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
     >>> hypotheses = [hyp1, hyp2]
     >>> round(corpus_ribes(list_of_references, hypotheses),4)
     0.3597
-    
+
+    :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
+    :type list_of_references: list(list(list(str)))
     :param hypotheses: a list of hypothesis sentences
@@ -118,66 +118,66 @@ def corpus_ribes(list_of_references, hypotheses, alpha=0.25, beta=0.10):
     for references, hypothesis in zip(list_of_references, hypotheses):
         corpus_best_ribes += sentence_ribes(references, hypothesis, alpha, beta)
     return corpus_best_ribes / len(hypotheses)
-    
-        
+
+
 def position_of_ngram(ngram, sentence):
     """
-    This function returns the position of the first instance of the ngram 
+    This function returns the position of the first instance of the ngram
     appearing in a sentence.
-    
+
     Note that one could also use string as follows but the code is a little
     convoluted with type casting back and forth:
-        
+
         char_pos = ' '.join(sent)[:' '.join(sent).index(' '.join(ngram))]
         word_pos = char_pos.count(' ')
-        
+
     Another way to conceive this is:
-    
-        return next(i for i, ng in enumerate(ngrams(sentence, len(ngram))) 
+
+        return next(i for i, ng in enumerate(ngrams(sentence, len(ngram)))
                     if ng == ngram)
-                    
+
     :param ngram: The ngram that needs to be searched
     :type ngram: tuple
     :param sentence: The list of tokens to search from.
     :type sentence: list(str)
     """
     # Iterates through the ngrams in sentence.
-    for i,sublist in enumerate(ngrams(sentence, len(ngram))):
+    for i, sublist in enumerate(ngrams(sentence, len(ngram))):
         # Returns the index of the word when ngram matches.
         if ngram == sublist:
             return i
 
 
 def word_rank_alignment(reference, hypothesis, character_based=False):
-    """    
+    """
     This is the word rank alignment algorithm described in the paper to produce
-    the *worder* list, i.e. a list of word indices of the hypothesis word orders 
+    the *worder* list, i.e. a list of word indices of the hypothesis word orders
     w.r.t. the list of reference words.
-    
-    Below is (H0, R0) example from the Isozaki et al. 2010 paper, 
+
+    Below is (H0, R0) example from the Isozaki et al. 2010 paper,
     note the examples are indexed from 1 but the results here are indexed from 0:
-    
+
         >>> ref = str('he was interested in world history because he '
         ... 'read the book').split()
         >>> hyp = str('he read the book because he was interested in world '
         ... 'history').split()
         >>> word_rank_alignment(ref, hyp)
         [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
-        
+
     The (H1, R1) example from the paper, note the 0th index:
-    
+
         >>> ref = 'John hit Bob yesterday'.split()
         >>> hyp = 'Bob hit John yesterday'.split()
         >>> word_rank_alignment(ref, hyp)
         [2, 1, 0, 3]
 
     Here is the (H2, R2) example from the paper, note the 0th index here too:
-    
+
         >>> ref = 'the boy read the book'.split()
         >>> hyp = 'the book was read by the boy'.split()
         >>> word_rank_alignment(ref, hyp)
         [3, 4, 2, 0, 1]
-        
+
     :param reference: a reference sentence
     :type reference: list(str)
     :param hypothesis: a hypothesis sentence
@@ -189,7 +189,7 @@ def word_rank_alignment(reference, hypothesis, character_based=False):
     # This is used for matching context window later in the algorithm.
     ref_ngrams = []
     hyp_ngrams = []
-    for n in range(1, len(reference)+1):
+    for n in range(1, len(reference) + 1):
         for ng in ngrams(reference, n):
             ref_ngrams.append(ng)
         for ng in ngrams(hypothesis, n):
@@ -198,46 +198,46 @@ def word_rank_alignment(reference, hypothesis, character_based=False):
         # If word is not in the reference, continue.
         if h_word not in reference:
             continue
-        # If we can determine one-to-one word correspondence for unigrams that 
+        # If we can determine one-to-one word correspondence for unigrams that
         # only appear once in both the reference and hypothesis.
         elif hypothesis.count(h_word) == reference.count(h_word) == 1:
             worder.append(reference.index(h_word))
         else:
-            max_window_size = max(i, hyp_len-i+1)
+            max_window_size = max(i, hyp_len - i + 1)
             for window in range(1, max_window_size):
-                if i+window < hyp_len: # If searching the right context is possible.
+                if i + window < hyp_len:  # If searching the right context is possible.
                     # Retrieve the right context window.
-                    right_context_ngram = tuple(islice(hypothesis, i, i+window+1))
+                    right_context_ngram = tuple(islice(hypothesis, i, i + window + 1))
                     num_times_in_ref = ref_ngrams.count(right_context_ngram)
-                    num_times_in_hyp = hyp_ngrams.count(right_context_ngram) 
+                    num_times_in_hyp = hyp_ngrams.count(right_context_ngram)
                     # If ngram appears only once in both ref and hyp.
                     if num_times_in_ref == num_times_in_hyp == 1:
                         # Find the position of ngram that matched the reference.
                         pos = position_of_ngram(right_context_ngram, reference)
                         worder.append(pos)  # Add the positions of the ngram.
                         break
-                if window <= i: # If searching the left context is possible.
+                if window <= i:  # If searching the left context is possible.
                     # Retrieve the left context window.
-                    left_context_ngram = tuple(islice(hypothesis, i-window, i+1))
+                    left_context_ngram = tuple(islice(hypothesis, i - window, i + 1))
                     num_times_in_ref = ref_ngrams.count(left_context_ngram)
                     num_times_in_hyp = hyp_ngrams.count(left_context_ngram)
                     if num_times_in_ref == num_times_in_hyp == 1:
                         # Find the position of ngram that matched the reference.
                         pos = position_of_ngram(left_context_ngram, reference)
                         # Add the positions of the ngram.
-                        worder.append(pos+ len(left_context_ngram) -1)  
+                        worder.append(pos + len(left_context_ngram) - 1)
                         break
     return worder
 
-    
+
 def find_increasing_sequences(worder):
     """
-    Given the *worder* list, this function groups monotonic +1 sequences. 
-    
+    Given the *worder* list, this function groups monotonic +1 sequences.
+
         >>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
         >>> list(find_increasing_sequences(worder))
         [(7, 8, 9, 10), (0, 1, 2, 3, 4, 5)]
-    
+
     :param worder: The worder list output from word_rank_alignment
     :type worder: list(int)
     """
@@ -258,20 +258,20 @@ def kendall_tau(worder, normalize=True):
     """
     Calculates the Kendall's Tau correlation coefficient given the *worder*
     list of word alignments from word_rank_alignment(), using the formula:
-    
+
         tau = 2 * num_increasing_pairs / num_possible_pairs - 1
-    
+
     Note that the no. of increasing pairs can be discontinuous in the *worder*
-    list and each each increasing sequence can be tabulated as choose(len(seq), 2) 
+    list and each increasing sequence can be tabulated as choose(len(seq), 2)
     no. of increasing pairs, e.g.
-    
+
         >>> worder = [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
         >>> number_possible_pairs = choose(len(worder), 2)
         >>> round(kendall_tau(worder, normalize=False),3)
         -0.236
         >>> round(kendall_tau(worder),3)
         0.382
-    
+
     :param worder: The worder list output from word_rank_alignment
     :type worder: list(int)
     :param normalize: Flag to indicate normalization
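Spelling out the doctest arithmetic: the runs (7, 8, 9, 10) and (0, 1, 2, 3, 4, 5) contribute choose(4, 2) + choose(6, 2) = 6 + 15 = 21 increasing pairs, out of choose(11, 2) = 55 possible pairs:

    >>> round(2 * 21 / 55 - 1, 3)              # unnormalized tau
    -0.236
    >>> round(((2 * 21 / 55 - 1) + 1) / 2, 3)  # normalized to [0, 1]
    0.382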
@@ -283,43 +283,43 @@ def kendall_tau(worder, normalize=True):
     # Extract the groups of increasing/monotonic sequences.
     increasing_sequences = find_increasing_sequences(worder)
     # Calculate no. of increasing_pairs in *worder* list.
-    num_increasing_pairs = sum(choose(len(seq),2) for seq in increasing_sequences) 
+    num_increasing_pairs = sum(choose(len(seq), 2) for seq in increasing_sequences)
     # Calculate no. of possible pairs.
     num_possible_pairs = choose(worder_len, 2)
     # Kendall's Tau computation.
-    tau = 2 * num_increasing_pairs / num_possible_pairs -1
-    if normalize: # If normalized, the tau output falls between 0.0 to 1.0
-        return (tau + 1) /2
-    else: # Otherwise, the tau outputs falls between -1.0 to +1.0
+    tau = 2 * num_increasing_pairs / num_possible_pairs - 1
+    if normalize:  # If normalized, the tau output falls between 0.0 and 1.0
+        return (tau + 1) / 2
+    else:  # Otherwise, the tau output falls between -1.0 and +1.0
         return tau
 
 
 def spearman_rho(worder, normalize=True):
     """
-    Calculates the Spearman's Rho correlation coefficient given the *worder* 
+    Calculates the Spearman's Rho correlation coefficient given the *worder*
     list of word alignment from word_rank_alignment(), using the formula:
-    
-        rho = 1 - sum(d**2) / choose(len(worder)+1, 3)  
-        
+
+        rho = 1 - sum(d**2) / choose(len(worder)+1, 3)
+
     Here d is the difference between each index in the *worder* list and the
     corresponding original word index in the reference sentence.
-    
+
     Using the (H0, R0) and (H5, R5) examples from the paper
-    
+
         >>> worder =  [7, 8, 9, 10, 6, 0, 1, 2, 3, 4, 5]
         >>> round(spearman_rho(worder, normalize=False), 3)
         -0.591
         >>> round(spearman_rho(worder), 3)
         0.205
-    
+
     :param worder: The worder list output from word_rank_alignment
     :type worder: list(int)
     """
     worder_len = len(worder)
-    sum_d_square = sum((wi - i)**2 for wi, i in zip(worder, range(worder_len)))
-    rho = 1 - sum_d_square / choose(worder_len+1, 3)
-    
-    if normalize: # If normalized, the rho output falls between 0.0 to 1.0
-        return (rho + 1) /2
-    else: # Otherwise, the rho outputs falls between -1.0 to +1.0
+    sum_d_square = sum((wi - i) ** 2 for wi, i in zip(worder, range(worder_len)))
+    rho = 1 - sum_d_square / choose(worder_len + 1, 3)
+
+    if normalize:  # If normalized, the rho output falls between 0.0 and 1.0
+        return (rho + 1) / 2
+    else:  # Otherwise, the rho output falls between -1.0 and +1.0
         return rho
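The same worder list verifies the rho doctest: the displacements wi - i are (7, 7, 7, 7, 2, -5, -5, -5, -5, -5, -5), so sum(d**2) = 4 * 49 + 4 + 6 * 25 = 350, against choose(12, 3) = 220:

    >>> round(1 - 350 / 220, 3)              # unnormalized rho
    -0.591
    >>> round(((1 - 350 / 220) + 1) / 2, 3)  # normalized to [0, 1]
    0.205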
diff --git a/nlp_resource_data/nltk/translate/stack_decoder.py b/nlp_resource_data/nltk/translate/stack_decoder.py
old mode 100755 (executable)
new mode 100644 (file)
index e9442d7..af0ce7e
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Stack decoder
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Tah Wei Hoon <hoon.tw@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -76,6 +76,7 @@ class StackDecoder(object):
     ['nobody', 'expects', 'the', 'spanish', 'inquisition', '!']
 
     """
+
     def __init__(self, phrase_table, language_model):
         """
         :param phrase_table: Table of translations for source language
@@ -151,8 +152,10 @@ class StackDecoder(object):
         """
         sentence = tuple(src_sentence)  # prevent accidental modification
         sentence_length = len(sentence)
-        stacks = [_Stack(self.stack_size, self.beam_threshold)
-                  for _ in range(0, sentence_length + 1)]
+        stacks = [
+            _Stack(self.stack_size, self.beam_threshold)
+            for _ in range(0, sentence_length + 1)
+        ]
         empty_hypothesis = _Hypothesis()
         stacks[0].push(empty_hypothesis)
 
@@ -160,29 +163,35 @@ class StackDecoder(object):
         future_score_table = self.compute_future_scores(sentence)
         for stack in stacks:
             for hypothesis in stack:
-                possible_expansions = StackDecoder.valid_phrases(all_phrases,
-                                                                 hypothesis)
+                possible_expansions = StackDecoder.valid_phrases(
+                    all_phrases, hypothesis
+                )
                 for src_phrase_span in possible_expansions:
-                    src_phrase = sentence[src_phrase_span[0]:src_phrase_span[1]]
-                    for translation_option in (self.phrase_table.
-                                               translations_for(src_phrase)):
+                    src_phrase = sentence[src_phrase_span[0] : src_phrase_span[1]]
+                    for translation_option in self.phrase_table.translations_for(
+                        src_phrase
+                    ):
                         raw_score = self.expansion_score(
-                            hypothesis, translation_option, src_phrase_span)
+                            hypothesis, translation_option, src_phrase_span
+                        )
                         new_hypothesis = _Hypothesis(
                             raw_score=raw_score,
                             src_phrase_span=src_phrase_span,
                             trg_phrase=translation_option.trg_phrase,
-                            previous=hypothesis
+                            previous=hypothesis,
                         )
                         new_hypothesis.future_score = self.future_score(
-                            new_hypothesis, future_score_table, sentence_length)
+                            new_hypothesis, future_score_table, sentence_length
+                        )
                         total_words = new_hypothesis.total_translated_words()
                         stacks[total_words].push(new_hypothesis)
 
         if not stacks[sentence_length]:
-            warnings.warn('Unable to translate all words. '
-                          'The source sentence contains words not in '
-                          'the phrase table')
+            warnings.warn(
+                "Unable to translate all words. "
+                "The source sentence contains words not in "
+                "the phrase table"
+            )
             # Instead of returning empty output, perhaps a partial
             # translation could be returned
             return []
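For orientation, a toy end-to-end run of the decoding loop above (a sketch: PhraseTable is imported as in the module's own docstring, and StubLanguageModel is a hypothetical stand-in exposing probability() and probability_change(), the only two language-model methods the decoder calls):

    from collections import defaultdict
    from math import log

    from nltk.translate import PhraseTable, StackDecoder

    phrase_table = PhraseTable()
    phrase_table.add(("niemand",), ("nobody",), log(0.8))
    phrase_table.add(("!",), ("!",), log(0.8))

    # Toy unigram scores; anything unseen is heavily penalized.
    language_prob = defaultdict(lambda: -999.0)
    language_prob[("nobody",)] = log(0.5)
    language_prob[("!",)] = log(0.1)

    class StubLanguageModel:
        def probability(self, phrase):
            return language_prob[phrase]

        def probability_change(self, hypothesis, phrase):
            return language_prob[phrase]

    decoder = StackDecoder(phrase_table, StubLanguageModel())
    print(decoder.translate(["niemand", "!"]))  # expected: ['nobody', '!']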
@@ -229,14 +238,15 @@ class StackDecoder(object):
         subsequence covering positions 2, 3, and 4.
         :rtype: dict(int: (dict(int): float))
         """
-        scores = defaultdict(lambda: defaultdict(lambda: float('-inf')))
+        scores = defaultdict(lambda: defaultdict(lambda: float("-inf")))
         for seq_length in range(1, len(src_sentence) + 1):
             for start in range(0, len(src_sentence) - seq_length + 1):
                 end = start + seq_length
                 phrase = src_sentence[start:end]
                 if phrase in self.phrase_table:
-                    score = self.phrase_table.translations_for(
-                        phrase)[0].log_prob  # pick best (first) translation
+                    score = self.phrase_table.translations_for(phrase)[
+                        0
+                    ].log_prob  # pick best (first) translation
                     # Warning: API of language_model is subject to change
                     score += self.language_model.probability(phrase)
                     scores[start][end] = score
@@ -244,8 +254,7 @@ class StackDecoder(object):
                 # check if a better score can be obtained by combining
                 # two child subsequences
                 for mid in range(start + 1, end):
-                    combined_score = (scores[start][mid] +
-                                      scores[mid][end])
+                    combined_score = scores[start][mid] + scores[mid][end]
                     if combined_score > scores[start][end]:
                         scores[start][end] = combined_score
         return scores
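The span table above is plain interval dynamic programming. A standalone miniature of the same recurrence, with made-up direct span scores in the log domain (hypothetical numbers, not the decoder's API):

    from collections import defaultdict

    direct = {(0, 1): -1.0, (1, 2): -1.0, (2, 3): -1.0, (0, 2): -1.5}

    scores = defaultdict(lambda: defaultdict(lambda: float("-inf")))
    n = 3
    for seq_length in range(1, n + 1):
        for start in range(0, n - seq_length + 1):
            end = start + seq_length
            if (start, end) in direct:
                scores[start][end] = direct[(start, end)]
            # Try splitting the span into two already-scored child spans.
            for mid in range(start + 1, end):
                combined = scores[start][mid] + scores[mid][end]
                if combined > scores[start][end]:
                    scores[start][end] = combined

    print(scores[0][3])  # -2.5: direct (0, 2) plus (2, 3) beats three unigrams (-3.0)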
@@ -279,7 +288,8 @@ class StackDecoder(object):
         # The API of language_model is subject to change; it could accept
         # a string, a list of words, and/or some other type
         score += self.language_model.probability_change(
-            hypothesis, translation_option.trg_phrase)
+            hypothesis, translation_option.trg_phrase
+        )
         score += self.distortion_score(hypothesis, src_phrase_span)
         score -= self.word_penalty * len(translation_option.trg_phrase)
         return score
@@ -309,8 +319,7 @@ class StackDecoder(object):
             cover untranslated positions.
         :rtype: list(tuple(int, int))
         """
-        untranslated_spans = hypothesis.untranslated_spans(
-            len(all_phrases_from))
+        untranslated_spans = hypothesis.untranslated_spans(len(all_phrases_from))
         valid_phrases = []
         for available_span in untranslated_spans:
             start = available_span[0]
@@ -341,8 +350,15 @@ class _Hypothesis(object):
     ``src_phrase_span`` in the hypothesis chain. Similarly, the
     translation output can be found by traversing up the chain.
     """
-    def __init__(self, raw_score=0.0, src_phrase_span=(), trg_phrase=(),
-                 previous=None, future_score=0.0):
+
+    def __init__(
+        self,
+        raw_score=0.0,
+        src_phrase_span=(),
+        trg_phrase=(),
+        previous=None,
+        future_score=0.0,
+    ):
         """
         :param raw_score: Likelihood of hypothesis so far.
             Higher is better. Does not account for untranslated words.
@@ -415,8 +431,7 @@ class _Hypothesis(object):
         current_hypothesis = self
         while current_hypothesis.previous is not None:
             translated_span = current_hypothesis.src_phrase_span
-            translated_positions.extend(range(translated_span[0],
-                                              translated_span[1]))
+            translated_positions.extend(range(translated_span[0], translated_span[1]))
             current_hypothesis = current_hypothesis.previous
         return translated_positions
 
@@ -439,6 +454,7 @@ class _Stack(object):
     """
     Collection of _Hypothesis objects
     """
+
     def __init__(self, max_size=100, beam_threshold=0.0):
         """
         :param beam_threshold: Hypotheses that score less than this
@@ -450,7 +466,7 @@ class _Stack(object):
         self.items = []
 
         if beam_threshold == 0.0:
-            self.__log_beam_threshold = float('-inf')
+            self.__log_beam_threshold = float("-inf")
         else:
             self.__log_beam_threshold = log(beam_threshold)
 
@@ -496,4 +512,5 @@ class _Stack(object):
 
     def __bool__(self):
         return len(self.items) != 0
-    __nonzero__=__bool__
+
+    __nonzero__ = __bool__
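One detail in _Stack.__init__ above: the 0.0 threshold must be special-cased because math.log(0.0) raises rather than returning negative infinity:

    >>> from math import log
    >>> log(0.0)
    Traceback (most recent call last):
        ...
    ValueError: math domain error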
diff --git a/nlp_resource_data/nltk/translate/stack_decoder.pyc b/nlp_resource_data/nltk/translate/stack_decoder.pyc
deleted file mode 100755 (executable)
index 9bacfdd..0000000
Binary files a/nlp_resource_data/nltk/translate/stack_decoder.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/tree.py b/nlp_resource_data/nltk/tree.py
old mode 100755 (executable)
new mode 100644 (file)
index 193a003..1614c45
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Text Trees
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Edward Loper <edloper@gmail.com>
 #         Steven Bird <stevenbird1@gmail.com>
 #         Peter Ljunglöf <peter.ljunglof@gu.se>
 Class for representing hierarchical language structures, such as
 syntax trees and morphological trees.
 """
-from __future__ import print_function, unicode_literals
-from abc import ABCMeta, abstractmethod
-from six import add_metaclass
-
-# TODO: add LabelledTree (can be used for dependency trees)
 
 import re
+import sys
+from abc import ABCMeta, abstractmethod
 
-from six import string_types
 
 from nltk.grammar import Production, Nonterminal
 from nltk.probability import ProbabilisticMixIn
 from nltk.util import slice_bounds
-from nltk.compat import python_2_unicode_compatible, unicode_repr
 from nltk.internals import raise_unorderable_types
 
+# TODO: add LabelledTree (can be used for dependency trees)
+
 ######################################################################
 ## Trees
 ######################################################################
 
-@python_2_unicode_compatible
+
+
 class Tree(list):
     """
     A Tree represents a hierarchical grouping of leaves and subtrees.
@@ -97,24 +95,30 @@ class Tree(list):
 
     - ``Tree.fromstring(s)`` constructs a new tree by parsing the string ``s``.
     """
+
     def __init__(self, node, children=None):
         if children is None:
-            raise TypeError("%s: Expected a node value and child list "
-                                % type(self).__name__)
-        elif isinstance(children, string_types):
-            raise TypeError("%s() argument 2 should be a list, not a "
-                            "string" % type(self).__name__)
+            raise TypeError(
+                "%s: Expected a node value and child list " % type(self).__name__
+            )
+        elif isinstance(children, str):
+            raise TypeError(
+                "%s() argument 2 should be a list, not a "
+                "string" % type(self).__name__
+            )
         else:
             list.__init__(self, children)
             self._label = node
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Comparison operators
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def __eq__(self, other):
-        return (self.__class__ is other.__class__ and
-                (self._label, list(self)) == (other._label, list(other)))
+        return self.__class__ is other.__class__ and (self._label, list(self)) == (
+            other._label,
+            list(other),
+        )
 
     def __lt__(self, other):
         if not isinstance(other, Tree):
@@ -133,22 +137,25 @@ class Tree(list):
     __le__ = lambda self, other: self < other or self == other
     __ge__ = lambda self, other: not self < other
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Disabled list operations
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def __mul__(self, v):
-        raise TypeError('Tree does not support multiplication')
+        raise TypeError("Tree does not support multiplication")
+
     def __rmul__(self, v):
-        raise TypeError('Tree does not support multiplication')
+        raise TypeError("Tree does not support multiplication")
+
     def __add__(self, v):
-        raise TypeError('Tree does not support addition')
+        raise TypeError("Tree does not support addition")
+
     def __radd__(self, v):
-        raise TypeError('Tree does not support addition')
+        raise TypeError("Tree does not support addition")
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Indexing (with support for tree positions)
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def __getitem__(self, index):
         if isinstance(index, (int, slice)):
@@ -161,48 +168,55 @@ class Tree(list):
             else:
                 return self[index[0]][index[1:]]
         else:
-            raise TypeError("%s indices must be integers, not %s" %
-                            (type(self).__name__, type(index).__name__))
+            raise TypeError(
+                "%s indices must be integers, not %s"
+                % (type(self).__name__, type(index).__name__)
+            )
 
     def __setitem__(self, index, value):
         if isinstance(index, (int, slice)):
             return list.__setitem__(self, index, value)
         elif isinstance(index, (list, tuple)):
             if len(index) == 0:
-                raise IndexError('The tree position () may not be '
-                                 'assigned to.')
+                raise IndexError("The tree position () may not be " "assigned to.")
             elif len(index) == 1:
                 self[index[0]] = value
             else:
                 self[index[0]][index[1:]] = value
         else:
-            raise TypeError("%s indices must be integers, not %s" %
-                            (type(self).__name__, type(index).__name__))
+            raise TypeError(
+                "%s indices must be integers, not %s"
+                % (type(self).__name__, type(index).__name__)
+            )
 
     def __delitem__(self, index):
         if isinstance(index, (int, slice)):
             return list.__delitem__(self, index)
         elif isinstance(index, (list, tuple)):
             if len(index) == 0:
-                raise IndexError('The tree position () may not be deleted.')
+                raise IndexError("The tree position () may not be deleted.")
             elif len(index) == 1:
                 del self[index[0]]
             else:
                 del self[index[0]][index[1:]]
         else:
-            raise TypeError("%s indices must be integers, not %s" %
-                            (type(self).__name__, type(index).__name__))
+            raise TypeError(
+                "%s indices must be integers, not %s"
+                % (type(self).__name__, type(index).__name__)
+            )
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Basic tree operations
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def _get_node(self):
         """Outdated method to access the node value; use the label() method instead."""
         raise NotImplementedError("Use label() to access a node label.")
+
     def _set_node(self, value):
         """Outdated method to set the node value; use the set_label() method instead."""
         raise NotImplementedError("Use set_label() method to set a node label.")
+
     node = property(_get_node, _set_node)
 
     def label(self):
@@ -294,7 +308,7 @@ class Tree(list):
                 max_child_height = max(max_child_height, 1)
         return 1 + max_child_height
 
-    def treepositions(self, order='preorder'):
+    def treepositions(self, order="preorder"):
         """
             >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
             >>> t.treepositions() # doctest: +ELLIPSIS
@@ -308,14 +322,16 @@ class Tree(list):
             ``leaves``.
         """
         positions = []
-        if order in ('preorder', 'bothorder'): positions.append( () )
+        if order in ("preorder", "bothorder"):
+            positions.append(())
         for i, child in enumerate(self):
             if isinstance(child, Tree):
                 childpos = child.treepositions(order)
-                positions.extend((i,)+p for p in childpos)
+                positions.extend((i,) + p for p in childpos)
             else:
-                positions.append( (i,) )
-        if order in ('postorder', 'bothorder'): positions.append( () )
+                positions.append((i,))
+        if order in ("postorder", "bothorder"):
+            positions.append(())
         return positions
 
     def subtrees(self, filter=None):
@@ -356,8 +372,10 @@ class Tree(list):
         :rtype: list(Production)
         """
 
-        if not isinstance(self._label, string_types):
-            raise TypeError('Productions can only be generated from trees having node labels that are strings')
+        if not isinstance(self._label, str):
+            raise TypeError(
+                "Productions can only be generated from trees having node labels that are strings"
+            )
 
         prods = [Production(Nonterminal(self._label), _child_names(self))]
         for child in self:
@@ -394,19 +412,22 @@ class Tree(list):
         :raise IndexError: If this tree contains fewer than ``index+1``
             leaves, or if ``index<0``.
         """
-        if index < 0: raise IndexError('index must be non-negative')
+        if index < 0:
+            raise IndexError("index must be non-negative")
 
         stack = [(self, ())]
         while stack:
             value, treepos = stack.pop()
             if not isinstance(value, Tree):
-                if index == 0: return treepos
-                else: index -= 1
+                if index == 0:
+                    return treepos
+                else:
+                    index -= 1
             else:
-                for i in range(len(value)-1, -1, -1):
-                    stack.append( (value[i], treepos+(i,)) )
+                for i in range(len(value) - 1, -1, -1):
+                    stack.append((value[i], treepos + (i,)))
 
-        raise IndexError('index must be less than or equal to len(self)')
+        raise IndexError("index must be less than or equal to len(self)")
 
     def treeposition_spanning_leaves(self, start, end):
         """
@@ -415,22 +436,29 @@ class Tree(list):
         :raise ValueError: if ``end <= start``
         """
         if end <= start:
-            raise ValueError('end must be greater than start')
+            raise ValueError("end must be greater than start")
         # Find the tree positions of the start & end leaves, and
         # take the longest common subsequence.
         start_treepos = self.leaf_treeposition(start)
-        end_treepos = self.leaf_treeposition(end-1)
+        end_treepos = self.leaf_treeposition(end - 1)
         # Find the first index where they mismatch:
         for i in range(len(start_treepos)):
             if i == len(end_treepos) or start_treepos[i] != end_treepos[i]:
                 return start_treepos[:i]
         return start_treepos
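Both methods above are easiest to see on a small tree:

    >>> from nltk import Tree
    >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V barked)))")
    >>> t.leaf_treeposition(1)                # position of the leaf 'dog'
    (0, 1, 0)
    >>> t.treeposition_spanning_leaves(0, 2)  # smallest constituent over 'the dog'
    (0,)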
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Transforms
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
-    def chomsky_normal_form(self, factor="right", horzMarkov=None, vertMarkov=0, childChar="|", parentChar="^"):
+    def chomsky_normal_form(
+        self,
+        factor="right",
+        horzMarkov=None,
+        vertMarkov=0,
+        childChar="|",
+        parentChar="^",
+    ):
         """
         This method can modify a tree in three ways:
 
@@ -455,9 +483,12 @@ class Tree(list):
         :type  parentChar: str
         """
         from nltk.treetransforms import chomsky_normal_form
+
         chomsky_normal_form(self, factor, horzMarkov, vertMarkov, childChar, parentChar)
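A quick interactive check of the binarization described above (the exact |<...> labels depend on horzMarkov; the shape shown is for the defaults in current NLTK):

    from nltk import Tree

    t = Tree.fromstring("(S (A a) (B b) (C c))")
    t.chomsky_normal_form()  # in-place, right-factored by default
    print(t)                 # e.g. (S (A a) (S|<B-C> (B b) (C c)))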
 
-    def un_chomsky_normal_form(self, expandUnary = True, childChar = "|", parentChar = "^", unaryChar = "+"):
+    def un_chomsky_normal_form(
+        self, expandUnary=True, childChar="|", parentChar="^", unaryChar="+"
+    ):
         """
         This method modifies the tree in three ways:
 
@@ -477,9 +508,10 @@ class Tree(list):
         :type  unaryChar: str
         """
         from nltk.treetransforms import un_chomsky_normal_form
+
         un_chomsky_normal_form(self, expandUnary, childChar, parentChar, unaryChar)
 
-    def collapse_unary(self, collapsePOS = False, collapseRoot = False, joinChar = "+"):
+    def collapse_unary(self, collapsePOS=False, collapseRoot=False, joinChar="+"):
         """
         Collapse subtrees with a single child (i.e. unary productions)
         into a new non-terminal (Tree node) joined by 'joinChar'.
@@ -499,11 +531,12 @@ class Tree(list):
         :type  joinChar: str
         """
         from nltk.treetransforms import collapse_unary
+
         collapse_unary(self, collapsePOS, collapseRoot, joinChar)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Convert, copy
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     @classmethod
     def convert(cls, tree):
@@ -521,31 +554,48 @@ class Tree(list):
         else:
             return tree
 
+    def __copy__(self):
+        return self.copy()
+
+    def __deepcopy__(self, memo):
+        return self.copy(deep=True)
+
     def copy(self, deep=False):
-        if not deep: return type(self)(self._label, self)
-        else: return type(self).convert(self)
+        if not deep:
+            return type(self)(self._label, self)
+        else:
+            return type(self).convert(self)
+
+    def _frozen_class(self):
+        return ImmutableTree
 
-    def _frozen_class(self): return ImmutableTree
     def freeze(self, leaf_freezer=None):
         frozen_class = self._frozen_class()
         if leaf_freezer is None:
             newcopy = frozen_class.convert(self)
         else:
             newcopy = self.copy(deep=True)
-            for pos in newcopy.treepositions('leaves'):
+            for pos in newcopy.treepositions("leaves"):
                 newcopy[pos] = leaf_freezer(newcopy[pos])
             newcopy = frozen_class.convert(newcopy)
-        hash(newcopy) # Make sure the leaves are hashable.
+        hash(newcopy)  # Make sure the leaves are hashable.
         return newcopy
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Parsing
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     @classmethod
-    def fromstring(cls, s, brackets='()', read_node=None, read_leaf=None,
-              node_pattern=None, leaf_pattern=None,
-              remove_empty_top_bracketing=False):
+    def fromstring(
+        cls,
+        s,
+        brackets="()",
+        read_node=None,
+        read_leaf=None,
+        node_pattern=None,
+        leaf_pattern=None,
+        remove_empty_top_bracketing=False,
+    ):
         """
         Read a bracketed tree string and return the resulting tree.
         Trees are represented as nested brackettings, such as::
@@ -595,29 +645,32 @@ class Tree(list):
             then it will return a tree of that type.
         :rtype: Tree
         """
-        if not isinstance(brackets, string_types) or len(brackets) != 2:
-            raise TypeError('brackets must be a length-2 string')
-        if re.search('\s', brackets):
-            raise TypeError('whitespace brackets not allowed')
+        if not isinstance(brackets, str) or len(brackets) != 2:
+            raise TypeError("brackets must be a length-2 string")
+        if re.search("\s", brackets):
+            raise TypeError("whitespace brackets not allowed")
         # Construct a regexp that will tokenize the string.
         open_b, close_b = brackets
         open_pattern, close_pattern = (re.escape(open_b), re.escape(close_b))
         if node_pattern is None:
-            node_pattern = '[^\s%s%s]+' % (open_pattern, close_pattern)
+            node_pattern = "[^\s%s%s]+" % (open_pattern, close_pattern)
         if leaf_pattern is None:
-            leaf_pattern = '[^\s%s%s]+' % (open_pattern, close_pattern)
-        token_re = re.compile('%s\s*(%s)?|%s|(%s)' % (
-            open_pattern, node_pattern, close_pattern, leaf_pattern))
+            leaf_pattern = "[^\s%s%s]+" % (open_pattern, close_pattern)
+        token_re = re.compile(
+            "%s\s*(%s)?|%s|(%s)"
+            % (open_pattern, node_pattern, close_pattern, leaf_pattern)
+        )
         # Walk through each token, updating a stack of trees.
-        stack = [(None, [])] # list of (node, children) tuples
+        stack = [(None, [])]  # list of (node, children) tuples
         for match in token_re.finditer(s):
             token = match.group()
             # Beginning of a tree/subtree
             if token[0] == open_b:
                 if len(stack) == 1 and len(stack[0][1]) > 0:
-                    cls._parse_error(s, match, 'end-of-string')
+                    cls._parse_error(s, match, "end-of-string")
                 label = token[1:].lstrip()
-                if read_node is not None: label = read_node(label)
+                if read_node is not None:
+                    label = read_node(label)
                 stack.append((label, []))
             # End of a tree/subtree
             elif token == close_b:
@@ -625,21 +678,22 @@ class Tree(list):
                     if len(stack[0][1]) == 0:
                         cls._parse_error(s, match, open_b)
                     else:
-                        cls._parse_error(s, match, 'end-of-string')
+                        cls._parse_error(s, match, "end-of-string")
                 label, children = stack.pop()
                 stack[-1][1].append(cls(label, children))
             # Leaf node
             else:
                 if len(stack) == 1:
                     cls._parse_error(s, match, open_b)
-                if read_leaf is not None: token = read_leaf(token)
+                if read_leaf is not None:
+                    token = read_leaf(token)
                 stack[-1][1].append(token)
 
         # check that we got exactly one complete tree.
         if len(stack) > 1:
-            cls._parse_error(s, 'end-of-string', close_b)
+            cls._parse_error(s, "end-of-string", close_b)
         elif len(stack[0][1]) == 0:
-            cls._parse_error(s, 'end-of-string', open_b)
+            cls._parse_error(s, "end-of-string", open_b)
         else:
             assert stack[0][0] is None
             assert len(stack[0][1]) == 1
@@ -647,7 +701,7 @@ class Tree(list):
 
         # If the tree has an extra level with node='', then get rid of
         # it.  E.g.: "((S (NP ...) (VP ...)))"
-        if remove_empty_top_bracketing and tree._label == '' and len(tree) == 1:
+        if remove_empty_top_bracketing and tree._label == "" and len(tree) == 1:
             tree = tree[0]
         # return the tree.
         return tree
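For orientation, the token/stack loop above parses the standard bracketed format:

    >>> from nltk import Tree
    >>> t = Tree.fromstring("(S (NP I) (VP (V saw) (NP him)))")
    >>> t.label(), t.leaves()
    ('S', ['I', 'saw', 'him'])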
@@ -661,32 +715,38 @@ class Tree(list):
         :param expecting: what we expected to see instead.
         """
         # Construct a basic error message
-        if match == 'end-of-string':
-            pos, token = len(s), 'end-of-string'
+        if match == "end-of-string":
+            pos, token = len(s), "end-of-string"
         else:
             pos, token = match.start(), match.group()
-        msg = '%s.read(): expected %r but got %r\n%sat index %d.' % (
-            cls.__name__, expecting, token, ' '*12, pos)
+        msg = "%s.read(): expected %r but got %r\n%sat index %d." % (
+            cls.__name__,
+            expecting,
+            token,
+            " " * 12,
+            pos,
+        )
         # Add a display showing the error token itself:
-        s = s.replace('\n', ' ').replace('\t', ' ')
+        s = s.replace("\n", " ").replace("\t", " ")
         offset = pos
-        if len(s) > pos+10:
-            s = s[:pos+10]+'...'
+        if len(s) > pos + 10:
+            s = s[: pos + 10] + "..."
         if pos > 10:
-            s = '...'+s[pos-10:]
+            s = "..." + s[pos - 10 :]
             offset = 13
-        msg += '\n%s"%s"\n%s^' % (' '*16, s, ' '*(17+offset))
+        msg += '\n%s"%s"\n%s^' % (" " * 16, s, " " * (17 + offset))
         raise ValueError(msg)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Visualization & String Representation
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
 
     def draw(self):
         """
         Open a new window containing a graphical diagram of this tree.
         """
         from nltk.draw.tree import draw_trees
+
         draw_trees(self)
 
     def pretty_print(self, sentence=None, highlight=(), stream=None, **kwargs):
@@ -696,12 +756,16 @@ class Tree(list):
         `nltk.treeprettyprinter.TreePrettyPrinter`.
         """
         from nltk.treeprettyprinter import TreePrettyPrinter
-        print(TreePrettyPrinter(self, sentence, highlight).text(**kwargs),
-              file=stream)
+
+        print(TreePrettyPrinter(self, sentence, highlight).text(**kwargs), file=stream)
 
     def __repr__(self):
-        childstr = ", ".join(unicode_repr(c) for c in self)
-        return '%s(%s, [%s])' % (type(self).__name__, unicode_repr(self._label), childstr)
+        childstr = ", ".join(repr(c) for c in self)
+        return "%s(%s, [%s])" % (
+            type(self).__name__,
+            repr(self._label),
+            childstr,
+        )
 
     def _repr_png_(self):
         """
@@ -716,21 +780,43 @@ class Tree(list):
         from nltk.draw.tree import tree_to_treesegment
         from nltk.draw.util import CanvasFrame
         from nltk.internals import find_binary
+
         _canvas_frame = CanvasFrame()
         widget = tree_to_treesegment(_canvas_frame.canvas(), self)
         _canvas_frame.add_widget(widget)
         x, y, w, h = widget.bbox()
         # print_to_file uses scrollregion to set the width and height of the pdf.
-        _canvas_frame.canvas()['scrollregion'] = (0, 0, w, h)
+        _canvas_frame.canvas()["scrollregion"] = (0, 0, w, h)
         with tempfile.NamedTemporaryFile() as file:
-            in_path = '{0:}.ps'.format(file.name)
-            out_path = '{0:}.png'.format(file.name)
+            in_path = "{0:}.ps".format(file.name)
+            out_path = "{0:}.png".format(file.name)
             _canvas_frame.print_to_file(in_path)
             _canvas_frame.destroy_widget(widget)
-            subprocess.call([find_binary('gs', binary_names=['gswin32c.exe', 'gswin64c.exe'], env_vars=['PATH'], verbose=False)] +
-                            '-q -dEPSCrop -sDEVICE=png16m -r90 -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dSAFER -dBATCH -dNOPAUSE -sOutputFile={0:} {1:}'
-                            .format(out_path, in_path).split())
-            with open(out_path, 'rb') as sr:
+            try:
+                subprocess.call(
+                    [
+                        find_binary(
+                            "gs",
+                            binary_names=["gswin32c.exe", "gswin64c.exe"],
+                            env_vars=["PATH"],
+                            verbose=False,
+                        )
+                    ]
+                    + "-q -dEPSCrop -sDEVICE=png16m -r90 -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dSAFER -dBATCH -dNOPAUSE -sOutputFile={0:} {1:}".format(
+                        out_path, in_path
+                    ).split()
+                )
+            except LookupError:
+                pre_error_message = str(
+                    "The Ghostscript executable isn't found.\n"
+                    "See http://web.mit.edu/ghostscript/www/Install.htm\n"
+                    "If you're using a Mac, you can try installing\n"
+                    "https://docs.brew.sh/Installation then `brew install ghostscript`"
+                )
+                print(pre_error_message, file=sys.stderr)
+                raise LookupError
+
+            with open(out_path, "rb") as sr:
                 res = sr.read()
             os.remove(in_path)
             os.remove(out_path)
@@ -751,7 +837,7 @@ class Tree(list):
             stream = None
         print(self.pformat(**kwargs), file=stream)
 
-    def pformat(self, margin=70, indent=0, nodesep='', parens='()', quotes=False):
+    def pformat(self, margin=70, indent=0, nodesep="", parens="()", quotes=False):
         """
         :return: A pretty-printed string representation of this tree.
         :rtype: str
@@ -772,21 +858,24 @@ class Tree(list):
             return s
 
         # If it doesn't fit on one line, then write it on multi-lines.
-        if isinstance(self._label, string_types):
-            s = '%s%s%s' % (parens[0], self._label, nodesep)
+        if isinstance(self._label, str):
+            s = "%s%s%s" % (parens[0], self._label, nodesep)
         else:
-            s = '%s%s%s' % (parens[0], unicode_repr(self._label), nodesep)
+            s = "%s%s%s" % (parens[0], repr(self._label), nodesep)
         for child in self:
             if isinstance(child, Tree):
-                s += '\n'+' '*(indent+2)+child.pformat(margin, indent+2,
-                                                  nodesep, parens, quotes)
+                s += (
+                    "\n"
+                    + " " * (indent + 2)
+                    + child.pformat(margin, indent + 2, nodesep, parens, quotes)
+                )
             elif isinstance(child, tuple):
-                s += '\n'+' '*(indent+2)+ "/".join(child)
-            elif isinstance(child, string_types) and not quotes:
-                s += '\n'+' '*(indent+2)+ '%s' % child
+                s += "\n" + " " * (indent + 2) + "/".join(child)
+            elif isinstance(child, str) and not quotes:
+                s += "\n" + " " * (indent + 2) + "%s" % child
             else:
-                s += '\n'+' '*(indent+2)+ unicode_repr(child)
-        return s+parens[1]
+                s += "\n" + " " * (indent + 2) + repr(child)
+        return s + parens[1]
 
     def pformat_latex_qtree(self):
         r"""
@@ -806,10 +895,10 @@ class Tree(list):
         :return: A latex qtree representation of this tree.
         :rtype: str
         """
-        reserved_chars = re.compile('([#\$%&~_\{\}])')
+        reserved_chars = re.compile("([#\$%&~_\{\}])")
 
-        pformat = self.pformat(indent=6, nodesep='', parens=('[.', ' ]'))
-        return r'\Tree ' + re.sub(reserved_chars, r'\\\1', pformat)
+        pformat = self.pformat(indent=6, nodesep="", parens=("[.", " ]"))
+        return r"\Tree " + re.sub(reserved_chars, r"\\\1", pformat)
 
     def _pformat_flat(self, nodesep, parens, quotes):
         childstrs = []
@@ -818,16 +907,26 @@ class Tree(list):
                 childstrs.append(child._pformat_flat(nodesep, parens, quotes))
             elif isinstance(child, tuple):
                 childstrs.append("/".join(child))
-            elif isinstance(child, string_types) and not quotes:
-                childstrs.append('%s' % child)
+            elif isinstance(child, str) and not quotes:
+                childstrs.append("%s" % child)
             else:
-                childstrs.append(unicode_repr(child))
-        if isinstance(self._label, string_types):
-            return '%s%s%s %s%s' % (parens[0], self._label, nodesep,
-                                    " ".join(childstrs), parens[1])
+                childstrs.append(repr(child))
+        if isinstance(self._label, str):
+            return "%s%s%s %s%s" % (
+                parens[0],
+                self._label,
+                nodesep,
+                " ".join(childstrs),
+                parens[1],
+            )
         else:
-            return '%s%s%s %s%s' % (parens[0], unicode_repr(self._label), nodesep,
-                                    " ".join(childstrs), parens[1])
+            return "%s%s%s %s%s" % (
+                parens[0],
+                repr(self._label),
+                nodesep,
+                " ".join(childstrs),
+                parens[1],
+            )
 
 
 class ImmutableTree(Tree):
@@ -838,33 +937,46 @@ class ImmutableTree(Tree):
         try:
             self._hash = hash((self._label, tuple(self)))
         except (TypeError, ValueError):
-            raise ValueError("%s: node value and children "
-                             "must be immutable" % type(self).__name__)
+            raise ValueError(
+                "%s: node value and children " "must be immutable" % type(self).__name__
+            )
 
     def __setitem__(self, index, value):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def __setslice__(self, i, j, value):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def __delitem__(self, index):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def __delslice__(self, i, j):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def __iadd__(self, other):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def __imul__(self, other):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def append(self, v):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def extend(self, v):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def pop(self, v=None):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def remove(self, v):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def reverse(self):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def sort(self):
-        raise ValueError('%s may not be modified' % type(self).__name__)
+        raise ValueError("%s may not be modified" % type(self).__name__)
+
     def __hash__(self):
         return self._hash
 
@@ -873,16 +985,15 @@ class ImmutableTree(Tree):
         Set the node label.  This will only succeed the first time the
         node label is set, which should occur in ImmutableTree.__init__().
         """
-        if hasattr(self, '_label'):
-            raise ValueError('%s may not be modified' % type(self).__name__)
+        if hasattr(self, "_label"):
+            raise ValueError("%s may not be modified" % type(self).__name__)
         self._label = value
 
 
 ######################################################################
 ## Parented trees
 ######################################################################
-@add_metaclass(ABCMeta)
-class AbstractParentedTree(Tree):
+class AbstractParentedTree(Tree, metaclass=ABCMeta):
     """
     An abstract base class for a ``Tree`` that automatically maintains
     pointers to parent nodes.  These parent pointers are updated
@@ -922,9 +1033,9 @@ class AbstractParentedTree(Tree):
                 if isinstance(child, Tree):
                     self._setparent(child, i)
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Parent management
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     @abstractmethod
     def _setparent(self, child, index, dry_run=False):
         """
@@ -961,9 +1072,9 @@ class AbstractParentedTree(Tree):
         :param index: The index of ``child`` in ``self``.
         """
 
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Methods that add/remove children
-    #////////////////////////////////////////////////////////////
+    # ////////////////////////////////////////////////////////////
     # Every method that adds or removes a child must make
     # appropriate calls to _setparent() and _delparent().
 
@@ -980,8 +1091,10 @@ class AbstractParentedTree(Tree):
 
         # del ptree[i]
         elif isinstance(index, int):
-            if index < 0: index += len(self)
-            if index < 0: raise IndexError('index out of range')
+            if index < 0:
+                index += len(self)
+            if index < 0:
+                raise IndexError("index out of range")
             # Clear the child's parent pointer.
             if isinstance(self[index], Tree):
                 self._delparent(self[index], index)
@@ -991,7 +1104,7 @@ class AbstractParentedTree(Tree):
         elif isinstance(index, (list, tuple)):
             # del ptree[()]
             if len(index) == 0:
-                raise IndexError('The tree position () may not be deleted.')
+                raise IndexError("The tree position () may not be deleted.")
             # del ptree[(i,)]
             elif len(index) == 1:
                 del self[index[0]]
@@ -1000,8 +1113,10 @@ class AbstractParentedTree(Tree):
                 del self[index[0]][index[1:]]
 
         else:
-            raise TypeError("%s indices must be integers, not %s" %
-                            (type(self).__name__, type(index).__name__))
+            raise TypeError(
+                "%s indices must be integers, not %s"
+                % (type(self).__name__, type(index).__name__)
+            )
 
     def __setitem__(self, index, value):
         # ptree[start:stop] = value
@@ -1014,7 +1129,7 @@ class AbstractParentedTree(Tree):
             # up in an inconsistent state if an error does occur.
             for i, child in enumerate(value):
                 if isinstance(child, Tree):
-                    self._setparent(child, start + i*step, dry_run=True)
+                    self._setparent(child, start + i * step, dry_run=True)
             # clear the child pointers of all parents we're removing
             for i in range(start, stop, step):
                 if isinstance(self[i], Tree):
@@ -1024,14 +1139,16 @@ class AbstractParentedTree(Tree):
             # reversing the elements in a tree.
             for i, child in enumerate(value):
                 if isinstance(child, Tree):
-                    self._setparent(child, start + i*step)
+                    self._setparent(child, start + i * step)
             # finally, update the content of the child list itself.
             super(AbstractParentedTree, self).__setitem__(index, value)
 
         # ptree[i] = value
         elif isinstance(index, int):
-            if index < 0: index += len(self)
-            if index < 0: raise IndexError('index out of range')
+            if index < 0:
+                index += len(self)
+            if index < 0:
+                raise IndexError("index out of range")
             # if the value is not changing, do nothing.
             if value is self[index]:
                 return
@@ -1047,7 +1164,7 @@ class AbstractParentedTree(Tree):
         elif isinstance(index, (list, tuple)):
             # ptree[()] = value
             if len(index) == 0:
-                raise IndexError('The tree position () may not be assigned to.')
+                raise IndexError("The tree position () may not be assigned to.")
             # ptree[(i,)] = value
             elif len(index) == 1:
                 self[index[0]] = value
@@ -1056,8 +1173,10 @@ class AbstractParentedTree(Tree):
                 self[index[0]][index[1:]] = value
 
         else:
-            raise TypeError("%s indices must be integers, not %s" %
-                            (type(self).__name__, type(index).__name__))
+            raise TypeError(
+                "%s indices must be integers, not %s"
+                % (type(self).__name__, type(index).__name__)
+            )
 
     def append(self, child):
         if isinstance(child, Tree):
@@ -1074,16 +1193,20 @@ class AbstractParentedTree(Tree):
         # Handle negative indexes.  Note that if index < -len(self),
         # we do *not* raise an IndexError, unlike __getitem__.  This
         # is done for consistency with list.__getitem__ and list.index.
-        if index < 0: index += len(self)
-        if index < 0: index = 0
+        if index < 0:
+            index += len(self)
+        if index < 0:
+            index = 0
         # Set the child's parent, and update our child list.
         if isinstance(child, Tree):
             self._setparent(child, index)
         super(AbstractParentedTree, self).insert(index, child)
 
     def pop(self, index=-1):
-        if index < 0: index += len(self)
-        if index < 0: raise IndexError('index out of range')
+        if index < 0:
+            index += len(self)
+        if index < 0:
+            raise IndexError("index out of range")
         if isinstance(self[index], Tree):
             self._delparent(self[index], index)
         return super(AbstractParentedTree, self).pop(index)
@@ -1102,14 +1225,18 @@ class AbstractParentedTree(Tree):
     # __getitem__ etc., but use max(0, start) and max(0, stop) because
     # because negative indices are already handled *before*
     # __getslice__ is called; and we don't want to double-count them.
-    if hasattr(list, '__getslice__'):
+    if hasattr(list, "__getslice__"):
+
         def __getslice__(self, start, stop):
             return self.__getitem__(slice(max(0, start), max(0, stop)))
+
         def __delslice__(self, start, stop):
             return self.__delitem__(slice(max(0, start), max(0, stop)))
+
         def __setslice__(self, start, stop, value):
             return self.__setitem__(slice(max(0, start), max(0, stop)), value)
 
+
 class ParentedTree(AbstractParentedTree):
     """
     A ``Tree`` that automatically maintains parent pointers for
@@ -1127,6 +1254,7 @@ class ParentedTree(AbstractParentedTree):
     or ``MultiParentedTrees``.  Mixing tree implementations may result
     in incorrect parent pointers and in ``TypeError`` exceptions.
     """
+
     def __init__(self, node, children=None):
         self._parent = None
         """The parent of this Tree, or None if it has no parent."""
@@ -1141,11 +1269,12 @@ class ParentedTree(AbstractParentedTree):
                     child._parent = None
                     self._setparent(child, i)
 
-    def _frozen_class(self): return ImmutableParentedTree
+    def _frozen_class(self):
+        return ImmutableParentedTree
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Methods
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def parent(self):
         """The parent of this tree, or None if it has no parent."""
@@ -1159,24 +1288,26 @@ class ParentedTree(AbstractParentedTree):
         ``ptree.parent.index(ptree)``, since the ``index()`` method
         returns the first child that is equal to its argument.
         """
-        if self._parent is None: return None
+        if self._parent is None:
+            return None
         for i, child in enumerate(self._parent):
-            if child is self: return i
-        assert False, 'expected to find self in self._parent!'
+            if child is self:
+                return i
+        assert False, "expected to find self in self._parent!"
 
     def left_sibling(self):
         """The left sibling of this tree, or None if it has none."""
         parent_index = self.parent_index()
         if self._parent and parent_index > 0:
-            return self._parent[parent_index-1]
-        return None # no left sibling
+            return self._parent[parent_index - 1]
+        return None  # no left sibling
 
     def right_sibling(self):
         """The right sibling of this tree, or None if it has none."""
         parent_index = self.parent_index()
-        if self._parent and parent_index < (len(self._parent)-1):
-            return self._parent[parent_index+1]
-        return None # no right sibling
+        if self._parent and parent_index < (len(self._parent) - 1):
+            return self._parent[parent_index + 1]
+        return None  # no right sibling
 
     def root(self):
         """
@@ -1199,10 +1330,9 @@ class ParentedTree(AbstractParentedTree):
         else:
             return self.parent().treeposition() + (self.parent_index(),)
 
-
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Parent Management
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _delparent(self, child, index):
         # Sanity checks
@@ -1216,13 +1346,13 @@ class ParentedTree(AbstractParentedTree):
     def _setparent(self, child, index, dry_run=False):
         # If the child's type is incorrect, then complain.
         if not isinstance(child, ParentedTree):
-            raise TypeError('Can not insert a non-ParentedTree '+
-                            'into a ParentedTree')
+            raise TypeError(
+                "Can not insert a non-ParentedTree " + "into a ParentedTree"
+            )
 
         # If child already has a parent, then complain.
         if child._parent is not None:
-            raise ValueError('Can not insert a subtree that already '
-                             'has a parent.')
+            raise ValueError("Can not insert a subtree that already " "has a parent.")
 
         # Set child's parent pointer & index.
         if not dry_run:
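The parent pointers maintained by _setparent()/_delparent() are what power the navigation methods earlier in this class:

    >>> from nltk.tree import ParentedTree
    >>> t = ParentedTree.fromstring("(S (NP I) (VP (V saw) (NP him)))")
    >>> vp = t[1]
    >>> vp.parent().label(), vp.parent_index(), vp.left_sibling().label()
    ('S', 1, 'NP')
    >>> vp.treeposition()
    (1,)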
@@ -1246,6 +1376,7 @@ class MultiParentedTree(AbstractParentedTree):
     ``Trees`` or ``ParentedTrees``.  Mixing tree implementations may
     result in incorrect parent pointers and in ``TypeError`` exceptions.
     """
+
     def __init__(self, node, children=None):
         self._parents = []
         """A list of this tree's parents.  This list should not
@@ -1262,11 +1393,12 @@ class MultiParentedTree(AbstractParentedTree):
                     child._parents = []
                     self._setparent(child, i)
 
-    def _frozen_class(self): return ImmutableMultiParentedTree
+    def _frozen_class(self):
+        return ImmutableMultiParentedTree
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Methods
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def parents(self):
         """
@@ -1289,9 +1421,11 @@ class MultiParentedTree(AbstractParentedTree):
 
         :type: list(MultiParentedTree)
         """
-        return [parent[index-1]
-                for (parent, index) in self._get_parent_indices()
-                if index > 0]
+        return [
+            parent[index - 1]
+            for (parent, index) in self._get_parent_indices()
+            if index > 0
+        ]
 
     def right_siblings(self):
         """
@@ -1303,15 +1437,19 @@ class MultiParentedTree(AbstractParentedTree):
 
         :type: list(MultiParentedTree)
         """
-        return [parent[index+1]
-                for (parent, index) in self._get_parent_indices()
-                if index < (len(parent)-1)]
+        return [
+            parent[index + 1]
+            for (parent, index) in self._get_parent_indices()
+            if index < (len(parent) - 1)
+        ]
 
     def _get_parent_indices(self):
-        return [(parent, index)
-                for parent in self._parents
-                for index, child in enumerate(parent)
-                if child is self]
+        return [
+            (parent, index)
+            for parent in self._parents
+            for index, child in enumerate(parent)
+            if child is self
+        ]
 
     def roots(self):
         """
@@ -1341,9 +1479,10 @@ class MultiParentedTree(AbstractParentedTree):
           for parent_index in ptree.parent_indices(parent):
               parent[parent_index] is ptree
         """
-        if parent not in self._parents: return []
-        else: return [index for (index, child) in enumerate(parent)
-                      if child is self]
+        if parent not in self._parents:
+            return []
+        else:
+            return [index for (index, child) in enumerate(parent) if child is self]
 
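The `parent_indices` invariant quoted in the docstring above can be checked directly -- a minimal sketch, assuming NLTK 3 (a MultiParentedTree node may sit under several parents, even twice under the same one):

    from nltk.tree import MultiParentedTree

    shared = MultiParentedTree("NP", ["it"])
    p1 = MultiParentedTree("S1", [shared])
    p2 = MultiParentedTree("S2", [shared, shared])
    print(len(shared.parents()))      # 2 -- one entry per distinct parent
    print(shared.parent_indices(p2))  # [0, 1] -- every i where p2[i] is shared
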
     def treepositions(self, root):
         """
@@ -1357,15 +1496,17 @@ class MultiParentedTree(AbstractParentedTree):
         if self is root:
             return [()]
         else:
-            return [treepos+(index,)
-                    for parent in self._parents
-                    for treepos in parent.treepositions(root)
-                    for (index, child) in enumerate(parent) if child is self]
-
+            return [
+                treepos + (index,)
+                for parent in self._parents
+                for treepos in parent.treepositions(root)
+                for (index, child) in enumerate(parent)
+                if child is self
+            ]
 
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
     # Parent Management
-    #/////////////////////////////////////////////////////////////////
+    # /////////////////////////////////////////////////////////////////
 
     def _delparent(self, child, index):
         # Sanity checks
@@ -1376,26 +1517,31 @@ class MultiParentedTree(AbstractParentedTree):
         # If the only copy of child in self is at index, then delete
         # self from child's parent list.
         for i, c in enumerate(self):
-            if c is child and i != index: break
+            if c is child and i != index:
+                break
         else:
             child._parents.remove(self)
 
     def _setparent(self, child, index, dry_run=False):
         # If the child's type is incorrect, then complain.
         if not isinstance(child, MultiParentedTree):
-            raise TypeError('Can not insert a non-MultiParentedTree '+
-                            'into a MultiParentedTree')
+            raise TypeError(
+                "Can not insert a non-MultiParentedTree " + "into a MultiParentedTree"
+            )
 
         # Add self as a parent pointer if it's not already listed.
         if not dry_run:
             for parent in child._parents:
-                if parent is self: break
+                if parent is self:
+                    break
             else:
                 child._parents.append(self)
 
+
 class ImmutableParentedTree(ImmutableTree, ParentedTree):
     pass
 
+
 class ImmutableMultiParentedTree(ImmutableTree, MultiParentedTree):
     pass
 
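The `_setparent` checks rewrapped above are what keep the two implementations from being mixed -- a minimal sketch, assuming NLTK 3:

    from nltk.tree import Tree, ParentedTree

    pt = ParentedTree.fromstring("(S (NP dog))")
    try:
        pt.append(Tree("VP", ["barks"]))  # a plain Tree child is rejected
    except TypeError as err:
        print(err)
    try:
        ParentedTree("ROOT", []).append(pt[0])  # pt[0] already has a parent
    except ValueError as err:
        print(err)
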
@@ -1404,21 +1550,29 @@ class ImmutableMultiParentedTree(ImmutableTree, MultiParentedTree):
 ## Probabilistic trees
 ######################################################################
 
-@python_2_unicode_compatible
+
+
 class ProbabilisticTree(Tree, ProbabilisticMixIn):
     def __init__(self, node, children=None, **prob_kwargs):
         Tree.__init__(self, node, children)
         ProbabilisticMixIn.__init__(self, **prob_kwargs)
 
     # We have to patch up these methods to make them work right:
-    def _frozen_class(self): return ImmutableProbabilisticTree
+    def _frozen_class(self):
+        return ImmutableProbabilisticTree
+
     def __repr__(self):
-        return '%s (p=%r)' % (Tree.unicode_repr(self), self.prob())
+        return "%s (p=%r)" % (Tree.__repr__(self), self.prob())
+
     def __str__(self):
-        return '%s (p=%.6g)' % (self.pformat(margin=60), self.prob())
+        return "%s (p=%.6g)" % (self.pformat(margin=60), self.prob())
+
     def copy(self, deep=False):
-        if not deep: return type(self)(self._label, self, prob=self.prob())
-        else: return type(self).convert(self)
+        if not deep:
+            return type(self)(self._label, self, prob=self.prob())
+        else:
+            return type(self).convert(self)
+
     @classmethod
     def convert(cls, val):
         if isinstance(val, Tree):
@@ -1431,21 +1585,26 @@ class ProbabilisticTree(Tree, ProbabilisticMixIn):
             return val
 
     def __eq__(self, other):
-        return (self.__class__ is other.__class__ and
-                (self._label, list(self), self.prob()) ==
-                (other._label, list(other), other.prob()))
+        return self.__class__ is other.__class__ and (
+            self._label,
+            list(self),
+            self.prob(),
+        ) == (other._label, list(other), other.prob())
 
     def __lt__(self, other):
         if not isinstance(other, Tree):
             raise_unorderable_types("<", self, other)
         if self.__class__ is other.__class__:
-            return ((self._label, list(self), self.prob()) <
-                    (other._label, list(other), other.prob()))
+            return (self._label, list(self), self.prob()) < (
+                other._label,
+                list(other),
+                other.prob(),
+            )
         else:
             return self.__class__.__name__ < other.__class__.__name__
 
 
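With `__eq__` and `__lt__` rewrapped above, equality and ordering take the probability into account -- a minimal sketch, assuming NLTK 3:

    from nltk import ProbabilisticTree

    a = ProbabilisticTree("NP", ["dog"], prob=0.4)
    b = ProbabilisticTree("NP", ["dog"], prob=0.6)
    print(a)       # (NP dog) (p=0.4)
    print(a == b)  # False: label, children *and* probability must all match
    print(a < b)   # True: ties on label and children break on probability
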
-@python_2_unicode_compatible
+
 class ImmutableProbabilisticTree(ImmutableTree, ProbabilisticMixIn):
     def __init__(self, node, children=None, **prob_kwargs):
         ImmutableTree.__init__(self, node, children)
@@ -1453,14 +1612,21 @@ class ImmutableProbabilisticTree(ImmutableTree, ProbabilisticMixIn):
         self._hash = hash((self._label, tuple(self), self.prob()))
 
     # We have to patch up these methods to make them work right:
-    def _frozen_class(self): return ImmutableProbabilisticTree
+    def _frozen_class(self):
+        return ImmutableProbabilisticTree
+
     def __repr__(self):
-        return '%s [%s]' % (Tree.unicode_repr(self), self.prob())
+        return "%s [%s]" % (Tree.__repr__(self), self.prob())
+
     def __str__(self):
-        return '%s [%s]' % (self.pformat(margin=60), self.prob())
+        return "%s [%s]" % (self.pformat(margin=60), self.prob())
+
     def copy(self, deep=False):
-        if not deep: return type(self)(self._label, self, prob=self.prob())
-        else: return type(self).convert(self)
+        if not deep:
+            return type(self)(self._label, self, prob=self.prob())
+        else:
+            return type(self).convert(self)
+
     @classmethod
     def convert(cls, val):
         if isinstance(val, Tree):
@@ -1482,16 +1648,19 @@ def _child_names(tree):
             names.append(child)
     return names
 
+
 ######################################################################
 ## Parsing
 ######################################################################
 
+
 def bracket_parse(s):
     """
     Use Tree.fromstring(s, remove_empty_top_bracketing=True) instead.
     """
     raise NameError("Use Tree.fromstring(s, remove_empty_top_bracketing=True) instead.")
 
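The replacement call that `bracket_parse` points at -- a minimal sketch, assuming NLTK 3:

    from nltk.tree import Tree

    t = Tree.fromstring("( (S (NP I) (VP saw)) )", remove_empty_top_bracketing=True)
    print(t.label())  # S -- the empty outer bracketing was stripped
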
+
 def sinica_parse(s):
     """
     Parse a Sinica Treebank string and return a tree.  Trees are represented as nested bracketings,
@@ -1503,22 +1672,26 @@ def sinica_parse(s):
     :param s: The string to be converted
     :type s: str
     """
-    tokens = re.split(r'([()| ])', s)
+    tokens = re.split(r"([()| ])", s)
     for i in range(len(tokens)):
-        if tokens[i] == '(':
-            tokens[i-1], tokens[i] = tokens[i], tokens[i-1]     # pull nonterminal inside parens
-        elif ':' in tokens[i]:
-            fields = tokens[i].split(':')
-            if len(fields) == 2:                                # non-terminal
+        if tokens[i] == "(":
+            tokens[i - 1], tokens[i] = (
+                tokens[i],
+                tokens[i - 1],
+            )  # pull nonterminal inside parens
+        elif ":" in tokens[i]:
+            fields = tokens[i].split(":")
+            if len(fields) == 2:  # non-terminal
                 tokens[i] = fields[1]
             else:
                 tokens[i] = "(" + fields[-2] + " " + fields[-1] + ")"
-        elif tokens[i] == '|':
-            tokens[i] = ''
+        elif tokens[i] == "|":
+            tokens[i] = ""
 
     treebank_string = " ".join(tokens)
     return Tree.fromstring(treebank_string, remove_empty_top_bracketing=True)
 
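To make the token rewriting above concrete -- a toy, roughly Sinica-style string (not real treebank data), exercising both the ':'-field and '|' cases:

    from nltk.tree import sinica_parse

    print(sinica_parse("S(theme:NP(Head:Nab:熊)|Head:VF:叫)"))
    # (S (NP (Nab 熊)) (VF 叫))
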
+
 #    s = re.sub(r'^#[^\s]*\s', '', s)  # remove leading identifier
 #    s = re.sub(r'\w+:', '', s)       # remove role tags
 
@@ -1528,6 +1701,7 @@ def sinica_parse(s):
 ## Demonstration
 ######################################################################
 
+
 def demo():
     """
     A demonstration showing how Trees and Trees can be
@@ -1539,28 +1713,28 @@ def demo():
     from nltk import Tree, ProbabilisticTree
 
     # Demonstrate tree parsing.
-    s = '(S (NP (DT the) (NN cat)) (VP (VBD ate) (NP (DT a) (NN cookie))))'
+    s = "(S (NP (DT the) (NN cat)) (VP (VBD ate) (NP (DT a) (NN cookie))))"
     t = Tree.fromstring(s)
     print("Convert bracketed string into tree:")
     print(t)
     print(t.__repr__())
 
     print("Display tree properties:")
-    print(t.label())         # tree's constituent type
-    print(t[0])             # tree's first child
-    print(t[1])             # tree's second child
+    print(t.label())  # tree's constituent type
+    print(t[0])  # tree's first child
+    print(t[1])  # tree's second child
     print(t.height())
     print(t.leaves())
     print(t[1])
-    print(t[1,1])
-    print(t[1,1,0])
+    print(t[1, 1])
+    print(t[1, 1, 0])
 
     # Demonstrate tree modification.
     the_cat = t[0]
-    the_cat.insert(1, Tree.fromstring('(JJ big)'))
+    the_cat.insert(1, Tree.fromstring("(JJ big)"))
     print("Tree modification:")
     print(t)
-    t[1,1,1] = Tree.fromstring('(NN cake)')
+    t[1, 1, 1] = Tree.fromstring("(NN cake)")
     print(t)
     print()
 
@@ -1574,7 +1748,7 @@ def demo():
     print()
 
     # Demonstrate probabilistic trees.
-    pt = ProbabilisticTree('x', ['y', 'z'], prob=0.5)
+    pt = ProbabilisticTree("x", ["y", "z"], prob=0.5)
     print("Probabilistic Tree:")
     print(pt)
     print()
@@ -1596,10 +1770,20 @@ def demo():
     print()
 
     # Demonstrate tree nodes containing objects other than strings
-    t.set_label(('test', 3))
+    t.set_label(("test", 3))
     print(t)
 
-__all__ = ['ImmutableProbabilisticTree', 'ImmutableTree', 'ProbabilisticMixIn',
-           'ProbabilisticTree', 'Tree', 'bracket_parse',
-           'sinica_parse', 'ParentedTree', 'MultiParentedTree',
-           'ImmutableParentedTree', 'ImmutableMultiParentedTree']
+
+__all__ = [
+    "ImmutableProbabilisticTree",
+    "ImmutableTree",
+    "ProbabilisticMixIn",
+    "ProbabilisticTree",
+    "Tree",
+    "bracket_parse",
+    "sinica_parse",
+    "ParentedTree",
+    "MultiParentedTree",
+    "ImmutableParentedTree",
+    "ImmutableMultiParentedTree",
+]
diff --git a/nlp_resource_data/nltk/tree.pyc b/nlp_resource_data/nltk/tree.pyc
deleted file mode 100755 (executable)
index e4dae61..0000000
Binary files a/nlp_resource_data/nltk/tree.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 9e82d5b..50b0bb0
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: ASCII visualization of NLTK trees
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Andreas van Cranenburgh <A.W.vanCranenburgh@uva.nl>
 #         Peter Ljunglöf <peter.ljunglof@gu.se>
 # URL: <http://nltk.org/>
@@ -18,35 +18,29 @@ Graph Algorithms and Applications, 10(2) 141--157 (2006).
 http://jgaa.info/accepted/2006/EschbachGuentherBecker2006.10.2.pdf
 """
 
-from __future__ import division, print_function, unicode_literals
-
-from nltk.util import slice_bounds, OrderedDict
-from nltk.compat import python_2_unicode_compatible, unicode_repr
-from nltk.internals import raise_unorderable_types
-from nltk.tree import Tree
-
 import re
-import sys
-import codecs
-from cgi import escape
+try:
+    from html import escape
+except ImportError:
+    from cgi import escape
 from collections import defaultdict
 from operator import itemgetter
-from itertools import chain, islice
 
+from nltk.util import OrderedDict
+from nltk.tree import Tree
 
 ANSICOLOR = {
-    'black': 30,
-    'red': 31,
-    'green': 32,
-    'yellow': 33,
-    'blue': 34,
-    'magenta': 35,
-    'cyan': 36,
-    'white': 37,
+    "black": 30,
+    "red": 31,
+    "green": 32,
+    "yellow": 33,
+    "blue": 34,
+    "magenta": 35,
+    "cyan": 36,
+    "white": 37,
 }
 
 
-@python_2_unicode_compatible
 class TreePrettyPrinter(object):
     """
     Pretty-print a tree in text format, either as ASCII or Unicode.
@@ -78,8 +72,11 @@ class TreePrettyPrinter(object):
     def __init__(self, tree, sentence=None, highlight=()):
         if sentence is None:
             leaves = tree.leaves()
-            if (leaves and not any(len(a) == 0 for a in tree.subtrees())
-                    and all(isinstance(a, int) for a in leaves)):
+            if (
+                leaves
+                and not any(len(a) == 0 for a in tree.subtrees())
+                and all(isinstance(a, int) for a in leaves)
+            ):
                 sentence = [str(a) for a in leaves]
             else:
                 # this deals with empty nodes (frontier non-terminals)
@@ -94,16 +91,18 @@ class TreePrettyPrinter(object):
                         for n, b in enumerate(a):
                             if not isinstance(b, Tree):
                                 a[n] = len(sentence)
-                                sentence.append('%s' % b)
+                                if type(b) == tuple:
+                                    b = "/".join(b)
+                                sentence.append("%s" % b)
         self.nodes, self.coords, self.edges, self.highlight = self.nodecoords(
-                tree, sentence, highlight)
+            tree, sentence, highlight
+        )
 
     def __str__(self):
         return self.text()
 
     def __repr__(self):
-        return '<TreePrettyPrinter with %d nodes>' % len(self.nodes)
-
+        return "<TreePrettyPrinter with %d nodes>" % len(self.nodes)
 
     @staticmethod
     def nodecoords(tree, sentence, highlight):
@@ -140,6 +139,7 @@ class TreePrettyPrinter(object):
         - edges[id]: parent id of node with this id (ordered dictionary)
         - highlighted: set of ids that should be highlighted
         """
+
         def findcell(m, matrix, startoflevel, children):
             """
             Find vacant row, column index for node ``m``.
@@ -161,44 +161,57 @@ class TreePrettyPrinter(object):
                 startoflevel = len(matrix)
             for rowidx in range(startoflevel, len(matrix) + 1):
                 if rowidx == len(matrix):  # need to add a new row
-                    matrix.append([vertline if a not in (corner, None)
-                            else None for a in matrix[-1]])
+                    matrix.append(
+                        [
+                            vertline if a not in (corner, None) else None
+                            for a in matrix[-1]
+                        ]
+                    )
                 row = matrix[rowidx]
                 i = j = center
                 if len(children[m]) == 1:  # place unaries directly above child
                     return rowidx, next(iter(children[m]))[1]
-                elif all(a is None or a == vertline for a
-                        in row[min(candidates):max(candidates) + 1]):
+                elif all(
+                    a is None or a == vertline
+                    for a in row[min(candidates) : max(candidates) + 1]
+                ):
                     # find free column
                     for n in range(scale):
                         i = j = center + n
                         while j > minidx or i < maxidx:
-                            if i < maxidx and (matrix[rowidx][i] is None
-                                    or i in candidates):
+                            if i < maxidx and (
+                                matrix[rowidx][i] is None or i in candidates
+                            ):
                                 return rowidx, i
-                            elif j > minidx and (matrix[rowidx][j] is None
-                                    or j in candidates):
+                            elif j > minidx and (
+                                matrix[rowidx][j] is None or j in candidates
+                            ):
                                 return rowidx, j
                             i += scale
                             j -= scale
-            raise ValueError('could not find a free cell for:\n%s\n%s'
-                    'min=%d; max=%d' % (tree[m], minidx, maxidx, dumpmatrix()))
+            raise ValueError(
+                "could not find a free cell for:\n%s\n%s"
+                "min=%d; max=%d" % (tree[m], minidx, maxidx, dumpmatrix())
+            )
 
         def dumpmatrix():
             """Dump matrix contents for debugging purposes."""
-            return '\n'.join(
-                '%2d: %s' % (n, ' '.join(('%2r' % i)[:2] for i in row))
-                for n, row in enumerate(matrix))
+            return "\n".join(
+                "%2d: %s" % (n, " ".join(("%2r" % i)[:2] for i in row))
+                for n, row in enumerate(matrix)
+            )
 
         leaves = tree.leaves()
         if not all(isinstance(n, int) for n in leaves):
-            raise ValueError('All leaves must be integer indices.')
+            raise ValueError("All leaves must be integer indices.")
         if len(leaves) != len(set(leaves)):
-            raise ValueError('Indices must occur at most once.')
+            raise ValueError("Indices must occur at most once.")
         if not all(0 <= n < len(sentence) for n in leaves):
-            raise ValueError('All leaves must be in the interval 0..n '
-                    'with n=len(sentence)\ntokens: %d indices: '
-                    '%r\nsentence: %s' % (len(sentence), tree.leaves(), sentence))
+            raise ValueError(
+                "All leaves must be in the interval 0..n "
+                "with n=len(sentence)\ntokens: %d indices: "
+                "%r\nsentence: %s" % (len(sentence), tree.leaves(), sentence)
+            )
         vertline, corner = -1, -2  # constants
         tree = tree.copy(True)
         for a in tree.subtrees():
@@ -212,8 +225,9 @@ class TreePrettyPrinter(object):
         matrix = [[None] * (len(sentence) * scale)]
         nodes = {}
         ids = dict((a, n) for n, a in enumerate(positions))
-        highlighted_nodes = set(n for a, n in ids.items()
-                                if not highlight or tree[a] in highlight)
+        highlighted_nodes = set(
+            n for a, n in ids.items() if not highlight or tree[a] in highlight
+        )
         levels = dict((n, []) for n in range(maxdepth - 1))
         terminals = []
         for a in positions:
@@ -224,8 +238,7 @@ class TreePrettyPrinter(object):
                 terminals.append(a)
 
         for n in levels:
-            levels[n].sort(key=lambda n: max(tree[n].leaves())
-                    - min(tree[n].leaves()))
+            levels[n].sort(key=lambda n: max(tree[n].leaves()) - min(tree[n].leaves()))
         terminals.sort()
         positions = set(positions)
 
@@ -235,7 +248,7 @@ class TreePrettyPrinter(object):
             matrix[0][i] = ids[m]
             nodes[ids[m]] = sentence[tree[m]]
             if nodes[ids[m]] is None:
-                nodes[ids[m]] = '...'
+                nodes[ids[m]] = "..."
                 highlighted_nodes.discard(ids[m])
             positions.remove(m)
             childcols[m[:-1]].add((0, i))
@@ -246,15 +259,23 @@ class TreePrettyPrinter(object):
         for n in sorted(levels, reverse=True):
             nodesatdepth = levels[n]
             startoflevel = len(matrix)
-            matrix.append([vertline if a not in (corner, None) else None
-                    for a in matrix[-1]])
+            matrix.append(
+                [vertline if a not in (corner, None) else None for a in matrix[-1]]
+            )
             for m in nodesatdepth:  # [::-1]:
                 if n < maxdepth - 1 and childcols[m]:
                     _, pivot = min(childcols[m], key=itemgetter(1))
-                    if (set(a[:-1] for row in matrix[:-1] for a in row[:pivot]
-                            if isinstance(a, tuple)) &
-                        set(a[:-1] for row in matrix[:-1] for a in row[pivot:]
-                            if isinstance(a, tuple))):
+                    if set(
+                        a[:-1]
+                        for row in matrix[:-1]
+                        for a in row[:pivot]
+                        if isinstance(a, tuple)
+                    ) & set(
+                        a[:-1]
+                        for row in matrix[:-1]
+                        for a in row[pivot:]
+                        if isinstance(a, tuple)
+                    ):
                         crossed.add(m)
 
                 rowidx, i = findcell(m, matrix, startoflevel, childcols)
@@ -275,14 +296,16 @@ class TreePrettyPrinter(object):
 
         # remove unused columns, right to left
         for m in range(scale * len(sentence) - 1, -1, -1):
-            if not any(isinstance(row[m], (Tree, int))
-                    for row in matrix):
+            if not any(isinstance(row[m], (Tree, int)) for row in matrix):
                 for row in matrix:
                     del row[m]
 
         # remove unused rows, reverse
-        matrix = [row for row in reversed(matrix)
-                if not all(a is None or a == vertline for a in row)]
+        matrix = [
+            row
+            for row in reversed(matrix)
+            if not all(a is None or a == vertline for a in row)
+        ]
 
         # collect coordinates of nodes
         coords = {}
@@ -292,21 +315,31 @@ class TreePrettyPrinter(object):
                     coords[i] = n, m
 
         # move crossed edges last
-        positions = sorted([a for level in levels.values()
-                for a in level], key=lambda a: a[:-1] in crossed)
+        positions = sorted(
+            [a for level in levels.values() for a in level],
+            key=lambda a: a[:-1] in crossed,
+        )
 
         # collect edges from node to node
         edges = OrderedDict()
         for i in reversed(positions):
             for j, _ in enumerate(tree[i]):
-                edges[ids[i + (j, )]] = ids[i]
+                edges[ids[i + (j,)]] = ids[i]
 
         return nodes, coords, edges, highlighted_nodes
 
-
-    def text(self, nodedist=1, unicodelines=False, html=False, ansi=False,
-             nodecolor='blue', leafcolor='red', funccolor='green',
-             abbreviate=None, maxwidth=16):
+    def text(
+        self,
+        nodedist=1,
+        unicodelines=False,
+        html=False,
+        ansi=False,
+        nodecolor="blue",
+        leafcolor="red",
+        funccolor="green",
+        abbreviate=None,
+        maxwidth=16,
+    ):
         """
         :return: ASCII art for a discontinuous tree.
 
@@ -325,28 +358,28 @@ class TreePrettyPrinter(object):
         if abbreviate == True:
             abbreviate = 5
         if unicodelines:
-            horzline = '\u2500'
-            leftcorner = '\u250c'
-            rightcorner = '\u2510'
-            vertline = ' \u2502 '
-            tee = horzline + '\u252C' + horzline
-            bottom = horzline + '\u2534' + horzline
-            cross = horzline + '\u253c' + horzline
-            ellipsis = '\u2026'
+            horzline = "\u2500"
+            leftcorner = "\u250c"
+            rightcorner = "\u2510"
+            vertline = " \u2502 "
+            tee = horzline + "\u252C" + horzline
+            bottom = horzline + "\u2534" + horzline
+            cross = horzline + "\u253c" + horzline
+            ellipsis = "\u2026"
         else:
-            horzline = '_'
-            leftcorner = rightcorner = ' '
-            vertline = ' | '
+            horzline = "_"
+            leftcorner = rightcorner = " "
+            vertline = " | "
             tee = 3 * horzline
-            cross = bottom = '_|_'
-            ellipsis = '.'
+            cross = bottom = "_|_"
+            ellipsis = "."
 
         def crosscell(cur, x=vertline):
             """Overwrite center of this cell with a vertical branch."""
             splitl = len(cur) - len(cur) // 2 - len(x) // 2 - 1
             lst = list(cur)
-            lst[splitl:splitl + len(x)] = list(x)
-            return ''.join(lst)
+            lst[splitl : splitl + len(x)] = list(x)
+            return "".join(lst)
 
         result = []
         matrix = defaultdict(dict)
@@ -357,20 +390,24 @@ class TreePrettyPrinter(object):
         maxchildcol = {}
         childcols = defaultdict(set)
         labels = {}
-        wrapre = re.compile('(.{%d,%d}\\b\\W*|.{%d})' % (
-                maxwidth - 4, maxwidth, maxwidth))
+        wrapre = re.compile(
+            "(.{%d,%d}\\b\\W*|.{%d})" % (maxwidth - 4, maxwidth, maxwidth)
+        )
         # collect labels and coordinates
         for a in self.nodes:
             row, column = self.coords[a]
             matrix[row][column] = a
             maxcol = max(maxcol, column)
-            label = (self.nodes[a].label() if isinstance(self.nodes[a], Tree)
-                     else self.nodes[a])
+            label = (
+                self.nodes[a].label()
+                if isinstance(self.nodes[a], Tree)
+                else self.nodes[a]
+            )
             if abbreviate and len(label) > abbreviate:
                 label = label[:abbreviate] + ellipsis
             if maxwidth and len(label) > maxwidth:
-                label = wrapre.sub(r'\1\n', label).strip()
-            label = label.split('\n')
+                label = wrapre.sub(r"\1\n", label).strip()
+            label = label.split("\n")
             maxnodeheight[row] = max(maxnodeheight[row], len(label))
             maxnodewith[column] = max(maxnodewith[column], max(map(len, label)))
             labels[a] = label
@@ -382,9 +419,11 @@ class TreePrettyPrinter(object):
             maxchildcol[parent] = max(maxchildcol.get(parent, column), column)
         # bottom up level order traversal
         for row in sorted(matrix, reverse=True):
-            noderows = [[''.center(maxnodewith[col]) for col in range(maxcol + 1)]
-                    for _ in range(maxnodeheight[row])]
-            branchrow = [''.center(maxnodewith[col]) for col in range(maxcol + 1)]
+            noderows = [
+                ["".center(maxnodewith[col]) for col in range(maxcol + 1)]
+                for _ in range(maxnodeheight[row])
+            ]
+            branchrow = ["".center(maxnodewith[col]) for col in range(maxcol + 1)]
             for col in matrix[row]:
                 n = matrix[row][col]
                 node = self.nodes[n]
@@ -394,13 +433,14 @@ class TreePrettyPrinter(object):
                     if n in minchildcol and minchildcol[n] < maxchildcol[n]:
                         i, j = minchildcol[n], maxchildcol[n]
                         a, b = (maxnodewith[i] + 1) // 2 - 1, maxnodewith[j] // 2
-                        branchrow[i] = ((' ' * a) + leftcorner).ljust(
-                                maxnodewith[i], horzline)
-                        branchrow[j] = (rightcorner + (' ' * b)).rjust(
-                                maxnodewith[j], horzline)
+                        branchrow[i] = ((" " * a) + leftcorner).ljust(
+                            maxnodewith[i], horzline
+                        )
+                        branchrow[j] = (rightcorner + (" " * b)).rjust(
+                            maxnodewith[j], horzline
+                        )
                         for i in range(minchildcol[n] + 1, maxchildcol[n]):
-                            if i == col and any(
-                                    a == i for _, a in childcols[n]):
+                            if i == col and any(a == i for _, a in childcols[n]):
                                 line = cross
                             elif i == col:
                                 line = bottom
@@ -413,41 +453,44 @@ class TreePrettyPrinter(object):
                         branchrow[col] = crosscell(branchrow[col])
                 text = [a.center(maxnodewith[col]) for a in text]
                 color = nodecolor if isinstance(node, Tree) else leafcolor
-                if isinstance(node, Tree) and node.label().startswith('-'):
+                if isinstance(node, Tree) and node.label().startswith("-"):
                     color = funccolor
                 if html:
-                    text = [escape(a) for a in text]
+                    text = [escape(a, quote=False) for a in text]
                     if n in self.highlight:
-                        text = ['<font color=%s>%s</font>' % (
-                                color, a) for a in text]
+                        text = ["<font color=%s>%s</font>" % (color, a) for a in text]
                 elif ansi and n in self.highlight:
-                    text = ['\x1b[%d;1m%s\x1b[0m' % (
-                            ANSICOLOR[color], a) for a in text]
+                    text = ["\x1b[%d;1m%s\x1b[0m" % (ANSICOLOR[color], a) for a in text]
                 for x in range(maxnodeheight[row]):
                     # draw vertical lines in partially filled multiline node
                     # labels, but only if it's not a frontier node.
-                    noderows[x][col] = (text[x] if x < len(text)
-                            else (vertline if childcols[n] else ' ').center(
-                                maxnodewith[col], ' '))
+                    noderows[x][col] = (
+                        text[x]
+                        if x < len(text)
+                        else (vertline if childcols[n] else " ").center(
+                            maxnodewith[col], " "
+                        )
+                    )
             # for each column, if there is a node below us which has a parent
             # above us, draw a vertical branch in that column.
             if row != max(matrix):
                 for n, (childrow, col) in self.coords.items():
-                    if (n > 0 and
-                            self.coords[self.edges[n]][0] < row < childrow):
+                    if n > 0 and self.coords[self.edges[n]][0] < row < childrow:
                         branchrow[col] = crosscell(branchrow[col])
                         if col not in matrix[row]:
                             for noderow in noderows:
                                 noderow[col] = crosscell(noderow[col])
-                branchrow = [a + ((a[-1] if a[-1] != ' ' else b[0]) * nodedist)
-                        for a, b in zip(branchrow, branchrow[1:] + [' '])]
-                result.append(''.join(branchrow))
-            result.extend((' ' * nodedist).join(noderow)
-                    for noderow in reversed(noderows))
-        return '\n'.join(reversed(result)) + '\n'
-
+                branchrow = [
+                    a + ((a[-1] if a[-1] != " " else b[0]) * nodedist)
+                    for a, b in zip(branchrow, branchrow[1:] + [" "])
+                ]
+                result.append("".join(branchrow))
+            result.extend(
+                (" " * nodedist).join(noderow) for noderow in reversed(noderows)
+            )
+        return "\n".join(reversed(result)) + "\n"
 
-    def svg(self, nodecolor='blue', leafcolor='red', funccolor='green'):
+    def svg(self, nodecolor="blue", leafcolor="red", funccolor="green"):
         """
         :return: SVG representation of a tree.
         """
@@ -457,14 +500,18 @@ class TreePrettyPrinter(object):
         hstart = vstart = 20
         width = max(col for _, col in self.coords.values())
         height = max(row for row, _ in self.coords.values())
-        result = ['<svg version="1.1" xmlns="http://www.w3.org/2000/svg" '
-                  'width="%dem" height="%dem" viewBox="%d %d %d %d">' % (
-                      width * 3,
-                      height * 2.5,
-                      -hstart, -vstart,
-                      width * hscale + 3 * hstart,
-                      height * vscale + 3 * vstart)
-                      ]
+        result = [
+            '<svg version="1.1" xmlns="http://www.w3.org/2000/svg" '
+            'width="%dem" height="%dem" viewBox="%d %d %d %d">'
+            % (
+                width * 3,
+                height * 2.5,
+                -hstart,
+                -vstart,
+                width * hscale + 3 * hstart,
+                height * vscale + 3 * vstart,
+            )
+        ]
 
         children = defaultdict(set)
         for n in self.nodes:
@@ -485,10 +532,12 @@ class TreePrettyPrinter(object):
             xmax = hstart + hscale * max(childx)
             result.append(
                 '\t<polyline style="stroke:black; stroke-width:1; fill:none;" '
-                'points="%g,%g %g,%g" />' % (xmin, y, xmax, y))
+                'points="%g,%g %g,%g" />' % (xmin, y, xmax, y)
+            )
             result.append(
                 '\t<polyline style="stroke:black; stroke-width:1; fill:none;" '
-                'points="%g,%g %g,%g" />' % (x, y, x, y - fontsize // 3))
+                'points="%g,%g %g,%g" />' % (x, y, x, y - fontsize // 3)
+            )
 
         # vertical branches from children to parents
         for child, parent in self.edges.items():
@@ -505,7 +554,7 @@ class TreePrettyPrinter(object):
                 ' points="%g,%g %g,%g" />' % (childx, childy, childx, y + 5),
                 '\t<polyline style="stroke:black; stroke-width:1; fill:none;"'
                 ' points="%g,%g %g,%g" />' % (childx, childy, childx, y),
-                ]
+            ]
 
         # write nodes with coordinates
         for n, (row, column) in self.coords.items():
@@ -514,25 +563,32 @@ class TreePrettyPrinter(object):
             y = row * vscale + vstart
             if n in self.highlight:
                 color = nodecolor if isinstance(node, Tree) else leafcolor
-                if isinstance(node, Tree) and node.label().startswith('-'):
+                if isinstance(node, Tree) and node.label().startswith("-"):
                     color = funccolor
             else:
-                color = 'black'
-            result += ['\t<text style="text-anchor: middle; fill: %s; '
-                       'font-size: %dpx;" x="%g" y="%g">%s</text>' % (
-                           color, fontsize, x, y,
-                           escape(node.label() if isinstance(node, Tree)
-                                  else node))]
+                color = "black"
+            result += [
+                '\t<text style="text-anchor: middle; fill: %s; '
+                'font-size: %dpx;" x="%g" y="%g">%s</text>'
+                % (
+                    color,
+                    fontsize,
+                    x,
+                    y,
+                    escape(node.label() if isinstance(node, Tree) else node, quote=False),
+                )
+            ]
 
-        result += ['</svg>']
-        return '\n'.join(result)
+        result += ["</svg>"]
+        return "\n".join(result)
 
 
 def test():
     """Do some tree drawing tests."""
+
     def print_tree(n, tree, sentence=None, ansi=True, **xargs):
         print()
-        print('{0}: "{1}"'.format(n, ' '.join(sentence or tree.leaves())))
+        print('{0}: "{1}"'.format(n, " ".join(sentence or tree.leaves())))
         print(tree)
         print()
         drawtree = TreePrettyPrinter(tree, sentence)
@@ -542,23 +598,28 @@ def test():
             print(drawtree.text(unicodelines=False, ansi=False, **xargs))
 
     from nltk.corpus import treebank
+
     for n in [0, 1440, 1591, 2771, 2170]:
         tree = treebank.parsed_sents()[n]
         print_tree(n, tree, nodedist=2, maxwidth=8)
     print()
-    print('ASCII version:')
+    print("ASCII version:")
     print(TreePrettyPrinter(tree).text(nodedist=2))
 
     tree = Tree.fromstring(
-        '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
-        '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
-        '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
-    sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
-                ' zwemmen of terrassen .'.split())
-    print_tree('Discontinuous tree', tree, sentence, nodedist=2)
+        "(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) "
+        "(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) "
+        "(vg 10) (inf (verb 11)))))) (punct 12))",
+        read_leaf=int,
+    )
+    sentence = (
+        "Ze had met haar moeder kunnen gaan winkelen ,"
+        " zwemmen of terrassen .".split()
+    )
+    print_tree("Discontinuous tree", tree, sentence, nodedist=2)
 
 
-__all__ = ['TreePrettyPrinter']
+__all__ = ["TreePrettyPrinter"]
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     test()
diff --git a/nlp_resource_data/nltk/treeprettyprinter.pyc b/nlp_resource_data/nltk/treeprettyprinter.pyc
deleted file mode 100755 (executable)
index ffd6d3f..0000000
Binary files a/nlp_resource_data/nltk/treeprettyprinter.pyc and /dev/null differ
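
Typical use of the class reformatted above -- a minimal sketch, assuming NLTK 3 (the `html.escape` import path):

    from nltk.tree import Tree
    from nltk.treeprettyprinter import TreePrettyPrinter

    t = Tree.fromstring("(S (NP (DT the) (NN cat)) (VP (VBD sat)))")
    pp = TreePrettyPrinter(t)
    print(pp.text(unicodelines=True, nodedist=2))  # box-drawing branches
    svg_markup = pp.svg()  # the same tree as inline SVG markup
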
old mode 100755 (executable)
new mode 100644 (file)
index cf21528..0c422f6
@@ -106,17 +106,20 @@ The following is a short tutorial on the available transformations.
      C   D      C   D
 
 """
-from __future__ import print_function
 
 from nltk.tree import Tree
 
-def chomsky_normal_form(tree, factor="right", horzMarkov=None, vertMarkov=0, childChar="|", parentChar="^"):
+
+def chomsky_normal_form(
+    tree, factor="right", horzMarkov=None, vertMarkov=0, childChar="|", parentChar="^"
+):
     # assume all subtrees have homogeneous children
     # assume all terminals have no siblings
 
     # A semi-hack to have elegant looking code below.  As a result,
     # any subtree with a branching factor greater than 999 will be incorrectly truncated.
-    if horzMarkov is None: horzMarkov = 999
+    if horzMarkov is None:
+        horzMarkov = 999
 
     # Traverse the tree depth-first keeping a list of ancestor nodes to the root.
     # I chose not to use the tree.treepositions() method since it requires
@@ -127,15 +130,15 @@ def chomsky_normal_form(tree, factor="right", horzMarkov=None, vertMarkov=0, chi
     nodeList = [(tree, [tree.label()])]
     while nodeList != []:
         node, parent = nodeList.pop()
-        if isinstance(node,Tree):
+        if isinstance(node, Tree):
 
             # parent annotation
             parentString = ""
             originalNode = node.label()
-            if vertMarkov != 0 and node != tree and isinstance(node[0],Tree):
+            if vertMarkov != 0 and node != tree and isinstance(node[0], Tree):
                 parentString = "%s<%s>" % (parentChar, "-".join(parent))
                 node.set_label(node.label() + parentString)
-                parent = [originalNode] + parent[:vertMarkov - 1]
+                parent = [originalNode] + parent[: vertMarkov - 1]
 
             # add children to the agenda before we mess with them
             for child in node:
@@ -145,17 +148,31 @@ def chomsky_normal_form(tree, factor="right", horzMarkov=None, vertMarkov=0, chi
             if len(node) > 2:
                 childNodes = [child.label() for child in node]
                 nodeCopy = node.copy()
-                node[0:] = [] # delete the children
+                node[0:] = []  # delete the children
 
                 curNode = node
                 numChildren = len(nodeCopy)
-                for i in range(1,numChildren - 1):
+                for i in range(1, numChildren - 1):
                     if factor == "right":
-                        newHead = "%s%s<%s>%s" % (originalNode, childChar, "-".join(childNodes[i:min([i+horzMarkov,numChildren])]),parentString) # create new head
+                        newHead = "%s%s<%s>%s" % (
+                            originalNode,
+                            childChar,
+                            "-".join(
+                                childNodes[i : min([i + horzMarkov, numChildren])]
+                            ),
+                            parentString,
+                        )  # create new head
                         newNode = Tree(newHead, [])
                         curNode[0:] = [nodeCopy.pop(0), newNode]
                     else:
-                        newHead = "%s%s<%s>%s" % (originalNode, childChar, "-".join(childNodes[max([numChildren-i-horzMarkov,0]):-i]),parentString)
+                        newHead = "%s%s<%s>%s" % (
+                            originalNode,
+                            childChar,
+                            "-".join(
+                                childNodes[max([numChildren - i - horzMarkov, 0]) : -i]
+                            ),
+                            parentString,
+                        )
                         newNode = Tree(newHead, [])
                         curNode[0:] = [newNode, nodeCopy.pop()]
 
@@ -164,12 +181,14 @@ def chomsky_normal_form(tree, factor="right", horzMarkov=None, vertMarkov=0, chi
                 curNode[0:] = [child for child in nodeCopy]
 
 
-def un_chomsky_normal_form(tree, expandUnary = True, childChar = "|", parentChar = "^", unaryChar = "+"):
+def un_chomsky_normal_form(
+    tree, expandUnary=True, childChar="|", parentChar="^", unaryChar="+"
+):
     # Traverse the tree-depth first keeping a pointer to the parent for modification purposes.
-    nodeList = [(tree,[])]
+    nodeList = [(tree, [])]
     while nodeList != []:
-        node,parent = nodeList.pop()
-        if isinstance(node,Tree):
+        node, parent = nodeList.pop()
+        if isinstance(node, Tree):
             # if the node contains the 'childChar' character it means that
             # it is an artificial node and can be removed, although we still need
             # to move its children to its parent
@@ -181,10 +200,10 @@ def un_chomsky_normal_form(tree, expandUnary = True, childChar = "|", parentChar
                 # means the grammar was left factored.  We must insert the children
                 # at the beginning of the parent's children
                 if nodeIndex == 0:
-                    parent.insert(0,node[0])
-                    parent.insert(1,node[1])
+                    parent.insert(0, node[0])
+                    parent.insert(1, node[1])
                 else:
-                    parent.extend([node[0],node[1]])
+                    parent.extend([node[0], node[1]])
 
                 # parent is now the current node so the children of parent will be added to the agenda
                 node = parent
@@ -198,15 +217,17 @@ def un_chomsky_normal_form(tree, expandUnary = True, childChar = "|", parentChar
                 if expandUnary == True:
                     unaryIndex = node.label().find(unaryChar)
                     if unaryIndex != -1:
-                        newNode = Tree(node.label()[unaryIndex + 1:], [i for i in node])
+                        newNode = Tree(
+                            node.label()[unaryIndex + 1 :], [i for i in node]
+                        )
                         node.set_label(node.label()[:unaryIndex])
                         node[0:] = [newNode]
 
             for child in node:
-                nodeList.append((child,node))
+                nodeList.append((child, node))
 
 
-def collapse_unary(tree, collapsePOS = False, collapseRoot = False, joinChar = "+"):
+def collapse_unary(tree, collapsePOS=False, collapseRoot=False, joinChar="+"):
     """
     Collapse subtrees with a single child (i.e. unary productions)
     into a new non-terminal (Tree node) joined by 'joinChar'.
@@ -236,8 +257,12 @@ def collapse_unary(tree, collapsePOS = False, collapseRoot = False, joinChar = "
     # depth-first traversal of tree
     while nodeList != []:
         node = nodeList.pop()
-        if isinstance(node,Tree):
-            if len(node) == 1 and isinstance(node[0], Tree) and (collapsePOS == True or isinstance(node[0,0], Tree)):
+        if isinstance(node, Tree):
+            if (
+                len(node) == 1
+                and isinstance(node[0], Tree)
+                and (collapsePOS == True or isinstance(node[0, 0], Tree))
+            ):
                 node.set_label(node.label() + joinChar + node[0].label())
                 node[0:] = [child for child in node[0]]
                 # since we assigned the child's children to the current node,
@@ -247,10 +272,12 @@ def collapse_unary(tree, collapsePOS = False, collapseRoot = False, joinChar = "
                 for child in node:
                     nodeList.append(child)
 
+
 #################################################################
 # Demonstration
 #################################################################
 
+
 def demo():
     """
     A demonstration showing how each tree transform can be used.
@@ -303,7 +330,8 @@ def demo():
 
     draw_trees(t, collapsedTree, cnfTree, parentTree, original)
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     demo()
 
 __all__ = ["chomsky_normal_form", "un_chomsky_normal_form", "collapse_unary"]
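
The three transforms reformatted above, end to end -- a minimal sketch, assuming NLTK 3:

    from nltk.tree import Tree
    from nltk.treetransforms import (
        chomsky_normal_form,
        un_chomsky_normal_form,
        collapse_unary,
    )

    t = Tree.fromstring("(S (VP (VBD sat) (PRT down) (ADVP (RB quickly))))")
    chomsky_normal_form(t, factor="right", horzMarkov=2)  # modifies t in place
    print(t)  # VP gains an artificial VP|<PRT-ADVP> node
    un_chomsky_normal_form(t)
    print(t)  # the artificial |-node is folded back into VP

    u = Tree.fromstring("(ROOT (S (VP (VB run))))")
    collapse_unary(u, collapsePOS=True, collapseRoot=True)
    print(u)  # (ROOT+S+VP+VB run)
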
old mode 100755 (executable)
new mode 100644 (file)
index 655d7a9..1666e2c
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
@@ -17,12 +17,20 @@ try:
     import twython
 except ImportError:
     import warnings
-    warnings.warn("The twython library has not been installed. "
-                  "Some functionality from the twitter package will not be available.")
+
+    warnings.warn(
+        "The twython library has not been installed. "
+        "Some functionality from the twitter package will not be available."
+    )
 else:
     from nltk.twitter.util import Authenticate, credsfromfile
-    from nltk.twitter.twitterclient import Streamer, Query, Twitter,\
-         TweetViewer, TweetWriter
+    from nltk.twitter.twitterclient import (
+        Streamer,
+        Query,
+        Twitter,
+        TweetViewer,
+        TweetWriter,
+    )
 
 
 from nltk.twitter.common import json2csv
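
Client code can mirror the guard above when twython may be absent -- a minimal sketch:

    try:
        from nltk.twitter import Twitter  # only exported when twython is present
    except ImportError:
        Twitter = None

    if Twitter is None:
        print("twython not installed; the Twitter client is unavailable")
    else:
        print("Twitter client available")
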
diff --git a/nlp_resource_data/nltk/twitter/__init__.pyc b/nlp_resource_data/nltk/twitter/__init__.pyc
deleted file mode 100755 (executable)
index 7627ae6..0000000
Binary files a/nlp_resource_data/nltk/twitter/__init__.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/twitter/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/twitter/__pycache__/__init__.cpython-37.pyc
new file mode 100644 (file)
index 0000000..053ad6a
Binary files /dev/null and b/nlp_resource_data/nltk/twitter/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/twitter/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/twitter/__pycache__/api.cpython-37.pyc
new file mode 100644 (file)
index 0000000..b098ccc
Binary files /dev/null and b/nlp_resource_data/nltk/twitter/__pycache__/api.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/twitter/__pycache__/common.cpython-37.pyc b/nlp_resource_data/nltk/twitter/__pycache__/common.cpython-37.pyc
new file mode 100644 (file)
index 0000000..f04e698
Binary files /dev/null and b/nlp_resource_data/nltk/twitter/__pycache__/common.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/twitter/__pycache__/twitter_demo.cpython-37.pyc b/nlp_resource_data/nltk/twitter/__pycache__/twitter_demo.cpython-37.pyc
new file mode 100644 (file)
index 0000000..ccbed61
Binary files /dev/null and b/nlp_resource_data/nltk/twitter/__pycache__/twitter_demo.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/twitter/__pycache__/twitterclient.cpython-37.pyc b/nlp_resource_data/nltk/twitter/__pycache__/twitterclient.cpython-37.pyc
new file mode 100644 (file)
index 0000000..40849cc
Binary files /dev/null and b/nlp_resource_data/nltk/twitter/__pycache__/twitterclient.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/twitter/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/twitter/__pycache__/util.cpython-37.pyc
new file mode 100644 (file)
index 0000000..8433890
Binary files /dev/null and b/nlp_resource_data/nltk/twitter/__pycache__/util.cpython-37.pyc differ
old mode 100755 (executable)
new mode 100644 (file)
index 05c71f8..1533ad2
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter API
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 #         Lorenzo Rubio <lrnzcig@gmail.com>
 # URL: <http://nltk.org/>
@@ -12,11 +12,9 @@ This module provides an interface for TweetHandlers, and support for timezone
 handling.
 """
 
-from abc import ABCMeta, abstractmethod
-from six import add_metaclass
-from datetime import tzinfo, timedelta, datetime
-from nltk.compat import UTC
 import time as _time
+from abc import ABCMeta, abstractmethod
+from datetime import tzinfo, timedelta, timezone, datetime
 
 
 class LocalTimezoneOffsetWithUTC(tzinfo):
@@ -31,6 +29,7 @@ class LocalTimezoneOffsetWithUTC(tzinfo):
 
     Reference: https://docs.python.org/3/library/datetime.html
     """
+
     STDOFFSET = timedelta(seconds=-_time.timezone)
 
     if _time.daylight:
@@ -48,14 +47,14 @@ class LocalTimezoneOffsetWithUTC(tzinfo):
 LOCAL = LocalTimezoneOffsetWithUTC()
 
 
-@add_metaclass(ABCMeta)
-class BasicTweetHandler(object):
+class BasicTweetHandler(metaclass=ABCMeta):
     """
     Minimal implementation of `TweetHandler`.
 
     Counts the number of Tweets and decides when the client should stop
     fetching them.
     """
+
     def __init__(self, limit=20):
         self.limit = limit
         self.counter = 0
@@ -77,11 +76,13 @@ class BasicTweetHandler(object):
         """
         return self.counter < self.limit and not self.do_stop
 
+
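A minimal sketch of a toy handler built on the class above, assuming NLTK 3 (`range(10)` stands in for a live tweet stream):

    from nltk.twitter.api import BasicTweetHandler

    class CountingHandler(BasicTweetHandler):
        def handle(self, data):
            self.counter += 1  # compared against self.limit by do_continue()

    handler = CountingHandler(limit=3)
    for item in range(10):  # stand-in for a live tweet stream
        if not handler.do_continue():
            break
        handler.handle(item)
    print(handler.counter)  # 3
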
 class TweetHandlerI(BasicTweetHandler):
     """
     Interface class whose subclasses should implement a handle method that
     Twitter clients can delegate to.
     """
+
     def __init__(self, limit=20, upper_date_limit=None, lower_date_limit=None):
         """
         :param int limit: The number of data items to process in the current\
@@ -123,12 +124,13 @@ class TweetHandlerI(BasicTweetHandler):
         Validate date limits.
         """
         if self.upper_date_limit or self.lower_date_limit:
-            date_fmt = '%a %b %d %H:%M:%S +0000 %Y'
-            tweet_date = \
-                datetime.strptime(data['created_at'],
-                                  date_fmt).replace(tzinfo=UTC)
-            if (self.upper_date_limit and tweet_date > self.upper_date_limit) or \
-               (self.lower_date_limit and tweet_date < self.lower_date_limit):
+            date_fmt = "%a %b %d %H:%M:%S +0000 %Y"
+            tweet_date = datetime.strptime(data["created_at"], date_fmt).replace(
+                tzinfo=timezone.utc
+            )
+            if (self.upper_date_limit and tweet_date > self.upper_date_limit) or (
+                self.lower_date_limit and tweet_date < self.lower_date_limit
+            ):
                 if self.upper_date_limit:
                     message = "earlier"
                     date_limit = self.upper_date_limit
@@ -136,6 +138,9 @@ class TweetHandlerI(BasicTweetHandler):
                     message = "later"
                     date_limit = self.lower_date_limit
                 if verbose:
-                    print("Date limit {0} is {1} than date of current tweet {2}".\
-                      format(date_limit, message, tweet_date))
+                    print(
+                        "Date limit {0} is {1} than date of current tweet {2}".format(
+                            date_limit, message, tweet_date
+                        )
+                    )
                 self.do_stop = True
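
The date check above, isolated -- a minimal sketch using the stdlib `timezone.utc` that replaced `nltk.compat.UTC` (`created_at` is a sample value):

    from datetime import datetime, timezone

    date_fmt = "%a %b %d %H:%M:%S +0000 %Y"
    created_at = "Wed Sep 02 11:05:47 +0000 2020"
    tweet_date = datetime.strptime(created_at, date_fmt).replace(tzinfo=timezone.utc)
    upper_date_limit = datetime(2020, 1, 1, tzinfo=timezone.utc)
    print(tweet_date > upper_date_limit)  # True -> the handler would set do_stop
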
diff --git a/nlp_resource_data/nltk/twitter/api.pyc b/nlp_resource_data/nltk/twitter/api.pyc
deleted file mode 100755 (executable)
index 8805e46..0000000
Binary files a/nlp_resource_data/nltk/twitter/api.pyc and /dev/null differ
old mode 100755 (executable)
new mode 100644 (file)
index 811f56f..e4b3182
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter client
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 #         Lorenzo Rubio <lrnzcig@gmail.com>
 # URL: <http://nltk.org/>
 Utility functions for the :module:`twitterclient` module which do not require
 the `twython` library to have been installed.
 """
-from __future__ import print_function
-
 import csv
 import gzip
 import json
 
-import nltk.compat as compat
-
+from nltk.internals import deprecated
 
 HIER_SEPARATOR = "."
 
+
 def extract_fields(tweet, fields):
     """
     Extract field values from a full tweet and return them as a list
@@ -35,9 +33,12 @@ def extract_fields(tweet, fields):
         try:
             _add_field_to_out(tweet, field, out)
         except TypeError:
-            raise RuntimeError('Fatal error when extracting fields. Cannot find field ', field)
+            raise RuntimeError(
+                "Fatal error when extracting fields. Cannot find field ", field
+            )
     return out
 
+
 def _add_field_to_out(json, field, out):
     if _is_composed_key(field):
         key, value = _get_key_value_composed(field)
@@ -45,11 +46,13 @@ def _add_field_to_out(json, field, out):
     else:
         out += [json[field]]
 
+
 def _is_composed_key(field):
     if HIER_SEPARATOR in field:
         return True
     return False
 
+
 def _get_key_value_composed(field):
     out = field.split(HIER_SEPARATOR)
     # there could be up to 3 levels
@@ -57,6 +60,7 @@ def _get_key_value_composed(field):
     value = HIER_SEPARATOR.join(out[1:])
     return key, value
 
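A toy illustration of the composed-key convention these (module-private) helpers implement; the field names are made up:

    from nltk.twitter.common import _get_key_value_composed, _is_composed_key

    print(_is_composed_key("user.id"))                       # True
    print(_get_key_value_composed("user.location.country"))  # ('user', 'location.country')
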
+
 def _get_entity_recursive(json, entity):
     if not json:
         return None
@@ -68,7 +72,7 @@ def _get_entity_recursive(json, entity):
             # structure that contain other Twitter objects. See:
             # https://dev.twitter.com/overview/api/entities-in-twitter-objects
 
-            if key == 'entities' or key == 'extended_entities':
+            if key == "entities" or key == "extended_entities":
                 candidate = _get_entity_recursive(value, entity)
                 if candidate is not None:
                     return candidate
@@ -82,8 +86,10 @@ def _get_entity_recursive(json, entity):
     else:
         return None
 
-def json2csv(fp, outfile, fields, encoding='utf8', errors='replace',
-             gzip_compress=False):
+
+def json2csv(
+    fp, outfile, fields, encoding="utf8", errors="replace", gzip_compress=False
+):
     """
     Extract selected fields from a file of line-separated JSON tweets and
     write to a file in CSV format.
@@ -107,7 +113,7 @@ def json2csv(fp, outfile, fields, encoding='utf8', errors='replace',
     are 'id_str' for the tweetID and 'text' for the text of the tweet. See\
     <https://dev.twitter.com/overview/api/tweets> for a full list of fields.\
     e. g.: ['id_str'], ['id', 'text', 'favorite_count', 'retweet_count']\
-    Additonally, it allows IDs from other Twitter objects, e. g.,\
+    Additionally, it allows IDs from other Twitter objects, e. g.,\
     ['id', 'text', 'user.id', 'user.followers_count', 'user.friends_count']
 
     :param error: Behaviour for encoding errors, see\
@@ -115,7 +121,7 @@ def json2csv(fp, outfile, fields, encoding='utf8', errors='replace',
 
     :param gzip_compress: if `True`, output files are compressed with gzip
     """
-    (writer, outf) = outf_writer_compat(outfile, encoding, errors, gzip_compress)
+    (writer, outf) = _outf_writer(outfile, encoding, errors, gzip_compress)
     # write the list of fields as header
     writer.writerow(fields)
     # process the file
@@ -126,27 +132,31 @@ def json2csv(fp, outfile, fields, encoding='utf8', errors='replace',
     outf.close()
 
 
+@deprecated("Use open() and csv.writer() directly instead.")
 def outf_writer_compat(outfile, encoding, errors, gzip_compress=False):
-    """
-    Identify appropriate CSV writer given the Python version
-    """
-    if compat.PY3:
-        if gzip_compress:
-            outf = gzip.open(outfile, 'wt', encoding=encoding, errors=errors)
-        else:
-            outf = open(outfile, 'w', encoding=encoding, errors=errors)
-        writer = csv.writer(outf)
+    """Get a CSV writer with optional compression."""
+    return _outf_writer(outfile, encoding, errors, gzip_compress)
+
+
+def _outf_writer(outfile, encoding, errors, gzip_compress=False):
+    if gzip_compress:
+        outf = gzip.open(outfile, "wt", encoding=encoding, errors=errors)
     else:
-        if gzip_compress:
-            outf = gzip.open(outfile, 'wb')
-        else:
-            outf = open(outfile, 'wb')
-        writer = compat.UnicodeWriter(outf, encoding=encoding, errors=errors)
+        outf = open(outfile, "w", encoding=encoding, errors=errors)
+    writer = csv.writer(outf)
     return (writer, outf)
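
Per the deprecation message, callers can achieve the same under Python 3 without the wrapper (a sketch; the gzip variant is shown in a comment):

    import csv

    # gzip variant: outf = gzip.open("out.csv.gz", "wt", encoding="utf8", errors="replace")
    with open("out.csv", "w", encoding="utf8", errors="replace") as outf:
        writer = csv.writer(outf)
        writer.writerow(["id_str", "text"])
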
 
 
-def json2csv_entities(tweets_file, outfile, main_fields, entity_type, entity_fields,
-                      encoding='utf8', errors='replace', gzip_compress=False):
+def json2csv_entities(
+    tweets_file,
+    outfile,
+    main_fields,
+    entity_type,
+    entity_fields,
+    encoding="utf8",
+    errors="replace",
+    gzip_compress=False,
+):
     """
     Extract selected fields from a file of line-separated JSON tweets and
     write to a file in CSV format.
@@ -187,7 +197,7 @@ def json2csv_entities(tweets_file, outfile, main_fields, entity_type, entity_fie
     :param gzip_compress: if `True`, output files are compressed with gzip
     """
 
-    (writer, outf) = outf_writer_compat(outfile, encoding, errors, gzip_compress)
+    (writer, outf) = _outf_writer(outfile, encoding, errors, gzip_compress)
     header = get_header_field_list(main_fields, entity_type, entity_fields)
     writer.writerow(header)
     for line in tweets_file:
@@ -207,6 +217,7 @@ def json2csv_entities(tweets_file, outfile, main_fields, entity_type, entity_fie
             _write_to_file(tweet_fields, items, entity_fields, writer)
     outf.close()
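
A companion sketch for the entity variant (hypothetical file names; writes one row per hashtag, prefixed by the parent tweet's ID):

    from nltk.twitter.common import json2csv_entities

    with open("tweets.20200903.json") as fp:
        json2csv_entities(fp, "hashtags.csv", ["id_str"], "hashtags", ["text"])
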
 
+
 def get_header_field_list(main_fields, entity_type, entity_fields):
     if _is_composed_key(entity_type):
         key, value = _get_key_value_composed(entity_type)
@@ -223,6 +234,7 @@ def get_header_field_list(main_fields, entity_type, entity_fields):
     output2 = [HIER_SEPARATOR.join([sub_entity, x]) for x in entity_fields]
     return output1 + output2
 
+
 def _write_to_file(object_fields, items, entity_fields, writer):
     if not items:
         # it could be that the entity is just not present for the tweet
@@ -246,8 +258,12 @@ def _write_to_file(object_fields, items, entity_fields, writer):
             kd, vd = _get_key_value_composed(d)
             json_dict = items[kd]
             if not isinstance(json_dict, dict):
-                raise RuntimeError("""Key {0} does not contain a dictionary
-                in the json file""".format(kd))
+                raise RuntimeError(
+                    """Key {0} does not contain a dictionary
+                in the json file""".format(
+                        kd
+                    )
+                )
             row += [json_dict[vd]]
         writer.writerow(row)
         return
@@ -255,4 +271,3 @@ def _write_to_file(object_fields, items, entity_fields, writer):
     for item in items:
         row = object_fields + extract_fields(item, entity_fields)
         writer.writerow(row)
-
diff --git a/nlp_resource_data/nltk/twitter/common.pyc b/nlp_resource_data/nltk/twitter/common.pyc
deleted file mode 100755 (executable)
index 7b371e2..0000000
Binary files a/nlp_resource_data/nltk/twitter/common.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/twitter/twitter_demo.py b/nlp_resource_data/nltk/twitter/twitter_demo.py
old mode 100755 (executable)
new mode 100644 (file)
index 3338587..a241c07
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter client
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 #         Lorenzo Rubio <lrnzcig@gmail.com>
 # URL: <http://nltk.org/>
@@ -30,22 +30,28 @@ For documentation about the Twitter APIs, see `The Streaming APIs Overview
 For error codes see Twitter's
 `Error Codes and Responses <https://dev.twitter.com/overview/api/response-codes>`
 """
-from __future__ import print_function
 
 import datetime
 from functools import wraps
 import json
+from io import StringIO
 
-from nltk.compat import StringIO
+from nltk.twitter import (
+    Query,
+    Streamer,
+    Twitter,
+    TweetViewer,
+    TweetWriter,
+    credsfromfile,
+)
 
-from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter,\
-     credsfromfile
 
+SPACER = "###################################"
 
-SPACER = '###################################'
 
 def verbose(func):
     """Decorator for demo functions"""
+
     @wraps(func)
     def with_formatting(*args, **kwargs):
         print()
@@ -53,27 +59,30 @@ def verbose(func):
         print("Using %s" % (func.__name__))
         print(SPACER)
         return func(*args, **kwargs)
+
     return with_formatting
 
+
 def yesterday():
     """
     Get yesterday's datetime as a 6-tuple (year, month, day, hour, minute, second).
     """
-    date =  datetime.datetime.now()
+    date = datetime.datetime.now()
     date -= datetime.timedelta(days=1)
     date_tuple = date.timetuple()[:6]
     return date_tuple
 
+
 def setup():
     """
     Initialize global variables for the demos.
     """
     global USERIDS, FIELDS
 
-    USERIDS = ['759251', '612473', '15108702', '6017542', '2673523800']
+    USERIDS = ["759251", "612473", "15108702", "6017542", "2673523800"]
     # UserIDs corresponding to\
     #           @CNN,    @BBCNews, @ReutersLive, @BreakingNews, @AJELive
-    FIELDS = ['id_str']
+    FIELDS = ["id_str"]
 
 
 @verbose
@@ -83,16 +92,18 @@ def twitterclass_demo():
     """
     tw = Twitter()
     print("Track from the public stream\n")
-    tw.tweets(keywords='love, hate', limit=10) #public stream
+    tw.tweets(keywords="love, hate", limit=10)  # public stream
     print(SPACER)
     print("Search past Tweets\n")
     tw = Twitter()
-    tw.tweets(keywords='love, hate', stream=False, limit=10) # search past tweets
+    tw.tweets(keywords="love, hate", stream=False, limit=10)  # search past tweets
     print(SPACER)
-    print("Follow two accounts in the public stream" +
-          " -- be prepared to wait a few minutes\n")
+    print(
+        "Follow two accounts in the public stream"
+        + " -- be prepared to wait a few minutes\n"
+    )
     tw = Twitter()
-    tw.tweets(follow=['759251', '6017542'], stream=True, limit=5) #public stream
+    tw.tweets(follow=["759251", "6017542"], stream=True, limit=5)  # public stream
 
 
 @verbose
@@ -118,18 +129,18 @@ def tracktoscreen_demo(track="taylor swift", limit=10):
 
 
 @verbose
-def search_demo(keywords='nltk'):
+def search_demo(keywords="nltk"):
     """
     Use the REST API to search for past tweets containing a given keyword.
     """
     oauth = credsfromfile()
     client = Query(**oauth)
     for tweet in client.search_tweets(keywords=keywords, limit=10):
-        print(tweet['text'])
+        print(tweet["text"])
 
 
 @verbose
-def tweets_by_user_demo(user='NLTK_org', count=200):
+def tweets_by_user_demo(user="NLTK_org", count=200):
     """
     Use the REST API to search for past tweets by a given user.
     """
@@ -148,9 +159,9 @@ def lookup_by_userid_demo():
     client = Query(**oauth)
     user_info = client.user_info_from_id(USERIDS)
     for info in user_info:
-        name = info['screen_name']
-        followers = info['followers_count']
-        following = info['friends_count']
+        name = info["screen_name"]
+        followers = info["followers_count"]
+        following = info["friends_count"]
         print("{0}, followers: {1}, following: {2}".format(name, followers, following))
 
 
@@ -198,7 +209,7 @@ def limit_by_time_demo(keywords="nltk"):
     print("Cutoff date: {}\n".format(dt_date))
 
     for tweet in client.search_tweets(keywords=keywords):
-        print("{} ".format(tweet['created_at']), end='')
+        print("{} ".format(tweet["created_at"]), end="")
         client.handler.handle(tweet)
 
 
@@ -240,8 +251,8 @@ def expand_tweetids_demo():
     corresponding full Tweets, if available.
 
     """
-    ids_f =\
-        StringIO("""\
+    ids_f = StringIO(
+        """\
         588665495492124672
         588665495487909888
         588665495508766721
@@ -251,24 +262,34 @@ def expand_tweetids_demo():
         588665495525588992
         588665495487844352
         588665495492014081
-        588665495512948737""")
+        588665495512948737"""
+    )
     oauth = credsfromfile()
     client = Query(**oauth)
     hydrated = client.expand_tweetids(ids_f)
 
     for tweet in hydrated:
-            id_str = tweet['id_str']
-            print('id: {}'.format(id_str))
-            text = tweet['text']
-            if text.startswith('@null'):
-                text = "[Tweet not available]"
-            print(text + '\n')
-
-
-
-ALL = [twitterclass_demo, sampletoscreen_demo, tracktoscreen_demo,
-       search_demo, tweets_by_user_demo, lookup_by_userid_demo, followtoscreen_demo,
-       streamtofile_demo, limit_by_time_demo, corpusreader_demo, expand_tweetids_demo]
+        id_str = tweet["id_str"]
+        print("id: {}".format(id_str))
+        text = tweet["text"]
+        if text.startswith("@null"):
+            text = "[Tweet not available]"
+        print(text + "\n")
+
+
+ALL = [
+    twitterclass_demo,
+    sampletoscreen_demo,
+    tracktoscreen_demo,
+    search_demo,
+    tweets_by_user_demo,
+    lookup_by_userid_demo,
+    followtoscreen_demo,
+    streamtofile_demo,
+    limit_by_time_demo,
+    corpusreader_demo,
+    expand_tweetids_demo,
+]
 
 """
 Select demo functions to run. E.g. replace the following line with "DEMOS =
@@ -285,4 +306,3 @@ if __name__ == "__main__":
     print("\n" + SPACER)
     print("All demos completed")
     print(SPACER)
-
diff --git a/nlp_resource_data/nltk/twitter/twitter_demo.pyc b/nlp_resource_data/nltk/twitter/twitter_demo.pyc
deleted file mode 100755 (executable)
index c79b286..0000000
Binary files a/nlp_resource_data/nltk/twitter/twitter_demo.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/twitter/twitterclient.py b/nlp_resource_data/nltk/twitter/twitterclient.py
old mode 100755 (executable)
new mode 100644 (file)
index bd6197f..a2af7af
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter client
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 #         Lorenzo Rubio <lrnzcig@gmail.com>
 # URL: <http://nltk.org/>
@@ -27,10 +27,10 @@ import datetime
 import itertools
 import json
 import os
-import requests
 import time
 import gzip
 
+import requests
 
 from twython import Twython, TwythonStreamer
 from twython.exceptions import TwythonRateLimitError, TwythonError
@@ -39,7 +39,6 @@ from nltk.twitter.util import credsfromfile, guess_path
 from nltk.twitter.api import TweetHandlerI, BasicTweetHandler
 
 
-
 class Streamer(TwythonStreamer):
     """
     Retrieve data from the Twitter Streaming API.
@@ -47,12 +46,14 @@ class Streamer(TwythonStreamer):
     The streaming API requires
     `OAuth 1.0 <http://en.wikipedia.org/wiki/OAuth>`_ authentication.
     """
+
     def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret):
 
         self.handler = None
         self.do_continue = True
-        TwythonStreamer.__init__(self, app_key, app_secret, oauth_token,
-                                 oauth_token_secret)
+        TwythonStreamer.__init__(
+            self, app_key, app_secret, oauth_token, oauth_token_secret
+        )
 
     def register(self, handler):
         """
@@ -68,7 +69,7 @@ class Streamer(TwythonStreamer):
         """
         if self.do_continue:
             if self.handler is not None:
-                if 'text' in data:
+                if "text" in data:
                     self.handler.counter += 1
                     self.handler.handle(data)
                     self.do_continue = self.handler.do_continue()
@@ -78,7 +79,6 @@ class Streamer(TwythonStreamer):
             self.disconnect()
             self.handler.on_finish()
 
-
     def on_error(self, status_code, data):
         """
         :param status_code: The status code returned by the Twitter API
@@ -104,15 +104,15 @@ class Streamer(TwythonStreamer):
                     print("Error (stream will continue): {0}".format(e))
                 continue
 
-    def filter(self, track='', follow='', lang='en'):
+    def filter(self, track="", follow="", lang="en"):
         """
         Wrapper for the 'statuses/filter' streaming API call.
         """
         while self.do_continue:
-            #Stream in an endless loop until limit is reached
+            # Stream in an endless loop until limit is reached
 
             try:
-                if track == '' and follow == '':
+                if track == "" and follow == "":
                     msg = "Please supply a value for either 'track' or 'follow'"
                     raise ValueError(msg)
                 self.statuses.filter(track=track, follow=follow, lang=lang)
@@ -126,8 +126,8 @@ class Query(Twython):
     """
     Retrieve data from the Twitter REST API.
     """
-    def __init__(self, app_key, app_secret, oauth_token,
-                 oauth_token_secret):
+
+    def __init__(self, app_key, app_secret, oauth_token, oauth_token_secret):
         self.handler = None
         self.do_continue = True
         Twython.__init__(self, app_key, app_secret, oauth_token, oauth_token_secret)
@@ -161,16 +161,13 @@ class Query(Twython):
 
         # The Twitter endpoint takes lists of up to 100 ids, so we chunk the
         # ids.
-        id_chunks = [ids[i:i+100] for i in range(0, len(ids), 100)]
+        id_chunks = [ids[i : i + 100] for i in range(0, len(ids), 100)]
 
-        chunked_tweets = (self.lookup_status(id=chunk) for chunk in
-                          id_chunks)
+        chunked_tweets = (self.lookup_status(id=chunk) for chunk in id_chunks)
 
         return itertools.chain.from_iterable(chunked_tweets)
 
-
-
-    def _search_tweets(self, keywords, limit=100, lang='en'):
+    def _search_tweets(self, keywords, limit=100, lang="en"):
         """
         Assumes that the handler has been informed. Fetches Tweets from
         search_tweets generator output and passes them to the handler.
@@ -181,21 +178,28 @@ class Query(Twython):
         :param str lang: language
         """
         while True:
-            tweets = self.search_tweets(keywords=keywords, limit=limit, lang=lang,
-                                        max_id=self.handler.max_id)
+            tweets = self.search_tweets(
+                keywords=keywords, limit=limit, lang=lang, max_id=self.handler.max_id
+            )
             for tweet in tweets:
                 self.handler.handle(tweet)
             if not (self.handler.do_continue() and self.handler.repeat):
                 break
         self.handler.on_finish()
 
-    def search_tweets(self, keywords, limit=100, lang='en', max_id=None,
-                      retries_after_twython_exception=0):
+    def search_tweets(
+        self,
+        keywords,
+        limit=100,
+        lang="en",
+        max_id=None,
+        retries_after_twython_exception=0,
+    ):
         """
         Call the REST API ``'search/tweets'`` endpoint with some plausible
         defaults. See `the Twitter search documentation
         <https://dev.twitter.com/rest/public/search>`_ for more information
-        about admissable search parameters.
+        about admissible search parameters.
 
         :param str keywords: A list of query terms to search for, written as\
         a comma-separated string
@@ -215,33 +219,38 @@ class Query(Twython):
         if max_id:
             self.handler.max_id = max_id
         else:
-            results = self.search(q=keywords, count=min(100, limit), lang=lang,
-                                  result_type='recent')
-            count = len(results['statuses'])
+            results = self.search(
+                q=keywords, count=min(100, limit), lang=lang, result_type="recent"
+            )
+            count = len(results["statuses"])
             if count == 0:
                 print("No Tweets available through REST API for those keywords")
                 return
             count_from_query = count
-            self.handler.max_id = results['statuses'][count - 1]['id'] - 1
+            self.handler.max_id = results["statuses"][count - 1]["id"] - 1
 
-            for result in results['statuses']:
+            for result in results["statuses"]:
                 yield result
                 self.handler.counter += 1
                 if not self.handler.do_continue():
                     return
 
-
         # Pagination loop: keep fetching Tweets until the desired count is
         # reached while dealing with Twitter rate limits.
         retries = 0
         while count_from_query < limit:
             try:
-                mcount = min(100, limit-count_from_query)
-                results = self.search(q=keywords, count=mcount, lang=lang,
-                                      max_id=self.handler.max_id, result_type='recent')
+                mcount = min(100, limit - count_from_query)
+                results = self.search(
+                    q=keywords,
+                    count=mcount,
+                    lang=lang,
+                    max_id=self.handler.max_id,
+                    result_type="recent",
+                )
             except TwythonRateLimitError as e:
                 print("Waiting for 15 minutes -{0}".format(e))
-                time.sleep(15*60) # wait 15 minutes
+                time.sleep(15 * 60)  # wait 15 minutes
                 continue
             except TwythonError as e:
                 print("Fatal error in Twython request -{0}".format(e))
@@ -249,7 +258,7 @@ class Query(Twython):
                     raise e
                 retries += 1
 
-            count = len(results['statuses'])
+            count = len(results["statuses"])
             if count == 0:
                 print("No more Tweets available through the REST API")
                 return
@@ -258,9 +267,9 @@ class Query(Twython):
             # results['search_metadata']['next_results'], but as part of a
             # query and difficult to fetch. This is doing the equivalent
             # (last tweet id minus one)
-            self.handler.max_id = results['statuses'][count - 1]['id'] - 1
+            self.handler.max_id = results["statuses"][count - 1]["id"] - 1
 
-            for result in results['statuses']:
+            for result in results["statuses"]:
                 yield result
                 self.handler.counter += 1
                 if not self.handler.do_continue():
@@ -277,7 +286,7 @@ class Query(Twython):
         """
         return [self.show_user(user_id=userid) for userid in userids]
 
-    def user_tweets(self, screen_name, limit, include_rts='false'):
+    def user_tweets(self, screen_name, limit, include_rts="false"):
         """
         Return a collection of the most recent Tweets posted by the user
 
@@ -287,27 +296,35 @@ class Query(Twython):
         :param str include_rts: Whether to include statuses which have been\
         retweeted by the user; possible values are 'true' and 'false'
         """
-        data = self.get_user_timeline(screen_name=screen_name, count=limit,
-                                      include_rts=include_rts)
+        data = self.get_user_timeline(
+            screen_name=screen_name, count=limit, include_rts=include_rts
+        )
         for item in data:
             self.handler.handle(item)
 
 
-
-
 class Twitter(object):
     """
     Wrapper class with restricted functionality and fewer options.
     """
+
     def __init__(self):
         self._oauth = credsfromfile()
         self.streamer = Streamer(**self._oauth)
         self.query = Query(**self._oauth)
 
-
-    def tweets(self, keywords='', follow='', to_screen=True, stream=True,
-               limit=100, date_limit=None, lang='en', repeat=False,
-               gzip_compress=False):
+    def tweets(
+        self,
+        keywords="",
+        follow="",
+        to_screen=True,
+        stream=True,
+        limit=100,
+        date_limit=None,
+        lang="en",
+        repeat=False,
+        gzip_compress=False,
+    ):
         """
         Process some Tweets in a simple manner.
 
@@ -347,16 +364,19 @@ class Twitter(object):
             lower_date_limit = date_limit
 
         if to_screen:
-            handler = TweetViewer(limit=limit,
-                                  upper_date_limit=upper_date_limit,
-                                  lower_date_limit=lower_date_limit)
+            handler = TweetViewer(
+                limit=limit,
+                upper_date_limit=upper_date_limit,
+                lower_date_limit=lower_date_limit,
+            )
         else:
-            handler = TweetWriter(limit=limit,
-                                  upper_date_limit=upper_date_limit,
-                                  lower_date_limit=lower_date_limit, repeat=repeat,
-                                  gzip_compress=gzip_compress)
-
-
+            handler = TweetWriter(
+                limit=limit,
+                upper_date_limit=upper_date_limit,
+                lower_date_limit=lower_date_limit,
+                repeat=repeat,
+                gzip_compress=gzip_compress,
+            )
 
         if to_screen:
             handler = TweetViewer(limit=limit)
@@ -368,25 +388,28 @@ class Twitter(object):
                 upper_date_limit = None
                 lower_date_limit = date_limit
 
-            handler = TweetWriter(limit=limit, upper_date_limit=upper_date_limit,
-                                  lower_date_limit=lower_date_limit, repeat=repeat,
-                                  gzip_compress=gzip_compress)
+            handler = TweetWriter(
+                limit=limit,
+                upper_date_limit=upper_date_limit,
+                lower_date_limit=lower_date_limit,
+                repeat=repeat,
+                gzip_compress=gzip_compress,
+            )
 
         if stream:
             self.streamer.register(handler)
-            if keywords == '' and follow == '':
+            if keywords == "" and follow == "":
                 self.streamer.sample()
             else:
                 self.streamer.filter(track=keywords, follow=follow, lang=lang)
         else:
             self.query.register(handler)
-            if keywords == '':
+            if keywords == "":
                 raise ValueError("Please supply at least one keyword to search for.")
             else:
                 self.query._search_tweets(keywords, limit=limit, lang=lang)
 
 
-
 class TweetViewer(TweetHandlerI):
     """
     Handle data by sending it to the terminal.
@@ -400,7 +423,7 @@ class TweetViewer(TweetHandlerI):
         :rtype: bool
         :param data: Tweet object returned by Twitter API
         """
-        text = data['text']
+        text = data["text"]
         print(text)
 
         self.check_date_limit(data)
@@ -408,16 +431,24 @@ class TweetViewer(TweetHandlerI):
             return
 
     def on_finish(self):
-        print('Written {0} Tweets'.format(self.counter))
+        print("Written {0} Tweets".format(self.counter))
 
 
 class TweetWriter(TweetHandlerI):
     """
     Handle data by writing it to a file.
     """
-    def __init__(self, limit=2000, upper_date_limit=None, lower_date_limit=None,
-                 fprefix='tweets', subdir='twitter-files', repeat=False,
-                 gzip_compress=False):
+
+    def __init__(
+        self,
+        limit=2000,
+        upper_date_limit=None,
+        lower_date_limit=None,
+        fprefix="tweets",
+        subdir="twitter-files",
+        repeat=False,
+        gzip_compress=False,
+    ):
         """
         The difference between the upper and lower date limits depends on
         whether Tweets are coming in an ascending date order (i.e. when
@@ -454,7 +485,6 @@ class TweetWriter(TweetHandlerI):
         self.output = None
         TweetHandlerI.__init__(self, limit, upper_date_limit, lower_date_limit)
 
-
     def timestamped_file(self):
         """
         :return: timestamped file name
@@ -467,16 +497,15 @@ class TweetWriter(TweetHandlerI):
                 os.mkdir(subdir)
 
         fname = os.path.join(subdir, fprefix)
-        fmt = '%Y%m%d-%H%M%S'
+        fmt = "%Y%m%d-%H%M%S"
         timestamp = datetime.datetime.now().strftime(fmt)
         if self.gzip_compress:
-            suffix = '.gz'
+            suffix = ".gz"
         else:
-            suffix = ''
-        outfile = '{0}.{1}.json{2}'.format(fname, timestamp, suffix)
+            suffix = ""
+        outfile = "{0}.{1}.json{2}".format(fname, timestamp, suffix)
         return outfile
 
-
     def handle(self, data):
         """
         Write Twitter data as line-delimited JSON into one or more files.
@@ -486,14 +515,14 @@ class TweetWriter(TweetHandlerI):
         """
         if self.startingup:
             if self.gzip_compress:
-                self.output = gzip.open(self.fname, 'w')
+                self.output = gzip.open(self.fname, "w")
             else:
-                self.output = open(self.fname, 'w')
-            print('Writing to {0}'.format(self.fname))
+                self.output = open(self.fname, "w")
+            print("Writing to {0}".format(self.fname))
 
         json_data = json.dumps(data)
         if self.gzip_compress:
-            self.output.write((json_data + "\n").encode('utf-8'))
+            self.output.write((json_data + "\n").encode("utf-8"))
         else:
             self.output.write(json_data + "\n")
 
@@ -504,7 +533,7 @@ class TweetWriter(TweetHandlerI):
         self.startingup = False
 
     def on_finish(self):
-        print('Written {0} Tweets'.format(self.counter))
+        print("Written {0} Tweets".format(self.counter))
         if self.output:
             self.output.close()
 
@@ -522,7 +551,6 @@ class TweetWriter(TweetHandlerI):
             self._restart_file()
         return True
 
-
     def _restart_file(self):
         self.on_finish()
         self.fname = self.timestamped_file()
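
For reference, a sketch of the converted client in use, mirroring twitterclass_demo earlier in this patch (requires the `twython` package and valid OAuth credentials; the keyword is hypothetical):

    from nltk.twitter import Twitter

    tw = Twitter()                                      # credentials read via credsfromfile()
    tw.tweets(keywords="nltk", stream=False, limit=10)  # REST search, printed to screen
    tw.tweets(keywords="nltk", limit=10)                # streaming filter
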
diff --git a/nlp_resource_data/nltk/twitter/twitterclient.pyc b/nlp_resource_data/nltk/twitter/twitterclient.pyc
deleted file mode 100755 (executable)
index 1ea2733..0000000
Binary files a/nlp_resource_data/nltk/twitter/twitterclient.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/twitter/util.py b/nlp_resource_data/nltk/twitter/util.py
old mode 100755 (executable)
new mode 100644 (file)
index 16b1507..1d859f9
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # Natural Language Toolkit: Twitter client
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Ewan Klein <ewan@inf.ed.ac.uk>
 #         Lorenzo Rubio <lrnzcig@gmail.com>
 # URL: <http://nltk.org/>
 Authentication utilities to accompany :module:`twitterclient`.
 """
 
-from __future__ import print_function
-
 import os
 import pprint
 from twython import Twython
 
+
 def credsfromfile(creds_file=None, subdir=None, verbose=False):
     """
     Convenience function for authentication
     """
-    return Authenticate().load_creds(creds_file=creds_file, subdir=subdir, verbose=verbose)
+    return Authenticate().load_creds(
+        creds_file=creds_file, subdir=subdir, verbose=verbose
+    )
 
 
 class Authenticate(object):
     """
     Methods for authenticating with Twitter.
     """
+
     def __init__(self):
-        self.creds_file = 'credentials.txt'
+        self.creds_file = "credentials.txt"
         self.creds_fullpath = None
 
         self.oauth = {}
         try:
-            self.twitter_dir = os.environ['TWITTER']
+            self.twitter_dir = os.environ["TWITTER"]
             self.creds_subdir = self.twitter_dir
         except KeyError:
             self.twitter_dir = None
             self.creds_subdir = None
 
-
     def load_creds(self, creds_file=None, subdir=None, verbose=False):
         """
         Read OAuth credentials from a text file.
@@ -70,25 +71,28 @@ class Authenticate(object):
 
         if subdir is None:
             if self.creds_subdir is None:
-                msg = "Supply a value to the 'subdir' parameter or" +\
-                      " set the TWITTER environment variable."
+                msg = (
+                    "Supply a value to the 'subdir' parameter or"
+                    + " set the TWITTER environment variable."
+                )
                 raise ValueError(msg)
         else:
             self.creds_subdir = subdir
 
-        self.creds_fullpath =\
-            os.path.normpath(os.path.join(self.creds_subdir, self.creds_file))
+        self.creds_fullpath = os.path.normpath(
+            os.path.join(self.creds_subdir, self.creds_file)
+        )
 
         if not os.path.isfile(self.creds_fullpath):
-            raise OSError('Cannot find file {}'.format(self.creds_fullpath))
+            raise OSError("Cannot find file {}".format(self.creds_fullpath))
 
         with open(self.creds_fullpath) as infile:
             if verbose:
-                print('Reading credentials file {}'.format(self.creds_fullpath))
+                print("Reading credentials file {}".format(self.creds_fullpath))
 
             for line in infile:
-                if '=' in line:
-                    name, value = line.split('=', 1)
+                if "=" in line:
+                    name, value = line.split("=", 1)
                     self.oauth[name.strip()] = value.strip()
 
         self._validate_creds_file(verbose=verbose)
@@ -98,16 +102,16 @@ class Authenticate(object):
     def _validate_creds_file(self, verbose=False):
         """Check validity of a credentials file."""
         oauth1 = False
-        oauth1_keys = ['app_key', 'app_secret', 'oauth_token', 'oauth_token_secret']
+        oauth1_keys = ["app_key", "app_secret", "oauth_token", "oauth_token_secret"]
         oauth2 = False
-        oauth2_keys = ['app_key', 'app_secret', 'access_token']
+        oauth2_keys = ["app_key", "app_secret", "access_token"]
         if all(k in self.oauth for k in oauth1_keys):
             oauth1 = True
         elif all(k in self.oauth for k in oauth2_keys):
             oauth2 = True
 
         if not (oauth1 or oauth2):
-            msg = 'Missing or incorrect entries in {}\n'.format(self.creds_file)
+            msg = "Missing or incorrect entries in {}\n".format(self.creds_file)
             msg += pprint.pformat(self.oauth)
             raise ValueError(msg)
         elif verbose:
@@ -121,15 +125,15 @@ def add_access_token(creds_file=None):
     """
     if creds_file is None:
         path = os.path.dirname(__file__)
-        creds_file = os.path.join(path, 'credentials2.txt')
+        creds_file = os.path.join(path, "credentials2.txt")
     oauth2 = credsfromfile(creds_file=creds_file)
-    app_key = oauth2['app_key']
-    app_secret = oauth2['app_secret']
+    app_key = oauth2["app_key"]
+    app_secret = oauth2["app_secret"]
 
     twitter = Twython(app_key, app_secret, oauth_version=2)
     access_token = twitter.obtain_access_token()
-    tok = 'access_token={}\n'.format(access_token)
-    with open(creds_file, 'a') as infile:
+    tok = "access_token={}\n".format(access_token)
+    with open(creds_file, "a") as infile:
         print(tok, file=infile)
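
The credentials file read by load_creds() is plain "name = value" lines; a hypothetical OAuth 1.0 example (placed in the directory named by the TWITTER environment variable or the `subdir` parameter):

    # credentials.txt
    app_key=YOUR_APP_KEY
    app_secret=YOUR_APP_SECRET
    oauth_token=YOUR_OAUTH_TOKEN
    oauth_token_secret=YOUR_OAUTH_TOKEN_SECRET
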
 
 
diff --git a/nlp_resource_data/nltk/twitter/util.pyc b/nlp_resource_data/nltk/twitter/util.pyc
deleted file mode 100755 (executable)
index bda337c..0000000
Binary files a/nlp_resource_data/nltk/twitter/util.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/util.py b/nlp_resource_data/nltk/util.py
old mode 100755 (executable)
new mode 100644 (file)
index 2dcb782..baff54e
@@ -1,10 +1,9 @@
 # Natural Language Toolkit: Utility functions
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # Author: Steven Bird <stevenbird1@gmail.com>
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
-from __future__ import print_function
 
 import sys
 import inspect
@@ -16,57 +15,67 @@ import pydoc
 import bisect
 import os
 
-from itertools import islice, chain, combinations
+from itertools import islice, chain, combinations, tee
 from pprint import pprint
 from collections import defaultdict, deque
 from sys import version_info
 
-from six import class_types, string_types, text_type
-from six.moves.urllib.request import (build_opener, install_opener, getproxies,
-                                      ProxyHandler, ProxyBasicAuthHandler,
-                                      ProxyDigestAuthHandler,
-                                      HTTPPasswordMgrWithDefaultRealm)
+from urllib.request import (
+    build_opener,
+    install_opener,
+    getproxies,
+    ProxyHandler,
+    ProxyBasicAuthHandler,
+    ProxyDigestAuthHandler,
+    HTTPPasswordMgrWithDefaultRealm,
+)
 
 from nltk.internals import slice_bounds, raise_unorderable_types
 from nltk.collections import *
-from nltk.compat import python_2_unicode_compatible
-
 
 
 ######################################################################
 # Short usage message
 ######################################################################
 
-def usage(obj, selfname='self'):
-    str(obj) # In case it's lazy, this will load it.
 
-    if not isinstance(obj, class_types):
+def usage(obj, selfname="self"):
+    str(obj)  # In case it's lazy, this will load it.
+
+    if not isinstance(obj, type):
         obj = obj.__class__
 
-    print('%s supports the following operations:' % obj.__name__)
+    print("%s supports the following operations:" % obj.__name__)
     for (name, method) in sorted(pydoc.allmethods(obj).items()):
-        if name.startswith('_'): continue
-        if getattr(method, '__deprecated__', False): continue
+        if name.startswith("_"):
+            continue
+        if getattr(method, "__deprecated__", False):
+            continue
 
-        if sys.version_info[0] >= 3:
-            getargspec = inspect.getfullargspec
-        else:
-            getargspec = inspect.getargspec
+        getargspec = inspect.getfullargspec
         args, varargs, varkw, defaults = getargspec(method)[:4]
-        if (args and args[0]=='self' and
-            (defaults is None or len(args)>len(defaults))):
+        if (
+            args
+            and args[0] == "self"
+            and (defaults is None or len(args) > len(defaults))
+        ):
             args = args[1:]
-            name = '%s.%s' % (selfname, name)
-        argspec = inspect.formatargspec(
-            args, varargs, varkw, defaults)
-        print(textwrap.fill('%s%s' % (name, argspec),
-                            initial_indent='  - ',
-                            subsequent_indent=' '*(len(name)+5)))
+            name = "%s.%s" % (selfname, name)
+        argspec = inspect.formatargspec(args, varargs, varkw, defaults)
+        print(
+            textwrap.fill(
+                "%s%s" % (name, argspec),
+                initial_indent="  - ",
+                subsequent_indent=" " * (len(name) + 5),
+            )
+        )
+
 
 ##########################################################################
 # IDLE
 ##########################################################################
 
+
 def in_idle():
     """
     Return True if this function is run within idle.  Tkinter
@@ -79,12 +88,15 @@ def in_idle():
     :rtype: bool
     """
     import sys
-    return sys.stdin.__class__.__name__ in ('PyShell', 'RPCProxy')
+
+    return sys.stdin.__class__.__name__ in ("PyShell", "RPCProxy")
+
 
 ##########################################################################
 # PRETTY PRINTING
 ##########################################################################
 
+
 def pr(data, start=0, end=None):
     """
     Pretty print a sequence of data items
@@ -98,6 +110,7 @@ def pr(data, start=0, end=None):
     """
     pprint(list(islice(data, start, end)))
 
+
 def print_string(s, width=70):
     """
     Pretty print a string, breaking lines on whitespace
@@ -107,7 +120,8 @@ def print_string(s, width=70):
     :param width: the display width
     :type width: int
     """
-    print('\n'.join(textwrap.wrap(s, width=width)))
+    print("\n".join(textwrap.wrap(s, width=width)))
+
 
 def tokenwrap(tokens, separator=" ", width=70):
     """
@@ -120,17 +134,22 @@ def tokenwrap(tokens, separator=" ", width=70):
     :param width: the display width (default=70)
     :type width: int
     """
-    return '\n'.join(textwrap.wrap(separator.join(tokens), width=width))
+    return "\n".join(textwrap.wrap(separator.join(tokens), width=width))
 
 
 ##########################################################################
 # Python version
 ##########################################################################
 
+
 def py25():
     return version_info[0] == 2 and version_info[1] == 5
+
+
 def py26():
     return version_info[0] == 2 and version_info[1] == 6
+
+
 def py27():
     return version_info[0] == 2 and version_info[1] == 7
 
@@ -139,8 +158,8 @@ def py27():
 # Indexing
 ##########################################################################
 
-class Index(defaultdict):
 
+class Index(defaultdict):
     def __init__(self, pairs):
         defaultdict.__init__(self, list)
         for key, value in pairs:
@@ -151,6 +170,7 @@ class Index(defaultdict):
 ## Regexp display (thanks to David Mertz)
 ######################################################################
 
+
 def re_show(regexp, string, left="{", right="}"):
     """
     Return a string with markers surrounding the matched substrings.
@@ -176,18 +196,20 @@ def re_show(regexp, string, left="{", right="}"):
 
 # recipe from David Mertz
 def filestring(f):
-    if hasattr(f, 'read'):
+    if hasattr(f, "read"):
         return f.read()
-    elif isinstance(f, string_types):
-        with open(f, 'r') as infile:
+    elif isinstance(f, str):
+        with open(f, "r") as infile:
             return infile.read()
     else:
         raise ValueError("Must be called with a filename or file-like object")
 
+
 ##########################################################################
 # Breadth-First Search
 ##########################################################################
 
+
 def breadth_first(tree, children=iter, maxdepth=-1):
     """Traverse the nodes of a tree in breadth-first order.
     (No need to check for cycles.)
@@ -207,6 +229,7 @@ def breadth_first(tree, children=iter, maxdepth=-1):
             except TypeError:
                 pass
 
+
 ##########################################################################
 # Guess Character Encoding
 ##########################################################################
@@ -214,6 +237,7 @@ def breadth_first(tree, children=iter, maxdepth=-1):
 # adapted from io.py in the docutils extension module (http://docutils.sourceforge.net)
 # http://www.pyzine.com/Issue008/Section_Articles/article_Encodings.html
 
+
 def guess_encoding(data):
     """
     Given a byte string, attempt to decode it.
@@ -229,7 +253,7 @@ def guess_encoding(data):
     """
     successful_encoding = None
     # we make 'utf-8' the first encoding
-    encodings = ['utf-8']
+    encodings = ["utf-8"]
     #
     # next we add anything we can learn from the locale
     try:
@@ -246,14 +270,14 @@ def guess_encoding(data):
         pass
     #
     # we try 'latin-1' last
-    encodings.append('latin-1')
+    encodings.append("latin-1")
     for enc in encodings:
         # some of the locale calls
         # may have returned None
         if not enc:
             continue
         try:
-            decoded = text_type(data, enc)
+            decoded = str(data, enc)
             successful_encoding = enc
 
         except (UnicodeError, LookupError):
@@ -261,30 +285,35 @@ def guess_encoding(data):
         else:
             break
     if not successful_encoding:
-         raise UnicodeError(
-        'Unable to decode input data.  Tried the following encodings: %s.'
-        % ', '.join([repr(enc) for enc in encodings if enc]))
+        raise UnicodeError(
+            "Unable to decode input data. "
+            "Tried the following encodings: %s."
+            % ", ".join([repr(enc) for enc in encodings if enc])
+        )
     else:
-         return (decoded, successful_encoding)
+        return (decoded, successful_encoding)
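
For example (a sketch; which encodings are tried also depends on the locale):

    >>> from nltk.util import guess_encoding
    >>> guess_encoding("café".encode("utf-8"))
    ('café', 'utf-8')
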
 
 
 ##########################################################################
 # Remove repeated elements from a list deterministically
 ##########################################################################
 
+
 def unique_list(xs):
     seen = set()
     # "not seen.add(x)" is always True, since set.add() returns None; it records x as seen inline, avoiding an explicit if statement.
     return [x for x in xs if x not in seen and not seen.add(x)]
 
+
 ##########################################################################
 # Invert a dictionary
 ##########################################################################
 
+
 def invert_dict(d):
     inverted_dict = defaultdict(list)
     for key in d:
-        if hasattr(d[key], '__iter__'):
+        if hasattr(d[key], "__iter__"):
             for term in d[key]:
                 inverted_dict[term].append(key)
         else:
@@ -297,6 +326,7 @@ def invert_dict(d):
 # The graph is represented as a dictionary of sets
 ##########################################################################
 
+
 def transitive_closure(graph, reflexive=False):
     """
     Calculate the transitive closure of a directed graph,
@@ -347,21 +377,28 @@ def invert_graph(graph):
     return inverted
 
 
-
 ##########################################################################
 # HTML Cleaning
 ##########################################################################
 
+
 def clean_html(html):
-    raise NotImplementedError ("To remove HTML markup, use BeautifulSoup's get_text() function")
+    raise NotImplementedError(
+        "To remove HTML markup, use BeautifulSoup's get_text() function"
+    )
+
 
 def clean_url(url):
-    raise NotImplementedError ("To remove HTML markup, use BeautifulSoup's get_text() function")
+    raise NotImplementedError(
+        "To remove HTML markup, use BeautifulSoup's get_text() function"
+    )
+
 
 ##########################################################################
 # FLATTEN LISTS
 ##########################################################################
 
+
 def flatten(*args):
     """
     Flatten a list.
@@ -376,7 +413,8 @@ def flatten(*args):
 
     x = []
     for l in args:
-        if not isinstance(l, (list, tuple)): l = [l]
+        if not isinstance(l, (list, tuple)):
+            l = [l]
         for item in l:
             if isinstance(item, (list, tuple)):
                 x.extend(flatten(item))
@@ -384,12 +422,20 @@ def flatten(*args):
                 x.append(item)
     return x
 
+
 ##########################################################################
 # Ngram iteration
 ##########################################################################
 
-def pad_sequence(sequence, n, pad_left=False, pad_right=False,
-                 left_pad_symbol=None, right_pad_symbol=None):
+
+def pad_sequence(
+    sequence,
+    n,
+    pad_left=False,
+    pad_right=False,
+    left_pad_symbol=None,
+    right_pad_symbol=None,
+):
     """
     Returns a padded sequence of items before ngram extraction.
 
@@ -416,15 +462,23 @@ def pad_sequence(sequence, n, pad_left=False, pad_right=False,
     """
     sequence = iter(sequence)
     if pad_left:
-        sequence = chain((left_pad_symbol,) * (n-1), sequence)
+        sequence = chain((left_pad_symbol,) * (n - 1), sequence)
     if pad_right:
-        sequence = chain(sequence, (right_pad_symbol,) * (n-1))
+        sequence = chain(sequence, (right_pad_symbol,) * (n - 1))
     return sequence
 
+
 # add a flag to pad the sequence so we get peripheral ngrams?
 
-def ngrams(sequence, n, pad_left=False, pad_right=False,
-           left_pad_symbol=None, right_pad_symbol=None):
+
+def ngrams(
+    sequence,
+    n,
+    pad_left=False,
+    pad_right=False,
+    left_pad_symbol=None,
+    right_pad_symbol=None,
+):
     """
     Return the ngrams generated from a sequence of items, as an iterator.
     For example:
@@ -460,18 +514,26 @@ def ngrams(sequence, n, pad_left=False, pad_right=False,
     :type right_pad_symbol: any
     :rtype: sequence or iter
     """
-    sequence = pad_sequence(sequence, n, pad_left, pad_right,
-                            left_pad_symbol, right_pad_symbol)
+    sequence = pad_sequence(
+        sequence, n, pad_left, pad_right, left_pad_symbol, right_pad_symbol
+    )
 
     history = []
     while n > 1:
-        history.append(next(sequence))
+        # PEP 479: a StopIteration escaping a generator is turned into a RuntimeError, so catch it and return explicitly
+        try:
+            next_item = next(sequence)
+        except StopIteration:
+            # no more data, terminate the generator
+            return
+        history.append(next_item)
         n -= 1
     for item in sequence:
         history.append(item)
         yield tuple(history)
         del history[0]
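
With left padding, for example:

    >>> from nltk.util import ngrams
    >>> list(ngrams([1, 2, 3], 2, pad_left=True, left_pad_symbol="<s>"))
    [('<s>', 1), (1, 2), (2, 3)]
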
 
+
 def bigrams(sequence, **kwargs):
     """
     Return the bigrams generated from a sequence of items, as an iterator.
@@ -491,6 +553,7 @@ def bigrams(sequence, **kwargs):
     for item in ngrams(sequence, 2, **kwargs):
         yield item
 
+
 def trigrams(sequence, **kwargs):
     """
     Return the trigrams generated from a sequence of items, as an iterator.
@@ -510,6 +573,7 @@ def trigrams(sequence, **kwargs):
     for item in ngrams(sequence, 3, **kwargs):
         yield item
 
+
 def everygrams(sequence, min_len=1, max_len=-1, **kwargs):
     """
     Returns all possible ngrams generated from a sequence of items, as an iterator.
@@ -531,10 +595,11 @@ def everygrams(sequence, min_len=1, max_len=-1, **kwargs):
 
     if max_len == -1:
         max_len = len(sequence)
-    for n in range(min_len, max_len+1):
+    for n in range(min_len, max_len + 1):
         for ng in ngrams(sequence, n, **kwargs):
             yield ng
 
+
 def skipgrams(sequence, n, k, **kwargs):
     """
     Returns all possible skipgrams generated from a sequence of items, as an iterator.
@@ -557,7 +622,7 @@ def skipgrams(sequence, n, k, **kwargs):
     """
 
     # Pads the sequence as desired by **kwargs.
-    if 'pad_left' in kwargs or 'pad_right' in kwargs:
+    if "pad_left" in kwargs or "pad_right" in kwargs:
         sequence = pad_sequence(sequence, n, **kwargs)
 
     # Note when iterating through the ngrams, the pad_right here is not
@@ -572,6 +637,7 @@ def skipgrams(sequence, n, k, **kwargs):
                 continue
             yield head + skip_tail
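
For example, with n=2 and one allowed skip:

    >>> from nltk.util import skipgrams
    >>> list(skipgrams([1, 2, 3, 4], 2, 1))
    [(1, 2), (1, 3), (2, 3), (2, 4), (3, 4)]
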
 
+
 ######################################################################
 # Binary Search in a File
 ######################################################################
@@ -588,12 +654,12 @@ def binary_search_file(file, key, cache={}, cacheDepth=-1):
     :param key: the identifier we are searching for.
     """
 
-    key = key + ' '
+    key = key + " "
     keylen = len(key)
     start = 0
     currentDepth = 0
 
-    if hasattr(file, 'name'):
+    if hasattr(file, "name"):
         end = os.stat(file.name).st_size - 1
     else:
         file.seek(0, 2)
@@ -612,13 +678,14 @@ def binary_search_file(file, key, cache={}, cacheDepth=-1):
             while True:
                 file.seek(max(0, middle - 1))
                 if middle > 0:
-                    file.readline()
+                    file.discard_line()
                 offset = file.tell()
                 line = file.readline()
-                if line != "": break
+                if line != "":
+                    break
                 # at EOF; try to find start of the last line
-                middle = (start + middle)//2
-                if middle == end -1:
+                middle = (start + middle) // 2
+                if middle == end - 1:
                     return None
             if currentDepth < cacheDepth:
                 cache[middle] = (offset, line)
@@ -644,11 +711,13 @@ def binary_search_file(file, key, cache={}, cacheDepth=-1):
 
     return None
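
A usage sketch (the file name is hypothetical; note that after this change the file object must provide the discard_line() used above, e.g. nltk.data's SeekableUnicodeStreamReader, rather than a plain text-mode file):

    from nltk.data import SeekableUnicodeStreamReader
    from nltk.util import binary_search_file

    # lines must be sorted and begin with "<key> "
    reader = SeekableUnicodeStreamReader(open("index.noun", "rb"), "utf8")
    line = binary_search_file(reader, "dog")  # the matching line, or None
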
 
+
 ######################################################################
 # Proxy configuration
 ######################################################################
 
-def set_proxy(proxy, user=None, password=''):
+
+def set_proxy(proxy, user=None, password=""):
     """
     Set the HTTP proxy for Python to download through.
 
@@ -661,24 +730,21 @@ def set_proxy(proxy, user=None, password=''):
         authentication.
     :param password: The password to authenticate with.
     """
-    from nltk import compat
-
     if proxy is None:
         # Try and find the system proxy settings
         try:
-            proxy = getproxies()['http']
+            proxy = getproxies()["http"]
         except KeyError:
-            raise ValueError('Could not detect default proxy settings')
+            raise ValueError("Could not detect default proxy settings")
 
     # Set up the proxy handler
-    proxy_handler = ProxyHandler({'https': proxy, 'http': proxy})
+    proxy_handler = ProxyHandler({"https": proxy, "http": proxy})
     opener = build_opener(proxy_handler)
 
     if user is not None:
         # Set up basic proxy authentication if provided
         password_manager = HTTPPasswordMgrWithDefaultRealm()
-        password_manager.add_password(realm=None, uri=proxy, user=user,
-                passwd=password)
+        password_manager.add_password(realm=None, uri=proxy, user=user, passwd=password)
         opener.add_handler(ProxyBasicAuthHandler(password_manager))
         opener.add_handler(ProxyDigestAuthHandler(password_manager))
 
@@ -705,22 +771,24 @@ def elementtree_indent(elem, level=0):
     :return:  Contents of elem indented to reflect its structure
     """
 
-    i = "\n" + level*"  "
+    i = "\n" + level * "  "
     if len(elem):
         if not elem.text or not elem.text.strip():
             elem.text = i + "  "
         for elem in elem:
-            elementtree_indent(elem, level+1)
+            elementtree_indent(elem, level + 1)
         if not elem.tail or not elem.tail.strip():
             elem.tail = i
     else:
         if level and (not elem.tail or not elem.tail.strip()):
             elem.tail = i
 
+
 ######################################################################
 # Mathematical approximations
 ######################################################################
 
+
 def choose(n, k):
     """
     This function is a fast way to calculate binomial coefficients, commonly
@@ -749,3 +817,29 @@ def choose(n, k):
         return ntok // ktok
     else:
         return 0
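
For instance:

    >>> from nltk.util import choose
    >>> choose(4, 2)  # 4! / (2! * (4 - 2)!)
    6
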
+
+
+######################################################################
+# Iteration utilities
+######################################################################
+
+
+def pairwise(iterable):
+    """s -> (s0,s1), (s1,s2), (s2, s3), ..."""
+    a, b = tee(iterable)
+    next(b, None)
+    return zip(a, b)
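
For example:

    >>> from nltk.util import pairwise
    >>> list(pairwise([1, 2, 3]))
    [(1, 2), (2, 3)]
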
+
+######################################################################
+# Parallelization
+######################################################################
+
+
+def parallelize_preprocess(func, iterator, processes, progress_bar=False):
+    from tqdm import tqdm
+    from joblib import Parallel, delayed
+
+    iterator = tqdm(iterator) if progress_bar else iterator
+    if processes <= 1:
+        return map(func, iterator)
+    return Parallel(n_jobs=processes)(delayed(func)(line) for line in iterator)
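
A usage sketch (requires the optional `tqdm` and `joblib` packages; str.split stands in for a real preprocessing function):

    from nltk.util import parallelize_preprocess

    lines = ["a b c", "d e f"]
    tokenized = list(parallelize_preprocess(str.split, lines, processes=2))
    # -> [['a', 'b', 'c'], ['d', 'e', 'f']]
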
diff --git a/nlp_resource_data/nltk/util.pyc b/nlp_resource_data/nltk/util.pyc
deleted file mode 100755 (executable)
index 897630b..0000000
Binary files a/nlp_resource_data/nltk/util.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/wsd.py b/nlp_resource_data/nltk/wsd.py
old mode 100755 (executable)
new mode 100644 (file)
index f77b0cb..ed9599c
@@ -3,7 +3,7 @@
 # Authors: Liling Tan <alvations@gmail.com>,
 #          Dmitrijs Milajevs <dimazest@gmail.com>
 #
-# Copyright (C) 2001-2017 NLTK Project
+# Copyright (C) 2001-2020 NLTK Project
 # URL: <http://nltk.org/>
 # For license information, see LICENSE.TXT
 
@@ -14,7 +14,7 @@ def lesk(context_sentence, ambiguous_word, pos=None, synsets=None):
     """Return a synset for an ambiguous word in a context.
 
     :param iter context_sentence: The context sentence where the ambiguous word
-    occurs, passed as an iterable of words.
+         occurs, passed as an iterable of words.
     :param str ambiguous_word: The ambiguous word that requires WSD.
     :param str pos: A specified Part-of-Speech (POS).
     :param iter synsets: Possible synsets of the ambiguous word.
@@ -49,5 +49,3 @@ def lesk(context_sentence, ambiguous_word, pos=None, synsets=None):
     )
 
     return sense
-
-
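
A doctest-style sketch, using the classic example from the NLTK documentation (requires the WordNet data; the chosen synset depends on the installed corpus):

    >>> from nltk.wsd import lesk
    >>> sent = "I went to the bank to deposit money .".split()
    >>> lesk(sent, "bank", "n")
    Synset('savings_bank.n.02')
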
diff --git a/nlp_resource_data/nltk/wsd.pyc b/nlp_resource_data/nltk/wsd.pyc
deleted file mode 100755 (executable)
index d3281e2..0000000
Binary files a/nlp_resource_data/nltk/wsd.pyc and /dev/null differ
index a071622..78fa66e 100755 (executable)
@@ -67,8 +67,8 @@ tpk-backend -d %{name} --preload --force-remove
 %attr(755,root,root) %{_app_bin_dir}/org.tizen.nlp.service
 %{_app_bin_dir}/*
 %{TZ_SYS_RO_PACKAGES}/org.tizen.nlp.service.xml
-%{_libdir}/python2.7/site-packages/langdetect/*
-%{_libdir}/python2.7/site-packages/nltk/*
+%{_libdir}/python3.7/site-packages/langdetect/*
+%{_libdir}/python3.7/site-packages/nltk/*
 %license LICENSE
 
 %files data-en